bio-ucsc-api 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +2 -0
- data/COPYING +58 -0
- data/COPYING.ja +51 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +49 -0
- data/README.rdoc +57 -0
- data/Rakefile +72 -0
- data/VERSION +1 -0
- data/bio-ucsc-api.gemspec +212 -0
- data/lib/bio-ucsc.rb +20 -0
- data/lib/bio-ucsc/hg18.rb +27 -0
- data/lib/bio-ucsc/hg18/activerecord.rb +166 -0
- data/lib/bio-ucsc/hg18/cnpiafrate2.rb +31 -0
- data/lib/bio-ucsc/hg18/cnplocke.rb +31 -0
- data/lib/bio-ucsc/hg18/cnpredon.rb +32 -0
- data/lib/bio-ucsc/hg18/cnpsebat2.rb +35 -0
- data/lib/bio-ucsc/hg18/cnpsharp2.rb +32 -0
- data/lib/bio-ucsc/hg18/db_connection.rb +61 -0
- data/lib/bio-ucsc/hg18/dgv.rb +27 -0
- data/lib/bio-ucsc/hg18/refgene.rb +27 -0
- data/lib/bio-ucsc/hg18/rmsk.rb +51 -0
- data/lib/bio-ucsc/hg18/tables.rb +142 -0
- data/lib/bio-ucsc/hg19.rb +54 -0
- data/lib/bio-ucsc/hg19/activerecord.rb +217 -0
- data/lib/bio-ucsc/hg19/ccdsgene.rb +33 -0
- data/lib/bio-ucsc/hg19/cytoband.rb +33 -0
- data/lib/bio-ucsc/hg19/db_connection.rb +61 -0
- data/lib/bio-ucsc/hg19/dgv.rb +27 -0
- data/lib/bio-ucsc/hg19/ensgene.rb +21 -0
- data/lib/bio-ucsc/hg19/gwascatalog.rb +26 -0
- data/lib/bio-ucsc/hg19/hapmapalleleschimp.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapallelesmacaque.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsasw.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsceu.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpschb.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpschd.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsgih.rb +34 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsjpt.rb +34 -0
- data/lib/bio-ucsc/hg19/hapmapsnpslwk.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsmex.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsmkk.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpstsi.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsyri.rb +35 -0
- data/lib/bio-ucsc/hg19/knowngene.rb +34 -0
- data/lib/bio-ucsc/hg19/omimgene.rb +25 -0
- data/lib/bio-ucsc/hg19/phastconselements46wayprimates.rb +27 -0
- data/lib/bio-ucsc/hg19/phylop46wayprimates.rb +27 -0
- data/lib/bio-ucsc/hg19/refgene.rb +27 -0
- data/lib/bio-ucsc/hg19/rmsk.rb +32 -0
- data/lib/bio-ucsc/hg19/snp131.rb +30 -0
- data/lib/bio-ucsc/hg19/snp132.rb +31 -0
- data/lib/bio-ucsc/hg19/snp132codingdbsnp.rb +29 -0
- data/lib/bio-ucsc/hg19/snp132common.rb +32 -0
- data/lib/bio-ucsc/hg19/snp132flagged.rb +31 -0
- data/lib/bio-ucsc/hg19/snp132mult.rb +32 -0
- data/lib/bio-ucsc/hg19/wgrna.rb +29 -0
- data/lib/bio-ucsc/ucsc_bin.rb +148 -0
- data/samples/hg19-sample.rb +66 -0
- data/spec/hg18/cnpiafrate2_spec.rb +23 -0
- data/spec/hg18/cnplocke_spec.rb +23 -0
- data/spec/hg18/cnpredon_spec.rb +23 -0
- data/spec/hg18/cnpsebat2_spec.rb +23 -0
- data/spec/hg18/cnpsharp2_spec.rb +23 -0
- data/spec/hg18/db_connection_spec.rb +36 -0
- data/spec/hg18/dgv_spec.rb +23 -0
- data/spec/hg18/refgene_spec.rb +23 -0
- data/spec/hg18/rmsk_spec.rb +33 -0
- data/spec/hg19/ccdsgene_spec.rb +23 -0
- data/spec/hg19/cytoband_spec.rb +23 -0
- data/spec/hg19/db_connection_spec.rb +37 -0
- data/spec/hg19/dgv_spec.rb +23 -0
- data/spec/hg19/ensgene_spec.rb +23 -0
- data/spec/hg19/gwascatalog_spec.rb +23 -0
- data/spec/hg19/hapmapalleleschimp_spec.rb +23 -0
- data/spec/hg19/hapmapallelesmacaque_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsasw_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsceu_spec.rb +23 -0
- data/spec/hg19/hapmapsnpschb_spec.rb +23 -0
- data/spec/hg19/hapmapsnpschd_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsgih_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsjpt_spec.rb +23 -0
- data/spec/hg19/hapmapsnpslwk_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsmex_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsmkk_spec.rb +23 -0
- data/spec/hg19/hapmapsnpstsi_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsyri_spec.rb +23 -0
- data/spec/hg19/knowngene_spec.rb +23 -0
- data/spec/hg19/omimGene_spec.rb +23 -0
- data/spec/hg19/phastconselements46wayprimates_spec.rb +23 -0
- data/spec/hg19/phyloP46wayPrimates_spec.rb +23 -0
- data/spec/hg19/refgene_spec.rb +23 -0
- data/spec/hg19/rmsk_spec.rb +23 -0
- data/spec/hg19/snp132Flagged_spec.rb +24 -0
- data/spec/hg19/snp132_spec.rb +23 -0
- data/spec/hg19/snp132codingdbsnp_spec.rb +23 -0
- data/spec/hg19/snp132common_spec.rb +24 -0
- data/spec/hg19/snp132mult_spec.rb +23 -0
- data/spec/hg19/wgrna_spec.rb +23 -0
- data/spec/spec_helper.rb +12 -0
- metadata +368 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = hg19/snp132codingdbsnp.rb
|
|
3
|
+
# Copyright::
|
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
5
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
6
|
+
#
|
|
7
|
+
# = Table description in UCSC Table Browser
|
|
8
|
+
# This track contains information about a subset of the single
|
|
9
|
+
# nucleotide polymorphisms and small insertions and deletions (indels)
|
|
10
|
+
# -- collectively Simple Nucleotide Polymorphisms -- from dbSNP
|
|
11
|
+
# build 132, available from ftp.ncbi.nih.gov/snp. Only SNPs that have a
|
|
12
|
+
# minor allele frequency of at least 1% and are mapped to a single
|
|
13
|
+
# location in the reference genome assembly are included in this
|
|
14
|
+
# subset. Frequency data are not available for all SNPs, so this subset
|
|
15
|
+
# is incomplete.
|
|
16
|
+
#
|
|
17
|
+
# Annotations of the effects of SNPs on translated protein sequence.
|
|
18
|
+
|
|
19
|
+
module Bio
|
|
20
|
+
module Ucsc
|
|
21
|
+
module Hg19
|
|
22
|
+
class Snp132CodingDbSnp < DBConnection
|
|
23
|
+
extend Bio::Ucsc::Hg19::QueryUsingChromBin
|
|
24
|
+
set_table_name 'snp132CodingDbSnp'
|
|
25
|
+
set_primary_key nil
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = hg19/snp132common.rb
|
|
3
|
+
# Copyright::
|
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
5
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
6
|
+
#
|
|
7
|
+
# = Table description in UCSC Table Browser
|
|
8
|
+
# This track contains information about a subset of the single
|
|
9
|
+
# nucleotide polymorphisms and small insertions and deletions (indels)
|
|
10
|
+
# -- collectively Simple Nucleotide Polymorphisms -- from dbSNP
|
|
11
|
+
# build 132, available from ftp.ncbi.nih.gov/snp. Only SNPs that have a
|
|
12
|
+
# minor allele frequency of at least 1% and are mapped to a single
|
|
13
|
+
# location in the reference genome assembly are included in this
|
|
14
|
+
# subset. Frequency data are not available for all SNPs, so this subset
|
|
15
|
+
# is incomplete.
|
|
16
|
+
#
|
|
17
|
+
# Common SNPs (132): uniquely mapped variants that appear in at least
|
|
18
|
+
# 1% of the population
|
|
19
|
+
|
|
20
|
+
module Bio
|
|
21
|
+
module Ucsc
|
|
22
|
+
module Hg19
|
|
23
|
+
class Snp132Common < DBConnection
|
|
24
|
+
extend Bio::Ucsc::Hg19::QueryUsingChromBin
|
|
25
|
+
set_table_name 'snp132Common'
|
|
26
|
+
set_primary_key nil
|
|
27
|
+
columns_hash.delete("valid")
|
|
28
|
+
columns_hash.delete("class")
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# = hg19/snp132flagged.rb
|
|
2
|
+
# Copyright::
|
|
3
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
4
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
5
|
+
#
|
|
6
|
+
# = Table description in UCSC Table Browser
|
|
7
|
+
# This track contains information about a subset of the single
|
|
8
|
+
# nucleotide polymorphisms and small insertions and deletions (indels)
|
|
9
|
+
# -- collectively Simple Nucleotide Polymorphisms -- from dbSNP
|
|
10
|
+
# build 132, available from ftp.ncbi.nih.gov/snp. Only SNPs that have a
|
|
11
|
+
# minor allele frequency of at least 1% and are mapped to a single
|
|
12
|
+
# location in the reference genome assembly are included in this
|
|
13
|
+
# subset. Frequency data are not available for all SNPs, so this subset
|
|
14
|
+
# is incomplete.
|
|
15
|
+
#
|
|
16
|
+
# Flagged SNPs (132): uniquely mapped variants, excluding Common SNPs,
|
|
17
|
+
# that have been flagged by dbSNP as "clinically associated"
|
|
18
|
+
|
|
19
|
+
module Bio
|
|
20
|
+
module Ucsc
|
|
21
|
+
module Hg19
|
|
22
|
+
class Snp132Flagged < DBConnection
|
|
23
|
+
extend Bio::Ucsc::Hg19::QueryUsingChromBin
|
|
24
|
+
set_table_name 'snp132Flagged'
|
|
25
|
+
set_primary_key nil
|
|
26
|
+
columns_hash.delete("valid")
|
|
27
|
+
columns_hash.delete("class")
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = hg19/snp132mult.rb
|
|
3
|
+
# Copyright::
|
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
5
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
6
|
+
#
|
|
7
|
+
# = Table description in UCSC Table Browser
|
|
8
|
+
# This track contains information about a subset of the single
|
|
9
|
+
# nucleotide polymorphisms and small insertions and deletions (indels)
|
|
10
|
+
# -- collectively Simple Nucleotide Polymorphisms -- from dbSNP
|
|
11
|
+
# build 132, available from ftp.ncbi.nih.gov/snp. Only SNPs that have a
|
|
12
|
+
# minor allele frequency of at least 1% and are mapped to a single
|
|
13
|
+
# location in the reference genome assembly are included in this
|
|
14
|
+
# subset. Frequency data are not available for all SNPs, so this subset
|
|
15
|
+
# is incomplete.
|
|
16
|
+
#
|
|
17
|
+
# Mult. SNPs(132): variants that have been mapped to more than one
|
|
18
|
+
# genomic location
|
|
19
|
+
|
|
20
|
+
module Bio
|
|
21
|
+
module Ucsc
|
|
22
|
+
module Hg19
|
|
23
|
+
class Snp132Mult < DBConnection
|
|
24
|
+
extend Bio::Ucsc::Hg19::QueryUsingChromBin
|
|
25
|
+
set_table_name 'snp132Mult'
|
|
26
|
+
set_primary_key nil
|
|
27
|
+
columns_hash.delete("valid")
|
|
28
|
+
columns_hash.delete("class")
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = hg19/wgrna.rb
|
|
3
|
+
# Copyright::
|
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
5
|
+
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
|
6
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
7
|
+
#
|
|
8
|
+
# = Table description in UCSC Table Browser
|
|
9
|
+
# This track displays positions of four different types of RNA in the
|
|
10
|
+
# human genome:
|
|
11
|
+
# * precursor forms of microRNAs (pre-miRNAs)
|
|
12
|
+
# * C/D box small nucleolar RNAs (C/D box snoRNAs) and H/ACA box snoRNAs
|
|
13
|
+
# * small Cajal body-specific RNAs (scaRNAs)
|
|
14
|
+
#
|
|
15
|
+
# = omitted dynamic method(s) due to the method name collision
|
|
16
|
+
# use results[:type] instead of results.type
|
|
17
|
+
|
|
18
|
+
module Bio
|
|
19
|
+
module Ucsc
|
|
20
|
+
module Hg19
|
|
21
|
+
class WgRna < DBConnection
|
|
22
|
+
extend Bio::Ucsc::Hg19::QueryUsingChromBin
|
|
23
|
+
set_table_name 'wgRna'
|
|
24
|
+
set_primary_key nil
|
|
25
|
+
columns_hash.delete("type")
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# = UCSCBin
|
|
2
|
+
# Author:: MISHIMA, Hiroyuki
|
|
3
|
+
# Copyright:: MISHIMA, Hiroyuki, 2010-2011
|
|
4
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
5
|
+
#
|
|
6
|
+
# Original program in C by Jim Kent, 2002
|
|
7
|
+
# See also http://genomewiki.ucsc.edu/index.php/Bin_indexing_system;
|
|
8
|
+
# a paper Kent, et al. Genome Research 2002.12:996-1006;
|
|
9
|
+
# and src/lib/binRange.c in the kent source tree.
|
|
10
|
+
#
|
|
11
|
+
# Bio::Ucsc::UcscBin -
|
|
12
|
+
# 1) convert between 0-based half-open interval and
|
|
13
|
+
# 1-based full-close intervals.
|
|
14
|
+
# 2) Calculate Bin number from genomic physical position
|
|
15
|
+
# according to UCSC's Bin Indexing System.
|
|
16
|
+
#
|
|
17
|
+
module Bio
  module Ucsc
    # Coordinate conversion and bin-number calculation following UCSC's
    # Bin Indexing System. Every method is a class-level method.
    class UcscBin
      # Version = "0.1.0" # 20100714
      # Version = "0.2.0" # 20101028
      # Version = "0.2.1" # 20110408
      Version = "0.2.2" # 20110418 the licence is changed
                        # embedded in BioRubyUcscApi
                        # handle the case, start==end in [start, end)

      # Converts a 0-based half-open interval [zero_start, zero_end) into a
      # 1-based fully-closed interval.
      #
      # 0-based half-open coordinates are the ones used in the UCSC MySQL
      # database and the BED format; the first base of a chromosome is [0, 1).
      # OLD: positions had to be start<end.
      # New: positions can be start<=end (e.g. positions for insertions); a
      # zero-length interval is returned as [zero_start + 1, zero_end + 1].
      #
      # Returns a two-element Array [one_start, one_end].
      # Raises ArgumentError when a position is negative or when
      # zero_start > zero_end.
      def self.zero_to_one(zero_start, zero_end)
        if zero_start < 0 || zero_end < 0
          raise ArgumentError, "positions must be >=0"
        end
        if zero_start > zero_end
          raise ArgumentError, "positions must be start<=end"
        end
        # Zero-length interval: both ends move by one.
        return [zero_start + 1, zero_end + 1] if zero_start == zero_end

        [zero_start + 1, zero_end]
      end

      # Converts a 1-based fully-closed interval [one_start, one_end] into a
      # 0-based half-open interval.
      #
      # 1-based fully-closed coordinates are the ones used in the UCSC genome
      # browser's human interface and most other formats; the first base of a
      # chromosome is [1, 1].
      #
      # Returns a two-element Array [zero_start, zero_end].
      # Raises ArgumentError when a position is below 1 or when
      # one_start > one_end.
      def self.one_to_zero(one_start, one_end)
        if one_start < 1 || one_end < 1
          raise ArgumentError, "positions must be >=1"
        end
        if one_start > one_end
          raise ArgumentError, "positions must be start<=end"
        end

        [one_start - 1, one_end]
      end

      # Largest interval end handled by the standard (non-extended) scheme.
      BINRANGE_MAXEND_512M = (512 * 1024 * 1024)
      # First bin number of each level, finest level first.
      BIN_OFFSETS_EXTENDED =
        [4096 + 512 + 64 + 8 + 1, 512 + 64 + 8 + 1, 64 + 8 + 1, 8 + 1, 1, 0].freeze
      BIN_OFFSETS = [512 + 64 + 8 + 1, 64 + 8 + 1, 8 + 1, 1, 0].freeze
      BIN_OFFSET_OLD_TO_EXTENDED = 4681
      # How much to shift to get to finest bin.
      BIN_FIRST_SHIFT = 17
      # How much to shift to get to next larger bin.
      BIN_NEXT_SHIFT = 3

      # Returns the Integer number of the smallest/finest bin that contains
      # the whole interval [bin_start, bin_end) (0-based half-open).
      #
      # The extended bin index for positions >= 512M is not supported yet;
      # such input raises NotImplementedError.
      # Do you need it? Please email me.
      def self.bin_from_range(bin_start, bin_end)
        if bin_end <= BINRANGE_MAXEND_512M
          bin_from_range_standard(bin_start, bin_end)
        else
          bin_from_range_extended(bin_start, bin_end)
        end
      end

      class << self; alias bin bin_from_range; end

      # Returns an Array of bin numbers: for every level in BIN_OFFSETS
      # (finest first), all bins from the one holding p_start to the one
      # holding p_end - 1. For a non-empty interval the list therefore ends
      # with 0, the bin spanning a whole chromosome.
      #
      # The extended bin index for positions >= 512M is not supported yet;
      # such input raises NotImplementedError.
      # Do you need it? Please email me.
      def self.bin_all(p_start, p_end)
        if p_end <= BINRANGE_MAXEND_512M
          bin_all_standard(p_start, p_end)
        else
          bin_all_extended(p_start, p_end)
        end
      end

      # Implementation helpers.
      # NOTE: a bare `private` does not apply to `def self.` methods, so these
      # helpers have always been publicly callable; they are kept that way
      # for backward compatibility.

      # Given start,end in chromosome coordinates assign it a bin. There's a
      # bin for each 128k segment, for each 1M segment, for each 8M segment,
      # for each 64M segment, and for each chromosome (which is assumed to be
      # less than 512M.) A range goes into the smallest bin it will fit in.
      #
      # Raises RangeError, quoting the coordinates that were passed in, when
      # no level holds both ends in a single bin.
      def self.bin_from_range_standard(bin_start, bin_end)
        # Shift copies so the arguments stay intact for the error message.
        first_bin = bin_start >> BIN_FIRST_SHIFT
        last_bin = (bin_end - 1) >> BIN_FIRST_SHIFT

        BIN_OFFSETS.each do |offset|
          return offset + first_bin if first_bin == last_bin
          first_bin >>= BIN_NEXT_SHIFT
          last_bin >>= BIN_NEXT_SHIFT
        end
        raise RangeError,
              "start #{bin_start}, end #{bin_end} out of range in findBin (max is 512M)"
      end

      # Placeholder for the extended scheme; always raises NotImplementedError.
      def self.bin_from_range_extended(bin_start, bin_end)
        raise NotImplementedError, "Extended bins are not supported yet"
      end

      # Collects, level by level (finest first), every bin number between the
      # bin of bin_start and the bin of bin_end - 1.
      def self.bin_all_standard(bin_start, bin_end)
        results = []
        first_bin = bin_start >> BIN_FIRST_SHIFT
        last_bin = (bin_end - 1) >> BIN_FIRST_SHIFT

        BIN_OFFSETS.each do |offset|
          results.concat(((offset + first_bin)..(offset + last_bin)).to_a)
          first_bin >>= BIN_NEXT_SHIFT
          last_bin >>= BIN_NEXT_SHIFT
        end
        results
      end

      # Placeholder for the extended scheme; always raises NotImplementedError.
      def self.bin_all_extended(bin_start, bin_end)
        raise NotImplementedError, "Extended bins are not supported yet"
      end

    end # class UcscBin
  end # module Ucsc
end # module Bio
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/usr/local/bin/ruby-1.9
|
|
2
|
+
#
|
|
3
|
+
# Copyright::
|
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki
|
|
5
|
+
# <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
6
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
7
|
+
#
|
|
8
|
+
|
|
9
|
+
require File.dirname(__FILE__) + '/../lib/bio-ucsc'
|
|
10
|
+
|
|
11
|
+
include Bio::Ucsc
|
|
12
|
+
|
|
13
|
+
Hg19::DBConnection.default
|
|
14
|
+
Hg19::DBConnection.connect
|
|
15
|
+
|
|
16
|
+
itvs_a =
|
|
17
|
+
[Bio::GenomicInterval.parse("chr1:1-200,000"),
|
|
18
|
+
Bio::GenomicInterval.parse("chr2:1-200,000"),
|
|
19
|
+
Bio::GenomicInterval.parse("chr3:1-300,000"),
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
puts
|
|
23
|
+
puts "Queries in Slice objects using 1-based [start,end] closed intervals"
|
|
24
|
+
puts "Results in 0-based [start,end) half-open intervals"
|
|
25
|
+
puts
|
|
26
|
+
|
|
27
|
+
::puts "test 1 (hg19/RefGene) --- Bio::Ucsc::Hg19::RefGene.find_by_interval"
|
|
28
|
+
results = itvs_a.map{|i|Hg19::RefGene.find_by_interval(i)}
|
|
29
|
+
puts "0-based interval\t1-based interval\tGene Symbol"
|
|
30
|
+
results.flatten.each do |e|
|
|
31
|
+
i = Bio::GenomicInterval.zero_based(e.chrom, e.txStart, e.txEnd)
|
|
32
|
+
print "#{e.chrom}:#{e.txStart}-#{e.txEnd}\t"
|
|
33
|
+
print "#{i.chrom}:#{i.chr_start}-#{i.chr_end}\t#{e.name2}\n"
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
#
|
|
37
|
+
#
|
|
38
|
+
|
|
39
|
+
itvs_b =
|
|
40
|
+
[Bio::GenomicInterval.parse("chr1:1-11,000"),
|
|
41
|
+
Bio::GenomicInterval.parse("chr2:1-11,000"),
|
|
42
|
+
Bio::GenomicInterval.parse("chr3:1-12,000"),
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
puts
|
|
46
|
+
puts "test 2 (hg19/Snp131) --- Bio::Ucsc::Hg19::Snp131.find_by_interval"
|
|
47
|
+
puts "0-based interval\t1-based interval\tdbSNP rs ID\tClass"
|
|
48
|
+
results = itvs_b.map{|i|Hg19::Snp131.find_by_interval(i)}
|
|
49
|
+
results.flatten.each do |e|
|
|
50
|
+
i = Bio::GenomicInterval.zero_based(e.chrom, e.chromStart, e.chromEnd)
|
|
51
|
+
print "#{e.chrom}:#{e.chromStart}-#{e.chromEnd}\t"
|
|
52
|
+
print "#{i.chrom}:#{i.chr_start}-#{i.chr_end}\t#{e.name}\t#{e[:class]}\n"
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
#
|
|
56
|
+
#
|
|
57
|
+
|
|
58
|
+
names = %w(rs56289060 rs62636508 rs28888107)
|
|
59
|
+
|
|
60
|
+
puts
|
|
61
|
+
puts "test 3 (hg19/Snp131) ---Bio::Ucsc::Hg19::Snp131.find_by_name"
|
|
62
|
+
names.each do |n|
|
|
63
|
+
r = Hg19::Snp131.find_by_name(n)
|
|
64
|
+
i = Bio::GenomicInterval.zero_based(r.chrom, r.chromStart, r.chromEnd)
|
|
65
|
+
puts "Query: #{n}\t#{i.chrom}\t#{i.chr_start}\t#{i.chr_end}\t#{r[:class]}"
|
|
66
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'bio-ucsc'
|
|
2
|
+
# Spec for interval queries against Bio::Ucsc::Hg18::CnpIafrate2.
# NOTE(review): the expected item count presumably reflects the contents of
# the database reached through DBConnection -- confirm before relying on it.
describe "Bio::Ucsc::Hg18::CnpIafrate2" do

  describe "#find_by_interval" do
    context "given range chr1:1-2,000,000" do
      it "returns an array of results" do
        Bio::Ucsc::Hg18::DBConnection.default
        Bio::Ucsc::Hg18::DBConnection.connect
        i = Bio::GenomicInterval.parse("chr1:1-2,000,000")
        Bio::Ucsc::Hg18::CnpIafrate2.find_by_interval(i).should have(2).items
      end

      it "returns an array of results with column accessors" do
        Bio::Ucsc::Hg18::DBConnection.default
        Bio::Ucsc::Hg18::DBConnection.connect
        i = Bio::GenomicInterval.parse("chr1:1-2,000,000")
        r = Bio::Ucsc::Hg18::CnpIafrate2.find_by_interval(i)
        r[0].chrom.should == "chr1"
      end
    end
  end

end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'bio-ucsc'
|
|
2
|
+
# Spec for interval queries against Bio::Ucsc::Hg18::CnpLocke.
# NOTE(review): the expected item count presumably reflects the contents of
# the database reached through DBConnection -- confirm before relying on it.
describe "Bio::Ucsc::Hg18::CnpLocke" do

  describe "#find_by_interval" do
    context "given range chr1:1-2,000,000" do
      it "returns an array of results" do
        Bio::Ucsc::Hg18::DBConnection.default
        Bio::Ucsc::Hg18::DBConnection.connect
        i = Bio::GenomicInterval.parse("chr1:1-2,000,000")
        Bio::Ucsc::Hg18::CnpLocke.find_by_interval(i).should have(2).items
      end

      it "returns an array of results with column accessors" do
        Bio::Ucsc::Hg18::DBConnection.default
        Bio::Ucsc::Hg18::DBConnection.connect
        i = Bio::GenomicInterval.parse("chr1:1-2,000,000")
        r = Bio::Ucsc::Hg18::CnpLocke.find_by_interval(i)
        r[0].chrom.should == "chr1"
      end
    end
  end

end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'bio-ucsc'
|
|
2
|
+
describe "Bio::Ucsc::Hg18::CnpRedon" do
|
|
3
|
+
|
|
4
|
+
describe "#find_by_interval" do
|
|
5
|
+
context "given range chr1:1-2,000,000" do
|
|
6
|
+
it "returns an array of results" do
|
|
7
|
+
Bio::Ucsc::Hg18::DBConnection.default
|
|
8
|
+
Bio::Ucsc::Hg18::DBConnection.connect
|
|
9
|
+
i = Bio::GenomicInterval.parse("chr1:1-2,000,000")
|
|
10
|
+
Bio::Ucsc::Hg18::CnpRedon.find_by_interval(i).should have(3).items
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it "returns an array of results with column accessors" do
|
|
14
|
+
Bio::Ucsc::Hg18::DBConnection.default
|
|
15
|
+
Bio::Ucsc::Hg18::DBConnection.connect
|
|
16
|
+
i = Bio::GenomicInterval.parse("chr1:1-2,000,000")
|
|
17
|
+
r = Bio::Ucsc::Hg18::CnpRedon.find_by_interval(i)
|
|
18
|
+
r[0].chrom.should == "chr1"
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
end
|