bio-ucsc-api 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +2 -0
- data/COPYING +58 -0
- data/COPYING.ja +51 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +49 -0
- data/README.rdoc +57 -0
- data/Rakefile +72 -0
- data/VERSION +1 -0
- data/bio-ucsc-api.gemspec +212 -0
- data/lib/bio-ucsc.rb +20 -0
- data/lib/bio-ucsc/hg18.rb +27 -0
- data/lib/bio-ucsc/hg18/activerecord.rb +166 -0
- data/lib/bio-ucsc/hg18/cnpiafrate2.rb +31 -0
- data/lib/bio-ucsc/hg18/cnplocke.rb +31 -0
- data/lib/bio-ucsc/hg18/cnpredon.rb +32 -0
- data/lib/bio-ucsc/hg18/cnpsebat2.rb +35 -0
- data/lib/bio-ucsc/hg18/cnpsharp2.rb +32 -0
- data/lib/bio-ucsc/hg18/db_connection.rb +61 -0
- data/lib/bio-ucsc/hg18/dgv.rb +27 -0
- data/lib/bio-ucsc/hg18/refgene.rb +27 -0
- data/lib/bio-ucsc/hg18/rmsk.rb +51 -0
- data/lib/bio-ucsc/hg18/tables.rb +142 -0
- data/lib/bio-ucsc/hg19.rb +54 -0
- data/lib/bio-ucsc/hg19/activerecord.rb +217 -0
- data/lib/bio-ucsc/hg19/ccdsgene.rb +33 -0
- data/lib/bio-ucsc/hg19/cytoband.rb +33 -0
- data/lib/bio-ucsc/hg19/db_connection.rb +61 -0
- data/lib/bio-ucsc/hg19/dgv.rb +27 -0
- data/lib/bio-ucsc/hg19/ensgene.rb +21 -0
- data/lib/bio-ucsc/hg19/gwascatalog.rb +26 -0
- data/lib/bio-ucsc/hg19/hapmapalleleschimp.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapallelesmacaque.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsasw.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsceu.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpschb.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpschd.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsgih.rb +34 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsjpt.rb +34 -0
- data/lib/bio-ucsc/hg19/hapmapsnpslwk.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsmex.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsmkk.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpstsi.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsyri.rb +35 -0
- data/lib/bio-ucsc/hg19/knowngene.rb +34 -0
- data/lib/bio-ucsc/hg19/omimgene.rb +25 -0
- data/lib/bio-ucsc/hg19/phastconselements46wayprimates.rb +27 -0
- data/lib/bio-ucsc/hg19/phylop46wayprimates.rb +27 -0
- data/lib/bio-ucsc/hg19/refgene.rb +27 -0
- data/lib/bio-ucsc/hg19/rmsk.rb +32 -0
- data/lib/bio-ucsc/hg19/snp131.rb +30 -0
- data/lib/bio-ucsc/hg19/snp132.rb +31 -0
- data/lib/bio-ucsc/hg19/snp132codingdbsnp.rb +29 -0
- data/lib/bio-ucsc/hg19/snp132common.rb +32 -0
- data/lib/bio-ucsc/hg19/snp132flagged.rb +31 -0
- data/lib/bio-ucsc/hg19/snp132mult.rb +32 -0
- data/lib/bio-ucsc/hg19/wgrna.rb +29 -0
- data/lib/bio-ucsc/ucsc_bin.rb +148 -0
- data/samples/hg19-sample.rb +66 -0
- data/spec/hg18/cnpiafrate2_spec.rb +23 -0
- data/spec/hg18/cnplocke_spec.rb +23 -0
- data/spec/hg18/cnpredon_spec.rb +23 -0
- data/spec/hg18/cnpsebat2_spec.rb +23 -0
- data/spec/hg18/cnpsharp2_spec.rb +23 -0
- data/spec/hg18/db_connection_spec.rb +36 -0
- data/spec/hg18/dgv_spec.rb +23 -0
- data/spec/hg18/refgene_spec.rb +23 -0
- data/spec/hg18/rmsk_spec.rb +33 -0
- data/spec/hg19/ccdsgene_spec.rb +23 -0
- data/spec/hg19/cytoband_spec.rb +23 -0
- data/spec/hg19/db_connection_spec.rb +37 -0
- data/spec/hg19/dgv_spec.rb +23 -0
- data/spec/hg19/ensgene_spec.rb +23 -0
- data/spec/hg19/gwascatalog_spec.rb +23 -0
- data/spec/hg19/hapmapalleleschimp_spec.rb +23 -0
- data/spec/hg19/hapmapallelesmacaque_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsasw_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsceu_spec.rb +23 -0
- data/spec/hg19/hapmapsnpschb_spec.rb +23 -0
- data/spec/hg19/hapmapsnpschd_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsgih_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsjpt_spec.rb +23 -0
- data/spec/hg19/hapmapsnpslwk_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsmex_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsmkk_spec.rb +23 -0
- data/spec/hg19/hapmapsnpstsi_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsyri_spec.rb +23 -0
- data/spec/hg19/knowngene_spec.rb +23 -0
- data/spec/hg19/omimGene_spec.rb +23 -0
- data/spec/hg19/phastconselements46wayprimates_spec.rb +23 -0
- data/spec/hg19/phyloP46wayPrimates_spec.rb +23 -0
- data/spec/hg19/refgene_spec.rb +23 -0
- data/spec/hg19/rmsk_spec.rb +23 -0
- data/spec/hg19/snp132Flagged_spec.rb +24 -0
- data/spec/hg19/snp132_spec.rb +23 -0
- data/spec/hg19/snp132codingdbsnp_spec.rb +23 -0
- data/spec/hg19/snp132common_spec.rb +24 -0
- data/spec/hg19/snp132mult_spec.rb +23 -0
- data/spec/hg19/wgrna_spec.rb +23 -0
- data/spec/spec_helper.rb +12 -0
- metadata +368 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
#
|
2
|
+
# = hg19/snp132codingdbsnp.rb
|
3
|
+
# Copyright::
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
6
|
+
#
|
7
|
+
# = Table description in UCSC Table Browser
|
8
|
+
# This track contains information about a subset of the single
|
9
|
+
# nucleotide polymorphisms and small insertions and deletions (indels)
|
10
|
+
# -- collectively Simple Nucleotide Polymorphisms -- from dbSNP
|
11
|
+
# build 132, available from ftp.ncbi.nih.gov/snp. Only SNPs that have a
|
12
|
+
# minor allele frequency of at least 1% and are mapped to a single
|
13
|
+
# location in the reference genome assembly are included in this
|
14
|
+
# subset. Frequency data are not available for all SNPs, so this subset
|
15
|
+
# is incomplete.
|
16
|
+
#
|
17
|
+
# Annotations of the effects of SNPs on translated protein sequence.
|
18
|
+
|
19
|
+
module Bio
  module Ucsc
    module Hg19
      # Model class bound to the hg19 table "snp132CodingDbSnp".
      #
      # The table is declared without a primary key. Class-level query
      # helpers are mixed in from Bio::Ucsc::Hg19::QueryUsingChromBin
      # (defined elsewhere; presumably the bin-based interval finders --
      # TODO confirm).
      class Snp132CodingDbSnp < DBConnection
        extend Bio::Ucsc::Hg19::QueryUsingChromBin

        set_table_name "snp132CodingDbSnp"
        set_primary_key nil
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# = hg19/snp132common.rb
|
3
|
+
# Copyright::
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
6
|
+
#
|
7
|
+
# = Table description in UCSC Table Browser
|
8
|
+
# This track contains information about a subset of the single
|
9
|
+
# nucleotide polymorphisms and small insertions and deletions (indels)
|
10
|
+
# -- collectively Simple Nucleotide Polymorphisms -- from dbSNP
|
11
|
+
# build 132, available from ftp.ncbi.nih.gov/snp. Only SNPs that have a
|
12
|
+
# minor allele frequency of at least 1% and are mapped to a single
|
13
|
+
# location in the reference genome assembly are included in this
|
14
|
+
# subset. Frequency data are not available for all SNPs, so this subset
|
15
|
+
# is incomplete.
|
16
|
+
#
|
17
|
+
# Common SNPs (132): uniquely mapped variants that appear in at least
|
18
|
+
# 1% of the population
|
19
|
+
|
20
|
+
module Bio
  module Ucsc
    module Hg19
      # Model class bound to the hg19 table "snp132Common".
      #
      # The table is declared without a primary key. Class-level query
      # helpers are mixed in from Bio::Ucsc::Hg19::QueryUsingChromBin
      # (defined elsewhere).
      class Snp132Common < DBConnection
        extend Bio::Ucsc::Hg19::QueryUsingChromBin

        set_table_name "snp132Common"
        set_primary_key nil

        # Remove the "valid" and "class" entries from columns_hash, in that
        # order. NOTE(review): presumably this keeps column accessors from
        # clashing with existing methods of the same name; the values would
        # then be read as record[:valid] / record[:class] -- confirm.
        %w(valid class).each { |column| columns_hash.delete(column) }
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# = hg19/snp132flagged.rb
|
2
|
+
# Copyright::
|
3
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
4
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
5
|
+
#
|
6
|
+
# = Table description in UCSC Table Browser
|
7
|
+
# This track contains information about a subset of the single
|
8
|
+
# nucleotide polymorphisms and small insertions and deletions (indels)
|
9
|
+
# -- collectively Simple Nucleotide Polymorphisms -- from dbSNP
|
10
|
+
# build 132, available from ftp.ncbi.nih.gov/snp. Only SNPs that have a
|
11
|
+
# minor allele frequency of at least 1% and are mapped to a single
|
12
|
+
# location in the reference genome assembly are included in this
|
13
|
+
# subset. Frequency data are not available for all SNPs, so this subset
|
14
|
+
# is incomplete.
|
15
|
+
#
|
16
|
+
# Flagged SNPs (132): uniquely mapped variants, excluding Common SNPs,
|
17
|
+
# that have been flagged by dbSNP as "clinically associated"
|
18
|
+
|
19
|
+
module Bio
  module Ucsc
    module Hg19
      # Model class bound to the hg19 table "snp132Flagged".
      #
      # The table is declared without a primary key. Class-level query
      # helpers are mixed in from Bio::Ucsc::Hg19::QueryUsingChromBin
      # (defined elsewhere).
      class Snp132Flagged < DBConnection
        extend Bio::Ucsc::Hg19::QueryUsingChromBin

        set_table_name "snp132Flagged"
        set_primary_key nil

        # Remove the "valid" and "class" entries from columns_hash, in that
        # order. NOTE(review): presumably this keeps column accessors from
        # clashing with existing methods of the same name; the values would
        # then be read as record[:valid] / record[:class] -- confirm.
        %w(valid class).each { |column| columns_hash.delete(column) }
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# = hg19/snp132mult.rb
|
3
|
+
# Copyright::
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
6
|
+
#
|
7
|
+
# = Table description in UCSC Table Browser
|
8
|
+
# This track contains information about a subset of the single
|
9
|
+
# nucleotide polymorphisms and small insertions and deletions (indels)
|
10
|
+
# -- collectively Simple Nucleotide Polymorphisms -- from dbSNP
|
11
|
+
# build 132, available from ftp.ncbi.nih.gov/snp. Only SNPs that have a
|
12
|
+
# minor allele frequency of at least 1% and are mapped to a single
|
13
|
+
# location in the reference genome assembly are included in this
|
14
|
+
# subset. Frequency data are not available for all SNPs, so this subset
|
15
|
+
# is incomplete.
|
16
|
+
#
|
17
|
+
# Mult. SNPs(132): variants that have been mapped to more than one
|
18
|
+
# genomic location
|
19
|
+
|
20
|
+
module Bio
  module Ucsc
    module Hg19
      # Model class bound to the hg19 table "snp132Mult".
      #
      # The table is declared without a primary key. Class-level query
      # helpers are mixed in from Bio::Ucsc::Hg19::QueryUsingChromBin
      # (defined elsewhere).
      class Snp132Mult < DBConnection
        extend Bio::Ucsc::Hg19::QueryUsingChromBin

        set_table_name "snp132Mult"
        set_primary_key nil

        # Remove the "valid" and "class" entries from columns_hash, in that
        # order. NOTE(review): presumably this keeps column accessors from
        # clashing with existing methods of the same name; the values would
        # then be read as record[:valid] / record[:class] -- confirm.
        %w(valid class).each { |column| columns_hash.delete(column) }
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#
|
2
|
+
# = hg19/wgrna.rb
|
3
|
+
# Copyright::
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
+
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
|
+
#
|
8
|
+
# = Table description in UCSC Table Browser
|
9
|
+
# This track displays positions of four different types of RNA in the
|
10
|
+
# human genome:
|
11
|
+
# * precursor forms of microRNAs (pre-miRNAs)
|
12
|
+
# * C/D box small nucleolar RNAs (C/D box snoRNAs) and H/ACA box snoRNAs
|
13
|
+
# * small Cajal body-specific RNAs (scaRNAs)
|
14
|
+
#
|
15
|
+
# = omitted dynamic method(s) due to the method name collision
|
16
|
+
# use results[:type] instead of results.type
|
17
|
+
|
18
|
+
module Bio
  module Ucsc
    module Hg19
      # Model class bound to the hg19 table "wgRna".
      #
      # The table is declared without a primary key. Class-level query
      # helpers are mixed in from Bio::Ucsc::Hg19::QueryUsingChromBin
      # (defined elsewhere).
      class WgRna < DBConnection
        extend Bio::Ucsc::Hg19::QueryUsingChromBin

        set_table_name "wgRna"
        set_primary_key nil

        # Remove the "type" entry from columns_hash. Per this file's header
        # the dynamic method is omitted because of a method name collision:
        # use results[:type] instead of results.type.
        columns_hash.delete("type")
      end
    end
  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# = UCSCBin
|
2
|
+
# Author:: MISHIMA, Hiroyuki
|
3
|
+
# Copyright:: MISHIMA, Hiroyuki, 2010-2011
|
4
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
5
|
+
#
|
6
|
+
# Original program in C by Jim Kent, 2002
|
7
|
+
# See also http://genomewiki.ucsc.edu/index.php/Bin_indexing_system;
|
8
|
+
# the paper Kent et al., Genome Research 2002, 12:996-1006;
|
9
|
+
# and src/lib/binRange.c in the kent source tree.
|
10
|
+
#
|
11
|
+
# Bio::Ucsc::UcscBin -
|
12
|
+
# 1) convert between 0-based half-open interval and
|
13
|
+
# 1-based fully-closed intervals.
|
14
|
+
# 2) Calculate Bin number from genomic physical position
|
15
|
+
# according to UCSC's Bin Indexing System.
|
16
|
+
#
|
17
|
+
module Bio
  module Ucsc
    # Coordinate and bin helpers for UCSC-style genomic positions:
    #
    # 1. conversion between 0-based half-open and 1-based fully-closed
    #    intervals, and
    # 2. bin numbers according to UCSC's Bin Indexing System (standard
    #    scheme only; interval ends above 512M raise NotImplementedError).
    class UcscBin
      # Version = "0.1.0" # 20100714
      # Version = "0.2.0" # 20101028
      # Version = "0.2.1" # 20110408
      Version = "0.2.2" # 20110418 the licence is changed
                        # embedded in BioRubyUcscApi
                        # handle the case, start==end in [start, end)

      # Convert a 0-based half-open interval to a 1-based fully-closed one.
      #
      # 'zero_start' and 'zero_end' are 0-based half-open coordinates as
      # used in the UCSC MySQL database and the BED format; the first base
      # of a chromosome is [0, 1).
      #
      # OLD: positions must be start<end
      # New: positions can be start<=end (e.g. positions for insertions);
      #      a zero-length interval [n, n) is returned as [n + 1, n + 1].
      #
      # Returns a two-element Array [one_start, one_end].
      # Raises ArgumentError when a position is negative or start > end.
      def self.zero_to_one(zero_start, zero_end)
        if zero_start < 0 || zero_end < 0
          raise ArgumentError, "positions must be >=0"
        end
        if zero_start > zero_end
          raise ArgumentError, "positions must be start<=end"
        end

        one_end = zero_start == zero_end ? zero_end + 1 : zero_end
        [zero_start + 1, one_end]
      end

      # Convert a 1-based fully-closed interval to a 0-based half-open one.
      #
      # 'one_start' and 'one_end' are 1-based fully-closed coordinates as
      # used in the UCSC genome browser's human interface and most other
      # formats; the first base of a chromosome is [1, 1].
      #
      # Returns a two-element Array [zero_start, zero_end].
      # Raises ArgumentError when a position is below 1 or start > end.
      def self.one_to_zero(one_start, one_end)
        if one_start < 1 || one_end < 1
          raise ArgumentError, "positions must be >=1"
        end
        if one_start > one_end
          raise ArgumentError, "positions must be start<=end"
        end

        [one_start - 1, one_end]
      end

      BINRANGE_MAXEND_512M = (512*1024*1024)
      BIN_OFFSETS_EXTENDED = [4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0].freeze
      BIN_OFFSETS = [512+64+8+1, 64+8+1, 8+1, 1, 0].freeze
      BIN_OFFSET_OLD_TO_EXTENDED = 4681
      # How much to shift to get to finest bin.
      BIN_FIRST_SHIFT = 17
      # How much to shift to get to next larger bin.
      BIN_NEXT_SHIFT = 3

      # Return the Integer bin number of the smallest/finest bin that
      # contains the whole interval [bin_start, bin_end).
      #
      # Extended bin index for positions >= 512M is not supported yet
      # (NotImplementedError is raised when bin_end exceeds 512M).
      # Do you need it? Please email me.
      def self.bin_from_range(bin_start, bin_end)
        if bin_end <= BINRANGE_MAXEND_512M
          bin_from_range_standard(bin_start, bin_end)
        else
          bin_from_range_extended(bin_start, bin_end)
        end
      end

      class << self; alias bin bin_from_range; end

      # Return an Array of all bins, at every level of the hierarchy, that
      # the interval [p_start, p_end) overlaps. The levels are listed from
      # the finest to the coarsest, so the last level contributes bin "0",
      # the bin covering a whole chromosome.
      #
      # Extended bin index for positions >= 512M is not supported yet
      # (NotImplementedError is raised when p_end exceeds 512M).
      # Do you need it? Please email me.
      def self.bin_all(p_start, p_end)
        if p_end <= BINRANGE_MAXEND_512M
          bin_all_standard(p_start, p_end)
        else
          bin_all_extended(p_start, p_end)
        end
      end

      # --- internal helpers -------------------------------------------------
      # The helpers below are implementation details of bin_from_range and
      # bin_all. A bare `private` does not apply to `def self.` methods, so
      # they have always been publicly callable; they are left that way to
      # stay compatible with any existing callers.

      # Given start,end in chromosome coordinates assign it
      # a bin. There's a bin for each 128k segment, for each
      # 1M segment, for each 8M segment, for each 64M segment,
      # and for each chromosome (which is assumed to be less than
      # 512M.) A range goes into the smallest bin it will fit in.
      #
      # Raises RangeError (reporting the coordinates as passed in) when no
      # level of the hierarchy holds the interval in a single bin.
      def self.bin_from_range_standard(bin_start, bin_end)
        first_bin = bin_start >> BIN_FIRST_SHIFT
        # bin_end is exclusive, so the last covered base is bin_end - 1.
        last_bin = (bin_end - 1) >> BIN_FIRST_SHIFT

        BIN_OFFSETS.each do |offset|
          return offset + first_bin if first_bin == last_bin
          first_bin >>= BIN_NEXT_SHIFT
          last_bin >>= BIN_NEXT_SHIFT
        end

        raise RangeError,
              "start #{bin_start}, end #{bin_end} out of range in findBin (max is 512M)"
      end

      # Placeholder for the extended (> 512M) scheme; always raises
      # NotImplementedError.
      def self.bin_from_range_extended(bin_start, bin_end)
        raise NotImplementedError, "Extended bins are not supported yet"
      end

      # Collect, level by level (finest first), every bin between the bin of
      # bin_start and the bin of the last covered base (bin_end - 1).
      def self.bin_all_standard(bin_start, bin_end)
        first_bin = bin_start >> BIN_FIRST_SHIFT
        last_bin = (bin_end - 1) >> BIN_FIRST_SHIFT

        BIN_OFFSETS.each_with_object([]) do |offset, bins|
          bins.concat(((offset + first_bin)..(offset + last_bin)).to_a)
          first_bin >>= BIN_NEXT_SHIFT
          last_bin >>= BIN_NEXT_SHIFT
        end
      end

      # Placeholder for the extended (> 512M) scheme; always raises
      # NotImplementedError.
      def self.bin_all_extended(bin_start, bin_end)
        raise NotImplementedError, "Extended bins are not supported yet"
      end

    end # class UcscBin
  end # module Ucsc
end # module Bio
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/local/bin/ruby-1.9
|
2
|
+
#
|
3
|
+
# Copyright::
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki
|
5
|
+
# <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
6
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
|
+
#
|
8
|
+
|
9
|
+
require File.dirname(__FILE__) + '/../lib/bio-ucsc'

include Bio::Ucsc

# Select the default connection settings, then connect, before any query.
Hg19::DBConnection.default
Hg19::DBConnection.connect

# --- test 1: look up RefGene records by interval ---------------------------

itvs_a =
  [Bio::GenomicInterval.parse("chr1:1-200,000"),
   Bio::GenomicInterval.parse("chr2:1-200,000"),
   Bio::GenomicInterval.parse("chr3:1-300,000"),
  ]

puts
puts "Queries in Slice objects using 1-based [start,end] closed intervals"
puts "Results in 0-based [start,end) half-open intervals"
puts

# The original line read `::puts ...`; `::` must be followed by a constant,
# so that line did not parse and the whole sample could not run.
puts "test 1 (hg19/RefGene) --- Bio::Ucsc::Hg19::RefGene.find_by_interval"
results = itvs_a.map { |i| Hg19::RefGene.find_by_interval(i) }
puts "0-based interval\t1-based interval\tGene Symbol"
results.flatten.each do |e|
  # The raw txStart/txEnd columns are printed first, then the coordinates
  # reported by the GenomicInterval built from them (the "1-based" column).
  i = Bio::GenomicInterval.zero_based(e.chrom, e.txStart, e.txEnd)
  print "#{e.chrom}:#{e.txStart}-#{e.txEnd}\t"
  print "#{i.chrom}:#{i.chr_start}-#{i.chr_end}\t#{e.name2}\n"
end

# --- test 2: look up Snp131 records by interval -----------------------------

itvs_b =
  [Bio::GenomicInterval.parse("chr1:1-11,000"),
   Bio::GenomicInterval.parse("chr2:1-11,000"),
   Bio::GenomicInterval.parse("chr3:1-12,000"),
  ]

puts
puts "test 2 (hg19/Snp131) --- Bio::Ucsc::Hg19::Snp131.find_by_interval"
puts "0-based interval\t1-based interval\tdbSNP rs ID\tClass"
results = itvs_b.map { |i| Hg19::Snp131.find_by_interval(i) }
results.flatten.each do |e|
  i = Bio::GenomicInterval.zero_based(e.chrom, e.chromStart, e.chromEnd)
  print "#{e.chrom}:#{e.chromStart}-#{e.chromEnd}\t"
  # The "class" column is read with e[:class] rather than e.class.
  print "#{i.chrom}:#{i.chr_start}-#{i.chr_end}\t#{e.name}\t#{e[:class]}\n"
end

# --- test 3: look up Snp131 records by dbSNP rs ID --------------------------

names = %w(rs56289060 rs62636508 rs28888107)

puts
puts "test 3 (hg19/Snp131) ---Bio::Ucsc::Hg19::Snp131.find_by_name"
names.each do |n|
  r = Hg19::Snp131.find_by_name(n)
  i = Bio::GenomicInterval.zero_based(r.chrom, r.chromStart, r.chromEnd)
  puts "Query: #{n}\t#{i.chrom}\t#{i.chr_start}\t#{i.chr_end}\t#{r[:class]}"
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'bio-ucsc'
|
2
|
+
describe "Bio::Ucsc::Hg18::CnpIafrate2" do

  describe "#find_by_interval" do
    context "given range chr1:1-2,000,000" do
      # Same setup the examples previously repeated inline: select the
      # default connection settings, then connect, before every example.
      before(:each) do
        Bio::Ucsc::Hg18::DBConnection.default
        Bio::Ucsc::Hg18::DBConnection.connect
      end

      # Query interval shared by both examples.
      let(:interval) { Bio::GenomicInterval.parse("chr1:1-2,000,000") }

      it "returns an array of results" do
        Bio::Ucsc::Hg18::CnpIafrate2.find_by_interval(interval).should have(2).items
      end

      it "returns an array of results with column accessors" do
        r = Bio::Ucsc::Hg18::CnpIafrate2.find_by_interval(interval)
        r[0].chrom.should == "chr1"
      end
    end
  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'bio-ucsc'
|
2
|
+
describe "Bio::Ucsc::Hg18::CnpLocke" do

  describe "#find_by_interval" do
    context "given range chr1:1-2,000,000" do
      # Same setup the examples previously repeated inline: select the
      # default connection settings, then connect, before every example.
      before(:each) do
        Bio::Ucsc::Hg18::DBConnection.default
        Bio::Ucsc::Hg18::DBConnection.connect
      end

      # Query interval shared by both examples.
      let(:interval) { Bio::GenomicInterval.parse("chr1:1-2,000,000") }

      it "returns an array of results" do
        Bio::Ucsc::Hg18::CnpLocke.find_by_interval(interval).should have(2).items
      end

      it "returns an array of results with column accessors" do
        r = Bio::Ucsc::Hg18::CnpLocke.find_by_interval(interval)
        r[0].chrom.should == "chr1"
      end
    end
  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'bio-ucsc'
|
2
|
+
describe "Bio::Ucsc::Hg18::CnpRedon" do

  describe "#find_by_interval" do
    context "given range chr1:1-2,000,000" do
      # Select the default connection settings, then connect, before every
      # example.
      before(:each) do
        Bio::Ucsc::Hg18::DBConnection.default
        Bio::Ucsc::Hg18::DBConnection.connect
      end

      # Query interval shared by both examples.
      let(:interval) { Bio::GenomicInterval.parse("chr1:1-2,000,000") }

      it "returns an array of results" do
        Bio::Ucsc::Hg18::CnpRedon.find_by_interval(interval).should have(3).items
      end

      it "returns an array of results with column accessors" do
        records = Bio::Ucsc::Hg18::CnpRedon.find_by_interval(interval)
        records[0].chrom.should == "chr1"
      end
    end
  end

end
|