bio-ucsc-api 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +2 -0
- data/COPYING +58 -0
- data/COPYING.ja +51 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +49 -0
- data/README.rdoc +57 -0
- data/Rakefile +72 -0
- data/VERSION +1 -0
- data/bio-ucsc-api.gemspec +212 -0
- data/lib/bio-ucsc.rb +20 -0
- data/lib/bio-ucsc/hg18.rb +27 -0
- data/lib/bio-ucsc/hg18/activerecord.rb +166 -0
- data/lib/bio-ucsc/hg18/cnpiafrate2.rb +31 -0
- data/lib/bio-ucsc/hg18/cnplocke.rb +31 -0
- data/lib/bio-ucsc/hg18/cnpredon.rb +32 -0
- data/lib/bio-ucsc/hg18/cnpsebat2.rb +35 -0
- data/lib/bio-ucsc/hg18/cnpsharp2.rb +32 -0
- data/lib/bio-ucsc/hg18/db_connection.rb +61 -0
- data/lib/bio-ucsc/hg18/dgv.rb +27 -0
- data/lib/bio-ucsc/hg18/refgene.rb +27 -0
- data/lib/bio-ucsc/hg18/rmsk.rb +51 -0
- data/lib/bio-ucsc/hg18/tables.rb +142 -0
- data/lib/bio-ucsc/hg19.rb +54 -0
- data/lib/bio-ucsc/hg19/activerecord.rb +217 -0
- data/lib/bio-ucsc/hg19/ccdsgene.rb +33 -0
- data/lib/bio-ucsc/hg19/cytoband.rb +33 -0
- data/lib/bio-ucsc/hg19/db_connection.rb +61 -0
- data/lib/bio-ucsc/hg19/dgv.rb +27 -0
- data/lib/bio-ucsc/hg19/ensgene.rb +21 -0
- data/lib/bio-ucsc/hg19/gwascatalog.rb +26 -0
- data/lib/bio-ucsc/hg19/hapmapalleleschimp.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapallelesmacaque.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsasw.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsceu.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpschb.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpschd.rb +33 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsgih.rb +34 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsjpt.rb +34 -0
- data/lib/bio-ucsc/hg19/hapmapsnpslwk.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsmex.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsmkk.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpstsi.rb +35 -0
- data/lib/bio-ucsc/hg19/hapmapsnpsyri.rb +35 -0
- data/lib/bio-ucsc/hg19/knowngene.rb +34 -0
- data/lib/bio-ucsc/hg19/omimgene.rb +25 -0
- data/lib/bio-ucsc/hg19/phastconselements46wayprimates.rb +27 -0
- data/lib/bio-ucsc/hg19/phylop46wayprimates.rb +27 -0
- data/lib/bio-ucsc/hg19/refgene.rb +27 -0
- data/lib/bio-ucsc/hg19/rmsk.rb +32 -0
- data/lib/bio-ucsc/hg19/snp131.rb +30 -0
- data/lib/bio-ucsc/hg19/snp132.rb +31 -0
- data/lib/bio-ucsc/hg19/snp132codingdbsnp.rb +29 -0
- data/lib/bio-ucsc/hg19/snp132common.rb +32 -0
- data/lib/bio-ucsc/hg19/snp132flagged.rb +31 -0
- data/lib/bio-ucsc/hg19/snp132mult.rb +32 -0
- data/lib/bio-ucsc/hg19/wgrna.rb +29 -0
- data/lib/bio-ucsc/ucsc_bin.rb +148 -0
- data/samples/hg19-sample.rb +66 -0
- data/spec/hg18/cnpiafrate2_spec.rb +23 -0
- data/spec/hg18/cnplocke_spec.rb +23 -0
- data/spec/hg18/cnpredon_spec.rb +23 -0
- data/spec/hg18/cnpsebat2_spec.rb +23 -0
- data/spec/hg18/cnpsharp2_spec.rb +23 -0
- data/spec/hg18/db_connection_spec.rb +36 -0
- data/spec/hg18/dgv_spec.rb +23 -0
- data/spec/hg18/refgene_spec.rb +23 -0
- data/spec/hg18/rmsk_spec.rb +33 -0
- data/spec/hg19/ccdsgene_spec.rb +23 -0
- data/spec/hg19/cytoband_spec.rb +23 -0
- data/spec/hg19/db_connection_spec.rb +37 -0
- data/spec/hg19/dgv_spec.rb +23 -0
- data/spec/hg19/ensgene_spec.rb +23 -0
- data/spec/hg19/gwascatalog_spec.rb +23 -0
- data/spec/hg19/hapmapalleleschimp_spec.rb +23 -0
- data/spec/hg19/hapmapallelesmacaque_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsasw_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsceu_spec.rb +23 -0
- data/spec/hg19/hapmapsnpschb_spec.rb +23 -0
- data/spec/hg19/hapmapsnpschd_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsgih_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsjpt_spec.rb +23 -0
- data/spec/hg19/hapmapsnpslwk_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsmex_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsmkk_spec.rb +23 -0
- data/spec/hg19/hapmapsnpstsi_spec.rb +23 -0
- data/spec/hg19/hapmapsnpsyri_spec.rb +23 -0
- data/spec/hg19/knowngene_spec.rb +23 -0
- data/spec/hg19/omimGene_spec.rb +23 -0
- data/spec/hg19/phastconselements46wayprimates_spec.rb +23 -0
- data/spec/hg19/phyloP46wayPrimates_spec.rb +23 -0
- data/spec/hg19/refgene_spec.rb +23 -0
- data/spec/hg19/rmsk_spec.rb +23 -0
- data/spec/hg19/snp132Flagged_spec.rb +24 -0
- data/spec/hg19/snp132_spec.rb +23 -0
- data/spec/hg19/snp132codingdbsnp_spec.rb +23 -0
- data/spec/hg19/snp132common_spec.rb +24 -0
- data/spec/hg19/snp132mult_spec.rb +23 -0
- data/spec/hg19/wgrna_spec.rb +23 -0
- data/spec/spec_helper.rb +12 -0
- metadata +368 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = hg18/dgv.rb
|
|
3
|
+
# Copyright::
|
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
5
|
+
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
|
6
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
7
|
+
#
|
|
8
|
+
# = Table description in UCSC Table Browser
|
|
9
|
+
# This track displays copy number variants (CNVs),
|
|
10
|
+
# insertions/deletions (InDels), inversions and inversion breakpoints
|
|
11
|
+
# annotated by the Database of Genomic Variants (DGV), which contains
|
|
12
|
+
# genomic variations observed in healthy individuals. DGV focuses on
|
|
13
|
+
# structural variation, defined as genomic alterations that involve
|
|
14
|
+
# segments of DNA that are larger than 1000 bp. Insertions/deletions
|
|
15
|
+
# of 100 bp or larger are also included.
|
|
16
|
+
#
|
|
17
|
+
module Bio
|
|
18
|
+
module Ucsc
|
|
19
|
+
module Hg18
|
|
20
|
+
class Dgv < DBConnection
|
|
21
|
+
extend Bio::Ucsc::Hg18::QueryUsingChromBin
|
|
22
|
+
set_table_name 'dgv'
|
|
23
|
+
set_primary_key nil
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# = hg18/refgene.rb
|
|
3
|
+
# Copyright::
|
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
5
|
+
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
|
6
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
7
|
+
#
|
|
8
|
+
# = Table description in UCSC Table Browser
|
|
9
|
+
# The RefSeq Genes track shows known human protein-coding and
|
|
10
|
+
# non-protein-coding genes taken from the NCBI RNA reference sequences
|
|
11
|
+
# collection (RefSeq). The data underlying this track are updated
|
|
12
|
+
# daily.
|
|
13
|
+
#
|
|
14
|
+
# = omitted dynamic method(s) due to the method name collision
|
|
15
|
+
# none
|
|
16
|
+
|
|
17
|
+
module Bio
|
|
18
|
+
module Ucsc
|
|
19
|
+
module Hg18
|
|
20
|
+
class RefGene < DBConnection
|
|
21
|
+
extend Bio::Ucsc::Hg18::QueryUsingTxBin
|
|
22
|
+
set_table_name 'refGene'
|
|
23
|
+
set_primary_key nil
|
|
24
|
+
end # class RefGene
|
|
25
|
+
end # module Hg18
|
|
26
|
+
end # module Ucsc
|
|
27
|
+
end # Bio
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# = hg18/rmsk.rb
|
|
3
|
+
# Copyright::
|
|
4
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
5
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
6
|
+
#
|
|
7
|
+
# = Table description in UCSC Table Browser
|
|
8
|
+
# This track was created by using Arian Smit's RepeatMasker program,
|
|
9
|
+
# which screens DNA sequences for interspersed repeats and low
|
|
10
|
+
# complexity DNA sequences. The program outputs a detailed annotation
|
|
11
|
+
# of the repeats that are present in the query sequence (represented
|
|
12
|
+
# by this track), as well as a modified version of the query sequence
|
|
13
|
+
# in which all the annotated repeats have been masked (generally
|
|
14
|
+
# available on the Downloads page). RepeatMasker uses the RepBase
|
|
15
|
+
# library of repeats from the Genetic Information Research Institute
|
|
16
|
+
# (GIRI). RepBase is described in Jurka, J. (2000) in the References
|
|
17
|
+
# section below.
|
|
18
|
+
#
|
|
19
|
+
# = omitted dynamic method(s) due to the method name collision
|
|
20
|
+
# none
|
|
21
|
+
#
|
|
22
|
+
# = Note
|
|
23
|
+
# In the hg18 database, the Rmsk table is actually separated
|
|
24
|
+
# into "chr1_rmsk", "chr2_rmsk", etc. The Rmsk class dynamically
|
|
25
|
+
# defines Rmsk::Chr1_Rmsk, Rmsk::Chr2_Rmsk, etc. The
|
|
26
|
+
# Rmsk.find_by_interval calls the appropriate class automatically.
|
|
27
|
+
|
|
28
|
+
module Bio
|
|
29
|
+
module Ucsc
|
|
30
|
+
module Hg18
|
|
31
|
+
class Rmsk
|
|
32
|
+
%w(
|
|
33
|
+
ChrM Chr1 Chr2 Chr3 Chr4 Chr5 Chr6 Chr7 Chr8 Chr9
|
|
34
|
+
Chr10 Chr11 Chr12 Chr13 Chr14 Chr15 Chr16 Chr17 Chr18 Chr19
|
|
35
|
+
Chr20 Chr21 Chr22 ChrX ChrY).each do |chr|
|
|
36
|
+
klass = Class.new(DBConnection) do
|
|
37
|
+
extend Bio::Ucsc::Hg18::QueryUsingGenoBin
|
|
38
|
+
set_table_name "#{chr.downcase}_rmsk"
|
|
39
|
+
set_primary_key nil
|
|
40
|
+
end
|
|
41
|
+
self.const_set("#{chr}_Rmsk", klass)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Finds records overlapping +interval+ by delegating to the per-chromosome
# class (Chr1_Rmsk, ChrX_Rmsk, ...) registered as a constant on this class.
#
# String#capitalize lowercases everything after the first character, so
# "chrX" becomes "Chrx" and would never match the ChrX_Rmsk / ChrY_Rmsk /
# ChrM_Rmsk constants. The part following "Chr" is therefore upcased again;
# numeric chromosomes ("chr1" -> "Chr1") resolve exactly as before.
#
# interval:: object responding to +chrom+ (e.g. "chr1", "chrX"); it is
#            passed unchanged to the per-chromosome class's find_by_interval.
#
# Raises NameError when no class is defined for the chromosome name.
def self.find_by_interval(interval)
  chrom = interval.chrom.capitalize.sub(/\AChr(.+)\z/) { "Chr#{$1.upcase}" }
  chr_klass = self.const_get("#{chrom}_Rmsk")
  chr_klass.__send__(:find_by_interval, interval)
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
=begin
|
|
2
|
+
# = DESCRIPTION
|
|
3
|
+
# From Structural Variants description page when clicking the "Describe
|
|
4
|
+
# table schema" in the table browser:
|
|
5
|
+
# "Paired-end sequences from a human fosmid DNA library were mapped to the
|
|
6
|
+
# assembly. The average resolution of this technique was ~8kb, and included
|
|
7
|
+
# 56 sites of inversion not detectable by the array-based approaches.
|
|
8
|
+
# However, because of the physical constraints of fosmid insert size, this
|
|
9
|
+
# technique was unable to detect insertions greater than 40 kb in size."
|
|
10
|
+
class CnpTuzun < DBConnection
|
|
11
|
+
include Ucsc::Hg19::Feature
|
|
12
|
+
|
|
13
|
+
set_table_name 'cnpTuzun'
|
|
14
|
+
set_primary_key nil
|
|
15
|
+
|
|
16
|
+
def self.find_by_slice(slice)
|
|
17
|
+
start = slice.range.begin
|
|
18
|
+
stop = slice.range.end
|
|
19
|
+
CnpTuzun.find_by_sql('SELECT * FROM cnpTuzun' + overlap_sql(slice, start, stop))
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# = DESCRIPTION
|
|
25
|
+
# From Simple Repeats description page when clicking the "Describe
|
|
26
|
+
# table schema" in the table browser:
|
|
27
|
+
# "This track displays simple tandem repeats (possibly imperfect) located
|
|
28
|
+
# by Tandem Repeats Finder (TRF), which is specialized for this purpose.
|
|
29
|
+
# These repeats can occur within coding regions of genes and may be quite
|
|
30
|
+
# polymorphic. Repeat expansions are sometimes associated with specific
|
|
31
|
+
# diseases."
|
|
32
|
+
class SimpleRepeat < DBConnection
|
|
33
|
+
include Ucsc::Hg19::Feature
|
|
34
|
+
|
|
35
|
+
set_table_name 'simpleRepeat'
|
|
36
|
+
set_primary_key nil
|
|
37
|
+
|
|
38
|
+
def self.find_by_slice(slice)
|
|
39
|
+
start = slice.range.begin
|
|
40
|
+
stop = slice.range.end
|
|
41
|
+
SimpleRepeat.find_by_sql('SELECT * FROM simpleRepeat' + overlap_sql(slice, start, stop))
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# = DESCRIPTION
|
|
46
|
+
# From Structural Variants description page when clicking the "Describe
|
|
47
|
+
# table schema" in the table browser:
|
|
48
|
+
# "This track shows regions detected as putative genomic duplications
|
|
49
|
+
# within the golden path. The following display conventions are used to
|
|
50
|
+
# distinguish levels of similarity:
|
|
51
|
+
# * Light to dark gray: 90 - 98% similarity
|
|
52
|
+
# * Light to dark yellow: 98 - 99% similarity
|
|
53
|
+
# * Light to dark orange: greater than 99% similarity
|
|
54
|
+
# * Red: duplications of greater than 98% similarity that lack sufficient
|
|
55
|
+
# Segmental Duplication Database evidence (most likely missed overlaps)
|
|
56
|
+
# For a region to be included in the track, at least 1 Kb of the total
|
|
57
|
+
# sequence (containing at least 500 bp of non-RepeatMasked sequence) had
|
|
58
|
+
# to align and a sequence identity of at least 90% was required."
|
|
59
|
+
class GenomicSuperDup < DBConnection
|
|
60
|
+
include Ucsc::Hg19::Feature
|
|
61
|
+
|
|
62
|
+
set_table_name 'genomicSuperDups'
|
|
63
|
+
set_primary_key nil
|
|
64
|
+
|
|
65
|
+
def self.find_by_slice(slice)
|
|
66
|
+
start = slice.range.begin
|
|
67
|
+
stop = slice.range.end
|
|
68
|
+
return GenomicSuperDup.find_by_sql('SELECT * FROM genomicSuperDups' + overlap_sql(slice, start, stop))
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# = DESCRIPTION
|
|
73
|
+
# From Exapted Repeat description page when clicking the "Describe
|
|
74
|
+
# table schema" in the table browser:
|
|
75
|
+
# "This track displays conserved non-exonic elements that have been
|
|
76
|
+
# deposited by mobile elements (repeats), a process termed "exaptation"
|
|
77
|
+
# (Gould et al., 1982). These regions were identified during a genome-wide
|
|
78
|
+
# survey (Lowe et al., 2007) with the expectation that regions of this type
|
|
79
|
+
# may act as distal transcriptional regulators for nearby genes. A previous
|
|
80
|
+
# case study experimentally verified an exapted mobile element acting as a
|
|
81
|
+
# distal enhancer (Bejerano et al. , 2006)."
|
|
82
|
+
class ExaptedRepeat < DBConnection
|
|
83
|
+
include Ucsc::Hg19::Feature
|
|
84
|
+
|
|
85
|
+
set_table_name 'exaptedRepeats'
|
|
86
|
+
set_primary_key nil
|
|
87
|
+
|
|
88
|
+
def self.find_by_slice(slice)
|
|
89
|
+
start = slice.range.begin
|
|
90
|
+
stop = slice.range.end
|
|
91
|
+
return ExaptedRepeat.find_by_sql('SELECT * FROM exaptedRepeats' + overlap_sql(slice, start, stop))
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# = DESCRIPTION
|
|
96
|
+
# From Interrupted Repeat description page when clicking the "Describe
|
|
97
|
+
# table schema" in the table browser:
|
|
98
|
+
# "This track shows joined fragments of interrupted repeats extracted from
|
|
99
|
+
# the output of the RepeatMasker program, which screens DNA sequences for
|
|
100
|
+
# interspersed repeats and low complexity DNA sequences using the RepBase
|
|
101
|
+
# library of repeats from the Genetic Information Research Institute (GIRI).
|
|
102
|
+
# RepBase is described in Jurka, J. (2000) in the References section below.
|
|
103
|
+
#
|
|
104
|
+
# The detailed annotations from RepeatMasker are in the RepeatMasker track.
|
|
105
|
+
# This track shows fragments of original repeat insertions which have been
|
|
106
|
+
# interrupted by insertions of younger repeats or through local
|
|
107
|
+
# rearrangements. The fragments are joined using the ID column of
|
|
108
|
+
# RepeatMasker output."
|
|
109
|
+
class InterruptedRepeat < DBConnection
|
|
110
|
+
include Ucsc::Hg19::Feature
|
|
111
|
+
|
|
112
|
+
set_table_name 'nestedRepeats'
|
|
113
|
+
set_primary_key nil
|
|
114
|
+
|
|
115
|
+
def self.find_by_slice(slice)
|
|
116
|
+
start = slice.range.begin
|
|
117
|
+
stop = slice.range.end
|
|
118
|
+
return InterruptedRepeat.find_by_sql('SELECT * FROM nestedRepeats' + overlap_sql(slice, start, stop))
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# = DESCRIPTION
|
|
123
|
+
# From Microsatellite description page when clicking the "Describe
|
|
124
|
+
# table schema" in the table browser:
|
|
125
|
+
# "This track displays regions that are likely to be useful as
|
|
126
|
+
# microsatellite markers. These are sequences of at least 15 perfect
|
|
127
|
+
# di-nucleotide and tri-nucleotide repeats, and tend to be highly
|
|
128
|
+
# polymorphic in the population."
|
|
129
|
+
class Microsatellite < DBConnection
|
|
130
|
+
include Ucsc::Hg19::Feature
|
|
131
|
+
|
|
132
|
+
set_table_name 'microsat'
|
|
133
|
+
set_primary_key nil
|
|
134
|
+
|
|
135
|
+
def self.find_by_slice(slice)
|
|
136
|
+
start = slice.range.begin
|
|
137
|
+
stop = slice.range.end
|
|
138
|
+
return Microsatellite.find_by_sql('SELECT * FROM microsat' + overlap_sql(slice, start, stop))
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
=end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = hg19.rb
|
|
3
|
+
# Copyright:: Copyright (C) 2011
|
|
4
|
+
# MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
5
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
6
|
+
|
|
7
|
+
base = File.dirname(__FILE__)
|
|
8
|
+
require "#{base}/hg19/activerecord"
|
|
9
|
+
require "#{base}/hg19/db_connection"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
module Bio
|
|
13
|
+
module Ucsc
|
|
14
|
+
module Hg19
|
|
15
|
+
base = File.dirname(__FILE__)
|
|
16
|
+
autoload :Dgv, "#{base}/hg19/dgv"
|
|
17
|
+
|
|
18
|
+
autoload :Snp131, "#{base}/hg19/snp131"
|
|
19
|
+
|
|
20
|
+
autoload :Snp132, "#{base}/hg19/snp132"
|
|
21
|
+
autoload :Snp132Common, "#{base}/hg19/snp132common"
|
|
22
|
+
autoload :Snp132Flagged, "#{base}/hg19/snp132flagged"
|
|
23
|
+
autoload :Snp132Mult, "#{base}/hg19/snp132mult"
|
|
24
|
+
autoload :Snp132CodingDbSnp, "#{base}/hg19/snp132codingdbsnp"
|
|
25
|
+
|
|
26
|
+
autoload :KnownGene, "#{base}/hg19/knowngene"
|
|
27
|
+
autoload :RefGene, "#{base}/hg19/refgene"
|
|
28
|
+
autoload :GwasCatalog, "#{base}/hg19/gwascatalog"
|
|
29
|
+
autoload :CytoBand, "#{base}/hg19/cytoband"
|
|
30
|
+
autoload :OmimGene, "#{base}/hg19/omimgene"
|
|
31
|
+
autoload :WgRna, "#{base}/hg19/wgrna"
|
|
32
|
+
autoload :EnsGene, "#{base}/hg19/ensgene"
|
|
33
|
+
|
|
34
|
+
autoload :HapMapSnpsASW, "#{base}/hg19/hapmapsnpsasw"
|
|
35
|
+
autoload :HapMapSnpsCEU, "#{base}/hg19/hapmapsnpsceu"
|
|
36
|
+
autoload :HapMapSnpsCHB, "#{base}/hg19/hapmapsnpschb"
|
|
37
|
+
autoload :HapMapSnpsCHD, "#{base}/hg19/hapmapsnpschd"
|
|
38
|
+
autoload :HapMapSnpsGIH, "#{base}/hg19/hapmapsnpsgih"
|
|
39
|
+
autoload :HapMapSnpsJPT, "#{base}/hg19/hapmapsnpsjpt"
|
|
40
|
+
autoload :HapMapSnpsLWK, "#{base}/hg19/hapmapsnpslwk"
|
|
41
|
+
autoload :HapMapSnpsMEX, "#{base}/hg19/hapmapsnpsmex"
|
|
42
|
+
autoload :HapMapSnpsMKK, "#{base}/hg19/hapmapsnpsmkk"
|
|
43
|
+
autoload :HapMapSnpsTSI, "#{base}/hg19/hapmapsnpstsi"
|
|
44
|
+
autoload :HapMapSnpsYRI, "#{base}/hg19/hapmapsnpsyri"
|
|
45
|
+
autoload :HapMapAllelesChimp, "#{base}/hg19/hapmapalleleschimp"
|
|
46
|
+
autoload :HapMapAllelesMacaque, "#{base}/hg19/hapmapallelesmacaque"
|
|
47
|
+
|
|
48
|
+
autoload :Rmsk, "#{base}/hg19/rmsk"
|
|
49
|
+
|
|
50
|
+
autoload :PhyloP46wayPrimates, "#{base}/hg19/phylop46wayprimates"
|
|
51
|
+
autoload :PhastConsElements46wayPrimates, "#{base}/hg19/phastconselements46wayprimates"
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = ucsc/hg19/activerecord.rb - ActiveRecord mappings to UCSC hg19 database
|
|
3
|
+
#
|
|
4
|
+
# Copyright::
|
|
5
|
+
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
|
6
|
+
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
|
7
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
|
8
|
+
#
|
|
9
|
+
# = DESCRIPTION
|
|
10
|
+
# == What is it?
|
|
11
|
+
# The UCSC module provides an API to the UCSC databases
|
|
12
|
+
# stored at genome-mysql.cse.ucsc.edu. This is the same information that is
|
|
13
|
+
# available from http://genome.ucsc.edu
|
|
14
|
+
#
|
|
15
|
+
# The Ucsc::Hg19 module covers the hg19 (= GRCh37) assembly.
|
|
16
|
+
#
|
|
17
|
+
# == ActiveRecord
|
|
18
|
+
# The UCSC API provides a ruby interface to the UCSC mysql databases
|
|
19
|
+
# at genome-mysql.cse.ucsc.edu. Most of the API is based on ActiveRecord to
|
|
20
|
+
# get data from that database. In general, each table is described by a
|
|
21
|
+
# class with the same name: the cnpRedon table is covered by the
|
|
22
|
+
# CnpRedon class, the dgv table is covered by the Dgv class,
|
|
23
|
+
# etc. As a result, accessors are available for all columns in each table.
|
|
24
|
+
# For example, the cnpRedon table has the following columns: chrom, chromStart,
|
|
25
|
+
# chromEnd and name. Through ActiveRecord, these column names become available
|
|
26
|
+
# as attributes of CnpRedon objects:
|
|
27
|
+
# puts my_cnp_redon.name
|
|
28
|
+
# puts my_cnp_redon.chrom
|
|
29
|
+
# puts my_cnp_redon.chromStart
|
|
30
|
+
# puts my_cnp_redon.chromEnd
|
|
31
|
+
#
|
|
32
|
+
# ActiveRecord makes it easy to extract data from those tables using the
|
|
33
|
+
# collection of #find methods. There are three types of #find methods (e.g.
|
|
34
|
+
# for the CnpRedon class):
|
|
35
|
+
# a. find based on primary key in table:
|
|
36
|
+
# # not possible with the UCSC database
|
|
37
|
+
# b. find_by_sql:
|
|
38
|
+
# my_cnp = CnpRedon.find_by_sql("SELECT * FROM cnpRedon WHERE name = 'cnp1'")
|
|
39
|
+
# c. find_by_<insert_your_column_name_here>
|
|
40
|
+
# my_cnp = CnpRedon.find_by_name('cnp1')
|
|
41
|
+
# my_cnp2 = CnpRedon.find_by_chrom_and_chromStart('chr1',377)
|
|
42
|
+
# To find out which find_by_<column> methods are available, you can list the
|
|
43
|
+
# column names using the column_names class method:
|
|
44
|
+
#
|
|
45
|
+
# puts Ucsc::Hg19::CnpRedon.column_names.join("\t")
|
|
46
|
+
#
|
|
47
|
+
# For more information on the find methods, see
|
|
48
|
+
# http://ar.rubyonrails.org/classes/ActiveRecord/Base.html#M000344
|
|
49
|
+
#
|
|
50
|
+
|
|
51
|
+
module Bio
|
|
52
|
+
module Ucsc
|
|
53
|
+
|
|
54
|
+
# = DESCRIPTION
|
|
55
|
+
# The Bio::Ucsc::Hg19 module covers the hg19 database from
|
|
56
|
+
# genome-mysql.cse.ucsc.edu and covers mainly sequences and their annotations.
|
|
57
|
+
# For more information about the database tables, click on the "Describe
|
|
58
|
+
# table schema" in the Table Browser.
|
|
59
|
+
module Hg19
|
|
60
|
+
|
|
61
|
+
# interval: chromStart, chromEnd
|
|
62
|
+
# bin index is enabled
|
|
63
|
+
module QueryUsingChromBin
|
|
64
|
+
def find_by_interval(interval)
|
|
65
|
+
zstart = interval.zero_start
|
|
66
|
+
zend = interval.zero_end
|
|
67
|
+
where = <<-SQL
|
|
68
|
+
chrom = :chrom
|
|
69
|
+
AND bin in (:bins)
|
|
70
|
+
AND ((chromStart BETWEEN :zstart AND :zend)
|
|
71
|
+
OR (chromEnd BETWEEN :zstart AND :zend)
|
|
72
|
+
OR (chromStart <= :zstart AND chromEnd >= :zend))
|
|
73
|
+
SQL
|
|
74
|
+
cond = {
|
|
75
|
+
:chrom => interval.chrom,
|
|
76
|
+
:bins => Ucsc::UcscBin.bin_all(zstart, zend),
|
|
77
|
+
:zstart => zstart,
|
|
78
|
+
:zend => zend,
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
self.find(:all,
|
|
82
|
+
:select => "*",
|
|
83
|
+
:conditions => [where, cond],
|
|
84
|
+
)
|
|
85
|
+
end
|
|
86
|
+
end # module QueryUsingChromBin
|
|
87
|
+
|
|
88
|
+
# interval: chromStart, chromEnd
|
|
89
|
+
# bin index is disabled
|
|
90
|
+
module QueryUsingChrom
|
|
91
|
+
def find_by_interval(interval)
|
|
92
|
+
zstart = interval.zero_start
|
|
93
|
+
zend = interval.zero_end
|
|
94
|
+
where = <<-SQL
|
|
95
|
+
chrom = :chrom
|
|
96
|
+
AND ((chromStart BETWEEN :zstart AND :zend)
|
|
97
|
+
OR (chromEnd BETWEEN :zstart AND :zend)
|
|
98
|
+
OR (chromStart <= :zstart AND chromEnd >= :zend))
|
|
99
|
+
SQL
|
|
100
|
+
cond = {
|
|
101
|
+
:chrom => interval.chrom,
|
|
102
|
+
:zstart => zstart,
|
|
103
|
+
:zend => zend,
|
|
104
|
+
}
|
|
105
|
+
self.find(:all,
|
|
106
|
+
:select => "*",
|
|
107
|
+
:conditions => [where, cond],
|
|
108
|
+
)
|
|
109
|
+
end
|
|
110
|
+
end # module QueryUsingChrom
|
|
111
|
+
|
|
112
|
+
# interval: txStart, txEnd
|
|
113
|
+
# bin index is disabled
|
|
114
|
+
module QueryUsingTx
|
|
115
|
+
def find_by_interval(interval)
|
|
116
|
+
zstart = interval.zero_start
|
|
117
|
+
zend = interval.zero_end
|
|
118
|
+
where = <<-SQL
|
|
119
|
+
chrom = :chrom
|
|
120
|
+
AND ((txStart BETWEEN :zstart AND :zend)
|
|
121
|
+
OR (txEnd BETWEEN :zstart AND :zend)
|
|
122
|
+
OR (txStart <= :zstart AND txEnd >= :zend))
|
|
123
|
+
SQL
|
|
124
|
+
cond = {
|
|
125
|
+
:chrom => interval.chrom,
|
|
126
|
+
:zstart => zstart,
|
|
127
|
+
:zend => zend,
|
|
128
|
+
}
|
|
129
|
+
self.find(:all,
|
|
130
|
+
:select => "*",
|
|
131
|
+
:conditions => [where, cond],
|
|
132
|
+
)
|
|
133
|
+
end
|
|
134
|
+
end # module QueryUsingTx
|
|
135
|
+
|
|
136
|
+
# interval: txStart, txEnd
|
|
137
|
+
# bin index is enabled
|
|
138
|
+
module QueryUsingTxBin
|
|
139
|
+
def find_by_interval(interval)
|
|
140
|
+
zstart = interval.zero_start
|
|
141
|
+
zend = interval.zero_end
|
|
142
|
+
where = <<-SQL
|
|
143
|
+
chrom = :chrom
|
|
144
|
+
AND bin in (:bins)
|
|
145
|
+
AND ((txStart BETWEEN :zstart AND :zend)
|
|
146
|
+
OR (txEnd BETWEEN :zstart AND :zend)
|
|
147
|
+
OR (txStart <= :zstart AND txEnd >= :zend))
|
|
148
|
+
SQL
|
|
149
|
+
cond = {
|
|
150
|
+
:chrom => interval.chrom,
|
|
151
|
+
:bins => Bio::Ucsc::UcscBin.bin_all(zstart, zend),
|
|
152
|
+
:zstart => zstart,
|
|
153
|
+
:zend => zend,
|
|
154
|
+
}
|
|
155
|
+
self.find(:all,
|
|
156
|
+
:select => "*",
|
|
157
|
+
:conditions => [where, cond],
|
|
158
|
+
)
|
|
159
|
+
end
|
|
160
|
+
end # module QueryUsingTxBin
|
|
161
|
+
|
|
162
|
+
# interval: cdsStart, cdsEnd
|
|
163
|
+
# bin index is enabled
|
|
164
|
+
module QueryUsingCcdsBin
|
|
165
|
+
def find_by_interval(interval)
|
|
166
|
+
zstart = interval.zero_start
|
|
167
|
+
zend = interval.zero_end
|
|
168
|
+
where = <<-SQL
|
|
169
|
+
chrom = :chrom
|
|
170
|
+
AND bin in (:bins)
|
|
171
|
+
AND ((cdsStart BETWEEN :zstart AND :zend)
|
|
172
|
+
OR (cdsEnd BETWEEN :zstart AND :zend)
|
|
173
|
+
OR (cdsStart <= :zstart AND cdsEnd >= :zend))
|
|
174
|
+
SQL
|
|
175
|
+
cond = {
|
|
176
|
+
:chrom => interval.chrom,
|
|
177
|
+
:bins => Bio::Ucsc::UcscBin.bin_all(zstart, zend),
|
|
178
|
+
:zstart => zstart,
|
|
179
|
+
:zend => zend,
|
|
180
|
+
}
|
|
181
|
+
self.find(:all,
|
|
182
|
+
:select => "*",
|
|
183
|
+
:conditions => [where, cond],
|
|
184
|
+
)
|
|
185
|
+
end
|
|
186
|
+
end # module QueryUsingCcdsBin
|
|
187
|
+
|
|
188
|
+
# interval: genoName, genoStart, genoEnd
|
|
189
|
+
# bin index is enabled
|
|
190
|
+
module QueryUsingGenoBin
|
|
191
|
+
def find_by_interval(interval)
|
|
192
|
+
zstart = interval.zero_start
|
|
193
|
+
zend = interval.zero_end
|
|
194
|
+
where = <<-SQL
|
|
195
|
+
genoName = :chrom
|
|
196
|
+
AND bin in (:bins)
|
|
197
|
+
AND ((genoStart BETWEEN :zstart AND :zend)
|
|
198
|
+
OR (genoEnd BETWEEN :zstart AND :zend)
|
|
199
|
+
OR (genoStart <= :zstart AND genoEnd >= :zend))
|
|
200
|
+
SQL
|
|
201
|
+
cond = {
|
|
202
|
+
:chrom => interval.chrom,
|
|
203
|
+
:bins => Ucsc::UcscBin.bin_all(zstart, zend),
|
|
204
|
+
:zstart => zstart,
|
|
205
|
+
:zend => zend,
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
self.find(:all,
|
|
209
|
+
:select => "*",
|
|
210
|
+
:conditions => [where, cond],
|
|
211
|
+
)
|
|
212
|
+
end
|
|
213
|
+
end # module QueryUsingGenoBin
|
|
214
|
+
|
|
215
|
+
end # module Hg19
|
|
216
|
+
end # module Ucsc
|
|
217
|
+
end # module Bio
|