bio-ucsc-api 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. data/.document +5 -0
  2. data/.rspec +2 -0
  3. data/COPYING +58 -0
  4. data/COPYING.ja +51 -0
  5. data/Gemfile +19 -0
  6. data/Gemfile.lock +49 -0
  7. data/README.rdoc +57 -0
  8. data/Rakefile +72 -0
  9. data/VERSION +1 -0
  10. data/bio-ucsc-api.gemspec +212 -0
  11. data/lib/bio-ucsc.rb +20 -0
  12. data/lib/bio-ucsc/hg18.rb +27 -0
  13. data/lib/bio-ucsc/hg18/activerecord.rb +166 -0
  14. data/lib/bio-ucsc/hg18/cnpiafrate2.rb +31 -0
  15. data/lib/bio-ucsc/hg18/cnplocke.rb +31 -0
  16. data/lib/bio-ucsc/hg18/cnpredon.rb +32 -0
  17. data/lib/bio-ucsc/hg18/cnpsebat2.rb +35 -0
  18. data/lib/bio-ucsc/hg18/cnpsharp2.rb +32 -0
  19. data/lib/bio-ucsc/hg18/db_connection.rb +61 -0
  20. data/lib/bio-ucsc/hg18/dgv.rb +27 -0
  21. data/lib/bio-ucsc/hg18/refgene.rb +27 -0
  22. data/lib/bio-ucsc/hg18/rmsk.rb +51 -0
  23. data/lib/bio-ucsc/hg18/tables.rb +142 -0
  24. data/lib/bio-ucsc/hg19.rb +54 -0
  25. data/lib/bio-ucsc/hg19/activerecord.rb +217 -0
  26. data/lib/bio-ucsc/hg19/ccdsgene.rb +33 -0
  27. data/lib/bio-ucsc/hg19/cytoband.rb +33 -0
  28. data/lib/bio-ucsc/hg19/db_connection.rb +61 -0
  29. data/lib/bio-ucsc/hg19/dgv.rb +27 -0
  30. data/lib/bio-ucsc/hg19/ensgene.rb +21 -0
  31. data/lib/bio-ucsc/hg19/gwascatalog.rb +26 -0
  32. data/lib/bio-ucsc/hg19/hapmapalleleschimp.rb +35 -0
  33. data/lib/bio-ucsc/hg19/hapmapallelesmacaque.rb +35 -0
  34. data/lib/bio-ucsc/hg19/hapmapsnpsasw.rb +33 -0
  35. data/lib/bio-ucsc/hg19/hapmapsnpsceu.rb +33 -0
  36. data/lib/bio-ucsc/hg19/hapmapsnpschb.rb +33 -0
  37. data/lib/bio-ucsc/hg19/hapmapsnpschd.rb +33 -0
  38. data/lib/bio-ucsc/hg19/hapmapsnpsgih.rb +34 -0
  39. data/lib/bio-ucsc/hg19/hapmapsnpsjpt.rb +34 -0
  40. data/lib/bio-ucsc/hg19/hapmapsnpslwk.rb +35 -0
  41. data/lib/bio-ucsc/hg19/hapmapsnpsmex.rb +35 -0
  42. data/lib/bio-ucsc/hg19/hapmapsnpsmkk.rb +35 -0
  43. data/lib/bio-ucsc/hg19/hapmapsnpstsi.rb +35 -0
  44. data/lib/bio-ucsc/hg19/hapmapsnpsyri.rb +35 -0
  45. data/lib/bio-ucsc/hg19/knowngene.rb +34 -0
  46. data/lib/bio-ucsc/hg19/omimgene.rb +25 -0
  47. data/lib/bio-ucsc/hg19/phastconselements46wayprimates.rb +27 -0
  48. data/lib/bio-ucsc/hg19/phylop46wayprimates.rb +27 -0
  49. data/lib/bio-ucsc/hg19/refgene.rb +27 -0
  50. data/lib/bio-ucsc/hg19/rmsk.rb +32 -0
  51. data/lib/bio-ucsc/hg19/snp131.rb +30 -0
  52. data/lib/bio-ucsc/hg19/snp132.rb +31 -0
  53. data/lib/bio-ucsc/hg19/snp132codingdbsnp.rb +29 -0
  54. data/lib/bio-ucsc/hg19/snp132common.rb +32 -0
  55. data/lib/bio-ucsc/hg19/snp132flagged.rb +31 -0
  56. data/lib/bio-ucsc/hg19/snp132mult.rb +32 -0
  57. data/lib/bio-ucsc/hg19/wgrna.rb +29 -0
  58. data/lib/bio-ucsc/ucsc_bin.rb +148 -0
  59. data/samples/hg19-sample.rb +66 -0
  60. data/spec/hg18/cnpiafrate2_spec.rb +23 -0
  61. data/spec/hg18/cnplocke_spec.rb +23 -0
  62. data/spec/hg18/cnpredon_spec.rb +23 -0
  63. data/spec/hg18/cnpsebat2_spec.rb +23 -0
  64. data/spec/hg18/cnpsharp2_spec.rb +23 -0
  65. data/spec/hg18/db_connection_spec.rb +36 -0
  66. data/spec/hg18/dgv_spec.rb +23 -0
  67. data/spec/hg18/refgene_spec.rb +23 -0
  68. data/spec/hg18/rmsk_spec.rb +33 -0
  69. data/spec/hg19/ccdsgene_spec.rb +23 -0
  70. data/spec/hg19/cytoband_spec.rb +23 -0
  71. data/spec/hg19/db_connection_spec.rb +37 -0
  72. data/spec/hg19/dgv_spec.rb +23 -0
  73. data/spec/hg19/ensgene_spec.rb +23 -0
  74. data/spec/hg19/gwascatalog_spec.rb +23 -0
  75. data/spec/hg19/hapmapalleleschimp_spec.rb +23 -0
  76. data/spec/hg19/hapmapallelesmacaque_spec.rb +23 -0
  77. data/spec/hg19/hapmapsnpsasw_spec.rb +23 -0
  78. data/spec/hg19/hapmapsnpsceu_spec.rb +23 -0
  79. data/spec/hg19/hapmapsnpschb_spec.rb +23 -0
  80. data/spec/hg19/hapmapsnpschd_spec.rb +23 -0
  81. data/spec/hg19/hapmapsnpsgih_spec.rb +23 -0
  82. data/spec/hg19/hapmapsnpsjpt_spec.rb +23 -0
  83. data/spec/hg19/hapmapsnpslwk_spec.rb +23 -0
  84. data/spec/hg19/hapmapsnpsmex_spec.rb +23 -0
  85. data/spec/hg19/hapmapsnpsmkk_spec.rb +23 -0
  86. data/spec/hg19/hapmapsnpstsi_spec.rb +23 -0
  87. data/spec/hg19/hapmapsnpsyri_spec.rb +23 -0
  88. data/spec/hg19/knowngene_spec.rb +23 -0
  89. data/spec/hg19/omimGene_spec.rb +23 -0
  90. data/spec/hg19/phastconselements46wayprimates_spec.rb +23 -0
  91. data/spec/hg19/phyloP46wayPrimates_spec.rb +23 -0
  92. data/spec/hg19/refgene_spec.rb +23 -0
  93. data/spec/hg19/rmsk_spec.rb +23 -0
  94. data/spec/hg19/snp132Flagged_spec.rb +24 -0
  95. data/spec/hg19/snp132_spec.rb +23 -0
  96. data/spec/hg19/snp132codingdbsnp_spec.rb +23 -0
  97. data/spec/hg19/snp132common_spec.rb +24 -0
  98. data/spec/hg19/snp132mult_spec.rb +23 -0
  99. data/spec/hg19/wgrna_spec.rb +23 -0
  100. data/spec/spec_helper.rb +12 -0
  101. metadata +368 -0
@@ -0,0 +1,27 @@
1
+ #
2
+ # = hg18/dgv.rb
3
+ # Copyright::
4
+ # Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
5
+ # Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
6
+ # License:: The Ruby licence (Ruby's / GPLv2 dual)
7
+ #
8
+ # = Table description in UCSC Table Browser
9
+ # This track displays copy number variants (CNVs),
10
+ # insertions/deletions (InDels), inversions and inversion breakpoints
11
+ # annotated by the Database of Genomic Variants (DGV), which contains
12
+ # genomic variations observed in healthy individuals. DGV focuses on
13
+ # structural variation, defined as genomic alterations that involve
14
+ # segments of DNA that are larger than 1000 bp. Insertions/deletions
15
+ # of 100 bp or larger are also included.
16
+ #
17
module Bio
  module Ucsc
    module Hg18
      # ActiveRecord mapping of the hg18 "dgv" table (Database of Genomic
      # Variants track).
      #
      # Interval lookups come from QueryUsingChromBin, which is mixed in at
      # the class level so that Dgv.find_by_interval is available.
      class Dgv < DBConnection
        # The table is declared without a primary key.
        set_primary_key nil
        set_table_name "dgv"

        extend Bio::Ucsc::Hg18::QueryUsingChromBin
      end # class Dgv
    end # module Hg18
  end # module Ucsc
end # module Bio
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+ # = hg18/refgene.rb
3
+ # Copyright::
4
+ # Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
5
+ # Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
6
+ # License:: The Ruby licence (Ruby's / GPLv2 dual)
7
+ #
8
+ # = Table description in UCSC Table Browser
9
+ # The RefSeq Genes track shows known human protein-coding and
10
+ # non-protein-coding genes taken from the NCBI RNA reference sequences
11
+ # collection (RefSeq). The data underlying this track are updated
12
+ # daily.
13
+ #
14
+ # = omitted dynamic method(s) due to the method name collision
15
+ # none
16
+
17
module Bio
  module Ucsc
    module Hg18
      # ActiveRecord mapping of the hg18 "refGene" table (RefSeq Genes track).
      #
      # Interval lookups come from QueryUsingTxBin, which is mixed in at the
      # class level so that RefGene.find_by_interval is available.
      class RefGene < DBConnection
        # The table is declared without a primary key.
        set_primary_key nil
        set_table_name "refGene"

        extend Bio::Ucsc::Hg18::QueryUsingTxBin
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # -*- coding: utf-8 -*-
2
+ # = hg18/rmsk.rb
3
+ # Copyright::
4
+ # Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
5
+ # License:: The Ruby licence (Ruby's / GPLv2 dual)
6
+ #
7
+ # = Table description in UCSC Table Browser
8
+ # This track was created by using Arian Smit's RepeatMasker program,
9
+ # which screens DNA sequences for interspersed repeats and low
10
+ # complexity DNA sequences. The program outputs a detailed annotation
11
+ # of the repeats that are present in the query sequence (represented
12
+ # by this track), as well as a modified version of the query sequence
13
+ # in which all the annotated repeats have been masked (generally
14
+ # available on the Downloads page). RepeatMasker uses the RepBase
15
+ # library of repeats from the Genetic Information Research Institute
16
+ # (GIRI). RepBase is described in Jurka, J. (2000) in the References
17
+ # section below.
18
+ #
19
+ # = omitted dynamic method(s) due to the method name collision
20
+ # none
21
+ #
22
+ # = Note
23
+ # In the hg18 database, the Rmsk table is actually separated
24
+ # into "chr1_rmsk", "chr2_rmsk", etc. The Rmsk class dynamically
25
+ # defines Rmsk::Chr1_Rmsk, Rmsk::Chr2_Rmsk, etc. The
26
+ # Rmsk.find_by_interval calls the appropriate class automatically.
27
+
28
+ module Bio
29
+ module Ucsc
30
+ module Hg18
31
+ class Rmsk
32
# Define one ActiveRecord class per chromosome table and register it as a
# nested constant: ChrM_Rmsk, Chr1_Rmsk ... Chr22_Rmsk, ChrX_Rmsk, ChrY_Rmsk.
# Each class maps to the table named after the downcased chromosome,
# e.g. ChrX_Rmsk -> "chrx_rmsk".
chromosome_ids = %w(M) + (1..22).map { |number| number.to_s } + %w(X Y)
chromosome_ids.each do |id|
  table_class = Class.new(DBConnection) do
    extend Bio::Ucsc::Hg18::QueryUsingGenoBin
    set_table_name "chr#{id.downcase}_rmsk"
    set_primary_key nil
  end
  const_set("Chr#{id}_Rmsk", table_class)
end
43
+
44
# Dispatches an interval query to the class generated for the interval's
# chromosome (ChrM_Rmsk, Chr1_Rmsk ... ChrX_Rmsk, ChrY_Rmsk).
#
# interval:: object responding to +chrom+ (e.g. "chr1", "chrX"); it is
#            passed unchanged to the per-chromosome class.
#
# Returns whatever the per-chromosome class's +find_by_interval+ returns.
# Raises NameError when no class exists for the chromosome name.
def self.find_by_interval(interval)
  # String#capitalize would downcase the suffix ("chrX" -> "Chrx") and miss
  # the ChrX_Rmsk / ChrY_Rmsk / ChrM_Rmsk constants, so normalize the
  # "chr" prefix and upcase the suffix explicitly instead.
  chrom_name = interval.chrom.to_s.sub(/\Achr(.*)\z/i) do
    "Chr#{Regexp.last_match(1).upcase}"
  end
  chr_klass = const_get("#{chrom_name}_Rmsk")
  chr_klass.__send__(:find_by_interval, interval)
end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,142 @@
1
+ =begin
2
+ # = DESCRIPTION
3
+ # From Structural Variants description page when clicking the "Describe
4
+ # table schema" in the table browser:
5
+ # "Paired-end sequences from a human fosmid DNA library were mapped to the
6
+ # assembly. The average resolution of this technique was ~8kb, and included
7
+ # 56 sites of inversion not detectable by the array-based approaches.
8
+ # However, because of the physical constraints of fosmid insert size, this
9
+ # technique was unable to detect insertions greater than 40 kb in size."
10
+ class CnpTuzun < DBConnection
11
+ include Ucsc::Hg19::Feature
12
+
13
+ set_table_name 'cnpTuzun'
14
+ set_primary_key nil
15
+
16
+ def self.find_by_slice(slice)
17
+ start = slice.range.begin
18
+ stop = slice.range.end
19
+ CnpTuzun.find_by_sql('SELECT * FROM cnpTuzun' + overlap_sql(slice, start, stop))
20
+ end
21
+ end
22
+
23
+
24
+ # = DESCRIPTION
25
+ # From Simple Repeats description page when clicking the "Describe
26
+ # table schema" in the table browser:
27
+ # "This track displays simple tandem repeats (possibly imperfect) located
28
+ # by Tandem Repeats Finder (TRF), which is specialized for this purpose.
29
+ # These repeats can occur within coding regions of genes and may be quite
30
+ # polymorphic. Repeat expansions are sometimes associated with specific
31
+ # diseases."
32
+ class SimpleRepeat < DBConnection
33
+ include Ucsc::Hg19::Feature
34
+
35
+ set_table_name 'simpleRepeat'
36
+ set_primary_key nil
37
+
38
+ def self.find_by_slice(slice)
39
+ start = slice.range.begin
40
+ stop = slice.range.end
41
+ SimpleRepeat.find_by_sql('SELECT * FROM simpleRepeat' + overlap_sql(slice, start, stop))
42
+ end
43
+ end
44
+
45
+ # = DESCRIPTION
46
+ # From Structural Variants description page when clicking the "Describe
47
+ # table schema" in the table browser:
48
+ # "This track shows regions detected as putative genomic duplications
49
+ # within the golden path. The following display conventions are used to
50
+ # distinguish levels of similarity:
51
+ # * Light to dark gray: 90 - 98% similarity
52
+ # * Light to dark yellow: 98 - 99% similarity
53
+ # * Light to dark orange: greater than 99% similarity
54
+ # * Red: duplications of greater than 98% similarity that lack sufficient
55
+ # Segmental Duplication Database evidence (most likely missed overlaps)
56
+ # For a region to be included in the track, at least 1 Kb of the total
57
+ # sequence (containing at least 500 bp of non-RepeatMasked sequence) had
58
+ # to align and a sequence identity of at least 90% was required."
59
+ class GenomicSuperDup < DBConnection
60
+ include Ucsc::Hg19::Feature
61
+
62
+ set_table_name 'genomicSuperDups'
63
+ set_primary_key nil
64
+
65
+ def self.find_by_slice(slice)
66
+ start = slice.range.begin
67
+ stop = slice.range.end
68
+ return GenomicSuperDup.find_by_sql('SELECT * FROM genomicSuperDups' + overlap_sql(slice, start, stop))
69
+ end
70
+ end
71
+
72
+ # = DESCRIPTION
73
+ # From Exapted Repeat description page when clicking the "Describe
74
+ # table schema" in the table browser:
75
+ # "This track displays conserved non-exonic elements that have been
76
+ # deposited by mobile elements (repeats), a process termed "exaptation"
77
+ # (Gould et al., 1982). These regions were identified during a genome-wide
78
+ # survey (Lowe et al., 2007) with the expectation that regions of this type
79
+ # may act as distal transcriptional regulators for nearby genes. A previous
80
+ # case study experimentally verified an exapted mobile element acting as a
81
+ # distal enhancer (Bejerano et al. , 2006)."
82
+ class ExaptedRepeat < DBConnection
83
+ include Ucsc::Hg19::Feature
84
+
85
+ set_table_name 'exaptedRepeats'
86
+ set_primary_key nil
87
+
88
+ def self.find_by_slice(slice)
89
+ start = slice.range.begin
90
+ stop = slice.range.end
91
+ return ExaptedRepeat.find_by_sql('SELECT * FROM exaptedRepeats' + overlap_sql(slice, start, stop))
92
+ end
93
+ end
94
+
95
+ # = DESCRIPTION
96
+ # From Interrupted Repeat description page when clicking the "Describe
97
+ # table schema" in the table browser:
98
+ # "This track shows joined fragments of interrupted repeats extracted from
99
+ # the output of the RepeatMasker program, which screens DNA sequences for
100
+ # interspersed repeats and low complexity DNA sequences using the RepBase
101
+ # library of repeats from the Genetic Information Research Institute (GIRI).
102
+ # RepBase is described in Jurka, J. (2000) in the References section below.
103
+ #
104
+ # The detailed annotations from RepeatMasker are in the RepeatMasker track.
105
+ # This track shows fragments of original repeat insertions which have been
106
+ # interrupted by insertions of younger repeats or through local
107
+ # rearrangements. The fragments are joined using the ID column of
108
+ # RepeatMasker output."
109
+ class InterruptedRepeat < DBConnection
110
+ include Ucsc::Hg19::Feature
111
+
112
+ set_table_name 'nestedRepeats'
113
+ set_primary_key nil
114
+
115
+ def self.find_by_slice(slice)
116
+ start = slice.range.begin
117
+ stop = slice.range.end
118
+ return InterruptedRepeat.find_by_sql('SELECT * FROM nestedRepeats' + overlap_sql(slice, start, stop))
119
+ end
120
+ end
121
+
122
+ # = DESCRIPTION
123
+ # From Microsatellite description page when clicking the "Describe
124
+ # table schema" in the table browser:
125
+ # "This track displays regions that are likely to be useful as
126
+ # microsatellite markers. These are sequences of at least 15 perfect
127
+ # di-nucleotide and tri-nucleotide repeats, and tend to be highly
128
+ # polymorphic in the population."
129
+ class Microsatellite < DBConnection
130
+ include Ucsc::Hg19::Feature
131
+
132
+ set_table_name 'microsat'
133
+ set_primary_key nil
134
+
135
+ def self.find_by_slice(slice)
136
+ start = slice.range.begin
137
+ stop = slice.range.end
138
+ return Microsatellite.find_by_sql('SELECT * FROM microsat' + overlap_sql(slice, start, stop))
139
+ end
140
+ end
141
+
142
+ =end
@@ -0,0 +1,54 @@
1
+ #
2
+ # = hg19.rb
3
+ # Copyright:: Copyright (C) 2011
4
+ # MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
5
+ # License:: The Ruby licence (Ruby's / GPLv2 dual)
6
+
7
+ base = File.dirname(__FILE__)
8
+ require "#{base}/hg19/activerecord"
9
+ require "#{base}/hg19/db_connection"
10
+
11
+
12
module Bio
  module Ucsc
    # Namespace for the hg19 table classes. The classes are registered with
    # autoload, so each file is only loaded when its constant is first used.
    module Hg19
      base = File.dirname(__FILE__)

      # Every class lives in "hg19/<downcased class name>.rb".
      %w[
        Dgv

        Snp131

        Snp132
        Snp132Common
        Snp132Flagged
        Snp132Mult
        Snp132CodingDbSnp

        KnownGene
        RefGene
        GwasCatalog
        CytoBand
        OmimGene
        WgRna
        EnsGene

        HapMapSnpsASW
        HapMapSnpsCEU
        HapMapSnpsCHB
        HapMapSnpsCHD
        HapMapSnpsGIH
        HapMapSnpsJPT
        HapMapSnpsLWK
        HapMapSnpsMEX
        HapMapSnpsMKK
        HapMapSnpsTSI
        HapMapSnpsYRI
        HapMapAllelesChimp
        HapMapAllelesMacaque

        Rmsk

        PhyloP46wayPrimates
        PhastConsElements46wayPrimates
      ].each do |class_name|
        autoload class_name.to_sym, "#{base}/hg19/#{class_name.downcase}"
      end
    end
  end
end
@@ -0,0 +1,217 @@
1
+ #
2
+ # = ucsc/hg19/activerecord.rb - ActiveRecord mappings to UCSC hg19 database
3
+ #
4
+ # Copyright::
5
+ # Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
6
+ # Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
7
+ # License:: The Ruby licence (Ruby's / GPLv2 dual)
8
+ #
9
+ # = DESCRIPTION
10
+ # == What is it?
11
+ # The UCSC module provides an API to the UCSC databases
12
+ # stored at genome-mysql.cse.ucsc.edu. This is the same information that is
13
+ # available from http://genome.ucsc.edu
14
+ #
15
+ # The Ucsc::Hg19 module covers the hg19 (= GRCh37) assembly.
16
+ #
17
+ # == ActiveRecord
18
+ # The UCSC API provides a ruby interface to the UCSC mysql databases
19
+ # at genome-mysql.cse.ucsc.edu. Most of the API is based on ActiveRecord to
20
+ # get data from that database. In general, each table is described by a
21
+ # class with the same name: the cnpRedon table is covered by the
22
+ # CnpRedon class, the dgv table is covered by the Dgv class,
23
+ # etc. As a result, accessors are available for all columns in each table.
24
+ # For example, the cnpRedon table has the following columns: chrom, chromStart,
25
+ # chromEnd and name. Through ActiveRecord, these column names become available
26
+ # as attributes of CnpRedon objects:
27
+ # puts my_cnp_redon.name
28
+ # puts my_cnp_redon.chrom
29
+ # puts my_cnp_redon.chromStart
30
+ # puts my_cnp_redon.chromEnd
31
+ #
32
+ # ActiveRecord makes it easy to extract data from those tables using the
33
+ # collection of #find methods. There are three types of #find methods (e.g.
34
+ # for the CnpRedon class):
35
+ # a. find based on primary key in table:
36
+ # # not possible with the UCSC database
37
+ # b. find_by_sql:
38
+ # my_cnp = CnpRedon.find_by_sql("SELECT * FROM cnpRedon WHERE name = 'cnp1'")
39
+ # c. find_by_<insert_your_column_name_here>
40
+ # my_cnp = CnpRedon.find_by_name('cnp1')
41
+ # my_cnp2 = CnpRedon.find_by_chrom_and_chromStart('chr1',377)
42
+ # To find out which find_by_<column> methods are available, you can list the
43
+ # column names using the column_names class methods:
44
+ #
45
+ # puts Ucsc::Hg19::CnpRedon.column_names.join("\t")
46
+ #
47
+ # For more information on the find methods, see
48
+ # http://ar.rubyonrails.org/classes/ActiveRecord/Base.html#M000344
49
+ #
50
+
51
module Bio
  module Ucsc

    # = DESCRIPTION
    # The Bio::Ucsc::Hg19 module covers the hg19 database from
    # genome-mysql.cse.ucsc.edu and covers mainly sequences and their annotations.
    # For more information about the database tables, click on "Describe
    # table schema" in the Table Browser.
    #
    # Each QueryUsing* module below is meant to be mixed into a table class
    # with +extend+; it adds a +find_by_interval+ class method that selects
    # the rows overlapping an interval. The modules differ only in the column
    # names they query and in whether the "bin" column is used.
    module Hg19

      # Shared builder for the conditions used by every QueryUsing* module.
      module IntervalConditions
        # Builds the WHERE clause and its named bind values.
        #
        # interval::     object responding to +chrom+, +zero_start+ and
        #                +zero_end+
        # chrom_column:: column compared with interval.chrom
        # start_column:: column holding the feature start
        # end_column::   column holding the feature end
        # use_bin::      when true, also restrict the "bin" column to the
        #                values returned by Bio::Ucsc::UcscBin.bin_all
        #
        # Returns a two-element array [where_sql, bind_hash].
        #
        # A row matches when its start or its end lies between zero_start and
        # zero_end (SQL BETWEEN, bounds included), or when the row spans the
        # whole interval.
        def self.build(interval, chrom_column, start_column, end_column, use_bin)
          zstart = interval.zero_start
          zend   = interval.zero_end

          clauses = ["#{chrom_column} = :chrom"]
          binds   = { :chrom => interval.chrom, :zstart => zstart, :zend => zend }

          if use_bin
            clauses << "bin in (:bins)"
            binds[:bins] = Bio::Ucsc::UcscBin.bin_all(zstart, zend)
          end

          overlap = [
            "(#{start_column} BETWEEN :zstart AND :zend)",
            "(#{end_column} BETWEEN :zstart AND :zend)",
            "(#{start_column} <= :zstart AND #{end_column} >= :zend)"
          ].join(" OR ")
          clauses << "(#{overlap})"

          [clauses.join(" AND "), binds]
        end
      end # module IntervalConditions

      # interval: chrom, chromStart, chromEnd
      # bin index is enabled
      module QueryUsingChromBin
        # Returns the result of find(:all, ...) for rows overlapping +interval+.
        def find_by_interval(interval)
          where, cond = IntervalConditions.build(interval, "chrom", "chromStart", "chromEnd", true)
          find(:all, :select => "*", :conditions => [where, cond])
        end
      end # module QueryUsingChromBin

      # interval: chrom, chromStart, chromEnd
      # bin index is disabled
      module QueryUsingChrom
        # Returns the result of find(:all, ...) for rows overlapping +interval+.
        def find_by_interval(interval)
          where, cond = IntervalConditions.build(interval, "chrom", "chromStart", "chromEnd", false)
          find(:all, :select => "*", :conditions => [where, cond])
        end
      end # module QueryUsingChrom

      # interval: chrom, txStart, txEnd
      # bin index is disabled
      module QueryUsingTx
        # Returns the result of find(:all, ...) for rows overlapping +interval+.
        def find_by_interval(interval)
          where, cond = IntervalConditions.build(interval, "chrom", "txStart", "txEnd", false)
          find(:all, :select => "*", :conditions => [where, cond])
        end
      end # module QueryUsingTx

      # interval: chrom, txStart, txEnd
      # bin index is enabled
      module QueryUsingTxBin
        # Returns the result of find(:all, ...) for rows overlapping +interval+.
        def find_by_interval(interval)
          where, cond = IntervalConditions.build(interval, "chrom", "txStart", "txEnd", true)
          find(:all, :select => "*", :conditions => [where, cond])
        end
      end # module QueryUsingTxBin

      # interval: chrom, cdsStart, cdsEnd
      # bin index is enabled
      module QueryUsingCcdsBin
        # Returns the result of find(:all, ...) for rows overlapping +interval+.
        def find_by_interval(interval)
          where, cond = IntervalConditions.build(interval, "chrom", "cdsStart", "cdsEnd", true)
          find(:all, :select => "*", :conditions => [where, cond])
        end
      end # module QueryUsingCcdsBin

      # interval: genoName, genoStart, genoEnd
      # bin index is enabled
      module QueryUsingGenoBin
        # Returns the result of find(:all, ...) for rows overlapping +interval+.
        def find_by_interval(interval)
          where, cond = IntervalConditions.build(interval, "genoName", "genoStart", "genoEnd", true)
          find(:all, :select => "*", :conditions => [where, cond])
        end
      end # module QueryUsingGenoBin

    end # module Hg19
  end # module Ucsc
end # module Bio