jandot-ruby-ucsc-api 0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ucsc +2 -0
- data/lib/ucsc.rb +35 -0
- data/lib/ucsc/db_connection.rb +41 -0
- data/lib/ucsc/hg18/activerecord.rb +308 -0
- data/lib/ucsc/hg18/slice.rb +50 -0
- data/samples/ranges.txt +18 -0
- data/samples/tryout.rb +36 -0
- data/test/unit/test_activerecord.rb +94 -0
- metadata +77 -0
data/bin/ucsc
ADDED
data/lib/ucsc.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# BioRuby is treated as optional here: load it when it is installed and carry
# on without it otherwise. Only LoadError is rescued, so any other problem
# raised while loading the library is still reported.
begin
  require 'bio'
rescue LoadError
  # bio is not available; continue without it.
end
|
5
|
+
|
6
|
+
# Extensions to the core Range class that are used to compare positions on a
# chromosome.
class Range
  # Returns true when this range lies strictly inside +other_range+: it has to
  # begin after and end before the other range. Ranges that share a boundary
  # (and therefore also identical ranges) are not considered contained.
  def contained_by?(other_range)
    self.begin > other_range.begin && self.end < other_range.end
  end

  # Returns true when the start of either range falls within the other range,
  # boundaries included.
  def overlaps_with?(other_range)
    (self.begin >= other_range.begin && self.begin <= other_range.end) ||
      (other_range.begin >= self.begin && other_range.begin <= self.end)
  end
end
|
23
|
+
|
24
|
+
# Directory of this file; the rest of the library is loaded relative to it.
lib_dir = File.dirname(__FILE__)

# Database connection: db_connection.rb defines the Ucsc::Hg18 namespace, which
# is then mixed into the top level so that its classes can be used unqualified.
require "#{lib_dir}/ucsc/db_connection.rb"
include Ucsc::Hg18
Ucsc::Hg18::DBConnection.connect

# Core modules
%w[activerecord slice].each do |core_file|
  require "#{lib_dir}/ucsc/hg18/#{core_file}.rb"
end

# Convenience groupings of the annotation classes.
ALL_CNPS = [Dgv, CnpIafrate, CnpLocke, CnpRedon, CnpSebat, CnpSharp, CnpTuzun]
SEGDUPS = [GenomicSuperDup]
ALL_REPEATS = [SimpleRepeat, ExaptedRepeat, InterruptedRepeat, Microsatellite]
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activerecord'
|
3
|
+
|
4
|
+
# Settings used to reach the UCSC mysql server.
DB_ADAPTER = 'mysql'
DB_HOST = 'genome-mysql.cse.ucsc.edu'
DB_USERNAME = 'genome'
DB_PASSWORD = ''

module Ucsc
  module Hg18
    # = DESCRIPTION
    # Ucsc::Hg18::DBConnection is the abstract ActiveRecord class that holds
    # the connection established with the UCSC mysql server. The table classes
    # of this module inherit from it.
    class DBConnection < ActiveRecord::Base
      self.abstract_class = true

      class << self
        # = DESCRIPTION
        # The Ucsc::Hg18::DBConnection.connect method makes the connection
        # to the UCSC hg18 database, using the DB_* settings of this file.
        #
        # = USAGE
        #  # Connect to the hg18 database
        #  Ucsc::Hg18::DBConnection.connect
        #
        # ---
        # *Arguments*: none
        def connect
          settings = {
            :adapter  => DB_ADAPTER,
            :host     => DB_HOST,
            :database => 'hg18',
            :username => DB_USERNAME,
            :password => DB_PASSWORD
          }
          establish_connection(settings)
        end
      end
    end
  end
end
|
@@ -0,0 +1,308 @@
|
|
1
|
+
#
|
2
|
+
# = ucsc/hg18/activerecord.rb - ActiveRecord mappings to UCSC hg18 database
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
|
8
|
+
# = DESCRIPTION
|
9
|
+
# == What is it?
|
10
|
+
# The UCSC module provides an API to the UCSC databases
|
11
|
+
# stored at genome-mysql.cse.ucsc.edu. This is the same information that is
|
12
|
+
# available from http://genome.ucsc.edu
|
13
|
+
#
|
14
|
+
# The Ucsc::Hg18 module covers the hg18 (= NCBI build 36) assembly.
|
15
|
+
#
|
16
|
+
# == ActiveRecord
|
17
|
+
# The UCSC API provides a ruby interface to the UCSC mysql databases
|
18
|
+
# at genome-mysql.cse.ucsc.edu. Most of the API is based on ActiveRecord to
|
19
|
+
# get data from that database. In general, each table is described by a
|
20
|
+
# class with the same name: the cnpRedon table is covered by the
|
21
|
+
# CnpRedon class, the dgv table is covered by the Dgv class,
|
22
|
+
# etc. As a result, accessors are available for all columns in each table.
|
23
|
+
# For example, the cnpRedon table has the following columns: chrom, chromStart,
|
24
|
+
# chromEnd and name. Through ActiveRecord, these column names become available
|
25
|
+
# as attributes of CnpRedon objects:
|
26
|
+
# puts my_cnp_redon.name
|
27
|
+
# puts my_cnp_redon.chrom
|
28
|
+
# puts my_cnp_redon.chromStart
|
29
|
+
# puts my_cnp_redon.chromEnd
|
30
|
+
#
|
31
|
+
# ActiveRecord makes it easy to extract data from those tables using the
|
32
|
+
# collection of #find methods. There are three types of #find methods (e.g.
|
33
|
+
# for the CnpRedon class):
|
34
|
+
# a. find based on primary key in table:
|
35
|
+
# # not possible with the UCSC database
|
36
|
+
# b. find_by_sql:
|
37
|
+
# my_cnp = CnpRedon.find_by_sql("SELECT * FROM cnpRedon WHERE name = 'cnp1'")
|
38
|
+
# c. find_by_<insert_your_column_name_here>
|
39
|
+
# my_cnp = CnpRedon.find_by_name('cnp1')
|
40
|
+
# my_cnp2 = CnpRedon.find_by_chrom_and_chromStart('chr1',377)
|
41
|
+
# To find out which find_by_<column> methods are available, you can list the
|
42
|
+
# column names using the column_names class methods:
|
43
|
+
#
|
44
|
+
# puts Ucsc::Hg18::CnpRedon.column_names.join("\t")
|
45
|
+
#
|
46
|
+
# For more information on the find methods, see
|
47
|
+
# http://ar.rubyonrails.org/classes/ActiveRecord/Base.html#M000344
|
48
|
+
#
|
49
|
+
module Ucsc
  # = DESCRIPTION
  # The Ucsc::Hg18 module covers the hg18 database from
  # genome-mysql.cse.ucsc.edu and covers mainly sequences and their annotations.
  # For more information about the database tables, click on the "Describe
  # table schema" in the Table Browser.
  module Hg18
    # = DESCRIPTION
    # The Sliceable mixin holds the slice method and can be included
    # in any class that lends itself to having a position on a chromosome.
    module Sliceable
      # Builds the Ucsc::Hg18::Slice that describes the position of this
      # record. The chromStart, chromEnd and strand columns are only read when
      # the table actually has them; a missing start or stop ends up as 0
      # because nil.to_i is 0.
      #
      # ---
      # *Arguments*: none
      # *Returns*:: Ucsc::Hg18::Slice object
      def slice
        columns = self.class.column_names
        start  = columns.include?('chromStart') ? self.chromStart : nil
        stop   = columns.include?('chromEnd') ? self.chromEnd : nil
        strand = columns.include?('strand') ? self.strand : nil

        Ucsc::Hg18::Slice.new(self.chrom, Range.new(start.to_i, stop.to_i), strand)
      end
    end

    # = DESCRIPTION
    # The Feature mixin holds common methods for all feature-like classes, such
    # as how to print itself to the screen.
    module Feature
      include Sliceable

      # Returns the class name, the slice and the name of the feature,
      # separated by tabs. A missing name is printed as an empty field
      # instead of raising a TypeError.
      def to_s
        [self.class.to_s, self.slice.to_s, self.name].join("\t")
      end
    end

    # = DESCRIPTION
    # From Structural Variants description page when clicking the "Describe
    # table schema" in the table browser:
    # "All hybridizations were performed in duplicate incorporating a
    # dye-reversal using proprietary 1 Mb GenomeChip V1.2 Human BAC Arrays
    # consisting of 2,632 BAC clones (Spectral Genomics, Houston, TX). The
    # false positive rate was estimated at ~1 clone per 5,264 tested."
    class CnpIafrate < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'cnpIafrate2'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Structural Variants description page when clicking the "Describe
    # table schema" in the table browser:
    # "DNA samples were obtained from Coriell Cell Repositories. The reference
    # DNA used for all hybridizations was from a single male of Czechoslovakian
    # descent, Coriell ID GM15724 (also used in the Sharp study).
    #
    # A locus was considered a CNV (copy number variation) if the log ratio of
    # fluorescence measurements for the individuals assayed exceeded twice the
    # standard deviation of the autosomal clones in replicate dye-swapped
    # experiments. A CNV was classified as a CNP if altered copy number was
    # observed in more than 1% of the 269 individuals."
    class CnpLocke < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'cnpLocke'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Structural Variants description page when clicking the "Describe
    # table schema" in the table browser:
    # "Experiments were performed with the International HapMap DNA and
    # cell-line collection using two technologies: comparative analysis of
    # hybridization intensities on Affymetrix GeneChip Human Mapping 500K early
    # access arrays (500K EA) and comparative genomic hybridization with a
    # Whole Genome TilePath (WGTP) array."
    class CnpRedon < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'cnpRedon'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Structural Variants description page when clicking the "Describe
    # table schema" in the table browser:
    # "Following digestion with BglII or HindIII, genomic DNA was hybridized to
    # a custom array consisting of 85,000 oligonucleotide probes. The probes
    # were selected to be free of common repeats and have unique homology
    # within the human genome. The average resolution of the array was ~35kb;
    # however, only intervals in which three consecutive probes showed
    # concordant signals were scored as CNPs. All hybridizations were performed
    # in duplicate incorporating a dye-reversal, with the false positive rate
    # estimated to be ~6%."
    class CnpSebat < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'cnpSebat2'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Structural Variants description page when clicking the "Describe
    # table schema" in the table browser:
    # "All hybridizations were performed in duplicate incorporating a
    # dye-reversal using a custom array consisting of 2,194 end-sequence or
    # FISH-confirmed BACs, targeted to regions of the genome flanked by
    # segmental duplications. The false positive rate was estimated at ~3
    # clones per 4,000 tested."
    class CnpSharp < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'cnpSharp2'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Structural Variants description page when clicking the "Describe
    # table schema" in the table browser:
    # "Paired-end sequences from a human fosmid DNA library were mapped to the
    # assembly. The average resolution of this technique was ~8kb, and included
    # 56 sites of inversion not detectable by the array-based approaches.
    # However, because of the physical constraints of fosmid insert size, this
    # technique was unable to detect insertions greater than 40 kb in size."
    class CnpTuzun < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'cnpTuzun'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Structural Variants description page when clicking the "Describe
    # table schema" in the table browser:
    # ""
    class Dgv < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'dgv'
      set_primary_key nil

      # Returns the class name, the slice, the reference and the method of
      # the record, separated by tabs. The +method+ column is read through []
      # so that the lookup cannot be taken for Object#method, which expects an
      # argument. Missing values are printed as empty fields.
      def to_s
        [self.class.to_s, self.slice.to_s, self.reference, self['method']].join("\t")
      end
    end

    # = DESCRIPTION
    # From Simple Repeats description page when clicking the "Describe
    # table schema" in the table browser:
    # "This track displays simple tandem repeats (possibly imperfect) located
    # by Tandem Repeats Finder (TRF), which is specialized for this purpose.
    # These repeats can occur within coding regions of genes and may be quite
    # polymorphic. Repeat expansions are sometimes associated with specific
    # diseases."
    class SimpleRepeat < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'simpleRepeat'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Structural Variants description page when clicking the "Describe
    # table schema" in the table browser:
    # "This track shows regions detected as putative genomic duplications
    # within the golden path. The following display conventions are used to
    # distinguish levels of similarity:
    # * Light to dark gray: 90 - 98% similarity
    # * Light to dark yellow: 98 - 99% similarity
    # * Light to dark orange: greater than 99% similarity
    # * Red: duplications of greater than 98% similarity that lack sufficient
    #   Segmental Duplication Database evidence (most likely missed overlaps)
    # For a region to be included in the track, at least 1 Kb of the total
    # sequence (containing at least 500 bp of non-RepeatMasked sequence) had
    # to align and a sequence identity of at least 90% was required."
    class GenomicSuperDup < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'genomicSuperDups'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Exapted Repeat description page when clicking the "Describe
    # table schema" in the table browser:
    # "This track displays conserved non-exonic elements that have been
    # deposited by mobile elements (repeats), a process termed "exaptation"
    # (Gould et al., 1982). These regions were identified during a genome-wide
    # survey (Lowe et al., 2007) with the expectation that regions of this type
    # may act as distal transcriptional regulators for nearby genes. A previous
    # case study experimentally verified an exapted mobile element acting as a
    # distal enhancer (Bejerano et al., 2006)."
    class ExaptedRepeat < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'exaptedRepeats'
      set_primary_key nil
    end

    #TODO: The repeatmasker features are distributed over different tables; one for
    # each chromosome.
    #    # = DESCRIPTION
    #    # From RepeatMasker description page when clicking the "Describe
    #    # table schema" in the table browser:
    #    # "This track was created by using Arian Smit's RepeatMasker program, which
    #    # screens DNA sequences for interspersed repeats and low complexity DNA
    #    # sequences. The program outputs a detailed annotation of the repeats that
    #    # are present in the query sequence, as well as a modified version of the
    #    # query sequence in which all the annotated repeats have been masked.
    #    # RepeatMasker uses the RepBase library of repeats from the Genetic
    #    # Information Research Institute (GIRI). RepBase is described in Jurka, J.
    #    # (2000) in the References section below."
    #    class RepeatMasker < DBConnection
    #      include Ucsc::Hg18::Feature
    #
    #      set_table_name 'rmsk'
    #      set_primary_key nil
    #    end

    # = DESCRIPTION
    # From Interrupted Repeat description page when clicking the "Describe
    # table schema" in the table browser:
    # "This track shows joined fragments of interrupted repeats extracted from
    # the output of the RepeatMasker program, which screens DNA sequences for
    # interspersed repeats and low complexity DNA sequences using the RepBase
    # library of repeats from the Genetic Information Research Institute (GIRI).
    # RepBase is described in Jurka, J. (2000) in the References section below.
    #
    # The detailed annotations from RepeatMasker are in the RepeatMasker track.
    # This track shows fragments of original repeat insertions which have been
    # interrupted by insertions of younger repeats or through local
    # rearrangements. The fragments are joined using the ID column of
    # RepeatMasker output."
    class InterruptedRepeat < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'nestedRepeats'
      set_primary_key nil
    end

    # = DESCRIPTION
    # From Microsatellite description page when clicking the "Describe
    # table schema" in the table browser:
    # "This track displays regions that are likely to be useful as
    # microsatellite markers. These are sequences of at least 15 perfect
    # di-nucleotide and tri-nucleotide repeats, and tend to be highly
    # polymorphic in the population."
    class Microsatellite < DBConnection
      include Ucsc::Hg18::Feature

      set_table_name 'microsat'
      set_primary_key nil
    end

  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Ucsc
  module Hg18
    # = DESCRIPTION
    # A Slice describes a region: the name of a chromosome, a Range of
    # positions on it and, optionally, a strand.
    #
    # The comparison methods rely on Range#overlaps_with? and
    # Range#contained_by?, which are added to Range in ucsc.rb.
    class Slice
      attr_accessor :chromosome, :range, :strand

      # ---
      # *Arguments*:
      # * chromosome:: name of the chromosome (e.g. 'chr1')
      # * range:: Range of positions on that chromosome
      # * strand:: strand of the region (default: nil)
      def initialize(chromosome, range, strand = nil)
        @chromosome, @range, @strand = chromosome, range, strand
      end

      # Returns the slice as "<chromosome>:<range>", e.g. "chr1:1..100".
      def to_s
        "#{@chromosome}:#{@range}"
      end

      # Returns true when both slices are on the same chromosome and their
      # ranges overlap (see Range#overlaps_with?).
      def overlaps?(other_slice)
        return false unless same_chromosome?(other_slice)

        self.range.overlaps_with?(other_slice.range)
      end

      # Returns true when both slices are on the same chromosome and the range
      # of this slice lies inside the range of +other_slice+ (see
      # Range#contained_by?).
      def contained_by?(other_slice)
        return false unless same_chromosome?(other_slice)

        self.range.contained_by?(other_slice.range)
      end

      # Returns true when both slices are on the same chromosome and the range
      # of +other_slice+ lies inside the range of this slice. This is the
      # mirror image of #contained_by?.
      def contains?(other_slice)
        return false unless same_chromosome?(other_slice)

        other_slice.range.contained_by?(self.range)
      end

      private

      # True when +other_slice+ is located on the same chromosome as this slice.
      def same_chromosome?(other_slice)
        self.chromosome == other_slice.chromosome
      end
    end
  end
end
|
data/samples/ranges.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
chrX 365739 366104
|
2
|
+
chrX 435678 436376
|
3
|
+
chrX 823067 823982
|
4
|
+
chrX 827850 828111
|
5
|
+
chrX 830087 830927
|
6
|
+
chrX 839913 840259
|
7
|
+
chrX 1386851 1388015
|
8
|
+
chrX 1574525 1574825
|
9
|
+
chrX 1852006 1852321
|
10
|
+
chrX 1871048 1871715
|
11
|
+
chr5 1881979 1882347
|
12
|
+
chr5 1997045 1997838
|
13
|
+
chr5 2204818 2205098
|
14
|
+
chr5 3044350 3044625
|
15
|
+
chr5 3473977 3475116
|
16
|
+
chr3 4100974 4103932
|
17
|
+
chr3 4536840 4537115
|
18
|
+
chr3 4914689 4915030
|
data/samples/tryout.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/ruby
# Sample script: prints every CNP and repeat annotation that overlaps one of
# the target regions listed in a ranges file. Each line of that file holds a
# chromosome, a start and a stop, separated by tabs.
#
# The library and the default ranges file (ranges.txt) are located relative to
# this script, so it can be started from any directory. A different ranges file
# can be given as the first command line argument.
require 'yaml'
require File.join(File.dirname(__FILE__), '..', 'lib', 'ucsc.rb')

ranges_file = ARGV.first || File.join(File.dirname(__FILE__), 'ranges.txt')

# Group the target slices per chromosome, so that the annotations of a
# chromosome only have to be fetched once. File.foreach closes the file again
# when it is done.
ranges = Hash.new { |hash, chromosome| hash[chromosome] = [] }
File.foreach(ranges_file) do |line|
  chromosome, start, stop = line.chomp.split(/\t/)
  ranges[chromosome] << Slice.new(chromosome, Range.new(start.to_i, stop.to_i))
end

ranges.each do |chromosome, target_slices|
  # All CNP and repeat records of this chromosome, in one flat list.
  all_annotations = []
  (ALL_CNPS + ALL_REPEATS).each do |klass|
    all_annotations.concat(klass.find_all_by_chrom(chromosome))
  end

  target_slices.each do |target_slice|
    all_annotations.each do |annotation|
      if annotation.slice.overlaps?(target_slice)
        puts target_slice.to_s + "\t" + annotation.to_s
      end
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/test_activerecord.rb - Unit test for Ucsc::Hg18
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008
|
5
|
+
# Jan Aerts <jan.aerts@gmail.com>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id:
|
9
|
+
require 'pathname'
|
10
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'lib')).cleanpath.to_s
|
11
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
12
|
+
|
13
|
+
require 'test/unit'
|
14
|
+
|
15
|
+
require 'ucsc'
|
16
|
+
|
17
|
+
include Ucsc::Hg18
|
18
|
+
|
19
|
+
# Let's see if we can 'find' things
|
20
|
+
# Let's see if we can 'find' things: one lookup per table class. These tests
# query the database that the library connects to when it is loaded.
class SimpleRecordsTest < Test::Unit::TestCase
  def test_iafrate
    assert_equal('CTC-232B23', CnpIafrate.find_by_name('CTC-232B23').name)
  end

  def test_locke
    assert_equal('RP11-430E19', CnpLocke.find_by_name('RP11-430E19').name)
  end

  def test_redon
    assert_equal('cnp1', CnpRedon.find_by_name('cnp1').name)
  end

  def test_sebat
    assert_equal(1, CnpSebat.find_all_by_chrom_and_chromStart('chr1',12826893).length)
  end

  def test_sharp
    assert_equal('RP11-430E19', CnpSharp.find_by_name('RP11-430E19').name)
  end

  def test_tuzun
    assert_equal('chr1.1', CnpTuzun.find_by_name('chr1.1').name)
  end

  def test_dgv
    assert_equal('31596', Dgv.find_by_name('31596').name)
  end

  def test_simple_repeats
    assert_equal('TAACCC', SimpleRepeat.find_by_chrom_and_chromStart('chr1', 0).sequence)
  end

  def test_genomic_super_dup
    assert_equal('chr2:114046768', GenomicSuperDup.find_by_chrom_and_chromStart('chr1',465).name)
  end

  def test_exapted_repeat
    assert_equal(3180908, ExaptedRepeat.find_by_name('exap1').chromStart)
  end

#  def test_repeatmasker
#
#  end

  def test_interrupted_repeat
    assert_equal('L2', InterruptedRepeat.find_by_chrom_and_chromStart('chr1',13687).name)
  end

  def test_microsatellite
    assert_equal('16xGT', Microsatellite.find_by_chrom_and_chromStart('chr1', 40344).name)
  end
end
|
73
|
+
|
74
|
+
# Checks that the structural variation classes carry the Feature and Sliceable
# mixins.
class MixinsTest < Test::Unit::TestCase
  def test_feature
    assert_all_include(Feature)
  end

  def test_sliceable
    assert_all_include(Sliceable)
  end

  private

  # The classes that are checked by both tests, in the order they are checked.
  def checked_classes
    [CnpIafrate, CnpLocke, CnpRedon, CnpSebat, CnpSharp, CnpTuzun, Dgv]
  end

  # Asserts for every checked class that it includes +mixin+.
  def assert_all_include(mixin)
    checked_classes.each do |klass|
      assert_equal(true, klass.include?(mixin))
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jandot-ruby-ucsc-api
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.9"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jan Aerts
|
8
|
+
autorequire: ucsc
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-08-13 00:00:00 -07:00
|
13
|
+
default_executable: ucsc
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: bio
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: "1"
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: activerecord
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: "0"
|
32
|
+
version:
|
33
|
+
description: ruby-ucsc-api provides a ruby API to the UCSC databases (http://genome.ucsc.edu)
|
34
|
+
email: jan.aerts@gmail.com
|
35
|
+
executables:
|
36
|
+
- ucsc
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files: []
|
40
|
+
|
41
|
+
files:
|
42
|
+
- bin/ucsc
|
43
|
+
- lib/ucsc/db_connection.rb
|
44
|
+
- lib/ucsc/hg18/activerecord.rb
|
45
|
+
- lib/ucsc/hg18/slice.rb
|
46
|
+
- lib/ucsc.rb
|
47
|
+
- samples/ranges.txt
|
48
|
+
- samples/tryout.rb
|
49
|
+
- test/unit/test_activerecord.rb
|
50
|
+
has_rdoc: true
|
51
|
+
homepage: http://github.com/jandot/ruby-ucsc-api
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options:
|
54
|
+
- --exclude .
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: "0"
|
62
|
+
version:
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: "0"
|
68
|
+
version:
|
69
|
+
requirements: []
|
70
|
+
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.2.0
|
73
|
+
signing_key:
|
74
|
+
specification_version: 2
|
75
|
+
summary: API to UCSC databases
|
76
|
+
test_files:
|
77
|
+
- test/unit/test_activerecord.rb
|