jandot-ruby-ucsc-api 0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/ucsc +2 -0
- data/lib/ucsc.rb +35 -0
- data/lib/ucsc/db_connection.rb +41 -0
- data/lib/ucsc/hg18/activerecord.rb +308 -0
- data/lib/ucsc/hg18/slice.rb +50 -0
- data/samples/ranges.txt +18 -0
- data/samples/tryout.rb +36 -0
- data/test/unit/test_activerecord.rb +94 -0
- metadata +77 -0
data/bin/ucsc
ADDED
data/lib/ucsc.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Best-effort load of BioRuby: when the 'bio' library cannot be found the
# file keeps loading instead of aborting.
#
# `require` reports a missing library with LoadError. LoadError is a
# ScriptError, not a StandardError, so it must be named explicitly; a bare
# `rescue` would not catch it, and `rescue nil` is not a valid exception
# class (Ruby raises TypeError while matching the clause).
begin
  require 'bio'
rescue LoadError
  nil # absence of the library is tolerated on purpose
end
|
5
|
+
|
6
|
+
# Interval helpers added to Ruby's core Range class. Both methods compare
# the +begin+ and +end+ endpoints of the two ranges.
class Range
  # True when this range lies strictly inside +other_range+: it begins after
  # the other begins and ends before the other ends. Ranges that share an
  # endpoint (identical ranges included) are not considered contained.
  def contained_by?(other_range)
    self.begin > other_range.begin && self.end < other_range.end
  end

  # True when either range begins within the bounds of the other. Both
  # endpoints are treated as inclusive.
  def overlaps_with?(other_range)
    # This range starts somewhere inside the other one.
    return true if self.begin >= other_range.begin && self.begin <= other_range.end

    # Otherwise they overlap only if the other one starts inside this range.
    other_range.begin >= self.begin && other_range.begin <= self.end
  end
end
|
23
|
+
|
24
|
+
# Database connection
|
25
|
+
require File.dirname(__FILE__) + '/ucsc/db_connection.rb'
|
26
|
+
include Ucsc::Hg18
|
27
|
+
Ucsc::Hg18::DBConnection.connect
|
28
|
+
|
29
|
+
# Core modules
|
30
|
+
require File.dirname(__FILE__) + '/ucsc/hg18/activerecord.rb'
|
31
|
+
require File.dirname(__FILE__) + '/ucsc/hg18/slice.rb'
|
32
|
+
|
33
|
+
ALL_CNPS = [Dgv, CnpIafrate, CnpLocke, CnpRedon, CnpSebat, CnpSharp, CnpTuzun]
|
34
|
+
SEGDUPS = [GenomicSuperDup]
|
35
|
+
ALL_REPEATS = [SimpleRepeat, ExaptedRepeat, InterruptedRepeat, Microsatellite]
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activerecord'
|
3
|
+
|
4
|
+
DB_ADAPTER = 'mysql'
|
5
|
+
DB_HOST = 'genome-mysql.cse.ucsc.edu'
|
6
|
+
DB_USERNAME = 'genome'
|
7
|
+
DB_PASSWORD = ''
|
8
|
+
|
9
|
+
module Ucsc
|
10
|
+
module Hg18
|
11
|
+
# = DESCRIPTION
|
12
|
+
# The Ucsc::Hg18::DBConnection is the actual connection established
|
13
|
+
# with the UCSC mysql server.
|
14
|
+
class DBConnection < ActiveRecord::Base
|
15
|
+
self.abstract_class = true
|
16
|
+
|
17
|
+
# = DESCRIPTION
|
18
|
+
# The Ucsc::Hg18::DBConnection#connect method makes the connection
|
19
|
+
# to the UCSC hg18 database.
|
20
|
+
#
|
21
|
+
# = USAGE
|
22
|
+
# # Connect to the hg18
|
23
|
+
# Ucsc::Hg18::DBConnection.connect
|
24
|
+
#
|
25
|
+
# ---
|
26
|
+
# *Arguments*: none
|
27
|
+
def self.connect
|
28
|
+
establish_connection(
|
29
|
+
:adapter => DB_ADAPTER,
|
30
|
+
:host => DB_HOST,
|
31
|
+
:database => 'hg18',
|
32
|
+
:username => DB_USERNAME,
|
33
|
+
:password => DB_PASSWORD
|
34
|
+
)
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
@@ -0,0 +1,308 @@
|
|
1
|
+
#
|
2
|
+
# = ucsc/hg18/activerecord.rb - ActiveRecord mappings to UCSC hg18 database
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
|
8
|
+
# = DESCRIPTION
|
9
|
+
# == What is it?
|
10
|
+
# The UCSC module provides an API to the UCSC databases
|
11
|
+
# stored at genome-mysql.cse.ucsc.edu. This is the same information that is
|
12
|
+
# available from http://genome.ucsc.edu
|
13
|
+
#
|
14
|
+
# The Ucsc::Hg18 module covers the hg18 (= NCBI build 36) assembly.
|
15
|
+
#
|
16
|
+
# == ActiveRecord
|
17
|
+
# The UCSC API provides a ruby interface to the UCSC mysql databases
|
18
|
+
# at genome-mysql.cse.ucsc.edu. Most of the API is based on ActiveRecord to
|
19
|
+
# get data from that database. In general, each table is described by a
|
20
|
+
# class with the same name: the cnpRedon table is covered by the
|
21
|
+
# CnpRedon class, the dgv table is covered by the Dgv class,
|
22
|
+
# etc. As a result, accessors are available for all columns in each table.
|
23
|
+
# For example, the cnpRedon table has the following columns: chrom, chromStart,
|
24
|
+
# chromEnd and name. Through ActiveRecord, these column names become available
|
25
|
+
# as attributes of CnpRedon objects:
|
26
|
+
# puts my_cnp_redon.name
|
27
|
+
# puts my_cnp_redon.chrom
|
28
|
+
# puts my_cnp_redon.chromStart
|
29
|
+
# puts my_cnp_redon.chromEnd
|
30
|
+
#
|
31
|
+
# ActiveRecord makes it easy to extract data from those tables using the
|
32
|
+
# collection of #find methods. There are three types of #find methods (e.g.
|
33
|
+
# for the CnpRedon class):
|
34
|
+
# a. find based on primary key in table:
|
35
|
+
# # not possible with the UCSC database
|
36
|
+
# b. find_by_sql:
|
37
|
+
# my_cnp = CnpRedon.find_by_sql("SELECT * FROM cnpRedon WHERE name = 'cnp1'")
|
38
|
+
# c. find_by_<insert_your_column_name_here>
|
39
|
+
# my_cnp = CnpRedon.find_by_name('cnp1')
|
40
|
+
# my_cnp2 = CnpRedon.find_by_chrom_and_chromStart('chr1',377)
|
41
|
+
# To find out which find_by_<column> methods are available, you can list the
|
42
|
+
# column names using the column_names class methods:
|
43
|
+
#
|
44
|
+
# puts Ucsc::Hg18::CnpRedon.column_names.join("\t")
|
45
|
+
#
|
46
|
+
# For more information on the find methods, see
|
47
|
+
# http://ar.rubyonrails.org/classes/ActiveRecord/Base.html#M000344
|
48
|
+
#
|
49
|
+
module Ucsc
|
50
|
+
# = DESCRIPTION
|
51
|
+
# The Ucsc::Hg18 module covers the hg18 database from
|
52
|
+
# genome-mysql.cse.ucsc.edu and covers mainly sequences and their annotations.
|
53
|
+
# For more information about the database tables, click on the "Describe
|
54
|
+
# table schema" in the Table Browser.
|
55
|
+
module Hg18
|
56
|
+
# = DESCRIPTION
|
57
|
+
# The Sliceable mixin holds the slice method and can be included
|
58
|
+
# in any class that lends itself to having a position on a chromosome.
|
59
|
+
module Sliceable
  # Builds the Ucsc::Hg18::Slice describing where this record is located.
  #
  # The chromosome is taken from +chrom+. The +chromStart+, +chromEnd+ and
  # +strand+ attributes are read only when the class lists a column of that
  # name; a missing start or end ends up as 0 (nil.to_i) and a missing
  # strand stays nil.
  def slice
    columns = self.class.column_names

    first_position = columns.include?('chromStart') ? self.chromStart : nil
    last_position  = columns.include?('chromEnd') ? self.chromEnd : nil
    strand_value   = columns.include?('strand') ? self.strand : nil

    positions = Range.new(first_position.to_i, last_position.to_i)
    Ucsc::Hg18::Slice.new(self.chrom, positions, strand_value)
  end
end
|
75
|
+
|
76
|
+
# = DESCRIPTION
|
77
|
+
# The Feature mixin holds common methods for all feature-like classes, such
|
78
|
+
# as how to print itself to the screen.
|
79
|
+
module Feature
  include Sliceable

  # One-line, tab-separated description of the record: its class name, its
  # slice and its +name+.
  #
  # Every field is converted with to_s before joining, so a record whose
  # +name+ is nil or not a String still prints (nil becomes an empty field)
  # instead of raising TypeError from String#+.
  def to_s
    [self.class.to_s, self.slice.to_s, self.name.to_s].join("\t")
  end
end
|
86
|
+
|
87
|
+
# = DESCRIPTION
|
88
|
+
# From Structural Variants description page when clicking the "Describe
|
89
|
+
# table schema" in the table browser:
|
90
|
+
# "All hybridizations were performed in duplicate incorporating a
|
91
|
+
# dye-reversal using proprietary 1 Mb GenomeChip V1.2 Human BAC Arrays
|
92
|
+
# consisting of 2,632 BAC clones (Spectral Genomics, Houston, TX). The
|
93
|
+
# false positive rate was estimated at ~1 clone per 5,264 tested."
|
94
|
+
class CnpIafrate < DBConnection
|
95
|
+
include Ucsc::Hg18::Feature
|
96
|
+
|
97
|
+
set_table_name 'cnpIafrate2'
|
98
|
+
set_primary_key nil
|
99
|
+
end
|
100
|
+
|
101
|
+
# = DESCRIPTION
|
102
|
+
# From Structural Variants description page when clicking the "Describe
|
103
|
+
# table schema" in the table browser:
|
104
|
+
# "DNA samples were obtained from Coriell Cell Repositories. The reference
|
105
|
+
# DNA used for all hybridizations was from a single male of Czechoslovakian
|
106
|
+
# descent, Coriell ID GM15724 (also used in the Sharp study).
|
107
|
+
#
|
108
|
+
# A locus was considered a CNV (copy number variation) if the log ratio of
|
109
|
+
# fluorescence measurements for the individuals assayed exceeded twice the
|
110
|
+
# standard deviation of the autosomal clones in replicate dye-swapped
|
111
|
+
# experiments. A CNV was classified as a CNP if altered copy number was
|
112
|
+
# observed in more than 1% of the 269 individuals."
|
113
|
+
class CnpLocke < DBConnection
|
114
|
+
include Ucsc::Hg18::Feature
|
115
|
+
|
116
|
+
set_table_name 'cnpLocke'
|
117
|
+
set_primary_key nil
|
118
|
+
end
|
119
|
+
|
120
|
+
# = DESCRIPTION
|
121
|
+
# From Structural Variants description page when clicking the "Describe
|
122
|
+
# table schema" in the table browser:
|
123
|
+
# "Experiments were performed with the International HapMap DNA and
|
124
|
+
# cell-line collection using two technologies: comparative analysis of
|
125
|
+
# hybridization intensities on Affymetrix GeneChip Human Mapping 500K early
|
126
|
+
# access arrays (500K EA) and comparative genomic hybridization with a
|
127
|
+
# Whole Genome TilePath (WGTP) array."
|
128
|
+
class CnpRedon < DBConnection
|
129
|
+
include Ucsc::Hg18::Feature
|
130
|
+
|
131
|
+
set_table_name 'cnpRedon'
|
132
|
+
set_primary_key nil
|
133
|
+
end
|
134
|
+
|
135
|
+
# = DESCRIPTION
|
136
|
+
# From Structural Variants description page when clicking the "Describe
|
137
|
+
# table schema" in the table browser:
|
138
|
+
# "Following digestion with BglII or HindIII, genomic DNA was hybridized to
|
139
|
+
# a custom array consisting of 85,000 oligonucleotide probes. The probes
|
140
|
+
# were selected to be free of common repeats and have unique homology
|
141
|
+
# within the human genome. The average resolution of the array was ~35kb;
|
142
|
+
# however, only intervals in which three consecutive probes showed
|
143
|
+
# concordant signals were scored as CNPs. All hybridizations were performed
|
144
|
+
# in duplicate incorporating a dye-reversal, with the false positive rate
|
145
|
+
# estimated to be ~6%."
|
146
|
+
class CnpSebat < DBConnection
|
147
|
+
include Ucsc::Hg18::Feature
|
148
|
+
|
149
|
+
set_table_name 'cnpSebat2'
|
150
|
+
set_primary_key nil
|
151
|
+
end
|
152
|
+
|
153
|
+
# = DESCRIPTION
|
154
|
+
# From Structural Variants description page when clicking the "Describe
|
155
|
+
# table schema" in the table browser:
|
156
|
+
# "All hybridizations were performed in duplicate incorporating a
|
157
|
+
# dye-reversal using a custom array consisting of 2,194 end-sequence or
|
158
|
+
# FISH-confirmed BACs, targeted to regions of the genome flanked by
|
159
|
+
# segmental duplications. The false positive rate was estimated at ~3
|
160
|
+
# clones per 4,000 tested."
|
161
|
+
class CnpSharp < DBConnection
|
162
|
+
include Ucsc::Hg18::Feature
|
163
|
+
|
164
|
+
set_table_name 'cnpSharp2'
|
165
|
+
set_primary_key nil
|
166
|
+
end
|
167
|
+
|
168
|
+
# = DESCRIPTION
|
169
|
+
# From Structural Variants description page when clicking the "Describe
|
170
|
+
# table schema" in the table browser:
|
171
|
+
# "Paired-end sequences from a human fosmid DNA library were mapped to the
|
172
|
+
# assembly. The average resolution of this technique was ~8kb, and included
|
173
|
+
# 56 sites of inversion not detectable by the array-based approaches.
|
174
|
+
# However, because of the physical constraints of fosmid insert size, this
|
175
|
+
# technique was unable to detect insertions greater than 40 kb in size."
|
176
|
+
class CnpTuzun < DBConnection
|
177
|
+
include Ucsc::Hg18::Feature
|
178
|
+
|
179
|
+
set_table_name 'cnpTuzun'
|
180
|
+
set_primary_key nil
|
181
|
+
end
|
182
|
+
|
183
|
+
# = DESCRIPTION
|
184
|
+
# From Structural Variants description page when clicking the "Describe
|
185
|
+
# table schema" in the table browser:
|
186
|
+
# ""
|
187
|
+
class Dgv < DBConnection
  include Ucsc::Hg18::Feature

  set_table_name 'dgv'
  set_primary_key nil

  # One-line, tab-separated description of the record: class name, slice,
  # the +reference+ attribute and the value of the +method+ column.
  #
  # The +method+ column is read through self['method'] because its name
  # collides with Object#method, which expects an argument. A plain
  # self.method call only reaches the column when ActiveRecord has already
  # generated an attribute reader that shadows the core method
  # (NOTE(review): confirm against the ActiveRecord version in use).
  # Every field goes through to_s so a NULL column prints as an empty field
  # instead of raising TypeError.
  def to_s
    fields = [self.class.to_s, self.slice.to_s, self.reference.to_s, self['method'].to_s]
    fields.join("\t")
  end
end
|
197
|
+
|
198
|
+
|
199
|
+
# = DESCRIPTION
|
200
|
+
# From Simple Repeats description page when clicking the "Describe
|
201
|
+
# table schema" in the table browser:
|
202
|
+
# "This track displays simple tandem repeats (possibly imperfect) located
|
203
|
+
# by Tandem Repeats Finder (TRF), which is specialized for this purpose.
|
204
|
+
# These repeats can occur within coding regions of genes and may be quite
|
205
|
+
# polymorphic. Repeat expansions are sometimes associated with specific
|
206
|
+
# diseases."
|
207
|
+
class SimpleRepeat < DBConnection
|
208
|
+
include Ucsc::Hg18::Feature
|
209
|
+
|
210
|
+
set_table_name 'simpleRepeat'
|
211
|
+
set_primary_key nil
|
212
|
+
end
|
213
|
+
|
214
|
+
# = DESCRIPTION
|
215
|
+
# From Structural Variants description page when clicking the "Describe
|
216
|
+
# table schema" in the table browser:
|
217
|
+
# "This track shows regions detected as putative genomic duplications
|
218
|
+
# within the golden path. The following display conventions are used to
|
219
|
+
# distinguish levels of similarity:
|
220
|
+
# * Light to dark gray: 90 - 98% similarity
|
221
|
+
# * Light to dark yellow: 98 - 99% similarity
|
222
|
+
# * Light to dark orange: greater than 99% similarity
|
223
|
+
# * Red: duplications of greater than 98% similarity that lack sufficient
|
224
|
+
# Segmental Duplication Database evidence (most likely missed overlaps)
|
225
|
+
# For a region to be included in the track, at least 1 Kb of the total
|
226
|
+
# sequence (containing at least 500 bp of non-RepeatMasked sequence) had
|
227
|
+
# to align and a sequence identity of at least 90% was required."
|
228
|
+
class GenomicSuperDup < DBConnection
|
229
|
+
include Ucsc::Hg18::Feature
|
230
|
+
|
231
|
+
set_table_name 'genomicSuperDups'
|
232
|
+
set_primary_key nil
|
233
|
+
end
|
234
|
+
|
235
|
+
# = DESCRIPTION
|
236
|
+
# From Exapted Repeat description page when clicking the "Describe
|
237
|
+
# table schema" in the table browser:
|
238
|
+
# "This track displays conserved non-exonic elements that have been
|
239
|
+
# deposited by mobile elements (repeats), a process termed "exaptation"
|
240
|
+
# (Gould et al., 1982). These regions were identified during a genome-wide
|
241
|
+
# survey (Lowe et al., 2007) with the expectation that regions of this type
|
242
|
+
# may act as distal transcriptional regulators for nearby genes. A previous
|
243
|
+
# case study experimentally verified an exapted mobile element acting as a
|
244
|
+
# distal enhancer (Bejerano et al. , 2006)."
|
245
|
+
class ExaptedRepeat < DBConnection
|
246
|
+
include Ucsc::Hg18::Feature
|
247
|
+
|
248
|
+
set_table_name 'exaptedRepeats'
|
249
|
+
set_primary_key nil
|
250
|
+
end
|
251
|
+
|
252
|
+
#TODO: The repeatmasker features are distributed over different tables; one for
|
253
|
+
# each chromosome.
|
254
|
+
# # = DESCRIPTION
|
255
|
+
# # From RepeatMasker description page when clicking the "Describe
|
256
|
+
# # table schema" in the table browser:
|
257
|
+
# # "This track was created by using Arian Smit's RepeatMasker program, which
|
258
|
+
# # screens DNA sequences for interspersed repeats and low complexity DNA
|
259
|
+
# # sequences. The program outputs a detailed annotation of the repeats that
|
260
|
+
# # are present in the query sequence, as well as a modified version of the
|
261
|
+
# # query sequence in which all the annotated repeats have been masked.
|
262
|
+
# # RepeatMasker uses the RepBase library of repeats from the Genetic
|
263
|
+
# # Information Research Institute (GIRI). RepBase is described in Jurka, J.
|
264
|
+
# # (2000) in the References section below."
|
265
|
+
# class RepeatMasker < DBConnection
|
266
|
+
# include Ucsc::Hg18::Feature
|
267
|
+
#
|
268
|
+
# set_table_name 'rmsk'
|
269
|
+
# set_primary_key nil
|
270
|
+
# end
|
271
|
+
|
272
|
+
# = DESCRIPTION
|
273
|
+
# From Interrupted Repeat description page when clicking the "Describe
|
274
|
+
# table schema" in the table browser:
|
275
|
+
# "This track shows joined fragments of interrupted repeats extracted from
|
276
|
+
# the output of the RepeatMasker program, which screens DNA sequences for
|
277
|
+
# interspersed repeats and low complexity DNA sequences using the RepBase
|
278
|
+
# library of repeats from the Genetic Information Research Institute (GIRI).
|
279
|
+
# RepBase is described in Jurka, J. (2000) in the References section below.
|
280
|
+
#
|
281
|
+
# The detailed annotations from RepeatMasker are in the RepeatMasker track.
|
282
|
+
# This track shows fragments of original repeat insertions which have been
|
283
|
+
# interrupted by insertions of younger repeats or through local
|
284
|
+
# rearrangements. The fragments are joined using the ID column of
|
285
|
+
# RepeatMasker output."
|
286
|
+
class InterruptedRepeat < DBConnection
|
287
|
+
include Ucsc::Hg18::Feature
|
288
|
+
|
289
|
+
set_table_name 'nestedRepeats'
|
290
|
+
set_primary_key nil
|
291
|
+
end
|
292
|
+
|
293
|
+
# = DESCRIPTION
|
294
|
+
# From Microsatellite description page when clicking the "Describe
|
295
|
+
# table schema" in the table browser:
|
296
|
+
# "This track displays regions that are likely to be useful as
|
297
|
+
# microsatellite markers. These are sequences of at least 15 perfect
|
298
|
+
# di-nucleotide and tri-nucleotide repeats, and tend to be highly
|
299
|
+
# polymorphic in the population."
|
300
|
+
class Microsatellite < DBConnection
|
301
|
+
include Ucsc::Hg18::Feature
|
302
|
+
|
303
|
+
set_table_name 'microsat'
|
304
|
+
set_primary_key nil
|
305
|
+
end
|
306
|
+
|
307
|
+
end
|
308
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Ucsc
  module Hg18
    # A region on a chromosome: a chromosome name, a Range of positions and
    # an optional strand.
    #
    # The comparison methods rely on Range#contained_by? and
    # Range#overlaps_with?, which this library adds to Range in lib/ucsc.rb.
    # Slices on different chromosomes never overlap or contain each other.
    class Slice
      attr_accessor :chromosome, :range, :strand

      # chromosome:: name of the chromosome, e.g. 'chr1'
      # range::      Range of positions on that chromosome
      # strand::     strand of the region; nil when not given
      def initialize(chromosome, range, strand = nil)
        # Three targets for three values, so that strand is actually stored.
        @chromosome, @range, @strand = chromosome, range, strand
      end

      # Returns "<chromosome>:<range>", e.g. "chr1:10..20".
      def to_s
        @chromosome.to_s + ':' + @range.to_s
      end

      # True when both slices are on the same chromosome and their ranges
      # overlap. Uses the library's own Range#overlaps_with? rather than a
      # Range#overlaps? method, which core Ruby does not define.
      def overlaps?(other_slice)
        return false unless same_chromosome?(other_slice)

        range.overlaps_with?(other_slice.range)
      end

      # True when both slices are on the same chromosome and this slice's
      # range lies strictly inside the other slice's range.
      def contained_by?(other_slice)
        return false unless same_chromosome?(other_slice)

        range.contained_by?(other_slice.range)
      end

      # True when both slices are on the same chromosome and the other
      # slice's range lies strictly inside this slice's range. Expressed
      # through Range#contained_by? with the operands swapped, because no
      # Range#contains? method exists.
      def contains?(other_slice)
        return false unless same_chromosome?(other_slice)

        other_slice.range.contained_by?(range)
      end

      private

      # True when +other_slice+ is located on the same chromosome.
      def same_chromosome?(other_slice)
        chromosome == other_slice.chromosome
      end
    end
  end
end
|
data/samples/ranges.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
chrX 365739 366104
|
2
|
+
chrX 435678 436376
|
3
|
+
chrX 823067 823982
|
4
|
+
chrX 827850 828111
|
5
|
+
chrX 830087 830927
|
6
|
+
chrX 839913 840259
|
7
|
+
chrX 1386851 1388015
|
8
|
+
chrX 1574525 1574825
|
9
|
+
chrX 1852006 1852321
|
10
|
+
chrX 1871048 1871715
|
11
|
+
chr5 1881979 1882347
|
12
|
+
chr5 1997045 1997838
|
13
|
+
chr5 2204818 2205098
|
14
|
+
chr5 3044350 3044625
|
15
|
+
chr5 3473977 3475116
|
16
|
+
chr3 4100974 4103932
|
17
|
+
chr3 4536840 4537115
|
18
|
+
chr3 4914689 4915030
|
data/samples/tryout.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
require 'yaml'
|
3
|
+
require '../lib/ucsc.rb'
|
4
|
+
|
5
|
+
# Read the target regions and group them by chromosome. Each line of
# ranges.txt holds a chromosome name, a start and a stop position separated
# by whitespace. File.foreach closes the file when the block finishes.
ranges = Hash.new { |by_chromosome, chromosome| by_chromosome[chromosome] = [] }
File.foreach('ranges.txt') do |line|
  chromosome, start, stop = line.split
  next if chromosome.nil? # blank line: nothing to record

  ranges[chromosome].push(Slice.new(chromosome, Range.new(start.to_i, stop.to_i)))
end

# For every chromosome, fetch its CNP and repeat annotations once, then
# print each annotation that overlaps one of the target regions.
ranges.each do |chromosome, target_slices|
  all_annotations = []
  (ALL_CNPS + ALL_REPEATS).each do |klass|
    all_annotations.concat(klass.find_all_by_chrom(chromosome))
  end

  target_slices.each do |target_slice|
    all_annotations.each do |annotation|
      if annotation.slice.overlaps?(target_slice)
        puts target_slice.to_s + "\t" + annotation.to_s
      end
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/test_activerecord.rb - Unit test for Ucsc::Hg18
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008
|
5
|
+
# Jan Aerts <jan.aerts@gmail.com>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id:
|
9
|
+
require 'pathname'
|
10
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'lib')).cleanpath.to_s
|
11
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
12
|
+
|
13
|
+
require 'test/unit'
|
14
|
+
|
15
|
+
require 'ucsc'
|
16
|
+
|
17
|
+
include Ucsc::Hg18
|
18
|
+
|
19
|
+
# Let's see if we can 'find' things
|
20
|
+
class SimpleRecordsTest < Test::Unit::TestCase
|
21
|
+
def test_iafrage
|
22
|
+
assert_equal('CTC-232B23', CnpIafrate.find_by_name('CTC-232B23').name)
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_locke
|
26
|
+
assert_equal('RP11-430E19', CnpLocke.find_by_name('RP11-430E19').name)
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_redon
|
30
|
+
assert_equal('cnp1', CnpRedon.find_by_name('cnp1').name)
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_sebat
|
34
|
+
assert_equal(1, CnpSebat.find_all_by_chrom_and_chromStart('chr1',12826893).length)
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_sharp
|
38
|
+
assert_equal('RP11-430E19', CnpSharp.find_by_name('RP11-430E19').name)
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_tuzun
|
42
|
+
assert_equal('chr1.1', CnpTuzun.find_by_name('chr1.1').name)
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_dgv
|
46
|
+
assert_equal('31596', Dgv.find_by_name('31596').name)
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_simple_repeats
|
50
|
+
assert_equal('TAACCC', SimpleRepeat.find_by_chrom_and_chromStart('chr1', 0).sequence)
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_genomic_super_dup
|
54
|
+
assert_equal('chr2:114046768', GenomicSuperDup.find_by_chrom_and_chromStart('chr1',465).name)
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_exapted_repeat
|
58
|
+
assert_equal(3180908, ExaptedRepeat.find_by_name('exap1').chromStart)
|
59
|
+
end
|
60
|
+
|
61
|
+
# def test_repeatmasker
|
62
|
+
#
|
63
|
+
# end
|
64
|
+
|
65
|
+
def test_interrupted_repeat
|
66
|
+
assert_equal('L2', InterruptedRepeat.find_by_chrom_and_chromStart('chr1',13687).name)
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_microsatellite
|
70
|
+
assert_equal('16xGT', Microsatellite.find_by_chrom_and_chromStart('chr1', 40344).name)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Checks that the copy-number-variation classes mix in the shared modules.
class MixinsTest < Test::Unit::TestCase
  # Every listed class must include the Feature mixin.
  def test_feature
    classes_under_test.each do |klass|
      assert_equal(true, klass.include?(Feature))
    end
  end

  # Every listed class must include the Sliceable mixin.
  def test_sliceable
    classes_under_test.each do |klass|
      assert_equal(true, klass.include?(Sliceable))
    end
  end

  private

  # The classes checked by both tests, in the order they are asserted.
  def classes_under_test
    [CnpIafrate, CnpLocke, CnpRedon, CnpSebat, CnpSharp, CnpTuzun, Dgv]
  end
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jandot-ruby-ucsc-api
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.9"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jan Aerts
|
8
|
+
autorequire: ucsc
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-08-13 00:00:00 -07:00
|
13
|
+
default_executable: ucsc
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: bio
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: "1"
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: activerecord
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: "0"
|
32
|
+
version:
|
33
|
+
description: ruby-ucsc-api provides a ruby API to the UCSC databases (http://genome.ucsc.edu)
|
34
|
+
email: jan.aerts@gmail.com
|
35
|
+
executables:
|
36
|
+
- ucsc
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files: []
|
40
|
+
|
41
|
+
files:
|
42
|
+
- bin/ucsc
|
43
|
+
- lib/ucsc/db_connection.rb
|
44
|
+
- lib/ucsc/hg18/activerecord.rb
|
45
|
+
- lib/ucsc/hg18/slice.rb
|
46
|
+
- lib/ucsc.rb
|
47
|
+
- samples/ranges.txt
|
48
|
+
- samples/tryout.rb
|
49
|
+
- test/unit/test_activerecord.rb
|
50
|
+
has_rdoc: true
|
51
|
+
homepage: http://github.com/jandot/ruby-ucsc-api
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options:
|
54
|
+
- --exclude .
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: "0"
|
62
|
+
version:
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: "0"
|
68
|
+
version:
|
69
|
+
requirements: []
|
70
|
+
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.2.0
|
73
|
+
signing_key:
|
74
|
+
specification_version: 2
|
75
|
+
summary: API to UCSC databases
|
76
|
+
test_files:
|
77
|
+
- test/unit/test_activerecord.rb
|