bio-ucsc-api 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +36 -16
- data/Rakefile +3 -3
- data/VERSION +1 -1
- data/bio-ucsc-api.gemspec +9 -7
- data/lib/bio-ucsc/hg18.rb +2 -4
- data/lib/bio-ucsc/hg18/activerecord.rb +1 -1
- data/lib/bio-ucsc/hg18/all_bacends.rb +0 -1
- data/lib/bio-ucsc/hg18/cnpiafrate2.rb +0 -1
- data/lib/bio-ucsc/hg18/cnplocke.rb +0 -1
- data/lib/bio-ucsc/hg18/cnpredon.rb +0 -1
- data/lib/bio-ucsc/hg18/cnpsebat2.rb +0 -1
- data/lib/bio-ucsc/hg18/cnpsharp2.rb +0 -1
- data/lib/bio-ucsc/hg18/cnptuzun.rb +0 -1
- data/lib/bio-ucsc/hg18/cytoband.rb +0 -1
- data/lib/bio-ucsc/hg18/db_connection.rb +1 -1
- data/lib/bio-ucsc/hg18/delconrad2.rb +0 -1
- data/lib/bio-ucsc/hg18/delhinds2.rb +0 -1
- data/lib/bio-ucsc/hg18/delmccarroll.rb +0 -1
- data/lib/bio-ucsc/hg18/dgv.rb +0 -1
- data/lib/bio-ucsc/hg18/ensgene.rb +0 -1
- data/lib/bio-ucsc/hg18/exaptedrepeats.rb +0 -1
- data/lib/bio-ucsc/hg18/hgcentral_wikitrack.rb +0 -1
- data/lib/bio-ucsc/hg18/kgprotmap2.rb +0 -1
- data/lib/bio-ucsc/hg18/kgtargetali.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc10.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc11.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc12.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc13.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc14.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc8.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc9.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscg248.rb +0 -1
- data/lib/bio-ucsc/hg18/reference.rb +4 -163
- data/lib/bio-ucsc/hg18/reference_sequence.rb +171 -0
- data/lib/bio-ucsc/hg18/refgene.rb +0 -1
- data/lib/bio-ucsc/hg18/snp130.rb +0 -1
- data/lib/bio-ucsc/hg19.rb +2 -1
- data/lib/bio-ucsc/hg19/activerecord.rb +1 -1
- data/lib/bio-ucsc/hg19/cytoband.rb +0 -1
- data/lib/bio-ucsc/hg19/cytobandideo.rb +0 -1
- data/lib/bio-ucsc/hg19/db_connection.rb +1 -1
- data/lib/bio-ucsc/hg19/dgv.rb +0 -1
- data/lib/bio-ucsc/hg19/ensgene.rb +0 -1
- data/lib/bio-ucsc/hg19/reference.rb +4 -163
- data/lib/bio-ucsc/hg19/reference_sequence.rb +171 -0
- data/lib/bio-ucsc/hg19/refgene.rb +0 -1
- data/lib/bio-ucsc/hg19/snp131.rb +0 -1
- data/lib/bio-ucsc/hg19/wgrna.rb +0 -1
- data/samples/hg19-2bit-retrieve.rb +2 -2
- data/spec/hg18/{reference_spec.rb → reference_sequence_spec.rb} +26 -26
- data/spec/hg19/{reference_spec.rb → reference_sequence_spec.rb} +26 -26
- metadata +11 -10
data/README.rdoc
CHANGED
@@ -1,37 +1,40 @@
|
|
1
1
|
= bio-ucsc-api
|
2
2
|
|
3
|
-
|
3
|
+
The Ruby UCSC API: accessing the UCSC Genome Database using Ruby.
|
4
4
|
|
5
5
|
This package is based on the following:
|
6
|
-
* ruby-ucsc-api: https://github.com/jandot/ruby-ucsc-api
|
6
|
+
* original ruby-ucsc-api: https://github.com/jandot/ruby-ucsc-api
|
7
7
|
* ruby-ensembl-api: https://github.com/jandot/ruby-ensembl-api
|
8
8
|
|
9
9
|
Major dependent gems:
|
10
10
|
* active_record http://api.rubyonrails.org/classes/ActiveRecord/Base.html
|
11
11
|
* bioruby-genomic-interval https://github.com/misshie/bioruby-genomic-interval
|
12
12
|
|
13
|
+
Requirement:
|
14
|
+
* Ruby version 1.9.2 or later
|
15
|
+
|
13
16
|
See also a paper:
|
14
17
|
|
15
18
|
Strozzi F, Aerts J: A Ruby API to query the Ensembl database for genomic features.
|
16
19
|
Bioinformatics 2011, 27:1013-1014.
|
17
20
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
http://github.com/misshie/bioruby-ucsc-api/issues
|
21
|
+
Your comments, suggestions and requests are welcome. Documentation and
|
22
|
+
feedback are available at the UserEcho site at
|
23
|
+
http://rubyucscapi.userecho.com/.
|
22
24
|
|
23
25
|
== Features
|
24
26
|
|
25
27
|
* Designed as a BioRuby plugin
|
26
28
|
* Using ActiveRecord as an O/R mapping framework
|
27
29
|
* Using the Bin index system to improve query performance (see https://github.com/misshie/UCSCBin )
|
28
|
-
* Automatic conversion of "1-based full-closed intervals" to internal "0-based
|
30
|
+
* Automatic conversion of "1-based full-closed intervals" to internal "0-based left-closed right-open intervals" (see also bioruby-genomic-interval)
|
29
31
|
* Supporting non-official MySql hosts (e.g. local servers)
|
30
32
|
* Using Rspec for the testing framework
|
31
|
-
* Support hg19 and hg18 databases.
|
33
|
+
* Support hg19 and hg18 databases. The current version does not support tables that contain only a single record whose fileName column links to a bigWig or bigBed file.
|
32
34
|
|
33
35
|
== Change Log
|
34
|
-
|
36
|
+
* *MODIFIED* (v0.1.0): The name of this library is now "Ruby UCSC API". The RubyGem name and the GitHub account and the library name are not changed.
|
37
|
+
* *MODIFIED* (v0.1.0): Bio::Ucsc::[Hg18|Hg19]::Reference is replaced by Bio::Ucsc::[Hg18|Hg19]::ReferenceSequence.
|
35
38
|
* *UPDATE* (v0.0.5): Almost all hg18 tables are supported.
|
36
39
|
* *UPDATE* (v0.0.5): find_by_interval and find_all_by_interval class methods accept the "partial" option. Default is true. When "partial: false" is specified, only fully-included (non-partially-included) records are returned.
|
37
40
|
* *UPDATE* (v0.0.4): Almost all hg19 tables are supported. "filename" tables in ENCODE dataset are omitted. Each of them contains only single record of a path to the raw data file. Definitions of table relations are incomplete.
|
@@ -49,13 +52,13 @@ You may need to be root or use "sudo". "--no-ri" and "--no-rdoc" options are rec
|
|
49
52
|
|
50
53
|
require 'bio-ucsc'
|
51
54
|
|
52
|
-
include Bio::Ucsc::Hg19
|
55
|
+
include Bio::Ucsc::Hg19
|
53
56
|
DBConnection.connect
|
54
57
|
|
55
58
|
itv = Bio::GenomicInterval.parse("chr1:1-11,000")
|
56
59
|
Snp131.find_all_by_interval(itv).each do |e|
|
57
60
|
i = Bio::GenomicInterval.zero_based(e.chrom, e.chromStart, e.chromEnd)
|
58
|
-
puts "#{i.chrom}\t#{i.
|
61
|
+
puts "#{i.chrom}\t#{i.chr_start}\t#{e.name}\t#{e[:class]}"
|
59
62
|
end
|
60
63
|
|
61
64
|
itv = Bio::GenomicInterval.parse("chr17:7,579,614-7,579,700")
|
@@ -64,15 +67,32 @@ You may need to be root or use "sudo". "--no-ri" and "--no-rdoc" options are rec
|
|
64
67
|
|
65
68
|
p Snp131.find_by_name("rs56289060")
|
66
69
|
|
70
|
+
# Sometimes, queries using raw SQL provide elegant solutions.
|
71
|
+
#
|
72
|
+
sql = <<'SQL'
|
73
|
+
SELECT name,chrom,chromStart,chromEnd,observed
|
74
|
+
FROM snp131
|
75
|
+
WHERE name="rs56289060"
|
76
|
+
SQL
|
77
|
+
p Snp131.find_by_sql(sql)
|
78
|
+
|
67
79
|
# retrieve reference sequence from a locally-stored 2bit file
|
68
|
-
|
80
|
+
ReferenceSequence.load("hg19.2bit")
|
69
81
|
itv = Bio::GenomicInterval.parse("chr1:9,500-10,999")
|
70
|
-
p
|
82
|
+
p ReferenceSequence.find_by_interval(itv)
|
71
83
|
|
72
|
-
|
73
|
-
|
84
|
+
# Connecting to non-official or local mirror MySQL servers
|
85
|
+
DBConnection.db_host = 'foo.example.edu'
|
86
|
+
DBConnection.db_username = 'genome'
|
87
|
+
DBConnection.db_password = ''
|
88
|
+
DBConnection.connect
|
89
|
+
|
90
|
+
DBConnection.default # reset to connect UCSC's public MySQL server
|
91
|
+
DBConnection.connect
|
74
92
|
|
75
|
-
Copyright
|
93
|
+
== Copyright
|
94
|
+
Copyright:: (c) 2011 MISHIMA, Hiroyuki (missy at be.to / hmishima at nagasaki-u.ac.jp / @mishimahryk at Twitter)
|
95
|
+
Copyright:: (c) 2010 Jan Aerts
|
76
96
|
|
77
97
|
License:: Ruby license (Ruby's / GPLv2 dual). See COPYING and COPYING.ja for further details.
|
78
98
|
|
data/Rakefile
CHANGED
@@ -16,10 +16,10 @@ Jeweler::Tasks.new do |gem|
|
|
16
16
|
gem.name = "bio-ucsc-api"
|
17
17
|
gem.homepage = "http://github.com/misshie/bioruby-ucsc-api"
|
18
18
|
gem.license = "Ruby (Ruby's/GPLv2 dual)"
|
19
|
-
gem.summary = %Q{
|
20
|
-
gem.description = %Q{
|
19
|
+
gem.summary = %Q{The Ruby UCSC API: accessing the UCSC Genome Database using Ruby}
|
20
|
+
gem.description = %Q{Ruby UCSC API: accessing the UCSC Genome Database using Ruby}
|
21
21
|
gem.email = "missy@be.to"
|
22
|
-
gem.authors = ["Hiroyuki Mishima", "
|
22
|
+
gem.authors = ["Hiroyuki Mishima", "Jan Aerts"]
|
23
23
|
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
24
24
|
# and development dependencies are only needed for development
|
25
25
|
# (ie running rake tasks, tests, etc)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/bio-ucsc-api.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bio-ucsc-api}
|
8
|
-
s.version = "0.0
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["Hiroyuki Mishima", "
|
12
|
-
s.date = %q{2011-
|
13
|
-
s.description = %q{
|
11
|
+
s.authors = ["Hiroyuki Mishima", "Jan Aerts"]
|
12
|
+
s.date = %q{2011-07-07}
|
13
|
+
s.description = %q{Ruby UCSC API: accessing the UCSC Genome Database using Ruby}
|
14
14
|
s.email = %q{missy@be.to}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"README.rdoc"
|
@@ -1009,6 +1009,7 @@ Gem::Specification.new do |s|
|
|
1009
1009
|
"lib/bio-ucsc/hg18/rdmrraw.rb",
|
1010
1010
|
"lib/bio-ucsc/hg18/recombrate.rb",
|
1011
1011
|
"lib/bio-ucsc/hg18/reference.rb",
|
1012
|
+
"lib/bio-ucsc/hg18/reference_sequence.rb",
|
1012
1013
|
"lib/bio-ucsc/hg18/refflat.rb",
|
1013
1014
|
"lib/bio-ucsc/hg18/refgene.rb",
|
1014
1015
|
"lib/bio-ucsc/hg18/reflink.rb",
|
@@ -3651,6 +3652,7 @@ Gem::Specification.new do |s|
|
|
3651
3652
|
"lib/bio-ucsc/hg19/polyapredict.rb",
|
3652
3653
|
"lib/bio-ucsc/hg19/recombrate.rb",
|
3653
3654
|
"lib/bio-ucsc/hg19/reference.rb",
|
3655
|
+
"lib/bio-ucsc/hg19/reference_sequence.rb",
|
3654
3656
|
"lib/bio-ucsc/hg19/refflat.rb",
|
3655
3657
|
"lib/bio-ucsc/hg19/refgene.rb",
|
3656
3658
|
"lib/bio-ucsc/hg19/reflink.rb",
|
@@ -4939,7 +4941,7 @@ Gem::Specification.new do |s|
|
|
4939
4941
|
s.licenses = ["Ruby (Ruby's/GPLv2 dual)"]
|
4940
4942
|
s.require_paths = ["lib"]
|
4941
4943
|
s.rubygems_version = %q{1.7.2}
|
4942
|
-
s.summary = %q{
|
4944
|
+
s.summary = %q{The Ruby UCSC API: accessing the UCSC Genome Database using Ruby}
|
4943
4945
|
s.test_files = [
|
4944
4946
|
"spec/hg18/acembly_spec.rb",
|
4945
4947
|
"spec/hg18/acemblyclass_spec.rb",
|
@@ -5921,7 +5923,7 @@ Gem::Specification.new do |s|
|
|
5921
5923
|
"spec/hg18/rdmr_spec.rb",
|
5922
5924
|
"spec/hg18/rdmrraw_spec.rb",
|
5923
5925
|
"spec/hg18/recombrate_spec.rb",
|
5924
|
-
"spec/hg18/
|
5926
|
+
"spec/hg18/reference_sequence_spec.rb",
|
5925
5927
|
"spec/hg18/refflat_spec.rb",
|
5926
5928
|
"spec/hg18/refgene_spec.rb",
|
5927
5929
|
"spec/hg18/reflink_spec.rb",
|
@@ -8560,7 +8562,7 @@ Gem::Specification.new do |s|
|
|
8560
8562
|
"spec/hg19/polyadb_spec.rb",
|
8561
8563
|
"spec/hg19/polyapredict_spec.rb",
|
8562
8564
|
"spec/hg19/recombrate_spec.rb",
|
8563
|
-
"spec/hg19/
|
8565
|
+
"spec/hg19/reference_sequence_spec.rb",
|
8564
8566
|
"spec/hg19/refflat_spec.rb",
|
8565
8567
|
"spec/hg19/refgene_spec.rb",
|
8566
8568
|
"spec/hg19/reflink_spec.rb",
|
data/lib/bio-ucsc/hg18.rb
CHANGED
@@ -2,8 +2,6 @@
|
|
2
2
|
# = hg18/ucsc.rb
|
3
3
|
# Copyright:: Copyright (C) 2011
|
4
4
|
# MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright:: Copyright (C) 2008
|
6
|
-
# Jan Aerts <jan.aerts@gmail.com>
|
7
5
|
# License:: Ruby's
|
8
6
|
|
9
7
|
base = "#{File.dirname(__FILE__)}/hg18"
|
@@ -15,7 +13,8 @@ module Bio
|
|
15
13
|
module Hg18
|
16
14
|
base = "#{File.dirname(__FILE__)}/hg18"
|
17
15
|
# Reference sequence retrieval via the 2bit file
|
18
|
-
autoload :Reference,
|
16
|
+
autoload :Reference, "#{base}/reference" # OBSOLETE
|
17
|
+
autoload :ReferenceSequence, "#{base}/reference_sequence"
|
19
18
|
|
20
19
|
# group: Mapping and Sequencing Tracks ----------
|
21
20
|
#
|
@@ -3867,4 +3866,3 @@ module Bio
|
|
3867
3866
|
end
|
3868
3867
|
end
|
3869
3868
|
end
|
3870
|
-
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
# Copyright::
|
5
5
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2010 Jan Aerts <jan.aerts@gmail.com>
|
7
7
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
8
8
|
#
|
9
9
|
# = DESCRIPTION
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpiafrate2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpiafrate2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnplocke.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpredon.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpsebat2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpsharp2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnptuzun.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/wikitrack.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser The Wiki track displays
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/DelConrad2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/DelHinds2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/DelMccarroll.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
data/lib/bio-ucsc/hg18/dgv.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/dgv.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/ensgene.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/exaptedrepeats.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/wikitrack.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser The Wiki track displays
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kgprotmap2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kgtargetali.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc10.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc11.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc12.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc13.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc14.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc8.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc9.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscg248
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ryby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table desfription in UCSC Table Browser
|
@@ -2,170 +2,11 @@
|
|
2
2
|
# = reference.rb
|
3
3
|
# handle UCSC's 2bit file (locally stored) to retrieve the reference sequence
|
4
4
|
#
|
5
|
+
# In version 0.1.0, this file is OBSOLETE.
|
6
|
+
# Use Ucsc::Hg18::ReferenceSequence instead.
|
7
|
+
#
|
5
8
|
# Copyright:: Copyright (C) 2011
|
6
9
|
# MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
7
10
|
# License:: Ruby license (Ruby's / GPLv2 dual)
|
8
11
|
|
9
|
-
|
10
|
-
|
11
|
-
module Bio
|
12
|
-
module Ucsc
|
13
|
-
module Hg18
|
14
|
-
TwoBitHeader =
|
15
|
-
Struct.new(:signature, :version, :sequence_count, :reserved)
|
16
|
-
TwoBitRecord =
|
17
|
-
Struct.new(:dna_size,
|
18
|
-
:n_block_intervals, :mask_block_intervals,
|
19
|
-
:reserved, :packed_dna_offset)
|
20
|
-
|
21
|
-
class ByteQueue
|
22
|
-
def initialize(str)
|
23
|
-
@str = str
|
24
|
-
@index = 0
|
25
|
-
end
|
26
|
-
|
27
|
-
attr_accessor :index
|
28
|
-
|
29
|
-
def next(n)
|
30
|
-
result = @str[@index, n]
|
31
|
-
@index += n
|
32
|
-
result
|
33
|
-
end
|
34
|
-
end # class ByteQueue
|
35
|
-
|
36
|
-
class Reference
|
37
|
-
BINCODE = {0b00 => "T", 0b01 => "C", 0b10 => "A", 0b11 => "G"}
|
38
|
-
|
39
|
-
cattr_reader :filename, :header, :offsets
|
40
|
-
|
41
|
-
def self.load(filename)
|
42
|
-
two_bit = nil
|
43
|
-
open(filename, 'rb') {|f| two_bit = f.read}
|
44
|
-
@@tbq = ByteQueue.new(two_bit)
|
45
|
-
@@filename = filename
|
46
|
-
|
47
|
-
twobit_header = TwoBitHeader.new
|
48
|
-
twobit_header.signature = @@tbq.next(4).unpack('L').first
|
49
|
-
twobit_header.version = @@tbq.next(4).unpack('L').first
|
50
|
-
twobit_header.sequence_count = @@tbq.next(4).unpack('L').first
|
51
|
-
twobit_header.reserved = @@tbq.next(4).unpack('L').first
|
52
|
-
@@header = twobit_header
|
53
|
-
|
54
|
-
@@offsets = Hash.new
|
55
|
-
@@header.sequence_count.times do
|
56
|
-
name_length = @@tbq.next(1).unpack('C').first
|
57
|
-
@@offsets[@@tbq.next(name_length).unpack('a*').first] =
|
58
|
-
@@tbq.next(4).unpack('L').first
|
59
|
-
end
|
60
|
-
@@records = Hash.new
|
61
|
-
end
|
62
|
-
|
63
|
-
def self.records(chrom)
|
64
|
-
return @@records[chrom] if @@records[chrom]
|
65
|
-
|
66
|
-
@@tbq.index = @@offsets[chrom]
|
67
|
-
@@records[chrom] = TwoBitRecord.new
|
68
|
-
@@records[chrom].dna_size = @@tbq.next(4).unpack('L').first
|
69
|
-
|
70
|
-
n_block_count = @@tbq.next(4).unpack('L').first
|
71
|
-
n_block_starts = Array.new
|
72
|
-
n_block_count.times do
|
73
|
-
n_block_starts << @@tbq.next(4).unpack('L').first
|
74
|
-
end
|
75
|
-
n_block_sizes = Array.new
|
76
|
-
n_block_count.times do
|
77
|
-
n_block_sizes << @@tbq.next(4).unpack('L').first
|
78
|
-
end
|
79
|
-
@@records[chrom].n_block_intervals = Array.new
|
80
|
-
n_block_count.times do |idx|
|
81
|
-
@@records[chrom].n_block_intervals <<
|
82
|
-
Bio::GenomicInterval.zero_based(chrom,
|
83
|
-
n_block_starts[idx],
|
84
|
-
n_block_starts[idx]+n_block_sizes[idx])
|
85
|
-
end
|
86
|
-
|
87
|
-
mask_block_count = @@tbq.next(4).unpack('L').first
|
88
|
-
mask_block_starts = Array.new
|
89
|
-
mask_block_count.times do
|
90
|
-
mask_block_starts << @@tbq.next(4).unpack('L').first
|
91
|
-
end
|
92
|
-
mask_block_sizes = Array.new
|
93
|
-
mask_block_count.times do
|
94
|
-
mask_block_sizes << @@tbq.next(4).unpack('L').first
|
95
|
-
end
|
96
|
-
@@records[chrom].mask_block_intervals = Array.new
|
97
|
-
mask_block_count.times do |idx|
|
98
|
-
@@records[chrom].mask_block_intervals <<
|
99
|
-
Bio::GenomicInterval.zero_based(chrom,
|
100
|
-
mask_block_starts[idx],
|
101
|
-
mask_block_starts[idx]+mask_block_sizes[idx])
|
102
|
-
end
|
103
|
-
|
104
|
-
@@records[chrom].reserved = @@tbq.next(4).unpack('L').first
|
105
|
-
@@records[chrom].packed_dna_offset = @@tbq.index
|
106
|
-
|
107
|
-
@@records[chrom]
|
108
|
-
end
|
109
|
-
|
110
|
-
def self.find_by_interval(interval)
|
111
|
-
seq = self.find_by_interval_raw(interval)
|
112
|
-
@@records[interval.chrom].n_block_intervals.map do |nb|
|
113
|
-
if interval.overlapped?(nb)
|
114
|
-
case interval.compare(nb)
|
115
|
-
when :equal,:contained_by
|
116
|
-
seq = 'N' * interval.overlap(nb)
|
117
|
-
when :contains
|
118
|
-
left_len = nb.chr_start - interval.chr_start + 1
|
119
|
-
right_len = interval.chr_end - nb.chr_end + 1
|
120
|
-
seq[0, left_len] = 'N' * left_len
|
121
|
-
seq[-right_len, right_len] = 'N' * right_len
|
122
|
-
when :left_overlapped
|
123
|
-
left_len = nb.chr_end - interval.chr_start + 1
|
124
|
-
seq[0, left_len] = 'N' * left_len
|
125
|
-
when :right_overlapped
|
126
|
-
right_len = interval.chr_end - nb.chr_start + 1
|
127
|
-
seq[-right_len, right_len] = 'N' * right_len
|
128
|
-
when :right_adjacent, :right_off
|
129
|
-
# expecting that N-blocks are sorted
|
130
|
-
# return Bio::Sequence::NA.new(seq)
|
131
|
-
seq
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
135
|
-
#Bio::Sequence::NA.new(seq)
|
136
|
-
seq
|
137
|
-
end
|
138
|
-
|
139
|
-
def self.find_by_interval_raw(interval)
|
140
|
-
byte_count, byte_mod = interval.zero_start.divmod 4
|
141
|
-
chrom_top = self.records(interval.chrom).packed_dna_offset
|
142
|
-
div_start, mod_start = interval.zero_start.divmod 4
|
143
|
-
div_end, mod_end = interval.zero_end.divmod 4
|
144
|
-
div_len, mod_len = interval.length.divmod 4
|
145
|
-
|
146
|
-
byte_length = div_end - div_start + 1
|
147
|
-
@@tbq.index = chrom_top + div_start
|
148
|
-
bytes = @@tbq.next(byte_length).unpack('C*')
|
149
|
-
seq = Bio::Ucsc::Hg18::Reference.bytes_to_nucleotides(bytes)
|
150
|
-
seq[mod_start..(-1-(4-mod_end))]
|
151
|
-
end
|
152
|
-
|
153
|
-
def self.bytes_to_nucleotides(bytes)
|
154
|
-
results = ""
|
155
|
-
bytes.each do |byte|
|
156
|
-
results << Bio::Ucsc::Hg18::Reference.byte_to_nucleotides(byte)
|
157
|
-
end
|
158
|
-
results
|
159
|
-
end
|
160
|
-
|
161
|
-
def self.byte_to_nucleotides(byte)
|
162
|
-
BINCODE[byte >> 6] +
|
163
|
-
BINCODE[(byte >> 4) & 0b11] +
|
164
|
-
BINCODE[(byte >> 2) & 0b11] +
|
165
|
-
BINCODE[byte & 0b11]
|
166
|
-
end
|
167
|
-
end # class Reference
|
168
|
-
|
169
|
-
end # module Hg18
|
170
|
-
end # module Ucsc
|
171
|
-
end # module Bio
|
12
|
+
raise "Bio::Ucsc::Hg18::Reference is OBSOLETE. Use Bio::Ucsc::Hg18::ReferenceSequence instead."
|