bio-ucsc-api 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +36 -16
- data/Rakefile +3 -3
- data/VERSION +1 -1
- data/bio-ucsc-api.gemspec +9 -7
- data/lib/bio-ucsc/hg18.rb +2 -4
- data/lib/bio-ucsc/hg18/activerecord.rb +1 -1
- data/lib/bio-ucsc/hg18/all_bacends.rb +0 -1
- data/lib/bio-ucsc/hg18/cnpiafrate2.rb +0 -1
- data/lib/bio-ucsc/hg18/cnplocke.rb +0 -1
- data/lib/bio-ucsc/hg18/cnpredon.rb +0 -1
- data/lib/bio-ucsc/hg18/cnpsebat2.rb +0 -1
- data/lib/bio-ucsc/hg18/cnpsharp2.rb +0 -1
- data/lib/bio-ucsc/hg18/cnptuzun.rb +0 -1
- data/lib/bio-ucsc/hg18/cytoband.rb +0 -1
- data/lib/bio-ucsc/hg18/db_connection.rb +1 -1
- data/lib/bio-ucsc/hg18/delconrad2.rb +0 -1
- data/lib/bio-ucsc/hg18/delhinds2.rb +0 -1
- data/lib/bio-ucsc/hg18/delmccarroll.rb +0 -1
- data/lib/bio-ucsc/hg18/dgv.rb +0 -1
- data/lib/bio-ucsc/hg18/ensgene.rb +0 -1
- data/lib/bio-ucsc/hg18/exaptedrepeats.rb +0 -1
- data/lib/bio-ucsc/hg18/hgcentral_wikitrack.rb +0 -1
- data/lib/bio-ucsc/hg18/kgprotmap2.rb +0 -1
- data/lib/bio-ucsc/hg18/kgtargetali.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc10.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc11.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc12.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc13.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc14.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc8.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscabc9.rb +0 -1
- data/lib/bio-ucsc/hg18/kiddeichlerdiscg248.rb +0 -1
- data/lib/bio-ucsc/hg18/reference.rb +4 -163
- data/lib/bio-ucsc/hg18/reference_sequence.rb +171 -0
- data/lib/bio-ucsc/hg18/refgene.rb +0 -1
- data/lib/bio-ucsc/hg18/snp130.rb +0 -1
- data/lib/bio-ucsc/hg19.rb +2 -1
- data/lib/bio-ucsc/hg19/activerecord.rb +1 -1
- data/lib/bio-ucsc/hg19/cytoband.rb +0 -1
- data/lib/bio-ucsc/hg19/cytobandideo.rb +0 -1
- data/lib/bio-ucsc/hg19/db_connection.rb +1 -1
- data/lib/bio-ucsc/hg19/dgv.rb +0 -1
- data/lib/bio-ucsc/hg19/ensgene.rb +0 -1
- data/lib/bio-ucsc/hg19/reference.rb +4 -163
- data/lib/bio-ucsc/hg19/reference_sequence.rb +171 -0
- data/lib/bio-ucsc/hg19/refgene.rb +0 -1
- data/lib/bio-ucsc/hg19/snp131.rb +0 -1
- data/lib/bio-ucsc/hg19/wgrna.rb +0 -1
- data/samples/hg19-2bit-retrieve.rb +2 -2
- data/spec/hg18/{reference_spec.rb → reference_sequence_spec.rb} +26 -26
- data/spec/hg19/{reference_spec.rb → reference_sequence_spec.rb} +26 -26
- metadata +11 -10
data/README.rdoc
CHANGED
@@ -1,37 +1,40 @@
|
|
1
1
|
= bio-ucsc-api
|
2
2
|
|
3
|
-
|
3
|
+
The Ruby UCSC API: accessing the UCSC Genome Database using Ruby.
|
4
4
|
|
5
5
|
This package is based on the following:
|
6
|
-
* ruby-ucsc-api: https://github.com/jandot/ruby-ucsc-api
|
6
|
+
* original ruby-ucsc-api: https://github.com/jandot/ruby-ucsc-api
|
7
7
|
* ruby-ensembl-api: https://github.com/jandot/ruby-ensembl-api
|
8
8
|
|
9
9
|
Major dependent gems:
|
10
10
|
* active_record http://api.rubyonrails.org/classes/ActiveRecord/Base.html
|
11
11
|
* bioruby-genomic-interval https://github.com/misshie/bioruby-genomic-interval
|
12
12
|
|
13
|
+
Requirement:
|
14
|
+
* Ruby version 1.9.2 or later
|
15
|
+
|
13
16
|
See also a paper:
|
14
17
|
|
15
18
|
Strozzi F, Aerts J: A Ruby API to query the Ensembl database for genomic features.
|
16
19
|
Bioinformatics 2011, 27:1013-1014.
|
17
20
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
http://github.com/misshie/bioruby-ucsc-api/issues
|
21
|
+
Your comments, suggestions and requests are welcome. Documentation and
|
22
|
+
feedback are available at the UserEcho site at
|
23
|
+
http://rubyucscapi.userecho.com/.
|
22
24
|
|
23
25
|
== Features
|
24
26
|
|
25
27
|
* Designed as a BioRuby plugin
|
26
28
|
* Using ActiveRecord as an O/R mapping framework
|
27
29
|
* Using the Bin index system to improve query performance (see https://github.com/misshie/UCSCBin )
|
28
|
-
* Automatic conversion of "1-based full-closed intervals" to internal "0-based
|
30
|
+
* Automatic conversion of "1-based full-closed intervals" to internal "0-based left-closed right-open intervals" (see also bioruby-genomic-interval)
|
29
31
|
* Supporting non-official MySql hosts (e.g. local servers)
|
30
32
|
* Using Rspec for the testing framework
|
31
|
-
* Support hg19 and hg18 databases.
|
33
|
+
* Supports the hg19 and hg18 databases. The current version does not support tables that contain only a single record whose fileName column links to a bigWig or bigBed file.
|
32
34
|
|
33
35
|
== Change Log
|
34
|
-
|
36
|
+
* *MODIFIED* (v0.1.0): The name of this library is now "Ruby UCSC API". The RubyGem name and the GitHub account and the library name are not changed.
|
37
|
+
* *MODIFIED* (v0.1.0): Bio::Ucsc::[Hg18|Hg19]::Reference is replaced by Bio::Ucsc::[Hg18|Hg19]::ReferenceSequence.
|
35
38
|
* *UPDATE* (v0.0.5): Almost all hg18 tables are supported.
|
36
39
|
* *UPDATE* (v0.0.5): find_by_interval and find_all_by_interval class methods accept the "partial" option. The default is true. When "partial: false" is given, only fully-included (not partially-included) records are returned.
|
37
40
|
* *UPDATE* (v0.0.4): Almost all hg19 tables are supported. "filename" tables in the ENCODE dataset are omitted. Each of them contains only a single record holding a path to the raw data file. Definitions of table relations are incomplete.
|
@@ -49,13 +52,13 @@ You may need to be root or use "sudo". "--no-ri" and "--no-rdoc" options are rec
|
|
49
52
|
|
50
53
|
require 'bio-ucsc'
|
51
54
|
|
52
|
-
include Bio::Ucsc::Hg19
|
55
|
+
include Bio::Ucsc::Hg19
|
53
56
|
DBConnection.connect
|
54
57
|
|
55
58
|
itv = Bio::GenomicInterval.parse("chr1:1-11,000")
|
56
59
|
Snp131.find_all_by_interval(itv).each do |e|
|
57
60
|
i = Bio::GenomicInterval.zero_based(e.chrom, e.chromStart, e.chromEnd)
|
58
|
-
puts "#{i.chrom}\t#{i.
|
61
|
+
puts "#{i.chrom}\t#{i.chr_start}\t#{e.name}\t#{e[:class]}"
|
59
62
|
end
|
60
63
|
|
61
64
|
itv = Bio::GenomicInterval.parse("chr17:7,579,614-7,579,700")
|
@@ -64,15 +67,32 @@ You may need to be root or use "sudo". "--no-ri" and "--no-rdoc" options are rec
|
|
64
67
|
|
65
68
|
p Snp131.find_by_name("rs56289060")
|
66
69
|
|
70
|
+
# Sometimes, queries using raw SQL provide elegant solutions.
|
71
|
+
#
|
72
|
+
sql = <<'SQL'
|
73
|
+
SELECT name,chrom,chromStart,chromEnd,observed
|
74
|
+
FROM snp131
|
75
|
+
WHERE name="rs56289060"
|
76
|
+
SQL
|
77
|
+
p Snp131.find_by_sql(sql)
|
78
|
+
|
67
79
|
# retrieve reference sequence from a locally-stored 2bit file
|
68
|
-
|
80
|
+
ReferenceSequence.load("hg19.2bit")
|
69
81
|
itv = Bio::GenomicInterval.parse("chr1:9,500-10,999")
|
70
|
-
p
|
82
|
+
p ReferenceSequence.find_by_interval(itv)
|
71
83
|
|
72
|
-
|
73
|
-
|
84
|
+
# Connecting to non-official or local mirror MySQL servers
|
85
|
+
DBConnection.db_host = 'foo.example.edu'
|
86
|
+
DBConnection.db_username = 'genome'
|
87
|
+
DBConnection.db_password = ''
|
88
|
+
DBConnection.connect
|
89
|
+
|
90
|
+
DBConnection.default # reset to connect to UCSC's public MySQL server
|
91
|
+
DBConnection.connect
|
74
92
|
|
75
|
-
Copyright
|
93
|
+
== Copyright
|
94
|
+
Copyright:: (c) 2011 MISHIMA, Hiroyuki (missy at be.to / hmishima at nagasaki-u.ac.jp / @mishimahryk at Twitter)
|
95
|
+
Copyright:: (c) 2010 Jan Aerts
|
76
96
|
|
77
97
|
License:: Ruby license (Ruby's / GPLv2 dual). See COPYING and COPYING.ja for further details.
|
78
98
|
|
data/Rakefile
CHANGED
@@ -16,10 +16,10 @@ Jeweler::Tasks.new do |gem|
|
|
16
16
|
gem.name = "bio-ucsc-api"
|
17
17
|
gem.homepage = "http://github.com/misshie/bioruby-ucsc-api"
|
18
18
|
gem.license = "Ruby (Ruby's/GPLv2 dual)"
|
19
|
-
gem.summary = %Q{
|
20
|
-
gem.description = %Q{
|
19
|
+
gem.summary = %Q{The Ruby UCSC API: accessing the UCSC Genome Database using Ruby}
|
20
|
+
gem.description = %Q{Ruby UCSC API: accessing the UCSC Genome Database using Ruby}
|
21
21
|
gem.email = "missy@be.to"
|
22
|
-
gem.authors = ["Hiroyuki Mishima", "
|
22
|
+
gem.authors = ["Hiroyuki Mishima", "Jan Aerts"]
|
23
23
|
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
24
24
|
# and development dependencies are only needed for development
|
25
25
|
# (ie running rake tasks, tests, etc)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/bio-ucsc-api.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bio-ucsc-api}
|
8
|
-
s.version = "0.0
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["Hiroyuki Mishima", "
|
12
|
-
s.date = %q{2011-
|
13
|
-
s.description = %q{
|
11
|
+
s.authors = ["Hiroyuki Mishima", "Jan Aerts"]
|
12
|
+
s.date = %q{2011-07-07}
|
13
|
+
s.description = %q{Ruby UCSC API: accessing the UCSC Genome Database using Ruby}
|
14
14
|
s.email = %q{missy@be.to}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"README.rdoc"
|
@@ -1009,6 +1009,7 @@ Gem::Specification.new do |s|
|
|
1009
1009
|
"lib/bio-ucsc/hg18/rdmrraw.rb",
|
1010
1010
|
"lib/bio-ucsc/hg18/recombrate.rb",
|
1011
1011
|
"lib/bio-ucsc/hg18/reference.rb",
|
1012
|
+
"lib/bio-ucsc/hg18/reference_sequence.rb",
|
1012
1013
|
"lib/bio-ucsc/hg18/refflat.rb",
|
1013
1014
|
"lib/bio-ucsc/hg18/refgene.rb",
|
1014
1015
|
"lib/bio-ucsc/hg18/reflink.rb",
|
@@ -3651,6 +3652,7 @@ Gem::Specification.new do |s|
|
|
3651
3652
|
"lib/bio-ucsc/hg19/polyapredict.rb",
|
3652
3653
|
"lib/bio-ucsc/hg19/recombrate.rb",
|
3653
3654
|
"lib/bio-ucsc/hg19/reference.rb",
|
3655
|
+
"lib/bio-ucsc/hg19/reference_sequence.rb",
|
3654
3656
|
"lib/bio-ucsc/hg19/refflat.rb",
|
3655
3657
|
"lib/bio-ucsc/hg19/refgene.rb",
|
3656
3658
|
"lib/bio-ucsc/hg19/reflink.rb",
|
@@ -4939,7 +4941,7 @@ Gem::Specification.new do |s|
|
|
4939
4941
|
s.licenses = ["Ruby (Ruby's/GPLv2 dual)"]
|
4940
4942
|
s.require_paths = ["lib"]
|
4941
4943
|
s.rubygems_version = %q{1.7.2}
|
4942
|
-
s.summary = %q{
|
4944
|
+
s.summary = %q{The Ruby UCSC API: accessing the UCSC Genome Database using Ruby}
|
4943
4945
|
s.test_files = [
|
4944
4946
|
"spec/hg18/acembly_spec.rb",
|
4945
4947
|
"spec/hg18/acemblyclass_spec.rb",
|
@@ -5921,7 +5923,7 @@ Gem::Specification.new do |s|
|
|
5921
5923
|
"spec/hg18/rdmr_spec.rb",
|
5922
5924
|
"spec/hg18/rdmrraw_spec.rb",
|
5923
5925
|
"spec/hg18/recombrate_spec.rb",
|
5924
|
-
"spec/hg18/
|
5926
|
+
"spec/hg18/reference_sequence_spec.rb",
|
5925
5927
|
"spec/hg18/refflat_spec.rb",
|
5926
5928
|
"spec/hg18/refgene_spec.rb",
|
5927
5929
|
"spec/hg18/reflink_spec.rb",
|
@@ -8560,7 +8562,7 @@ Gem::Specification.new do |s|
|
|
8560
8562
|
"spec/hg19/polyadb_spec.rb",
|
8561
8563
|
"spec/hg19/polyapredict_spec.rb",
|
8562
8564
|
"spec/hg19/recombrate_spec.rb",
|
8563
|
-
"spec/hg19/
|
8565
|
+
"spec/hg19/reference_sequence_spec.rb",
|
8564
8566
|
"spec/hg19/refflat_spec.rb",
|
8565
8567
|
"spec/hg19/refgene_spec.rb",
|
8566
8568
|
"spec/hg19/reflink_spec.rb",
|
data/lib/bio-ucsc/hg18.rb
CHANGED
@@ -2,8 +2,6 @@
|
|
2
2
|
# = hg18/ucsc.rb
|
3
3
|
# Copyright:: Copyright (C) 2011
|
4
4
|
# MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright:: Copyright (C) 2008
|
6
|
-
# Jan Aerts <jan.aerts@gmail.com>
|
7
5
|
# License:: Ruby's
|
8
6
|
|
9
7
|
base = "#{File.dirname(__FILE__)}/hg18"
|
@@ -15,7 +13,8 @@ module Bio
|
|
15
13
|
module Hg18
|
16
14
|
base = "#{File.dirname(__FILE__)}/hg18"
|
17
15
|
# Reference sequence retrieval via the 2bit file
|
18
|
-
autoload :Reference,
|
16
|
+
autoload :Reference, "#{base}/reference" # OBSOLETE
|
17
|
+
autoload :ReferenceSequence, "#{base}/reference_sequence"
|
19
18
|
|
20
19
|
# group: Mapping and Sequencing Tracks ----------
|
21
20
|
#
|
@@ -3867,4 +3866,3 @@ module Bio
|
|
3867
3866
|
end
|
3868
3867
|
end
|
3869
3868
|
end
|
3870
|
-
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
# Copyright::
|
5
5
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2010 Jan Aerts <jan.aerts@gmail.com>
|
7
7
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
8
8
|
#
|
9
9
|
# = DESCRIPTION
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpiafrate2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpiafrate2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnplocke.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpredon.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpsebat2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnpsharp2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/cnptuzun.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/wikitrack.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser The Wiki track displays
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/DelConrad2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/DelHinds2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/DelMccarroll.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
data/lib/bio-ucsc/hg18/dgv.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/dgv.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/ensgene.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/exaptedrepeats.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/wikitrack.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser The Wiki track displays
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kgprotmap2.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kgtargetali.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc10.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc11.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc12.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc13.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc14.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc8.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscabc9.rb
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# = hg18/kiddeichlerdiscg248
|
3
3
|
# Copyright::
|
4
4
|
# Copyright (C) 2011 MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
5
|
-
# Copyright (C) 2008 Jan Aerts <jan.aerts@gmail.com>
|
6
5
|
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
7
6
|
#
|
8
7
|
# = Table description in UCSC Table Browser
|
@@ -2,170 +2,11 @@
|
|
2
2
|
# = reference.rb
|
3
3
|
# handle UCSC's 2bit file (locally stored) to retrieve the reference sequence
|
4
4
|
#
|
5
|
+
# In version 0.1.0, this file is OBSOLETE.
|
6
|
+
# Use Bio::Ucsc::Hg18::ReferenceSequence instead.
|
7
|
+
#
|
5
8
|
# Copyright:: Copyright (C) 2011
|
6
9
|
# MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
|
7
10
|
# License:: Ruby license (Ruby's / GPLv2 dual)
|
8
11
|
|
9
|
-
|
10
|
-
|
11
|
-
module Bio
|
12
|
-
module Ucsc
|
13
|
-
module Hg18
|
14
|
-
TwoBitHeader =
|
15
|
-
Struct.new(:signature, :version, :sequence_count, :reserved)
|
16
|
-
TwoBitRecord =
|
17
|
-
Struct.new(:dna_size,
|
18
|
-
:n_block_intervals, :mask_block_intervals,
|
19
|
-
:reserved, :packed_dna_offset)
|
20
|
-
|
21
|
-
class ByteQueue
|
22
|
-
def initialize(str)
|
23
|
-
@str = str
|
24
|
-
@index = 0
|
25
|
-
end
|
26
|
-
|
27
|
-
attr_accessor :index
|
28
|
-
|
29
|
-
def next(n)
|
30
|
-
result = @str[@index, n]
|
31
|
-
@index += n
|
32
|
-
result
|
33
|
-
end
|
34
|
-
end # class ByteQueue
|
35
|
-
|
36
|
-
class Reference
|
37
|
-
BINCODE = {0b00 => "T", 0b01 => "C", 0b10 => "A", 0b11 => "G"}
|
38
|
-
|
39
|
-
cattr_reader :filename, :header, :offsets
|
40
|
-
|
41
|
-
def self.load(filename)
|
42
|
-
two_bit = nil
|
43
|
-
open(filename, 'rb') {|f| two_bit = f.read}
|
44
|
-
@@tbq = ByteQueue.new(two_bit)
|
45
|
-
@@filename = filename
|
46
|
-
|
47
|
-
twobit_header = TwoBitHeader.new
|
48
|
-
twobit_header.signature = @@tbq.next(4).unpack('L').first
|
49
|
-
twobit_header.version = @@tbq.next(4).unpack('L').first
|
50
|
-
twobit_header.sequence_count = @@tbq.next(4).unpack('L').first
|
51
|
-
twobit_header.reserved = @@tbq.next(4).unpack('L').first
|
52
|
-
@@header = twobit_header
|
53
|
-
|
54
|
-
@@offsets = Hash.new
|
55
|
-
@@header.sequence_count.times do
|
56
|
-
name_length = @@tbq.next(1).unpack('C').first
|
57
|
-
@@offsets[@@tbq.next(name_length).unpack('a*').first] =
|
58
|
-
@@tbq.next(4).unpack('L').first
|
59
|
-
end
|
60
|
-
@@records = Hash.new
|
61
|
-
end
|
62
|
-
|
63
|
-
def self.records(chrom)
|
64
|
-
return @@records[chrom] if @@records[chrom]
|
65
|
-
|
66
|
-
@@tbq.index = @@offsets[chrom]
|
67
|
-
@@records[chrom] = TwoBitRecord.new
|
68
|
-
@@records[chrom].dna_size = @@tbq.next(4).unpack('L').first
|
69
|
-
|
70
|
-
n_block_count = @@tbq.next(4).unpack('L').first
|
71
|
-
n_block_starts = Array.new
|
72
|
-
n_block_count.times do
|
73
|
-
n_block_starts << @@tbq.next(4).unpack('L').first
|
74
|
-
end
|
75
|
-
n_block_sizes = Array.new
|
76
|
-
n_block_count.times do
|
77
|
-
n_block_sizes << @@tbq.next(4).unpack('L').first
|
78
|
-
end
|
79
|
-
@@records[chrom].n_block_intervals = Array.new
|
80
|
-
n_block_count.times do |idx|
|
81
|
-
@@records[chrom].n_block_intervals <<
|
82
|
-
Bio::GenomicInterval.zero_based(chrom,
|
83
|
-
n_block_starts[idx],
|
84
|
-
n_block_starts[idx]+n_block_sizes[idx])
|
85
|
-
end
|
86
|
-
|
87
|
-
mask_block_count = @@tbq.next(4).unpack('L').first
|
88
|
-
mask_block_starts = Array.new
|
89
|
-
mask_block_count.times do
|
90
|
-
mask_block_starts << @@tbq.next(4).unpack('L').first
|
91
|
-
end
|
92
|
-
mask_block_sizes = Array.new
|
93
|
-
mask_block_count.times do
|
94
|
-
mask_block_sizes << @@tbq.next(4).unpack('L').first
|
95
|
-
end
|
96
|
-
@@records[chrom].mask_block_intervals = Array.new
|
97
|
-
mask_block_count.times do |idx|
|
98
|
-
@@records[chrom].mask_block_intervals <<
|
99
|
-
Bio::GenomicInterval.zero_based(chrom,
|
100
|
-
mask_block_starts[idx],
|
101
|
-
mask_block_starts[idx]+mask_block_sizes[idx])
|
102
|
-
end
|
103
|
-
|
104
|
-
@@records[chrom].reserved = @@tbq.next(4).unpack('L').first
|
105
|
-
@@records[chrom].packed_dna_offset = @@tbq.index
|
106
|
-
|
107
|
-
@@records[chrom]
|
108
|
-
end
|
109
|
-
|
110
|
-
def self.find_by_interval(interval)
|
111
|
-
seq = self.find_by_interval_raw(interval)
|
112
|
-
@@records[interval.chrom].n_block_intervals.map do |nb|
|
113
|
-
if interval.overlapped?(nb)
|
114
|
-
case interval.compare(nb)
|
115
|
-
when :equal,:contained_by
|
116
|
-
seq = 'N' * interval.overlap(nb)
|
117
|
-
when :contains
|
118
|
-
left_len = nb.chr_start - interval.chr_start + 1
|
119
|
-
right_len = interval.chr_end - nb.chr_end + 1
|
120
|
-
seq[0, left_len] = 'N' * left_len
|
121
|
-
seq[-right_len, right_len] = 'N' * right_len
|
122
|
-
when :left_overlapped
|
123
|
-
left_len = nb.chr_end - interval.chr_start + 1
|
124
|
-
seq[0, left_len] = 'N' * left_len
|
125
|
-
when :right_overlapped
|
126
|
-
right_len = interval.chr_end - nb.chr_start + 1
|
127
|
-
seq[-right_len, right_len] = 'N' * right_len
|
128
|
-
when :right_adjacent, :right_off
|
129
|
-
# expecting that N-blocks are sorted
|
130
|
-
# return Bio::Sequence::NA.new(seq)
|
131
|
-
seq
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
135
|
-
#Bio::Sequence::NA.new(seq)
|
136
|
-
seq
|
137
|
-
end
|
138
|
-
|
139
|
-
def self.find_by_interval_raw(interval)
|
140
|
-
byte_count, byte_mod = interval.zero_start.divmod 4
|
141
|
-
chrom_top = self.records(interval.chrom).packed_dna_offset
|
142
|
-
div_start, mod_start = interval.zero_start.divmod 4
|
143
|
-
div_end, mod_end = interval.zero_end.divmod 4
|
144
|
-
div_len, mod_len = interval.length.divmod 4
|
145
|
-
|
146
|
-
byte_length = div_end - div_start + 1
|
147
|
-
@@tbq.index = chrom_top + div_start
|
148
|
-
bytes = @@tbq.next(byte_length).unpack('C*')
|
149
|
-
seq = Bio::Ucsc::Hg18::Reference.bytes_to_nucleotides(bytes)
|
150
|
-
seq[mod_start..(-1-(4-mod_end))]
|
151
|
-
end
|
152
|
-
|
153
|
-
def self.bytes_to_nucleotides(bytes)
|
154
|
-
results = ""
|
155
|
-
bytes.each do |byte|
|
156
|
-
results << Bio::Ucsc::Hg18::Reference.byte_to_nucleotides(byte)
|
157
|
-
end
|
158
|
-
results
|
159
|
-
end
|
160
|
-
|
161
|
-
def self.byte_to_nucleotides(byte)
|
162
|
-
BINCODE[byte >> 6] +
|
163
|
-
BINCODE[(byte >> 4) & 0b11] +
|
164
|
-
BINCODE[(byte >> 2) & 0b11] +
|
165
|
-
BINCODE[byte & 0b11]
|
166
|
-
end
|
167
|
-
end # class Reference
|
168
|
-
|
169
|
-
end # module Hg18
|
170
|
-
end # module Ucsc
|
171
|
-
end # module Bio
|
12
|
+
raise "Bio::Ucsc::Hg18::Reference is OBSOLETE. Use Bio::Ucsc::Hg18::ReferenceSequence instead."
|