bio-liftover 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.travis.yml +13 -0
- data/Gemfile +16 -0
- data/LICENSE.txt +20 -0
- data/README.md +68 -0
- data/README.rdoc +48 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/bin/bio-liftover +294 -0
- data/lib/bio-liftover.rb +257 -0
- data/test/test_bio-liftover.rb +11 -0
- metadata +191 -0
data/.document
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- jruby-19mode # JRuby in 1.9 mode
|
6
|
+
|
7
|
+
# - rbx-19mode
|
8
|
+
# - 1.8.7
|
9
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
10
|
+
# - rbx-18mode
|
11
|
+
|
12
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
13
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Fetch gems over TLS.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"
gem "interval-tree", ">= 0.1.3"
# bin/bio-liftover does `require 'docopt'`, so it has to be a runtime dependency too.
gem "docopt", ">= 0.5.0"
|
6
|
+
# Add dependencies to develop your gem here.
|
7
|
+
# Include everything needed to run rake, tests, features, etc.
|
8
|
+
# Gems used only for building, documenting and testing the gem.
# Each gem is listed once; the original declared rdoc twice.
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "simplecov", ">= 0"
  gem "jeweler", "~> 2.0.1", :git => "https://github.com/technicalpickles/jeweler.git"
  gem "bundler", ">= 1.0.21"
  gem "bio", ">= 1.4.2"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2014 Andrei Rozanski
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# bio-liftover
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/andreirozanski/bioruby-liftover.png)](http://travis-ci.org/andreirozanski/bioruby-liftover)
|
4
|
+
|
5
|
+
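A simple Ruby solution for the UCSC LiftOver tool (http://genome.ucsc.edu/cgi-bin/hgLiftOver): it lifts a chromosome interval from one genome assembly to another using a UCSC chain file.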
|
6
|
+
|
7
|
+
Note: this software is under active development!
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
```sh
|
12
|
+
gem install bio-liftover
|
13
|
+
```
|
14
|
+
|
15
|
+
## Usage
|
16
|
+
|
17
|
+
Inside script:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
require 'bio-liftover'
|
21
|
+
|
22
|
+
```
|
23
|
+
|
24
|
+
As bin file:
|
25
|
+
|
26
|
+
bio-liftover -h --help
|
27
|
+
|
28
|
+
bio-liftover
|
29
|
+
|
30
|
+
Usage:
|
31
|
+
bio-liftover.rb ([-v] [-f <chain_file>] -c <genome1> <genome2> <chromosome> <start> <end>)
|
32
|
+
bio-liftover.rb -h | --help
|
33
|
+
bio-liftover.rb -v | --verbose
|
34
|
+
|
35
|
+
Options:
|
36
|
+
-h --help Show this screen.
|
37
|
+
-v --verbose Increase information during run.
|
38
|
+
-f --file Load local chain file.
|
39
|
+
-c --coord Coordinates as input, e.g. hg19 hg18 chr2 55000 56000.
|
40
|
+
|
41
|
+
|
42
|
+
The API doc is online. For more code examples see the test files in
|
43
|
+
the source tree.
|
44
|
+
|
45
|
+
## Project home page
|
46
|
+
|
47
|
+
Information on the source tree, documentation, examples, issues and
|
48
|
+
how to contribute, see
|
49
|
+
|
50
|
+
http://github.com/andreirozanski/bioruby-liftover
|
51
|
+
|
52
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
53
|
+
|
54
|
+
## Cite
|
55
|
+
|
56
|
+
If you use this software, please cite one of
|
57
|
+
|
58
|
+
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
59
|
+
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
60
|
+
|
61
|
+
## Biogems.info
|
62
|
+
|
63
|
+
This Biogem is published at (http://biogems.info/index.html#bio-liftover)
|
64
|
+
|
65
|
+
## Copyright
|
66
|
+
|
67
|
+
Copyright (c) 2014 Andrei Rozanski. See LICENSE.txt for further details.
|
68
|
+
|
data/README.rdoc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
= bio-liftover
|
2
|
+
|
3
|
+
{<img
|
4
|
+
src="https://secure.travis-ci.org/andreirozanski/bioruby-liftover.png"
|
5
|
+
/>}[http://travis-ci.org/#!/andreirozanski/bioruby-liftover]
|
6
|
+
|
7
|
+
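A simple Ruby solution for the UCSC LiftOver tool (http://genome.ucsc.edu/cgi-bin/hgLiftOver): it lifts a chromosome interval from one genome assembly to another using a UCSC chain file.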
|
8
|
+
|
9
|
+
Note: this software is under active development!
|
10
|
+
|
11
|
+
== Installation
|
12
|
+
|
13
|
+
gem install bio-liftover
|
14
|
+
|
15
|
+
== Usage
|
16
|
+
|
17
|
+
== Developers
|
18
|
+
|
19
|
+
To use the library
|
20
|
+
|
21
|
+
require 'bio-liftover'
|
22
|
+
|
23
|
+
The API doc is online. For more code examples see also the test files in
|
24
|
+
the source tree.
|
25
|
+
|
26
|
+
== Project home page
|
27
|
+
|
28
|
+
Information on the source tree, documentation, issues and how to contribute, see
|
29
|
+
|
30
|
+
http://github.com/andreirozanski/bioruby-liftover
|
31
|
+
|
32
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
33
|
+
|
34
|
+
== Cite
|
35
|
+
|
36
|
+
If you use this software, please cite one of
|
37
|
+
|
38
|
+
* {BioRuby: bioinformatics software for the Ruby programming language}[http://dx.doi.org/10.1093/bioinformatics/btq475]
|
39
|
+
* {Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics}[http://dx.doi.org/10.1093/bioinformatics/bts080]
|
40
|
+
|
41
|
+
== Biogems.info
|
42
|
+
|
43
|
+
This Biogem is published at http://biogems.info/index.html#bio-liftover
|
44
|
+
|
45
|
+
== Copyright
|
46
|
+
|
47
|
+
Copyright (c) 2014 Andrei Rozanski. See LICENSE.txt for further details.
|
48
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
# Activate the :default and :development gem groups before any task is defined.
# If Bundler cannot resolve them, report why on stderr and exit with the
# status code Bundler attached to the error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts(bundler_error.message, "Run `bundle install` to install missing gems")
  exit bundler_error.status_code
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
# Gem packaging tasks. The block receives the Gem::Specification being built
# (see http://docs.rubygems.org/read/chapter/20 for more options); gem
# dependencies are taken from the Gemfile rather than listed here.
Jeweler::Tasks.new do |spec|
  spec.name        = "bio-liftover"
  spec.authors     = ["Andrei Rozanski"]
  spec.email       = "andrei@ruivo.org"
  spec.homepage    = "http://github.com/andreirozanski/bioruby-liftover"
  spec.license     = "MIT"
  spec.summary     = "Ruby solution for UCSC LiftOver tool- (UCSC http://genome.ucsc.edu/cgi-bin/hgLiftOver)"
  spec.description = "Simple, under development Ruby solution for UCSC LiftOver tool"
end
# Adds the tasks for releasing to rubygems.org.
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
# `rake test`: run every test/**/test_*.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

# A bare `rake` runs the test suite.
task :default => [:test]
|
36
|
+
|
37
|
+
require 'rdoc/task'
|
38
|
+
# `rake rdoc`: build API documentation into rdoc/ from the README files and
# everything under lib/. The title carries the contents of the VERSION file
# when that file exists, and an empty version otherwise.
Rake::RDocTask.new do |doc_task|
  release = ""
  release = File.read('VERSION') if File.exist?('VERSION')

  doc_task.rdoc_dir = 'rdoc'
  doc_task.title = "bio-liftover #{release}"
  ['README*', 'lib/**/*.rb'].each { |glob| doc_task.rdoc_files.include(glob) }
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/bin/bio-liftover
ADDED
@@ -0,0 +1,294 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# Copyright:: Copyright (C) 2009
|
4
|
+
# Andrei Rozanski <rozanski.andrei@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
# == Description
|
7
|
+
# This file contains a liftover for Ruby
|
8
|
+
|
9
|
+
require 'zlib'
|
10
|
+
require 'open-uri'
|
11
|
+
require 'interval-tree'
|
12
|
+
require 'docopt'
|
13
|
+
|
14
|
+
doc = <<DOCOPT
|
15
|
+
bio-liftover
|
16
|
+
|
17
|
+
Usage:
|
18
|
+
#{__FILE__} ([-v] -c <genome1> <genome2> <chromosome> <start> <end>)
|
19
|
+
#{__FILE__} ([-v] -f <chain_file> -c <genome1> <genome2> <chromosome> <start> <end>)
|
20
|
+
#{__FILE__} -h | --help
|
21
|
+
#{__FILE__} -v | --verbose
|
22
|
+
|
23
|
+
Options:
|
24
|
+
-h --help Show this screen.
|
25
|
+
-v --verbose Increase information during run.
|
26
|
+
-c --coord Coordinate as input i.e. hg19,hg18,chr2,55000.
|
27
|
+
-f --file Load local chain file.
|
28
|
+
|
29
|
+
DOCOPT
|
30
|
+
|
31
|
+
# Parse the command line against the usage text held in `doc`.
# When docopt raises Docopt::Exit its message is printed and @@doc is nil,
# which the guard at the bottom of the script uses to skip the run.
@@doc =
  begin
    Docopt.docopt(doc)
    # Shape of a successful parse:
    #{"-v"=>false, "-c"=>true, "<genome1>"=>"hg19", "<genome2>"=>"Hg18", "<chromosome>"=>"chr2", "<start>"=>"10", "<end>"=>"100", "-f"=>false, "<chain_file>"=>nil, "-h"=>false, "--help"=>false, "--verbose"=>false}
  rescue Docopt::Exit => usage_exit
    puts usage_exit.message
    nil
  end
|
38
|
+
|
39
|
+
module LiftOver
|
40
|
+
class Query
|
41
|
+
# Empty constructor: nothing is set up per instance.
def initialize; end

# Original, misspelled name of the constructor; kept as a no-op so any
# existing caller of it keeps working.
def initilize; end
|
42
|
+
|
43
|
+
#Entry point of the command-line run.
#Copies the docopt values (genomes, chromosome, start and end coordinates)
#into class variables, then hands over to parse_remote_chain when no
#<chain_file> was given on the command line, or to parse_local_chain otherwise.
def self.parse_chain_file
  @chains = []
  @chains_complete = []

  options = @@doc
  @@gen1        = options["<genome1>"]     # reference genome
  @@gen2        = options["<genome2>"]     # query genome
  @@chr_input   = options["<chromosome>"]  # reference chromosome
  @@coord_start = options["<start>"].to_i  # reference start coordinate
  @@coord_end   = options["<end>"].to_i    # reference end coordinate

  # No local chain file given: fall back to downloading one.
  return LiftOver::Query.parse_remote_chain if options["<chain_file>"].nil?

  LiftOver::Query.parse_local_chain
end #parse_chain_file
|
58
|
+
|
59
|
+
#Retrieve and parse remote chain file from http://hgdownload.cse.ucsc.edu/goldenPath/
#
#Downloads <gen1>To<gen2>.over.chain.gz, keeps every chain accepted by
#search_for_chain_by_interval, lifts @@coord_start and @@coord_end through
#each kept chain and prints one "chromosome:start-end" line per chain.
#Prints "Candidate chain not found" when no chain is kept.
#Connection and system errors are reported on stdout; an HTTP error is
#reported and then terminates the process.
def self.parse_remote_chain
  # The sample docopt result recorded in this script lists "-v" and
  # "--verbose" as separate keys, so honour whichever one is set.
  verbose = @@doc["--verbose"] || @@doc["-v"]
  url = "http://hgdownload.cse.ucsc.edu/goldenPath/#{@@gen1}/liftOver/#{@@gen1}To#{@@gen2}.over.chain.gz"
  begin
    puts "Downloading chain file...\n" if verbose
    # URI#open comes from open-uri. Kernel#open stopped accepting URLs in
    # Ruby 3.0, while this form works on old and new versions alike.
    URI.parse(url).open do |rmt_file|
      puts "Download finished\n" if verbose
      puts "Unpacking remote file...\n" if verbose
      chain_text = Zlib::GzipReader.new(rmt_file).read
      puts "Unpack finished\n" if verbose

      result = []   # one {header => alignment rows} hash per kept chain
      header = nil  # header of the chain currently being collected, if any
      coords = []
      chain_text.each_line do |raw_line|
        line = raw_line.chomp
        if line.start_with?("chain")
          # A new header closes the chain collected so far, whatever the
          # verdict on the new one. Without this, two consecutive matches
          # would be merged into one.
          result.push({header => coords}) if header
          header = LiftOver::Query.search_for_chain_by_interval(line) == true ? line : nil
          coords = []
        elsif header && !line.empty?
          coords.push(line.split("\t").map(&:to_i))
        end
      end
      # The last chain of the file has no following header to close it.
      result.push({header => coords}) if header

      puts "Candidate chain not found" if result.empty?

      res = result.map do |hits|
        fields = hits.keys[0].split(" ")
        start_val = LiftOver::Query.lift(hits, @@coord_start)
        end_val = LiftOver::Query.lift(hits, @@coord_end)
        puts "\"Lifting event\"...\n" if verbose
        if fields[9] != "-"
          "#{fields[7]}:#{start_val}-#{end_val}"
        else
          # Minus-strand chain: report both lifted values, swapped.
          "#{fields[7]}:#{end_val}-#{start_val}"
        end
      end
      puts "Done\n" if verbose
      puts res
    end
  rescue SocketError => e
    puts "There's a connection problem with your request.\n Error message: #{e.message}."
  rescue SystemCallError => e
    puts "There's a problem with your request.\n Error message: #{e.message}."
  rescue OpenURI::HTTPError => e
    puts "Couldn't retrieve chain file. Genome names are case sensitive. Please check http://hgdownload.cse.ucsc.edu/goldenPath\n Error message: #{e.message}."
    exit
  end
end #parse_remote_chain
|
131
|
+
|
132
|
+
#Parse local chain file
#
#Reads the gzipped chain file named by the <chain_file> option, keeps every
#chain accepted by search_for_chain_by_interval, lifts @@coord_start and
#@@coord_end through each kept chain and prints one "chromosome:start-end"
#line per chain. Prints "Candidate chain not found" when no chain is kept.
def self.parse_local_chain
  # The sample docopt result recorded in this script lists "-v" and
  # "--verbose" as separate keys, so honour whichever one is set.
  verbose = @@doc["--verbose"] || @@doc["-v"]
  puts "Unpacking local file...\n" if verbose

  result = []   # one {header => alignment rows} hash per kept chain
  header = nil  # header of the chain currently being collected, if any
  coords = []
  # GzipReader.open only ever opens a file on disk and closes it when the
  # block ends. Kernel#open would treat a leading "|" in the user-supplied
  # path as a command to run.
  Zlib::GzipReader.open(@@doc["<chain_file>"]) do |gz|
    puts "Unpack finished\n" if verbose
    gz.each_line do |raw_line|
      line = raw_line.chomp
      if line.start_with?("chain")
        # A new header closes the chain collected so far, whatever the
        # verdict on the new one. Without this, two consecutive matches
        # would be merged into one.
        result.push({header => coords}) if header
        header = LiftOver::Query.search_for_chain_by_interval(line) == true ? line : nil
        coords = []
      elsif header && !line.empty?
        coords.push(line.split("\t").map(&:to_i))
      end
    end
  end
  # The last chain of the file has no following header to close it.
  result.push({header => coords}) if header

  puts "Candidate chain not found" if result.empty?

  res = result.map do |hits|
    fields = hits.keys[0].split(" ")
    start_val = LiftOver::Query.lift(hits, @@coord_start)
    end_val = LiftOver::Query.lift(hits, @@coord_end)
    if fields[9] != "-"
      "#{fields[7]}:#{start_val}-#{end_val}"
    else
      # Minus-strand chain: report both lifted values, swapped.
      "#{fields[7]}:#{end_val}-#{start_val}"
    end
  end
  puts res
end #parse_local_chain
|
190
|
+
|
191
|
+
#Decide whether a chain header line is a candidate for the requested interval.
#
#string - a "chain ..." header line; its whitespace-separated field 2 is
#         compared with @@chr_input and fields 5 and 6 give the chain span.
#
#Returns nil when the chromosome differs, true when searching the chain span
#for @@coord_start...@@coord_end yields at least one hit, false otherwise.
def self.search_for_chain_by_interval(string)
  columns = string.split(" ")
  return unless @@chr_input == columns[2]

  chain_span = columns[5].to_i...columns[6].to_i
  overlaps = IntervalTree::InclusiveTree.new(chain_span).search(@@coord_start...@@coord_end)
  !(overlaps.nil? || overlaps.empty?)
end #search_for_chain_by_interval
|
209
|
+
|
210
|
+
#Lift coordinates
#
#array      - Hash mapping a chain header line to its alignment rows, each row
#             being [size, reference_gap, query_gap]; the final row of a chain
#             carries only the size.
#coordinate - Integer position on the reference sequence.
#
#Walks the ungapped blocks of the chain, tracking where each block starts on
#the reference and on the query. A coordinate inside a block keeps its offset
#from the block start; a coordinate that falls in a gap or outside the chain
#is not mapped.
#
#Returns the lifted position as a String, or "0" when the coordinate is not
#covered by any block.
def self.lift(array, coordinate)
  array.each do |header, rows|
    fields = header.split(" ")
    ref_pos = fields[5].to_i      # reference start of the current block
    query_pos = fields[10].to_i   # query start of the current block
    query_size = fields[8].to_i
    minus_strand = fields[9] == "-"

    rows.each do |size, ref_gap, query_gap|
      size = size.to_i
      # Blocks are treated as half-open: [ref_pos, ref_pos + size).
      if size > 0 && coordinate >= ref_pos && coordinate < ref_pos + size
        lifted = query_pos + (coordinate - ref_pos)
        # Minus-strand query positions are counted from the other end of the
        # query sequence. NOTE(review): this keeps the original
        # query_size - position convention; a strict 0-based conversion would
        # subtract one more - confirm which convention callers expect.
        lifted = query_size - lifted if minus_strand
        return lifted.to_s
      end
      # Both gaps follow the block. The final row has none, so nil counts as 0.
      ref_pos += size + ref_gap.to_i
      query_pos += size + query_gap.to_i
    end
  end
  "0"
end #lift
|
289
|
+
end #Query
|
290
|
+
end #LiftOver
|
291
|
+
|
292
|
+
# Run the liftover only when the command line was parsed successfully.
LiftOver::Query.parse_chain_file unless @@doc.nil?
|
data/lib/bio-liftover.rb
ADDED
@@ -0,0 +1,257 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# Copyright:: Copyright (C) 2009
|
4
|
+
# Andrei Rozanski <rozanski.andrei@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
# == Description
|
7
|
+
# This file contains a liftover for Ruby
|
8
|
+
|
9
|
+
require 'zlib'
|
10
|
+
require 'open-uri'
|
11
|
+
require 'interval-tree'
|
12
|
+
|
13
|
+
module LiftOver
|
14
|
+
class Query
|
15
|
+
# Empty constructor: nothing is set up per instance.
def initialize; end

# Original, misspelled name of the constructor; kept as a no-op so any
# existing caller of it keeps working.
def initilize; end
|
16
|
+
|
17
|
+
#Given a pair of genomes, read the matching chain file and print the lifted
#interval.
#
#gen1, gen2  - assembly names, used verbatim in the UCSC download URL
#              (<gen1>/liftOver/<gen1>To<gen2>.over.chain.gz).
#chr_input   - chromosome name compared with the chain headers.
#coord_start - start coordinate on gen1 (converted with to_i).
#coord_end   - end coordinate on gen1 (converted with to_i).
#chain_file  - path to a local gzipped chain file, or nil to download one.
def self.parse_chain_file(gen1,gen2,chr_input,coord_start,coord_end,chain_file)
  @chains=[]
  @chains_complete=[]
  @@gen1=gen1
  @@gen2=gen2
  @@chr_input=chr_input
  @@coord_start=coord_start.to_i
  @@coord_end=coord_end.to_i
  # Remembered for parse_local_chain, which takes no arguments.
  @@chain_file=chain_file
  if chain_file.nil? #switch between local/remote parse file
    LiftOver::Query.parse_remote_chain
  else
    LiftOver::Query.parse_local_chain
  end
end #parse_chain_file

#Retrieve and parse remote chain file from http://hgdownload.cse.ucsc.edu/goldenPath/
#
#Connection and system errors are reported on stdout; an HTTP error is
#reported and then terminates the process.
def self.parse_remote_chain
  url = "http://hgdownload.cse.ucsc.edu/goldenPath/#{@@gen1}/liftOver/#{@@gen1}To#{@@gen2}.over.chain.gz"
  begin
    # URI#open comes from open-uri. Kernel#open stopped accepting URLs in
    # Ruby 3.0, while this form works on old and new versions alike.
    URI.parse(url).open do |rmt_file|
      chain_text = Zlib::GzipReader.new(rmt_file).read
      LiftOver::Query.report_lifted(LiftOver::Query.collect_candidate_chains(chain_text))
    end
  rescue SocketError => e
    puts "There's a connection problem with your request.\n Error message: #{e.message}."
  rescue SystemCallError => e
    puts "There's a problem with your request.\n Error message: #{e.message}."
  rescue OpenURI::HTTPError => e
    puts "Couldn't retrieve chain file. Genome names are case sensitive. Please check http://hgdownload.cse.ucsc.edu/goldenPath\n Error message: #{e.message}."
    exit
  end
end #parse_remote_chain

#Parse local chain file
#
#Reads the gzipped file whose path parse_chain_file stored in @@chain_file.
def self.parse_local_chain
  # GzipReader.open only ever opens a file on disk and closes it when the
  # block ends. Kernel#open would treat a leading "|" in the path as a
  # command to run.
  chains = Zlib::GzipReader.open(@@chain_file) do |gz|
    LiftOver::Query.collect_candidate_chains(gz)
  end
  LiftOver::Query.report_lifted(chains)
end #parse_local_chain

#Internal helper shared by the remote and local parsers.
#
#source - anything whose each_line yields the lines of a chain file
#         (a String or an open Zlib::GzipReader).
#
#Returns an Array with one {header => alignment rows} Hash for every chain
#accepted by search_for_chain_by_interval.
def self.collect_candidate_chains(source)
  result = []
  header = nil  # header of the chain currently being collected, if any
  coords = []
  source.each_line do |raw_line|
    line = raw_line.chomp
    if line.start_with?("chain")
      # A new header closes the chain collected so far, whatever the verdict
      # on the new one. Without this, two consecutive matches would be
      # merged into one.
      result.push({header => coords}) if header
      header = LiftOver::Query.search_for_chain_by_interval(line) == true ? line : nil
      coords = []
    elsif header && !line.empty?
      coords.push(line.split("\t").map(&:to_i))
    end
  end
  # The last chain of the file has no following header to close it.
  result.push({header => coords}) if header
  result
end #collect_candidate_chains

#Internal helper: lift @@coord_start and @@coord_end through every collected
#chain and print one "chromosome:start-end" line per chain. Prints
#"Candidate chain not found" when the list is empty.
def self.report_lifted(result)
  puts "Candidate chain not found" if result.empty?
  res = result.map do |hits|
    fields = hits.keys[0].split(" ")
    start_val = LiftOver::Query.lift(hits, @@coord_start)
    end_val = LiftOver::Query.lift(hits, @@coord_end)
    if fields[9] != "-"
      "#{fields[7]}:#{start_val}-#{end_val}"
    else
      # Minus-strand chain: report both lifted values, swapped.
      "#{fields[7]}:#{end_val}-#{start_val}"
    end
  end
  puts res
end #report_lifted
|
156
|
+
|
157
|
+
#Decide whether a chain header line is a candidate for the requested interval.
#
#string - a "chain ..." header line; its whitespace-separated field 2 is
#         compared with @@chr_input and fields 5 and 6 give the chain span.
#
#Returns nil when the chromosome differs, true when searching the chain span
#for @@coord_start...@@coord_end yields at least one hit, false otherwise.
def self.search_for_chain_by_interval(string)
  columns = string.split(" ")
  return unless @@chr_input == columns[2]

  chain_span = columns[5].to_i...columns[6].to_i
  overlaps = IntervalTree::InclusiveTree.new(chain_span).search(@@coord_start...@@coord_end)
  !(overlaps.nil? || overlaps.empty?)
end #search_for_chain_by_interval
|
175
|
+
|
176
|
+
#Lift coordinates
#
#array      - Hash mapping a chain header line to its alignment rows, each row
#             being [size, reference_gap, query_gap]; the final row of a chain
#             carries only the size.
#coordinate - Integer position on the reference sequence.
#
#Walks the ungapped blocks of the chain, tracking where each block starts on
#the reference and on the query. A coordinate inside a block keeps its offset
#from the block start; a coordinate that falls in a gap or outside the chain
#is not mapped.
#
#Returns the lifted position as a String, or "0" when the coordinate is not
#covered by any block.
def self.lift(array, coordinate)
  array.each do |header, rows|
    fields = header.split(" ")
    ref_pos = fields[5].to_i      # reference start of the current block
    query_pos = fields[10].to_i   # query start of the current block
    query_size = fields[8].to_i
    minus_strand = fields[9] == "-"

    rows.each do |size, ref_gap, query_gap|
      size = size.to_i
      # Blocks are treated as half-open: [ref_pos, ref_pos + size).
      if size > 0 && coordinate >= ref_pos && coordinate < ref_pos + size
        lifted = query_pos + (coordinate - ref_pos)
        # Minus-strand query positions are counted from the other end of the
        # query sequence. NOTE(review): this keeps the original
        # query_size - position convention; a strict 0-based conversion would
        # subtract one more - confirm which convention callers expect.
        lifted = query_size - lifted if minus_strand
        return lifted.to_s
      end
      # Both gaps follow the block. The final row has none, so nil counts as 0.
      ref_pos += size + ref_gap.to_i
      query_pos += size + query_gap.to_i
    end
  end
  "0"
end #lift
|
255
|
+
end #Query
|
256
|
+
end #LiftOver
|
257
|
+
|
metadata
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-liftover
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Andrei Rozanski
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-03-11 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: interval-tree
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.1.3
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.1.3
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: shoulda
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rdoc
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '3.12'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.12'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: simplecov
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: jeweler
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 2.0.1
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 2.0.1
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: bundler
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 1.0.21
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.0.21
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: bio
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.4.2
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.4.2
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: rdoc
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ~>
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '3.12'
|
134
|
+
type: :development
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '3.12'
|
142
|
+
description: Simple, under development Ruby solution for UCSC LiftOver tool
|
143
|
+
email: andrei@ruivo.org
|
144
|
+
executables:
|
145
|
+
- bio-liftover
|
146
|
+
extensions: []
|
147
|
+
extra_rdoc_files:
|
148
|
+
- LICENSE.txt
|
149
|
+
- README.md
|
150
|
+
- README.rdoc
|
151
|
+
files:
|
152
|
+
- .document
|
153
|
+
- .travis.yml
|
154
|
+
- Gemfile
|
155
|
+
- LICENSE.txt
|
156
|
+
- README.md
|
157
|
+
- README.rdoc
|
158
|
+
- Rakefile
|
159
|
+
- VERSION
|
160
|
+
- bin/bio-liftover
|
161
|
+
- lib/bio-liftover.rb
|
162
|
+
- test/test_bio-liftover.rb
|
163
|
+
homepage: http://github.com/andreirozanski/bioruby-liftover
|
164
|
+
licenses:
|
165
|
+
- MIT
|
166
|
+
post_install_message:
|
167
|
+
rdoc_options: []
|
168
|
+
require_paths:
|
169
|
+
- lib
|
170
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
171
|
+
none: false
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
segments:
|
177
|
+
- 0
|
178
|
+
hash: -1033425692335380436
|
179
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
180
|
+
none: false
|
181
|
+
requirements:
|
182
|
+
- - ! '>='
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '0'
|
185
|
+
requirements: []
|
186
|
+
rubyforge_project:
|
187
|
+
rubygems_version: 1.8.23
|
188
|
+
signing_key:
|
189
|
+
specification_version: 3
|
190
|
+
summary: Ruby solution for UCSC LiftOver tool- (UCSC http://genome.ucsc.edu/cgi-bin/hgLiftOver)
|
191
|
+
test_files: []
|