anncrsnp 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +41 -0
- data/Rakefile +6 -0
- data/anncrsnp.gemspec +35 -0
- data/bin/console +14 -0
- data/bin/grdbfinder.rb +472 -0
- data/bin/grdbmanager.rb +226 -0
- data/bin/masterfeatures.rb +188 -0
- data/bin/setup +7 -0
- data/bin/statistics.rb +193 -0
- data/database/deleteme +0 -0
- data/lib/anncrsnp/dataset.rb +178 -0
- data/lib/anncrsnp/parsers/ucscparser.rb +35 -0
- data/lib/anncrsnp/version.rb +3 -0
- data/lib/anncrsnp.rb +5 -0
- metadata +144 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 45770752f232d33dc6c3c5c46f96d5a8978c49bc
|
4
|
+
data.tar.gz: 02aed0e5cd60873006e6f4070ece816d7ba7103b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 895816f11e25e0d8046e63d81712b775794c989e413d6b4564588edcc3e6cc803c42f5bfe6c17c53e6b273cf7da973f03a96e18784622ec2ecc36a71eb11e30b
|
7
|
+
data.tar.gz: daaa2211a52f6b2464d57cccc87ce18bfe4afc69e467194e09207df73696d29c90d92ba11af2ce1d23c0f75bebe89754f26e409fb7fe18eedeeb5d380ce91cad
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Elena Rojano, Pedro Seoane
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Anncrsnp
|
2
|
+
|
3
|
+
AnNCR-SNP is a tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding regions. It consists of a local database of non-coding elements and two main programs: the manager, which creates the local database, and the finder, which receives user queries, searches the local database and retrieves the matching information.
|
4
|
+
|
5
|
+
The library code lives in `lib/anncrsnp`. To experiment with that code, run `bin/console` for an interactive prompt.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'anncrsnp'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install anncrsnp
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
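Run `bin/grdbfinder.rb --help` to list all available options. For example, `bin/grdbfinder.rb -c chr1:1000:2000 -F 500 -f html -o results` searches the region `chr1:1000:2000` with a flanking region of 500 and writes an HTML report using the `results` output prefix.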
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/anncrsnp.
|
36
|
+
|
37
|
+
|
38
|
+
## License
|
39
|
+
|
40
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
41
|
+
|
data/Rakefile
ADDED
data/anncrsnp.gemspec
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for anncrsnp: metadata, packaged files, executables and
# dependencies.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'anncrsnp/version'

Gem::Specification.new do |spec|
  spec.name          = "anncrsnp"
  spec.version       = Anncrsnp::VERSION
  spec.authors       = ["Elena Rojano", "Pedro Seoane"]
  spec.email         = ["elenarojano@outlook.com", "seoanezonjic@hotmail.com"]

  spec.summary       = %q{Tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding regions.}
  spec.description   = %q{AnNCR-SNP integrates data from various sources, allowing the user to investigate the potential effects of variants in non-coding regions of the human genome. AnNCR-SNP consists of a database containing data on all non-coding elements and two main programs: manager and finder. The manager program is responsible for creating the local database, and the finder program receives the user queries in order to search in the local database and retrieve information. The user can find information about various regulatory elements, such as TFBs, open chromatin, histone modification and methylation sites, information about SNPs from dbSNP and gene information from RefSeq.}
  spec.homepage      = ""
  spec.license       = "MIT"

  # # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # # delete this section to allow pushing this gem to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  # end

  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  # The command-line programs are shipped in bin/ (not exe/), so the bindir
  # must point there or no executable is installed with the gem. Only the
  # *.rb scripts are exposed; bin/console and bin/setup are development
  # helpers and are deliberately left out.
  spec.bindir        = "bin"
  spec.executables   = spec.files.grep(%r{^bin/.+\.rb$}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_dependency "sqlite3"
  spec.add_dependency "rubyzip"
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the gem, then opens an IRB session.
require 'bundler/setup'
require 'anncrsnp'

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier. A different console can also be used.

# (To use Pry instead, add pry to the Gemfile and enable the two lines below.)
# require "pry"
# Pry.start

require 'irb'
IRB.start
|
data/bin/grdbfinder.rb
ADDED
@@ -0,0 +1,472 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp'))
|
5
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp', 'parsers'))
|
6
|
+
|
7
|
+
require 'optparse'
|
8
|
+
require 'dataset'
|
9
|
+
require 'sqlite3'
|
10
|
+
require 'benchmark'
|
11
|
+
require 'net/http'
|
12
|
+
require 'zip'
|
13
|
+
|
14
|
+
######################################################################################################################
|
15
|
+
## METHODS
|
16
|
+
######################################################################################################################
|
17
|
+
|
18
|
+
# QUERYING METHODS
|
19
|
+
#----------------------------------------------------
|
20
|
+
# Collects the GenomicRange rows that overlap each query window.
#
# coords          - Array of [chr, start, stop] triples.
# flanking_region - Integer added on both sides of every window (the lower
#                   bound is clamped at 0).
#
# Rows are fetched from the global $db handle by chromosome and by the bin
# range start/10000..stop/10000, then filtered in Ruby: a row is kept when its
# start (column 2) or end (column 3) lies inside the window, or when the row
# spans the whole window.
#
# Returns an Array with the matching rows of all windows, in query order.
def query_coordinates(coords, flanking_region)
  coords.flat_map do |chr, first, last|
    window_start = [first.to_i - flanking_region, 0].max
    window_stop = last.to_i + flanking_region
    candidates = $db.execute("SELECT * FROM GenomicRange WHERE chr=? AND (bin BETWEEN ? AND ?)", chr, window_start/10000, window_stop/10000)
    candidates.select do |row|
      row_start = row[2]
      row_stop = row[3]
      row_start.between?(window_start, window_stop) ||  # row start is in the window
        row_stop.between?(window_start, window_stop) || # row end is in the window
        (row_start <= window_start && row_stop >= window_stop) # window is inside the row
    end
  end
end
|
41
|
+
|
42
|
+
# Looks up GenomicRange rows by exact name through the global $db handle.
#
# name - value bound to the `name=?` placeholder.
#
# Returns whatever $db.execute returns for the query.
def query_name(name)
  $db.execute("SELECT * FROM GenomicRange WHERE name=?", name)
end
|
48
|
+
|
49
|
+
# Finds the regions carrying the given names and then every region that
# overlaps them.
#
# name            - Array of region names (each one is looked up with query_name).
# flanking_region - Integer passed through to query_coordinates.
#
# The [chr, start, stop] columns (indexes 1..3) of the named rows are used as
# query windows. Overlapping windows can retrieve the same row more than once,
# so duplicates are removed.
#
# Returns an Array of unique rows.
def query_name_and_region(name, flanking_region)
  named_hits = name.flat_map { |reg_name| query_name(reg_name) }
  seed_windows = named_hits.map { |hit| hit[1..3] } # chr, start, stop
  query_coordinates(seed_windows, flanking_region).uniq
end
|
59
|
+
|
60
|
+
# REPORTING METHODS
|
61
|
+
#----------------------------------------------------
|
62
|
+
# Writes the flat list report, choosing the writer from the output format.
#
# genomic_regions - rows to report.
# output_path     - path prefix; '_simple_list.<format>' is appended.
# output_format   - 'gff' selects the GFF writer; any other value falls back
#                   to the HTML writer.
def simple_list(genomic_regions, output_path, output_format)
  target = output_path + '_simple_list.' + output_format
  case output_format
  when 'gff' then simple_list_gff(genomic_regions, target)
  else simple_list_html(genomic_regions, target)
  end
end
|
70
|
+
|
71
|
+
# Writes the regions as a plain HTML table (one row per region).
#
# genomic_regions - Array of rows laid out as
#                   [bin, chr, start, stop, type, name, annotation].
# path            - file to (over)write.
#
# The header row and every data row are closed with '</tr>'. Previously the
# closing tag sat on its own line after an argument list without a trailing
# comma, so it was evaluated as a no-op string and never written.
# The file is opened in block form so it is closed even if writing fails.
def simple_list_html(genomic_regions, path)
  File.open(path, 'w') do |report|
    report.puts '<HTML>',
                '<header>',
                '</header>',
                '<body>',
                '<table border=1>',
                '<tr>',
                '<th>Chromosome</th><th>Start</th><th>End</th><th>Region type</th><th>Region id</th>',
                '</tr>'
    genomic_regions.each do |_bin, chr, start, stop, type, name, _annotationid|
      report.puts '<tr>',
                  "<td>#{chr}</td><td>#{start}</td><td>#{stop}</td><td>#{type}</td><td>#{name}</td>",
                  '</tr>'
    end
    report.puts '</table>',
                '</body>',
                '</HTML>'
  end
end
|
91
|
+
|
92
|
+
# Writes the regions as a GFF3 file (the generated file can be drawn on
# http://genometools.org/cgi-bin/annotationsketch_demo.cgi).
#
# genomic_regions - Array of rows laid out as
#                   [bin, chr, start, stop, type, name, annotation].
# path            - file to (over)write.
#
# A 'chromosome' feature is emitted first. It is named after the chromosome
# of the FIRST region and spans from the smallest start to the largest stop of
# all regions, padded by 5% of that span on each side (lower bound clamped at
# 0). One feature line per region follows.
#
# With no regions only the '##gff-version 3' header is written (previously
# this raised NoMethodError on nil). The file is opened in block form so it is
# closed even if writing fails.
def simple_list_gff(genomic_regions, path)
  source = File.basename(__FILE__)
  File.open(path, 'w') do |report|
    report.puts '##gff-version 3'
    next if genomic_regions.empty?

    main_chr = genomic_regions.first[1]
    min_start = genomic_regions.map { |g_reg| g_reg[2] }.min
    max_stop = genomic_regions.map { |g_reg| g_reg[3] }.max
    add_region = ((max_stop - min_start).abs * 0.05).to_i
    region_start = [min_start - add_region, 0].max
    region_stop = max_stop + add_region
    report.puts "#{main_chr}\t#{source}\tchromosome\t#{region_start}\t#{region_stop}\t.\t.\t.\tID=#{main_chr}"
    genomic_regions.each do |_bin, chr, start, stop, type, name, _annotationid|
      report.puts "#{chr}\t#{source}\t#{type}\t#{start}\t#{stop}\t.\t.\t.\tName=#{name}"
    end
  end
end
|
108
|
+
|
109
|
+
# Splits the comma-separated annotation id string of every record and gathers
# the distinct ids.
#
# records - Array of rows whose last element is a String of comma-separated
#           annotation ids. Each row is modified in place: index 6 is replaced
#           by the Array of ids obtained from the split.
#
# A record whose first split id is the empty string contributes no ids.
#
# Returns a Hash with one key per distinct id (all values nil), in order of
# first appearance.
def get_uniq_ids_from_records(records)
  records.each_with_object({}) do |rec, ids|
    annotation_ids = rec.last.split(',')
    rec[6] = annotation_ids
    next if annotation_ids.first == ''

    annotation_ids.each { |annot_id| ids[annot_id] = nil }
  end
end
|
122
|
+
|
123
|
+
# Replaces the annotation id list of every region with a Hash of
# { annotation type name => annotation value }.
#
# genomic_regions - Array of rows; modified in place. On entry the last
#                   element of each row is the comma-separated id String; on
#                   exit index 6 always holds a Hash (empty when the region
#                   has no resolvable annotation).
#
# Annotation rows are read from the global $db handle with
# "SELECT rowid, * FROM Annotation" and are used as [rowid, value, type id];
# type names are taken from the last column of
# "SELECT rowid, * FROM AnnotationType".
# NOTE(review): the column layout is inferred from how the rows are indexed
# here - confirm against the database schema.
#
# Fix: index 6 is now converted to a Hash for every region, even when no
# annotation ids exist or none is found in the database. Before, it was left
# as an Array in those cases and the report writers failed when calling
# Hash methods on it. Ids that cannot be resolved are skipped instead of
# producing a nil => nil entry.
#
# Returns genomic_regions.
def load_annotations(genomic_regions)
  annotations = get_uniq_ids_from_records(genomic_regions)
  unless annotations.empty?
    db_annotations = $db.execute("SELECT rowid, * FROM Annotation WHERE rowid IN(#{Array.new(annotations.length, '?').join(',')})", annotations.keys)
    annotation_types = db_annotations.map { |db_an| db_an.last }.uniq
    unless annotation_types.empty?
      db_annotation_types = $db.execute("SELECT rowid, * FROM AnnotationType WHERE rowid IN(#{Array.new(annotation_types.length, '?').join(',')})", annotation_types)
      db_annotation_types = db_annotation_types.group_by { |r| r.first }
      db_annotations.each do |db_an|
        # ids coming from the regions are Strings, so the rowid is keyed as one
        id = db_an[0].to_s
        type_name = db_annotation_types[db_an[2]].first.last
        annotations[id] = [db_an[1], type_name]
      end
    end
  end
  genomic_regions.each do |g_reg|
    final_annot = {}
    g_reg.last.each do |id|
      value, type = annotations[id]
      next if type.nil? # id without a resolved annotation

      final_annot[type] = value
    end
    g_reg[6] = final_annot
  end
end
|
153
|
+
|
154
|
+
# Builds one pseudo-region row per query window so the windows themselves can
# be used as a grouping type.
#
# coords - Array of [chr, start, stop] triples.
#
# Returns an Array of rows [bin, chr, start, stop, type, name, annot] with a
# nil bin, the type 'query_coords', the name "Q_<chr>_<start>-<stop>" and an
# empty annotation string.
def generate_query_regions(coords)
  coords.map do |chr, start, stop|
    [nil, chr, start, stop, 'query_coords', "Q_#{chr}_#{start}-#{stop}", '']
  end
end
|
161
|
+
|
162
|
+
# Writes the report that groups, for every region of type `group`, the regions
# of other types that overlap it.
#
# group           - region type (column 4) used as the grouping key.
# genomic_regions - rows to report; their annotations are loaded in place.
# output_path     - path prefix; '_grouping_list.<format>' is appended.
# output_format   - 'html' or 'txt'.
#
# When no region has the requested type there is nothing to group: a warning
# is printed and no report is written (the writers would otherwise fail on the
# empty list). An output format other than 'html'/'txt' also produces a
# warning instead of being ignored silently.
def grouping_list(group, genomic_regions, output_path, output_format)
  path = output_path + '_grouping_list.' + output_format
  load_annotations(genomic_regions)
  main_regions, putative_overlapping_regions = genomic_regions.partition { |reg| reg[4] == group }
  if main_regions.empty?
    warn "No regions of type '#{group}' were found, grouping report not generated"
    return
  end
  overlaping_index = get_overlapping_regions_batch(main_regions, putative_overlapping_regions)
  case output_format
  when 'html'
    grouping_list_html(overlaping_index, main_regions, putative_overlapping_regions, path)
  when 'txt'
    grouping_list_txt(overlaping_index, main_regions, putative_overlapping_regions, path)
  else
    warn "Output format '#{output_format}' is not available for grouping reports (use html or txt)"
  end
end
|
174
|
+
|
175
|
+
# Writes the grouping report as a tab-separated text file.
#
# overlaping_index             - Hash { main region position => Array of
#                                positions in putative_overlapping_regions }.
# main_regions                 - rows of the grouping type (must not be empty;
#                                the type is read from the first row).
# putative_overlapping_regions - rows of the other types.
# path                         - file to (over)write.
#
# Rows are [bin, chr, start, stop, type, name, annotations Hash].
#
# Layout: a header line, then one line per main region. The main region
# contributes its name, chromosome, start, stop and one column per annotation
# type. Every other region type contributes either one column with the names
# of the overlapping regions (when that type has no annotations) or one column
# per annotation type; several values are joined with ',' and '-' marks
# "no overlapping region of this type".
#
# Changes over the previous version: data lines no longer end with a trailing
# tab (the header never had one, so rows had one column more than the header),
# the removal of the basic fields is computed once instead of on every row,
# and the file is closed even if writing fails.
def grouping_list_txt(overlaping_index, main_regions, putative_overlapping_regions, path)
  overlaping_regions = overlaping_index.values.flatten.uniq.map { |pos| putative_overlapping_regions[pos] }
  grouping_type = main_regions.first[4]
  basic_fields = ['Id', 'Chromosome', 'Start', 'Stop']
  header_structure = get_header({grouping_type => basic_fields}, main_regions + overlaping_regions)
  main_annotation_types = header_structure[grouping_type] - basic_fields

  File.open(path, 'w') do |report|
    header_columns = header_structure.flat_map do |region_type, annotations|
      annotations.empty? ? [region_type] : annotations.map { |an| region_type + '.' + an }
    end
    report.puts header_columns.join("\t")

    main_regions.each_with_index do |main_region, position|
      local_overlapping_regions = overlaping_index[position].map { |pos| putative_overlapping_regions[pos] }
      row = [main_region[5], main_region[1], main_region[2], main_region[3]]
      main_annotation_types.each { |annotation_type| row << main_region.last[annotation_type] }

      header_structure.each do |region_type, annotation_types|
        next if region_type == grouping_type

        records = local_overlapping_regions.select { |r| r[4] == region_type }
        if annotation_types.empty?
          # types without annotations are reported by region name
          row << (records.empty? ? '-' : records.map { |r| r[5] }.uniq.join(','))
        elsif records.empty?
          row.concat(['-'] * annotation_types.length)
        else
          annotation_types.each do |an_type|
            row << records.map { |r| r.last[an_type] }.uniq.join(',')
          end
        end
      end
      report.puts row.join("\t")
    end
  end
end
|
223
|
+
|
224
|
+
# Computes, for every main region, which candidate regions overlap it.
#
# main_regions                 - Array of rows [bin, chr, start, stop, ...].
# putative_overlapping_regions - Array of candidate rows with the same layout.
#
# Two regions overlap when they are on the same chromosome and their closed
# intervals intersect (start <= other stop and stop >= other start). This
# also covers candidates lying completely inside the main region, which the
# previous end-point-only test missed.
#
# Returns a Hash { main region position => Array of candidate positions };
# every main position is present, with an empty Array when nothing overlaps.
def get_overlapping_regions_batch(main_regions, putative_overlapping_regions)
  index = {}
  main_regions.each_with_index do |(_bin, chr, start, stop), main_position|
    index[main_position] = []
    putative_overlapping_regions.each_with_index do |(_bin_over, chr_over, start_over, stop_over), over_position|
      next unless chr == chr_over && start <= stop_over && stop >= start_over

      index[main_position] << over_position
    end
  end
  index
end
|
243
|
+
|
244
|
+
# Writes the grouping report as an HTML table.
#
# overlaping_index             - Hash { main region position => Array of
#                                positions in putative_overlapping_regions }.
# main_regions                 - rows of the grouping type (must not be empty;
#                                the type is read from the first row).
# putative_overlapping_regions - rows of the other types.
# path                         - file to (over)write.
#
# Rows are [bin, chr, start, stop, type, name, annotations Hash].
#
# Every main region takes as many table rows as the largest number of
# overlapping regions of a single type (at least one); its own cells span all
# of them through rowspan. In each of those rows, every other region type
# shows the next not-yet-printed overlapping region of that type, or empty
# cells when none is left.
#
# Fix: each additional row of a main region now starts with its own '<tr>'
# (only the first row used to be opened while every row was closed). The file
# is opened in block form so it is closed even if writing fails.
def grouping_list_html(overlaping_index, main_regions, putative_overlapping_regions, path)
  overlaping_regions = overlaping_index.values.flatten.uniq.map { |pos| putative_overlapping_regions[pos] }
  grouping_type = main_regions.first[4]
  basic_fields = ['Id', 'Chromosome', 'Start', 'Stop']
  header_structure = get_header({grouping_type => basic_fields}, main_regions + overlaping_regions)

  File.open(path, 'w') do |report|
    report.puts '<HTML>',
                '<header>',
                '</header>',
                '<body>',
                '<table border=1>'
    report.puts get_grouping_html_header(header_structure)
    # the basic fields are printed explicitly below; keep only annotation types
    header_structure[grouping_type] = header_structure[grouping_type] - basic_fields

    main_regions.each_with_index do |main_region, position|
      local_overlapping_regions = overlaping_index[position].map { |pos| putative_overlapping_regions[pos] }
      record_rows = get_max_overlapping_regions_by_type(local_overlapping_regions)
      rowspan = record_rows > 1 ? " rowspan=#{record_rows}" : nil
      report.puts '<tr>',
                  "<td#{rowspan}>#{main_region[5]}</td>",
                  "<td#{rowspan}>#{main_region[1]}</td>",
                  "<td#{rowspan}>#{main_region[2]}</td>",
                  "<td#{rowspan}>#{main_region[3]}</td>"
      header_structure[grouping_type].each do |annotation_type|
        report.puts "<td#{rowspan}>#{main_region.last[annotation_type]}</td>"
      end

      record_rows.times do |row_number|
        # the first row was opened together with the main region cells
        report.puts '<tr>' if row_number > 0
        header_structure.each do |region_type, annotation_types|
          next if region_type == grouping_type

          record = local_overlapping_regions.find { |r| r[4] == region_type }
          if record.nil?
            if annotation_types.empty?
              report.puts "<td></td>"
            else
              report.puts "<td></td>" * annotation_types.length
            end
          else
            if annotation_types.empty?
              report.puts "<td>#{record[5]}</td>"
            else
              annotation_types.each do |an_type|
                report.puts "<td>#{record.last[an_type]}</td>"
              end
            end
            # consumed: the next row shows the following region of this type
            local_overlapping_regions.delete(record)
          end
        end
        report.puts '</tr>'
      end
    end
    report.puts '</table>',
                '</body>',
                '</HTML>'
  end
end
|
302
|
+
|
303
|
+
# Counts the regions of each type (column 4) and returns the largest count.
#
# local_overlapping_regions - Array of region rows.
#
# Returns an Integer that is never lower than 1, even for an empty list.
def get_max_overlapping_regions_by_type(local_overlapping_regions)
  per_type = Hash.new(0)
  local_overlapping_regions.each { |region| per_type[region[4]] += 1 }
  [1, *per_type.values].max
end
|
311
|
+
|
312
|
+
# Renders the two header rows of the grouping HTML table.
#
# header_structure - Hash { region type => Array of column names }.
#
# The first row has one cell per region type: it spans both header rows when
# the type has no columns and spans its columns when it has more than one.
# The second row has one cell per column name.
#
# Returns the HTML of both rows as a String.
def get_grouping_html_header(header_structure)
  title_cells = header_structure.map do |main_title, cols|
    span = ''
    span += " rowspan=2" if cols.length == 0
    span += " colspan=#{cols.length}" if cols.length > 1
    "<th#{span}>#{main_title}</th>\n"
  end
  column_cells = header_structure.flat_map do |_main_title, cols|
    cols.map { |col| "<th>#{col}</th>\n" }
  end
  "<tr>\n#{title_cells.join}</tr>\n" + "<tr>\n#{column_cells.join}</tr>\n"
end
|
328
|
+
|
329
|
+
# Extends a header structure with the annotation names found in the regions.
#
# header          - Hash { region type => Array of column names }; it is
#                   modified in place and also returned.
# genomic_regions - rows whose column 4 is the region type and whose last
#                   element is a Hash of annotations.
#
# For every region, the keys of its annotation Hash are merged (set union,
# first-seen order) into the column list of its type; unseen types get a new
# entry.
def get_header(header, genomic_regions)
  genomic_regions.each_with_object(header) do |ge_reg, structure|
    region_type = ge_reg[4]
    annotation_names = ge_reg.last.keys
    known = structure[region_type]
    structure[region_type] = known.nil? ? annotation_names : known | annotation_names
  end
end
|
342
|
+
|
343
|
+
# DATABASE METHODS
|
344
|
+
#----------------------------------------------------
|
345
|
+
|
346
|
+
# Downloads the zipped database from bio-267-data.uma.es and unpacks it.
#
# database_path - destination path of the database file; the zip is stored
#                 temporarily as 'database.zip' in the same directory.
#
# Every entry of the archive is extracted to database_path.
# NOTE(review): this presumes the archive holds a single entry - confirm.
# The zip is removed once the database file exists.
#
# Changes over the previous version:
# * the zip is written in binary mode ('wb'), text mode can corrupt it on
#   platforms that translate line endings;
# * a non-success HTTP status raises instead of saving the error page as zip;
# * the file handle is closed, and a partial download removed, on failure;
# * File.exist? replaces File.exists?, which no longer exists in Ruby >= 3.2.
#
# Raises the Net::HTTP error reported by Net::HTTPResponse#value when the
# server does not answer with a 2xx status.
def download_database(database_path)
  out_path = File.dirname(database_path)
  puts "Downloading database in #{out_path}, please be patient..."
  zip_path = File.join(out_path, 'database.zip')
  begin
    File.open(zip_path, 'wb') do |f|
      Net::HTTP.start("bio-267-data.uma.es") do |http|
        http.request_get('/database.zip') do |resp|
          resp.value # raises unless the response is a 2xx success
          resp.read_body { |segment| f.write(segment) }
        end
      end
    end
  rescue StandardError
    File.delete(zip_path) if File.exist?(zip_path)
    raise
  end
  puts "Decompressing database..."
  Zip::File.open(zip_path) do |zip_file|
    zip_file.each do |entry|
      entry.extract(database_path)
    end
  end
  File.delete(zip_path) if File.exist?(database_path)
end
|
369
|
+
|
370
|
+
######################################################################################################################
|
371
|
+
## INPUT PARAMETER PARSING
|
372
|
+
######################################################################################################################
|
373
|
+
# Command-line options. Defaults are set right before each option is declared.
# `false`/`true` and File.exist? are used instead of FALSE/TRUE and
# File.exists?, which were removed in Ruby 3.2 and made the script fail at
# start-up.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

  options[:coords] = []
  opts.on("-c", '--region_coordinates STRING', 'Coordinates to make the search. Format: chrN:start:end') do |coords|
    # The value is either a file with one coordinate per line or a
    # comma-separated list given inline.
    coord_lines = if File.exist?(coords)
                    File.readlines(coords).map { |line| line.chomp }
                  else
                    coords.split(',')
                  end
    options[:coords] = coord_lines.map { |line| line.split(':') }.map { |chr, start, stop| [chr, start.to_i, stop.to_i] }
  end

  options[:name] = []
  opts.on("-n", '--region_name STRING', 'Search region by name') do |region|
    # Same convention as --region_coordinates: file path or inline list.
    options[:name] = if File.exist?(region)
                       File.readlines(region).map { |line| line.chomp }
                     else
                       region.split(',')
                     end
  end

  options[:input_names_only] = false
  opts.on("-i", '--input_names_only', 'Show info about only input data') do
    options[:input_names_only] = true
  end

  options[:flanking_region] = 0
  opts.on("-F", '--flanking_region INTEGER', 'Flanking region to search aroun the elements') do |flanking_region|
    options[:flanking_region] = flanking_region.to_i
  end

  options[:path_sql] = File.join(File.dirname(__FILE__), "..", "database", "genomic_data.sqlite")
  opts.on("-p", "--path_sql PATH", "Path SQL DB to make queries") do |path|
    options[:path_sql] = path
  end

  options[:group] = nil
  opts.on("-g", '--group_by_region_type STRING', 'Use region type for group results by their coordinates') do |group|
    options[:group] = group
  end

  options[:output_format] = 'html'
  opts.on("-f", '--output_format PATH', 'Output format for results. Default:html') do |output_format|
    options[:output_format] = output_format
  end

  options[:output_path] = "results"
  opts.on("-o", '--output_path PATH', 'Output path for queries') do |output_path|
    options[:output_path] = output_path
  end

  options[:representation] = false
  opts.on("-r", '--graphical_representation', 'Make a representation of the selected region') do
    options[:representation] = true
  end

  options[:type] = []
  opts.on("-t", '--type_regions STRING', 'Region types to make the search. Format: region1,region2,region3...') do |type|
    options[:type] = type.split(',')
  end

  options[:verbose] = nil
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end

end.parse!
|
443
|
+
|
444
|
+
######################################################################################################################
|
445
|
+
## MAIN
|
446
|
+
######################################################################################################################
|
447
|
+
# Main flow: make sure the database is available, run the query and write the
# requested reports.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
download_database(options[:path_sql]) unless File.exist?(options[:path_sql])
$db = SQLite3::Database.new(options[:path_sql])

# Coordinates take precedence over names when both are given.
genomic_regions = []
if !options[:coords].empty?
  genomic_regions = query_coordinates(options[:coords], options[:flanking_region])
elsif !options[:name].empty?
  genomic_regions = query_name_and_region(options[:name], options[:flanking_region])
end

if genomic_regions.empty?
  puts 'Results not found'
else
  # With --input_names_only, regions of the grouping type are kept only when
  # they were named in the query; regions of other types are always kept.
  if options[:input_names_only] && !options[:group].nil?
    genomic_regions.select! { |reg| options[:name].include?(reg[5]) || reg[4] != options[:group] }
  end
  simple_list(genomic_regions, options[:output_path], options[:output_format])
  simple_list(genomic_regions, options[:output_path], 'gff') if options[:representation]
  unless options[:group].nil?
    # Grouping by 'query_coords' uses the query windows themselves as regions.
    if options[:group] == 'query_coords' && !options[:coords].empty?
      genomic_regions.concat(generate_query_regions(options[:coords]))
    end
    grouping_list(options[:group], genomic_regions, options[:output_path], options[:output_format])
  end
end
|