anncrsnp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +41 -0
- data/Rakefile +6 -0
- data/anncrsnp.gemspec +35 -0
- data/bin/console +14 -0
- data/bin/grdbfinder.rb +472 -0
- data/bin/grdbmanager.rb +226 -0
- data/bin/masterfeatures.rb +188 -0
- data/bin/setup +7 -0
- data/bin/statistics.rb +193 -0
- data/database/deleteme +0 -0
- data/lib/anncrsnp/dataset.rb +178 -0
- data/lib/anncrsnp/parsers/ucscparser.rb +35 -0
- data/lib/anncrsnp/version.rb +3 -0
- data/lib/anncrsnp.rb +5 -0
- metadata +144 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 45770752f232d33dc6c3c5c46f96d5a8978c49bc
|
4
|
+
data.tar.gz: 02aed0e5cd60873006e6f4070ece816d7ba7103b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 895816f11e25e0d8046e63d81712b775794c989e413d6b4564588edcc3e6cc803c42f5bfe6c17c53e6b273cf7da973f03a96e18784622ec2ecc36a71eb11e30b
|
7
|
+
data.tar.gz: daaa2211a52f6b2464d57cccc87ce18bfe4afc69e467194e09207df73696d29c90d92ba11af2ce1d23c0f75bebe89754f26e409fb7fe18eedeeb5d380ce91cad
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Elena Rojano, Pedro Seoane
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Anncrsnp
|
2
|
+
|
3
|
+
AnNCR-SNP is a tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding regions. The library code lives in `lib/anncrsnp` and the command-line programs in `bin/`. To experiment with the code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
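AnNCR-SNP integrates data from various sources and consists of a local database plus two main programs: the manager (`bin/grdbmanager.rb`), which creates the local database, and the finder (`bin/grdbfinder.rb`), which receives user queries, searches the local database and reports the matching regions.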
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'anncrsnp'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install anncrsnp
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
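Query the local database with `bin/grdbfinder.rb`, either by coordinates (`-c chrN:start:end`) or by region name (`-n NAME`); both accept a comma-separated list or a file with one entry per line. Use `-F INTEGER` to add a flanking region, `-g TYPE` to group results by region type, `-f FORMAT` to choose the output format (default `html`), `-o PATH` to set the output path prefix (default `results`) and `-p PATH` to point to the SQLite database. Run `bin/grdbfinder.rb --help` for the full option list.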
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/anncrsnp.
|
36
|
+
|
37
|
+
|
38
|
+
## License
|
39
|
+
|
40
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
41
|
+
|
data/Rakefile
ADDED
data/anncrsnp.gemspec
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'anncrsnp/version'
|
5
|
+
|
6
|
+
# Gem specification for anncrsnp.
Gem::Specification.new do |spec|
  spec.name          = "anncrsnp"
  spec.version       = Anncrsnp::VERSION
  spec.authors       = ["Elena Rojano", "Pedro Seoane"]
  spec.email         = ["elenarojano@outlook.com", "seoanezonjic@hotmail.com"]

  spec.summary       = %q{Tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding regions.}
  spec.description   = %q{AnNCR-SNP integrates data from various sources, allowing the user to investigate the potential effects of variants in non-coding regions of the human genome. AnNCR-SNP consists of a database containing data on all non-coding elements and two main programs: manager and finder. The manager program is responsible for creating the local data-base, and the finder program receives the user queries in order to search in the local database and retrieve information. The user can find information about various regu-latory elements, such as TFBs, open chromatin, histone modification and methyla-tion sites, information about SNPs from dbSNP and gene information from RefSeq.}
  spec.homepage      = ""
  spec.license       = "MIT"

  # # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # # delete this section to allow pushing this gem to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  # end

  # Package every tracked file except test suites.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }

  # The command-line programs of this gem live in bin/ (there is no exe/
  # directory), so bindir must point there or no executable is installed.
  # Only the *.rb programs are exposed; the development helpers bin/console
  # and bin/setup have no .rb extension and are therefore left out.
  spec.bindir        = "bin"
  spec.executables   = spec.files.grep(%r{^bin/.+\.rb$}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_dependency "sqlite3"
  spec.add_dependency "rubyzip"
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "anncrsnp"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/grdbfinder.rb
ADDED
@@ -0,0 +1,472 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp'))
|
5
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp', 'parsers'))
|
6
|
+
|
7
|
+
require 'optparse'
|
8
|
+
require 'dataset'
|
9
|
+
require 'sqlite3'
|
10
|
+
require 'benchmark'
|
11
|
+
require 'net/http'
|
12
|
+
require 'zip'
|
13
|
+
|
14
|
+
######################################################################################################################
|
15
|
+
## METHODS
|
16
|
+
######################################################################################################################
|
17
|
+
|
18
|
+
# QUERYING METHODS
|
19
|
+
#----------------------------------------------------
|
20
|
+
# Collects every GenomicRange row that overlaps one of the query windows.
#
# coords          - list of [chr, start, stop] triplets.
# flanking_region - number of bases added on both sides of each triplet
#                   (the lower bound is clamped to 0).
#
# Candidate rows are first fetched from the global $db handle by chromosome
# and bin (coordinate / 10000) and then filtered by real coordinates
# (row[2] = start, row[3] = end). Returns the concatenated matching rows.
def query_coordinates(coords, flanking_region)
  hits = []
  coords.each do |chr, start, stop|
    window_start = [start.to_i - flanking_region, 0].max
    window_stop = stop.to_i + flanking_region
    candidates = $db.execute("SELECT * FROM GenomicRange WHERE chr=? AND (bin BETWEEN ? AND ?)", chr, window_start/10000, window_stop/10000)
    overlapping = candidates.select do |row|
      row_start = row[2]
      row_stop = row[3]
      row_start.between?(window_start, window_stop) ||          # row starts inside the window
        row_stop.between?(window_start, window_stop) ||         # row ends inside the window
        (row_start <= window_start && row_stop >= window_stop)  # row spans the whole window
    end
    hits.concat(overlapping)
  end
  hits
end
|
41
|
+
|
42
|
+
# Returns the GenomicRange rows whose name column equals +name+, as fetched
# from the global $db handle.
def query_name(name)
  $db.execute("SELECT * FROM GenomicRange WHERE name=?", name)
end
|
48
|
+
|
49
|
+
# Looks up every region named in +name+ (a list of names) and then queries
# the database again with the coordinates of those regions, widened by
# +flanking_region+, to retrieve everything located around them.
#
# Returns the rows found by coordinates, without duplicates.
def query_name_and_region(name, flanking_region)
  named_rows = []
  name.each { |reg_name| named_rows.concat(query_name(reg_name)) }
  windows = named_rows.map { |row| row[1..3] } # chr, start, stop
  matches = query_coordinates(windows, flanking_region)
  matches.uniq! # sub-queries can retrieve the same element more than once
  matches
end
|
59
|
+
|
60
|
+
# REPORTING METHODS
|
61
|
+
#----------------------------------------------------
|
62
|
+
# Writes the flat list of regions to "<output_path>_simple_list.<output_format>".
# The 'gff' format is handled by simple_list_gff; any other format is
# written by simple_list_html.
def simple_list(genomic_regions, output_path, output_format)
  path = output_path + '_simple_list.' + output_format
  writer = output_format == 'gff' ? :simple_list_gff : :simple_list_html
  send(writer, genomic_regions, path)
end
|
70
|
+
|
71
|
+
# Writes the regions as a single HTML table to +path+.
#
# genomic_regions - rows laid out as [bin, chr, start, stop, type, name, annotation].
# path            - file to create (overwritten if it exists).
#
# Every table row is now explicitly closed: the closing '</tr>' used to sit
# on its own line without a joining comma, so it was evaluated as a stray
# expression and never written. The block form of File.open guarantees the
# file is closed even if writing fails.
def simple_list_html(genomic_regions, path)
  File.open(path, 'w') do |report|
    report.puts '<HTML>',
                '<header>',
                '</header>',
                '<body>',
                '<table border=1>',
                '<tr>',
                '<th>Chromosome</th><th>Start</th><th>End</th><th>Region type</th><th>Region id</th>',
                '</tr>'
    genomic_regions.each do |_bin, chr, start, stop, type, name, _annotationid|
      report.puts '<tr>',
                  "<td>#{chr}</td><td>#{start}</td><td>#{stop}</td><td>#{type}</td><td>#{name}</td>",
                  '</tr>'
    end
    report.puts '</table>',
                '</body>',
                '</HTML>'
  end
end
|
91
|
+
|
92
|
+
# Writes the regions as a GFF3 file to +path+. The generated file can be used
# on http://genometools.org/cgi-bin/annotationsketch_demo.cgi
#
# genomic_regions - rows laid out as [bin, chr, start, stop, type, name, annotation].
#
# The first feature is a 'chromosome' line that spans all regions, widened by
# 5% of the covered length on each side (never below 0) and labelled with the
# chromosome of the first region. With an empty region list only the GFF
# header is written instead of failing on the missing first region. The block
# form of File.open guarantees the file is closed even if writing fails.
def simple_list_gff(genomic_regions, path)
  source = File.basename(__FILE__)
  File.open(path, 'w') do |report|
    report.puts '##gff-version 3'
    next if genomic_regions.empty?

    main_chr = genomic_regions.first[1]
    min_start = genomic_regions.map { |g_reg| g_reg[2] }.min
    max_stop = genomic_regions.map { |g_reg| g_reg[3] }.max
    add_region = ((max_stop - min_start).abs * 0.05).to_i
    region_start = [min_start - add_region, 0].max
    region_stop = max_stop + add_region
    report.puts "#{main_chr}\t#{source}\tchromosome\t#{region_start}\t#{region_stop}\t.\t.\t.\tID=#{main_chr}"
    genomic_regions.each do |_bin, chr, start, stop, type, name, _annotationid|
      report.puts "#{chr}\t#{source}\t#{type}\t#{start}\t#{stop}\t.\t.\t.\tName=#{name}"
    end
  end
end
|
108
|
+
|
109
|
+
# Splits the comma separated annotation-id string stored in the last field
# of each record and stores the resulting array in slot 6 of that record
# (the records are modified in place).
#
# Returns a hash whose keys are the distinct annotation ids (all values are
# nil). Records whose first id is an empty string contribute no keys.
def get_uniq_ids_from_records(records)
  records.each_with_object({}) do |rec, ids|
    annotation_ids = rec.last.split(',')
    rec[6] = annotation_ids
    next if annotation_ids.first == ''
    annotation_ids.each { |annot_id| ids[annot_id] = nil }
  end
end
|
122
|
+
|
123
|
+
# Replaces the annotation-id string held in slot 6 of every region by a hash
# { annotation type name => annotation value } resolved through the
# Annotation and AnnotationType tables of the global $db handle.
#
# genomic_regions - rows laid out as [bin, chr, start, stop, type, name, annotation ids];
#                   they are modified in place.
#
# Slot 6 is ALWAYS left as a hash, even when no region carries annotation ids
# or the database returns nothing. Previously the slot stayed an array in
# those cases and the report writers failed when asking it for its keys.
# Ids without a matching Annotation row are skipped instead of producing a
# nil => nil entry.
#
# Returns genomic_regions.
def load_annotations(genomic_regions)
  annotations = get_uniq_ids_from_records(genomic_regions) # id (String) => nil
  unless annotations.empty?
    id_marks = Array.new(annotations.length, '?').join(',')
    db_annotations = $db.execute("SELECT rowid, * FROM Annotation WHERE rowid IN(#{id_marks})", annotations.keys)
    annotation_types = db_annotations.map { |db_an| db_an.last }.uniq
    unless annotation_types.empty?
      type_marks = Array.new(annotation_types.length, '?').join(',')
      db_annotation_types = $db.execute("SELECT rowid, * FROM AnnotationType WHERE rowid IN(#{type_marks})", annotation_types)
      db_annotation_types = db_annotation_types.group_by { |r| r.first }
      db_annotations.each do |db_an|
        db_an[0] = db_an[0].to_s                              # region records hold ids as strings
        db_an[2] = db_annotation_types[db_an[2]].first.last   # type id -> type name
      end
      db_annotations.each do |annot|
        id = annot.shift
        annotations[id] = annot # [value, type name]
      end
    end
  end
  genomic_regions.each do |g_reg|
    final_annot = {}
    g_reg.last.each do |id|
      resolved = annotations[id]
      next if resolved.nil?
      value, type = resolved
      final_annot[type] = value
    end
    g_reg[6] = final_annot
  end
  genomic_regions
end
|
153
|
+
|
154
|
+
# Builds one pseudo-region of type 'query_coords' per query triplet so the
# query windows themselves can be used as grouping regions.
#
# coords - list of [chr, start, stop] triplets.
#
# Returns rows laid out as [bin, chr, start, stop, type, name, annot] with a
# nil bin, the name "Q_<chr>_<start>-<stop>" and an empty annotation string.
def generate_query_regions(coords)
  coords.map do |chr, start, stop|
    [nil, chr, start, stop, 'query_coords', "Q_#{chr}_#{start}-#{stop}", '']
  end
end
|
161
|
+
|
162
|
+
# Writes a report in which every region of type +group+ is listed together
# with the regions of other types that overlap it.
#
# group           - region type used as the main (grouping) regions.
# genomic_regions - rows laid out as [bin, chr, start, stop, type, name, annotation ids];
#                   annotations are loaded into them in place.
# output_path     - prefix of the report; "_grouping_list.<format>" is appended.
# output_format   - 'html' or 'txt'.
#
# When no region has the requested type, or the format is not supported,
# a message is printed on stderr and nothing is written. Previously the
# first case crashed inside the report writers and the second was silent.
def grouping_list(group, genomic_regions, output_path, output_format)
  path = output_path + '_grouping_list.' + output_format
  load_annotations(genomic_regions)
  main_regions, putative_overlapping_regions = genomic_regions.partition { |reg| reg[4] == group }
  if main_regions.empty?
    warn "No regions of type '#{group}' found, grouping list not generated"
    return
  end
  overlaping_index = get_overlapping_regions_batch(main_regions, putative_overlapping_regions)
  case output_format
  when 'html'
    grouping_list_html(overlaping_index, main_regions, putative_overlapping_regions, path)
  when 'txt'
    grouping_list_txt(overlaping_index, main_regions, putative_overlapping_regions, path)
  else
    warn "Output format '#{output_format}' is not supported for grouping lists (use html or txt)"
  end
end
|
174
|
+
|
175
|
+
# Writes the grouping report as a tab separated table to +path+.
#
# overlaping_index             - hash { position in main_regions => [positions in
#                                putative_overlapping_regions that overlap it] }.
# main_regions                 - regions of the grouping type, one output row each.
# putative_overlapping_regions - regions of the other types.
#
# Regions are rows laid out as [bin, chr, start, stop, type, name, annotations]
# where annotations is a hash { annotation type => value }.
#
# Columns: Id, Chromosome, Start, Stop and the annotations of the main region,
# followed, for every other region type, by either one column per annotation
# type or (when the type has no annotations) one column with the region names.
# Several overlapping regions of one type are joined with ','; '-' marks
# missing data.
#
# Rows are built as cell lists and joined with tabs, so data rows no longer
# carry a trailing tab that the header line does not have. The file is
# written through the block form of File.open so it is always closed.
# Nothing is written when main_regions is empty.
def grouping_list_txt(overlaping_index, main_regions, putative_overlapping_regions, path)
  return if main_regions.empty?

  overlaping_regions = overlaping_index.values.flatten.uniq.map { |pos| putative_overlapping_regions[pos] }
  grouping_type = main_regions.first[4]
  basic_fields = ['Id', 'Chromosome', 'Start', 'Stop']
  header_structure = get_header({grouping_type => basic_fields}, main_regions + overlaping_regions)

  header_cells = header_structure.flat_map do |region_type, annotations|
    annotations.empty? ? [region_type] : annotations.map { |an| region_type + '.' + an }
  end
  # Annotation columns of the main regions (computed once, not per row).
  main_annotation_types = header_structure[grouping_type] - basic_fields

  File.open(path, 'w') do |report|
    report.puts header_cells.join("\t")
    main_regions.each_with_index do |main_region, position|
      local_overlapping_regions = overlaping_index[position].map { |pos| putative_overlapping_regions[pos] }
      cells = [main_region[5], main_region[1], main_region[2], main_region[3]]
      main_annotation_types.each { |annotation_type| cells << main_region.last[annotation_type] }
      header_structure.each do |region_type, annotation_types|
        next if region_type == grouping_type
        records = local_overlapping_regions.select { |r| r[4] == region_type }
        if records.empty?
          cells.concat(['-'] * [annotation_types.length, 1].max)
        elsif annotation_types.empty?
          cells << records.map { |r| r[5] }.uniq.join(',')
        else
          annotation_types.each do |an_type|
            cells << records.map { |r| r.last[an_type] }.uniq.join(',')
          end
        end
      end
      report.puts cells.join("\t")
    end
  end
end
|
223
|
+
|
224
|
+
# Finds, for every main region, the putative regions located on the same
# chromosome that overlap it.
#
# Both arguments are lists of rows laid out as
# [bin, chr, start, stop, type, name, annotations]; start <= stop is assumed.
#
# Two intervals overlap when each one starts before the other ends. The
# previous test only checked whether an END of the main region fell inside
# the other region, so regions fully contained in the main region (e.g. a SNP
# inside a gene) were never reported.
#
# Returns a hash { position in main_regions => [positions in
# putative_overlapping_regions] }; every main region gets an entry.
def get_overlapping_regions_batch(main_regions, putative_overlapping_regions)
  index = {}
  main_regions.each_with_index do |(_bin, chr, start, stop), main_position|
    index[main_position] = []
    putative_overlapping_regions.each_with_index do |(_bin_over, chr_over, start_over, stop_over), over_position|
      next unless chr == chr_over
      index[main_position] << over_position if start <= stop_over && stop >= start_over
    end
  end
  index
end
|
243
|
+
|
244
|
+
# Writes the grouping report as an HTML table to +path+.
#
# overlaping_index             - hash { position in main_regions => [positions in
#                                putative_overlapping_regions that overlap it] }.
# main_regions                 - regions of the grouping type.
# putative_overlapping_regions - regions of the other types.
#
# Regions are rows laid out as [bin, chr, start, stop, type, name, annotations]
# where annotations is a hash { annotation type => value }.
#
# Each main region takes as many table rows as the largest number of
# overlapping regions of a single type; its own cells span all of them
# through rowspan. Each row shows, per region type, the next overlapping
# region not yet printed (or empty cells when none is left).
#
# Continuation rows of a group now start with their own '<tr>'; previously
# only '</tr>' was emitted for them. The file is written through the block
# form of File.open so it is always closed. Nothing is written when
# main_regions is empty.
def grouping_list_html(overlaping_index, main_regions, putative_overlapping_regions, path)
  return if main_regions.empty?

  overlaping_regions = overlaping_index.values.flatten.uniq.map { |pos| putative_overlapping_regions[pos] }
  grouping_type = main_regions.first[4]
  basic_fields = ['Id', 'Chromosome', 'Start', 'Stop']
  header_structure = get_header({grouping_type => basic_fields}, main_regions + overlaping_regions)

  File.open(path, 'w') do |report|
    report.puts '<HTML>',
                '<header>',
                '</header>',
                '<body>',
                '<table border=1>'
    report.puts get_grouping_html_header(header_structure)
    # From here on only the annotation columns of the main regions are needed.
    header_structure[grouping_type] = header_structure[grouping_type] - basic_fields
    main_regions.each_with_index do |main_region, position|
      local_overlapping_regions = overlaping_index[position].map { |pos| putative_overlapping_regions[pos] }
      record_rows = get_max_overlapping_regions_by_type(local_overlapping_regions)
      rowspan = record_rows > 1 ? " rowspan=#{record_rows}" : nil
      report.puts '<tr>',
                  "<td#{rowspan}>#{main_region[5]}</td>",
                  "<td#{rowspan}>#{main_region[1]}</td>",
                  "<td#{rowspan}>#{main_region[2]}</td>",
                  "<td#{rowspan}>#{main_region[3]}</td>"
      header_structure[grouping_type].each do |annotation_type|
        report.puts "<td#{rowspan}>#{main_region.last[annotation_type]}</td>"
      end
      record_rows.times do |row_number|
        # The first row was opened together with the main region cells.
        report.puts '<tr>' if row_number > 0
        header_structure.each do |region_type, annotation_types|
          next if region_type == grouping_type
          record = local_overlapping_regions.find { |r| r[4] == region_type }
          if record.nil?
            report.puts "<td></td>" * [annotation_types.length, 1].max
          else
            if annotation_types.empty?
              report.puts "<td>#{record[5]}</td>"
            else
              annotation_types.each do |an_type|
                report.puts "<td>#{record.last[an_type]}</td>"
              end
            end
            local_overlapping_regions.delete(record) # already printed
          end
        end
        report.puts '</tr>'
      end
    end
    report.puts '</table>',
                '</body>',
                '</HTML>'
  end
end
|
302
|
+
|
303
|
+
# Counts the regions of each type (field 4 of every row) and returns the
# largest count, never less than 1.
def get_max_overlapping_regions_by_type(local_overlapping_regions)
  per_type = Hash.new(0)
  local_overlapping_regions.each { |region| per_type[region[4]] += 1 }
  ([1] + per_type.values).max
end
|
311
|
+
|
312
|
+
# Builds the two header rows of the grouping table.
#
# header_structure - hash { region type => [column names] }.
#
# The first row holds one cell per region type: it spans both header rows
# when the type has no columns and spans its columns when it has more than
# one. The second row holds the column names of all types, in order.
# Returns both rows as a single string.
def get_grouping_html_header(header_structure)
  title_cells = []
  column_cells = []
  header_structure.each do |main_title, cols|
    span = if cols.length == 0
             " rowspan=2"
           elsif cols.length > 1
             " colspan=#{cols.length}"
           else
             ''
           end
    title_cells << "<th#{span}>#{main_title}</th>\n"
    cols.each { |col| column_cells << "<th>#{col}</th>\n" }
  end
  "<tr>\n#{title_cells.join}</tr>\n<tr>\n#{column_cells.join}</tr>\n"
end
|
328
|
+
|
329
|
+
# Completes +header+ (hash { region type => [column names] }) with the
# annotation names found in the given regions.
#
# genomic_regions - rows whose field 4 is the region type and whose last
#                   field is a hash of annotations.
#
# Names of an already known type are merged without duplicates, keeping the
# existing order. +header+ is modified in place and returned.
def get_header(header, genomic_regions)
  genomic_regions.each_with_object(header) do |region, columns|
    type = region[4]
    names = region.last.keys
    known = columns[type]
    columns[type] = known.nil? ? names : (known | names)
  end
end
|
342
|
+
|
343
|
+
# DATABASE METHODS
|
344
|
+
#----------------------------------------------------
|
345
|
+
|
346
|
+
# Downloads database.zip from bio-267-data.uma.es into the directory of
# +database_path+, extracts its entries to +database_path+ and removes the
# zip once the database file exists.
#
# The archive is written in binary mode ('wb') so its bytes are never
# translated by the platform, and through the block form of File.open so the
# handle is closed even if the transfer fails. A non-2xx HTTP answer raises
# (Net::HTTPResponse#value) instead of storing an error page as the archive.
# File.exist? replaces the deprecated File.exists?.
def download_database(database_path)
  out_path = File.dirname(database_path)
  puts "Downloading database in #{out_path}, please be patient..."
  zip_path = File.join(out_path, 'database.zip')
  File.open(zip_path, 'wb') do |f|
    Net::HTTP.start("bio-267-data.uma.es") do |http|
      http.request_get('/database.zip') do |resp|
        resp.value # raises unless the request succeeded
        resp.read_body do |segment|
          f.write(segment)
        end
      end
    end
  end
  puts "Decompressing database..."
  Zip::File.open(zip_path) do |zip_file|
    zip_file.each do |entry|
      entry.extract(database_path)
    end
  end
  File.delete(zip_path) if File.exist?(database_path)
end
|
369
|
+
|
370
|
+
######################################################################################################################
|
371
|
+
## INPUT PARAMETER PARSING
|
372
|
+
######################################################################################################################
|
373
|
+
# Returns the entries of a list-like command line argument: the lines of the
# file when +value+ is the path of an existing file, otherwise the comma
# separated items of +value+ itself.
def read_list_argument(value)
  if File.exist?(value)
    File.readlines(value).map { |line| line.chomp }
  else
    value.split(',')
  end
end

# Parses the command line arguments in +argv+ (consumed in place) and
# returns the options hash, with defaults for everything not given.
#
# Uses the true/false literals and File.exist? instead of the deprecated
# TRUE/FALSE constants and File.exists?.
def parse_options(argv)
  options = {
    :coords => [],
    :name => [],
    :input_names_only => false,
    :flanking_region => 0,
    :path_sql => File.join(File.dirname(__FILE__), "..", "database", "genomic_data.sqlite"),
    :group => nil,
    :output_format => 'html',
    :output_path => "results",
    :representation => false,
    :type => [],
    :verbose => nil
  }
  OptionParser.new do |opts|
    opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

    opts.on("-c", '--region_coordinates STRING', 'Coordinates to make the search. Format: chrN:start:end') do |coords|
      # Each entry is chr:start:end; start and end are stored as integers.
      options[:coords] = read_list_argument(coords).map do |line|
        chr, start, stop = line.split(':')
        [chr, start.to_i, stop.to_i]
      end
    end

    opts.on("-n", '--region_name STRING', 'Search region by name') do |region|
      options[:name] = read_list_argument(region)
    end

    opts.on("-i", '--input_names_only', 'Show info about only input data') do
      options[:input_names_only] = true
    end

    opts.on("-F", '--flanking_region INTEGER', 'Flanking region to search aroun the elements') do |flanking_region|
      options[:flanking_region] = flanking_region.to_i
    end

    opts.on("-p", "--path_sql PATH", "Path SQL DB to make queries") do |path|
      options[:path_sql] = path
    end

    opts.on("-g", '--group_by_region_type STRING', 'Use region type for group results by their coordinates') do |group|
      options[:group] = group
    end

    opts.on("-f", '--output_format PATH', 'Output format for results. Default:html') do |output_format|
      options[:output_format] = output_format
    end

    opts.on("-o", '--output_path PATH', 'Output path for queries') do |output_path|
      options[:output_path] = output_path
    end

    opts.on("-r", '--graphical_representation', 'Make a representation of the selected region') do
      options[:representation] = true
    end

    opts.on("-t", '--type_regions STRING', 'Region types to make the search. Format: region1,region2,region3...') do |type|
      options[:type] = type.split(',')
    end

    opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
      options[:verbose] = v
    end
  end.parse!(argv)
  options
end

options = parse_options(ARGV)
|
443
|
+
|
444
|
+
######################################################################################################################
|
445
|
+
## MAIN
|
446
|
+
######################################################################################################################
|
447
|
+
# Fetch the database on first use. File.exist? replaces the deprecated
# File.exists?.
download_database(options[:path_sql]) unless File.exist?(options[:path_sql])
$db = SQLite3::Database.new(options[:path_sql])

# Coordinates take precedence over names when both are given.
genomic_regions = []
if !options[:coords].empty?
  genomic_regions = query_coordinates(options[:coords], options[:flanking_region])
elsif !options[:name].empty?
  genomic_regions = query_name_and_region(options[:name], options[:flanking_region])
end

if genomic_regions.empty?
  puts 'Results not found'
else
  # With -i and -g, regions of the grouping type are kept only when they were
  # named in the query; regions of any other type are always kept.
  if options[:input_names_only] && !options[:group].nil?
    genomic_regions.select! { |reg| options[:name].include?(reg[5]) || reg[4] != options[:group] }
  end
  simple_list(genomic_regions, options[:output_path], options[:output_format])
  simple_list(genomic_regions, options[:output_path], 'gff') if options[:representation]
  if !options[:group].nil?
    # Grouping by 'query_coords' uses the query windows themselves as main regions.
    if options[:group] == 'query_coords' && !options[:coords].empty?
      genomic_regions.concat(generate_query_regions(options[:coords]))
    end
    grouping_list(options[:group], genomic_regions, options[:output_path], options[:output_format])
  end
end
|