anncrsnp 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 45770752f232d33dc6c3c5c46f96d5a8978c49bc
4
+ data.tar.gz: 02aed0e5cd60873006e6f4070ece816d7ba7103b
5
+ SHA512:
6
+ metadata.gz: 895816f11e25e0d8046e63d81712b775794c989e413d6b4564588edcc3e6cc803c42f5bfe6c17c53e6b273cf7da973f03a96e18784622ec2ecc36a71eb11e30b
7
+ data.tar.gz: daaa2211a52f6b2464d57cccc87ce18bfe4afc69e467194e09207df73696d29c90d92ba11af2ce1d23c0f75bebe89754f26e409fb7fe18eedeeb5d380ce91cad
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in anncrsnp.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 TODO: Write your name
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # Anncrsnp
2
+
3
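+ AnNCR-SNP is a tool to characterize Single Nucleotide Polymorphisms (SNPs) in genomic non-coding regions. It integrates data from various sources, allowing the user to investigate the potential effects of variants in non-coding regions of the human genome.
4
+
5
+ The gem consists of a local database of non-coding elements and two main programs: the manager, which creates the local database, and the finder, which receives user queries, searches the local database and retrieves the matching information.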
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'anncrsnp'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install anncrsnp
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/anncrsnp.
36
+
37
+
38
+ ## License
39
+
40
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
41
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Registers the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the specs.
task default: :spec
data/anncrsnp.gemspec ADDED
@@ -0,0 +1,35 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'anncrsnp/version'
5
+
6
# Gem metadata for anncrsnp. The version constant comes from anncrsnp/version,
# which is required before this block.
Gem::Specification.new do |spec|
  spec.name          = "anncrsnp"
  spec.version       = Anncrsnp::VERSION
  spec.authors       = ["Elena Rojano", "Pedro Seoane"]
  spec.email         = ["elenarojano@outlook.com", "seoanezonjic@hotmail.com"]

  spec.summary       = %q{Tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding regions.}
  # The description previously carried line-break hyphenation artifacts
  # ("data-base", "regu-latory", "methyla-tion"); they are repaired here.
  spec.description   = %q{AnNCR-SNP integrates data from various sources, allowing the user to investigate the potential effects of variants in non-coding regions of the human genome. AnNCR-SNP consists of a database containing data on all non-coding elements and two main programs: manager and finder. The manager program is responsible for creating the local database, and the finder program receives the user queries in order to search in the local database and retrieve information. The user can find information about various regulatory elements, such as TFBs, open chromatin, histone modification and methylation sites, information about SNPs from dbSNP and gene information from RefSeq.}
  spec.homepage      = ""
  spec.license       = "MIT"

  # # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # # delete this section to allow pushing this gem to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  # end

  # Package every git-tracked file except test/spec/features directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  # NOTE(review): executables are collected from exe/, but the command-line
  # script visible in this gem lives at bin/grdbfinder.rb - confirm whether it
  # is meant to be installed as an executable.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_dependency "sqlite3"
  spec.add_dependency "rubyzip"
end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "anncrsnp"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/grdbfinder.rb ADDED
@@ -0,0 +1,472 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp'))
5
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp', 'parsers'))
6
+
7
+ require 'optparse'
8
+ require 'dataset'
9
+ require 'sqlite3'
10
+ require 'benchmark'
11
+ require 'net/http'
12
+ require 'zip'
13
+
14
+ ######################################################################################################################
15
+ ## METHODS
16
+ ######################################################################################################################
17
+
18
+ # QUERYING METHODS
19
+ #----------------------------------------------------
20
# Collects every GenomicRange row that overlaps any of the query intervals.
#
# coords          - Array of [chr, start, stop] triples; start and stop are
#                   coerced with #to_i.
# flanking_region - Integer number of positions added on both sides of each
#                   interval (the extended start is clamped at 0).
# bin_size        - Integer divisor used to turn coordinates into values of the
#                   GenomicRange.bin column. Defaults to 10_000, the value that
#                   used to be hard-coded here.
#
# Uses the global $db handle, which must respond to #execute.
# Rows are read positionally: [2] is the region start and [3] the region end.
#
# Returns an Array with the matching rows of all intervals, in query order.
def query_coordinates(coords, flanking_region, bin_size = 10_000)
  coords.each_with_object([]) do |(chr, start, stop), genomic_regions|
    start = [start.to_i - flanking_region, 0].max
    stop = stop.to_i + flanking_region
    # The bin range only narrows the candidates; the exact overlap is checked below.
    candidates = $db.execute(
      "SELECT * FROM GenomicRange WHERE chr=? AND (bin BETWEEN ? AND ?)",
      chr, start / bin_size, stop / bin_size
    )
    overlapping = candidates.select do |g_reg|
      region_start = g_reg[2]
      region_stop = g_reg[3]
      (region_start >= start && region_start <= stop) || # region start lies in the query
        (region_stop >= start && region_stop <= stop) || # region end lies in the query
        (region_start <= start && region_stop >= stop)   # query lies inside the region
    end
    genomic_regions.concat(overlapping)
  end
end
41
+
42
# Looks up GenomicRange rows by their exact name.
#
# name - value bound to the `name=?` placeholder.
#
# Uses the global $db handle. Returns whatever $db.execute returns for the
# query (an Array of rows in the callers visible in this file).
def query_name(name)
  $db.execute("SELECT * FROM GenomicRange WHERE name=?", name)
end
48
+
49
# Finds the regions with the given names and then every region overlapping
# them (optionally widened by a flanking margin).
#
# name            - Array of region names to look up with query_name.
# flanking_region - Integer margin handed to query_coordinates.
#
# Returns an Array of rows without duplicates.
def query_name_and_region(name, flanking_region)
  named_regions = name.flat_map { |reg_name| query_name(reg_name) }
  # Columns 1..3 of a row are chr, start, stop.
  seed_coords = named_regions.map { |g_reg| g_reg[1..3] }
  # Sub-queries can retrieve the same elements, so repeated rows are dropped.
  query_coordinates(seed_coords, flanking_region).uniq
end
59
+
60
+ # REPORTING METHODS
61
+ #----------------------------------------------------
62
# Writes the flat list report for the given regions.
#
# The report goes to "<output_path>_simple_list.<output_format>". The 'gff'
# format is handled by simple_list_gff; any other format value is written by
# simple_list_html.
def simple_list(genomic_regions, output_path, output_format)
  path = output_path + '_simple_list.' + output_format
  case output_format
  when 'gff' then simple_list_gff(genomic_regions, path)
  else simple_list_html(genomic_regions, path)
  end
end
70
+
71
# Writes the regions as a single HTML table, one row per region.
#
# genomic_regions - Array of rows read positionally as
#                   bin, chr, start, stop, type, name, annotation ids.
# path            - destination file; it is overwritten.
#
# The header row and every data row are closed with '</tr>'. Previously a
# missing trailing comma turned each '</tr>' literal into a no-op statement,
# so it was never written. The file is closed even if writing raises.
def simple_list_html(genomic_regions, path)
  File.open(path, 'w') do |report|
    report.puts '<HTML>',
                '<header>',
                '</header>',
                '<body>',
                '<table border=1>',
                '<tr>',
                '<th>Chromosome</th><th>Start</th><th>End</th><th>Region type</th><th>Region id</th>',
                '</tr>'
    genomic_regions.each do |_bin, chr, start, stop, type, name, _annotationid|
      report.puts '<tr>',
                  "<td>#{chr}</td><td>#{start}</td><td>#{stop}</td><td>#{type}</td><td>#{name}</td>",
                  '</tr>'
    end
    report.puts '</table>',
                '</body>',
                '</HTML>'
  end
end
91
+
92
# Writes the regions as a GFF3 file. The generated file can be rendered at
# http://genometools.org/cgi-bin/annotationsketch_demo.cgi
#
# genomic_regions - Array of rows read positionally as
#                   bin, chr, start, stop, type, name, annotation ids.
# path            - destination file; it is overwritten.
#
# After the version pragma a 'chromosome' feature is written that spans all
# regions plus a 5% margin on each side (start clamped at 0), followed by one
# feature line per region. With an empty region list only the pragma line is
# written (this used to raise NoMethodError). Returns nil.
def simple_list_gff(genomic_regions, path)
  source = File.basename(__FILE__)
  File.open(path, 'w') do |report|
    report.puts '##gff-version 3'
    next if genomic_regions.empty?

    # NOTE(review): the enclosing feature is labelled with the chromosome of the
    # first row only, while its extent is computed over every row.
    main_chr = genomic_regions.first[1]
    min_start = genomic_regions.map { |g_reg| g_reg[2] }.min
    max_stop = genomic_regions.map { |g_reg| g_reg[3] }.max
    margin = ((max_stop - min_start).abs * 0.05).to_i
    region_start = [min_start - margin, 0].max
    region_stop = max_stop + margin
    report.puts "#{main_chr}\t#{source}\tchromosome\t#{region_start}\t#{region_stop}\t.\t.\t.\tID=#{main_chr}"
    genomic_regions.each do |_bin, chr, start, stop, type, name, _annotationid|
      report.puts "#{chr}\t#{source}\t#{type}\t#{start}\t#{stop}\t.\t.\t.\tName=#{name}"
    end
  end
  nil
end
108
+
109
# Splits the comma-separated annotation id string found in the last column of
# every record and gathers the distinct ids.
#
# Side effect: column 6 of each record is replaced by the Array of ids.
# A record whose first id is the empty string contributes no ids.
#
# Returns a Hash whose keys are the ids (all values are nil).
def get_uniq_ids_from_records(records)
  records.each_with_object({}) do |rec, ids|
    annotation_ids = rec.last.split(',')
    rec[6] = annotation_ids
    next if annotation_ids.first == ''

    annotation_ids.each { |annot_id| ids[annot_id] = nil }
  end
end
122
+
123
# Replaces the annotation id list of every region with a Hash that maps
# annotation type name => annotation value.
#
# genomic_regions - Array of rows whose last column is a comma-separated
#                   String of Annotation rowids; the rows are modified in place.
#
# Uses the global $db handle. Annotation rows are read positionally as
# [rowid, value, type id] and AnnotationType rows as [rowid, ..., name]
# (the last column is taken as the name).
#
# Column 6 always ends up as a Hash, also when no region carries annotations
# or nothing is found in the database. Previously the id Array was left in
# place in those cases, which made the report writers fail when they asked it
# for keys. Ids that cannot be resolved are skipped instead of producing a
# nil => nil entry.
#
# Returns genomic_regions.
def load_annotations(genomic_regions)
  annotations = get_uniq_ids_from_records(genomic_regions)
  resolved = {} # annotation id (String) => [value, type name]
  unless annotations.empty?
    placeholders = Array.new(annotations.length, '?').join(',')
    db_annotations = $db.execute("SELECT rowid, * FROM Annotation WHERE rowid IN(#{placeholders})", annotations.keys)
    annotation_types = db_annotations.map { |db_an| db_an.last }.uniq
    unless annotation_types.empty?
      placeholders = Array.new(annotation_types.length, '?').join(',')
      db_annotation_types = $db.execute("SELECT rowid, * FROM AnnotationType WHERE rowid IN(#{placeholders})", annotation_types)
      types_by_id = db_annotation_types.group_by { |row| row.first }
      db_annotations.each do |db_an|
        type_rows = types_by_id[db_an[2]]
        next if type_rows.nil?

        # Ids coming from the regions are Strings, so the rowid is stringified.
        resolved[db_an[0].to_s] = [db_an[1], type_rows.first.last]
      end
    end
  end
  genomic_regions.each do |g_reg|
    g_reg[6] = g_reg.last.each_with_object({}) do |id, final_annot|
      value, type = resolved[id]
      final_annot[type] = value unless type.nil?
    end
  end
end
153
+
154
# Builds one pseudo-region per query interval so that the queries themselves
# can be used as the grouping regions.
#
# coords - Array of [chr, start, stop] triples.
#
# Returns an Array of rows laid out as bin, chr, start, stop, type, name,
# annotation ids, with a nil bin, the type 'query_coords', the name
# "Q_<chr>_<start>-<stop>" and an empty annotation id string.
def generate_query_regions(coords)
  coords.map do |chr, start, stop|
    [nil, chr, start, stop, 'query_coords', "Q_#{chr}_#{start}-#{stop}", '']
  end
end
161
+
162
# Writes the grouped report: the regions of type `group` are the main rows and
# every other region overlapping them is listed next to them.
#
# group           - region type (column 4) used to select the main regions.
# genomic_regions - Array of rows; their annotations are loaded in place.
# output_path     - prefix of the report; the file is
#                   "<output_path>_grouping_list.<output_format>".
# output_format   - 'html' or 'txt'.
#
# When no region has the requested type, or the format is not supported,
# a warning is printed to standard error and no report is written. The first
# case used to crash in the report writers; the second was silently ignored.
def grouping_list(group, genomic_regions, output_path, output_format)
  path = output_path + '_grouping_list.' + output_format
  load_annotations(genomic_regions)
  main_regions, putative_overlapping_regions = genomic_regions.partition { |reg| reg[4] == group }
  if main_regions.empty?
    warn "No regions of type '#{group}' were found; grouping report not written"
    return
  end
  overlaping_index = get_overlapping_regions_batch(main_regions, putative_overlapping_regions)
  case output_format
  when 'html'
    grouping_list_html(overlaping_index, main_regions, putative_overlapping_regions, path)
  when 'txt'
    grouping_list_txt(overlaping_index, main_regions, putative_overlapping_regions, path)
  else
    warn "Unsupported grouping output format '#{output_format}' (expected html or txt); grouping report not written"
  end
end
174
+
175
# Writes the grouped report as tab-separated text: one line per main region.
#
# overlaping_index             - Hash: position in main_regions => Array of
#                                positions in putative_overlapping_regions.
# main_regions                 - rows of the grouping type; column 6 must be a
#                                Hash of annotation type => value.
# putative_overlapping_regions - rows of every other type (same layout).
# path                         - destination file; it is overwritten.
#
# Columns: Id, Chromosome, Start, Stop and the annotations of the main region,
# then for every other region type either its annotation columns or, when the
# type has no annotations, a single column with the region names. Several
# overlapping regions of one type are joined with ','; '-' marks no overlap.
#
# Data lines no longer end with a trailing tab, so they carry the same number
# of fields as the header line. Nothing is written when main_regions is empty.
# Returns nil.
def grouping_list_txt(overlaping_index, main_regions, putative_overlapping_regions, path)
  return if main_regions.empty?

  overlaping_regions = overlaping_index.values.flatten.uniq.map { |pos| putative_overlapping_regions[pos] }
  grouping_type = main_regions.first[4]
  basic_fields = ['Id', 'Chromosome', 'Start', 'Stop']
  header_structure = get_header({grouping_type => basic_fields}, main_regions + overlaping_regions)
  header_cells = header_structure.flat_map do |region_type, annotations|
    annotations.empty? ? [region_type] : annotations.map { |an| region_type + '.' + an }
  end
  # The basic fields are printed from fixed columns; only real annotations remain.
  main_annotation_types = header_structure[grouping_type] - basic_fields

  File.open(path, 'w') do |report|
    report.puts header_cells.join("\t")
    main_regions.each_with_index do |main_region, position|
      local_overlapping_regions = overlaping_index[position].map { |pos| putative_overlapping_regions[pos] }
      cells = [main_region[5], main_region[1], main_region[2], main_region[3]]
      main_annotation_types.each { |annotation_type| cells << main_region.last[annotation_type] }
      header_structure.each do |region_type, annotation_types|
        next if region_type == grouping_type

        records = local_overlapping_regions.select { |r| r[4] == region_type }
        if annotation_types.empty?
          cells << (records.empty? ? '-' : records.map { |r| r[5] }.uniq.join(','))
        elsif records.empty?
          cells.concat(['-'] * annotation_types.length)
        else
          annotation_types.each do |an_type|
            cells << records.map { |r| r.last[an_type] }.uniq.join(',')
          end
        end
      end
      report.puts cells.join("\t")
    end
  end
  nil
end
223
+
224
# Computes, for every main region, which candidate regions overlap it.
#
# main_regions                 - Array of rows (bin, chr, start, stop, ...).
# putative_overlapping_regions - Array of rows with the same layout.
#
# Two regions overlap when they are on the same chromosome and their closed
# intervals intersect (touching end points count). This also covers candidates
# lying entirely inside the main region, which the previous end-point test
# missed.
#
# Returns a Hash: position in main_regions => Array of positions in
# putative_overlapping_regions (an empty Array when nothing overlaps).
def get_overlapping_regions_batch(main_regions, putative_overlapping_regions)
  index = {}
  main_regions.each_with_index do |(_bin, chr, start, stop), main_position|
    index[main_position] = []
    putative_overlapping_regions.each_with_index do |(_bin_over, chr_over, start_over, stop_over), over_position|
      next unless chr == chr_over

      index[main_position] << over_position if start <= stop_over && stop >= start_over
    end
  end
  index
end
243
+
244
# Writes the grouped report as an HTML table.
#
# overlaping_index             - Hash: position in main_regions => Array of
#                                positions in putative_overlapping_regions.
# main_regions                 - rows of the grouping type; column 6 must be a
#                                Hash of annotation type => value.
# putative_overlapping_regions - rows of every other type (same layout).
# path                         - destination file; it is overwritten.
#
# Each main region takes as many table rows as the largest number of
# overlapping regions of a single type; its own cells span all those rows and
# the overlapping regions are spread one per row. Every table row is now
# opened with '<tr>' (previously only the first row of a group was).
# Nothing is written when main_regions is empty. Returns nil.
def grouping_list_html(overlaping_index, main_regions, putative_overlapping_regions, path)
  return if main_regions.empty?

  overlaping_regions = overlaping_index.values.flatten.uniq.map { |pos| putative_overlapping_regions[pos] }
  grouping_type = main_regions.first[4]
  basic_fields = ['Id', 'Chromosome', 'Start', 'Stop']
  header_structure = get_header({grouping_type => basic_fields}, main_regions + overlaping_regions)
  # The basic fields are printed from fixed columns; only real annotations remain.
  main_annotation_types = header_structure[grouping_type] - basic_fields

  File.open(path, 'w') do |report|
    report.puts '<HTML>',
                '<header>',
                '</header>',
                '<body>',
                '<table border=1>'
    report.puts get_grouping_html_header(header_structure)
    main_regions.each_with_index do |main_region, position|
      local_overlapping_regions = overlaping_index[position].map { |pos| putative_overlapping_regions[pos] }
      record_rows = get_max_overlapping_regions_by_type(local_overlapping_regions)
      rowspan = record_rows > 1 ? " rowspan=#{record_rows}" : nil
      report.puts '<tr>',
                  "<td#{rowspan}>#{main_region[5]}</td>",
                  "<td#{rowspan}>#{main_region[1]}</td>",
                  "<td#{rowspan}>#{main_region[2]}</td>",
                  "<td#{rowspan}>#{main_region[3]}</td>"
      main_annotation_types.each do |annotation_type|
        report.puts "<td#{rowspan}>#{main_region.last[annotation_type]}</td>"
      end
      record_rows.times do |row|
        # The first row was opened together with the main region cells.
        report.puts '<tr>' if row > 0
        header_structure.each do |region_type, annotation_types|
          next if region_type == grouping_type

          record = local_overlapping_regions.find { |r| r[4] == region_type }
          if record.nil?
            report.puts(annotation_types.empty? ? "<td></td>" : "<td></td>" * annotation_types.length)
          else
            if annotation_types.empty?
              report.puts "<td>#{record[5]}</td>"
            else
              annotation_types.each do |an_type|
                report.puts "<td>#{record.last[an_type]}</td>"
              end
            end
            # Consume the record so the next row shows the next one of this type.
            local_overlapping_regions.delete(record)
          end
        end
        report.puts '</tr>'
      end
    end
    report.puts '</table>',
                '</body>',
                '</HTML>'
  end
  nil
end
302
+
303
# Returns the largest number of regions sharing one region type (column 4),
# and never less than 1.
def get_max_overlapping_regions_by_type(local_overlapping_regions)
  counts_by_type = local_overlapping_regions.group_by { |r| r[4] }.values.map { |regions| regions.length }
  ([1] + counts_by_type).max
end
311
+
312
# Renders the two header rows of the grouping table.
#
# header_structure - Hash: region type => Array of column titles.
#
# The first row holds one cell per region type: it spans two rows when the
# type has no columns, and spans its columns when it has more than one.
# The second row holds the column titles of all types, in order.
#
# Returns the HTML of both rows as a String.
def get_grouping_html_header(header_structure)
  main_cells = []
  sub_cells = []
  header_structure.each do |main_title, cols|
    span = case cols.length
           when 0 then " rowspan=2"
           when 1 then ''
           else " colspan=#{cols.length}"
           end
    main_cells << "<th#{span}>#{main_title}</th>\n"
    cols.each { |col| sub_cells << "<th>#{col}</th>\n" }
  end
  "<tr>\n#{main_cells.join}</tr>\n<tr>\n#{sub_cells.join}</tr>\n"
end
328
+
329
# Extends the header structure with the annotation names found in the regions.
#
# header          - Hash: region type => Array of column titles; it is
#                   modified in place.
# genomic_regions - rows whose column 4 is the region type and whose last
#                   column is a Hash keyed by annotation name.
#
# New region types are appended in order of appearance; for known types the
# unseen annotation names are appended to the existing titles.
#
# Returns the same header Hash.
def get_header(header, genomic_regions)
  genomic_regions.each_with_object(header) do |ge_reg, structure|
    region_type = ge_reg[4]
    annotation_names = ge_reg.last.keys
    known = structure[region_type]
    structure[region_type] = known.nil? ? annotation_names : known | annotation_names
  end
end
342
+
343
+ # DATABASE METHODS
344
+ #----------------------------------------------------
345
+
346
# Downloads database.zip from bio-267-data.uma.es into the directory of
# database_path, extracts its entries to database_path and removes the
# archive once the database file exists.
#
# database_path - path where the SQLite database must end up.
#
# Raises RuntimeError when the server does not answer with a 2xx status.
def download_database(database_path)
  out_path = File.dirname(database_path)
  # Only the last path component is created; deeper missing parents still fail.
  Dir.mkdir(out_path) unless File.directory?(out_path)
  puts "Downloading database in #{out_path}, please be patient..."
  zip_path = File.join(out_path, 'database.zip')
  # Binary mode: the payload is a zip archive and must not go through any
  # newline translation. The block form closes the file even on errors.
  File.open(zip_path, 'wb') do |f|
    Net::HTTP.start("bio-267-data.uma.es") do |http|
      http.request_get('/database.zip') do |resp|
        unless resp.is_a?(Net::HTTPSuccess)
          raise "Database download failed: HTTP #{resp.code} #{resp.message}"
        end
        # Stream the body in segments instead of loading it in memory.
        resp.read_body { |segment| f.write(segment) }
      end
    end
  end
  puts "Decompressing database..."
  Zip::File.open(zip_path) do |zip_file|
    # NOTE(review): every entry is extracted to the same destination, so the
    # archive is presumably expected to hold a single file - confirm.
    zip_file.each do |entry|
      entry.extract(database_path)
    end
  end
  File.delete(zip_path) if File.exist?(database_path)
end
369
+
370
+ ######################################################################################################################
371
+ ## INPUT PARAMETER PARSING
372
+ ######################################################################################################################
373
# Command-line options. Every key gets its default right before the switch
# that can override it, then ARGV is parsed in place.
options = {}

# Reads a list argument: when the value is the path of an existing file the
# list is its lines (without line ends), otherwise the value is split on ','.
read_list = lambda do |value|
  if File.exist?(value)
    File.readlines(value).map { |line| line.chomp }
  else
    value.split(',')
  end
end

OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

  # Each item is parsed as chr:start:end into [chr, Integer, Integer].
  options[:coords] = []
  opts.on("-c", '--region_coordinates STRING', 'Coordinates to make the search. Format: chrN:start:end') do |coords|
    options[:coords] = read_list.call(coords).map do |line|
      chr, start, stop = line.split(':')
      [chr, start.to_i, stop.to_i]
    end
  end

  options[:name] = []
  opts.on("-n", '--region_name STRING', 'Search region by name') do |region|
    options[:name] = read_list.call(region)
  end

  options[:input_names_only] = false
  opts.on("-i", '--input_names_only', 'Show info about only input data') do
    options[:input_names_only] = true
  end

  options[:flanking_region] = 0
  opts.on("-F", '--flanking_region INTEGER', 'Flanking region to search aroun the elements') do |flanking_region|
    options[:flanking_region] = flanking_region.to_i
  end

  options[:path_sql] = File.join(File.dirname(__FILE__), "..", "database", "genomic_data.sqlite")
  opts.on("-p", "--path_sql PATH", "Path SQL DB to make queries") do |path|
    options[:path_sql] = path
  end

  options[:group] = nil
  opts.on("-g", '--group_by_region_type STRING', 'Use region type for group results by their coordinates') do |group|
    options[:group] = group
  end

  options[:output_format] = 'html'
  opts.on("-f", '--output_format PATH', 'Output format for results. Default:html') do |output_format|
    options[:output_format] = output_format
  end

  options[:output_path] = "results"
  opts.on("-o", '--output_path PATH', 'Output path for queries') do |output_path|
    options[:output_path] = output_path
  end

  options[:representation] = false
  opts.on("-r", '--graphical_representation', 'Make a representation of the selected region') do
    options[:representation] = true
  end

  options[:type] = []
  opts.on("-t", '--type_regions STRING', 'Region types to make the search. Format: region1,region2,region3...') do |type|
    options[:type] = type.split(',')
  end

  options[:verbose] = nil
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end

end.parse!
443
+
444
+ ######################################################################################################################
445
+ ## MAIN
446
+ ######################################################################################################################
447
# Fetch the database on first use, then open it for every query method
# (they all go through the global $db handle).
download_database(options[:path_sql]) unless File.exist?(options[:path_sql])
$db = SQLite3::Database.new(options[:path_sql])

# Coordinates take precedence over names when both are given.
genomic_regions = []
if !options[:coords].empty?
  genomic_regions = query_coordinates(options[:coords], options[:flanking_region])
elsif !options[:name].empty?
  genomic_regions = query_name_and_region(options[:name], options[:flanking_region])
end

if genomic_regions.empty?
  puts 'Results not found'
else
  if options[:input_names_only] && !options[:group].nil?
    # Among the regions of the grouping type keep only those asked for by name;
    # regions of any other type are kept.
    genomic_regions.select! { |reg| options[:name].include?(reg[5]) || reg[4] != options[:group] }
  end
  simple_list(genomic_regions, options[:output_path], options[:output_format])
  simple_list(genomic_regions, options[:output_path], 'gff') if options[:representation]
  unless options[:group].nil?
    # Grouping by 'query_coords' uses the query intervals themselves as main regions.
    if options[:group] == 'query_coords' && !options[:coords].empty?
      genomic_regions.concat(generate_query_regions(options[:coords]))
    end
    grouping_list(options[:group], genomic_regions, options[:output_path], options[:output_format])
  end
end