anncrsnp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 45770752f232d33dc6c3c5c46f96d5a8978c49bc
4
+ data.tar.gz: 02aed0e5cd60873006e6f4070ece816d7ba7103b
5
+ SHA512:
6
+ metadata.gz: 895816f11e25e0d8046e63d81712b775794c989e413d6b4564588edcc3e6cc803c42f5bfe6c17c53e6b273cf7da973f03a96e18784622ec2ecc36a71eb11e30b
7
+ data.tar.gz: daaa2211a52f6b2464d57cccc87ce18bfe4afc69e467194e09207df73696d29c90d92ba11af2ce1d23c0f75bebe89754f26e409fb7fe18eedeeb5d380ce91cad
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in anncrsnp.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Elena Rojano, Pedro Seoane
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # Anncrsnp
2
+
3
+ AnNCR-SNP is a tool to characterize Single Nucleotide Polymorphisms (SNPs) in non-coding regions of the human genome. It integrates data from several sources into a local SQLite database; the `bin/grdbfinder.rb` program queries that database by genomic coordinates or by region name and writes HTML, TXT or GFF reports.
4
+
5
+ To experiment with the library code, run `bin/console` for an interactive prompt.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'anncrsnp'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install anncrsnp
22
+
23
+ ## Usage
24
+
25
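+ Query the local database with `bin/grdbfinder.rb`, either by coordinates (`-c chrN:start:end`) or by region name (`-n NAME`), for example `grdbfinder.rb -c chr1:10000:20000 -F 500 -o results`. Run `grdbfinder.rb --help` to list every option.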
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/anncrsnp.
36
+
37
+
38
+ ## License
39
+
40
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
41
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/anncrsnp.gemspec ADDED
@@ -0,0 +1,35 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'anncrsnp/version'
5
+
6
# Gem metadata for anncrsnp. The description text is published verbatim on the
# gem index, so the hyphenation artifacts left by a copy-paste ("data-base",
# "regu-latory", "methyla-tion") have been removed from it.
Gem::Specification.new do |spec|
  spec.name          = "anncrsnp"
  spec.version       = Anncrsnp::VERSION
  spec.authors       = ["Elena Rojano", "Pedro Seoane"]
  spec.email         = ["elenarojano@outlook.com", "seoanezonjic@hotmail.com"]

  spec.summary       = %q{Tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding regions.}
  spec.description   = %q{AnNCR-SNP integrates data from various sources, allowing the user to investigate the potential effects of variants in non-coding regions of the human genome. AnNCR-SNP consists of a database containing data on all non-coding elements and two main programs: manager and finder. The manager program is responsible for creating the local database, and the finder program receives the user queries in order to search in the local database and retrieve information. The user can find information about various regulatory elements, such as TFBs, open chromatin, histone modification and methylation sites, information about SNPs from dbSNP and gene information from RefSeq.}
  spec.homepage      = ""
  spec.license       = "MIT"

  # # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # # delete this section to allow pushing this gem to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  # end

  # Package every git-tracked file except test/spec/features directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  # NOTE(review): only files under exe/ are exposed as executables; scripts
  # kept under bin/ are not installed on the PATH -- confirm this is intended.
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_dependency "sqlite3"
  spec.add_dependency "rubyzip"
end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "anncrsnp"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/grdbfinder.rb ADDED
@@ -0,0 +1,472 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp'))
5
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp', 'parsers'))
6
+
7
+ require 'optparse'
8
+ require 'dataset'
9
+ require 'sqlite3'
10
+ require 'benchmark'
11
+ require 'net/http'
12
+ require 'zip'
13
+
14
+ ######################################################################################################################
15
+ ## METHODS
16
+ ######################################################################################################################
17
+
18
+ # QUERYING METHODS
19
+ #----------------------------------------------------
20
# Collects every GenomicRange row that overlaps any of the given windows.
#
# coords          - Array of [chr, start, stop] triples.
# flanking_region - Integer number of bases added on both sides of each window
#                   (the widened start is clamped at 0).
#
# Candidate rows are fetched from the global $db by chromosome and bin range
# (bin = coordinate / 10000) and then filtered by real coordinate overlap.
# Row slots 2 and 3 are read as the row's start and end.
#
# Returns an Array of rows; rows found for several windows are not deduplicated.
def query_coordinates(coords, flanking_region)
  coords.each_with_object([]) do |(chr, start, stop), hits|
    window_start = [start.to_i - flanking_region, 0].max
    window_stop = stop.to_i + flanking_region
    candidates = $db.execute(
      "SELECT * FROM GenomicRange WHERE chr=? AND (bin BETWEEN ? AND ?)",
      chr, window_start/10000, window_stop/10000
    )
    overlapping = candidates.select do |g_reg|
      row_start = g_reg[2]
      row_stop = g_reg[3]
      starts_inside = row_start >= window_start && row_start <= window_stop
      ends_inside = row_stop >= window_start && row_stop <= window_stop
      spans_window = row_start <= window_start && row_stop >= window_stop
      starts_inside || ends_inside || spans_window
    end
    hits.concat(overlapping)
  end
end
41
+
42
# Looks up GenomicRange rows whose name column equals +name+ using the
# global $db handle and returns whatever rows the query yields.
def query_name(name)
  $db.execute("SELECT * FROM GenomicRange WHERE name=?", name)
end
48
+
49
# Finds the regions called by any of the given names and then every region
# overlapping them (widened by +flanking_region+ on both sides).
#
# name            - Array of region names.
# flanking_region - Integer passed through to query_coordinates.
#
# Returns the overlapping rows without duplicates (several named regions can
# retrieve the same row).
def query_name_and_region(name, flanking_region)
  named_hits = name.flat_map { |reg_name| query_name(reg_name) }
  # Slots 1..3 of a row are chr, start, stop.
  anchors = named_hits.map { |g_reg| g_reg[1..3] }
  query_coordinates(anchors, flanking_region).uniq
end
59
+
60
+ # REPORTING METHODS
61
+ #----------------------------------------------------
62
# Writes a flat report of +genomic_regions+ to
# "<output_path>_simple_list.<output_format>".
# The 'gff' format is delegated to simple_list_gff; any other format value is
# written by simple_list_html.
def simple_list(genomic_regions, output_path, output_format)
  report_path = output_path + '_simple_list.' + output_format
  case output_format
  when 'gff' then simple_list_gff(genomic_regions, report_path)
  else simple_list_html(genomic_regions, report_path)
  end
end
70
+
71
# Writes +genomic_regions+ as a one-table HTML page at +path+.
#
# genomic_regions - Array of rows laid out as
#                   [bin, chr, start, stop, type, name, annotation ids].
# path            - String path of the file to create or overwrite.
#
# Every table row is now closed with </tr>: the closing tag used to be a
# separate, discarded string statement because the preceding line lacked a
# trailing comma. The block form of File.open closes the file even if writing
# raises.
def simple_list_html(genomic_regions, path)
  File.open(path, 'w') do |report|
    report.puts '<HTML>',
      '<header>',
      '</header>',
      '<body>',
      '<table border=1>',
      '<tr>',
      '<th>Chromosome</th><th>Start</th><th>End</th><th>Region type</th><th>Region id</th>',
      '</tr>'
    genomic_regions.each do |_bin, chr, start, stop, type, name, _annotationid|
      report.puts '<tr>',
        "<td>#{chr}</td><td>#{start}</td><td>#{stop}</td><td>#{type}</td><td>#{name}</td>",
        '</tr>'
    end
    report.puts '</table>',
      '</body>',
      '</HTML>'
  end
end
91
+
92
# Writes +genomic_regions+ as a GFF3 file at +path+
# (the generated file can be used on http://genometools.org/cgi-bin/annotationsketch_demo.cgi).
#
# genomic_regions - Array of rows laid out as
#                   [bin, chr, start, stop, type, name, annotation ids].
# path            - String path of the file to create or overwrite.
#
# The first feature line is a "chromosome" record spanning all regions plus a
# 5% margin on each side (start clamped at 0); it takes its chromosome name
# from the first row. With an empty list only the version pragma is written
# instead of failing on a nil row. The block form of File.open closes the file
# even if writing raises.
def simple_list_gff(genomic_regions, path)
  source = File.basename(__FILE__)
  File.open(path, 'w') do |report|
    report.puts '##gff-version 3'
    next if genomic_regions.empty?

    main_chr = genomic_regions.first[1]
    min_start = genomic_regions.map { |g_reg| g_reg[2] }.min
    max_stop = genomic_regions.map { |g_reg| g_reg[3] }.max
    add_region = ((max_stop - min_start).abs * 0.05).to_i
    region_start = [min_start - add_region, 0].max
    region_stop = max_stop + add_region
    report.puts "#{main_chr}\t#{source}\tchromosome\t#{region_start}\t#{region_stop}\t.\t.\t.\tID=#{main_chr}"
    genomic_regions.each do |_bin, chr, start, stop, type, name, _annotationid|
      report.puts "#{chr}\t#{source}\t#{type}\t#{start}\t#{stop}\t.\t.\t.\tName=#{name}"
    end
  end
end
108
+
109
# Splits the comma-separated annotation id string stored in the last slot of
# each record and gathers the distinct ids.
#
# Side effect: slot 6 of every record is replaced by the Array of its ids.
#
# Returns a Hash whose keys are the ids (Strings) and whose values are all nil.
def get_uniq_ids_from_records(records)
  records.each_with_object({}) do |record, seen|
    split_ids = record.last.split(',')
    record[6] = split_ids
    # A list that starts with an empty id contributes nothing.
    next if split_ids.first == ''
    split_ids.each { |annot_id| seen[annot_id] = nil }
  end
end
122
+
123
# Replaces the annotation id list of every region by a Hash of
# { annotation type name => annotation value }.
#
# genomic_regions - Array of rows whose last slot holds a comma-separated
#                   String of Annotation rowids; the rows are modified in place.
#
# Annotation rows are read from the global $db as [rowid, value, type rowid],
# and the type rowid is resolved through the last column of AnnotationType.
#
# Slot 6 of every region ends up as a Hash even when nothing is annotated;
# previously it stayed an Array in that case and the report code that calls
# `.keys` on it failed. Ids that cannot be resolved in the database are skipped
# instead of producing a nil key.
def load_annotations(genomic_regions)
  annotations = get_uniq_ids_from_records(genomic_regions)
  unless annotations.empty?
    id_marks = Array.new(annotations.length, '?').join(',')
    db_annotations = $db.execute("SELECT rowid, * FROM Annotation WHERE rowid IN(#{id_marks})", annotations.keys)
    annotation_types = db_annotations.map { |db_an| db_an.last }.uniq
    unless annotation_types.empty?
      type_marks = Array.new(annotation_types.length, '?').join(',')
      db_annotation_types = $db.execute("SELECT rowid, * FROM AnnotationType WHERE rowid IN(#{type_marks})", annotation_types)
      types_by_rowid = db_annotation_types.group_by { |r| r.first }
      db_annotations.each do |db_an|
        type_rows = types_by_rowid[db_an[2]]
        next if type_rows.nil?
        # Ids coming from the regions are Strings, so the rowid key must be too.
        annotations[db_an[0].to_s] = [db_an[1], type_rows.first.last]
      end
    end
  end
  genomic_regions.each do |g_reg|
    final_annot = {}
    g_reg.last.each do |id|
      resolved = annotations[id]
      next if resolved.nil?
      value, type = resolved
      final_annot[type] = value
    end
    g_reg[6] = final_annot
  end
end
153
+
154
# Builds one pseudo-region per queried window so the windows themselves can be
# used as a grouping type.
#
# coords - Array of [chr, start, stop] triples.
#
# Returns rows laid out as [bin, chr, start, stop, type, name, annot] with a
# nil bin, the 'query_coords' type, a "Q_<chr>_<start>-<stop>" name and an
# empty annotation string.
def generate_query_regions(coords)
  coords.map do |chr, start, stop|
    [nil, chr, start, stop, 'query_coords', "Q_#{chr}_#{start}-#{stop}", '']
  end
end
161
+
162
# Writes a report that lists, for every region of type +group+, the regions of
# other types that overlap it.
#
# group           - String region type used as the grouping key (row slot 4).
# genomic_regions - Array of region rows; annotations are loaded into them in
#                   place before reporting.
# output_path     - String prefix of the report path; the file is named
#                   "<output_path>_grouping_list.<output_format>".
# output_format   - 'html' or 'txt'; any other value writes nothing.
#
# When no region of the requested type exists there is nothing to group by, so
# a message is printed and no report is written (the writers would otherwise
# fail on an empty list).
def grouping_list(group, genomic_regions, output_path, output_format)
  path = output_path + '_grouping_list.' + output_format
  load_annotations(genomic_regions)
  main_regions, putative_overlapping_regions = genomic_regions.partition { |reg| reg[4] == group }
  if main_regions.empty?
    puts "No regions of type '#{group}' found, grouping report not generated"
    return
  end
  overlaping_index = get_overlapping_regions_batch(main_regions, putative_overlapping_regions)
  case output_format
  when 'html'
    grouping_list_html(overlaping_index, main_regions, putative_overlapping_regions, path)
  when 'txt'
    grouping_list_txt(overlaping_index, main_regions, putative_overlapping_regions, path)
  end
end
174
+
175
# Writes the grouping report as a tab-separated table at +path+.
#
# overlaping_index             - Hash { position in main_regions => Array of
#                                positions in putative_overlapping_regions }.
# main_regions                 - Array of region rows used as table rows; must
#                                not be empty (its first row names the type).
# putative_overlapping_regions - Array of region rows indexed by the positions
#                                stored in overlaping_index.
# path                         - String path of the file to create/overwrite.
#
# Row slot 6 (the last slot) is expected to be a Hash of annotations.
#
# Columns: Id/Chromosome/Start/Stop and the annotations of the grouping type,
# then one column per annotation of every other region type (or one column
# with the region names when that type has no annotations). "-" marks a type
# with no overlapping region; several values in one cell are joined with ",".
#
# Data rows are joined with tabs exactly like the header, so they no longer
# end with a dangling tab that gave them one field more than the header. The
# block form of File.open closes the file even if writing raises.
def grouping_list_txt(overlaping_index, main_regions, putative_overlapping_regions, path)
  overlaping_regions = overlaping_index.values.flatten.uniq.map { |pos| putative_overlapping_regions[pos] }
  grouping_type = main_regions.first[4]
  basic_fields = ['Id', 'Chromosome', 'Start', 'Stop']
  header_structure = get_header({grouping_type => basic_fields}, main_regions + overlaping_regions)

  header_cols = header_structure.map do |region_type, annotations|
    annotations.empty? ? [region_type] : annotations.map { |an| region_type + '.' + an }
  end
  # Computed once: these do not change from row to row.
  main_annotation_types = header_structure[grouping_type] - basic_fields
  other_types = header_structure.reject { |region_type, _| region_type == grouping_type }

  File.open(path, 'w') do |report|
    report.puts header_cols.flatten.join("\t")
    main_regions.each_with_index do |main_region, position|
      local_overlapping_regions = overlaping_index[position].map { |pos| putative_overlapping_regions[pos] }
      cells = [main_region[5], main_region[1], main_region[2], main_region[3]].map { |field| field.to_s }
      main_annotation_types.each do |annotation_type|
        cells << main_region.last[annotation_type].to_s
      end
      other_types.each do |region_type, annotation_types|
        records = local_overlapping_regions.select { |r| r[4] == region_type }
        if records.empty?
          # One placeholder per column occupied by this region type.
          cells.concat(Array.new([annotation_types.length, 1].max, '-'))
        elsif annotation_types.empty?
          cells << records.map { |r| r[5] }.uniq.join(',')
        else
          annotation_types.each do |an_type|
            cells << records.map { |r| r.last[an_type] }.uniq.join(',')
          end
        end
      end
      report.puts cells.join("\t")
    end
  end
end
223
+
224
# Finds, for every main region, which candidate regions overlap it.
#
# main_regions                 - Array of rows [bin, chr, start, stop, ...].
# putative_overlapping_regions - Array of rows with the same layout.
#
# Two regions overlap when they are on the same chromosome and their closed
# intervals share at least one position. This also covers a candidate that
# lies completely inside the main region (for instance a single-base feature
# inside a longer one), which the previous start/stop containment test missed.
#
# Returns a Hash { position in main_regions => Array of positions in
# putative_overlapping_regions }; every main position has an entry.
def get_overlapping_regions_batch(main_regions, putative_overlapping_regions)
  index = {}
  main_regions.each_with_index do |(_bin, chr, start, stop), main_position|
    hits = []
    putative_overlapping_regions.each_with_index do |(_bin_over, chr_over, start_over, stop_over), over_position|
      next unless chr == chr_over
      hits << over_position if start <= stop_over && stop >= start_over
    end
    index[main_position] = hits
  end
  index
end
243
+
244
# Writes the grouping report as an HTML table at +path+.
#
# overlaping_index             - Hash { position in main_regions => Array of
#                                positions in putative_overlapping_regions }.
# main_regions                 - Array of region rows used as table groups;
#                                must not be empty (its first row names the type).
# putative_overlapping_regions - Array of region rows indexed by the positions
#                                stored in overlaping_index.
# path                         - String path of the file to create/overwrite.
#
# Row slot 6 (the last slot) is expected to be a Hash of annotations.
#
# Each main region takes as many table rows as the largest number of
# overlapping regions of a single type; its own cells span those rows, and
# each row shows at most one overlapping region per type.
#
# Every row after the first one of a group now opens its own <tr>; previously
# only the first row was opened while all of them were closed. The block form
# of File.open closes the file even if writing raises.
def grouping_list_html(overlaping_index, main_regions, putative_overlapping_regions, path)
  overlaping_regions = overlaping_index.values.flatten.uniq.map { |pos| putative_overlapping_regions[pos] }
  grouping_type = main_regions.first[4]
  basic_fields = ['Id', 'Chromosome', 'Start', 'Stop']
  header_structure = get_header({grouping_type => basic_fields}, main_regions + overlaping_regions)

  File.open(path, 'w') do |report|
    report.puts '<HTML>',
      '<header>',
      '</header>',
      '<body>',
      '<table border=1>'
    report.puts get_grouping_html_header(header_structure)
    # The basic fields are printed explicitly below; keep only the annotations.
    header_structure[grouping_type] = header_structure[grouping_type] - basic_fields
    main_regions.each_with_index do |main_region, position|
      local_overlapping_regions = overlaping_index[position].map { |pos| putative_overlapping_regions[pos] }
      record_rows = get_max_overlapping_regions_by_type(local_overlapping_regions)
      rowspan = record_rows > 1 ? " rowspan=#{record_rows}" : nil
      report.puts '<tr>',
        "<td#{rowspan}>#{main_region[5]}</td>",
        "<td#{rowspan}>#{main_region[1]}</td>",
        "<td#{rowspan}>#{main_region[2]}</td>",
        "<td#{rowspan}>#{main_region[3]}</td>"
      header_structure[grouping_type].each do |annotation_type|
        report.puts "<td#{rowspan}>#{main_region.last[annotation_type]}</td>"
      end
      record_rows.times do |row|
        # The first row was opened together with the main region cells.
        report.puts '<tr>' if row > 0
        header_structure.each do |region_type, annotation_types|
          next if region_type == grouping_type
          record = local_overlapping_regions.find { |r| r[4] == region_type }
          if record.nil?
            if annotation_types.length == 0
              report.puts "<td></td>"
            else
              report.puts "<td></td>"*annotation_types.length
            end
          else
            if annotation_types.length == 0
              report.puts "<td>#{record[5]}</td>"
            else
              annotation_types.each do |an_type|
                report.puts "<td>#{record.last[an_type]}</td>"
              end
            end
            # Consumed: the next row shows the following region of this type.
            local_overlapping_regions.delete(record)
          end
        end
        report.puts '</tr>'
      end
    end
    report.puts '</table>',
      '</body>',
      '</HTML>'
  end
end
302
+
303
# Returns the largest number of regions sharing one region type (row slot 4)
# in +local_overlapping_regions+, never less than 1.
def get_max_overlapping_regions_by_type(local_overlapping_regions)
  per_type_counts = local_overlapping_regions.group_by { |r| r[4] }.values.map { |regions| regions.length }
  ([1] + per_type_counts).max
end
311
+
312
# Builds the two header rows of the grouping HTML table.
#
# header_structure - Hash { region type => Array of column names }.
#
# The first row has one <th> per region type: it spans both header rows when
# the type has no columns, and spans its columns when it has more than one.
# The second row has one <th> per column name.
#
# Returns the markup of both rows as one String.
def get_grouping_html_header(header_structure)
  type_cells = header_structure.map do |main_title, cols|
    span = if cols.length == 0
             " rowspan=2"
           elsif cols.length > 1
             " colspan=#{cols.length}"
           else
             ""
           end
    "<th#{span}>#{main_title}</th>\n"
  end
  column_cells = header_structure.flat_map do |_main_title, cols|
    cols.map { |col| "<th>#{col}</th>\n" }
  end
  "<tr>\n" + type_cells.join + "</tr>\n" + "<tr>\n" + column_cells.join + "</tr>\n"
end
328
+
329
# Extends +header+ with the annotation names found in +genomic_regions+.
#
# header          - Hash { region type => Array of column names }; it is
#                   modified in place and also returned.
# genomic_regions - Array of rows whose slot 4 is the region type and whose
#                   last slot is a Hash of annotations.
#
# Names already listed for a type keep their position; new ones are appended.
def get_header(header, genomic_regions)
  genomic_regions.each do |region|
    type = region[4]
    annotation_names = region.last.keys
    known = header[type]
    header[type] = known.nil? ? annotation_names : (known | annotation_names)
  end
  header
end
342
+
343
+ # DATABASE METHODS
344
+ #----------------------------------------------------
345
+
346
# Downloads the zipped database and unpacks it to +database_path+.
#
# database_path - String path where the database file must end up; the zip is
#                 saved next to it as "database.zip".
#
# The zip is removed afterwards when +database_path+ exists.
#
# Raises the Net::HTTP error for a non-2xx response instead of saving the
# error page as if it were the archive.
#
# The archive is written in binary mode so it is not altered on platforms that
# translate line endings, and the block form of File.open closes it even if
# the transfer fails. File.exist? replaces File.exists?, which no longer
# exists in current Ruby versions.
#
# NOTE(review): the transfer uses plain HTTP and the archive is not verified
# in any way -- consider HTTPS and a checksum.
def download_database(database_path)
  out_path = File.dirname(database_path)
  puts "Downloading database in #{out_path}, please be patient..."
  zip_path = File.join(out_path, 'database.zip')
  File.open(zip_path, 'wb') do |f|
    Net::HTTP.start("bio-267-data.uma.es") do |http|
      http.request_get('/database.zip') do |resp|
        resp.value # raises unless the response is a 2xx
        resp.read_body do |segment|
          f.write(segment)
        end
      end
    end
  end
  puts "Decompressing database..."
  Zip::File.open(zip_path) do |zip_file|
    zip_file.each do |entry|
      entry.extract(database_path)
    end
  end
  File.delete(zip_path) if File.exist?(database_path)
end
369
+
370
+ ######################################################################################################################
371
+ ## INPUT PARAMETER PARSING
372
+ ######################################################################################################################
373
# Command line options, filled with defaults and then overridden by ARGV.
# Uses the true/false literals and File.exist? because the TRUE/FALSE
# constants and File.exists? no longer exist in current Ruby versions.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

  # Either a comma-separated list of chrN:start:end items or the path of a
  # file with one item per line. Stored as [chr, start, stop] with Integers.
  options[:coords] = []
  opts.on("-c", '--region_coordinates STRING', 'Coordinates to make the search. Format: chrN:start:end') do |coords|
    coord_lines = if File.exist?(coords)
      File.readlines(coords).map { |line| line.chomp }
    else
      coords.split(',')
    end
    options[:coords] = coord_lines.map do |line|
      chr, start, stop = line.split(':')
      [chr, start.to_i, stop.to_i]
    end
  end

  # Either a comma-separated list of names or the path of a file with one
  # name per line.
  options[:name] = []
  opts.on("-n", '--region_name STRING', 'Search region by name') do |region|
    if File.exist?(region)
      options[:name] = File.readlines(region).map { |line| line.chomp }
    else
      options[:name] = region.split(',')
    end
  end

  options[:input_names_only] = false
  opts.on("-i", '--input_names_only', 'Show info about only input data') do
    options[:input_names_only] = true
  end

  options[:flanking_region] = 0
  opts.on("-F", '--flanking_region INTEGER', 'Flanking region to search aroun the elements') do |flanking_region|
    options[:flanking_region] = flanking_region.to_i
  end

  options[:path_sql] = File.join(File.dirname(__FILE__), "..", "database", "genomic_data.sqlite")
  opts.on("-p", "--path_sql PATH", "Path SQL DB to make queries") do |path|
    options[:path_sql] = path
  end

  options[:group] = nil
  opts.on("-g", '--group_by_region_type STRING', 'Use region type for group results by their coordinates') do |group|
    options[:group] = group
  end

  options[:output_format] = 'html'
  opts.on("-f", '--output_format PATH', 'Output format for results. Default:html') do |output_format|
    options[:output_format] = output_format
  end

  options[:output_path] = "results"
  opts.on("-o", '--output_path PATH', 'Output path for queries') do |output_path|
    options[:output_path] = output_path
  end

  options[:representation] = false
  opts.on("-r", '--graphical_representation', 'Make a representation of the selected region') do
    options[:representation] = true
  end

  options[:type] = []
  opts.on("-t", '--type_regions STRING', 'Region types to make the search. Format: region1,region2,region3...') do |type|
    options[:type] = type.split(',')
  end

  options[:verbose] = nil
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end

end.parse!
443
+
444
+ ######################################################################################################################
445
+ ## MAIN
446
+ ######################################################################################################################
447
# Fetch the database on first use. File.exist? replaces File.exists?, which no
# longer exists in current Ruby versions.
unless File.exist?(options[:path_sql])
  download_database(options[:path_sql])
end
$db = SQLite3::Database.new(options[:path_sql])
genomic_regions = []
#Benchmark.bm do |bm|
#	bm.report {
# Coordinates take precedence over names when both are given.
if !options[:coords].empty?
  genomic_regions = query_coordinates(options[:coords], options[:flanking_region])
elsif !options[:name].empty?
  genomic_regions = query_name_and_region(options[:name], options[:flanking_region])
end
#	}
#end
#puts 'FINAL', '---------------', genomic_regions.inspect
if !genomic_regions.empty?
  # With -i and -g, keep regions of the grouping type only when they were
  # explicitly requested by name; regions of other types are always kept.
  genomic_regions.select!{|reg| options[:name].include?(reg[5]) || reg[4] != options[:group]} if options[:input_names_only] && !options[:group].nil?
  simple_list(genomic_regions, options[:output_path], options[:output_format])
  simple_list(genomic_regions, options[:output_path], 'gff') if options[:representation]
  if !options[:group].nil?
    # Grouping by 'query_coords' needs the queried windows as regions.
    genomic_regions.concat(generate_query_regions(options[:coords])) if options[:group] == 'query_coords' && !options[:coords].empty?
    grouping_list(options[:group], genomic_regions, options[:output_path], options[:output_format])
  end
else
  puts 'Results not found'
end