anncrsnp 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/grdbfinder.rb +38 -10
- data/bin/retriever.rb +147 -0
- data/lib/anncrsnp/file_parser.rb +53 -0
- data/lib/anncrsnp/file_parsers/wigfix_parser.rb +65 -0
- data/lib/anncrsnp/position_selection_manager.rb +214 -0
- data/lib/anncrsnp/position_selection_worker.rb +140 -0
- data/lib/anncrsnp/preprocessing_manager.rb +87 -0
- data/lib/anncrsnp/preprocessing_worker.rb +139 -0
- data/lib/anncrsnp/version.rb +1 -1
- metadata +10 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ee18ae125e8e7b9738d3dd493ee66d57a374f885
|
|
4
|
+
data.tar.gz: 27e49ea014fbbdfb2aba568847c3214d24ee621e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cd254d0ed92720ce4a6bc1909e2bff86a845a3714549460e29f4f290bed7376e6d9b872aea58bc8093724dc0545fa3403213a43d3fe269130477cf6cb8ced556
|
|
7
|
+
data.tar.gz: 1269b73c8b8f6147940428a27ad5d54ec06b6ea8381fe0894ace30854e14b851cf893778cb4310630db56c21f7fb704ecfc1889ffa9624a315b454e4f8911d0d
|
data/bin/grdbfinder.rb
CHANGED
|
@@ -347,16 +347,44 @@ def download_database(database_path)
|
|
|
347
347
|
out_path = File.dirname(database_path)
|
|
348
348
|
puts "Downloading database in #{out_path}, please be patient..."
|
|
349
349
|
zip_path = File.join(out_path, 'database.zip')
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
350
|
+
# Code from https://www.ruby-forum.com/topic/4413829
|
|
351
|
+
target = "http://bio-267-data.uma.es/database.zip"
|
|
352
|
+
|
|
353
|
+
bytes_total = nil
|
|
354
|
+
|
|
355
|
+
open(target, "rb",
|
|
356
|
+
:content_length_proc => lambda{|content_length|
|
|
357
|
+
bytes_total = content_length},
|
|
358
|
+
:progress_proc => lambda{|bytes_transferred|
|
|
359
|
+
if bytes_total
|
|
360
|
+
# Print progress
|
|
361
|
+
print("\r#{bytes_transferred}/#{bytes_total}")
|
|
362
|
+
else
|
|
363
|
+
# We don’t know how much we get, so just print number
|
|
364
|
+
# of transferred bytes
|
|
365
|
+
print("\r#{bytes_transferred} (total size unknown)")
|
|
366
|
+
end
|
|
367
|
+
}) do |page|
|
|
368
|
+
# Now the real operation
|
|
369
|
+
File.open(zip_path, "wb") do |file|
|
|
370
|
+
# The file may not fit into RAM entirely, so copy it
|
|
371
|
+
# chunk by chunk.
|
|
372
|
+
while chunk = page.read(1024)
|
|
373
|
+
file.write(chunk)
|
|
374
|
+
end
|
|
375
|
+
end
|
|
376
|
+
end
|
|
377
|
+
|
|
378
|
+
if File.exists?(zip_path)
|
|
379
|
+
puts "\nDecompressing database..."
|
|
380
|
+
Zip::File.open(zip_path) do |zip_file|
|
|
381
|
+
zip_file.each do |entry|
|
|
382
|
+
entry.extract(database_path)
|
|
383
|
+
end
|
|
384
|
+
end
|
|
385
|
+
else
|
|
386
|
+
puts "ERROR: #{zip_path} was not found"
|
|
387
|
+
Process.exit
|
|
360
388
|
end
|
|
361
389
|
if File.exists?(database_path)
|
|
362
390
|
File.delete(zip_path)
|
data/bin/retriever.rb
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
#! /usr/bin/env ruby
|
|
2
|
+
ROOT_PATH = File.dirname(__FILE__)
|
|
3
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp'))
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
require 'optparse'
|
|
7
|
+
require 'scbi_mapreduce'
|
|
8
|
+
require 'preprocessing_manager'
|
|
9
|
+
require 'position_selection_manager'
|
|
10
|
+
|
|
11
|
+
#####################################################################
|
|
12
|
+
### OPTPARSE
|
|
13
|
+
#####################################################################
|
|
14
|
+
|
|
15
|
+
options = {}
|
|
16
|
+
OptionParser.new do |opts|
|
|
17
|
+
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
|
18
|
+
|
|
19
|
+
### PARALELISATION OPTIONS
|
|
20
|
+
#####################################################################
|
|
21
|
+
options[:server_ip] = '0.0.0.0'
|
|
22
|
+
opts.on( '-s', '--server IP', 'Server ip. You can use a partial ip to select the apropriate interface' ) do |server_ip|
|
|
23
|
+
options[:server_ip] = server_ip
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# server port
|
|
27
|
+
options[:port] = 0 # any free port
|
|
28
|
+
opts.on( '-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
|
|
29
|
+
options[:port] = port.to_i
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# set number of workers. You can also provide an array with worker names.
|
|
33
|
+
# Those workers names can be read from a file produced by the existing
|
|
34
|
+
# queue system, if any.
|
|
35
|
+
options[:workers] = 2
|
|
36
|
+
opts.on( '-w', '--workers COUNT', 'Number of workers, or file containing machine names to launch workers with ssh' ) do |workers|
|
|
37
|
+
if File.exists?(workers)
|
|
38
|
+
# use workers file
|
|
39
|
+
options[:workers] = File.read(workers).split("\n").map{|w| w.chomp}
|
|
40
|
+
else
|
|
41
|
+
begin
|
|
42
|
+
options[:workers] = Integer(workers)
|
|
43
|
+
rescue
|
|
44
|
+
STDERR.puts "ERROR:Invalid workers parameter #{options[:workers]}"
|
|
45
|
+
exit
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# chunk size
|
|
51
|
+
options[:chunk_size] = 1
|
|
52
|
+
opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
|
|
53
|
+
options[:chunk_size] = cs.to_i
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
### EXECUTION OPTIONS
|
|
57
|
+
#####################################################################
|
|
58
|
+
options[:index_size] = 1000000
|
|
59
|
+
opts.on( '-x', '--index_size INTEGER', 'Size of genomic features data packs' ) do |is|
|
|
60
|
+
options[:index_size] = is.to_i
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
options[:file] = nil
|
|
64
|
+
opts.on("-f", "--file-links PATH", "Input file with links to retrieve data") do |f|
|
|
65
|
+
options[:file] = f
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
options[:output] = 'data'
|
|
69
|
+
opts.on("-o", "--output PATH", "Folder output path") do |f|
|
|
70
|
+
options[:output] = f
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
options[:downloaded_only] = FALSE
|
|
74
|
+
opts.on("--download_only", "Only download gemonic features files but not process them") do
|
|
75
|
+
options[:downloaded_only] = TRUE
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
options[:no_auc] = FALSE
|
|
79
|
+
opts.on("--no_auc", "No calculate auc by each genomic feature") do
|
|
80
|
+
options[:no_auc] = TRUE
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
options[:selected_positions] = nil
|
|
84
|
+
opts.on("--selected_positions PATH", "Tabular file with chromosome (as chrN) and base 1 coordinates. Optionally a third field can be added with 0/1 values for positive/negative groups") do |selected|
|
|
85
|
+
options[:selected_positions] = selected
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
end.parse!
|
|
89
|
+
|
|
90
|
+
#####################################################################
|
|
91
|
+
### MAIN
|
|
92
|
+
#####################################################################
|
|
93
|
+
|
|
94
|
+
# GENERAL FOLDER
|
|
95
|
+
Dir.mkdir(options[:output]) if !Dir.exist?(options[:output])
|
|
96
|
+
|
|
97
|
+
# MAPREDUCE LAUNCHING
|
|
98
|
+
##########################################################
|
|
99
|
+
$LOG = Logger.new(STDOUT)
|
|
100
|
+
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
|
|
101
|
+
# Genomic feature data downloading and preprocessing
|
|
102
|
+
#-----------------------------------------------------------------------------
|
|
103
|
+
if !options[:file].nil?
|
|
104
|
+
if File.exists?(options[:file])
|
|
105
|
+
temp = File.join(options[:output], 'temp')
|
|
106
|
+
options[:temp] = temp
|
|
107
|
+
Dir.mkdir(temp) if !Dir.exist?(temp)
|
|
108
|
+
preprocessed_data = File.join(options[:output], 'preprocessed_data')
|
|
109
|
+
options[:preprocessed_data] = preprocessed_data
|
|
110
|
+
Dir.mkdir(preprocessed_data) if !Dir.exist?(preprocessed_data)
|
|
111
|
+
|
|
112
|
+
$LOG.info 'Starting PREPROCESSING server'
|
|
113
|
+
custom_worker_file = File.join(ROOT_PATH, '..', 'lib', 'anncrsnp', 'preprocessing_worker.rb')
|
|
114
|
+
PreprocessingManager.init_work_manager(options)
|
|
115
|
+
|
|
116
|
+
mgr = ScbiMapreduce::Manager.new( options[:server_ip], options[:port], options[:workers], PreprocessingManager, custom_worker_file, STDOUT) # launch processor server
|
|
117
|
+
mgr.chunk_size = options[:chunk_size]
|
|
118
|
+
mgr.start_server # start processing
|
|
119
|
+
$LOG.info 'Closing PREPROCESSING server'
|
|
120
|
+
else
|
|
121
|
+
puts "Links file not exists\n#{options[:file]}"
|
|
122
|
+
Process.exit()
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Genomic feature data position selection
|
|
127
|
+
#-----------------------------------------------------------------------------
|
|
128
|
+
if !options[:selected_positions].nil?
|
|
129
|
+
if File.exist?(options[:selected_positions])
|
|
130
|
+
selected_positions_folder = File.join(options[:output], 'selected_positions')
|
|
131
|
+
options[:selected_positions_folder] = selected_positions_folder
|
|
132
|
+
Dir.mkdir(selected_positions_folder) if !Dir.exist?(selected_positions_folder)
|
|
133
|
+
|
|
134
|
+
$LOG.info 'Starting POSITION_SELECTION server'
|
|
135
|
+
custom_worker_file = File.join(ROOT_PATH, '..', 'lib', 'anncrsnp', 'position_selection_worker.rb')
|
|
136
|
+
PositionSelectionManager.init_work_manager(options)
|
|
137
|
+
|
|
138
|
+
mgr = ScbiMapreduce::Manager.new( options[:server_ip], options[:port], options[:workers], PositionSelectionManager, custom_worker_file, STDOUT) # launch processor server
|
|
139
|
+
mgr.chunk_size = options[:chunk_size]
|
|
140
|
+
mgr.start_server # start processing
|
|
141
|
+
$LOG.info 'Closing POSITION_SELECTION server'
|
|
142
|
+
else
|
|
143
|
+
puts "File with selected positions not exists:\n#{options[:selected_positions]}"
|
|
144
|
+
Process.exit()
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
require 'yajl'
|
|
2
|
+
class FileParser
|
|
3
|
+
@@parsers = {}
|
|
4
|
+
def self.get_descendants
|
|
5
|
+
return ObjectSpace.each_object(Class).select { |klass| klass < self }
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def self.load
|
|
9
|
+
path_parsers = File.join(File.dirname(__FILE__), 'file_parsers')
|
|
10
|
+
Dir.glob(path_parsers+'/*').each do |parser|
|
|
11
|
+
require parser
|
|
12
|
+
end
|
|
13
|
+
get_descendants.each do |descendant|
|
|
14
|
+
@@parsers[descendant.format] = descendant if descendant.available?
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def self.select(format)
|
|
19
|
+
return @@parsers[format]
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
########################################################################################
|
|
23
|
+
## PARSER DEPENDANT METHODS
|
|
24
|
+
########################################################################################
|
|
25
|
+
def self.available?
|
|
26
|
+
return FALSE
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def self.format
|
|
30
|
+
return 'master'
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def initialize(folder, chunk_size)
|
|
34
|
+
@folder = folder
|
|
35
|
+
@chunk_size = chunk_size
|
|
36
|
+
@chrom = nil
|
|
37
|
+
@coords = []
|
|
38
|
+
@packs = 0
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def parse(line)
|
|
42
|
+
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def write_compressed_data
|
|
46
|
+
p = @packs * @chunk_size
|
|
47
|
+
gz_path = File.join(@folder, "#{@chrom}_#{p}.gz")
|
|
48
|
+
Zlib::GzipWriter.open(gz_path) do |writer|
|
|
49
|
+
Yajl::Encoder.encode(@coords, writer)
|
|
50
|
+
end
|
|
51
|
+
@packs += 1
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
class WigfixParser < FileParser
|
|
2
|
+
def initialize(folder, chunk_size)
|
|
3
|
+
super
|
|
4
|
+
@start = 1
|
|
5
|
+
@step = 1
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def self.available?
|
|
9
|
+
return TRUE
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def self.format
|
|
13
|
+
return 'wigfix'
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def parse(line)
|
|
17
|
+
#fixedStep chrom=chr11 start=60001 step=1
|
|
18
|
+
if line.include?('fixedStep')
|
|
19
|
+
line =~ /fixedStep chrom=(\S+) start=(\d+) step=(\d+)/
|
|
20
|
+
if !@chrom.nil? && @chrom != $1 #We change of chromosome, we write the buffered coordinates
|
|
21
|
+
#puts "=> #{@packs}\t#{@start}\tx"
|
|
22
|
+
#puts @coords.first.inspect
|
|
23
|
+
#puts @coords.last.inspect
|
|
24
|
+
write_compressed_data
|
|
25
|
+
@coords = []
|
|
26
|
+
end
|
|
27
|
+
@chrom = $1
|
|
28
|
+
last_start = @start
|
|
29
|
+
@start = $2.to_i
|
|
30
|
+
diff = @start - last_start #Create dummy files to fill gaps on coordinate scores
|
|
31
|
+
if diff >= @chunk_size
|
|
32
|
+
(diff/@chunk_size).times do
|
|
33
|
+
#puts "=> #{@packs}\t#{@start}\td"
|
|
34
|
+
#puts @coords.first.inspect
|
|
35
|
+
#puts @coords.last.inspect
|
|
36
|
+
write_compressed_data
|
|
37
|
+
@coords = []
|
|
38
|
+
end
|
|
39
|
+
else
|
|
40
|
+
if @start/@chunk_size != last_start/@chunk_size #Current coordinate belongs to another pack that the previous, write the buffered coordinates
|
|
41
|
+
#puts "=> #{@packs}\t#{@start}\te"
|
|
42
|
+
#puts @coords.first.inspect
|
|
43
|
+
#puts @coords.last.inspect
|
|
44
|
+
write_compressed_data
|
|
45
|
+
@coords = []
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
@step = $3.to_i
|
|
49
|
+
else
|
|
50
|
+
if @start % @chunk_size == 0 # We have reached the chun size, write it to disk
|
|
51
|
+
#puts "=> #{@packs}\t#{@start}\tl"
|
|
52
|
+
#puts @coords.first.inspect
|
|
53
|
+
#puts @coords.last.inspect
|
|
54
|
+
write_compressed_data
|
|
55
|
+
@coords = []
|
|
56
|
+
end
|
|
57
|
+
@coords << [@start, line.to_f]
|
|
58
|
+
@start += @step
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def get_data
|
|
63
|
+
return @coords
|
|
64
|
+
end
|
|
65
|
+
end
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'rroc'
|
|
3
|
+
require 'gchart'
|
|
4
|
+
|
|
5
|
+
# MyWorkerManager class is used to implement the methods
|
|
6
|
+
# to send and receive the data to or from workers
|
|
7
|
+
class PositionSelectionManager < ScbiMapreduce::WorkManager
|
|
8
|
+
|
|
9
|
+
######################################################################################################
|
|
10
|
+
## MANAGER BASIC METHODS
|
|
11
|
+
######################################################################################################
|
|
12
|
+
|
|
13
|
+
# init_work_manager is executed at the start, prior to any processing.
|
|
14
|
+
# You can use init_work_manager to initialize global variables, open files, etc...
|
|
15
|
+
# Note that an instance of MyWorkerManager will be created for each
|
|
16
|
+
# worker connection, and thus, all global variables here should be
|
|
17
|
+
# class variables (starting with @@)
|
|
18
|
+
def self.init_work_manager(options)
|
|
19
|
+
@@options = options
|
|
20
|
+
@@positions, @@groups = load_selected_positions(@@options[:selected_positions])
|
|
21
|
+
@@active_data = File.open(File.join(@@options[:preprocessed_data], 'active_data')).readlines.map {|item| item.chomp}
|
|
22
|
+
@@used_data = 0
|
|
23
|
+
@@used_position = 0
|
|
24
|
+
@@all_data = {}
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# end_work_manager is executed at the end, when all the process is done.
|
|
28
|
+
# You can use it to close files opened in init_work_manager
|
|
29
|
+
def self.end_work_manager
|
|
30
|
+
positions_ids = []
|
|
31
|
+
scores = {} # Create genomic features table
|
|
32
|
+
$LOG.info "Create general scores table"
|
|
33
|
+
@@all_data.each do |data, positions_info|
|
|
34
|
+
data_scores = []
|
|
35
|
+
positions_info.each do |chr, position_info|
|
|
36
|
+
position_info.each do |position, score|
|
|
37
|
+
data_scores << ["#{chr}_#{position.to_s}", score]
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
data_scores.sort!{|sc1, sc2| sc1.first <=> sc2.first}
|
|
41
|
+
scores[data] = data_scores.map{|sc| sc.last}
|
|
42
|
+
positions_ids = data_scores.map{|sc| sc.first} if positions_ids.empty?
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
if !@@groups.empty?
|
|
46
|
+
tags = positions_ids.map{|id| # Create vector tag group related to scores table
|
|
47
|
+
tag = @@groups[id]
|
|
48
|
+
if tag == 0
|
|
49
|
+
tag = -1
|
|
50
|
+
else
|
|
51
|
+
tag = 1
|
|
52
|
+
end
|
|
53
|
+
}
|
|
54
|
+
if !@@options[:no_auc]
|
|
55
|
+
$LOG.info "Calculating AUC for each genomic feature"
|
|
56
|
+
aucs = get_aucs(tags, scores) # GEnerate area under curve by each genomic feature
|
|
57
|
+
File.open(File.join(@@options[:selected_positions_folder], 'AUCs'), 'w'){ |f|
|
|
58
|
+
aucs.each do |data_type, auc|
|
|
59
|
+
f.puts "#{data_type}\t#{auc.join("\t")}"
|
|
60
|
+
end
|
|
61
|
+
}
|
|
62
|
+
end
|
|
63
|
+
$LOG.info "Creating training files for tensorflow"
|
|
64
|
+
create_positions_sets_for_tensorflow(@@options[:selected_positions_folder], scores, tags)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
data_types = scores.keys
|
|
68
|
+
File.open(File.join(@@options[:selected_positions_folder], 'all_data'), 'w'){ |f| #final genomic feature scores table for goldstandard
|
|
69
|
+
f.puts ['position'].concat(data_types).join("\t")
|
|
70
|
+
positions_ids.each_with_index do |id, i|
|
|
71
|
+
record = [id]
|
|
72
|
+
data_types.each do |dt|
|
|
73
|
+
record << scores[dt][i]
|
|
74
|
+
end
|
|
75
|
+
f.puts record.join("\t")
|
|
76
|
+
end
|
|
77
|
+
}
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# worker_initial_config is used to send initial parameters to workers.
|
|
81
|
+
# The method is executed once per each worker
|
|
82
|
+
def worker_initial_config
|
|
83
|
+
return @@options
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# next_work method is called every time a worker needs a new work
|
|
87
|
+
# Here you can read data from disk
|
|
88
|
+
# This method must return the work data or nil if no more data is available
|
|
89
|
+
def next_work
|
|
90
|
+
begin
|
|
91
|
+
if @@used_data >= @@active_data.length
|
|
92
|
+
e = nil # worker signal disconect
|
|
93
|
+
else
|
|
94
|
+
chr = @@positions.keys[@@used_position]
|
|
95
|
+
e = [@@active_data[@@used_data], chr, @@positions[chr]]
|
|
96
|
+
@@used_position += 1
|
|
97
|
+
if @@used_position >= @@positions.length
|
|
98
|
+
@@used_data +=1
|
|
99
|
+
@@used_position = 0
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
rescue Exception => e
|
|
104
|
+
puts e.message
|
|
105
|
+
puts e.backtrace
|
|
106
|
+
|
|
107
|
+
end
|
|
108
|
+
return e
|
|
109
|
+
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# work_received is executed each time a worker has finished a job.
|
|
114
|
+
# Here you can write results down to disk, perform some aggregated statistics, etc...
|
|
115
|
+
def work_received(results)
|
|
116
|
+
results.each do |data, positions_info|
|
|
117
|
+
query = @@all_data[data]
|
|
118
|
+
if query.nil?
|
|
119
|
+
@@all_data[data] = positions_info
|
|
120
|
+
else
|
|
121
|
+
@@all_data[data] = query.merge(positions_info)
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
######################################################################################################
|
|
127
|
+
## CUSTOM ADDITIONAL METHODS
|
|
128
|
+
######################################################################################################
|
|
129
|
+
|
|
130
|
+
def self.load_selected_positions(file_path)
|
|
131
|
+
selected_positions = {}
|
|
132
|
+
groups = {}
|
|
133
|
+
File.open(file_path).each do |line|
|
|
134
|
+
line.chomp!
|
|
135
|
+
chr, position, group = line.split("\t")
|
|
136
|
+
record = position.to_i
|
|
137
|
+
if !group.nil?
|
|
138
|
+
group = group.to_i
|
|
139
|
+
groups["#{chr}_#{position}"] = group
|
|
140
|
+
end
|
|
141
|
+
query = selected_positions[chr]
|
|
142
|
+
if query.nil?
|
|
143
|
+
selected_positions[chr] = [record]
|
|
144
|
+
else
|
|
145
|
+
query << record
|
|
146
|
+
query.uniq!
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
selected_positions.each do |chr, positions|
|
|
150
|
+
positions.sort!
|
|
151
|
+
end
|
|
152
|
+
return selected_positions, groups
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def self.get_aucs(tags, scores)
|
|
156
|
+
aucs = {}
|
|
157
|
+
scores.each do | data_type, scores|
|
|
158
|
+
matrix = []
|
|
159
|
+
scores.each_with_index do |score, i|
|
|
160
|
+
matrix << [score, tags[i]]
|
|
161
|
+
end
|
|
162
|
+
pts = ROC.curve_points(matrix)
|
|
163
|
+
aucs[data_type] = [ROC.auc(matrix), GChart.scatter(:data => [pts.collect { |x| x[0] }, pts.collect { |x| x[1] }]).to_url]
|
|
164
|
+
end
|
|
165
|
+
return aucs
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def self.create_positions_sets_for_tensorflow(path_folder, scores, tags)
|
|
169
|
+
validation_set_proportion = 0.2
|
|
170
|
+
positions_number = tags.length
|
|
171
|
+
validation_set_length = (positions_number * validation_set_proportion).to_i
|
|
172
|
+
training_set_length = positions_number - validation_set_length
|
|
173
|
+
validation_set_positions = [] # Set which positions will belong to validation set
|
|
174
|
+
while validation_set_positions.length < validation_set_length
|
|
175
|
+
position = rand(positions_number - 1) # We need random 0 based positions
|
|
176
|
+
validation_set_positions << position if !validation_set_positions.include?(position)
|
|
177
|
+
end
|
|
178
|
+
tags.map!{|t| #tensorflow nedd positive integer as tags, we change tag used in AUC operation
|
|
179
|
+
if t == -1
|
|
180
|
+
0
|
|
181
|
+
else
|
|
182
|
+
t
|
|
183
|
+
end
|
|
184
|
+
}
|
|
185
|
+
genomic_features = scores.keys
|
|
186
|
+
training_set = []
|
|
187
|
+
validation_set = []
|
|
188
|
+
tags.each_with_index do |tag, n|
|
|
189
|
+
record = [] # Create record position
|
|
190
|
+
genomic_features.each do |gf|
|
|
191
|
+
record << scores[gf][n]
|
|
192
|
+
end
|
|
193
|
+
record << tag
|
|
194
|
+
if validation_set_positions.include?(n) # Send record to correspondant set
|
|
195
|
+
validation_set << record
|
|
196
|
+
else
|
|
197
|
+
training_set << record
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
tag_names = tags.uniq #TODO: improve to ensure exact correspondance
|
|
201
|
+
training_set.unshift([training_set.length, genomic_features.length].concat(tag_names)) # set headers
|
|
202
|
+
validation_set.unshift([validation_set.length, genomic_features.length].concat(tag_names)) # set headers
|
|
203
|
+
write_set(training_set, File.join(path_folder, 'training_set.csv'))
|
|
204
|
+
write_set(validation_set, File.join(path_folder, 'validation_set.csv'))
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def self.write_set(set, path)
|
|
208
|
+
File.open(path, 'w'){|f|
|
|
209
|
+
set.each do |record|
|
|
210
|
+
f.puts record.join(',')
|
|
211
|
+
end
|
|
212
|
+
}
|
|
213
|
+
end
|
|
214
|
+
end
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
ROOT_PATH=File.dirname(__FILE__)
|
|
2
|
+
$: << File.expand_path(ROOT_PATH)
|
|
3
|
+
|
|
4
|
+
require 'benchmark'
|
|
5
|
+
|
|
6
|
+
# MyWorker defines the behaviour of workers.
|
|
7
|
+
# Here is where the real processing takes place
|
|
8
|
+
class PositionSelectionWorker < ScbiMapreduce::Worker
|
|
9
|
+
|
|
10
|
+
######################################################################################################
|
|
11
|
+
## WORKER BASIC METHODS
|
|
12
|
+
######################################################################################################
|
|
13
|
+
|
|
14
|
+
# starting_worker method is called one time at initialization
|
|
15
|
+
# and allows you to initialize your variables
|
|
16
|
+
def starting_worker
|
|
17
|
+
|
|
18
|
+
# You can use worker logs at any time in this way:
|
|
19
|
+
$WORKER_LOG.info "Starting a worker"
|
|
20
|
+
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# receive_initial_config is called only once just after
|
|
25
|
+
# the first connection, when initial parameters are
|
|
26
|
+
# received from manager
|
|
27
|
+
def receive_initial_config(parameters)
|
|
28
|
+
@options = parameters
|
|
29
|
+
# Reads the parameters
|
|
30
|
+
|
|
31
|
+
# You can use worker logs at any time in this way:
|
|
32
|
+
$WORKER_LOG.info "Params received"
|
|
33
|
+
|
|
34
|
+
# save received parameters, if any
|
|
35
|
+
# @params = parameters
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# process_object method is called for each received object.
|
|
40
|
+
# Be aware that objs is always an array, and you must iterate
|
|
41
|
+
# over it if you need to process it independently
|
|
42
|
+
#
|
|
43
|
+
# The value returned here will be received by the work_received
|
|
44
|
+
# method at your worker_manager subclass.
|
|
45
|
+
def process_object(objs)
|
|
46
|
+
all_data = nil
|
|
47
|
+
Benchmark.bm do |x|
|
|
48
|
+
x.report('PosS'){
|
|
49
|
+
|
|
50
|
+
packs, datas = get_info_to_search(objs)
|
|
51
|
+
all_data = {}
|
|
52
|
+
datas.each do |data|
|
|
53
|
+
selected_scores = {}
|
|
54
|
+
packs.each do |chr, ps|
|
|
55
|
+
scores = []
|
|
56
|
+
ps.each do |pack, positions|
|
|
57
|
+
info_path = File.join(@options[:preprocessed_data], data, "#{chr}_#{pack}.gz")
|
|
58
|
+
#puts info_path
|
|
59
|
+
if File.exists?(info_path)
|
|
60
|
+
chr_data = []
|
|
61
|
+
Zlib::GzipReader.open(info_path) {|gz| chr_data = JSON.parse(gz.read)}
|
|
62
|
+
scores.concat(get_scores(chr_data, positions))
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
selected_scores[chr] = scores
|
|
66
|
+
end
|
|
67
|
+
all_data[data] = selected_scores
|
|
68
|
+
end
|
|
69
|
+
# return objs back to manager
|
|
70
|
+
|
|
71
|
+
}
|
|
72
|
+
end
|
|
73
|
+
return all_data
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# called once, when the worker is about to be closed
|
|
77
|
+
def closing_worker
|
|
78
|
+
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
######################################################################################################
|
|
82
|
+
## WORKER CUSTOM METHODS
|
|
83
|
+
######################################################################################################
|
|
84
|
+
|
|
85
|
+
def get_info_to_search(objs)
|
|
86
|
+
packs = {}
|
|
87
|
+
datas = []
|
|
88
|
+
objs.each do |data, chr, positions| # Analyse which chromosomes and packs must be loaded
|
|
89
|
+
datas << data if !datas.include?(data)
|
|
90
|
+
positions.each do |position|
|
|
91
|
+
pack = position/@options[:index_size]
|
|
92
|
+
pack = pack * @options[:index_size]
|
|
93
|
+
#puts "#{position} ==> #{pack}"
|
|
94
|
+
query_chr = packs[chr]
|
|
95
|
+
if query_chr.nil?
|
|
96
|
+
packs[chr] = { pack => [position]}
|
|
97
|
+
else
|
|
98
|
+
query_pack = query_chr[pack]
|
|
99
|
+
if query_pack.nil?
|
|
100
|
+
query_chr[pack] = [position]
|
|
101
|
+
else
|
|
102
|
+
query_pack << position
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
return packs, datas
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def get_scores(chr_data, positions)
|
|
111
|
+
positions_scores = []
|
|
112
|
+
# Remove positions out of existing coordinates
|
|
113
|
+
lower_limit = chr_data.first.first
|
|
114
|
+
upper_limit = chr_data.last.first
|
|
115
|
+
positions_scores.concat(positions.select{|pos| pos < lower_limit}.map{|pos| [pos, 0]}) #At the beginning
|
|
116
|
+
filtered_positions = positions.select{|pos| pos >= lower_limit && pos <= upper_limit }
|
|
117
|
+
#--------------------------------------------------------------------------------------------------
|
|
118
|
+
if !filtered_positions.empty?
|
|
119
|
+
current_position = filtered_positions.shift
|
|
120
|
+
chr_data.each do |coord, score|
|
|
121
|
+
if coord == current_position
|
|
122
|
+
positions_scores << [current_position, score]
|
|
123
|
+
break if filtered_positions.empty?
|
|
124
|
+
current_position = filtered_positions.shift
|
|
125
|
+
elsif coord > current_position # We have encountered a gap and current position is in it
|
|
126
|
+
while coord > current_position # drop positions within the gap
|
|
127
|
+
positions_scores << [current_position, 0]
|
|
128
|
+
break if filtered_positions.empty?
|
|
129
|
+
current_position = filtered_positions.shift
|
|
130
|
+
end
|
|
131
|
+
break if filtered_positions.empty?
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
positions_scores.concat(positions.select{|pos| pos > upper_limit}.map{|pos| [pos, 0]}) # At the end
|
|
137
|
+
|
|
138
|
+
return positions_scores
|
|
139
|
+
end
|
|
140
|
+
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
|
|
3
|
+
# MyWorkerManager class is used to implement the methods
|
|
4
|
+
# to send and receive the data to or from workers
|
|
5
|
+
class PreprocessingManager < ScbiMapreduce::WorkManager
|
|
6
|
+
|
|
7
|
+
######################################################################################################
|
|
8
|
+
## MANAGER BASIC METHODS
|
|
9
|
+
######################################################################################################
|
|
10
|
+
|
|
11
|
+
# init_work_manager is executed at the start, prior to any processing.
|
|
12
|
+
# You can use init_work_manager to initialize global variables, open files, etc...
|
|
13
|
+
# Note that an instance of MyWorkerManager will be created for each
|
|
14
|
+
# worker connection, and thus, all global variables here should be
|
|
15
|
+
# class variables (starting with @@)
|
|
16
|
+
def self.init_work_manager(options)
|
|
17
|
+
@@options = options
|
|
18
|
+
$LOG.info 'Load genomic features links'
|
|
19
|
+
@@features = load_links(options[:file])
|
|
20
|
+
$LOG.info "Loaded #{@@features.length} genomic features links"
|
|
21
|
+
|
|
22
|
+
# FEATURE DIRECTORIES
|
|
23
|
+
@@features.each do |feature|
|
|
24
|
+
ft_folder = File.join(@@options[:preprocessed_data], feature[1]) #feature name
|
|
25
|
+
ft_temp_folder = File.join(@@options[:temp], feature[1])
|
|
26
|
+
Dir.mkdir(ft_folder) if !Dir.exist?(ft_folder)
|
|
27
|
+
Dir.mkdir(ft_temp_folder) if !Dir.exist?(ft_temp_folder)
|
|
28
|
+
end
|
|
29
|
+
@@processed_features = 0
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# end_work_manager is executed at the end, when all the process is done.
|
|
33
|
+
# You can use it to close files opened in init_work_manager
|
|
34
|
+
def self.end_work_manager
|
|
35
|
+
File.open(File.join(@@options[:preprocessed_data], 'active_data'), 'w'){ |f| f.puts @@features.map{|f| f[1]}.uniq.join("\n")}
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# worker_initial_config is used to send initial parameters to workers.
|
|
39
|
+
# The method is executed once per each worker
|
|
40
|
+
def worker_initial_config
|
|
41
|
+
return @@options
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# next_work method is called every time a worker needs a new work
|
|
45
|
+
# Here you can read data from disk
|
|
46
|
+
# This method must return the work data or nil if no more data is available
|
|
47
|
+
def next_work
|
|
48
|
+
begin
|
|
49
|
+
if @@processed_features >= @@features.length
|
|
50
|
+
e = nil # worker signal disconect
|
|
51
|
+
else
|
|
52
|
+
e = @@features[@@processed_features]
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
@@processed_features += 1
|
|
56
|
+
rescue Exception => e
|
|
57
|
+
puts e.message
|
|
58
|
+
puts e.backtrace
|
|
59
|
+
|
|
60
|
+
end
|
|
61
|
+
return e
|
|
62
|
+
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# work_received is executed each time a worker has finished a job.
|
|
67
|
+
# Here you can write results down to disk, perform some aggregated statistics, etc...
|
|
68
|
+
def work_received(results)
|
|
69
|
+
|
|
70
|
+
# write_data_to_disk(results)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
######################################################################################################
|
|
74
|
+
## CUSTOM ADDITIONAL METHODS
|
|
75
|
+
######################################################################################################
|
|
76
|
+
|
|
77
|
+
# Parses a tab-separated links file describing the datasets to fetch.
# Each line holds: download link, feature name, comma-separated column
# indices, header-line count, and file format.
# Returns an array of [link, feature, [cols...], header, format] records.
def self.load_links(file_path)
  features = []
  # File.foreach closes the file automatically; the original
  # File.open(file_path).each leaked the file handle.
  File.foreach(file_path) do |line|
    line.chomp!
    link, feature, cols, header, format = line.split("\t")
    features << [link, feature, cols.split(',').map { |col| col.to_i }, header.to_i, format]
  end
  return features
end
|
|
86
|
+
|
|
87
|
+
end
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
ROOT_PATH=File.dirname(__FILE__)
$: << File.expand_path(ROOT_PATH)

require 'benchmark'
require 'open-uri'
require 'yajl'
require 'zlib'

require 'file_parser'
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# MyWorker defines the behaviour of workers.
|
|
11
|
+
# Here is where the real processing takes place
|
|
12
|
+
# Downloads each dataset link received from the manager and converts it
# into the indexed, compressed internal format via the parser that
# matches the dataset's declared file format.
class PreprocessingWorker < ScbiMapreduce::Worker

  ######################################################################################################
  ## WORKER BASIC METHODS
  ######################################################################################################

  # Called once when the worker process starts.
  def starting_worker
    # You can use worker logs at any time in this way:
    $WORKER_LOG.info "Starting a worker"
  end

  # Called only once, just after the first connection, when the manager
  # sends the shared option hash (see worker_initial_config there).
  def receive_initial_config(parameters)
    @options = parameters
    $WORKER_LOG.info "Params received"
  end

  # Called for each batch of work units. objs is an array of
  # [link, feature, cols, header, format] records. Each link is
  # downloaded into the feature's temp folder and — unless the
  # :downloaded_only option is set — parsed into the preprocessed data
  # folder. Returns an empty array: results are written to disk, not
  # sent back to the manager.
  def process_object(objs)
    Benchmark.bm do |bench|
      bench.report('Prep') {
        FileParser.load
        objs.each do |link, feature, cols, header, format|
          $WORKER_LOG.info "Processing link: #{feature}, #{format}, #{link}"
          ft_folder = File.join(@options[:preprocessed_data], feature)
          ft_temp_folder = File.join(@options[:temp], feature)
          temp_file = download_data(link, cols, header, format, ft_temp_folder)
          if !@options[:downloaded_only]
            # temp_file is nil for unsupported protocols (e.g. ftp);
            # guard it — File.exist?(nil) raises TypeError.
            if !temp_file.nil? && File.exist?(temp_file)
              extract_data(format, temp_file, ft_folder)
            else
              $WORKER_LOG.info "WARNING: Temporal file #{temp_file} have not been downloaded for feature #{feature} so it will be skipped"
            end
          else
            $WORKER_LOG.info "Download only mode, skipping processing temp files"
          end
        end
      }
    end
    # return objs back to manager
    return []
  end

  # Called once, when the worker is about to be closed.
  def closing_worker

  end

  ######################################################################################################
  ## WORKER CUSTOM METHODS
  ######################################################################################################

  # Download protocols
  #-----------------------------------------
  # Downloads link into the temp folder, reusing files left over from a
  # previous run. Only http is supported; ftp is recognised but not
  # implemented. Returns the local file path, or nil when the protocol
  # is unsupported.
  def download_data(link, cols, header, format, temp)
    protocol, url = link.split('://')
    temp_file = nil
    if protocol == 'http'
      temp_file = File.join(temp, url.split('/').last)
      if !File.exist?(temp_file)
        get_http_data(url, temp_file)
        $WORKER_LOG.info "Downloading #{link}"
      else
        $WORKER_LOG.info "Link was downloaded in a previous execution. Skipping download #{link}"
      end
    elsif protocol == 'ftp'
      # TODO: ftp downloads are not implemented; temp_file stays nil.
    else
      $WORKER_LOG.info "WARNING: protocol: #{protocol} in link: #{link} is not supported"
    end
    return temp_file
  end

  # Streams an http resource to a local file.
  def get_http_data(url, temp)
    File.open(temp, "wb") do |saved_file|
      # OpenURI.open_uri instead of Kernel#open: avoids the Kernel#open
      # pipe/command footgun and keeps working on Ruby >= 3.0 where
      # Kernel#open no longer handles URLs.
      OpenURI.open_uri("http://#{url}") do |read_file|
        # Copy in chunks instead of reading the whole body into memory.
        IO.copy_stream(read_file, saved_file)
      end
    end
  end

  # File decompression methods
  #-----------------------------------------
  # Feeds the downloaded file line by line to the parser selected for
  # format; the parser writes the indexed/compressed output into folder.
  # Returns an empty hash (kept for interface compatibility with the
  # original, data is persisted by the parser itself).
  def extract_data(format, temp, folder)
    data = {}
    parser_class = FileParser.select(format)
    parser = parser_class.new(folder, @options[:index_size])
    $WORKER_LOG.info "Processing temporal file #{temp}"
    if temp.include?('.gz')
      get_gz(temp, parser)
    else
      # Uncompressed inputs are not handled yet.
    end
    parser.write_compressed_data # Write remaining buffered data
    $WORKER_LOG.info "End processing temporal file #{temp}"
    return data
  end

  # Iterates a gzipped file line by line through the parser.
  def get_gz(temp, parser)
    Zlib::GzipReader.open(temp) do |gz|
      gz.each do |line|
        parser.parse(line.chomp)
      end
    end
  end
end
|
data/lib/anncrsnp/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: anncrsnp
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.6
|
|
4
|
+
version: 0.1.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Elena Rojano
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2016-
|
|
12
|
+
date: 2016-09-28 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: bundler
|
|
@@ -98,6 +98,7 @@ executables:
|
|
|
98
98
|
- grdbfinder.rb
|
|
99
99
|
- grdbmanager.rb
|
|
100
100
|
- masterfeatures.rb
|
|
101
|
+
- retriever.rb
|
|
101
102
|
- setup
|
|
102
103
|
- statistics.rb
|
|
103
104
|
extensions: []
|
|
@@ -115,6 +116,7 @@ files:
|
|
|
115
116
|
- bin/grdbfinder.rb
|
|
116
117
|
- bin/grdbmanager.rb
|
|
117
118
|
- bin/masterfeatures.rb
|
|
119
|
+
- bin/retriever.rb
|
|
118
120
|
- bin/setup
|
|
119
121
|
- bin/statistics.rb
|
|
120
122
|
- database/.DS_Store
|
|
@@ -122,7 +124,13 @@ files:
|
|
|
122
124
|
- database/deleteme
|
|
123
125
|
- lib/anncrsnp.rb
|
|
124
126
|
- lib/anncrsnp/dataset.rb
|
|
127
|
+
- lib/anncrsnp/file_parser.rb
|
|
128
|
+
- lib/anncrsnp/file_parsers/wigfix_parser.rb
|
|
125
129
|
- lib/anncrsnp/parsers/ucscparser.rb
|
|
130
|
+
- lib/anncrsnp/position_selection_manager.rb
|
|
131
|
+
- lib/anncrsnp/position_selection_worker.rb
|
|
132
|
+
- lib/anncrsnp/preprocessing_manager.rb
|
|
133
|
+
- lib/anncrsnp/preprocessing_worker.rb
|
|
126
134
|
- lib/anncrsnp/version.rb
|
|
127
135
|
homepage: ''
|
|
128
136
|
licenses:
|