anncrsnp 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,226 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp'))
5
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp', 'parsers'))
6
+
7
+ require 'optparse'
8
+ require 'ucscparser'
9
+ require 'dataset'
10
+ require 'sqlite3'
11
+
12
# Command-line options, pre-populated with their defaults.
# NOTE: the boolean default uses the `false` literal; the legacy `FALSE`/`TRUE`
# constants no longer exist on Ruby >= 3.2 and raise NameError there.
options = {
  data: nil,                          # -d: directory scanned for source files
  create_sql: false,                  # -s: build/update the SQLite DB
  output_path: "genomic_data.sqlite", # -o: path of the SQLite DB
  verbose: nil                        # -v / --no-verbose
}
OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  opts.on("-d", "--data_directory PATH", "Directory used to extract data") do |data|
    options[:data] = data
  end

  opts.on("-s", "--create_sql", "Create SQL DB") do
    options[:create_sql] = true
  end

  opts.on("-o", '--output_path PATH', 'Output path for DB') do |output_path|
    options[:output_path] = output_path
  end

  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end
end.parse!
37
+
38
+
39
+
40
# Parsed datasets, keyed by the label used as the record type during the SQL import.
all_data = {}
# The nil check avoids a TypeError from File.exist?(nil) when -d was not given.
if options[:data] && File.exist?(options[:data])
  Dir.glob(File.join(options[:data], '*.{txt,bed,csv}')).each do |file| # path to each candidate file in the directory
    current_file = File.basename(file)
    ### Definitive sources
    # If the bin field from UCSC doesn't exist, pass false as the third argument of parseUCSCformat.
    # Files whose name is not listed below are ignored.
    case current_file
    when "wgEncodeAwgDnaseMasterSites.bed"
      header = [:score, :floatScore, :sourceCount, :sourceIds]
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.numeric_filter(:sourceCount, 2)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'DNAseHS')
      all_data['dnaseData'] = current_dataset
    when "wgEncodeHaibMethyl450Ag04449SitesRep1.bed"
      header = [:score, :strand, :thickStart, :thickEnd, :itemRgb]
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'Metilation_sites')
      all_data['metilationData'] = current_dataset
    when "snp144Common.txt" # current_file == "test.txt"
      header = [:score, :strand, :refNCBI, :refUCSC, :observed, :molType, :class, :valid, :avHet, :avHetSE, :func, :locType, :weight, :exceptions, :submitterCount, :submitters, :alleleFreqCount, :alleles, :alleleNs, :alleleFreqs, :bitfields]
      current_dataset = parseUCSCformat(file, header)
      # Every header column except :class and :func is dropped.
      current_dataset.drop_columns([:score, :strand, :refNCBI, :refUCSC, :observed, :molType, :valid, :avHet, :avHetSE, :locType, :weight, :exceptions, :submitterCount, :submitters, :alleleFreqCount, :alleles, :alleleNs, :alleleFreqs, :bitfields])
      current_dataset.add_metadata(:classification, 'SNP')
      all_data['snpDbSnp'] = current_dataset
    when "refGene.txt"
      header = [:name, :strand, :cdsStart, :cdsEnd, :exonCount, :exonStarts, :exonEnds, :score, :cdsStartStat, :cdsEndStat, :exonFrames]
      current_dataset = parseUCSCrefseqformat(file, header)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'gene')
      all_data['gene'] = current_dataset
    when "TFBSMasterSites.txt" # Must be generated with "masterfeatures.rb tfbs/files.txt antibody import_data/TFBSMasterSites.txt tfbs/"
      header = []
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.add_metadata(:classification, 'TFBS')
      all_data['tfbs'] = current_dataset
    when "HistoneModMasterSites.txt" # Must be generated with "masterfeatures.rb tfbs/files.txt antibody import_data/TFBSMasterSites.txt tfbs/"
      # NOTE(review): the command above is identical to the TFBS one; confirm the histone-specific paths.
      header = []
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.add_metadata(:classification, 'HistoneModification')
      all_data['HistoneModification'] = current_dataset
    when "46waycons.txt"
      header = [:span, :count, :offset, :file, :lowerLimit, :dataRange, :validCount, :sumData, :sumSquares]
      current_dataset = parseUCSCformat(file, header)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'ConservedRegions')
      all_data['ConservedRegions'] = current_dataset
    when "enhancer_tss_associations.bed"
      header = [:score, :strand, :enh_start, :enh_stop, :array, :index, :val1, :val2]
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'Enhancers')
      all_data['Enhancers'] = current_dataset
    when "enhancers.csv"
      header = [:cell_line, :index1, :index2, :index3, :index4, :index5, :index6, :index7]
      current_dataset = parseDENdbCSVformat(file, header)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'DENdbEnhancers')
      all_data['DENdbEnhancers'] = current_dataset
    when "all_hg19_bed.bed"
      header = [:counter]
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'SuperEnhancers')
      all_data['SuperEnhancers'] = current_dataset
    end
  end
end
108
+
109
# Builds (or incrementally updates) the SQLite database from all_data.
# Everything that touches the DB handle lives inside this branch, so running the
# script without --create_sql no longer fails on an undefined DB constant.
if options[:create_sql]
  # Table definitions are queued only for a brand-new database file; an empty
  # list therefore means "incremental update of an existing DB" further down.
  commands = []
  unless File.exist?(options[:output_path]) # File.exists? was removed in Ruby 3.2
    commands << "CREATE TABLE GenomicRange(
      bin,
      chr,
      start,
      end,
      type,
      name,
      AnnotationId
    )"
    commands << "CREATE TABLE Annotation(
      value,
      AnnotationTypeId
    )"
    commands << "CREATE TABLE AnnotationType(
      type
    )"
    #File.delete(options[:output_path])
  end

  DB = SQLite3::Database.new(options[:output_path])
  begin
    commands.each do |cmd|
      DB.execute(cmd)
    end
    # Import data process speed up configuration
    DB.execute("PRAGMA synchronous = OFF;")
    DB.execute("PRAGMA journal_mode = MEMORY;")

    # In-memory indexes (value => rows, each row being [rowid, columns...]) for incremental updates
    annotation_type_index = DB.execute("SELECT rowid, * FROM AnnotationType").group_by { |r| r[1] }
    annotation_index = DB.execute("SELECT rowid, * FROM Annotation").group_by { |r| r[1] }

    all_data.each do |class_data, dataset|
      puts "#{class_data} import started"
      # Save and create AnnotationType data
      #------------------------------------------------------------------
      header = dataset.get_metadata(:header)
      annotation_type = header.map { |h| h.to_s }
      annotation_type.shift(4) # the first four header fields are not annotation types
      records = if !commands.empty?
                  annotation_type
                else
                  # Only types not stored yet. A missing type has no index entry at all,
                  # so test the entry itself (calling .first on it would raise on nil).
                  annotation_type.select { |at| annotation_type_index[at].nil? }
                end

      DB.transaction do |db|
        db.prepare("INSERT INTO AnnotationType(type) VALUES(?)") do |smnt| # Precompile query to speed up the process
          records.each do |rec|
            smnt.execute(rec)
          end
        end
      end
      annotation_type_index = DB.execute("SELECT rowid, * FROM AnnotationType").group_by { |r| r[1] } if !records.empty?

      # Save and create Annotation data
      #------------------------------------------------------------------
      annotations = {}
      annotation_type.each do |at|
        annotations[at] = {}
      end
      if dataset.first.length > 4
        dataset.each_record do |record|
          # NOTE(review): values are read from index 5 on, while the GenomicRange step
          # below treats everything after the first 4 fields as annotations - confirm
          # the intended offset against the Dataset record layout.
          record[5..record.length - 1].each_with_index do |annotation, i|
            annotations[annotation_type[i]][annotation] = nil
          end
        end
      end

      records = []
      annotations.each do |type_name, values|
        annotation_type_id = annotation_type_index[type_name].first.first
        new_values = if !commands.empty?
                       values.keys
                     else
                       values.keys.select { |v| annotation_index[v].nil? }
                     end
        records.concat(new_values.map { |v| [v, annotation_type_id] })
      end

      DB.transaction do |db|
        db.prepare("INSERT INTO Annotation(value, AnnotationTypeId) VALUES(?, ?)") do |smnt|
          records.each do |rec|
            smnt.execute(rec[0], rec[1])
          end
        end
      end
      annotation_index = DB.execute("SELECT rowid, * FROM Annotation").group_by { |r| r[1] } if !records.empty?

      # Save and create GenomicRange data
      #------------------------------------------------------------------
      DB.transaction do |db|
        db.prepare("INSERT INTO GenomicRange(bin, chr, start, end, type, name, AnnotationId) VALUES(?, ?, ?, ?, ?, ?, ?)") do |smnt|
          dataset.each_record do |record|
            region_data = record.shift(4)
            annotation_ids = []
            record.each do |annotation|
              id = annotation_index[annotation]
              annotation_ids << id.first.first if !id.nil?
            end
            smnt.execute(
              region_data[1]/10000, # bin: second region field divided by 10000
              region_data[0],
              region_data[1],
              region_data[2],
              class_data,
              region_data[3],
              annotation_ids.join(',')
            )
          end
        end
      end
      puts "#{class_data} import finished"
    end

    # IF NOT EXISTS keeps incremental runs on an existing DB from failing here.
    DB.execute("CREATE INDEX IF NOT EXISTS name_index ON GenomicRange (name)")
    DB.execute("CREATE INDEX IF NOT EXISTS bin_index ON GenomicRange (bin)")
  ensure
    DB.close
  end
end
@@ -0,0 +1,188 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'benchmark'
4
+
5
+ #Description
6
+ #--------------
7
+ #Tool to unify data from ENCODE
8
+
9
+ #Methods
10
+ #--------------
11
+
12
# Parses a tab-separated metadata file into a hash keyed by file name.
#
# Each useful line looks like "<file name>\t<key=value; key=value; ...>".
# Every ".gz" occurrence is removed from the file name before it is used as key.
# Lines without a second tab-separated field (blank lines included) are skipped.
#
# @param file [String] path of the metadata file
# @return [Hash{String => Hash{String => String}}] file name => { metadata key => value }
def load_metadata_file(file)
  name_storage = {}
  # File.foreach closes the handle even if parsing raises.
  File.foreach(file) do |line|
    fields = line.chomp.split("\t")
    next if fields[1].nil?

    features_storage = {} # metadata hash
    fields[1].split("; ").each do |feature|
      # Split on the first '=' only, so values containing '=' are kept whole.
      met_name, metadata = feature.split("=", 2)
      features_storage[met_name] = metadata
    end
    name_storage[fields[0].gsub('.gz', '')] = features_storage
  end
  name_storage
end
29
+
30
# Groups file names by the value of one metadata field (e.g. the antibody).
# Files whose metadata lacks that field are left out of the result.
#
# @param grouping_element [String] metadata key to group by
# @param name_storage [Hash{String => Hash}] file name => metadata hash
# @return [Hash{String => Array<String>}] field value => file names, in input order
def element_grouper(grouping_element, name_storage)
  name_storage.each_with_object({}) do |(file_name, metadata), groups|
    group_key = metadata[grouping_element]
    next if group_key.nil? # no value for this field: nothing to group on

    if groups[group_key].nil?
      groups[group_key] = [file_name]
    else
      groups[group_key] << file_name
    end
  end
end
44
+
45
# Loads a tab-separated region file (chromosome, start, end, ...) into a
# two-level index: chromosome => bin => list of [start, end] pairs.
# The bin is the start coordinate divided by 10000 (integer division).
# Columns after the third are ignored; lines with fewer than three fields
# (blank lines included) are skipped.
#
# @param file_name [String] path of the region file
# @return [Hash{String => Hash{Integer => Array<Array<Integer>>}}]
def load_files_to_compare(file_name)
  genomic_regions = {}
  # File.foreach closes the handle when done (the bare File.open(...).each did not).
  File.foreach(file_name) do |line|
    chr, *coords = line.chomp.split("\t")
    next if chr.nil? || coords.length < 2

    region = coords[0..1].map { |c| c.to_i }
    bin = region.first / 10000
    ((genomic_regions[chr] ||= {})[bin] ||= []) << region
  end
  genomic_regions
end
67
+
68
# Returns the regions of genomic_regions_to_compare that match no region of
# main_genomic_regions located in the same chromosome and bin.
# Matching is delegated to compare_genomics_regions_coords; regions whose
# chromosome or bin is absent from the main set are all kept.
#
# @param main_genomic_regions [Hash] chromosome => bin => [[start, end], ...]
# @param genomic_regions_to_compare [Hash] same layout as main_genomic_regions
# @param thresold_overlap [Numeric] threshold forwarded to the coordinate comparison
# @return [Hash] unmatched regions, same chromosome => bin => regions layout
def compare_genomics_regions(main_genomic_regions, genomic_regions_to_compare, thresold_overlap)
  selected_genomic_regions = {}
  genomic_regions_to_compare.each do |chr_reg, bins_to_compare|
    reference_bins = main_genomic_regions[chr_reg]
    bins_to_compare.each do |bin, regs|
      # nil means "nothing to compare against": every region of the bin is kept
      candidates = reference_bins.nil? ? nil : reference_bins[bin]
      regs.each do |reg|
        matched = !candidates.nil? && candidates.any? do |main|
          compare_genomics_regions_coords(main, reg, thresold_overlap)
        end
        save_reg(selected_genomic_regions, chr_reg, bin, reg) unless matched
      end
    end
  end
  return selected_genomic_regions
end
97
+
98
# Appends a single region to the chromosome => bin => regions index,
# creating the chromosome and/or bin entry when it does not exist yet.
#
# @param selected_genomic_regions [Hash] index modified in place
# @param chr_reg [String] chromosome name
# @param bin [Integer] bin number
# @param reg [Array<Integer>] the [start, end] pair to store
def save_reg(selected_genomic_regions, chr_reg, bin, reg)
  bins = selected_genomic_regions[chr_reg]
  if bins.nil?
    selected_genomic_regions[chr_reg] = {bin => [reg]}
  elsif bins[bin].nil?
    bins[bin] = [reg]
  else
    bins[bin] << reg
  end
end
111
+
112
# Appends a list of regions to the chromosome => bin => regions index,
# creating the chromosome and/or bin entry when it does not exist yet.
# The regions are always copied into an array owned by the index, so the
# caller's +reg+ array is never shared with (or later mutated through) the index.
#
# @param selected_genomic_regions [Hash] index modified in place
# @param chr_reg [String] chromosome name
# @param bin [Integer] bin number
# @param reg [Array<Array<Integer>>] list of [start, end] pairs to add
def save_reg_concat(selected_genomic_regions, chr_reg, bin, reg)
  bins = (selected_genomic_regions[chr_reg] ||= {})
  (bins[bin] ||= []).concat(reg)
end
125
+
126
# Tells whether two regions overlap enough to be considered the same feature.
#
# The overlap length is min(ends) - max(starts), clamped at 0. It is expressed
# as a fraction of each region's own length, and the regions match when either
# fraction reaches the threshold. Fractions are computed in floating point
# (integer division would truncate every partial overlap to 0). A zero-length
# region contributes a fraction of 0.0 instead of raising ZeroDivisionError.
#
# @param main_genomic_region [Array<Integer>] [start, end] of the reference region
# @param genomic_region_to_compare [Array<Integer>] [start, end] of the candidate region
# @param thresold_overlap [Numeric] minimum fraction (e.g. 0.8) required for a match
# @return [Boolean] true when the regions match
def compare_genomics_regions_coords(main_genomic_region, genomic_region_to_compare, thresold_overlap)
  main_beg, main_end = main_genomic_region
  reg_beg, reg_end = genomic_region_to_compare

  absolute_overlap = [[main_end, reg_end].min - [main_beg, reg_beg].max, 0].max
  relative_overlap = lambda do |region_size|
    region_size > 0 ? absolute_overlap.to_f / region_size : 0.0
  end

  relative_overlap.call(main_end - main_beg) >= thresold_overlap ||
    relative_overlap.call(reg_end - reg_beg) >= thresold_overlap
end
152
+
153
+ #Main
154
+ #--------------
155
# Arguments: metadata file, metadata field to group by, output file, folder with the region files.
if ARGV.length < 4
  abort "Usage: #{File.basename(__FILE__)} metadata_file grouping_field output_file input_folder"
end

file_input_folder = ARGV[3]
name_storage = load_metadata_file(ARGV[0])
package_grouping = element_grouper(ARGV[1], name_storage)

# Block form guarantees the output file is closed even if a region file fails to load.
File.open(ARGV[2], 'w') do |file_writer|
  package_grouping.each do |grouping_element, file_names|
    # Open the first file of the group: it seeds the reference region set
    first_file, *remaining_files = file_names
    genomic_regions_references = load_files_to_compare(File.join(file_input_folder, first_file))
    remaining_files.each do |f_name|
      file2compare = load_files_to_compare(File.join(file_input_folder, f_name))
      # Regions of this file that match nothing in the reference set so far...
      selected_genomic_regions = compare_genomics_regions(genomic_regions_references, file2compare, 0.8)
      #puts Benchmark.measure{selected_genomic_regions = compare_genomics_regions(genomic_regions_references, file2compare, 0.8)}
      # ...are added to the reference set before the next file is compared.
      selected_genomic_regions.each do |chr, ge_regs|
        ge_regs.each do |bin, reg|
          save_reg_concat(genomic_regions_references, chr, bin, reg)
        end
      end
    end
    # One output line per region: chromosome, coordinates, group name
    genomic_regions_references.each do |chr, ge_regs|
      ge_regs.each do |bin, reg|
        reg.each do |gr|
          file_writer.puts "#{chr}\t#{gr.join("\t")}\t#{grouping_element}"
        end
      end
    end
    puts "Wrote #{grouping_element}"
  end
end
184
+
185
+
186
+
187
+
188
+
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
data/bin/statistics.rb ADDED
@@ -0,0 +1,193 @@
1
+ #! /usr/bin/env ruby
2
+ require 'scbi_plot'
3
+ #METHODS
4
+ #----------
5
# Loads a tab-separated SNP annotation table.
#
# The first line is a header. On every line the first +fields_length+ columns
# describe the SNP itself (the first one is used as its identifier); the
# remaining columns hold the genomic factors (histone modifications, TFBS, ...).
# Only the eight known categories are read, located through the header names.
# A cell is a comma-separated list, with '-' meaning "no annotation".
# Lines with no fields at all are skipped, and a row shorter than the header
# simply yields no values for the missing cells.
#
# @param input_file [String] path of the table
# @param fields_length [Integer] number of leading SNP-description columns
#   (defaults to 5, the value the calling script uses when none is given)
# @return [Hash{String => Hash{String => Array<String>}}] SNP id => category => values
def load_snp_data(input_file, fields_length = 5)
  category_names = %w[
    HistoneModification tfbs dnaseData metilationData
    ConservedRegions Enhancers DENdbEnhancers SuperEnhancers
  ]
  snp_storage = {}
  index = nil # header name => column position, filled from the first line
  # File.foreach closes the handle when done.
  File.foreach(input_file) do |line|
    fields = line.chomp.split("\t")
    snp_fields = fields.shift(fields_length) # fields now only holds the genomic factors
    if index.nil?
      index = {}
      fields.each_with_index { |category, position| index[category] = position }
      next
    end
    next if snp_fields.empty?

    categories = {}
    category_names.each do |category_name|
      categories[category_name] = []
      column_position = index[category_name]
      next if column_position.nil?

      snp_category_values = fields[column_position]
      next if snp_category_values.nil? || snp_category_values == '-'

      categories[category_name].concat(snp_category_values.split(','))
    end
    snp_storage[snp_fields[0]] = categories
  end
  snp_storage
end
43
+
44
# Computes, for every annotation category, the percentage of SNPs that carry
# at least one annotation of that category.
#
# The eight known categories are always present in the result; a category
# found in the data but not in that list is counted as well instead of raising.
# An empty +snp_storage+ yields 0.0 for every category (not NaN).
#
# @param snp_storage [Hash{String => Hash{String => Array}}] SNP id => category => values
# @return [Hash{String => Float}] category => percentage (0.0..100.0)
def snp_calculate_stats(snp_storage)
  annotated_counts = {
    "HistoneModification" => 0,
    "tfbs" => 0,
    "dnaseData" => 0,
    "metilationData" => 0,
    "ConservedRegions" => 0,
    "Enhancers" => 0,
    "DENdbEnhancers" => 0,
    "SuperEnhancers" => 0
  }
  snp_storage.each_value do |annotations|
    annotations.each do |annotation_category, annotation_value|
      next if annotation_value.empty?

      annotated_counts[annotation_category] = annotated_counts.fetch(annotation_category, 0) + 1
    end
  end

  total_snps = snp_storage.length.to_f
  annotated_counts.each_with_object({}) do |(annotation_category, count), snp_percentage|
    snp_percentage[annotation_category] = total_snps.zero? ? 0.0 : count / total_snps * 100
  end
end
69
+
70
# Draws the per-category percentages as a ScbiPlot histogram and echoes the
# plotted categories and values to standard output.
#
# @param snp_percentage [Hash] category => percentage
# @param name [String] first argument given to ScbiPlot::Histogram.new
def create_histogram(snp_percentage, name)
  categories = snp_percentage.keys
  percentages = snp_percentage.values

  histogram = ScbiPlot::Histogram.new(name, 'SNPs genomic region annotations')

  # x axis: category names
  histogram.add_x(categories)
  puts categories.inspect
  # y axis: percentages
  histogram.add_y(percentages)
  puts percentages.inspect
  # generate graph
  histogram.do_graph
end
84
+
85
+
86
# Computes, for every annotation category, the percentage of reference SNPs
# whose annotations are reproduced in +snp_storage+ according to
# annotation_comparison.
#
# Reference SNPs missing from +snp_storage+ still count in the total, so they
# lower the percentage. Categories that the matching SNP lacks are skipped.
# An empty reference yields 0.0 for every category (not NaN).
#
# @param snp_storage [Hash{String => Hash{String => Array}}] data under evaluation
# @param snp_storage_reference [Hash{String => Hash{String => Array}}] reference data
# @return [Hash{String => Float}] category => percentage (0.0..100.0)
def snp_calculate_stats_with_reference(snp_storage, snp_storage_reference)
  agreement_counts = {
    "HistoneModification" => 0,
    "tfbs" => 0,
    "dnaseData" => 0,
    "metilationData" => 0,
    "ConservedRegions" => 0,
    "Enhancers" => 0,
    "DENdbEnhancers" => 0,
    "SuperEnhancers" => 0
  }

  snp_storage_reference.each do |snp_name_ref, annotations_ref|
    query = snp_storage[snp_name_ref]
    next if query.nil?

    annotations_ref.each do |annotation_category_ref, annotation_value_ref|
      annotation_value = query[annotation_category_ref]
      next if annotation_value.nil?

      if annotation_comparison(annotation_value_ref, annotation_value, annotation_category_ref)
        agreement_counts[annotation_category_ref] = agreement_counts.fetch(annotation_category_ref, 0) + 1
      end
    end
  end

  total_snps = snp_storage_reference.length.to_f
  agreement_counts.each_with_object({}) do |(annotation_category, count), snp_percentage|
    snp_percentage[annotation_category] = total_snps.zero? ? 0.0 : count / total_snps * 100
  end
end
117
+
118
# Decides whether the annotations found for a SNP agree with the reference
# annotations for one category. Neither input array is modified.
#
# Rules, applied after removing duplicates:
# * identical sets of values always agree;
# * dnaseData, metilationData, ConservedRegions, Enhancers, DENdbEnhancers and
#   SuperEnhancers agree as soon as any value was found;
# * tfbs agrees when at least one value is shared or 5+ distinct values were found;
# * HistoneModification works like tfbs, after reducing each mark to its
#   "H<digits>K<digits>" prefix when it has one;
# * any other category only agrees on identical sets.
#
# @param annotation_value_ref [Array<String>] reference values
# @param annotation_value [Array<String>] values under evaluation
# @param annotation_category_ref [String] category name
# @return [Boolean]
def annotation_comparison(annotation_value_ref, annotation_value, annotation_category_ref)
  # Non-destructive uniq: the original uniq! silently rewrote the caller's data.
  reference = annotation_value_ref.uniq
  observed = annotation_value.uniq
  #puts "#{annotation_value_ref.inspect} => #{annotation_value}" if annotation_category_ref == 'dnaseData'
  return true if reference.sort == observed.sort

  case annotation_category_ref
  when 'dnaseData', 'metilationData', 'ConservedRegions',
       'Enhancers', 'DENdbEnhancers', 'SuperEnhancers'
    !observed.empty?
  when 'tfbs'
    !(reference & observed).empty? || observed.length >= 5
  when 'HistoneModification'
    # Keep only the residue part of the mark (e.g. "H3K4me1" -> "H3K4")
    core_mark = ->(mark) { mark[/(H\d+K\d+)\w*/, 1] || mark }
    reference_marks = reference.map(&core_mark).uniq
    observed_marks = observed.map(&core_mark).uniq
    !(reference_marks & observed_marks).empty? || observed_marks.length >= 5
  else
    false
  end
end
168
+
169
+ #MAIN
170
+ #----------
171
+
172
# REMINDER: this program runs statistical analyses and compares the results of two given files.
# In our case, we compare the data produced by our program with the data obtained experimentally.
# our data = ARGV[0], experimental data = ARGV[1]
# if the second argument is not given (or is 'false'), the analysis is run on the program's own result only
# ARGV[2] (optional) = number of leading SNP-description columns, ARGV[3] = label appended to every output line
fields_length = ARGV[2].nil? ? 5 : ARGV[2].to_i

snp_storage = load_snp_data(ARGV[0], fields_length)
if !ARGV[1].nil? && ARGV[1].downcase != 'false'
  # load_snp_data takes the column count as second argument; it was missing here.
  # NOTE(review): assumes the reference file has the same number of leading columns - confirm.
  snp_storage_reference = load_snp_data(ARGV[1], fields_length)
  snp_percentage = snp_calculate_stats_with_reference(snp_storage, snp_storage_reference)
else
  snp_percentage = snp_calculate_stats(snp_storage)
end
snp_percentage.each do |category_name, percentage|
  puts "#{category_name}\t#{percentage}\t#{ARGV[3]}"
end

# The plot file will be created in the directory the script is run from
# file_name = File.basename(ARGV[0], ".txt")
# graph_name = file_name + ".png"
# create_histogram(snp_percentage, graph_name)
data/database/deleteme ADDED
File without changes