anncrsnp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp'))
5
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'anncrsnp', 'parsers'))
6
+
7
+ require 'optparse'
8
+ require 'ucscparser'
9
+ require 'dataset'
10
+ require 'sqlite3'
11
+
12
# Command-line options. Defaults live here; the OptionParser handlers below
# overwrite them when the corresponding switch is present on the command line.
options = {
  data: nil,                          # directory that holds the source data files (-d)
  create_sql: false,                  # build/update the SQLite database (-s)
  output_path: "genomic_data.sqlite", # database file to create or update (-o)
  verbose: nil                        # verbosity switch (-v / --no-verbose)
}
OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  opts.on("-d", "--data_directory PATH", "Directory used to extract data") do |data|
    options[:data] = data
  end

  # Plain true/false literals: the TRUE/FALSE constants were deprecated in
  # Ruby 2.4 and no longer exist in Ruby 3.x.
  opts.on("-s", "--create_sql", "Create SQL DB") do
    options[:create_sql] = true
  end

  opts.on("-o", '--output_path PATH', 'Output path for DB') do |output_path|
    options[:output_path] = output_path
  end

  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end
end.parse!
37
+
38
+
39
+
40
# Load every recognised source file found in the data directory into all_data.
# Each key is the label that the SQL import stage stores as GenomicRange.type.
all_data = {}
# options[:data] is nil when -d was not given; File.exist?(nil) raises TypeError,
# so check for presence first.
if options[:data] && File.exist?(options[:data])
  Dir.glob(File.join(options[:data], '*.{txt,bed,csv}')).each do |file| # path of each candidate file in the directory
    current_file = File.basename(file)
    ### Definitive sources
    # If the bin field from UCSC doesn't exist, pass false as third argument to parseUCSCformat
    case current_file
    when "wgEncodeAwgDnaseMasterSites.bed"
      header = [:score, :floatScore, :sourceCount, :sourceIds]
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.numeric_filter(:sourceCount, 2)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'DNAseHS')
      all_data['dnaseData'] = current_dataset
    when "wgEncodeHaibMethyl450Ag04449SitesRep1.bed"
      header = [:score, :strand, :thickStart, :thickEnd, :itemRgb]
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'Metilation_sites')
      all_data['metilationData'] = current_dataset
    when "snp144Common.txt" # current_file == "test.txt"
      header = [:score, :strand, :refNCBI, :refUCSC, :observed, :molType, :class, :valid, :avHet, :avHetSE, :func, :locType, :weight, :exceptions, :submitterCount, :submitters, :alleleFreqCount, :alleles, :alleleNs, :alleleFreqs, :bitfields]
      current_dataset = parseUCSCformat(file, header)
      # Every parsed column except :class and :func is dropped here.
      current_dataset.drop_columns([:score, :strand, :refNCBI, :refUCSC, :observed, :molType, :valid, :avHet, :avHetSE, :locType, :weight, :exceptions, :submitterCount, :submitters, :alleleFreqCount, :alleles, :alleleNs, :alleleFreqs, :bitfields])
      current_dataset.add_metadata(:classification, 'SNP')
      all_data['snpDbSnp'] = current_dataset
    when "refGene.txt"
      header = [:name, :strand, :cdsStart, :cdsEnd, :exonCount, :exonStarts, :exonEnds, :score, :cdsStartStat, :cdsEndStat, :exonFrames]
      current_dataset = parseUCSCrefseqformat(file, header)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'gene')
      all_data['gene'] = current_dataset
    when "TFBSMasterSites.txt" # Must be generated with "masterfeatures.rb tfbs/files.txt antibody import_data/TFBSMasterSites.txt tfbs/"
      header = []
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.add_metadata(:classification, 'TFBS')
      all_data['tfbs'] = current_dataset
    when "HistoneModMasterSites.txt" # Must be generated with "masterfeatures.rb tfbs/files.txt antibody import_data/TFBSMasterSites.txt tfbs/"
      header = []
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.add_metadata(:classification, 'HistoneModification')
      all_data['HistoneModification'] = current_dataset
    when "46waycons.txt"
      header = [:span, :count, :offset, :file, :lowerLimit, :dataRange, :validCount, :sumData, :sumSquares]
      current_dataset = parseUCSCformat(file, header)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'ConservedRegions')
      all_data['ConservedRegions'] = current_dataset
    when "enhancer_tss_associations.bed"
      header = [:score, :strand, :enh_start, :enh_stop, :array, :index, :val1, :val2]
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'Enhancers')
      all_data['Enhancers'] = current_dataset
    when "enhancers.csv"
      header = [:cell_line, :index1, :index2, :index3, :index4, :index5, :index6, :index7]
      current_dataset = parseDENdbCSVformat(file, header)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'DENdbEnhancers')
      all_data['DENdbEnhancers'] = current_dataset
    when "all_hg19_bed.bed"
      header = [:counter]
      current_dataset = parseUCSCformat(file, header, false)
      current_dataset.drop_columns(header)
      current_dataset.add_metadata(:classification, 'SuperEnhancers')
      all_data['SuperEnhancers'] = current_dataset
    end
  end
end
108
+
109
# Build (or incrementally update) the SQLite database from the datasets in all_data.
if options[:create_sql]
  commands = []
  # The schema is only created when the database file does not exist yet;
  # an empty `commands` list therefore means "incremental update".
  # (File.exists? was removed in Ruby 3.2; File.exist? is the supported name.)
  if !File.exist?(options[:output_path])
    commands << "CREATE TABLE GenomicRange(
      bin,
      chr,
      start,
      end,
      type,
      name,
      AnnotationId
    )"
    commands << "CREATE TABLE Annotation(
      value,
      AnnotationTypeId
    )"
    commands << "CREATE TABLE AnnotationType(
      type
    )"
    #File.delete(options[:output_path])
  end

  DB = SQLite3::Database.new( options[:output_path] )
  commands.each do |cmd|
    DB.execute(cmd)
  end
  # Import data process speed up configuration
  DB.execute("PRAGMA synchronous = OFF;")
  DB.execute("PRAGMA journal_mode = MEMORY;")

  # In-memory indexes for incremental updates: rows are [rowid, column...],
  # grouped by their first real column (type name / annotation value).
  annotation_type_index = DB.execute("SELECT rowid, * FROM AnnotationType").group_by {|r| r[1]}
  annotation_index = DB.execute("SELECT rowid, * FROM Annotation").group_by {|r| r[1]}

  all_data.each do |class_data, dataset|
    puts "#{class_data} import started"
    # Save and create AnnotationType data
    #------------------------------------------------------------------
    header = dataset.get_metadata(:header)
    annotation_type = header.map{|h| h.to_s}
    annotation_type.shift(4) # the first four header columns are not annotation types
    if !commands.empty?
      records = annotation_type
    else
      # Keep only the types that are not stored yet. A missing key yields nil,
      # so the lookup itself must be tested (calling .first on it would raise).
      records = annotation_type.select{|at| annotation_type_index[at].nil?}
    end

    DB.transaction do |db|
      db.prepare("INSERT INTO AnnotationType(type) VALUES(?)") do |smnt| # Precompile query for speed up process
        records.each do |rec|
          smnt.execute(rec)
        end
      end
    end
    annotation_type_index = DB.execute("SELECT rowid, * FROM AnnotationType").group_by {|r| r[1]} if !records.empty?

    # Save and create Annotation data
    #------------------------------------------------------------------
    annotations = {}
    annotation_type.each do |at|
      annotations[at] = {}
    end
    if dataset.first.length > 4
      dataset.each_record do |record|
        # NOTE(review): annotations are read from index 5 here, while the
        # GenomicRange step below treats everything after the first 4 fields as
        # annotations - confirm the intended offset against the Dataset record layout.
        record[5..record.length - 1].each_with_index do |annotation, i|
          annotations[annotation_type[i]][annotation] = nil # hash used as a set of distinct values
        end
      end
    end

    records = []
    annotations.each do |type_name, values|
      annotation_type_id = annotation_type_index[type_name].first.first # rowid of the type
      if !commands.empty?
        records = records.concat(values.keys.map{|v| [v, annotation_type_id]})
      else
        records = records.concat(values.keys.select{|v| annotation_index[v].nil? }.map{|v| [v, annotation_type_id]})
      end
    end

    DB.transaction do |db|
      db.prepare("INSERT INTO Annotation(value, AnnotationTypeId) VALUES(?, ?)") do |smnt|
        records.each do |rec|
          smnt.execute(rec[0], rec[1])
        end
      end
    end
    annotation_index = DB.execute("SELECT rowid, * FROM Annotation").group_by {|r| r[1]} if !records.empty?

    # Save and create GenomicRange data
    #------------------------------------------------------------------
    DB.transaction do |db|
      db.prepare("INSERT INTO GenomicRange(bin, chr, start, end, type, name, AnnotationId) VALUES(?, ?, ?, ?, ?, ?, ?)") do |smnt|
        dataset.each_record do |record|
          region_data = record.shift(4) # removes the region fields from the record in place
          annotation_ids = []
          record.each do |annotation|
            id = annotation_index[annotation]
            annotation_ids << id.first.first if !id.nil?
          end
          smnt.execute(
            region_data[1]/10000, # bin = start coordinate divided by 10000
            region_data[0],
            region_data[1],
            region_data[2],
            class_data,
            region_data[3],
            annotation_ids.join(',')
          )
        end
      end
    end
    puts "#{class_data} import finished"
  end

  # Index creation and close stay inside the branch: DB only exists when the
  # database was requested. IF NOT EXISTS keeps incremental runs from failing
  # on indexes created by a previous run.
  DB.execute("CREATE INDEX IF NOT EXISTS name_index ON GenomicRange (name)")
  DB.execute("CREATE INDEX IF NOT EXISTS bin_index ON GenomicRange (bin)")
  DB.close
end
@@ -0,0 +1,188 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'benchmark'
4
+
5
+ #Description
6
+ #--------------
7
+ #Tool to unify data from ENCODE
8
+
9
+ #Methods
10
+ #--------------
11
+
12
# Parse a tab-separated metadata index into a hash.
#
# Each line holds a file name and, after a tab, a "; "-separated list of
# key=value pairs. Every ".gz" occurrence is removed from the file name.
#
# @param file [String] path of the metadata file
# @return [Hash{String => Hash{String => String}}] metadata pairs keyed by file name
def load_metadata_file(file)
  name_storage = {}
  # File.foreach closes the handle even when parsing raises.
  File.foreach(file) do |line|
    line.chomp!
    fields = line.split("\t")
    next if fields.empty? # blank line

    features_storage = {} # metadata hash
    # to_s: a line without a metadata column simply yields no pairs
    fields[1].to_s.split("; ").each do |feature|
      # limit 2 keeps values that themselves contain "=" intact
      met_name, metadata = feature.split("=", 2)
      features_storage[met_name] = metadata
    end
    name_storage[fields[0].gsub('.gz', '')] = features_storage
  end
  name_storage
end
29
+
30
# Group file names by the value of one metadata key.
#
# @param grouping_element [String] metadata key to group by (e.g. the antibody)
# @param name_storage [Hash{String => Hash}] metadata keyed by file name
# @return [Hash{String => Array<String>}] file names per metadata value; files
#   without that key are left out
def element_grouper(grouping_element, name_storage)
  groups = {}
  name_storage.each_pair do |file_name, metadata|
    group_key = metadata[grouping_element]
    next if group_key.nil? # file carries no value for this key

    (groups[group_key] ||= []) << file_name
  end
  groups
end
44
+
45
# Load the genomic regions of a tab-separated file, bucketed by chromosome and bin.
#
# The first column is the chromosome and the next two are converted to integers
# (start, end); any further columns are ignored. The bin of a region is its
# start coordinate divided by 10000.
#
# @param file_name [String] path of the region file
# @return [Hash{String => Hash{Integer => Array<Array<Integer>>}}] regions as
#   [start, end] pairs, indexed by chromosome and then by bin
def load_files_to_compare(file_name)
  genomic_regions = {}
  # File.foreach closes the handle when done; the original File.open(...).each leaked it.
  File.foreach(file_name) do |line|
    line.chomp!
    next if line.strip.empty? # tolerate blank lines

    chr, *coords = line.split("\t")
    genomic_info = coords[0..1].map { |c| c.to_i }
    bin = genomic_info.first / 10000
    ((genomic_regions[chr] ||= {})[bin] ||= []) << genomic_info
  end
  genomic_regions
end
67
+
68
# Collect the regions of genomic_regions_to_compare that match no region of
# main_genomic_regions located in the same chromosome and bin.
#
# @param main_genomic_regions [Hash] reference regions (chromosome => bin => regions)
# @param genomic_regions_to_compare [Hash] candidate regions, same layout
# @param thresold_overlap [Numeric] threshold handed to compare_genomics_regions_coords
# @return [Hash] unmatched candidate regions, same layout as the inputs
def compare_genomics_regions(main_genomic_regions, genomic_regions_to_compare, thresold_overlap)
  selected_genomic_regions = {}
  genomic_regions_to_compare.each do |chr_reg, candidate_bins|
    main_bins = main_genomic_regions[chr_reg]
    candidate_bins.each do |bin, regs|
      # nil when the reference lacks this chromosome or this bin: then every
      # candidate region of the bin is kept.
      main_regs = main_bins.nil? ? nil : main_bins[bin]
      regs.each do |reg|
        matched = !main_regs.nil? && main_regs.any? do |main|
          compare_genomics_regions_coords(main, reg, thresold_overlap)
        end
        save_reg(selected_genomic_regions, chr_reg, bin, reg) unless matched
      end
    end
  end
  selected_genomic_regions
end
97
+
98
# Append a single region to the chromosome/bin bucket of the store, creating
# the chromosome entry and the bin list on demand.
#
# @param selected_genomic_regions [Hash] store to update in place (chromosome => bin => regions)
# @param chr_reg [String] chromosome of the region
# @param bin [Integer] bin of the region
# @param reg [Array<Integer>] the region itself
def save_reg(selected_genomic_regions, chr_reg, bin, reg)
  bins = selected_genomic_regions[chr_reg]
  return (selected_genomic_regions[chr_reg] = { bin => [reg] }) if bins.nil?

  bucket = bins[bin]
  return (bins[bin] = [reg]) if bucket.nil?

  bucket << reg
end
111
+
112
# Merge a list of regions into the chromosome/bin bucket of the store.
# When the bucket does not exist yet, the given list itself becomes the bucket
# (it is stored, not copied); otherwise its elements are appended.
#
# @param selected_genomic_regions [Hash] store to update in place (chromosome => bin => regions)
# @param chr_reg [String] chromosome of the regions
# @param bin [Integer] bin of the regions
# @param reg [Array<Array<Integer>>] regions to merge
def save_reg_concat(selected_genomic_regions, chr_reg, bin, reg)
  bins = selected_genomic_regions[chr_reg]
  return (selected_genomic_regions[chr_reg] = { bin => reg }) if bins.nil?

  bucket = bins[bin]
  return (bins[bin] = reg) if bucket.nil?

  bucket.concat(reg)
end
125
+
126
# Decide whether two regions overlap enough to be considered the same feature.
#
# The overlap length is divided by the length of each region in turn; the
# regions match when either fraction reaches the threshold.
#
# @param main_genomic_region [Array<Integer>] reference region as [start, end]
# @param genomic_region_to_compare [Array<Integer>] candidate region as [start, end]
# @param thresold_overlap [Numeric] minimum overlap fraction (e.g. 0.8)
# @return [Boolean] true when at least one overlap fraction is >= the threshold
def compare_genomics_regions_coords(main_genomic_region, genomic_region_to_compare, thresold_overlap)
  main_beg, main_end = main_genomic_region
  reg_beg, reg_end = genomic_region_to_compare

  # Intersection of the two intervals; negative when they are disjoint, hence the clamp.
  # (The previous branch chain overestimated the overlap of a region contained
  # in the other one and had an unreachable last branch.)
  overlap_beg = [main_beg, reg_beg].max
  overlap_end = [main_end, reg_end].min
  absolute_overlap = [overlap_end - overlap_beg, 0].max

  [main_end - main_beg, reg_end - reg_beg].any? do |size|
    # to_f before dividing: integer division truncated every fraction to 0 or 1.
    # Zero-length regions cannot provide a fraction and are skipped instead of
    # raising ZeroDivisionError.
    size > 0 && absolute_overlap.to_f / size >= thresold_overlap
  end
end
152
+
153
+ #Main
154
+ #--------------
155
# Arguments: ARGV[0] metadata file, ARGV[1] metadata key to group by,
# ARGV[2] output file, ARGV[3] folder that contains the files listed in the metadata.
file_input_folder = ARGV[3]
name_storage = load_metadata_file(ARGV[0])
package_grouping = element_grouper(ARGV[1], name_storage)

# Block form guarantees the output file is closed even if loading an input fails.
File.open(ARGV[2], 'w') do |file_writer|
  package_grouping.each do |grouping_element, file_names|
    # Open the first file of the group; its regions seed the reference set.
    genomic_regions_references = load_files_to_compare(File.join(file_input_folder, file_names.shift))
    file_names.each do |f_name|
      file2compare = load_files_to_compare(File.join(file_input_folder, f_name))
      selected_genomic_regions = compare_genomics_regions(genomic_regions_references, file2compare, 0.8)
      #puts Benchmark.measure{selected_genomic_regions = compare_genomics_regions(genomic_regions_references, file2compare, 0.8)}
      # Regions that matched nothing in the reference set are added to it, so
      # later files of the group are compared against them as well.
      selected_genomic_regions.each do |chr, ge_regs|
        ge_regs.each do |bin, reg|
          save_reg_concat(genomic_regions_references, chr, bin, reg)
        end
      end
    end
    # One output line per region: chromosome, coordinates, group label.
    genomic_regions_references.each do |chr, ge_regs|
      ge_regs.each do |bin, reg|
        reg.each do |gr|
          file_writer.puts "#{chr}\t#{gr.join("\t")}\t#{grouping_element}"
        end
      end
    end
    puts "Wrote #{grouping_element}"
  end
end
184
+
185
+
186
+
187
+
188
+
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
# Strict mode: stop on command errors, on unset variables and on failures
# anywhere in a pipeline.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only.
IFS=$'\n\t'

# Install the gems required by the project.
bundle install
6
+
7
+ # Do any other automated setup that you need to do here
data/bin/statistics.rb ADDED
@@ -0,0 +1,193 @@
1
+ #! /usr/bin/env ruby
2
+ require 'scbi_plot'
3
+ #METHODS
4
+ #----------
5
# Load a tab-separated SNP annotation table.
#
# The first line is the header. On every line the first `fields_length` columns
# describe the SNP itself (the first of them is used as its key); the remaining
# columns hold the genomic factors (histone modifications, TFBS, ...), located
# through the header names. A cell is a comma-separated list, or '-' when empty.
#
# @param input_file [String] path of the table
# @param fields_length [Integer] number of leading SNP columns; defaults to 5,
#   the value the main section of this script uses when no length is given
# @return [Hash{String => Hash{String => Array<String>}}] annotation values per
#   category, keyed by SNP
def load_snp_data(input_file, fields_length = 5)
  snp_storage = {}
  index = {} # header name => position among the non-SNP columns
  # File.foreach closes the handle when the iteration ends or raises.
  File.foreach(input_file).with_index do |line, line_number|
    line.chomp!
    fields = line.split("\t")
    snp_fields = fields.shift(fields_length) # `fields` keeps only the genomic factors
    if line_number.zero?
      fields.each_with_index do |category, position|
        index[category] = position
      end
    else
      categories = {
        "HistoneModification" => [],
        "tfbs" => [],
        "dnaseData" => [],
        "metilationData" => [],
        "ConservedRegions" => [],
        "Enhancers" => [],
        "DENdbEnhancers" => [],
        "SuperEnhancers" => []
      }
      categories.each do |category_name, category_value|
        column_position = index[category_name]
        next if column_position.nil? # category absent from this file

        snp_category_values = fields[column_position]
        # nil: the row is shorter than the header (String#split drops trailing
        # empty cells); '-' marks an empty cell.
        next if snp_category_values.nil? || snp_category_values == '-'

        category_value.concat(snp_category_values.split(','))
      end
      snp_storage[snp_fields[0]] = categories
    end
  end
  snp_storage
end
43
+
44
# Compute, for each annotation category, the percentage of SNPs that carry at
# least one annotation of that category.
#
# @param snp_storage [Hash{String => Hash{String => Array}}] annotations per SNP
# @return [Hash{String => Float}] percentage (0.0-100.0) per category; all 0.0
#   when snp_storage is empty
def snp_calculate_stats(snp_storage)
  annotated_counts = {
    "HistoneModification" => 0,
    "tfbs" => 0,
    "dnaseData" => 0,
    "metilationData" => 0,
    "ConservedRegions" => 0,
    "Enhancers" => 0,
    "DENdbEnhancers" => 0,
    "SuperEnhancers" => 0
  }
  snp_storage.each_value do |annotations|
    annotations.each do |annotation_category, annotation_value|
      next if annotation_value.empty?

      # fetch with a default: a category outside the list above is counted
      # instead of failing on nil + 1
      annotated_counts[annotation_category] = annotated_counts.fetch(annotation_category, 0) + 1
    end
  end

  total_snps = snp_storage.length.to_f
  annotated_counts.each_with_object({}) do |(annotation_category, true_positive_number), snp_percentage|
    # Without the guard an empty input would produce NaN (0 / 0.0).
    snp_percentage[annotation_category] = total_snps.zero? ? 0.0 : true_positive_number / total_snps * 100
  end
end
69
+
70
# Plot the per-category percentages as a histogram with ScbiPlot and echo the
# plotted categories and values to standard output.
#
# @param snp_percentage [Hash] value per annotation category
# @param name [String] first argument for ScbiPlot::Histogram.new
#   (the commented-out caller in this script passes a ".png" file name)
def create_histogram(snp_percentage, name)
  histogram = ScbiPlot::Histogram.new(name, 'SNPs genomic region annotations')

  # x axis: category names
  histogram.add_x(snp_percentage.keys)
  p(snp_percentage.keys)

  # y axis: values
  histogram.add_y(snp_percentage.values)
  p(snp_percentage.values)

  # render the graph
  histogram.do_graph
end
84
+
85
+
86
# Compute, for each annotation category, the percentage of reference SNPs whose
# annotations are reproduced in snp_storage according to annotation_comparison.
# Reference SNPs absent from snp_storage count as not reproduced.
#
# @param snp_storage [Hash{String => Hash{String => Array}}] annotations under evaluation
# @param snp_storage_reference [Hash{String => Hash{String => Array}}] reference annotations
# @return [Hash{String => Float}] percentage (0.0-100.0) per category, relative
#   to the number of reference SNPs; all 0.0 when the reference is empty
def snp_calculate_stats_with_reference(snp_storage, snp_storage_reference)
  matched_counts = {
    "HistoneModification" => 0,
    "tfbs" => 0,
    "dnaseData" => 0,
    "metilationData" => 0,
    "ConservedRegions" => 0,
    "Enhancers" => 0,
    "DENdbEnhancers" => 0,
    "SuperEnhancers" => 0
  }

  snp_storage_reference.each do |snp_name_ref, annotations_ref|
    query = snp_storage[snp_name_ref]
    next if query.nil? # SNP missing from the evaluated data

    annotations_ref.each do |annotation_category_ref, annotation_value_ref|
      annotation_value = query[annotation_category_ref]
      next unless annotation_comparison(annotation_value_ref, annotation_value, annotation_category_ref)

      matched_counts[annotation_category_ref] = matched_counts.fetch(annotation_category_ref, 0) + 1
    end
  end

  total_snps = snp_storage_reference.length.to_f
  matched_counts.each_with_object({}) do |(annotation_category, true_positive_number), snp_percentage|
    # Without the guard an empty reference would produce NaN (0 / 0.0).
    snp_percentage[annotation_category] = total_snps.zero? ? 0.0 : true_positive_number / total_snps * 100
  end
end
117
+
118
# Decide whether the evaluated annotations reproduce the reference annotations
# of one category.
#
# Rules, in order:
# * identical sets of values (duplicates ignored) always match;
# * 'dnaseData', 'metilationData', 'ConservedRegions', 'Enhancers',
#   'DENdbEnhancers' and 'SuperEnhancers' match when any evaluated value exists;
# * 'tfbs' matches when both sides share a value or the evaluated side has at
#   least 5 distinct values;
# * 'HistoneModification' applies the 'tfbs' rule after reducing every value
#   that contains a mark such as "H3K4" to that mark (so "H3K4me1" and
#   "H3K4me3" are treated as equal);
# * any other category does not match.
#
# The argument arrays are left untouched.
#
# @param annotation_value_ref [Array<String>] reference values
# @param annotation_value [Array<String>] evaluated values
# @param annotation_category_ref [String] category name
# @return [Boolean]
def annotation_comparison(annotation_value_ref, annotation_value, annotation_category_ref)
  # uniq (not uniq!) so the caller's arrays are not modified
  reference = annotation_value_ref.uniq
  observed = annotation_value.uniq
  #puts "#{annotation_value_ref.inspect} => #{annotation_value}" if annotation_category_ref == 'dnaseData'
  return true if reference.sort == observed.sort

  case annotation_category_ref
  when 'dnaseData', 'metilationData', 'ConservedRegions', 'Enhancers', 'DENdbEnhancers', 'SuperEnhancers'
    !observed.empty?
  when 'tfbs'
    !(reference & observed).empty? || observed.length >= 5
  when 'HistoneModification'
    # Keep the captured mark when the pattern matches, the raw value otherwise.
    to_mark = lambda { |an| an[/(H\d+K\d+)\w*/, 1] || an }
    reference = reference.map(&to_mark).uniq
    observed = observed.map(&to_mark).uniq
    !(reference & observed).empty? || observed.length >= 5
  else
    false
  end
end
168
+
169
+ #MAIN
170
+ #----------
171
+
172
# REMINDER: this program computes statistics and compares the results of two given files.
# In our case, we compare the data produced by our program with the experimentally obtained data.
# our data = ARGV[0], experimental data = ARGV[1]
# if no second argument is given (or it is 'false'), the analysis is run on the program's own result
# ARGV[2] (optional) = number of leading SNP columns, ARGV[3] = label appended to every output line
fields_length = 5
fields_length = ARGV[2].to_i if !ARGV[2].nil?

snp_storage = load_snp_data(ARGV[0], fields_length)
if !ARGV[1].nil? && ARGV[1].downcase != 'false'
  # load_snp_data was called here with a single argument, which raised
  # ArgumentError. NOTE(review): this assumes the reference file has the same
  # number of leading SNP columns as the evaluated file - confirm.
  snp_storage_reference = load_snp_data(ARGV[1], fields_length)
  snp_percentage = snp_calculate_stats_with_reference(snp_storage, snp_storage_reference)
else
  snp_percentage = snp_calculate_stats(snp_storage)
end
snp_percentage.each do |category_name, percentage|
  puts "#{category_name}\t#{percentage}\t#{ARGV[3]}"
end
189
+
190
+ # The plot file will be created in the directory where the script is run
191
+ # file_name = File.basename(ARGV[0], ".txt")
192
+ # graph_name = file_name + ".png"
193
+ # create_histogram(snp_percentage, graph_name)
data/database/deleteme ADDED
File without changes