pets 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,307 @@
1
+ require 'json'
2
+ require 'semtools'
3
+
4
# Container for a patient cohort: per-record phenotype profiles (ontology
# terms) and genomic variants, plus helpers delegating to the active ontology.
class Cohort
  # Registry of loaded ontologies: name/id => Ontology object.
  # Shared by all Cohort instances (class variable).
  @@ont = {}

  class << self # https://www.ruby-forum.com/t/attr-accessor-for-class-variable/136693
    attr_accessor :act_ont # Which ontology to use for ontology-related operations
  end

  attr_accessor :profiles

  # Return the ontology registered under ont_id (nil if not loaded).
  def self.get_ontology(ont_id)
    return @@ont[ont_id]
  end

  # Load an ontology into the class registry.
  # ont_name:: registry key, used later with get_ontology / act_ont.
  # ont_file:: ontology path; names containing '.json' are read as a
  #            precomputed Ontology dump, anything else as a raw OBO file.
  # excluded_terms_file:: optional file with one term code per line; those
  #            terms are removed from the loaded ontology.
  def self.load_ontology(ont_name, ont_file, excluded_terms_file = nil)
    ont = nil
    if !ont_file.include?('.json')
      if !excluded_terms_file.nil?
        ont = Ontology.new(file: ont_file, load_file: true, removable_terms: read_excluded_ont_file(excluded_terms_file))
      else
        ont = Ontology.new(file: ont_file, load_file: true)
      end
    else
      ont = Ontology.new
      ont.read(ont_file)
      if !excluded_terms_file.nil?
        # JSON dumps are loaded first, then pruned and re-indexed.
        ont.add_removable_terms(read_excluded_ont_file(excluded_terms_file))
        ont.remove_removable()
        ont.build_index()
      end
    end
    @@ont[ont_name] = ont
  end

  # Read term codes to exclude, one per line. Returns an array of strings.
  def self.read_excluded_ont_file(file)
    # readlines(chomp: true) strips newlines AND closes the handle,
    # unlike the previous File.open(...).each which leaked it.
    return File.readlines(file, chomp: true)
  end

  def initialize()
    @profiles = {} # record id => array of ontology term symbols
    @vars = {}     # record id => Genomic_Feature holding that record's variants
    @var_idx = Genomic_Feature.new([]) # cohort-wide variant index, built by index_vars
  end

  # Add a record: [id, [terms], [[chr1, start1, stop1], ...]].
  # Terms are symbolized; a nil profile or nil vars part is skipped.
  def add_record(rec)
    id, profile, vars = rec
    @profiles[id] = profile.map{|t| t.to_sym} if !profile.nil?
    add_gen_feat(id, vars) if !vars.nil?
  end

  # Remove a record entirely (profile and variants).
  def delete(id)
    @profiles.delete(id)
    @vars.delete(id)
  end

  # Keep only records whose (id, profile) satisfies the block;
  # variants of discarded records are dropped too.
  def select_by_profile!
    @profiles.select!{|id, profile| yield(id, profile)}
    current_ids = @profiles.keys
    @vars.select!{|id, var| current_ids.include?(id)}
  end

  # Keep only records whose (id, variants) satisfies the block;
  # profiles of discarded records are dropped too.
  def select_by_var!
    @vars.select!{|id, var| yield(id, var)}
    current_ids = @vars.keys
    @profiles.select!{|id, profile| current_ids.include?(id)}
  end

  # Drop records with fewer than n_terms ontology terms.
  def filter_by_term_number(n_terms)
    select_by_profile!{|id, profile| profile.length >= n_terms}
  end

  # Remove records that lack variants or phenotype terms.
  def remove_incomplete_records
    ids_with_terms = @profiles.keys
    ids_with_vars = []
    @vars.each{|id, regs| ids_with_vars << id if regs.length > 0}
    full_ids = ids_with_vars & ids_with_terms
    @profiles.select!{|id, prof| full_ids.include?(id)}
    @vars.select!{|id, var| full_ids.include?(id)}
  end

  # Store genomic features for a record.
  # feat_array:: [[chr1, start1, stop1], [chr1, start1, stop1], ...]
  def add_gen_feat(id, feat_array)
    @vars[id] = Genomic_Feature.new(feat_array)
  end

  def get_profile(id)
    return @profiles[id]
  end

  def get_vars(id)
    return @vars[id]
  end

  # Yield (id, profile) for every record with a phenotype profile.
  def each_profile()
    @profiles.each do |id, profile|
      yield(id, profile)
    end
  end

  # Yield (id, Genomic_Feature) for every record with variants.
  def each_var()
    @vars.each do |id, var_info|
      yield(id, var_info)
    end
  end

  # Build a cohort-wide profile: terms present in at least thr (fraction of
  # records, 0..1), cleaned through the active ontology.
  def get_general_profile(thr=0) # TODO: move functionality to semtools
    term_count = Hash.new(0)
    each_profile do |id, prof|
      prof.each do |term|
        # BUGFIX: original incremented an undefined 'general_profile' local
        # (NameError at runtime) instead of accumulating term counts.
        term_count[term] += 1
      end
    end
    records = @profiles.length
    general_profile = []
    term_count.each do |term, count|
      general_profile << term if count.fdiv(records) >= thr
    end
    ont = @@ont[Cohort.act_ont]
    return ont.clean_profile_hard(general_profile)
  end

  # Validate every profile against the active ontology. OLD format_patient_data.
  # hard:: when true, profiles are aggressively cleaned (clean_profile_hard);
  #        otherwise unknown codes are detected via check_ids and removed.
  # Returns [uniq rejected term codes, ids of records left with no terms].
  def check(hard=false)
    ont = @@ont[Cohort.act_ont]
    rejected_terms = []
    rejected_recs = []
    @profiles.each do |id, terms|
      if hard
        terms = ont.clean_profile_hard(terms)
        rejec_terms = []
      else
        terms, rejec_terms = ont.check_ids(terms)
      end
      if !rejec_terms.empty?
        STDERR.puts "WARNING: record #{id} has the unknown CODES '#{rejec_terms.join(',')}'. Codes removed."
        rejected_terms.concat(rejec_terms)
      end
      if terms.empty?
        rejected_recs << id
      else
        @profiles[id] = terms
      end
    end
    # Purge records whose profile became empty after cleaning.
    @profiles.select!{|id, record| !rejected_recs.include?(id)}
    @vars.select!{|id, record| !rejected_recs.include?(id)}
    return rejected_terms.uniq, rejected_recs
  end

  # Register the cohort's profiles in the given ontology.
  def link2ont(ont_id)
    @@ont[ont_id].load_profiles(@profiles)
  end

  # Returns [profile sizes, parental terms per profile] from the active ontology.
  def get_profile_redundancy
    ont = @@ont[Cohort.act_ont]
    profile_sizes, parental_terms_per_profile = ont.get_profile_redundancy
    return profile_sizes, parental_terms_per_profile
  end

  def get_profiles_terms_frequency(options={})
    ont = @@ont[Cohort.act_ont]
    term_stats = ont.get_profiles_terms_frequency(**options) #https://www.ruby-lang.org/en/news/2019/12/12/separation-of-positional-and-keyword-arguments-in-ruby-3-0/
    return term_stats
  end

  # Returns [suggested childs, ratio of terms with childs] (delegated).
  def compute_term_list_and_childs()
    ont = @@ont[Cohort.act_ont]
    suggested_childs, term_with_childs_ratio = ont.compute_term_list_and_childs()
  end

  # Level distribution tables for reporting, with header rows prepended.
  def get_profile_ontology_distribution_tables()
    ont = @@ont[Cohort.act_ont]
    ontology_levels, distribution_percentage = ont.get_profile_ontology_distribution_tables
    ontology_levels.unshift(["level", "ontology", "cohort"])
    distribution_percentage.unshift(["level", "ontology", "weighted cohort", "uniq terms cohort"])
    return ontology_levels, distribution_percentage
  end

  # Information-content metrics for terms and profiles.
  def get_ic_analysis()
    ont = @@ont[Cohort.act_ont]
    onto_ic, freq_ic = ont.get_observed_ics_by_onto_and_freq # IC for TERMS
    onto_ic_profile, freq_ic_profile = ont.get_profiles_resnik_dual_ICs # IC for PROFILES
    return onto_ic, freq_ic, onto_ic_profile, freq_ic_profile
  end

  def get_profiles_mean_size
    ont = @@ont[Cohort.act_ont]
    profile_mean_size = ont.get_profiles_mean_size
    return profile_mean_size
  end

  # Profile length at the given percentile (delegated to the active ontology).
  def get_profile_length_at_percentile(perc=50, increasing_sort: false)
    ont = @@ont[Cohort.act_ont]
    # Pass perc positionally; the original 'perc=perc' self-assignment was
    # equivalent but misleading.
    length_percent = ont.get_profile_length_at_percentile(perc, increasing_sort: increasing_sort)
    return length_percent
  end

  def get_dataset_specifity_index(type)
    ont = @@ont[Cohort.act_ont]
    dsi = ont.get_dataset_specifity_index(type)
    return dsi
  end

  def compare_profiles(options={})
    ont = @@ont[Cohort.act_ont]
    similarities = ont.compare_profiles(**options)
    return similarities
  end

  # Merge every record's variants into the cohort-wide index.
  # Equivalent to process_patient_data.
  def index_vars
    each_var do |id, var|
      @var_idx.merge(var, id)
    end
  end

  def get_vars_sizes(summary=false)
    if summary
      return @var_idx.get_summary_sizes
    else
      return @var_idx.get_sizes
    end
  end

  def generate_cluster_regions(meth, tag, lim)
    @var_idx.generate_cluster_regions(meth, tag, lim)
  end

  # Write the cohort to a tab file.
  # mode:: :default => id, terms, variant cols; :paco => id, variant cols, terms
  #        (with header). Any other mode aborts.
  # translate:: translate term ids to names via the active ontology.
  def save(output_file, mode = :default, translate = false)
    File.open(output_file, 'w') do |f|
      # BUGFIX: header was compared against the String 'paco' while the
      # dispatch below uses the Symbol :paco, so it was never printed.
      f.puts "id\tchr\tstart\tstop\tterms" if mode == :paco
      ont = @@ont[Cohort.act_ont]
      @profiles.each do |id, terms|
        terms, rejected = ont.translate_ids(terms) if translate
        id_variants = @vars[id]
        variants = []
        if id_variants.nil? || id_variants.length == 0
          variants << ['-', '-', '-'] # placeholder row so the record is still written
        else
          id_variants.each do |chr, reg|
            variants << [chr, reg[:start], reg[:stop]]
          end
        end
        variants.each do |var|
          if mode == :default
            f.puts "#{id}\t#{terms.join('|')}\t#{var.join("\t")}"
          elsif mode == :paco
            f.puts "#{id}\t#{var.join("\t")}\t#{terms.join('|')}"
          else
            abort('Wrong save mode option, please try default or paco')
          end
        end
      end
    end
  end

  # Export one GA4GH phenopacket JSON file per record into output_folder.
  # genome_assembly:: recorded in the htsFiles entry of each packet.
  # vcf_index:: optional map-like object (id => vcf path); when it covers an
  #             id, an htsFiles section pointing at that VCF is embedded.
  def export_phenopackets(output_folder, genome_assembly, vcf_index: nil)
    ont = @@ont[Cohort.act_ont]
    metaData = {
      "createdBy" => "PETS",
      "resources" => [{
        "id" => "hp",
        "name" => "human phenotype ontology",
        "namespacePrefix" => "HP",
        "url" => "http://purl.obolibrary.org/obo/hp.owl",
        # "version" => "2018-03-08",
        "iriPrefix" => "http://purl.obolibrary.org/obo/HP_"
      }]
    }

    @profiles.each do |id, terms|
      phenopacket = {metaData: metaData}
      phenopacket[:subject] = {id: id}
      phenotypicFeatures = []
      terms.each do |term|
        term_name = ont.translate_id(term)
        phenotypicFeatures << {
          type: { id: term, label: term_name},
          classOfOnset: {"id" => "HP:0003577", "label" => "Congenital onset"}
        }
      end
      phenopacket[:phenotypicFeatures] = phenotypicFeatures
      if !vcf_index.nil? && vcf_index.include?(id)
        htsFiles = []
        htsFiles << {
          "uri" => "file:/" + vcf_index[id],
          "description" => id,
          "htsFormat" => "VCF",
          "genomeAssembly" => genome_assembly,
          "individualToSampleIdentifiers" => { "patient1" => id }
        }
        phenopacket[:htsFiles] = htsFiles
      end
      File.open(File.join(output_folder, id.to_s + ".json"), "w") { |f| f.write JSON.pretty_generate(phenopacket) }
      # NOTE(review): a trailing 'variants' array was built here in the
      # original but never used; removed as dead code.
    end
  end
end
@@ -0,0 +1,7 @@
1
# Locations of bundled assets, resolved relative to the package root.
# The file requiring this one must define the ROOT_PATH constant first.
pkg_root = File.expand_path(File.join(ROOT_PATH, '..'))
REPORT_FOLDER = File.join(pkg_root, 'templates')
EXTERNAL_DATA = File.join(pkg_root, 'external_data')
EXTERNAL_CODE = File.join(pkg_root, 'external_code')
# Data files shipped under external_data.
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
MONDO_FILE = File.join(EXTERNAL_DATA, 'mondo.obo')
IC_FILE = File.join(EXTERNAL_DATA, 'uniq_hpo_with_CI.txt')
@@ -13,80 +13,15 @@ def system_call(code_folder, script, args_string)
13
13
  end
14
14
  end
15
15
 
16
- def read_excluded_hpo_file(file)
17
- excluded_hpo = []
18
- File.open(file).each do |line|
19
- excluded_hpo << line.chomp
20
- end
21
- return excluded_hpo
22
- end
23
-
24
- #Common methods for predictors
25
- #Training file example = 9 131371492 131375954 HP:0010974 2.41161970596 9.3.A.5
26
- #1. Indexing by chr (region)
27
- def coor_overlap?(ref_start, ref_stop, start, stop)
28
- overlap = false
29
- if (stop > ref_start && stop <= ref_stop) ||
30
- (start >= ref_start && start < ref_stop) ||
31
- (start <= ref_start && stop >= ref_stop) ||
32
- (start > ref_start && stop < ref_stop)
33
- overlap = true
34
- end
35
- return overlap
36
- end
37
-
38
- def load_training_file4regions(training_file)
39
- training_set = {}
40
- posInfo = loadFile(training_file)
41
- posInfo.each do |info|
42
- chr = info.shift
43
- query = training_set[chr]
44
- if query.nil?
45
- training_set[chr] = [info]
46
- else
47
- query << info
48
- end
49
- end
50
- return training_set
51
- end
52
-
53
- #2. Indexing by hpo (code)
54
- #prepare training file for analysis using phenotype2region prediction
55
- def load_training_file4HPO(training_file, thresold=0)
56
- training_set = {}
57
- information = loadFile(training_file, thresold)
58
- information.each do |info|
59
- hpoCode = info.delete_at(4)
60
- query = training_set[hpoCode]
61
- if query.nil?
62
- training_set[hpoCode] = [info]
63
- else
64
- query << info
65
- end
66
- end
67
- # STDERR.puts training_set.keys.inspect
68
- return training_set
69
- end
70
-
71
-
72
- #3. Load training info file:
73
- #Chr;Start;Stop;HPO;Association;node
74
- def loadFile(file, thresold=0)
75
- information = []
76
- File.open(file).each do |line|
77
- line.chomp!
78
- allInfo = line.split("\t")
79
- associationValue = allInfo[4].to_f
80
- if associationValue >= thresold
81
- chr = allInfo[0]
82
- startPos = allInfo[1].to_i
83
- stopPos = allInfo[2].to_i
84
- hpoCode = allInfo[3]
85
- nodeID = allInfo[5]
86
- information << [chr, startPos, stopPos, nodeID, hpoCode, associationValue]
87
- end
16
# Append record to the array stored under hash[key], creating the array on
# first insertion. When uniq is true, a record already present under that
# key is silently skipped; otherwise duplicates are kept.
def add_record(hash, key, record, uniq=false)
  entries = hash[key]
  if entries.nil?
    hash[key] = [record]
  else
    entries << record unless uniq && entries.include?(record)
  end
end
89
- return information
90
25
  end
91
26
 
92
27
 
@@ -111,105 +46,6 @@ def compute_IC_values(patient_data, total_patients)
111
46
  return patients_per_hpo
112
47
  end
113
48
 
114
- def load_hpo_ci_values(information_coefficient_file)
115
- hpos_ci_values = {}
116
- File.open(information_coefficient_file).each do |line|
117
- line.chomp!
118
- hpo_code, ci = line.split("\t")
119
- hpos_ci_values[hpo_code.to_sym] = ci.to_f
120
- end
121
- return hpos_ci_values
122
- end
123
-
124
- def load_clustered_patients(file)
125
- clusters = {}
126
- File.open(file).each do |line|
127
- line.chomp!
128
- pat_id, cluster_id = line.split("\t")
129
- query = clusters[cluster_id]
130
- if query.nil?
131
- clusters[cluster_id] = [pat_id]
132
- else
133
- query << pat_id
134
- end
135
- end
136
- return clusters
137
- end
138
-
139
- def load_gene_data(gene_data_path)
140
- gene_list = {} #geneID => attr
141
- gene_location = {} # chr => gene
142
- infile = open(gene_data_path)
143
- gz = Zlib::GzipReader.new(infile)
144
- current_chr = nil
145
- genes = []
146
- gz.each_line do |line|
147
- line.chomp!
148
- next if line =~ /^#/
149
- fields = line.split("\t")
150
- if fields[8].include?('genome=chromosome')
151
- chr = fields[8].split(';')[1].split('=').last
152
- gene_location[current_chr] = genes
153
- genes = []
154
- current_chr = chr
155
- elsif fields[2] == 'gene'
156
- attributes = {}
157
- fields[8].split(';').each do |pair|
158
- key, value = pair.split('=')
159
- attributes[key] = value
160
- end
161
- geneName = nil
162
- geneName = attributes['gene'] if !attributes['gene'].nil?
163
- geneSyns = []
164
- geneSyns = attributes['gene_synonym'].split(',') if !attributes['gene_synonym'].nil?
165
- description = attributes['description']
166
- description = URI.unescape(description) if !description.nil?
167
- attributes['Dbxref'] =~ /GeneID:(\d+)/
168
- gene_list[$1] = [geneName, geneSyns, description]
169
- genes << [$1, fields[3].to_i, fields[4].to_i]
170
- end
171
- end
172
- gene_location[current_chr] = genes
173
- return gene_list, gene_location
174
- end
175
-
176
- def parse_kegg_data(query_genes)
177
- kegg_data = {} #gene => attb
178
- while !query_genes.empty?
179
- gene_set = query_genes.shift(10)
180
- url = "http://rest.kegg.jp/get/#{gene_set.map{|qg| "hsa:#{qg}"}.join('+')}"
181
- uri = URI(url)
182
- response = Net::HTTP.get(uri)
183
- geneID = nil
184
- gene_names = []
185
- definition = nil
186
- pathways = []
187
- parsing_pathway_field = false
188
- response.squeeze(' ').each_line do |line|
189
- line.chomp!
190
- if line =~ /^ENTRY/
191
- geneID = line.split(' ')[1]
192
- elsif line =~ /^NAME/
193
- gene_names = line.split(' ', 2).last.split(', ')
194
- elsif line =~ /^DEFINITION/
195
- definition = line.split(' ', 2)[1]
196
- elsif line =~ /^PATHWAY/
197
- pathways << line.split(' ', 3)[1..2]
198
- parsing_pathway_field = true
199
- elsif line =~ /^BRITE/ || line =~ /^POSITION/ || line =~ /^DISEASE/ || line =~ /^MODULE/ || line =~ /^DRUG_TARGET/ || line =~ /^NETWORK/
200
- parsing_pathway_field = false
201
- elsif parsing_pathway_field
202
- pathways << line.strip.split(' ', 2)
203
- elsif line == '///'
204
- parsing_pathway_field = false
205
- kegg_data[geneID] = [gene_names, definition, pathways]
206
- pathways = []
207
- gene_names = []
208
- end
209
- end
210
- end
211
- return kegg_data
212
- end
213
49
 
214
50
  def parse_kegg_from_biosystems(biosystems_gene_path, biosystems_info_path)
215
51
  kegg_data = {}
@@ -270,21 +106,6 @@ def merge_genes_with_kegg_data(gene_list, kegg_data)
270
106
  return merged_data
271
107
  end
272
108
 
273
- def write_compressed_plain_file(data, path)
274
- File.open(path, 'w') do |f|
275
- gz = Zlib::GzipWriter.new(f)
276
- gz.write data.to_json
277
- gz.close
278
- end
279
- end
280
-
281
- def read_compressed_json(path)
282
- infile = open(path)
283
- gz = Zlib::GzipReader.new(infile)
284
- object = JSON.parse(gz.read)
285
- return object
286
- end
287
-
288
109
  def compute_pathway_enrichment(genes_clusters, genes_with_kegg)
289
110
  pathways_genes_in_predictions = {}
290
111
  genes_in_predictions = []
@@ -358,138 +179,8 @@ def binom(n,k)
358
179
  end
359
180
  end
360
181
 
361
- def get_reference(genomic_ranges)
362
- #genomic_ranges = [patientID, mut_start, mut_stop]
363
- reference = []
364
- reference.concat(genomic_ranges.map{|gr| gr[1]})# get start
365
- reference.concat(genomic_ranges.map{|gr| gr[2]})# get stop
366
- reference.uniq!
367
- reference.sort!
368
- #Define overlap range
369
- final_reference = []
370
- reference.each_with_index do |coord,i|
371
- next_coord = reference[i + 1]
372
- final_reference << [coord, next_coord] if !next_coord.nil?
373
- end
374
- return final_reference
375
- end
376
-
377
- def overlap_patients(genomic_ranges, reference)
378
- overlaps = []
379
- reference.each do |start, stop|
380
- patients = []
381
- genomic_ranges.each do |pt_id, pt_start, pt_stop|
382
- if (start <= pt_start && stop >= pt_stop) ||
383
- (start > pt_start && stop < pt_stop) ||
384
- (stop > pt_start && stop <= pt_stop) ||
385
- (start >= pt_start && start < pt_stop)
386
- patients << pt_id
387
- end
388
- end
389
- overlaps << patients.uniq
390
- end
391
- return overlaps
392
- end
393
-
394
- def generate_cluster_regions(patients_genomic_region_by_chr, mutation_type, pat_per_reg = 1)
395
- patients_out_of_cluster = 0
396
- patients_by_cluster = {}
397
- sors = []
398
- patients_genomic_region_by_chr.each do |chrm, genomic_ranges|
399
- reference = get_reference(genomic_ranges) # Get putative overlap regions
400
- overlapping_patients = overlap_patients(genomic_ranges, reference) # See what patient has match with a overlap region
401
- clust_number = 1
402
- reference.each_with_index do |ref, i|
403
- current_patients = overlapping_patients[i]
404
- if current_patients.length > pat_per_reg
405
- ref << chrm
406
- node_identifier = "#{chrm}.#{clust_number}.#{mutation_type}.#{current_patients.length}"
407
- ref << node_identifier
408
- save_sor(current_patients, node_identifier, patients_by_cluster)
409
- sors << ref
410
- clust_number += 1
411
- end
412
- end
413
- end
414
- return patients_by_cluster, sors
415
- end
416
182
 
417
- def save_sor(current_patients, node_identifier, patients_by_cluster)
418
- current_patients.each do |patient|
419
- add_record(patients_by_cluster, patient, node_identifier)
420
- end
421
- end
422
183
 
423
- def add_record(hash, key, record)
424
- query = hash[key]
425
- if query.nil?
426
- hash[key] = [record]
427
- elsif !query.include?(record)
428
- query << record
429
- end
430
- end
431
-
432
- def load_patient_cohort(options)
433
- patient_data = {}
434
- count = 0
435
- fields2extract = get_fields2extract(options)
436
- field_numbers = fields2extract.values
437
- File.open(options[:input_file]).each do |line|
438
- line.chomp!
439
- if options[:header] && count == 0
440
- line.gsub!(/#\s*/,'') # correct comment like headers
441
- field_names = line.split("\t")
442
- get_field_numbers2extract(field_names, fields2extract)
443
- field_numbers = fields2extract.values
444
- else
445
- fields = line.split("\t")
446
- pat_record = field_numbers.map{|n| fields[n]}
447
- if fields2extract[:pat_id_col].nil?
448
- pat_id = "pat_#{count}" #generate ids
449
- else
450
- original_id = pat_record.shift
451
- pat_id = original_id + "_i#{count}" # make sure that ids are uniq
452
- end
453
- if !pat_record[0].nil?
454
- pat_record[0] = pat_record[0].split(options[:hpo_separator])
455
- else
456
- pat_record[0] = []
457
- end
458
- pat_record[2] = pat_record[2].to_i if !options[:start_col].nil?
459
- pat_record[3] = pat_record[3].to_i if !options[:end_col].nil?
460
- patient_data[pat_id] = pat_record
461
- end
462
- count +=1
463
- end
464
- options[:pat_id_col] = 'generated' if fields2extract[:pat_id_col].nil?
465
- return patient_data
466
- end
467
-
468
- def get_fields2extract(options)
469
- fields2extract = {}
470
- [:pat_id_col, :hpo_col, :chromosome_col, :start_col, :end_col].each do |field|
471
- col = options[field]
472
- if !col.nil?
473
- col = col.to_i if !options[:header]
474
- fields2extract[field] = col
475
- end
476
- end
477
- return fields2extract
478
- end
479
-
480
- def get_field_numbers2extract(field_names, fields2extract)
481
- fields2extract.each do |field, name|
482
- fields2extract[field] = field_names.index(name)
483
- end
484
- end
485
-
486
- def download(ftp_server, path, name)
487
- ftp = Net::FTP.new()
488
- ftp.connect(ftp_server)
489
- ftp.login
490
- ftp.getbinaryfile(path, name)
491
- ftp.close
492
- end
493
184
 
494
185
  def get_and_parse_external_data(all_paths)
495
186
  sources = [