pets 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,307 @@
1
+ require 'json'
2
+ require 'semtools'
3
+
4
class Cohort
  # Shared registry of loaded ontologies, keyed by the id used at load time.
  # NOTE: class variable, shared across the whole inheritance tree.
  @@ont = {}

  class << self # https://www.ruby-forum.com/t/attr-accessor-for-class-variable/136693
    attr_accessor :act_ont # Which ontology use for ont related operations
  end

  attr_accessor :profiles

  # Returns the ontology registered under ont_id (nil if not loaded).
  def self.get_ontology(ont_id)
    return @@ont[ont_id]
  end

  # Loads an ontology from an OBO-like text file or a precomputed JSON dump
  # and registers it under ont_name. Terms listed in excluded_terms_file
  # (one code per line) are removed from the ontology.
  def self.load_ontology(ont_name, ont_file, excluded_terms_file = nil)
    ont = nil
    if !ont_file.include?('.json') # text source: parse from scratch
      if !excluded_terms_file.nil?
        ont = Ontology.new(file: ont_file, load_file: true, removable_terms: read_excluded_ont_file(excluded_terms_file))
      else
        ont = Ontology.new(file: ont_file, load_file: true)
      end
    else # JSON dump: read, then prune and reindex if exclusions were given
      ont = Ontology.new
      ont.read(ont_file)
      if !excluded_terms_file.nil?
        ont.add_removable_terms(read_excluded_ont_file(excluded_terms_file))
        ont.remove_removable()
        ont.build_index()
      end
    end
    @@ont[ont_name] = ont
  end

  # Reads a plain-text file with one ontology term code per line.
  def self.read_excluded_ont_file(file)
    excluded_hpo = []
    File.open(file).each do |line|
      excluded_hpo << line.chomp
    end
    return excluded_hpo
  end

  def initialize()
    @profiles = {}                     # record id => array of term symbols
    @vars = {}                         # record id => Genomic_Feature with that record's variants
    @var_idx = Genomic_Feature.new([]) # cohort-wide variant index, filled by index_vars
  end

  # rec = [id, [terms], [[chr, start, stop], ...]]; terms and variants may each be nil.
  def add_record(rec) #[id, [profile], [[chr1, start1, stop1],[chr1, start1, stop1]]]
    id, profile, vars = rec
    @profiles[id] = profile.map{|t| t.to_sym} if !profile.nil?
    add_gen_feat(id, vars) if !vars.nil?
  end

  # Removes a record (profile and variants) by id.
  def delete(id)
    @profiles.delete(id)
    @vars.delete(id)
  end

  # Keeps only profiles for which the block returns truthy; variants of
  # discarded records are dropped too, keeping both hashes in sync.
  def select_by_profile!
    @profiles.select!{|id, profile| yield(id, profile)}
    current_ids = @profiles.keys
    @vars.select!{|id, var| current_ids.include?(id)}
  end

  # Keeps only variant sets for which the block returns truthy; profiles of
  # discarded records are dropped too.
  def select_by_var!
    @vars.select!{|id, profile| yield(id, profile)}
    current_ids = @vars.keys
    @profiles.select!{|id, var| current_ids.include?(id)}
  end

  # Drops records whose profile has fewer than n_terms terms.
  def filter_by_term_number(n_terms)
    select_by_profile!{|id, profile| profile.length >= n_terms}
  end

  def remove_incomplete_records # remove records that lack vars or phenotypes
    ids_with_terms = @profiles.keys
    ids_with_vars = []
    @vars.each{|id, regs| ids_with_vars << id if regs.length > 0}
    full_ids = ids_with_vars & ids_with_terms
    @profiles.select!{|id, prof| full_ids.include?(id)}
    @vars.select!{|id, var| full_ids.include?(id)}
  end

  def add_gen_feat(id, feat_array) # [[chr1, start1, stop1],[chr1, start1, stop1]]
    @vars[id] = Genomic_Feature.new(feat_array)
  end

  def get_profile(id)
    return @profiles[id]
  end

  def get_vars(id)
    return @vars[id]
  end

  # Yields (id, profile) for every record with a phenotype profile.
  def each_profile()
    @profiles.each do |id, profile|
      yield(id, profile)
    end
  end

  # Yields (id, Genomic_Feature) for every record with variants.
  def each_var()
    @vars.each do |id, var_info|
      yield(id, var_info)
    end
  end

  # Builds a cohort-wide profile: every term present in at least thr fraction
  # of the records, cleaned through the active ontology.
  def get_general_profile(thr=0) # TODO move funcionality to semtools
    term_count = Hash.new(0)
    each_profile do |id, prof|
      prof.each do |term|
        term_count[term] += 1 # FIX: was `general_profile[prof] += 1` — undefined variable at this point and wrong key
      end
    end
    records = @profiles.length
    general_profile = []
    term_count.each do |term, count|
      general_profile << term if count.fdiv(records) >= thr
    end
    ont = @@ont[Cohort.act_ont]
    return ont.clean_profile_hard(general_profile)
  end

  # Validates every profile against the active ontology. hard=true applies
  # clean_profile_hard; otherwise unknown codes are stripped with a warning.
  # Records left with no valid terms are removed from profiles and vars.
  # Returns [uniq rejected term codes, rejected record ids].
  def check(hard=false) # OLD format_patient_data
    ont = @@ont[Cohort.act_ont]
    rejected_terms = []
    rejected_recs = []
    @profiles.each do |id, terms|
      if hard
        terms = ont.clean_profile_hard(terms)
        rejec_terms = []
      else
        terms, rejec_terms = ont.check_ids(terms)
      end
      if !rejec_terms.empty?
        STDERR.puts "WARNING: record #{id} has the unknown CODES '#{rejec_terms.join(',')}'. Codes removed."
        rejected_terms.concat(rejec_terms)
      end
      if terms.empty?
        rejected_recs << id
      else
        @profiles[id] = terms
      end
    end
    @profiles.select!{|id, record| !rejected_recs.include?(id)}
    @vars.select!{|id, record| !rejected_recs.include?(id)}
    return rejected_terms.uniq, rejected_recs
  end

  # Pushes the cohort profiles into the given ontology for profile-level ops.
  def link2ont(ont_id)
    @@ont[ont_id].load_profiles(@profiles)
  end

  def get_profile_redundancy
    ont = @@ont[Cohort.act_ont]
    profile_sizes, parental_terms_per_profile = ont.get_profile_redundancy
    return profile_sizes, parental_terms_per_profile
  end

  def get_profiles_terms_frequency(options={})
    ont = @@ont[Cohort.act_ont]
    term_stats = ont.get_profiles_terms_frequency(**options) #https://www.ruby-lang.org/en/news/2019/12/12/separation-of-positional-and-keyword-arguments-in-ruby-3-0/
    return term_stats
  end

  def compute_term_list_and_childs()
    ont = @@ont[Cohort.act_ont]
    suggested_childs, term_with_childs_ratio = ont.compute_term_list_and_childs()
  end

  # Returns [ontology_levels, distribution_percentage] tables, each prefixed
  # with its header row.
  def get_profile_ontology_distribution_tables()
    ont = @@ont[Cohort.act_ont]
    ontology_levels, distribution_percentage = ont.get_profile_ontology_distribution_tables
    ontology_levels.unshift(["level", "ontology", "cohort"])
    distribution_percentage.unshift(["level", "ontology", "weighted cohort", "uniq terms cohort"])
    return ontology_levels, distribution_percentage
  end

  def get_ic_analysis()
    ont = @@ont[Cohort.act_ont]
    onto_ic, freq_ic = ont.get_observed_ics_by_onto_and_freq # IC for TERMS
    onto_ic_profile, freq_ic_profile = ont.get_profiles_resnik_dual_ICs # IC for PROFILES
    return onto_ic, freq_ic, onto_ic_profile, freq_ic_profile
  end

  def get_profiles_mean_size
    ont = @@ont[Cohort.act_ont]
    profile_mean_size = ont.get_profiles_mean_size
    return profile_mean_size
  end

  def get_profile_length_at_percentile(perc=50, increasing_sort: false)
    ont = @@ont[Cohort.act_ont]
    # FIX: was `perc=perc` (a no-op self-assignment); pass the value directly
    length_percent = ont.get_profile_length_at_percentile(perc, increasing_sort: increasing_sort)
    return length_percent
  end

  def get_dataset_specifity_index(type)
    ont = @@ont[Cohort.act_ont]
    dsi = ont.get_dataset_specifity_index(type)
    return dsi
  end

  def compare_profiles(options={})
    ont = @@ont[Cohort.act_ont]
    similarities = ont.compare_profiles(**options)
    return similarities
  end

  # Merges every record's variants into the cohort-wide index.
  def index_vars # equivalent to process_patient_data
    each_var do |id, var|
      @var_idx.merge(var, id)
    end
  end

  def get_vars_sizes(summary=false)
    if summary
      return @var_idx.get_summary_sizes
    else
      return @var_idx.get_sizes
    end
  end

  def generate_cluster_regions(meth, tag, lim)
    @var_idx.generate_cluster_regions(meth, tag, lim)
  end

  # Writes the cohort to a tab file. :default => id, terms, variant columns;
  # :paco => id, variant, terms columns (with header row). translate=true
  # converts term ids to names through the active ontology.
  def save(output_file, mode = :default, translate = false)
    File.open(output_file, 'w') do |f|
      # FIX: header was gated on `mode == 'paco'` (string) while rows use the
      # :paco symbol, so the header was never written
      f.puts "id\tchr\tstart\tstop\tterms" if mode == :paco
      ont = @@ont[Cohort.act_ont]
      @profiles.each do |id, terms|
        terms, rejected = ont.translate_ids(terms) if translate
        id_variants = @vars[id]
        variants = []
        if id_variants.nil? || id_variants.length == 0
          variants << ['-', '-', '-'] # placeholder row when the record has no variants
        else
          id_variants.each do |chr, reg|
            variants << [chr, reg[:start], reg[:stop]]
          end
        end
        variants.each do |var|
          if mode == :default
            f.puts "#{id}\t#{terms.join('|')}\t#{var.join("\t")}"
          elsif mode == :paco
            f.puts "#{id}\t#{var.join("\t")}\t#{terms.join('|')}"
          else
            abort('Wrong save mode option, please try default or paco')
          end
        end
      end
    end
  end

  # Writes one Phenopacket JSON file per record into output_folder.
  # vcf_index, when given, maps record ids to VCF paths (htsFiles section).
  def export_phenopackets(output_folder, genome_assembly, vcf_index: nil)
    ont = @@ont[Cohort.act_ont]
    metaData = {
      "createdBy" => "PETS",
      "resources" => [{
        "id" => "hp",
        "name" => "human phenotype ontology",
        "namespacePrefix" => "HP",
        "url" => "http://purl.obolibrary.org/obo/hp.owl",
        # "version" => "2018-03-08",
        "iriPrefix" => "http://purl.obolibrary.org/obo/HP_"
      }]
    }

    @profiles.each do |id, terms|
      phenopacket = {metaData: metaData}
      phenopacket[:subject] = {id: id}
      phenotypicFeatures = []
      terms.each do |term|
        term_name = ont.translate_id(term)
        phenotypicFeatures << {
          type: { id: term, label: term_name},
          classOfOnset: {"id" => "HP:0003577", "label" => "Congenital onset"}
        }
      end
      phenopacket[:phenotypicFeatures] = phenotypicFeatures
      if !vcf_index.nil? && vcf_index.include?(id)
        htsFiles = []
        htsFiles << {
          "uri" => "file:/" + vcf_index[id],
          "description" => id,
          "htsFormat" => "VCF",
          "genomeAssembly" => genome_assembly,
          "individualToSampleIdentifiers" => { "patient1" => id }
        }
        phenopacket[:htsFiles] = htsFiles
      end
      File.open(File.join(output_folder, id.to_s + ".json"), "w") { |f| f.write JSON.pretty_generate(phenopacket) }
      # NOTE(review): the `variants` array below is built but never used in
      # this method — looks like dead code copied from save; confirm and remove
      id_variants = @vars[id]
      variants = []
      if id_variants.nil? || id_variants.length == 0
        variants << ['-', '-', '-']
      else
        id_variants.each do |chr, reg|
          variants << [chr, reg[:start], reg[:stop]]
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
# All paths are resolved relative to ROOT_PATH, which must be defined by the
# file that requires this one.
REPORT_FOLDER = File.expand_path('../templates', ROOT_PATH)
EXTERNAL_DATA = File.expand_path('../external_data', ROOT_PATH)
EXTERNAL_CODE = File.expand_path('../external_code', ROOT_PATH)
# Data files shipped under EXTERNAL_DATA.
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
MONDO_FILE = File.join(EXTERNAL_DATA, 'mondo.obo')
IC_FILE = File.join(EXTERNAL_DATA, 'uniq_hpo_with_CI.txt')
@@ -13,80 +13,15 @@ def system_call(code_folder, script, args_string)
13
13
  end
14
14
  end
15
15
 
16
- def read_excluded_hpo_file(file)
17
- excluded_hpo = []
18
- File.open(file).each do |line|
19
- excluded_hpo << line.chomp
20
- end
21
- return excluded_hpo
22
- end
23
-
24
- #Common methods for predictors
25
- #Training file example = 9 131371492 131375954 HP:0010974 2.41161970596 9.3.A.5
26
- #1. Indexing by chr (region)
27
- def coor_overlap?(ref_start, ref_stop, start, stop)
28
- overlap = false
29
- if (stop > ref_start && stop <= ref_stop) ||
30
- (start >= ref_start && start < ref_stop) ||
31
- (start <= ref_start && stop >= ref_stop) ||
32
- (start > ref_start && stop < ref_stop)
33
- overlap = true
34
- end
35
- return overlap
36
- end
37
-
38
- def load_training_file4regions(training_file)
39
- training_set = {}
40
- posInfo = loadFile(training_file)
41
- posInfo.each do |info|
42
- chr = info.shift
43
- query = training_set[chr]
44
- if query.nil?
45
- training_set[chr] = [info]
46
- else
47
- query << info
48
- end
49
- end
50
- return training_set
51
- end
52
-
53
- #2. Indexing by hpo (code)
54
- #prepare training file for analysis using phenotype2region prediction
55
- def load_training_file4HPO(training_file, thresold=0)
56
- training_set = {}
57
- information = loadFile(training_file, thresold)
58
- information.each do |info|
59
- hpoCode = info.delete_at(4)
60
- query = training_set[hpoCode]
61
- if query.nil?
62
- training_set[hpoCode] = [info]
63
- else
64
- query << info
65
- end
66
- end
67
- # STDERR.puts training_set.keys.inspect
68
- return training_set
69
- end
70
-
71
-
72
- #3. Load training info file:
73
- #Chr;Start;Stop;HPO;Association;node
74
- def loadFile(file, thresold=0)
75
- information = []
76
- File.open(file).each do |line|
77
- line.chomp!
78
- allInfo = line.split("\t")
79
- associationValue = allInfo[4].to_f
80
- if associationValue >= thresold
81
- chr = allInfo[0]
82
- startPos = allInfo[1].to_i
83
- stopPos = allInfo[2].to_i
84
- hpoCode = allInfo[3]
85
- nodeID = allInfo[5]
86
- information << [chr, startPos, stopPos, nodeID, hpoCode, associationValue]
87
- end
16
# Appends record to the array stored under key in hash, creating the array
# on first use. With uniq truthy, a record already present is not added again.
def add_record(hash, key, record, uniq=false)
  entries = hash[key]
  if entries.nil?
    hash[key] = [record] # first record for this key
  elsif !uniq || !entries.include?(record)
    entries << record # append; duplicates allowed unless uniq was requested
  end
end
26
 
92
27
 
@@ -111,105 +46,6 @@ def compute_IC_values(patient_data, total_patients)
111
46
  return patients_per_hpo
112
47
  end
113
48
 
114
- def load_hpo_ci_values(information_coefficient_file)
115
- hpos_ci_values = {}
116
- File.open(information_coefficient_file).each do |line|
117
- line.chomp!
118
- hpo_code, ci = line.split("\t")
119
- hpos_ci_values[hpo_code.to_sym] = ci.to_f
120
- end
121
- return hpos_ci_values
122
- end
123
-
124
- def load_clustered_patients(file)
125
- clusters = {}
126
- File.open(file).each do |line|
127
- line.chomp!
128
- pat_id, cluster_id = line.split("\t")
129
- query = clusters[cluster_id]
130
- if query.nil?
131
- clusters[cluster_id] = [pat_id]
132
- else
133
- query << pat_id
134
- end
135
- end
136
- return clusters
137
- end
138
-
139
- def load_gene_data(gene_data_path)
140
- gene_list = {} #geneID => attr
141
- gene_location = {} # chr => gene
142
- infile = open(gene_data_path)
143
- gz = Zlib::GzipReader.new(infile)
144
- current_chr = nil
145
- genes = []
146
- gz.each_line do |line|
147
- line.chomp!
148
- next if line =~ /^#/
149
- fields = line.split("\t")
150
- if fields[8].include?('genome=chromosome')
151
- chr = fields[8].split(';')[1].split('=').last
152
- gene_location[current_chr] = genes
153
- genes = []
154
- current_chr = chr
155
- elsif fields[2] == 'gene'
156
- attributes = {}
157
- fields[8].split(';').each do |pair|
158
- key, value = pair.split('=')
159
- attributes[key] = value
160
- end
161
- geneName = nil
162
- geneName = attributes['gene'] if !attributes['gene'].nil?
163
- geneSyns = []
164
- geneSyns = attributes['gene_synonym'].split(',') if !attributes['gene_synonym'].nil?
165
- description = attributes['description']
166
- description = URI.unescape(description) if !description.nil?
167
- attributes['Dbxref'] =~ /GeneID:(\d+)/
168
- gene_list[$1] = [geneName, geneSyns, description]
169
- genes << [$1, fields[3].to_i, fields[4].to_i]
170
- end
171
- end
172
- gene_location[current_chr] = genes
173
- return gene_list, gene_location
174
- end
175
-
176
- def parse_kegg_data(query_genes)
177
- kegg_data = {} #gene => attb
178
- while !query_genes.empty?
179
- gene_set = query_genes.shift(10)
180
- url = "http://rest.kegg.jp/get/#{gene_set.map{|qg| "hsa:#{qg}"}.join('+')}"
181
- uri = URI(url)
182
- response = Net::HTTP.get(uri)
183
- geneID = nil
184
- gene_names = []
185
- definition = nil
186
- pathways = []
187
- parsing_pathway_field = false
188
- response.squeeze(' ').each_line do |line|
189
- line.chomp!
190
- if line =~ /^ENTRY/
191
- geneID = line.split(' ')[1]
192
- elsif line =~ /^NAME/
193
- gene_names = line.split(' ', 2).last.split(', ')
194
- elsif line =~ /^DEFINITION/
195
- definition = line.split(' ', 2)[1]
196
- elsif line =~ /^PATHWAY/
197
- pathways << line.split(' ', 3)[1..2]
198
- parsing_pathway_field = true
199
- elsif line =~ /^BRITE/ || line =~ /^POSITION/ || line =~ /^DISEASE/ || line =~ /^MODULE/ || line =~ /^DRUG_TARGET/ || line =~ /^NETWORK/
200
- parsing_pathway_field = false
201
- elsif parsing_pathway_field
202
- pathways << line.strip.split(' ', 2)
203
- elsif line == '///'
204
- parsing_pathway_field = false
205
- kegg_data[geneID] = [gene_names, definition, pathways]
206
- pathways = []
207
- gene_names = []
208
- end
209
- end
210
- end
211
- return kegg_data
212
- end
213
49
 
214
50
  def parse_kegg_from_biosystems(biosystems_gene_path, biosystems_info_path)
215
51
  kegg_data = {}
@@ -270,21 +106,6 @@ def merge_genes_with_kegg_data(gene_list, kegg_data)
270
106
  return merged_data
271
107
  end
272
108
 
273
- def write_compressed_plain_file(data, path)
274
- File.open(path, 'w') do |f|
275
- gz = Zlib::GzipWriter.new(f)
276
- gz.write data.to_json
277
- gz.close
278
- end
279
- end
280
-
281
- def read_compressed_json(path)
282
- infile = open(path)
283
- gz = Zlib::GzipReader.new(infile)
284
- object = JSON.parse(gz.read)
285
- return object
286
- end
287
-
288
109
  def compute_pathway_enrichment(genes_clusters, genes_with_kegg)
289
110
  pathways_genes_in_predictions = {}
290
111
  genes_in_predictions = []
@@ -358,138 +179,8 @@ def binom(n,k)
358
179
  end
359
180
  end
360
181
 
361
- def get_reference(genomic_ranges)
362
- #genomic_ranges = [patientID, mut_start, mut_stop]
363
- reference = []
364
- reference.concat(genomic_ranges.map{|gr| gr[1]})# get start
365
- reference.concat(genomic_ranges.map{|gr| gr[2]})# get stop
366
- reference.uniq!
367
- reference.sort!
368
- #Define overlap range
369
- final_reference = []
370
- reference.each_with_index do |coord,i|
371
- next_coord = reference[i + 1]
372
- final_reference << [coord, next_coord] if !next_coord.nil?
373
- end
374
- return final_reference
375
- end
376
-
377
- def overlap_patients(genomic_ranges, reference)
378
- overlaps = []
379
- reference.each do |start, stop|
380
- patients = []
381
- genomic_ranges.each do |pt_id, pt_start, pt_stop|
382
- if (start <= pt_start && stop >= pt_stop) ||
383
- (start > pt_start && stop < pt_stop) ||
384
- (stop > pt_start && stop <= pt_stop) ||
385
- (start >= pt_start && start < pt_stop)
386
- patients << pt_id
387
- end
388
- end
389
- overlaps << patients.uniq
390
- end
391
- return overlaps
392
- end
393
-
394
- def generate_cluster_regions(patients_genomic_region_by_chr, mutation_type, pat_per_reg = 1)
395
- patients_out_of_cluster = 0
396
- patients_by_cluster = {}
397
- sors = []
398
- patients_genomic_region_by_chr.each do |chrm, genomic_ranges|
399
- reference = get_reference(genomic_ranges) # Get putative overlap regions
400
- overlapping_patients = overlap_patients(genomic_ranges, reference) # See what patient has match with a overlap region
401
- clust_number = 1
402
- reference.each_with_index do |ref, i|
403
- current_patients = overlapping_patients[i]
404
- if current_patients.length > pat_per_reg
405
- ref << chrm
406
- node_identifier = "#{chrm}.#{clust_number}.#{mutation_type}.#{current_patients.length}"
407
- ref << node_identifier
408
- save_sor(current_patients, node_identifier, patients_by_cluster)
409
- sors << ref
410
- clust_number += 1
411
- end
412
- end
413
- end
414
- return patients_by_cluster, sors
415
- end
416
182
 
417
- def save_sor(current_patients, node_identifier, patients_by_cluster)
418
- current_patients.each do |patient|
419
- add_record(patients_by_cluster, patient, node_identifier)
420
- end
421
- end
422
183
 
423
- def add_record(hash, key, record)
424
- query = hash[key]
425
- if query.nil?
426
- hash[key] = [record]
427
- elsif !query.include?(record)
428
- query << record
429
- end
430
- end
431
-
432
- def load_patient_cohort(options)
433
- patient_data = {}
434
- count = 0
435
- fields2extract = get_fields2extract(options)
436
- field_numbers = fields2extract.values
437
- File.open(options[:input_file]).each do |line|
438
- line.chomp!
439
- if options[:header] && count == 0
440
- line.gsub!(/#\s*/,'') # correct comment like headers
441
- field_names = line.split("\t")
442
- get_field_numbers2extract(field_names, fields2extract)
443
- field_numbers = fields2extract.values
444
- else
445
- fields = line.split("\t")
446
- pat_record = field_numbers.map{|n| fields[n]}
447
- if fields2extract[:pat_id_col].nil?
448
- pat_id = "pat_#{count}" #generate ids
449
- else
450
- original_id = pat_record.shift
451
- pat_id = original_id + "_i#{count}" # make sure that ids are uniq
452
- end
453
- if !pat_record[0].nil?
454
- pat_record[0] = pat_record[0].split(options[:hpo_separator])
455
- else
456
- pat_record[0] = []
457
- end
458
- pat_record[2] = pat_record[2].to_i if !options[:start_col].nil?
459
- pat_record[3] = pat_record[3].to_i if !options[:end_col].nil?
460
- patient_data[pat_id] = pat_record
461
- end
462
- count +=1
463
- end
464
- options[:pat_id_col] = 'generated' if fields2extract[:pat_id_col].nil?
465
- return patient_data
466
- end
467
-
468
- def get_fields2extract(options)
469
- fields2extract = {}
470
- [:pat_id_col, :hpo_col, :chromosome_col, :start_col, :end_col].each do |field|
471
- col = options[field]
472
- if !col.nil?
473
- col = col.to_i if !options[:header]
474
- fields2extract[field] = col
475
- end
476
- end
477
- return fields2extract
478
- end
479
-
480
- def get_field_numbers2extract(field_names, fields2extract)
481
- fields2extract.each do |field, name|
482
- fields2extract[field] = field_names.index(name)
483
- end
484
- end
485
-
486
- def download(ftp_server, path, name)
487
- ftp = Net::FTP.new()
488
- ftp.connect(ftp_server)
489
- ftp.login
490
- ftp.getbinaryfile(path, name)
491
- ftp.close
492
- end
493
184
 
494
185
  def get_and_parse_external_data(all_paths)
495
186
  sources = [