pets 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,166 +1,63 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
3
  ROOT_PATH = File.dirname(__FILE__)
4
- REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
5
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
6
- EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
7
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
8
4
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
5
 
10
6
  require 'fileutils'
11
7
  require 'optparse'
12
8
  require 'report_html'
13
9
  require 'semtools'
14
- require 'generalMethods.rb'
15
-
16
-
17
- class Report_html
18
- def circular_genome(user_options = {}, &block)
19
- default_options = {}.merge!(user_options)
20
- coordinates = user_options[:genomic_coordinates]
21
- html_string = canvasXpress_main(default_options, block) do |options, config, samples, vars, values, object_id, x, z|
22
- config['graphType'] = 'Circular'
23
- config["arcSegmentsSeparation"] = 3
24
- config["colorScheme"] = "Tableau"
25
- config["colors"] = ["#332288","#6699CC","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#661100","#CC6677","#AA4466","#882255","#AA4499"]
26
- config["showIdeogram"] = true
27
- chr = []
28
- pos = []
29
- tags2remove = []
30
- vars.each_with_index do |var, i|
31
- coord = coordinates[var]
32
- if !coord.nil?
33
- tag = coord.first.gsub(/[^\dXY]/,'')
34
- if tag == 'X' || tag == 'Y' || (tag.to_i > 0 && tag.to_i <= 22)
35
- chr << coord.first.gsub(/[^\dXY]/,'')
36
- pos << coord.last - 1
37
- else
38
- tags2remove << i
39
- end
40
- else
41
- tags2remove << i
42
- end
43
- end
44
- tags2remove.reverse_each{|i| ent = vars.delete_at(i); warn("Feature #{ent} has not valid coordinates")} # Remove entities with invalid coordinates
45
- z['chr'] = chr
46
- z['pos'] = pos
47
- end
48
- return html_string
49
- end
50
- end
10
+ require 'pets'
51
11
 
52
12
  #############################################################################################
53
13
  ## METHODS
54
14
  ############################################################################################
55
- def load_profiles(file_path, hpo)
56
- profiles = {}
57
- #count = 0
58
- File.open(file_path).each do |line|
59
- id, profile = line.chomp.split("\t")
60
- hpos = profile.split(',').map{|a| a.to_sym}
61
- hpos, rejected_hpos = hpo.check_ids(hpos)
62
- if !hpos.empty?
63
- hpos = hpo.clean_profile(hpos)
64
- profiles[id] = hpos if !hpos.empty?
65
- end
66
- end
67
- return profiles
68
- end
69
-
70
- def load_variants(variant_folder)
71
- variants = {}
72
- coordinates = {}
73
- count = 0
74
- all_vars = {}
75
- Dir.glob(File.join(variant_folder, '*.tab')).each do |path|
76
- profile_id = File.basename(path, '.tab')
77
- vars = {}
78
- File.open(path).each do |line|
79
- fields = line.chomp.split("\t")
80
- chr = fields[0]
81
- start = fields[1].to_i
82
- query = coordinates[chr]
83
- if query.nil?
84
- coordinates[chr] = [start]
85
- count += 1
86
- id = "var_#{count}"
87
- else
88
- if !query.include?(start)
89
- query << start
90
- count += 1
91
- id = "var_#{count}"
92
- else
93
- id = all_vars.key([chr, start])
94
- end
95
- end
96
- vars[id] = [chr, start]
97
- end
98
- all_vars.merge!(vars)
99
- variants[profile_id] = vars
100
- end
101
- return variants
102
- end
103
-
104
- def load_evidences(evidences_path, hpo)
105
- genomic_coordinates = {}
106
- coord_files = Dir.glob(File.join(evidences_path, '*.coords'))
107
- coord_files.each do |cd_f|
108
- entity = File.basename(cd_f, '.coords')
109
- coordinates = load_coordinates(cd_f)
110
- genomic_coordinates[entity] = coordinates
111
- end
112
- evidences = {}
113
- evidence_files = Dir.glob(File.join(evidences_path, '*_HP.txt'))
114
- evidence_files.each do |e_f|
115
- pair = File.basename(e_f, '.txt')
116
- profiles, id2label = load_evidence_profiles(e_f, hpo)
117
- evidences[pair] = {prof: profiles, id2lab: id2label}
118
- end
119
- return evidences, genomic_coordinates
15
+ def get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
16
+ all_coordinates = genomic_coordinates[entity]
17
+ coords = all_coordinates.select{|id, coordinates| candidates_ids.include?(id.to_sym)}
18
+ return coords
120
19
  end
121
20
 
122
- def load_coordinates(file_path)
123
- coordinates = {}
124
- header = true
125
- File.open(file_path).each do |line|
126
- fields = line.chomp.split("\t")
127
- if header
128
- header = false
129
- else
130
- entity, chr, strand, start, stop = fields
131
- coordinates[entity] = [chr, start.to_i, stop.to_i, strand]
132
- end
133
- end
134
- return coordinates
21
+ def make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs, evidences, prof_vars, template, output)
22
+ var_ids, var_coors = format_variants4report(prof_vars)
23
+ container = {
24
+ profile_id: profile_id,
25
+ candidates: all_candidates.each{|c| c[0] = c.first.to_s},
26
+ genomic_coordinates: all_genomic_coordinates.transform_values{|c| c.first(2) },
27
+ similarity_matrixs: similarity_matrixs,
28
+ evidences: evidences,
29
+ var_ids: var_ids,
30
+ var_coordinates: var_coors
31
+ }
32
+ report = Report_html.new(container, 'Evidence profile report')
33
+ report.build(template)
34
+ report.write(File.join(output, profile_id.to_s + '.html'))
135
35
  end
136
36
 
137
- def load_evidence_profiles(file_path, hpo)
138
- profiles = {}
139
- id2label = {}
140
- #count = 0
141
- File.open(file_path).each do |line|
142
- id, label, profile = line.chomp.split("\t")
143
- hpos = profile.split(',').map{|a| a.to_sym}
144
- hpos, rejected_hpos = hpo.check_ids(hpos)
145
- if !hpos.empty?
146
- hpos = hpo.clean_profile(hpos)
147
- profiles[id] = hpos if !hpos.empty?
148
- id2label[id] = label
37
+ def format_variants4report(var_data)
38
+ if var_data.nil?
39
+ var_ids, var_coors = nil
40
+ else
41
+ var_ids = []
42
+ var_coors = {}
43
+ count = 0
44
+ var_data.each do |chr, reg|
45
+ var_id = "var_#{count}"
46
+ var_ids << [var_id, 0]
47
+ var_coors[var_id] = [chr.to_s, reg[:start]]
48
+ count += 1
149
49
  end
150
50
  end
151
- return profiles, id2label
51
+ return var_ids, var_coors
152
52
  end
153
53
 
154
-
155
-
156
- def get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
157
- all_coordinates = genomic_coordinates[entity]
158
- coords = all_coordinates.select{|id, coordinates| candidates_ids.include?(id.to_sym)}
159
- return coords
54
+ def get_genome_hotspots(similarity_matrixs, all_genomic_coordinates)
55
+ regions = Genomic_Feature.new(all_genomic_coordinates.values.map{|g| g[0..2]})
56
+ candidates_by_window, genome_windows = regions.generate_cluster_regions(:reg_overlap, 'A', 1)
57
+ # TODO: COMPLETE UNTIL FULL PREDICTOR
160
58
  end
161
59
 
162
60
 
163
-
164
61
  #############################################################################################
165
62
  ## OPTPARSE
166
63
  ############################################################################################
@@ -245,17 +142,7 @@ profiles.each do |profile_id, reference_prof|
245
142
  coords = get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
246
143
  all_genomic_coordinates.merge!(coords)
247
144
  end
145
+ get_genome_hotspots(similarity_matrixs, all_genomic_coordinates)
248
146
  prof_vars = profile_variants[profile_id]
249
- container = {
250
- profile_id: profile_id,
251
- candidates: all_candidates.each{|c| c[0] = c.first.to_s},
252
- genomic_coordinates: all_genomic_coordinates.transform_values{|c| c.first(2) },
253
- similarity_matrixs: similarity_matrixs,
254
- evidences: evidences,
255
- var_ids: prof_vars.nil? ? nil : prof_vars.keys.map{|i| [i, 0]},
256
- var_coordinates: prof_vars
257
- }
258
- report = Report_html.new(container, 'Evidence profile report')
259
- report.build(template)
260
- report.write(File.join(options[:output_folder], profile_id.to_s + '.html'))
147
+ make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs, evidences, prof_vars, template, options[:output_folder])
261
148
  end
@@ -3,112 +3,34 @@
3
3
  # Code to prepare data to get the associations between pathological phenotypes (HPO) and genomic regions (SOR)
4
4
 
5
5
  ROOT_PATH = File.dirname(__FILE__)
6
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
7
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
8
6
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
7
 
10
8
  ##############################
11
9
  #LIBRARIES
12
10
  ##############################
13
- require 'generalMethods.rb'
14
11
  require 'optparse'
15
- require 'semtools'
12
+ require 'pets'
16
13
 
17
14
  ###############################
18
15
  #METHODS
19
16
  ###############################
20
17
 
21
- def loadPatientFile(patient_file, hpo, add_parents)
22
- patient2phenotype = {}
23
- hpo_count = {}
24
- not_found = []
25
- patients_genomic_region_by_chr = {}
26
- File.open(patient_file).each do |line|
27
- line.chomp!
28
- next if line.include?("#")
29
- patient, chr, start, stop, phenotype_profile = line.split("\t", 5)
30
- next if phenotype_profile.nil? #For skipping patients without phenotypes
31
- phenotypes = phenotype_profile.split('|')
32
- # phenotypes, rejected = hpo.translate_names2codes(phenotypes)
33
- phenotypes, rejected = hpo.translate_names(phenotypes)
34
- not_found = not_found | rejected
35
- phenotypes.each do |hpo_code|
36
- get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents) if !hpo.is_removable(hpo_code)
37
- end
38
- info = [patient, start.to_i, stop.to_i]
39
- add_record(patients_genomic_region_by_chr, chr, info)
40
- end
41
- return patient2phenotype, hpo_count, not_found, patients_genomic_region_by_chr
42
- end
43
-
44
-
45
- def get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents)
46
- add_record(hpo_count, hpo_code, patient)
47
- add_record(patient2phenotype, patient, hpo_code)
48
- if add_parents == 'root'
49
- # hpo_parent_codes = hpo.get_parents(hpo_code)
50
- hpo_parent_codes = hpo.get_ancestors(hpo_code)
51
- hpo_parent_codes.each do |parent_code|
52
- add_record(hpo_count, parent_code, patient)
53
- add_record(patient2phenotype, patient, parent_code)
54
- end
55
- end
56
- end
57
-
58
- def build_tripartite_network(patients2hpo, hpo_stats, ic_threshold, patients_by_cluster)
18
+ def build_tripartite_network(patient_data, patients_by_cluster, add_parents, ont)
59
19
  tripartite_network = []
60
20
  patients_by_cluster.each do |patient, node_ids|
61
21
  node_ids.each do |node_id|
62
22
  tripartite_network << [node_id, patient]
63
23
  end
64
24
  end
65
- patients_list = patients_by_cluster.keys
66
- patients2hpo.each do |patient, code|
67
- if patients_list.include?(patient)
68
- code.each do |c|
69
- tripartite_network << [c, patient] if hpo_stats[c].last >= ic_threshold
70
- end
71
- end
25
+ patient_data.each_profile do |id, profile|
26
+ profile = (profile + profile.map{|term| ont.get_ancestors(term)}.flatten).uniq if add_parents == 'root' # 'root' mode ADDS ancestors: keep the patient's own terms too, as the previous get_all_hpos did
27
+ profile.each do |term|
28
+ tripartite_network << [term, id]
29
+ end
72
30
  end
73
31
  return tripartite_network
74
32
  end
75
33
 
76
- def compute_hpo_stats(hpo_count, patient_number)
77
- hpo_stats = {}
78
- patient_hpo_ic = []
79
- hpo_count.each do |hpo_code, patient_ids|
80
- hpo_freq = patient_ids.length.fdiv(patient_number) #hpo frequency in patients
81
- hpo_ic = -Math.log10(hpo_freq)
82
- hpo_stats[hpo_code] = [hpo_freq, hpo_ic]
83
- patient_ids.each do |patient_id|
84
- patient_hpo_ic << [patient_id, hpo_code, hpo_ic]
85
- end
86
- end
87
- return hpo_stats, patient_hpo_ic.sort{|a,b| a.first <=> b.first}
88
- end
89
-
90
- def write_hash(hash, file_path, header = [])
91
- File.open(file_path, 'w') do |handler|
92
- handler.puts header.join("\t") if !header.empty?
93
- hash.each do |key, array|
94
- handler.puts "#{key}\t#{array.join("\t")}"
95
- end
96
- end
97
- end
98
-
99
- def write_array(array, file_path)
100
- File.open(file_path, 'w') do |handler|
101
- array.each do |record|
102
- if record.class == String
103
- line = record
104
- else
105
- line = record.join("\t")
106
- end
107
- handler.puts line
108
- end
109
- end
110
- end
111
-
112
34
  ##############################
113
35
  #OPTPARSE
114
36
  ##############################
@@ -117,33 +39,75 @@ options = {}
117
39
  OptionParser.new do |opts|
118
40
  opts.banner = "Usage: #{__FILE__} [options]"
119
41
 
120
- options[:cluster_file] = 'cluster_coords.txt'
121
- opts.on("-c", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
122
- options[:cluster_file] = File.basename(value)
123
- end
42
+ options[:chromosome_col] = nil
43
+ opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
44
+ options[:chromosome_col] = data
45
+ end
124
46
 
125
- options[:excluded_hpo] = nil
126
- opts.on("-e", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
127
- options[:excluded_hpo] = excluded_hpo
47
+ options[:id_col] = nil
48
+ opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
49
+ options[:id_col] = data
128
50
  end
129
51
 
130
- options[:patient_file] = nil
131
- opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
132
- options[:patient_file] = value
52
+ options[:end_col] = nil
53
+ opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
54
+ options[:end_col] = data
55
+ end
56
+
57
+ options[:ont_col] = nil
58
+ opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
59
+ options[:ont_col] = data
133
60
  end
134
61
 
135
- options[:mutation_type] = 'A'
136
- opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
137
- options[:mutation_type] = type
62
+ options[:start_col] = nil
63
+ opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
64
+ options[:start_col] = data
65
+ end
66
+
67
+ options[:separator] = '|'
68
+ opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
69
+ options[:separator] = data
70
+ end
71
+
72
+ options[:names] = false
73
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
74
+ options[:names] = true
75
+ end
76
+
77
+ options[:header] = true
78
+ opts.on("-H", "--header", "File has a line header. Default true") do
79
+ options[:header] = false
80
+ end
81
+
82
+ #===================================================================
83
+
84
+ options[:input_file] = nil
85
+ opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
86
+ options[:input_file] = value
138
87
  end
139
88
 
140
89
  options[:output_file] = 'tripartite_network.txt'
141
90
  opts.on("-o", "--output_file PATH", "Output file for the tripartite network") do |value|
142
91
  options[:output_file] = value
92
+ end
93
+
94
+ options[:cluster_file] = 'cluster_coords.txt'
95
+ opts.on("-u", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
96
+ options[:cluster_file] = File.basename(value)
143
97
  end
144
98
 
99
+ options[:excluded_hpo] = nil
100
+ opts.on("-x", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
101
+ options[:excluded_hpo] = excluded_hpo
102
+ end
103
+
104
+ options[:tag] = 'A'
105
+ opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
106
+ options[:tag] = type
107
+ end
108
+
145
109
  options[:hpo_file] = nil
146
- opts.on("-p", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
110
+ opts.on("-O", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
147
111
  options[:hpo_file] = value
148
112
  end
149
113
 
@@ -152,16 +116,6 @@ OptionParser.new do |opts|
152
116
  options[:add_parents] = value
153
117
  end
154
118
 
155
- options[:hpo_stat_file] = 'hpo_stats.txt'
156
- opts.on("-s", "--hpo_stat_file PATH", "Output file with HPO codes, their frequency and CI") do |value|
157
- options[:hpo_stat_file] = File.basename(value)
158
- end
159
-
160
- options[:thresold] = 0
161
- opts.on("-t", "--info_thresold FLOAT", "IC thresold to discard non informative hpo. Default: 0.") do |thresold|
162
- options[:thresold] = thresold.to_f
163
- end
164
-
165
119
  opts.on_tail("-h", "--help", "Show this message") do
166
120
  puts opts
167
121
  exit
@@ -176,27 +130,20 @@ output_folder = File.dirname(File.expand_path(options[:output_file]))
176
130
  Dir.mkdir(output_folder) if !File.exists?(output_folder)
177
131
 
178
132
  hpo_file = options[:hpo_file]
179
- hpo_file = ENV['hpo_file'] if hpo_file.nil?
180
- hpo_file = HPO_FILE if hpo_file.nil?
181
-
182
- # hpo = Ontology.new
183
- # hpo.load_black_list(options[:excluded_hpo]) if !options[:excluded_hpo].nil?
184
- # hpo.load_data(hpo_file)
185
- if !options[:excluded_hpo].nil?
186
- hpo = Ontology.new(file: hpo_file, load_file: true, removable_terms: read_excluded_hpo_file(options[:excluded_hpo]))
187
- else
188
- hpo = Ontology.new(file: hpo_file, load_file: true)
189
- end
190
- patients2hpo, hpo_count, not_found, chr_patients_genomic_region = loadPatientFile(options[:patient_file], hpo, options[:add_parents])
191
-
192
- hpo_stats, patient_hpo_ic = compute_hpo_stats(hpo_count, patients2hpo.length)
193
- patients_by_cluster, sors = generate_cluster_regions(chr_patients_genomic_region, options[:mutation_type])
194
-
195
- tripartite_network = build_tripartite_network(patients2hpo, hpo_stats, options[:thresold], patients_by_cluster)
196
-
197
- # write_array(not_found - hpo.excluded_codes, File.join(output_folder, 'missing_hpo_names'))
198
- write_array(not_found - hpo.removable_terms, File.join(output_folder, 'missing_hpo_names'))
133
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE if hpo_file.nil?
134
+ Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
135
+ Cohort.act_ont = :hpo
136
+ hpo = Cohort.get_ontology(Cohort.act_ont)
137
+
138
+ patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
139
+ rejected_hpos_C, rejected_patients_C = patient_data.check
140
+ rejected_hpos = rejected_hpos_L | rejected_hpos_C
141
+ rejected_patients = rejected_patients_L + rejected_patients_C
142
+ patient_data.remove_incomplete_records
143
+ patient_data.index_vars
144
+ patients_by_cluster, sors = patient_data.generate_cluster_regions(:reg_overlap, options[:tag], 1)
145
+ tripartite_network = build_tripartite_network(patient_data, patients_by_cluster, options[:add_parents], hpo)
146
+
147
+ write_array(rejected_hpos, File.join(output_folder, 'missing_hpo_names'))
199
148
  write_array(sors, File.join(output_folder, options[:cluster_file]))
200
- write_hash(hpo_stats.select{|hp_code, stats| stats.last > options[:thresold]}, File.join(output_folder, options[:hpo_stat_file]), %w[HPOcode Frequency IC])
201
- write_array(tripartite_network, options[:output_file])
202
- write_array(patient_hpo_ic, File.join(output_folder, 'filtered_hpo.txt'))
149
+ write_array(tripartite_network, options[:output_file])
@@ -1,33 +1,11 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
-
4
3
  ROOT_PATH = File.dirname(__FILE__)
5
- REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
6
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
7
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
8
4
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
5
 
10
6
  require 'optparse'
11
7
  require 'report_html'
12
- require 'semtools'
13
- require 'generalMethods.rb'
14
-
15
- #############################################################################################
16
- ## METHODS
17
- ############################################################################################
18
- def procces_patient_data(patient_data, hpo)
19
- clean_profiles = {}
20
- all_hpo = []
21
- patient_data.each do |pat_id, data|
22
- profile = hpo.clean_profile_hard(data.first.map{|c| c.to_sym})
23
- if !profile.empty?
24
- clean_profiles[pat_id] = profile
25
- all_hpo.concat(profile)
26
- end
27
- end
28
- ref_prof = hpo.clean_profile_hard(all_hpo.uniq)
29
- return ref_prof, clean_profiles
30
- end
8
+ require 'pets'
31
9
 
32
10
  #############################################################################################
33
11
  ## OPTPARSE
@@ -42,9 +20,9 @@ OptionParser.new do |opts|
42
20
  options[:chromosome_col] = data
43
21
  end
44
22
 
45
- options[:pat_id_col] = nil
23
+ options[:id_col] = nil
46
24
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
47
- options[:pat_id_col] = data
25
+ options[:id_col] = data
48
26
  end
49
27
 
50
28
  options[:end_col] = nil
@@ -67,9 +45,9 @@ OptionParser.new do |opts|
67
45
  options[:input_file] = value
68
46
  end
69
47
 
70
- options[:hpo_col] = nil
48
+ options[:ont_col] = nil
71
49
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
72
- options[:hpo_col] = data
50
+ options[:ont_col] = data
73
51
  end
74
52
 
75
53
  options[:start_col] = nil
@@ -77,9 +55,14 @@ OptionParser.new do |opts|
77
55
  options[:start_col] = data
78
56
  end
79
57
 
80
- options[:hpo_separator] = '|'
58
+ options[:separator] = '|'
81
59
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
82
- options[:hpo_separator] = data
60
+ options[:separator] = data
61
+ end
62
+
63
+ options[:term_freq] = 0
64
+ opts.on("-f", "--general_prof_freq INTEGER", "When reference profile is not given, a general ine is computed with all profiles. If a freq is defined (0-1), all terms with freq minor than limit are removed") do |data|
65
+ options[:term_freq] = data.to_i
83
66
  end
84
67
 
85
68
  options[:matrix_limits] = [20, 40]
@@ -101,15 +84,21 @@ end.parse!
101
84
  #############################################################################################
102
85
  ## MAIN
103
86
  ############################################################################################
104
- patient_data = load_patient_cohort(options)
105
87
 
106
88
  hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
107
- hpo = Ontology.new
108
- hpo.read(hpo_file)
109
-
110
- ref_profile, clean_profiles = procces_patient_data(patient_data, hpo)
111
- ref_profile = hpo.clean_profile_hard(options[:ref_prof]) if !options[:ref_prof].nil?
112
- hpo.load_profiles({ref: ref_profile})
89
+ Cohort.load_ontology(:hpo, hpo_file)
90
+ Cohort.act_ont = :hpo
91
+ hpo = Cohort.get_ontology(Cohort.act_ont)
92
+ patient_data, _, _ = Cohort_Parser.load(options)
93
+ patient_data.check(hard=true)
94
+
95
+ clean_profiles = patient_data.profiles
96
+ if !options[:ref_prof].nil?
97
+ ref_profile = hpo.clean_profile_hard(options[:ref_prof])
98
+ else
99
+ ref_profile = patient_data.get_general_profile(options[:term_freq])
100
+ end
101
+ hpo.load_profiles({ref: ref_profile}, reset_stored: true)
113
102
 
114
103
  similarities = hpo.compare_profiles(external_profiles: clean_profiles, sim_type: :lin, bidirectional: false)
115
104
 
@@ -0,0 +1,7 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code')) # absolute path of the bundled external_code folder
5
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
6
+
7
+ system_call(EXTERNAL_CODE, 'install_R_dependencies.R')