pets 0.2.3 → 0.2.4

@@ -1,166 +1,63 @@
  #! /usr/bin/env ruby

  ROOT_PATH = File.dirname(__FILE__)
- REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
- EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))

  require 'fileutils'
  require 'optparse'
  require 'report_html'
  require 'semtools'
- require 'generalMethods.rb'
-
-
- class Report_html
- def circular_genome(user_options = {}, &block)
- default_options = {}.merge!(user_options)
- coordinates = user_options[:genomic_coordinates]
- html_string = canvasXpress_main(default_options, block) do |options, config, samples, vars, values, object_id, x, z|
- config['graphType'] = 'Circular'
- config["arcSegmentsSeparation"] = 3
- config["colorScheme"] = "Tableau"
- config["colors"] = ["#332288","#6699CC","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#661100","#CC6677","#AA4466","#882255","#AA4499"]
- config["showIdeogram"] = true
- chr = []
- pos = []
- tags2remove = []
- vars.each_with_index do |var, i|
- coord = coordinates[var]
- if !coord.nil?
- tag = coord.first.gsub(/[^\dXY]/,'')
- if tag == 'X' || tag == 'Y' || (tag.to_i > 0 && tag.to_i <= 22)
- chr << coord.first.gsub(/[^\dXY]/,'')
- pos << coord.last - 1
- else
- tags2remove << i
- end
- else
- tags2remove << i
- end
- end
- tags2remove.reverse_each{|i| ent = vars.delete_at(i); warn("Feature #{ent} has not valid coordinates")} # Remove entities with invalid coordinates
- z['chr'] = chr
- z['pos'] = pos
- end
- return html_string
- end
- end
+ require 'pets'

  #############################################################################################
  ## METHODS
  ############################################################################################
- def load_profiles(file_path, hpo)
- profiles = {}
- #count = 0
- File.open(file_path).each do |line|
- id, profile = line.chomp.split("\t")
- hpos = profile.split(',').map{|a| a.to_sym}
- hpos, rejected_hpos = hpo.check_ids(hpos)
- if !hpos.empty?
- hpos = hpo.clean_profile(hpos)
- profiles[id] = hpos if !hpos.empty?
- end
- end
- return profiles
- end
-
- def load_variants(variant_folder)
- variants = {}
- coordinates = {}
- count = 0
- all_vars = {}
- Dir.glob(File.join(variant_folder, '*.tab')).each do |path|
- profile_id = File.basename(path, '.tab')
- vars = {}
- File.open(path).each do |line|
- fields = line.chomp.split("\t")
- chr = fields[0]
- start = fields[1].to_i
- query = coordinates[chr]
- if query.nil?
- coordinates[chr] = [start]
- count += 1
- id = "var_#{count}"
- else
- if !query.include?(start)
- query << start
- count += 1
- id = "var_#{count}"
- else
- id = all_vars.key([chr, start])
- end
- end
- vars[id] = [chr, start]
- end
- all_vars.merge!(vars)
- variants[profile_id] = vars
- end
- return variants
- end
-
- def load_evidences(evidences_path, hpo)
- genomic_coordinates = {}
- coord_files = Dir.glob(File.join(evidences_path, '*.coords'))
- coord_files.each do |cd_f|
- entity = File.basename(cd_f, '.coords')
- coordinates = load_coordinates(cd_f)
- genomic_coordinates[entity] = coordinates
- end
- evidences = {}
- evidence_files = Dir.glob(File.join(evidences_path, '*_HP.txt'))
- evidence_files.each do |e_f|
- pair = File.basename(e_f, '.txt')
- profiles, id2label = load_evidence_profiles(e_f, hpo)
- evidences[pair] = {prof: profiles, id2lab: id2label}
- end
- return evidences, genomic_coordinates
+ def get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
+ all_coordinates = genomic_coordinates[entity]
+ coords = all_coordinates.select{|id, coordinates| candidates_ids.include?(id.to_sym)}
+ return coords
  end

- def load_coordinates(file_path)
- coordinates = {}
- header = true
- File.open(file_path).each do |line|
- fields = line.chomp.split("\t")
- if header
- header = false
- else
- entity, chr, strand, start, stop = fields
- coordinates[entity] = [chr, start.to_i, stop.to_i, strand]
- end
- end
- return coordinates
+ def make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs, evidences, prof_vars, template, output)
+ var_ids, var_coors = format_variants4report(prof_vars)
+ container = {
+ profile_id: profile_id,
+ candidates: all_candidates.each{|c| c[0] = c.first.to_s},
+ genomic_coordinates: all_genomic_coordinates.transform_values{|c| c.first(2) },
+ similarity_matrixs: similarity_matrixs,
+ evidences: evidences,
+ var_ids: var_ids,
+ var_coordinates: var_coors
+ }
+ report = Report_html.new(container, 'Evidence profile report')
+ report.build(template)
+ report.write(File.join(output, profile_id.to_s + '.html'))
  end

- def load_evidence_profiles(file_path, hpo)
- profiles = {}
- id2label = {}
- #count = 0
- File.open(file_path).each do |line|
- id, label, profile = line.chomp.split("\t")
- hpos = profile.split(',').map{|a| a.to_sym}
- hpos, rejected_hpos = hpo.check_ids(hpos)
- if !hpos.empty?
- hpos = hpo.clean_profile(hpos)
- profiles[id] = hpos if !hpos.empty?
- id2label[id] = label
+ def format_variants4report(var_data)
+ if var_data.nil?
+ var_ids, var_coors = nil
+ else
+ var_ids = []
+ var_coors = {}
+ count = 0
+ var_data.each do |chr, reg|
+ var_id = "var_#{count}"
+ var_ids << [var_id, 0]
+ var_coors[var_id] = [chr.to_s, reg[:start]]
+ count += 1
  end
  end
- return profiles, id2label
+ return var_ids, var_coors
  end

-
-
- def get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
- all_coordinates = genomic_coordinates[entity]
- coords = all_coordinates.select{|id, coordinates| candidates_ids.include?(id.to_sym)}
- return coords
+ def get_genome_hotspots(similarity_matrixs, all_genomic_coordinates)
+ regions = Genomic_Feature.new(all_genomic_coordinates.values.map{|g| g[0..2]})
+ candidates_by_window, genome_windows = regions.generate_cluster_regions(:reg_overlap, 'A', 1)
+ # TODO: COMPLETE UNTIL FULL PREDICTOR
  end


-
  #############################################################################################
  ## OPTPARSE
  ############################################################################################
@@ -245,17 +142,7 @@ profiles.each do |profile_id, reference_prof|
  coords = get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
  all_genomic_coordinates.merge!(coords)
  end
+ get_genome_hotspots(similarity_matrixs, all_genomic_coordinates)
  prof_vars = profile_variants[profile_id]
- container = {
- profile_id: profile_id,
- candidates: all_candidates.each{|c| c[0] = c.first.to_s},
- genomic_coordinates: all_genomic_coordinates.transform_values{|c| c.first(2) },
- similarity_matrixs: similarity_matrixs,
- evidences: evidences,
- var_ids: prof_vars.nil? ? nil : prof_vars.keys.map{|i| [i, 0]},
- var_coordinates: prof_vars
- }
- report = Report_html.new(container, 'Evidence profile report')
- report.build(template)
- report.write(File.join(options[:output_folder], profile_id.to_s + '.html'))
+ make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs, evidences, prof_vars, template, options[:output_folder])
  end
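
In 0.2.4 the loaders and the Report_html patch of this script move into the pets library, and report assembly is factored into make_report and format_variants4report. The surviving get_evidence_coordinates helper simply filters an entity's coordinate table down to the requested candidate ids; a minimal sketch with hypothetical data shapes (the real hashes come from the evidence files loaded through pets):

  # Hypothetical entity, ids and coordinate layout, for illustration only.
  genomic_coordinates = {
    'GENE_X' => { 'cand1' => ['chr1', 100, 200, '+'], 'cand2' => ['chr2', 300, 400, '-'] }
  }
  candidates_ids = [:cand1]
  get_evidence_coordinates('GENE_X', genomic_coordinates, candidates_ids)
  #=> {"cand1"=>["chr1", 100, 200, "+"]}
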
@@ -3,112 +3,34 @@
  # Code to prepare data to get the associations between pathological phenotypes (HPO) and genomic regions (SOR)

  ROOT_PATH = File.dirname(__FILE__)
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))

  ##############################
  #LIBRARIES
  ##############################
- require 'generalMethods.rb'
  require 'optparse'
- require 'semtools'
+ require 'pets'

  ###############################
  #METHODS
  ###############################

- def loadPatientFile(patient_file, hpo, add_parents)
- patient2phenotype = {}
- hpo_count = {}
- not_found = []
- patients_genomic_region_by_chr = {}
- File.open(patient_file).each do |line|
- line.chomp!
- next if line.include?("#")
- patient, chr, start, stop, phenotype_profile = line.split("\t", 5)
- next if phenotype_profile.nil? #For skipping patients without phenotypes
- phenotypes = phenotype_profile.split('|')
- # phenotypes, rejected = hpo.translate_names2codes(phenotypes)
- phenotypes, rejected = hpo.translate_names(phenotypes)
- not_found = not_found | rejected
- phenotypes.each do |hpo_code|
- get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents) if !hpo.is_removable(hpo_code)
- end
- info = [patient, start.to_i, stop.to_i]
- add_record(patients_genomic_region_by_chr, chr, info)
- end
- return patient2phenotype, hpo_count, not_found, patients_genomic_region_by_chr
- end
-
-
- def get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents)
- add_record(hpo_count, hpo_code, patient)
- add_record(patient2phenotype, patient, hpo_code)
- if add_parents == 'root'
- # hpo_parent_codes = hpo.get_parents(hpo_code)
- hpo_parent_codes = hpo.get_ancestors(hpo_code)
- hpo_parent_codes.each do |parent_code|
- add_record(hpo_count, parent_code, patient)
- add_record(patient2phenotype, patient, parent_code)
- end
- end
- end
-
- def build_tripartite_network(patients2hpo, hpo_stats, ic_threshold, patients_by_cluster)
+ def build_tripartite_network(patient_data, patients_by_cluster, add_parents, ont)
  tripartite_network = []
  patients_by_cluster.each do |patient, node_ids|
  node_ids.each do |node_id|
  tripartite_network << [node_id, patient]
  end
  end
- patients_list = patients_by_cluster.keys
- patients2hpo.each do |patient, code|
- if patients_list.include?(patient)
- code.each do |c|
- tripartite_network << [c, patient] if hpo_stats[c].last >= ic_threshold
- end
- end
+ patient_data.each_profile do |id, profile|
+ profile = profile.map{|term| ont.get_ancestors(term)}.flatten.uniq if add_parents == 'root'
+ profile.each do |term|
+ tripartite_network << [term, id]
+ end
  end
  return tripartite_network
  end

- def compute_hpo_stats(hpo_count, patient_number)
- hpo_stats = {}
- patient_hpo_ic = []
- hpo_count.each do |hpo_code, patient_ids|
- hpo_freq = patient_ids.length.fdiv(patient_number) #hpo frequency in patients
- hpo_ic = -Math.log10(hpo_freq)
- hpo_stats[hpo_code] = [hpo_freq, hpo_ic]
- patient_ids.each do |patient_id|
- patient_hpo_ic << [patient_id, hpo_code, hpo_ic]
- end
- end
- return hpo_stats, patient_hpo_ic.sort{|a,b| a.first <=> b.first}
- end
-
- def write_hash(hash, file_path, header = [])
- File.open(file_path, 'w') do |handler|
- handler.puts header.join("\t") if !header.empty?
- hash.each do |key, array|
- handler.puts "#{key}\t#{array.join("\t")}"
- end
- end
- end
-
- def write_array(array, file_path)
- File.open(file_path, 'w') do |handler|
- array.each do |record|
- if record.class == String
- line = record
- else
- line = record.join("\t")
- end
- handler.puts line
- end
- end
- end
-
  ##############################
  #OPTPARSE
  ##############################
@@ -117,33 +39,75 @@ options = {}
  OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

- options[:cluster_file] = 'cluster_coords.txt'
- opts.on("-c", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
- options[:cluster_file] = File.basename(value)
- end
+ options[:chromosome_col] = nil
+ opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
+ options[:chromosome_col] = data
+ end

- options[:excluded_hpo] = nil
- opts.on("-e", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
- options[:excluded_hpo] = excluded_hpo
+ options[:id_col] = nil
+ opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
+ options[:id_col] = data
  end

- options[:patient_file] = nil
- opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
- options[:patient_file] = value
+ options[:end_col] = nil
+ opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
+ options[:end_col] = data
+ end
+
+ options[:ont_col] = nil
+ opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
+ options[:ont_col] = data
  end

- options[:mutation_type] = 'A'
- opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
- options[:mutation_type] = type
+ options[:start_col] = nil
+ opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
+ options[:start_col] = data
+ end
+
+ options[:separator] = '|'
+ opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
+ options[:separator] = data
+ end
+
+ options[:names] = false
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
+ options[:names] = true
+ end
+
+ options[:header] = true
+ opts.on("-H", "--header", "File has a line header. Default true") do
+ options[:header] = false
+ end
+
+ #===================================================================
+
+ options[:input_file] = nil
+ opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
+ options[:input_file] = value
  end

  options[:output_file] = 'tripartite_network.txt'
  opts.on("-o", "--output_file PATH", "Output file for the tripartite network") do |value|
  options[:output_file] = value
+ end
+
+ options[:cluster_file] = 'cluster_coords.txt'
+ opts.on("-u", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
+ options[:cluster_file] = File.basename(value)
  end

+ options[:excluded_hpo] = nil
+ opts.on("-x", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
+ options[:excluded_hpo] = excluded_hpo
+ end
+
+ options[:tag] = 'A'
+ opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
+ options[:tag] = type
+ end
+
  options[:hpo_file] = nil
- opts.on("-p", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
+ opts.on("-O", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
  options[:hpo_file] = value
  end

@@ -152,16 +116,6 @@ OptionParser.new do |opts|
  options[:add_parents] = value
  end

- options[:hpo_stat_file] = 'hpo_stats.txt'
- opts.on("-s", "--hpo_stat_file PATH", "Output file with HPO codes, their frequency and CI") do |value|
- options[:hpo_stat_file] = File.basename(value)
- end
-
- options[:thresold] = 0
- opts.on("-t", "--info_thresold FLOAT", "IC thresold to discard non informative hpo. Default: 0.") do |thresold|
- options[:thresold] = thresold.to_f
- end
-
  opts.on_tail("-h", "--help", "Show this message") do
  puts opts
  exit
@@ -176,27 +130,20 @@ output_folder = File.dirname(File.expand_path(options[:output_file]))
  Dir.mkdir(output_folder) if !File.exists?(output_folder)

  hpo_file = options[:hpo_file]
- hpo_file = ENV['hpo_file'] if hpo_file.nil?
- hpo_file = HPO_FILE if hpo_file.nil?
-
- # hpo = Ontology.new
- # hpo.load_black_list(options[:excluded_hpo]) if !options[:excluded_hpo].nil?
- # hpo.load_data(hpo_file)
- if !options[:excluded_hpo].nil?
- hpo = Ontology.new(file: hpo_file, load_file: true, removable_terms: read_excluded_hpo_file(options[:excluded_hpo]))
- else
- hpo = Ontology.new(file: hpo_file, load_file: true)
- end
- patients2hpo, hpo_count, not_found, chr_patients_genomic_region = loadPatientFile(options[:patient_file], hpo, options[:add_parents])
-
- hpo_stats, patient_hpo_ic = compute_hpo_stats(hpo_count, patients2hpo.length)
- patients_by_cluster, sors = generate_cluster_regions(chr_patients_genomic_region, options[:mutation_type])
-
- tripartite_network = build_tripartite_network(patients2hpo, hpo_stats, options[:thresold], patients_by_cluster)
-
- # write_array(not_found - hpo.excluded_codes, File.join(output_folder, 'missing_hpo_names'))
- write_array(not_found - hpo.removable_terms, File.join(output_folder, 'missing_hpo_names'))
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE if hpo_file.nil?
+ Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
+ Cohort.act_ont = :hpo
+ hpo = Cohort.get_ontology(Cohort.act_ont)
+
+ patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
+ rejected_hpos_C, rejected_patients_C = patient_data.check
+ rejected_hpos = rejected_hpos_L | rejected_hpos_C
+ rejected_patients = rejected_patients_L + rejected_patients_C
+ patient_data.remove_incomplete_records
+ patient_data.index_vars
+ patients_by_cluster, sors = patient_data.generate_cluster_regions(:reg_overlap, options[:tag], 1)
+ tripartite_network = build_tripartite_network(patient_data, patients_by_cluster, options[:add_parents], hpo)
+
+ write_array(rejected_hpos, File.join(output_folder, 'missing_hpo_names'))
  write_array(sors, File.join(output_folder, options[:cluster_file]))
- write_hash(hpo_stats.select{|hp_code, stats| stats.last > options[:thresold]}, File.join(output_folder, options[:hpo_stat_file]), %w[HPOcode Frequency IC])
- write_array(tripartite_network, options[:output_file])
- write_array(patient_hpo_ic, File.join(output_folder, 'filtered_hpo.txt'))
+ write_array(tripartite_network, options[:output_file])
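
The rewritten build_tripartite_network above drops the information-content filter: it links every SOR cluster node to its patients and every patient to the terms of its profile (expanded to ancestors when add_parents is 'root'). A minimal sketch of the edge list it produces, using assumed toy identifiers:

  # Toy cluster nodes and HPO profiles; the identifiers are illustrative only.
  patients_by_cluster = { 'patA' => ['0.A.chr1'], 'patB' => ['0.A.chr1', '1.A.chr2'] }
  profiles = { 'patA' => [:'HP:0000118'], 'patB' => [:'HP:0001250'] }

  edges = []
  patients_by_cluster.each { |patient, node_ids| node_ids.each { |n| edges << [n, patient] } }
  profiles.each { |id, profile| profile.each { |term| edges << [term, id] } }
  # edges => [["0.A.chr1", "patA"], ["0.A.chr1", "patB"], ["1.A.chr2", "patB"],
  #           [:"HP:0000118", "patA"], [:"HP:0001250", "patB"]]
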
@@ -1,33 +1,11 @@
  #! /usr/bin/env ruby

-
  ROOT_PATH = File.dirname(__FILE__)
- REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))

  require 'optparse'
  require 'report_html'
- require 'semtools'
- require 'generalMethods.rb'
-
- #############################################################################################
- ## METHODS
- ############################################################################################
- def procces_patient_data(patient_data, hpo)
- clean_profiles = {}
- all_hpo = []
- patient_data.each do |pat_id, data|
- profile = hpo.clean_profile_hard(data.first.map{|c| c.to_sym})
- if !profile.empty?
- clean_profiles[pat_id] = profile
- all_hpo.concat(profile)
- end
- end
- ref_prof = hpo.clean_profile_hard(all_hpo.uniq)
- return ref_prof, clean_profiles
- end
+ require 'pets'

  #############################################################################################
  ## OPTPARSE
@@ -42,9 +20,9 @@ OptionParser.new do |opts|
  options[:chromosome_col] = data
  end

- options[:pat_id_col] = nil
+ options[:id_col] = nil
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
- options[:pat_id_col] = data
+ options[:id_col] = data
  end

  options[:end_col] = nil
@@ -67,9 +45,9 @@ OptionParser.new do |opts|
  options[:input_file] = value
  end

- options[:hpo_col] = nil
+ options[:ont_col] = nil
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
- options[:hpo_col] = data
+ options[:ont_col] = data
  end

  options[:start_col] = nil
@@ -77,9 +55,14 @@
  options[:start_col] = data
  end

- options[:hpo_separator] = '|'
+ options[:separator] = '|'
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
- options[:hpo_separator] = data
+ options[:separator] = data
+ end
+
+ options[:term_freq] = 0
+ opts.on("-f", "--general_prof_freq INTEGER", "When reference profile is not given, a general one is computed with all profiles. If a freq is defined (0-1), all terms with freq minor than limit are removed") do |data|
+ options[:term_freq] = data.to_i
  end

  options[:matrix_limits] = [20, 40]
@@ -101,15 +84,21 @@ end.parse!
  #############################################################################################
  ## MAIN
  ############################################################################################
- patient_data = load_patient_cohort(options)

  hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
- hpo = Ontology.new
- hpo.read(hpo_file)
-
- ref_profile, clean_profiles = procces_patient_data(patient_data, hpo)
- ref_profile = hpo.clean_profile_hard(options[:ref_prof]) if !options[:ref_prof].nil?
- hpo.load_profiles({ref: ref_profile})
+ Cohort.load_ontology(:hpo, hpo_file)
+ Cohort.act_ont = :hpo
+ hpo = Cohort.get_ontology(Cohort.act_ont)
+ patient_data, _, _ = Cohort_Parser.load(options)
+ patient_data.check(hard=true)
+
+ clean_profiles = patient_data.profiles
+ if !options[:ref_prof].nil?
+ ref_profile = hpo.clean_profile_hard(options[:ref_prof])
+ else
+ ref_profile = patient_data.get_general_profile(options[:term_freq])
+ end
+ hpo.load_profiles({ref: ref_profile}, reset_stored: true)

  similarities = hpo.compare_profiles(external_profiles: clean_profiles, sim_type: :lin, bidirectional: false)
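
When options[:ref_prof] is not supplied, the reference profile now comes from patient_data.get_general_profile(options[:term_freq]). Its exact behaviour lives in the pets Cohort class; a plausible reading of the --general_prof_freq help text (pool every patient's terms and keep those reaching the given frequency) is sketched below, purely as an illustration and not the actual implementation:

  # Illustrative sketch only; NOT the pets implementation of get_general_profile.
  profiles = { 'patA' => [:'HP:0000118', :'HP:0001250'], 'patB' => [:'HP:0001250'] }
  term_freq = 0.5 # assumed fractional threshold, as the help text suggests
  counts = Hash.new(0)
  profiles.each_value { |prof| prof.uniq.each { |t| counts[t] += 1 } }
  general_profile = counts.select { |_t, n| n.fdiv(profiles.length) >= term_freq }.keys
  # => [:"HP:0000118", :"HP:0001250"]
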
 
@@ -0,0 +1,7 @@
+ #! /usr/bin/env ruby
+
+ ROOT_PATH = File.dirname(__FILE__)
+ EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
+
+ system_call(EXTERNAL_CODE, 'install_R_dependencies.R')