pets 0.2.3 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/README.md +79 -5
  4. data/bin/coPatReporter.rb +68 -156
  5. data/bin/comPatMondo.rb +1 -4
  6. data/bin/evidence_profiler.rb +102 -150
  7. data/bin/get_gen_features.rb +146 -0
  8. data/bin/get_network_nodes.rb +79 -132
  9. data/bin/get_sorted_profs.rb +25 -36
  10. data/bin/install_deps.rb +8 -0
  11. data/bin/paco_translator.rb +29 -72
  12. data/bin/phen2reg.rb +1 -4
  13. data/bin/profiles2phenopacket.rb +86 -0
  14. data/bin/reg2phen.rb +1 -3
  15. data/example_datasets/associations_file.txt +757 -0
  16. data/example_datasets/example_patient.txt +6 -0
  17. data/example_datasets/example_patient_hpos.txt +15 -0
  18. data/example_datasets/genes.txt +8 -0
  19. data/example_datasets/hpo2ci.txt +2798 -0
  20. data/example_datasets/hummu_congenital_full_dataset.txt +4183 -0
  21. data/example_datasets/launch.sh +20 -0
  22. data/external_code/generate_boxpot.R +51 -21
  23. data/external_code/get_clusters.R +2 -2
  24. data/external_code/install_R_dependencies.R +16 -0
  25. data/external_code/plot_heatmap.R +34 -30
  26. data/lib/pets/coPatReporterMethods.rb +172 -424
  27. data/lib/pets/cohort.rb +309 -0
  28. data/lib/pets/common_optparse.rb +30 -0
  29. data/lib/pets/constants.rb +8 -0
  30. data/lib/pets/generalMethods.rb +29 -319
  31. data/lib/pets/genomic_features.rb +240 -0
  32. data/lib/pets/io.rb +481 -0
  33. data/lib/pets/parsers/cohort_parser.rb +111 -0
  34. data/lib/pets/parsers/reference_parser.rb +39 -0
  35. data/lib/pets/version.rb +1 -1
  36. data/lib/pets.rb +9 -0
  37. data/pets.gemspec +7 -3
  38. data/templates/cluster_report.erb +25 -5
  39. data/templates/cohort_report.erb +5 -7
  40. data/templates/evidence_profile.erb +20 -4
  41. data/templates/patient_report.erb +1 -1
  42. metadata +96 -5
@@ -3,112 +3,34 @@
3
3
  # Code to prepare data to get the associations between pathological phenotypes (HPO) and genomic regions (SOR)
4
4
 
5
5
  ROOT_PATH = File.dirname(__FILE__)
6
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
7
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
8
6
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
7
 
10
8
  ##############################
11
9
  #LIBRARIES
12
10
  ##############################
13
- require 'generalMethods.rb'
14
11
  require 'optparse'
15
- require 'semtools'
12
+ require 'pets'
16
13
 
17
14
  ###############################
18
15
  #METHODS
19
16
  ###############################
20
17
 
21
- def loadPatientFile(patient_file, hpo, add_parents)
22
- patient2phenotype = {}
23
- hpo_count = {}
24
- not_found = []
25
- patients_genomic_region_by_chr = {}
26
- File.open(patient_file).each do |line|
27
- line.chomp!
28
- next if line.include?("#")
29
- patient, chr, start, stop, phenotype_profile = line.split("\t", 5)
30
- next if phenotype_profile.nil? #For skipping patients without phenotypes
31
- phenotypes = phenotype_profile.split('|')
32
- # phenotypes, rejected = hpo.translate_names2codes(phenotypes)
33
- phenotypes, rejected = hpo.translate_names(phenotypes)
34
- not_found = not_found | rejected
35
- phenotypes.each do |hpo_code|
36
- get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents) if !hpo.is_removable(hpo_code)
37
- end
38
- info = [patient, start.to_i, stop.to_i]
39
- add_record(patients_genomic_region_by_chr, chr, info)
40
- end
41
- return patient2phenotype, hpo_count, not_found, patients_genomic_region_by_chr
42
- end
43
-
44
-
45
- def get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents)
46
- add_record(hpo_count, hpo_code, patient)
47
- add_record(patient2phenotype, patient, hpo_code)
48
- if add_parents == 'root'
49
- # hpo_parent_codes = hpo.get_parents(hpo_code)
50
- hpo_parent_codes = hpo.get_ancestors(hpo_code)
51
- hpo_parent_codes.each do |parent_code|
52
- add_record(hpo_count, parent_code, patient)
53
- add_record(patient2phenotype, patient, parent_code)
54
- end
55
- end
56
- end
57
-
58
- def build_tripartite_network(patients2hpo, hpo_stats, ic_threshold, patients_by_cluster)
18
+ def build_tripartite_network(patient_data, patients_by_cluster, add_parents, ont)
59
19
  tripartite_network = []
60
20
  patients_by_cluster.each do |patient, node_ids|
61
21
  node_ids.each do |node_id|
62
22
  tripartite_network << [node_id, patient]
63
23
  end
64
24
  end
65
- patients_list = patients_by_cluster.keys
66
- patients2hpo.each do |patient, code|
67
- if patients_list.include?(patient)
68
- code.each do |c|
69
- tripartite_network << [c, patient] if hpo_stats[c].last >= ic_threshold
70
- end
71
- end
25
+ patient_data.each_profile do |id, profile|
26
+ profile = profile.map{|term| ont.get_ancestors(term)}.flatten.uniq if add_parents == 'root'
27
+ profile.each do |term|
28
+ tripartite_network << [term, id]
29
+ end
72
30
  end
73
31
  return tripartite_network
74
32
  end
75
33
 
76
- def compute_hpo_stats(hpo_count, patient_number)
77
- hpo_stats = {}
78
- patient_hpo_ic = []
79
- hpo_count.each do |hpo_code, patient_ids|
80
- hpo_freq = patient_ids.length.fdiv(patient_number) #hpo frequency in patients
81
- hpo_ic = -Math.log10(hpo_freq)
82
- hpo_stats[hpo_code] = [hpo_freq, hpo_ic]
83
- patient_ids.each do |patient_id|
84
- patient_hpo_ic << [patient_id, hpo_code, hpo_ic]
85
- end
86
- end
87
- return hpo_stats, patient_hpo_ic.sort{|a,b| a.first <=> b.first}
88
- end
89
-
90
- def write_hash(hash, file_path, header = [])
91
- File.open(file_path, 'w') do |handler|
92
- handler.puts header.join("\t") if !header.empty?
93
- hash.each do |key, array|
94
- handler.puts "#{key}\t#{array.join("\t")}"
95
- end
96
- end
97
- end
98
-
99
- def write_array(array, file_path)
100
- File.open(file_path, 'w') do |handler|
101
- array.each do |record|
102
- if record.class == String
103
- line = record
104
- else
105
- line = record.join("\t")
106
- end
107
- handler.puts line
108
- end
109
- end
110
- end
111
-
112
34
  ##############################
113
35
  #OPTPARSE
114
36
  ##############################
@@ -117,33 +39,75 @@ options = {}
117
39
  OptionParser.new do |opts|
118
40
  opts.banner = "Usage: #{__FILE__} [options]"
119
41
 
120
- options[:cluster_file] = 'cluster_coords.txt'
121
- opts.on("-c", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
122
- options[:cluster_file] = File.basename(value)
123
- end
42
+ options[:chromosome_col] = nil
43
+ opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
44
+ options[:chromosome_col] = data
45
+ end
124
46
 
125
- options[:excluded_hpo] = nil
126
- opts.on("-e", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
127
- options[:excluded_hpo] = excluded_hpo
47
+ options[:id_col] = nil
48
+ opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
49
+ options[:id_col] = data
128
50
  end
129
51
 
130
- options[:patient_file] = nil
131
- opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
132
- options[:patient_file] = value
52
+ options[:end_col] = nil
53
+ opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
54
+ options[:end_col] = data
55
+ end
56
+
57
+ options[:ont_col] = nil
58
+ opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
59
+ options[:ont_col] = data
133
60
  end
134
61
 
135
- options[:mutation_type] = 'A'
136
- opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
137
- options[:mutation_type] = type
62
+ options[:start_col] = nil
63
+ opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
64
+ options[:start_col] = data
65
+ end
66
+
67
+ options[:separator] = '|'
68
+ opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
69
+ options[:separator] = data
70
+ end
71
+
72
+ options[:names] = false
73
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
74
+ options[:names] = true
75
+ end
76
+
77
+ options[:header] = true
78
+ opts.on("-H", "--header", "File has a line header. Default true") do
79
+ options[:header] = false
80
+ end
81
+
82
+ #===================================================================
83
+
84
+ options[:input_file] = nil
85
+ opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
86
+ options[:input_file] = value
138
87
  end
139
88
 
140
89
  options[:output_file] = 'tripartite_network.txt'
141
90
  opts.on("-o", "--output_file PATH", "Output file for the tripartite network") do |value|
142
91
  options[:output_file] = value
92
+ end
93
+
94
+ options[:cluster_file] = 'cluster_coords.txt'
95
+ opts.on("-u", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
96
+ options[:cluster_file] = File.basename(value)
143
97
  end
144
98
 
99
+ options[:excluded_hpo] = nil
100
+ opts.on("-x", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
101
+ options[:excluded_hpo] = excluded_hpo
102
+ end
103
+
104
+ options[:tag] = 'A'
105
+ opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
106
+ options[:tag] = type
107
+ end
108
+
145
109
  options[:hpo_file] = nil
146
- opts.on("-p", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
110
+ opts.on("-O", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
147
111
  options[:hpo_file] = value
148
112
  end
149
113
 
@@ -152,16 +116,6 @@ OptionParser.new do |opts|
152
116
  options[:add_parents] = value
153
117
  end
154
118
 
155
- options[:hpo_stat_file] = 'hpo_stats.txt'
156
- opts.on("-s", "--hpo_stat_file PATH", "Output file with HPO codes, their frequency and CI") do |value|
157
- options[:hpo_stat_file] = File.basename(value)
158
- end
159
-
160
- options[:thresold] = 0
161
- opts.on("-t", "--info_thresold FLOAT", "IC thresold to discard non informative hpo. Default: 0.") do |thresold|
162
- options[:thresold] = thresold.to_f
163
- end
164
-
165
119
  opts.on_tail("-h", "--help", "Show this message") do
166
120
  puts opts
167
121
  exit
@@ -176,27 +130,20 @@ output_folder = File.dirname(File.expand_path(options[:output_file]))
176
130
  Dir.mkdir(output_folder) if !File.exists?(output_folder)
177
131
 
178
132
  hpo_file = options[:hpo_file]
179
- hpo_file = ENV['hpo_file'] if hpo_file.nil?
180
- hpo_file = HPO_FILE if hpo_file.nil?
181
-
182
- # hpo = Ontology.new
183
- # hpo.load_black_list(options[:excluded_hpo]) if !options[:excluded_hpo].nil?
184
- # hpo.load_data(hpo_file)
185
- if !options[:excluded_hpo].nil?
186
- hpo = Ontology.new(file: hpo_file, load_file: true, removable_terms: read_excluded_hpo_file(options[:excluded_hpo]))
187
- else
188
- hpo = Ontology.new(file: hpo_file, load_file: true)
189
- end
190
- patients2hpo, hpo_count, not_found, chr_patients_genomic_region = loadPatientFile(options[:patient_file], hpo, options[:add_parents])
191
-
192
- hpo_stats, patient_hpo_ic = compute_hpo_stats(hpo_count, patients2hpo.length)
193
- patients_by_cluster, sors = generate_cluster_regions(chr_patients_genomic_region, options[:mutation_type])
194
-
195
- tripartite_network = build_tripartite_network(patients2hpo, hpo_stats, options[:thresold], patients_by_cluster)
196
-
197
- # write_array(not_found - hpo.excluded_codes, File.join(output_folder, 'missing_hpo_names'))
198
- write_array(not_found - hpo.removable_terms, File.join(output_folder, 'missing_hpo_names'))
133
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE if hpo_file.nil?
134
+ Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
135
+ Cohort.act_ont = :hpo
136
+ hpo = Cohort.get_ontology(Cohort.act_ont)
137
+
138
+ patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
139
+ rejected_hpos_C, rejected_patients_C = patient_data.check
140
+ rejected_hpos = rejected_hpos_L | rejected_hpos_C
141
+ rejected_patients = rejected_patients_L + rejected_patients_C
142
+ patient_data.remove_incomplete_records
143
+ patient_data.index_vars
144
+ patients_by_cluster, sors = patient_data.generate_cluster_regions(:reg_overlap, options[:tag], 1)
145
+ tripartite_network = build_tripartite_network(patient_data, patients_by_cluster, options[:add_parents], hpo)
146
+
147
+ write_array(rejected_hpos, File.join(output_folder, 'missing_hpo_names'))
199
148
  write_array(sors, File.join(output_folder, options[:cluster_file]))
200
- write_hash(hpo_stats.select{|hp_code, stats| stats.last > options[:thresold]}, File.join(output_folder, options[:hpo_stat_file]), %w[HPOcode Frequency IC])
201
- write_array(tripartite_network, options[:output_file])
202
- write_array(patient_hpo_ic, File.join(output_folder, 'filtered_hpo.txt'))
149
+ write_array(tripartite_network, options[:output_file])
@@ -1,33 +1,11 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
-
4
3
  ROOT_PATH = File.dirname(__FILE__)
5
- REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
6
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
7
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
8
4
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
5
 
10
6
  require 'optparse'
11
7
  require 'report_html'
12
- require 'semtools'
13
- require 'generalMethods.rb'
14
-
15
- #############################################################################################
16
- ## METHODS
17
- ############################################################################################
18
- def procces_patient_data(patient_data, hpo)
19
- clean_profiles = {}
20
- all_hpo = []
21
- patient_data.each do |pat_id, data|
22
- profile = hpo.clean_profile_hard(data.first.map{|c| c.to_sym})
23
- if !profile.empty?
24
- clean_profiles[pat_id] = profile
25
- all_hpo.concat(profile)
26
- end
27
- end
28
- ref_prof = hpo.clean_profile_hard(all_hpo.uniq)
29
- return ref_prof, clean_profiles
30
- end
8
+ require 'pets'
31
9
 
32
10
  #############################################################################################
33
11
  ## OPTPARSE
@@ -42,9 +20,9 @@ OptionParser.new do |opts|
42
20
  options[:chromosome_col] = data
43
21
  end
44
22
 
45
- options[:pat_id_col] = nil
23
+ options[:id_col] = nil
46
24
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
47
- options[:pat_id_col] = data
25
+ options[:id_col] = data
48
26
  end
49
27
 
50
28
  options[:end_col] = nil
@@ -67,9 +45,9 @@ OptionParser.new do |opts|
67
45
  options[:input_file] = value
68
46
  end
69
47
 
70
- options[:hpo_col] = nil
48
+ options[:ont_col] = nil
71
49
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
72
- options[:hpo_col] = data
50
+ options[:ont_col] = data
73
51
  end
74
52
 
75
53
  options[:start_col] = nil
@@ -77,9 +55,14 @@ OptionParser.new do |opts|
77
55
  options[:start_col] = data
78
56
  end
79
57
 
80
- options[:hpo_separator] = '|'
58
+ options[:separator] = '|'
81
59
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
82
- options[:hpo_separator] = data
60
+ options[:separator] = data
61
+ end
62
+
63
+ options[:term_freq] = 0
64
+ opts.on("-f", "--general_prof_freq INTEGER", "When reference profile is not given, a general ine is computed with all profiles. If a freq is defined (0-1), all terms with freq minor than limit are removed") do |data|
65
+ options[:term_freq] = data.to_i
83
66
  end
84
67
 
85
68
  options[:matrix_limits] = [20, 40]
@@ -101,15 +84,21 @@ end.parse!
101
84
  #############################################################################################
102
85
  ## MAIN
103
86
  ############################################################################################
104
- patient_data = load_patient_cohort(options)
105
87
 
106
88
  hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
107
- hpo = Ontology.new
108
- hpo.read(hpo_file)
109
-
110
- ref_profile, clean_profiles = procces_patient_data(patient_data, hpo)
111
- ref_profile = hpo.clean_profile_hard(options[:ref_prof]) if !options[:ref_prof].nil?
112
- hpo.load_profiles({ref: ref_profile})
89
+ Cohort.load_ontology(:hpo, hpo_file)
90
+ Cohort.act_ont = :hpo
91
+ hpo = Cohort.get_ontology(Cohort.act_ont)
92
+ patient_data, _, _ = Cohort_Parser.load(options)
93
+ patient_data.check(hard=true)
94
+
95
+ clean_profiles = patient_data.profiles
96
+ if !options[:ref_prof].nil?
97
+ ref_profile = hpo.clean_profile_hard(options[:ref_prof])
98
+ else
99
+ ref_profile = patient_data.get_general_profile(options[:term_freq])
100
+ end
101
+ hpo.load_profiles({ref: ref_profile}, reset_stored: true)
113
102
 
114
103
  similarities = hpo.compare_profiles(external_profiles: clean_profiles, sim_type: :lin, bidirectional: false)
115
104
 
@@ -0,0 +1,8 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
5
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
6
+ require 'pets'
7
+
8
+ system_call(EXTERNAL_CODE, 'install_R_dependencies.R', '')
@@ -1,59 +1,10 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
3
  ROOT_PATH = File.dirname(__FILE__)
4
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
5
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
6
4
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
7
5
 
8
- require 'generalMethods.rb'
9
6
  require 'optparse'
10
- require 'semtools'
11
-
12
- ###############
13
- #METHODS
14
- ###############
15
-
16
- def translate_hpo(patient_data, hpo, translate)
17
- reject_pats = []
18
- patient_data.each do |patientID, patient_record|
19
- hpos, chr, start, stop = patient_record
20
- if translate == 'names'
21
- # hpos, rejected = hpo.translate_codes2names(hpos)
22
- hpos, rejected = hpo.translate_ids(hpos)
23
- elsif translate =='codes'
24
- # hpos, rejected = hpo.translate_names2codes(hpos)
25
- hpos, rejected = hpo.translate_names(hpos)
26
- STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
27
- end
28
- if hpos.empty?
29
- reject_pats << patientID
30
- else
31
- patient_record[0] = hpos
32
- end
33
- end
34
- reject_pats.each do | rj_pat|
35
- patient_data.delete(rj_pat)
36
- end
37
- end
38
-
39
- def save_translated_file(patients_with_hpo_names, output_file, mode)
40
- File.open(output_file, 'w') do |f|
41
- if mode == 'paco'
42
- f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
43
- end
44
- patients_with_hpo_names.each do |id, patient_record|
45
- hpos, chr, start, stop = patient_record
46
- id = id.gsub(/_i[0-9]+$/,'')
47
- if mode == 'default'
48
- f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
49
- elsif mode == 'paco'
50
- f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
51
- else
52
- abort('Wrong save_mode] option, please try default or paco')
53
- end
54
- end
55
- end
56
- end
7
+ require 'pets'
57
8
 
58
9
  ###############
59
10
  #OPTIONS
@@ -68,9 +19,9 @@ OptionParser.new do |opts|
68
19
  options[:chromosome_col] = data
69
20
  end
70
21
 
71
- options[:pat_id_col] = nil
22
+ options[:id_col] = nil
72
23
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
73
- options[:pat_id_col] = data
24
+ options[:id_col] = data
74
25
  end
75
26
 
76
27
  options[:end_col] = nil
@@ -93,9 +44,9 @@ OptionParser.new do |opts|
93
44
  options[:input_file] = value
94
45
  end
95
46
 
96
- options[:hpo_col] = nil
47
+ options[:ont_col] = nil
97
48
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
98
- options[:hpo_col] = data
49
+ options[:ont_col] = data
99
50
  end
100
51
 
101
52
  options[:start_col] = nil
@@ -103,33 +54,39 @@ OptionParser.new do |opts|
103
54
  options[:start_col] = data
104
55
  end
105
56
 
106
- options[:hpo_separator] = '|'
57
+ options[:separator] = '|'
107
58
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
108
- options[:hpo_separator] = data
59
+ options[:separator] = data
60
+ end
61
+
62
+ options[:n_phens] = nil
63
+ opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
64
+ options[:n_phens] = data.to_i
109
65
  end
110
66
 
111
- options[:save_mode] = 'default'
67
+ options[:save_mode] = :default
112
68
  opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
113
- options[:save_mode] = data
69
+ options[:save_mode] = data.to_sym
114
70
  end
115
71
 
116
- options[:translate] = nil
117
- opts.on("-t", "--translate STRING", "Set 'names' to translate from hpo codes to names or set 'codes' to translate from hpo names to codes. By default, ther is not translation") do |data|
118
- options[:translate] = data
72
+ options[:names] = false
73
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
74
+ options[:names] = true
75
+ end
76
+
77
+ options[:translate] = false
78
+ opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
79
+ options[:translate] = true
119
80
  end
120
81
  end.parse!
121
82
 
122
83
  ###############
123
84
  #MAIN
124
85
  ###############
125
- hpo_file = ENV['hpo_file']
126
- hpo_file = HPO_FILE if hpo_file.nil?
127
-
128
- patient_data = load_patient_cohort(options)
129
- if !options[:translate].nil?
130
- # hpo = Ontology.new
131
- # hpo.load_data(hpo_file)
132
- hpo = Ontology.new(file: hpo_file, load_file: true)
133
- translate_hpo(patient_data, hpo, options[:translate])
134
- end
135
- save_translated_file(patient_data, options[:output_file], options[:save_mode])
86
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
87
+ Cohort.load_ontology(:hpo, hpo_file)
88
+ Cohort.act_ont = :hpo
89
+
90
+ patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
91
+ rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens]) if !options[:n_phens].nil?
92
+ patient_data.save(options[:output_file], options[:save_mode], options[:translate])
data/bin/phen2reg.rb CHANGED
@@ -2,16 +2,13 @@
2
2
  # Rojano E. & Seoane P., September 2016
3
3
  # Program to predict the position from given HPO codes, sorted by their association values.
4
4
 
5
- REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
6
5
  ROOT_PATH = File.dirname(__FILE__)
7
6
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
8
7
 
9
- require 'generalMethods.rb'
10
- require 'phen2reg_methods.rb'
11
8
  require 'optparse'
12
9
  require 'report_html'
13
10
  require 'semtools'
14
-
11
+ require 'pets'
15
12
 
16
13
  ##########################
17
14
  #OPT-PARSER
@@ -0,0 +1,86 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
5
+
6
+ require 'optparse'
7
+ require 'pets'
8
+
9
+ #############################
10
+ ## METHODS
11
+ #############################
12
+ def load_index(path_index)
13
+ vcf_index = {}
14
+ File.open(path_index).each do |line|
15
+ id, path = line.chomp.split("\t")
16
+ vcf_index[id] = path
17
+ end
18
+ return vcf_index
19
+ end
20
+
21
+
22
+ ##########################
23
+ #OPT-PARSER
24
+ ##########################
25
+
26
+ options = {}
27
+ OptionParser.new do |opts|
28
+ opts.banner = "Usage: #{__FILE__} [options]"
29
+
30
+ eval(File.open(COMMON_OPTPARSE).read)
31
+
32
+ options[:input_file] = nil
33
+ opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|
34
+ options[:input_file] = data
35
+ end
36
+
37
+ options[:vcf_index] = nil
38
+ opts.on("-I", "--vcf_index PATH", "VCF file with patient id pointing to vcf path") do |data|
39
+ options[:vcf_index] = data
40
+ end
41
+
42
+ options[:names] = false
43
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
44
+ options[:names] = true
45
+ end
46
+
47
+ options[:output_folder] = nil
48
+ opts.on("-o", "--output_file PATH", "Output folder") do |data|
49
+ options[:output_folder] = data
50
+ end
51
+
52
+ options[:ont_col] = nil
53
+ opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
54
+ options[:ont_col] = data
55
+ end
56
+
57
+ options[:separator] = '|'
58
+ opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
59
+ options[:separator] = data
60
+ end
61
+
62
+ options[:start_col] = nil
63
+ opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
64
+ options[:start_col] = data
65
+ end
66
+
67
+ opts.on_tail("-h", "--help", "Show this message") do
68
+ puts opts
69
+ exit
70
+ end
71
+
72
+ end.parse!
73
+
74
+ #############################################################
75
+ ## MAIN
76
+ #############################################################
77
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
78
+ Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
79
+ Cohort.act_ont = :hpo
80
+
81
+ patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
82
+ rejected_hpos_C, rejected_patients_C = patient_data.check(hard=true)
83
+ patient_data.link2ont(Cohort.act_ont)
84
+
85
+ vcf_index = load_index(options[:vcf_index]) if !options[:vcf_index].nil?
86
+ patient_data.export_phenopackets(options[:output_folder], options[:genome_assembly], vcf_index: vcf_index)
data/bin/reg2phen.rb CHANGED
@@ -3,14 +3,12 @@
3
3
  #data2predict = file to predict
4
4
  #training_file.txt = file with training data (association values and hpo codes).
5
5
 
6
- REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
7
6
  ROOT_PATH = File.dirname(__FILE__)
8
7
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
8
 
10
9
  require 'optparse'
11
- require 'generalMethods.rb'
12
10
  require 'semtools'
13
- require 'reg2phen_methods'
11
+ require 'pets'
14
12
 
15
13
  ##########################
16
14
  #OPT-PARSER