pets 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/README.md +79 -5
  4. data/bin/coPatReporter.rb +68 -156
  5. data/bin/comPatMondo.rb +1 -4
  6. data/bin/evidence_profiler.rb +102 -150
  7. data/bin/get_gen_features.rb +146 -0
  8. data/bin/get_network_nodes.rb +79 -132
  9. data/bin/get_sorted_profs.rb +25 -36
  10. data/bin/install_deps.rb +8 -0
  11. data/bin/paco_translator.rb +29 -72
  12. data/bin/phen2reg.rb +1 -4
  13. data/bin/profiles2phenopacket.rb +86 -0
  14. data/bin/reg2phen.rb +1 -3
  15. data/example_datasets/associations_file.txt +757 -0
  16. data/example_datasets/example_patient.txt +6 -0
  17. data/example_datasets/example_patient_hpos.txt +15 -0
  18. data/example_datasets/genes.txt +8 -0
  19. data/example_datasets/hpo2ci.txt +2798 -0
  20. data/example_datasets/hummu_congenital_full_dataset.txt +4183 -0
  21. data/example_datasets/launch.sh +20 -0
  22. data/external_code/generate_boxpot.R +51 -21
  23. data/external_code/get_clusters.R +2 -2
  24. data/external_code/install_R_dependencies.R +16 -0
  25. data/external_code/plot_heatmap.R +34 -30
  26. data/lib/pets/coPatReporterMethods.rb +172 -424
  27. data/lib/pets/cohort.rb +309 -0
  28. data/lib/pets/common_optparse.rb +30 -0
  29. data/lib/pets/constants.rb +8 -0
  30. data/lib/pets/generalMethods.rb +29 -319
  31. data/lib/pets/genomic_features.rb +240 -0
  32. data/lib/pets/io.rb +481 -0
  33. data/lib/pets/parsers/cohort_parser.rb +111 -0
  34. data/lib/pets/parsers/reference_parser.rb +39 -0
  35. data/lib/pets/version.rb +1 -1
  36. data/lib/pets.rb +9 -0
  37. data/pets.gemspec +7 -3
  38. data/templates/cluster_report.erb +25 -5
  39. data/templates/cohort_report.erb +5 -7
  40. data/templates/evidence_profile.erb +20 -4
  41. data/templates/patient_report.erb +1 -1
  42. metadata +96 -5
@@ -3,112 +3,34 @@
3
3
  # Code to prepare data to get the associations between pathological phenotypes (HPO) and genomic regions (SOR)
4
4
 
5
5
  ROOT_PATH = File.dirname(__FILE__)
6
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
7
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
8
6
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
7
 
10
8
  ##############################
11
9
  #LIBRARIES
12
10
  ##############################
13
- require 'generalMethods.rb'
14
11
  require 'optparse'
15
- require 'semtools'
12
+ require 'pets'
16
13
 
17
14
  ###############################
18
15
  #METHODS
19
16
  ###############################
20
17
 
21
- def loadPatientFile(patient_file, hpo, add_parents)
22
- patient2phenotype = {}
23
- hpo_count = {}
24
- not_found = []
25
- patients_genomic_region_by_chr = {}
26
- File.open(patient_file).each do |line|
27
- line.chomp!
28
- next if line.include?("#")
29
- patient, chr, start, stop, phenotype_profile = line.split("\t", 5)
30
- next if phenotype_profile.nil? #For skipping patients without phenotypes
31
- phenotypes = phenotype_profile.split('|')
32
- # phenotypes, rejected = hpo.translate_names2codes(phenotypes)
33
- phenotypes, rejected = hpo.translate_names(phenotypes)
34
- not_found = not_found | rejected
35
- phenotypes.each do |hpo_code|
36
- get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents) if !hpo.is_removable(hpo_code)
37
- end
38
- info = [patient, start.to_i, stop.to_i]
39
- add_record(patients_genomic_region_by_chr, chr, info)
40
- end
41
- return patient2phenotype, hpo_count, not_found, patients_genomic_region_by_chr
42
- end
43
-
44
-
45
- def get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents)
46
- add_record(hpo_count, hpo_code, patient)
47
- add_record(patient2phenotype, patient, hpo_code)
48
- if add_parents == 'root'
49
- # hpo_parent_codes = hpo.get_parents(hpo_code)
50
- hpo_parent_codes = hpo.get_ancestors(hpo_code)
51
- hpo_parent_codes.each do |parent_code|
52
- add_record(hpo_count, parent_code, patient)
53
- add_record(patient2phenotype, patient, parent_code)
54
- end
55
- end
56
- end
57
-
58
- def build_tripartite_network(patients2hpo, hpo_stats, ic_threshold, patients_by_cluster)
18
+ def build_tripartite_network(patient_data, patients_by_cluster, add_parents, ont)
59
19
  tripartite_network = []
60
20
  patients_by_cluster.each do |patient, node_ids|
61
21
  node_ids.each do |node_id|
62
22
  tripartite_network << [node_id, patient]
63
23
  end
64
24
  end
65
- patients_list = patients_by_cluster.keys
66
- patients2hpo.each do |patient, code|
67
- if patients_list.include?(patient)
68
- code.each do |c|
69
- tripartite_network << [c, patient] if hpo_stats[c].last >= ic_threshold
70
- end
71
- end
25
+ patient_data.each_profile do |id, profile|
26
+ profile = profile.map{|term| ont.get_ancestors(term)}.flatten.uniq if add_parents == 'root'
27
+ profile.each do |term|
28
+ tripartite_network << [term, id]
29
+ end
72
30
  end
73
31
  return tripartite_network
74
32
  end
75
33
 
76
- def compute_hpo_stats(hpo_count, patient_number)
77
- hpo_stats = {}
78
- patient_hpo_ic = []
79
- hpo_count.each do |hpo_code, patient_ids|
80
- hpo_freq = patient_ids.length.fdiv(patient_number) #hpo frequency in patients
81
- hpo_ic = -Math.log10(hpo_freq)
82
- hpo_stats[hpo_code] = [hpo_freq, hpo_ic]
83
- patient_ids.each do |patient_id|
84
- patient_hpo_ic << [patient_id, hpo_code, hpo_ic]
85
- end
86
- end
87
- return hpo_stats, patient_hpo_ic.sort{|a,b| a.first <=> b.first}
88
- end
89
-
90
- def write_hash(hash, file_path, header = [])
91
- File.open(file_path, 'w') do |handler|
92
- handler.puts header.join("\t") if !header.empty?
93
- hash.each do |key, array|
94
- handler.puts "#{key}\t#{array.join("\t")}"
95
- end
96
- end
97
- end
98
-
99
- def write_array(array, file_path)
100
- File.open(file_path, 'w') do |handler|
101
- array.each do |record|
102
- if record.class == String
103
- line = record
104
- else
105
- line = record.join("\t")
106
- end
107
- handler.puts line
108
- end
109
- end
110
- end
111
-
112
34
  ##############################
113
35
  #OPTPARSE
114
36
  ##############################
@@ -117,33 +39,75 @@ options = {}
117
39
  OptionParser.new do |opts|
118
40
  opts.banner = "Usage: #{__FILE__} [options]"
119
41
 
120
- options[:cluster_file] = 'cluster_coords.txt'
121
- opts.on("-c", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
122
- options[:cluster_file] = File.basename(value)
123
- end
42
+ options[:chromosome_col] = nil
43
+ opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
44
+ options[:chromosome_col] = data
45
+ end
124
46
 
125
- options[:excluded_hpo] = nil
126
- opts.on("-e", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
127
- options[:excluded_hpo] = excluded_hpo
47
+ options[:id_col] = nil
48
+ opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
49
+ options[:id_col] = data
128
50
  end
129
51
 
130
- options[:patient_file] = nil
131
- opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
132
- options[:patient_file] = value
52
+ options[:end_col] = nil
53
+ opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
54
+ options[:end_col] = data
55
+ end
56
+
57
+ options[:ont_col] = nil
58
+ opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
59
+ options[:ont_col] = data
133
60
  end
134
61
 
135
- options[:mutation_type] = 'A'
136
- opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
137
- options[:mutation_type] = type
62
+ options[:start_col] = nil
63
+ opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
64
+ options[:start_col] = data
65
+ end
66
+
67
+ options[:separator] = '|'
68
+ opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
69
+ options[:separator] = data
70
+ end
71
+
72
+ options[:names] = false
73
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
74
+ options[:names] = true
75
+ end
76
+
77
+ options[:header] = true
78
+ opts.on("-H", "--header", "File has a line header. Default true") do
79
+ options[:header] = false
80
+ end
81
+
82
+ #===================================================================
83
+
84
+ options[:input_file] = nil
85
+ opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
86
+ options[:input_file] = value
138
87
  end
139
88
 
140
89
  options[:output_file] = 'tripartite_network.txt'
141
90
  opts.on("-o", "--output_file PATH", "Output file for the tripartite network") do |value|
142
91
  options[:output_file] = value
92
+ end
93
+
94
+ options[:cluster_file] = 'cluster_coords.txt'
95
+ opts.on("-u", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
96
+ options[:cluster_file] = File.basename(value)
143
97
  end
144
98
 
99
+ options[:excluded_hpo] = nil
100
+ opts.on("-x", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
101
+ options[:excluded_hpo] = excluded_hpo
102
+ end
103
+
104
+ options[:tag] = 'A'
105
+ opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
106
+ options[:tag] = type
107
+ end
108
+
145
109
  options[:hpo_file] = nil
146
- opts.on("-p", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
110
+ opts.on("-O", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
147
111
  options[:hpo_file] = value
148
112
  end
149
113
 
@@ -152,16 +116,6 @@ OptionParser.new do |opts|
152
116
  options[:add_parents] = value
153
117
  end
154
118
 
155
- options[:hpo_stat_file] = 'hpo_stats.txt'
156
- opts.on("-s", "--hpo_stat_file PATH", "Output file with HPO codes, their frequency and CI") do |value|
157
- options[:hpo_stat_file] = File.basename(value)
158
- end
159
-
160
- options[:thresold] = 0
161
- opts.on("-t", "--info_thresold FLOAT", "IC thresold to discard non informative hpo. Default: 0.") do |thresold|
162
- options[:thresold] = thresold.to_f
163
- end
164
-
165
119
  opts.on_tail("-h", "--help", "Show this message") do
166
120
  puts opts
167
121
  exit
@@ -176,27 +130,20 @@ output_folder = File.dirname(File.expand_path(options[:output_file]))
176
130
  Dir.mkdir(output_folder) if !File.exists?(output_folder)
177
131
 
178
132
  hpo_file = options[:hpo_file]
179
- hpo_file = ENV['hpo_file'] if hpo_file.nil?
180
- hpo_file = HPO_FILE if hpo_file.nil?
181
-
182
- # hpo = Ontology.new
183
- # hpo.load_black_list(options[:excluded_hpo]) if !options[:excluded_hpo].nil?
184
- # hpo.load_data(hpo_file)
185
- if !options[:excluded_hpo].nil?
186
- hpo = Ontology.new(file: hpo_file, load_file: true, removable_terms: read_excluded_hpo_file(options[:excluded_hpo]))
187
- else
188
- hpo = Ontology.new(file: hpo_file, load_file: true)
189
- end
190
- patients2hpo, hpo_count, not_found, chr_patients_genomic_region = loadPatientFile(options[:patient_file], hpo, options[:add_parents])
191
-
192
- hpo_stats, patient_hpo_ic = compute_hpo_stats(hpo_count, patients2hpo.length)
193
- patients_by_cluster, sors = generate_cluster_regions(chr_patients_genomic_region, options[:mutation_type])
194
-
195
- tripartite_network = build_tripartite_network(patients2hpo, hpo_stats, options[:thresold], patients_by_cluster)
196
-
197
- # write_array(not_found - hpo.excluded_codes, File.join(output_folder, 'missing_hpo_names'))
198
- write_array(not_found - hpo.removable_terms, File.join(output_folder, 'missing_hpo_names'))
133
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE if hpo_file.nil?
134
+ Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
135
+ Cohort.act_ont = :hpo
136
+ hpo = Cohort.get_ontology(Cohort.act_ont)
137
+
138
+ patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
139
+ rejected_hpos_C, rejected_patients_C = patient_data.check
140
+ rejected_hpos = rejected_hpos_L | rejected_hpos_C
141
+ rejected_patients = rejected_patients_L + rejected_patients_C
142
+ patient_data.remove_incomplete_records
143
+ patient_data.index_vars
144
+ patients_by_cluster, sors = patient_data.generate_cluster_regions(:reg_overlap, options[:tag], 1)
145
+ tripartite_network = build_tripartite_network(patient_data, patients_by_cluster, options[:add_parents], hpo)
146
+
147
+ write_array(rejected_hpos, File.join(output_folder, 'missing_hpo_names'))
199
148
  write_array(sors, File.join(output_folder, options[:cluster_file]))
200
- write_hash(hpo_stats.select{|hp_code, stats| stats.last > options[:thresold]}, File.join(output_folder, options[:hpo_stat_file]), %w[HPOcode Frequency IC])
201
- write_array(tripartite_network, options[:output_file])
202
- write_array(patient_hpo_ic, File.join(output_folder, 'filtered_hpo.txt'))
149
+ write_array(tripartite_network, options[:output_file])
@@ -1,33 +1,11 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
-
4
3
  ROOT_PATH = File.dirname(__FILE__)
5
- REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
6
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
7
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
8
4
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
5
 
10
6
  require 'optparse'
11
7
  require 'report_html'
12
- require 'semtools'
13
- require 'generalMethods.rb'
14
-
15
- #############################################################################################
16
- ## METHODS
17
- ############################################################################################
18
- def procces_patient_data(patient_data, hpo)
19
- clean_profiles = {}
20
- all_hpo = []
21
- patient_data.each do |pat_id, data|
22
- profile = hpo.clean_profile_hard(data.first.map{|c| c.to_sym})
23
- if !profile.empty?
24
- clean_profiles[pat_id] = profile
25
- all_hpo.concat(profile)
26
- end
27
- end
28
- ref_prof = hpo.clean_profile_hard(all_hpo.uniq)
29
- return ref_prof, clean_profiles
30
- end
8
+ require 'pets'
31
9
 
32
10
  #############################################################################################
33
11
  ## OPTPARSE
@@ -42,9 +20,9 @@ OptionParser.new do |opts|
42
20
  options[:chromosome_col] = data
43
21
  end
44
22
 
45
- options[:pat_id_col] = nil
23
+ options[:id_col] = nil
46
24
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
47
- options[:pat_id_col] = data
25
+ options[:id_col] = data
48
26
  end
49
27
 
50
28
  options[:end_col] = nil
@@ -67,9 +45,9 @@ OptionParser.new do |opts|
67
45
  options[:input_file] = value
68
46
  end
69
47
 
70
- options[:hpo_col] = nil
48
+ options[:ont_col] = nil
71
49
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
72
- options[:hpo_col] = data
50
+ options[:ont_col] = data
73
51
  end
74
52
 
75
53
  options[:start_col] = nil
@@ -77,9 +55,14 @@ OptionParser.new do |opts|
77
55
  options[:start_col] = data
78
56
  end
79
57
 
80
- options[:hpo_separator] = '|'
58
+ options[:separator] = '|'
81
59
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
82
- options[:hpo_separator] = data
60
+ options[:separator] = data
61
+ end
62
+
63
+ options[:term_freq] = 0
64
+ opts.on("-f", "--general_prof_freq INTEGER", "When reference profile is not given, a general ine is computed with all profiles. If a freq is defined (0-1), all terms with freq minor than limit are removed") do |data|
65
+ options[:term_freq] = data.to_i
83
66
  end
84
67
 
85
68
  options[:matrix_limits] = [20, 40]
@@ -101,15 +84,21 @@ end.parse!
101
84
  #############################################################################################
102
85
  ## MAIN
103
86
  ############################################################################################
104
- patient_data = load_patient_cohort(options)
105
87
 
106
88
  hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
107
- hpo = Ontology.new
108
- hpo.read(hpo_file)
109
-
110
- ref_profile, clean_profiles = procces_patient_data(patient_data, hpo)
111
- ref_profile = hpo.clean_profile_hard(options[:ref_prof]) if !options[:ref_prof].nil?
112
- hpo.load_profiles({ref: ref_profile})
89
+ Cohort.load_ontology(:hpo, hpo_file)
90
+ Cohort.act_ont = :hpo
91
+ hpo = Cohort.get_ontology(Cohort.act_ont)
92
+ patient_data, _, _ = Cohort_Parser.load(options)
93
+ patient_data.check(hard=true)
94
+
95
+ clean_profiles = patient_data.profiles
96
+ if !options[:ref_prof].nil?
97
+ ref_profile = hpo.clean_profile_hard(options[:ref_prof])
98
+ else
99
+ ref_profile = patient_data.get_general_profile(options[:term_freq])
100
+ end
101
+ hpo.load_profiles({ref: ref_profile}, reset_stored: true)
113
102
 
114
103
  similarities = hpo.compare_profiles(external_profiles: clean_profiles, sim_type: :lin, bidirectional: false)
115
104
 
@@ -0,0 +1,8 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
5
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
6
+ require 'pets'
7
+
8
+ system_call(EXTERNAL_CODE, 'install_R_dependencies.R', '')
@@ -1,59 +1,10 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
3
  ROOT_PATH = File.dirname(__FILE__)
4
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
5
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
6
4
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
7
5
 
8
- require 'generalMethods.rb'
9
6
  require 'optparse'
10
- require 'semtools'
11
-
12
- ###############
13
- #METHODS
14
- ###############
15
-
16
- def translate_hpo(patient_data, hpo, translate)
17
- reject_pats = []
18
- patient_data.each do |patientID, patient_record|
19
- hpos, chr, start, stop = patient_record
20
- if translate == 'names'
21
- # hpos, rejected = hpo.translate_codes2names(hpos)
22
- hpos, rejected = hpo.translate_ids(hpos)
23
- elsif translate =='codes'
24
- # hpos, rejected = hpo.translate_names2codes(hpos)
25
- hpos, rejected = hpo.translate_names(hpos)
26
- STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
27
- end
28
- if hpos.empty?
29
- reject_pats << patientID
30
- else
31
- patient_record[0] = hpos
32
- end
33
- end
34
- reject_pats.each do | rj_pat|
35
- patient_data.delete(rj_pat)
36
- end
37
- end
38
-
39
- def save_translated_file(patients_with_hpo_names, output_file, mode)
40
- File.open(output_file, 'w') do |f|
41
- if mode == 'paco'
42
- f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
43
- end
44
- patients_with_hpo_names.each do |id, patient_record|
45
- hpos, chr, start, stop = patient_record
46
- id = id.gsub(/_i[0-9]+$/,'')
47
- if mode == 'default'
48
- f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
49
- elsif mode == 'paco'
50
- f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
51
- else
52
- abort('Wrong save_mode] option, please try default or paco')
53
- end
54
- end
55
- end
56
- end
7
+ require 'pets'
57
8
 
58
9
  ###############
59
10
  #OPTIONS
@@ -68,9 +19,9 @@ OptionParser.new do |opts|
68
19
  options[:chromosome_col] = data
69
20
  end
70
21
 
71
- options[:pat_id_col] = nil
22
+ options[:id_col] = nil
72
23
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
73
- options[:pat_id_col] = data
24
+ options[:id_col] = data
74
25
  end
75
26
 
76
27
  options[:end_col] = nil
@@ -93,9 +44,9 @@ OptionParser.new do |opts|
93
44
  options[:input_file] = value
94
45
  end
95
46
 
96
- options[:hpo_col] = nil
47
+ options[:ont_col] = nil
97
48
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
98
- options[:hpo_col] = data
49
+ options[:ont_col] = data
99
50
  end
100
51
 
101
52
  options[:start_col] = nil
@@ -103,33 +54,39 @@ OptionParser.new do |opts|
103
54
  options[:start_col] = data
104
55
  end
105
56
 
106
- options[:hpo_separator] = '|'
57
+ options[:separator] = '|'
107
58
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
108
- options[:hpo_separator] = data
59
+ options[:separator] = data
60
+ end
61
+
62
+ options[:n_phens] = nil
63
+ opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
64
+ options[:n_phens] = data.to_i
109
65
  end
110
66
 
111
- options[:save_mode] = 'default'
67
+ options[:save_mode] = :default
112
68
  opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
113
- options[:save_mode] = data
69
+ options[:save_mode] = data.to_sym
114
70
  end
115
71
 
116
- options[:translate] = nil
117
- opts.on("-t", "--translate STRING", "Set 'names' to translate from hpo codes to names or set 'codes' to translate from hpo names to codes. By default, ther is not translation") do |data|
118
- options[:translate] = data
72
+ options[:names] = false
73
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
74
+ options[:names] = true
75
+ end
76
+
77
+ options[:translate] = false
78
+ opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
79
+ options[:translate] = true
119
80
  end
120
81
  end.parse!
121
82
 
122
83
  ###############
123
84
  #MAIN
124
85
  ###############
125
- hpo_file = ENV['hpo_file']
126
- hpo_file = HPO_FILE if hpo_file.nil?
127
-
128
- patient_data = load_patient_cohort(options)
129
- if !options[:translate].nil?
130
- # hpo = Ontology.new
131
- # hpo.load_data(hpo_file)
132
- hpo = Ontology.new(file: hpo_file, load_file: true)
133
- translate_hpo(patient_data, hpo, options[:translate])
134
- end
135
- save_translated_file(patient_data, options[:output_file], options[:save_mode])
86
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
87
+ Cohort.load_ontology(:hpo, hpo_file)
88
+ Cohort.act_ont = :hpo
89
+
90
+ patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
91
+ rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens]) if !options[:n_phens].nil?
92
+ patient_data.save(options[:output_file], options[:save_mode], options[:translate])
data/bin/phen2reg.rb CHANGED
@@ -2,16 +2,13 @@
2
2
  # Rojano E. & Seoane P., September 2016
3
3
  # Program to predict the position from given HPO codes, sorted by their association values.
4
4
 
5
- REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
6
5
  ROOT_PATH = File.dirname(__FILE__)
7
6
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
8
7
 
9
- require 'generalMethods.rb'
10
- require 'phen2reg_methods.rb'
11
8
  require 'optparse'
12
9
  require 'report_html'
13
10
  require 'semtools'
14
-
11
+ require 'pets'
15
12
 
16
13
  ##########################
17
14
  #OPT-PARSER
@@ -0,0 +1,86 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
5
+
6
+ require 'optparse'
7
+ require 'pets'
8
+
9
+ #############################
10
+ ## METHODS
11
+ #############################
12
+ def load_index(path_index)
13
+ vcf_index = {}
14
+ File.open(path_index).each do |line|
15
+ id, path = line.chomp.split("\t")
16
+ vcf_index[id] = path
17
+ end
18
+ return vcf_index
19
+ end
20
+
21
+
22
+ ##########################
23
+ #OPT-PARSER
24
+ ##########################
25
+
26
+ options = {}
27
+ OptionParser.new do |opts|
28
+ opts.banner = "Usage: #{__FILE__} [options]"
29
+
30
+ eval(File.open(COMMON_OPTPARSE).read)
31
+
32
+ options[:input_file] = nil
33
+ opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|
34
+ options[:input_file] = data
35
+ end
36
+
37
+ options[:vcf_index] = nil
38
+ opts.on("-I", "--vcf_index PATH", "VCF file with patient id pointing to vcf path") do |data|
39
+ options[:vcf_index] = data
40
+ end
41
+
42
+ options[:names] = false
43
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
44
+ options[:names] = true
45
+ end
46
+
47
+ options[:output_folder] = nil
48
+ opts.on("-o", "--output_file PATH", "Output folder") do |data|
49
+ options[:output_folder] = data
50
+ end
51
+
52
+ options[:ont_col] = nil
53
+ opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
54
+ options[:ont_col] = data
55
+ end
56
+
57
+ options[:separator] = '|'
58
+ opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
59
+ options[:separator] = data
60
+ end
61
+
62
+ options[:start_col] = nil
63
+ opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
64
+ options[:start_col] = data
65
+ end
66
+
67
+ opts.on_tail("-h", "--help", "Show this message") do
68
+ puts opts
69
+ exit
70
+ end
71
+
72
+ end.parse!
73
+
74
+ #############################################################
75
+ ## MAIN
76
+ #############################################################
77
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
78
+ Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
79
+ Cohort.act_ont = :hpo
80
+
81
+ patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
82
+ rejected_hpos_C, rejected_patients_C = patient_data.check(hard=true)
83
+ patient_data.link2ont(Cohort.act_ont)
84
+
85
+ vcf_index = load_index(options[:vcf_index]) if !options[:vcf_index].nil?
86
+ patient_data.export_phenopackets(options[:output_folder], options[:genome_assembly], vcf_index: vcf_index)
data/bin/reg2phen.rb CHANGED
@@ -3,14 +3,12 @@
3
3
  #data2predict = file to predict
4
4
  #training_file.txt = file with training data (association values and hpo codes).
5
5
 
6
- REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
7
6
  ROOT_PATH = File.dirname(__FILE__)
8
7
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
8
 
10
9
  require 'optparse'
11
- require 'generalMethods.rb'
12
10
  require 'semtools'
13
- require 'reg2phen_methods'
11
+ require 'pets'
14
12
 
15
13
  ##########################
16
14
  #OPT-PARSER