pets 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +79 -5
- data/bin/coPatReporter.rb +68 -156
- data/bin/comPatMondo.rb +1 -4
- data/bin/evidence_profiler.rb +102 -150
- data/bin/get_gen_features.rb +146 -0
- data/bin/get_network_nodes.rb +79 -132
- data/bin/get_sorted_profs.rb +25 -36
- data/bin/install_deps.rb +8 -0
- data/bin/paco_translator.rb +29 -72
- data/bin/phen2reg.rb +1 -4
- data/bin/profiles2phenopacket.rb +86 -0
- data/bin/reg2phen.rb +1 -3
- data/example_datasets/associations_file.txt +757 -0
- data/example_datasets/example_patient.txt +6 -0
- data/example_datasets/example_patient_hpos.txt +15 -0
- data/example_datasets/genes.txt +8 -0
- data/example_datasets/hpo2ci.txt +2798 -0
- data/example_datasets/hummu_congenital_full_dataset.txt +4183 -0
- data/example_datasets/launch.sh +20 -0
- data/external_code/generate_boxpot.R +51 -21
- data/external_code/get_clusters.R +2 -2
- data/external_code/install_R_dependencies.R +16 -0
- data/external_code/plot_heatmap.R +34 -30
- data/lib/pets/coPatReporterMethods.rb +172 -424
- data/lib/pets/cohort.rb +309 -0
- data/lib/pets/common_optparse.rb +30 -0
- data/lib/pets/constants.rb +8 -0
- data/lib/pets/generalMethods.rb +29 -319
- data/lib/pets/genomic_features.rb +240 -0
- data/lib/pets/io.rb +481 -0
- data/lib/pets/parsers/cohort_parser.rb +111 -0
- data/lib/pets/parsers/reference_parser.rb +39 -0
- data/lib/pets/version.rb +1 -1
- data/lib/pets.rb +9 -0
- data/pets.gemspec +7 -3
- data/templates/cluster_report.erb +25 -5
- data/templates/cohort_report.erb +5 -7
- data/templates/evidence_profile.erb +20 -4
- data/templates/patient_report.erb +1 -1
- metadata +96 -5
data/bin/get_network_nodes.rb
CHANGED
@@ -3,112 +3,34 @@
|
|
3
3
|
# Code to prepare data to get the associations between pathological phenotypes (HPO) and genomic regions (SOR)
|
4
4
|
|
5
5
|
ROOT_PATH = File.dirname(__FILE__)
|
6
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
7
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
|
8
6
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
7
|
|
10
8
|
##############################
|
11
9
|
#LIBRARIES
|
12
10
|
##############################
|
13
|
-
require 'generalMethods.rb'
|
14
11
|
require 'optparse'
|
15
|
-
require '
|
12
|
+
require 'pets'
|
16
13
|
|
17
14
|
###############################
|
18
15
|
#METHODS
|
19
16
|
###############################
|
20
17
|
|
21
|
-
def
|
22
|
-
patient2phenotype = {}
|
23
|
-
hpo_count = {}
|
24
|
-
not_found = []
|
25
|
-
patients_genomic_region_by_chr = {}
|
26
|
-
File.open(patient_file).each do |line|
|
27
|
-
line.chomp!
|
28
|
-
next if line.include?("#")
|
29
|
-
patient, chr, start, stop, phenotype_profile = line.split("\t", 5)
|
30
|
-
next if phenotype_profile.nil? #For skipping patients without phenotypes
|
31
|
-
phenotypes = phenotype_profile.split('|')
|
32
|
-
# phenotypes, rejected = hpo.translate_names2codes(phenotypes)
|
33
|
-
phenotypes, rejected = hpo.translate_names(phenotypes)
|
34
|
-
not_found = not_found | rejected
|
35
|
-
phenotypes.each do |hpo_code|
|
36
|
-
get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents) if !hpo.is_removable(hpo_code)
|
37
|
-
end
|
38
|
-
info = [patient, start.to_i, stop.to_i]
|
39
|
-
add_record(patients_genomic_region_by_chr, chr, info)
|
40
|
-
end
|
41
|
-
return patient2phenotype, hpo_count, not_found, patients_genomic_region_by_chr
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
def get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents)
|
46
|
-
add_record(hpo_count, hpo_code, patient)
|
47
|
-
add_record(patient2phenotype, patient, hpo_code)
|
48
|
-
if add_parents == 'root'
|
49
|
-
# hpo_parent_codes = hpo.get_parents(hpo_code)
|
50
|
-
hpo_parent_codes = hpo.get_ancestors(hpo_code)
|
51
|
-
hpo_parent_codes.each do |parent_code|
|
52
|
-
add_record(hpo_count, parent_code, patient)
|
53
|
-
add_record(patient2phenotype, patient, parent_code)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def build_tripartite_network(patients2hpo, hpo_stats, ic_threshold, patients_by_cluster)
|
18
|
+
def build_tripartite_network(patient_data, patients_by_cluster, add_parents, ont)
|
59
19
|
tripartite_network = []
|
60
20
|
patients_by_cluster.each do |patient, node_ids|
|
61
21
|
node_ids.each do |node_id|
|
62
22
|
tripartite_network << [node_id, patient]
|
63
23
|
end
|
64
24
|
end
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
end
|
71
|
-
end
|
25
|
+
patient_data.each_profile do |id, profile|
|
26
|
+
profile = profile.map{|term| ont.get_ancestors(term)}.flatten.uniq if add_parents == 'root'
|
27
|
+
profile.each do |term|
|
28
|
+
tripartite_network << [term, id]
|
29
|
+
end
|
72
30
|
end
|
73
31
|
return tripartite_network
|
74
32
|
end
|
75
33
|
|
76
|
-
def compute_hpo_stats(hpo_count, patient_number)
|
77
|
-
hpo_stats = {}
|
78
|
-
patient_hpo_ic = []
|
79
|
-
hpo_count.each do |hpo_code, patient_ids|
|
80
|
-
hpo_freq = patient_ids.length.fdiv(patient_number) #hpo frequency in patients
|
81
|
-
hpo_ic = -Math.log10(hpo_freq)
|
82
|
-
hpo_stats[hpo_code] = [hpo_freq, hpo_ic]
|
83
|
-
patient_ids.each do |patient_id|
|
84
|
-
patient_hpo_ic << [patient_id, hpo_code, hpo_ic]
|
85
|
-
end
|
86
|
-
end
|
87
|
-
return hpo_stats, patient_hpo_ic.sort{|a,b| a.first <=> b.first}
|
88
|
-
end
|
89
|
-
|
90
|
-
def write_hash(hash, file_path, header = [])
|
91
|
-
File.open(file_path, 'w') do |handler|
|
92
|
-
handler.puts header.join("\t") if !header.empty?
|
93
|
-
hash.each do |key, array|
|
94
|
-
handler.puts "#{key}\t#{array.join("\t")}"
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
def write_array(array, file_path)
|
100
|
-
File.open(file_path, 'w') do |handler|
|
101
|
-
array.each do |record|
|
102
|
-
if record.class == String
|
103
|
-
line = record
|
104
|
-
else
|
105
|
-
line = record.join("\t")
|
106
|
-
end
|
107
|
-
handler.puts line
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
34
|
##############################
|
113
35
|
#OPTPARSE
|
114
36
|
##############################
|
@@ -117,33 +39,75 @@ options = {}
|
|
117
39
|
OptionParser.new do |opts|
|
118
40
|
opts.banner = "Usage: #{__FILE__} [options]"
|
119
41
|
|
120
|
-
options[:
|
121
|
-
opts.on("-c", "--
|
122
|
-
options[:
|
123
|
-
end
|
42
|
+
options[:chromosome_col] = nil
|
43
|
+
opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
|
44
|
+
options[:chromosome_col] = data
|
45
|
+
end
|
124
46
|
|
125
|
-
options[:
|
126
|
-
opts.on("-
|
127
|
-
options[:
|
47
|
+
options[:id_col] = nil
|
48
|
+
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
49
|
+
options[:id_col] = data
|
128
50
|
end
|
129
51
|
|
130
|
-
options[:
|
131
|
-
opts.on("-
|
132
|
-
options[:
|
52
|
+
options[:end_col] = nil
|
53
|
+
opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
|
54
|
+
options[:end_col] = data
|
55
|
+
end
|
56
|
+
|
57
|
+
options[:ont_col] = nil
|
58
|
+
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
59
|
+
options[:ont_col] = data
|
133
60
|
end
|
134
61
|
|
135
|
-
options[:
|
136
|
-
opts.on("-
|
137
|
-
options[:
|
62
|
+
options[:start_col] = nil
|
63
|
+
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
|
64
|
+
options[:start_col] = data
|
65
|
+
end
|
66
|
+
|
67
|
+
options[:separator] = '|'
|
68
|
+
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
69
|
+
options[:separator] = data
|
70
|
+
end
|
71
|
+
|
72
|
+
options[:names] = false
|
73
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
74
|
+
options[:names] = true
|
75
|
+
end
|
76
|
+
|
77
|
+
options[:header] = true
|
78
|
+
opts.on("-H", "--header", "File has a line header. Default true") do
|
79
|
+
options[:header] = false
|
80
|
+
end
|
81
|
+
|
82
|
+
#===================================================================
|
83
|
+
|
84
|
+
options[:input_file] = nil
|
85
|
+
opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
|
86
|
+
options[:input_file] = value
|
138
87
|
end
|
139
88
|
|
140
89
|
options[:output_file] = 'tripartite_network.txt'
|
141
90
|
opts.on("-o", "--output_file PATH", "Output file for the tripartite network") do |value|
|
142
91
|
options[:output_file] = value
|
92
|
+
end
|
93
|
+
|
94
|
+
options[:cluster_file] = 'cluster_coords.txt'
|
95
|
+
opts.on("-u", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
|
96
|
+
options[:cluster_file] = File.basename(value)
|
143
97
|
end
|
144
98
|
|
99
|
+
options[:excluded_hpo] = nil
|
100
|
+
opts.on("-x", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
|
101
|
+
options[:excluded_hpo] = excluded_hpo
|
102
|
+
end
|
103
|
+
|
104
|
+
options[:tag] = 'A'
|
105
|
+
opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
|
106
|
+
options[:tag] = type
|
107
|
+
end
|
108
|
+
|
145
109
|
options[:hpo_file] = nil
|
146
|
-
opts.on("-
|
110
|
+
opts.on("-O", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
|
147
111
|
options[:hpo_file] = value
|
148
112
|
end
|
149
113
|
|
@@ -152,16 +116,6 @@ OptionParser.new do |opts|
|
|
152
116
|
options[:add_parents] = value
|
153
117
|
end
|
154
118
|
|
155
|
-
options[:hpo_stat_file] = 'hpo_stats.txt'
|
156
|
-
opts.on("-s", "--hpo_stat_file PATH", "Output file with HPO codes, their frequency and CI") do |value|
|
157
|
-
options[:hpo_stat_file] = File.basename(value)
|
158
|
-
end
|
159
|
-
|
160
|
-
options[:thresold] = 0
|
161
|
-
opts.on("-t", "--info_thresold FLOAT", "IC thresold to discard non informative hpo. Default: 0.") do |thresold|
|
162
|
-
options[:thresold] = thresold.to_f
|
163
|
-
end
|
164
|
-
|
165
119
|
opts.on_tail("-h", "--help", "Show this message") do
|
166
120
|
puts opts
|
167
121
|
exit
|
@@ -176,27 +130,20 @@ output_folder = File.dirname(File.expand_path(options[:output_file]))
|
|
176
130
|
Dir.mkdir(output_folder) if !File.exists?(output_folder)
|
177
131
|
|
178
132
|
hpo_file = options[:hpo_file]
|
179
|
-
hpo_file = ENV['hpo_file'] if hpo_file.nil?
|
180
|
-
hpo_file
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
tripartite_network = build_tripartite_network(patients2hpo, hpo_stats, options[:thresold], patients_by_cluster)
|
196
|
-
|
197
|
-
# write_array(not_found - hpo.excluded_codes, File.join(output_folder, 'missing_hpo_names'))
|
198
|
-
write_array(not_found - hpo.removable_terms, File.join(output_folder, 'missing_hpo_names'))
|
133
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE if hpo_file.nil?
|
134
|
+
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
|
135
|
+
Cohort.act_ont = :hpo
|
136
|
+
hpo = Cohort.get_ontology(Cohort.act_ont)
|
137
|
+
|
138
|
+
patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
|
139
|
+
rejected_hpos_C, rejected_patients_C = patient_data.check
|
140
|
+
rejected_hpos = rejected_hpos_L | rejected_hpos_C
|
141
|
+
rejected_patients = rejected_patients_L + rejected_patients_C
|
142
|
+
patient_data.remove_incomplete_records
|
143
|
+
patient_data.index_vars
|
144
|
+
patients_by_cluster, sors = patient_data.generate_cluster_regions(:reg_overlap, options[:tag], 1)
|
145
|
+
tripartite_network = build_tripartite_network(patient_data, patients_by_cluster, options[:add_parents], hpo)
|
146
|
+
|
147
|
+
write_array(rejected_hpos, File.join(output_folder, 'missing_hpo_names'))
|
199
148
|
write_array(sors, File.join(output_folder, options[:cluster_file]))
|
200
|
-
|
201
|
-
write_array(tripartite_network, options[:output_file])
|
202
|
-
write_array(patient_hpo_ic, File.join(output_folder, 'filtered_hpo.txt'))
|
149
|
+
write_array(tripartite_network, options[:output_file])
|
data/bin/get_sorted_profs.rb
CHANGED
@@ -1,33 +1,11 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
3
|
ROOT_PATH = File.dirname(__FILE__)
|
5
|
-
REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
|
6
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
7
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
|
8
4
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
5
|
|
10
6
|
require 'optparse'
|
11
7
|
require 'report_html'
|
12
|
-
require '
|
13
|
-
require 'generalMethods.rb'
|
14
|
-
|
15
|
-
#############################################################################################
|
16
|
-
## METHODS
|
17
|
-
############################################################################################
|
18
|
-
def procces_patient_data(patient_data, hpo)
|
19
|
-
clean_profiles = {}
|
20
|
-
all_hpo = []
|
21
|
-
patient_data.each do |pat_id, data|
|
22
|
-
profile = hpo.clean_profile_hard(data.first.map{|c| c.to_sym})
|
23
|
-
if !profile.empty?
|
24
|
-
clean_profiles[pat_id] = profile
|
25
|
-
all_hpo.concat(profile)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
ref_prof = hpo.clean_profile_hard(all_hpo.uniq)
|
29
|
-
return ref_prof, clean_profiles
|
30
|
-
end
|
8
|
+
require 'pets'
|
31
9
|
|
32
10
|
#############################################################################################
|
33
11
|
## OPTPARSE
|
@@ -42,9 +20,9 @@ OptionParser.new do |opts|
|
|
42
20
|
options[:chromosome_col] = data
|
43
21
|
end
|
44
22
|
|
45
|
-
options[:
|
23
|
+
options[:id_col] = nil
|
46
24
|
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
47
|
-
options[:
|
25
|
+
options[:id_col] = data
|
48
26
|
end
|
49
27
|
|
50
28
|
options[:end_col] = nil
|
@@ -67,9 +45,9 @@ OptionParser.new do |opts|
|
|
67
45
|
options[:input_file] = value
|
68
46
|
end
|
69
47
|
|
70
|
-
options[:
|
48
|
+
options[:ont_col] = nil
|
71
49
|
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
72
|
-
options[:
|
50
|
+
options[:ont_col] = data
|
73
51
|
end
|
74
52
|
|
75
53
|
options[:start_col] = nil
|
@@ -77,9 +55,14 @@ OptionParser.new do |opts|
|
|
77
55
|
options[:start_col] = data
|
78
56
|
end
|
79
57
|
|
80
|
-
options[:
|
58
|
+
options[:separator] = '|'
|
81
59
|
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
82
|
-
options[:
|
60
|
+
options[:separator] = data
|
61
|
+
end
|
62
|
+
|
63
|
+
options[:term_freq] = 0
|
64
|
+
opts.on("-f", "--general_prof_freq INTEGER", "When reference profile is not given, a general ine is computed with all profiles. If a freq is defined (0-1), all terms with freq minor than limit are removed") do |data|
|
65
|
+
options[:term_freq] = data.to_i
|
83
66
|
end
|
84
67
|
|
85
68
|
options[:matrix_limits] = [20, 40]
|
@@ -101,15 +84,21 @@ end.parse!
|
|
101
84
|
#############################################################################################
|
102
85
|
## MAIN
|
103
86
|
############################################################################################
|
104
|
-
patient_data = load_patient_cohort(options)
|
105
87
|
|
106
88
|
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
107
|
-
hpo
|
108
|
-
hpo
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
89
|
+
Cohort.load_ontology(:hpo, hpo_file)
|
90
|
+
Cohort.act_ont = :hpo
|
91
|
+
hpo = Cohort.get_ontology(Cohort.act_ont)
|
92
|
+
patient_data, _, _ = Cohort_Parser.load(options)
|
93
|
+
patient_data.check(hard=true)
|
94
|
+
|
95
|
+
clean_profiles = patient_data.profiles
|
96
|
+
if !options[:ref_prof].nil?
|
97
|
+
ref_profile = hpo.clean_profile_hard(options[:ref_prof])
|
98
|
+
else
|
99
|
+
ref_profile = patient_data.get_general_profile(options[:term_freq])
|
100
|
+
end
|
101
|
+
hpo.load_profiles({ref: ref_profile}, reset_stored: true)
|
113
102
|
|
114
103
|
similarities = hpo.compare_profiles(external_profiles: clean_profiles, sim_type: :lin, bidirectional: false)
|
115
104
|
|
data/bin/install_deps.rb
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
|
5
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
6
|
+
require 'pets'
|
7
|
+
|
8
|
+
system_call(EXTERNAL_CODE, 'install_R_dependencies.R', '')
|
data/bin/paco_translator.rb
CHANGED
@@ -1,59 +1,10 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
ROOT_PATH = File.dirname(__FILE__)
|
4
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
5
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
|
6
4
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
7
5
|
|
8
|
-
require 'generalMethods.rb'
|
9
6
|
require 'optparse'
|
10
|
-
require '
|
11
|
-
|
12
|
-
###############
|
13
|
-
#METHODS
|
14
|
-
###############
|
15
|
-
|
16
|
-
def translate_hpo(patient_data, hpo, translate)
|
17
|
-
reject_pats = []
|
18
|
-
patient_data.each do |patientID, patient_record|
|
19
|
-
hpos, chr, start, stop = patient_record
|
20
|
-
if translate == 'names'
|
21
|
-
# hpos, rejected = hpo.translate_codes2names(hpos)
|
22
|
-
hpos, rejected = hpo.translate_ids(hpos)
|
23
|
-
elsif translate =='codes'
|
24
|
-
# hpos, rejected = hpo.translate_names2codes(hpos)
|
25
|
-
hpos, rejected = hpo.translate_names(hpos)
|
26
|
-
STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
|
27
|
-
end
|
28
|
-
if hpos.empty?
|
29
|
-
reject_pats << patientID
|
30
|
-
else
|
31
|
-
patient_record[0] = hpos
|
32
|
-
end
|
33
|
-
end
|
34
|
-
reject_pats.each do | rj_pat|
|
35
|
-
patient_data.delete(rj_pat)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def save_translated_file(patients_with_hpo_names, output_file, mode)
|
40
|
-
File.open(output_file, 'w') do |f|
|
41
|
-
if mode == 'paco'
|
42
|
-
f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
|
43
|
-
end
|
44
|
-
patients_with_hpo_names.each do |id, patient_record|
|
45
|
-
hpos, chr, start, stop = patient_record
|
46
|
-
id = id.gsub(/_i[0-9]+$/,'')
|
47
|
-
if mode == 'default'
|
48
|
-
f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
|
49
|
-
elsif mode == 'paco'
|
50
|
-
f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
|
51
|
-
else
|
52
|
-
abort('Wrong save_mode] option, please try default or paco')
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
7
|
+
require 'pets'
|
57
8
|
|
58
9
|
###############
|
59
10
|
#OPTIONS
|
@@ -68,9 +19,9 @@ OptionParser.new do |opts|
|
|
68
19
|
options[:chromosome_col] = data
|
69
20
|
end
|
70
21
|
|
71
|
-
options[:
|
22
|
+
options[:id_col] = nil
|
72
23
|
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
73
|
-
options[:
|
24
|
+
options[:id_col] = data
|
74
25
|
end
|
75
26
|
|
76
27
|
options[:end_col] = nil
|
@@ -93,9 +44,9 @@ OptionParser.new do |opts|
|
|
93
44
|
options[:input_file] = value
|
94
45
|
end
|
95
46
|
|
96
|
-
options[:
|
47
|
+
options[:ont_col] = nil
|
97
48
|
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
98
|
-
options[:
|
49
|
+
options[:ont_col] = data
|
99
50
|
end
|
100
51
|
|
101
52
|
options[:start_col] = nil
|
@@ -103,33 +54,39 @@ OptionParser.new do |opts|
|
|
103
54
|
options[:start_col] = data
|
104
55
|
end
|
105
56
|
|
106
|
-
options[:
|
57
|
+
options[:separator] = '|'
|
107
58
|
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
108
|
-
options[:
|
59
|
+
options[:separator] = data
|
60
|
+
end
|
61
|
+
|
62
|
+
options[:n_phens] = nil
|
63
|
+
opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
|
64
|
+
options[:n_phens] = data.to_i
|
109
65
|
end
|
110
66
|
|
111
|
-
options[:save_mode] =
|
67
|
+
options[:save_mode] = :default
|
112
68
|
opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
|
113
|
-
options[:save_mode] = data
|
69
|
+
options[:save_mode] = data.to_sym
|
114
70
|
end
|
115
71
|
|
116
|
-
options[:
|
117
|
-
opts.on("-
|
118
|
-
options[:
|
72
|
+
options[:names] = false
|
73
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
74
|
+
options[:names] = true
|
75
|
+
end
|
76
|
+
|
77
|
+
options[:translate] = false
|
78
|
+
opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
|
79
|
+
options[:translate] = true
|
119
80
|
end
|
120
81
|
end.parse!
|
121
82
|
|
122
83
|
###############
|
123
84
|
#MAIN
|
124
85
|
###############
|
125
|
-
hpo_file = ENV['hpo_file']
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
hpo = Ontology.new(file: hpo_file, load_file: true)
|
133
|
-
translate_hpo(patient_data, hpo, options[:translate])
|
134
|
-
end
|
135
|
-
save_translated_file(patient_data, options[:output_file], options[:save_mode])
|
86
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
87
|
+
Cohort.load_ontology(:hpo, hpo_file)
|
88
|
+
Cohort.act_ont = :hpo
|
89
|
+
|
90
|
+
patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
|
91
|
+
rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens]) if !options[:n_phens].nil?
|
92
|
+
patient_data.save(options[:output_file], options[:save_mode], options[:translate])
|
data/bin/phen2reg.rb
CHANGED
@@ -2,16 +2,13 @@
|
|
2
2
|
# Rojano E. & Seoane P., September 2016
|
3
3
|
# Program to predict the position from given HPO codes, sorted by their association values.
|
4
4
|
|
5
|
-
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
6
5
|
ROOT_PATH = File.dirname(__FILE__)
|
7
6
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
8
7
|
|
9
|
-
require 'generalMethods.rb'
|
10
|
-
require 'phen2reg_methods.rb'
|
11
8
|
require 'optparse'
|
12
9
|
require 'report_html'
|
13
10
|
require 'semtools'
|
14
|
-
|
11
|
+
require 'pets'
|
15
12
|
|
16
13
|
##########################
|
17
14
|
#OPT-PARSER
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'pets'
|
8
|
+
|
9
|
+
#############################
|
10
|
+
## METHODS
|
11
|
+
#############################
|
12
|
+
def load_index(path_index)
|
13
|
+
vcf_index = {}
|
14
|
+
File.open(path_index).each do |line|
|
15
|
+
id, path = line.chomp.split("\t")
|
16
|
+
vcf_index[id] = path
|
17
|
+
end
|
18
|
+
return vcf_index
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
##########################
|
23
|
+
#OPT-PARSER
|
24
|
+
##########################
|
25
|
+
|
26
|
+
options = {}
|
27
|
+
OptionParser.new do |opts|
|
28
|
+
opts.banner = "Usage: #{__FILE__} [options]"
|
29
|
+
|
30
|
+
eval(File.open(COMMON_OPTPARSE).read)
|
31
|
+
|
32
|
+
options[:input_file] = nil
|
33
|
+
opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|
|
34
|
+
options[:input_file] = data
|
35
|
+
end
|
36
|
+
|
37
|
+
options[:vcf_index] = nil
|
38
|
+
opts.on("-I", "--vcf_index PATH", "VCF file with patient id pointing to vcf path") do |data|
|
39
|
+
options[:vcf_index] = data
|
40
|
+
end
|
41
|
+
|
42
|
+
options[:names] = false
|
43
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
44
|
+
options[:names] = true
|
45
|
+
end
|
46
|
+
|
47
|
+
options[:output_folder] = nil
|
48
|
+
opts.on("-o", "--output_file PATH", "Output folder") do |data|
|
49
|
+
options[:output_folder] = data
|
50
|
+
end
|
51
|
+
|
52
|
+
options[:ont_col] = nil
|
53
|
+
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
54
|
+
options[:ont_col] = data
|
55
|
+
end
|
56
|
+
|
57
|
+
options[:separator] = '|'
|
58
|
+
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
59
|
+
options[:separator] = data
|
60
|
+
end
|
61
|
+
|
62
|
+
options[:start_col] = nil
|
63
|
+
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
|
64
|
+
options[:start_col] = data
|
65
|
+
end
|
66
|
+
|
67
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
68
|
+
puts opts
|
69
|
+
exit
|
70
|
+
end
|
71
|
+
|
72
|
+
end.parse!
|
73
|
+
|
74
|
+
#############################################################
|
75
|
+
## MAIN
|
76
|
+
#############################################################
|
77
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
78
|
+
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
|
79
|
+
Cohort.act_ont = :hpo
|
80
|
+
|
81
|
+
patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
|
82
|
+
rejected_hpos_C, rejected_patients_C = patient_data.check(hard=true)
|
83
|
+
patient_data.link2ont(Cohort.act_ont)
|
84
|
+
|
85
|
+
vcf_index = load_index(options[:vcf_index]) if !options[:vcf_index].nil?
|
86
|
+
patient_data.export_phenopackets(options[:output_folder], options[:genome_assembly], vcf_index: vcf_index)
|
data/bin/reg2phen.rb
CHANGED
@@ -3,14 +3,12 @@
|
|
3
3
|
#data2predict = file to predict
|
4
4
|
#training_file.txt = file with training data (association values and hpo codes).
|
5
5
|
|
6
|
-
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
7
6
|
ROOT_PATH = File.dirname(__FILE__)
|
8
7
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
8
|
|
10
9
|
require 'optparse'
|
11
|
-
require 'generalMethods.rb'
|
12
10
|
require 'semtools'
|
13
|
-
require '
|
11
|
+
require 'pets'
|
14
12
|
|
15
13
|
##########################
|
16
14
|
#OPT-PARSER
|