pets 0.2.3 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +79 -5
- data/bin/coPatReporter.rb +68 -156
- data/bin/comPatMondo.rb +1 -4
- data/bin/evidence_profiler.rb +102 -150
- data/bin/get_gen_features.rb +146 -0
- data/bin/get_network_nodes.rb +79 -132
- data/bin/get_sorted_profs.rb +25 -36
- data/bin/install_deps.rb +8 -0
- data/bin/paco_translator.rb +29 -72
- data/bin/phen2reg.rb +1 -4
- data/bin/profiles2phenopacket.rb +86 -0
- data/bin/reg2phen.rb +1 -3
- data/example_datasets/associations_file.txt +757 -0
- data/example_datasets/example_patient.txt +6 -0
- data/example_datasets/example_patient_hpos.txt +15 -0
- data/example_datasets/genes.txt +8 -0
- data/example_datasets/hpo2ci.txt +2798 -0
- data/example_datasets/hummu_congenital_full_dataset.txt +4183 -0
- data/example_datasets/launch.sh +20 -0
- data/external_code/generate_boxpot.R +51 -21
- data/external_code/get_clusters.R +2 -2
- data/external_code/install_R_dependencies.R +16 -0
- data/external_code/plot_heatmap.R +34 -30
- data/lib/pets/coPatReporterMethods.rb +172 -424
- data/lib/pets/cohort.rb +309 -0
- data/lib/pets/common_optparse.rb +30 -0
- data/lib/pets/constants.rb +8 -0
- data/lib/pets/generalMethods.rb +29 -319
- data/lib/pets/genomic_features.rb +240 -0
- data/lib/pets/io.rb +481 -0
- data/lib/pets/parsers/cohort_parser.rb +111 -0
- data/lib/pets/parsers/reference_parser.rb +39 -0
- data/lib/pets/version.rb +1 -1
- data/lib/pets.rb +9 -0
- data/pets.gemspec +7 -3
- data/templates/cluster_report.erb +25 -5
- data/templates/cohort_report.erb +5 -7
- data/templates/evidence_profile.erb +20 -4
- data/templates/patient_report.erb +1 -1
- metadata +96 -5
data/bin/get_network_nodes.rb
CHANGED
@@ -3,112 +3,34 @@
|
|
3
3
|
# Code to prepare data to get the associations between pathological phenotypes (HPO) and genomic regions (SOR)
|
4
4
|
|
5
5
|
ROOT_PATH = File.dirname(__FILE__)
|
6
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
7
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
|
8
6
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
7
|
|
10
8
|
##############################
|
11
9
|
#LIBRARIES
|
12
10
|
##############################
|
13
|
-
require 'generalMethods.rb'
|
14
11
|
require 'optparse'
|
15
|
-
require '
|
12
|
+
require 'pets'
|
16
13
|
|
17
14
|
###############################
|
18
15
|
#METHODS
|
19
16
|
###############################
|
20
17
|
|
21
|
-
def
|
22
|
-
patient2phenotype = {}
|
23
|
-
hpo_count = {}
|
24
|
-
not_found = []
|
25
|
-
patients_genomic_region_by_chr = {}
|
26
|
-
File.open(patient_file).each do |line|
|
27
|
-
line.chomp!
|
28
|
-
next if line.include?("#")
|
29
|
-
patient, chr, start, stop, phenotype_profile = line.split("\t", 5)
|
30
|
-
next if phenotype_profile.nil? #For skipping patients without phenotypes
|
31
|
-
phenotypes = phenotype_profile.split('|')
|
32
|
-
# phenotypes, rejected = hpo.translate_names2codes(phenotypes)
|
33
|
-
phenotypes, rejected = hpo.translate_names(phenotypes)
|
34
|
-
not_found = not_found | rejected
|
35
|
-
phenotypes.each do |hpo_code|
|
36
|
-
get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents) if !hpo.is_removable(hpo_code)
|
37
|
-
end
|
38
|
-
info = [patient, start.to_i, stop.to_i]
|
39
|
-
add_record(patients_genomic_region_by_chr, chr, info)
|
40
|
-
end
|
41
|
-
return patient2phenotype, hpo_count, not_found, patients_genomic_region_by_chr
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
def get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents)
|
46
|
-
add_record(hpo_count, hpo_code, patient)
|
47
|
-
add_record(patient2phenotype, patient, hpo_code)
|
48
|
-
if add_parents == 'root'
|
49
|
-
# hpo_parent_codes = hpo.get_parents(hpo_code)
|
50
|
-
hpo_parent_codes = hpo.get_ancestors(hpo_code)
|
51
|
-
hpo_parent_codes.each do |parent_code|
|
52
|
-
add_record(hpo_count, parent_code, patient)
|
53
|
-
add_record(patient2phenotype, patient, parent_code)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def build_tripartite_network(patients2hpo, hpo_stats, ic_threshold, patients_by_cluster)
|
18
|
+
def build_tripartite_network(patient_data, patients_by_cluster, add_parents, ont)
|
59
19
|
tripartite_network = []
|
60
20
|
patients_by_cluster.each do |patient, node_ids|
|
61
21
|
node_ids.each do |node_id|
|
62
22
|
tripartite_network << [node_id, patient]
|
63
23
|
end
|
64
24
|
end
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
end
|
71
|
-
end
|
25
|
+
patient_data.each_profile do |id, profile|
|
26
|
+
profile = profile.map{|term| ont.get_ancestors(term)}.flatten.uniq if add_parents == 'root'
|
27
|
+
profile.each do |term|
|
28
|
+
tripartite_network << [term, id]
|
29
|
+
end
|
72
30
|
end
|
73
31
|
return tripartite_network
|
74
32
|
end
|
75
33
|
|
76
|
-
def compute_hpo_stats(hpo_count, patient_number)
|
77
|
-
hpo_stats = {}
|
78
|
-
patient_hpo_ic = []
|
79
|
-
hpo_count.each do |hpo_code, patient_ids|
|
80
|
-
hpo_freq = patient_ids.length.fdiv(patient_number) #hpo frequency in patients
|
81
|
-
hpo_ic = -Math.log10(hpo_freq)
|
82
|
-
hpo_stats[hpo_code] = [hpo_freq, hpo_ic]
|
83
|
-
patient_ids.each do |patient_id|
|
84
|
-
patient_hpo_ic << [patient_id, hpo_code, hpo_ic]
|
85
|
-
end
|
86
|
-
end
|
87
|
-
return hpo_stats, patient_hpo_ic.sort{|a,b| a.first <=> b.first}
|
88
|
-
end
|
89
|
-
|
90
|
-
def write_hash(hash, file_path, header = [])
|
91
|
-
File.open(file_path, 'w') do |handler|
|
92
|
-
handler.puts header.join("\t") if !header.empty?
|
93
|
-
hash.each do |key, array|
|
94
|
-
handler.puts "#{key}\t#{array.join("\t")}"
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
def write_array(array, file_path)
|
100
|
-
File.open(file_path, 'w') do |handler|
|
101
|
-
array.each do |record|
|
102
|
-
if record.class == String
|
103
|
-
line = record
|
104
|
-
else
|
105
|
-
line = record.join("\t")
|
106
|
-
end
|
107
|
-
handler.puts line
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
34
|
##############################
|
113
35
|
#OPTPARSE
|
114
36
|
##############################
|
@@ -117,33 +39,75 @@ options = {}
|
|
117
39
|
OptionParser.new do |opts|
|
118
40
|
opts.banner = "Usage: #{__FILE__} [options]"
|
119
41
|
|
120
|
-
options[:
|
121
|
-
opts.on("-c", "--
|
122
|
-
options[:
|
123
|
-
end
|
42
|
+
options[:chromosome_col] = nil
|
43
|
+
opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
|
44
|
+
options[:chromosome_col] = data
|
45
|
+
end
|
124
46
|
|
125
|
-
options[:
|
126
|
-
opts.on("-
|
127
|
-
options[:
|
47
|
+
options[:id_col] = nil
|
48
|
+
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
49
|
+
options[:id_col] = data
|
128
50
|
end
|
129
51
|
|
130
|
-
options[:
|
131
|
-
opts.on("-
|
132
|
-
options[:
|
52
|
+
options[:end_col] = nil
|
53
|
+
opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
|
54
|
+
options[:end_col] = data
|
55
|
+
end
|
56
|
+
|
57
|
+
options[:ont_col] = nil
|
58
|
+
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
59
|
+
options[:ont_col] = data
|
133
60
|
end
|
134
61
|
|
135
|
-
options[:
|
136
|
-
opts.on("-
|
137
|
-
options[:
|
62
|
+
options[:start_col] = nil
|
63
|
+
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
|
64
|
+
options[:start_col] = data
|
65
|
+
end
|
66
|
+
|
67
|
+
options[:separator] = '|'
|
68
|
+
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
69
|
+
options[:separator] = data
|
70
|
+
end
|
71
|
+
|
72
|
+
options[:names] = false
|
73
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
74
|
+
options[:names] = true
|
75
|
+
end
|
76
|
+
|
77
|
+
options[:header] = true
|
78
|
+
opts.on("-H", "--header", "File has a line header. Default true") do
|
79
|
+
options[:header] = false
|
80
|
+
end
|
81
|
+
|
82
|
+
#===================================================================
|
83
|
+
|
84
|
+
options[:input_file] = nil
|
85
|
+
opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
|
86
|
+
options[:input_file] = value
|
138
87
|
end
|
139
88
|
|
140
89
|
options[:output_file] = 'tripartite_network.txt'
|
141
90
|
opts.on("-o", "--output_file PATH", "Output file for the tripartite network") do |value|
|
142
91
|
options[:output_file] = value
|
92
|
+
end
|
93
|
+
|
94
|
+
options[:cluster_file] = 'cluster_coords.txt'
|
95
|
+
opts.on("-u", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
|
96
|
+
options[:cluster_file] = File.basename(value)
|
143
97
|
end
|
144
98
|
|
99
|
+
options[:excluded_hpo] = nil
|
100
|
+
opts.on("-x", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
|
101
|
+
options[:excluded_hpo] = excluded_hpo
|
102
|
+
end
|
103
|
+
|
104
|
+
options[:tag] = 'A'
|
105
|
+
opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
|
106
|
+
options[:tag] = type
|
107
|
+
end
|
108
|
+
|
145
109
|
options[:hpo_file] = nil
|
146
|
-
opts.on("-
|
110
|
+
opts.on("-O", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
|
147
111
|
options[:hpo_file] = value
|
148
112
|
end
|
149
113
|
|
@@ -152,16 +116,6 @@ OptionParser.new do |opts|
|
|
152
116
|
options[:add_parents] = value
|
153
117
|
end
|
154
118
|
|
155
|
-
options[:hpo_stat_file] = 'hpo_stats.txt'
|
156
|
-
opts.on("-s", "--hpo_stat_file PATH", "Output file with HPO codes, their frequency and CI") do |value|
|
157
|
-
options[:hpo_stat_file] = File.basename(value)
|
158
|
-
end
|
159
|
-
|
160
|
-
options[:thresold] = 0
|
161
|
-
opts.on("-t", "--info_thresold FLOAT", "IC thresold to discard non informative hpo. Default: 0.") do |thresold|
|
162
|
-
options[:thresold] = thresold.to_f
|
163
|
-
end
|
164
|
-
|
165
119
|
opts.on_tail("-h", "--help", "Show this message") do
|
166
120
|
puts opts
|
167
121
|
exit
|
@@ -176,27 +130,20 @@ output_folder = File.dirname(File.expand_path(options[:output_file]))
|
|
176
130
|
Dir.mkdir(output_folder) if !File.exists?(output_folder)
|
177
131
|
|
178
132
|
hpo_file = options[:hpo_file]
|
179
|
-
hpo_file = ENV['hpo_file'] if hpo_file.nil?
|
180
|
-
hpo_file
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
tripartite_network = build_tripartite_network(patients2hpo, hpo_stats, options[:thresold], patients_by_cluster)
|
196
|
-
|
197
|
-
# write_array(not_found - hpo.excluded_codes, File.join(output_folder, 'missing_hpo_names'))
|
198
|
-
write_array(not_found - hpo.removable_terms, File.join(output_folder, 'missing_hpo_names'))
|
133
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE if hpo_file.nil?
|
134
|
+
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
|
135
|
+
Cohort.act_ont = :hpo
|
136
|
+
hpo = Cohort.get_ontology(Cohort.act_ont)
|
137
|
+
|
138
|
+
patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
|
139
|
+
rejected_hpos_C, rejected_patients_C = patient_data.check
|
140
|
+
rejected_hpos = rejected_hpos_L | rejected_hpos_C
|
141
|
+
rejected_patients = rejected_patients_L + rejected_patients_C
|
142
|
+
patient_data.remove_incomplete_records
|
143
|
+
patient_data.index_vars
|
144
|
+
patients_by_cluster, sors = patient_data.generate_cluster_regions(:reg_overlap, options[:tag], 1)
|
145
|
+
tripartite_network = build_tripartite_network(patient_data, patients_by_cluster, options[:add_parents], hpo)
|
146
|
+
|
147
|
+
write_array(rejected_hpos, File.join(output_folder, 'missing_hpo_names'))
|
199
148
|
write_array(sors, File.join(output_folder, options[:cluster_file]))
|
200
|
-
|
201
|
-
write_array(tripartite_network, options[:output_file])
|
202
|
-
write_array(patient_hpo_ic, File.join(output_folder, 'filtered_hpo.txt'))
|
149
|
+
write_array(tripartite_network, options[:output_file])
|
data/bin/get_sorted_profs.rb
CHANGED
@@ -1,33 +1,11 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
3
|
ROOT_PATH = File.dirname(__FILE__)
|
5
|
-
REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
|
6
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
7
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
|
8
4
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
5
|
|
10
6
|
require 'optparse'
|
11
7
|
require 'report_html'
|
12
|
-
require '
|
13
|
-
require 'generalMethods.rb'
|
14
|
-
|
15
|
-
#############################################################################################
|
16
|
-
## METHODS
|
17
|
-
############################################################################################
|
18
|
-
def procces_patient_data(patient_data, hpo)
|
19
|
-
clean_profiles = {}
|
20
|
-
all_hpo = []
|
21
|
-
patient_data.each do |pat_id, data|
|
22
|
-
profile = hpo.clean_profile_hard(data.first.map{|c| c.to_sym})
|
23
|
-
if !profile.empty?
|
24
|
-
clean_profiles[pat_id] = profile
|
25
|
-
all_hpo.concat(profile)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
ref_prof = hpo.clean_profile_hard(all_hpo.uniq)
|
29
|
-
return ref_prof, clean_profiles
|
30
|
-
end
|
8
|
+
require 'pets'
|
31
9
|
|
32
10
|
#############################################################################################
|
33
11
|
## OPTPARSE
|
@@ -42,9 +20,9 @@ OptionParser.new do |opts|
|
|
42
20
|
options[:chromosome_col] = data
|
43
21
|
end
|
44
22
|
|
45
|
-
options[:
|
23
|
+
options[:id_col] = nil
|
46
24
|
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
47
|
-
options[:
|
25
|
+
options[:id_col] = data
|
48
26
|
end
|
49
27
|
|
50
28
|
options[:end_col] = nil
|
@@ -67,9 +45,9 @@ OptionParser.new do |opts|
|
|
67
45
|
options[:input_file] = value
|
68
46
|
end
|
69
47
|
|
70
|
-
options[:
|
48
|
+
options[:ont_col] = nil
|
71
49
|
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
72
|
-
options[:
|
50
|
+
options[:ont_col] = data
|
73
51
|
end
|
74
52
|
|
75
53
|
options[:start_col] = nil
|
@@ -77,9 +55,14 @@ OptionParser.new do |opts|
|
|
77
55
|
options[:start_col] = data
|
78
56
|
end
|
79
57
|
|
80
|
-
options[:
|
58
|
+
options[:separator] = '|'
|
81
59
|
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
82
|
-
options[:
|
60
|
+
options[:separator] = data
|
61
|
+
end
|
62
|
+
|
63
|
+
options[:term_freq] = 0
|
64
|
+
opts.on("-f", "--general_prof_freq INTEGER", "When reference profile is not given, a general ine is computed with all profiles. If a freq is defined (0-1), all terms with freq minor than limit are removed") do |data|
|
65
|
+
options[:term_freq] = data.to_i
|
83
66
|
end
|
84
67
|
|
85
68
|
options[:matrix_limits] = [20, 40]
|
@@ -101,15 +84,21 @@ end.parse!
|
|
101
84
|
#############################################################################################
|
102
85
|
## MAIN
|
103
86
|
############################################################################################
|
104
|
-
patient_data = load_patient_cohort(options)
|
105
87
|
|
106
88
|
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
107
|
-
hpo
|
108
|
-
hpo
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
89
|
+
Cohort.load_ontology(:hpo, hpo_file)
|
90
|
+
Cohort.act_ont = :hpo
|
91
|
+
hpo = Cohort.get_ontology(Cohort.act_ont)
|
92
|
+
patient_data, _, _ = Cohort_Parser.load(options)
|
93
|
+
patient_data.check(hard=true)
|
94
|
+
|
95
|
+
clean_profiles = patient_data.profiles
|
96
|
+
if !options[:ref_prof].nil?
|
97
|
+
ref_profile = hpo.clean_profile_hard(options[:ref_prof])
|
98
|
+
else
|
99
|
+
ref_profile = patient_data.get_general_profile(options[:term_freq])
|
100
|
+
end
|
101
|
+
hpo.load_profiles({ref: ref_profile}, reset_stored: true)
|
113
102
|
|
114
103
|
similarities = hpo.compare_profiles(external_profiles: clean_profiles, sim_type: :lin, bidirectional: false)
|
115
104
|
|
data/bin/install_deps.rb
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
|
5
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
6
|
+
require 'pets'
|
7
|
+
|
8
|
+
system_call(EXTERNAL_CODE, 'install_R_dependencies.R', '')
|
data/bin/paco_translator.rb
CHANGED
@@ -1,59 +1,10 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
ROOT_PATH = File.dirname(__FILE__)
|
4
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
5
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
|
6
4
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
7
5
|
|
8
|
-
require 'generalMethods.rb'
|
9
6
|
require 'optparse'
|
10
|
-
require '
|
11
|
-
|
12
|
-
###############
|
13
|
-
#METHODS
|
14
|
-
###############
|
15
|
-
|
16
|
-
def translate_hpo(patient_data, hpo, translate)
|
17
|
-
reject_pats = []
|
18
|
-
patient_data.each do |patientID, patient_record|
|
19
|
-
hpos, chr, start, stop = patient_record
|
20
|
-
if translate == 'names'
|
21
|
-
# hpos, rejected = hpo.translate_codes2names(hpos)
|
22
|
-
hpos, rejected = hpo.translate_ids(hpos)
|
23
|
-
elsif translate =='codes'
|
24
|
-
# hpos, rejected = hpo.translate_names2codes(hpos)
|
25
|
-
hpos, rejected = hpo.translate_names(hpos)
|
26
|
-
STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
|
27
|
-
end
|
28
|
-
if hpos.empty?
|
29
|
-
reject_pats << patientID
|
30
|
-
else
|
31
|
-
patient_record[0] = hpos
|
32
|
-
end
|
33
|
-
end
|
34
|
-
reject_pats.each do | rj_pat|
|
35
|
-
patient_data.delete(rj_pat)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def save_translated_file(patients_with_hpo_names, output_file, mode)
|
40
|
-
File.open(output_file, 'w') do |f|
|
41
|
-
if mode == 'paco'
|
42
|
-
f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
|
43
|
-
end
|
44
|
-
patients_with_hpo_names.each do |id, patient_record|
|
45
|
-
hpos, chr, start, stop = patient_record
|
46
|
-
id = id.gsub(/_i[0-9]+$/,'')
|
47
|
-
if mode == 'default'
|
48
|
-
f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
|
49
|
-
elsif mode == 'paco'
|
50
|
-
f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
|
51
|
-
else
|
52
|
-
abort('Wrong save_mode] option, please try default or paco')
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
7
|
+
require 'pets'
|
57
8
|
|
58
9
|
###############
|
59
10
|
#OPTIONS
|
@@ -68,9 +19,9 @@ OptionParser.new do |opts|
|
|
68
19
|
options[:chromosome_col] = data
|
69
20
|
end
|
70
21
|
|
71
|
-
options[:
|
22
|
+
options[:id_col] = nil
|
72
23
|
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
73
|
-
options[:
|
24
|
+
options[:id_col] = data
|
74
25
|
end
|
75
26
|
|
76
27
|
options[:end_col] = nil
|
@@ -93,9 +44,9 @@ OptionParser.new do |opts|
|
|
93
44
|
options[:input_file] = value
|
94
45
|
end
|
95
46
|
|
96
|
-
options[:
|
47
|
+
options[:ont_col] = nil
|
97
48
|
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
98
|
-
options[:
|
49
|
+
options[:ont_col] = data
|
99
50
|
end
|
100
51
|
|
101
52
|
options[:start_col] = nil
|
@@ -103,33 +54,39 @@ OptionParser.new do |opts|
|
|
103
54
|
options[:start_col] = data
|
104
55
|
end
|
105
56
|
|
106
|
-
options[:
|
57
|
+
options[:separator] = '|'
|
107
58
|
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
108
|
-
options[:
|
59
|
+
options[:separator] = data
|
60
|
+
end
|
61
|
+
|
62
|
+
options[:n_phens] = nil
|
63
|
+
opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
|
64
|
+
options[:n_phens] = data.to_i
|
109
65
|
end
|
110
66
|
|
111
|
-
options[:save_mode] =
|
67
|
+
options[:save_mode] = :default
|
112
68
|
opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
|
113
|
-
options[:save_mode] = data
|
69
|
+
options[:save_mode] = data.to_sym
|
114
70
|
end
|
115
71
|
|
116
|
-
options[:
|
117
|
-
opts.on("-
|
118
|
-
options[:
|
72
|
+
options[:names] = false
|
73
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
74
|
+
options[:names] = true
|
75
|
+
end
|
76
|
+
|
77
|
+
options[:translate] = false
|
78
|
+
opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
|
79
|
+
options[:translate] = true
|
119
80
|
end
|
120
81
|
end.parse!
|
121
82
|
|
122
83
|
###############
|
123
84
|
#MAIN
|
124
85
|
###############
|
125
|
-
hpo_file = ENV['hpo_file']
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
hpo = Ontology.new(file: hpo_file, load_file: true)
|
133
|
-
translate_hpo(patient_data, hpo, options[:translate])
|
134
|
-
end
|
135
|
-
save_translated_file(patient_data, options[:output_file], options[:save_mode])
|
86
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
87
|
+
Cohort.load_ontology(:hpo, hpo_file)
|
88
|
+
Cohort.act_ont = :hpo
|
89
|
+
|
90
|
+
patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
|
91
|
+
rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens]) if !options[:n_phens].nil?
|
92
|
+
patient_data.save(options[:output_file], options[:save_mode], options[:translate])
|
data/bin/phen2reg.rb
CHANGED
@@ -2,16 +2,13 @@
|
|
2
2
|
# Rojano E. & Seoane P., September 2016
|
3
3
|
# Program to predict the position from given HPO codes, sorted by their association values.
|
4
4
|
|
5
|
-
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
6
5
|
ROOT_PATH = File.dirname(__FILE__)
|
7
6
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
8
7
|
|
9
|
-
require 'generalMethods.rb'
|
10
|
-
require 'phen2reg_methods.rb'
|
11
8
|
require 'optparse'
|
12
9
|
require 'report_html'
|
13
10
|
require 'semtools'
|
14
|
-
|
11
|
+
require 'pets'
|
15
12
|
|
16
13
|
##########################
|
17
14
|
#OPT-PARSER
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'pets'
|
8
|
+
|
9
|
+
#############################
|
10
|
+
## METHODS
|
11
|
+
#############################
|
12
|
+
def load_index(path_index)
|
13
|
+
vcf_index = {}
|
14
|
+
File.open(path_index).each do |line|
|
15
|
+
id, path = line.chomp.split("\t")
|
16
|
+
vcf_index[id] = path
|
17
|
+
end
|
18
|
+
return vcf_index
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
##########################
|
23
|
+
#OPT-PARSER
|
24
|
+
##########################
|
25
|
+
|
26
|
+
options = {}
|
27
|
+
OptionParser.new do |opts|
|
28
|
+
opts.banner = "Usage: #{__FILE__} [options]"
|
29
|
+
|
30
|
+
eval(File.open(COMMON_OPTPARSE).read)
|
31
|
+
|
32
|
+
options[:input_file] = nil
|
33
|
+
opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|
|
34
|
+
options[:input_file] = data
|
35
|
+
end
|
36
|
+
|
37
|
+
options[:vcf_index] = nil
|
38
|
+
opts.on("-I", "--vcf_index PATH", "VCF file with patient id pointing to vcf path") do |data|
|
39
|
+
options[:vcf_index] = data
|
40
|
+
end
|
41
|
+
|
42
|
+
options[:names] = false
|
43
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
44
|
+
options[:names] = true
|
45
|
+
end
|
46
|
+
|
47
|
+
options[:output_folder] = nil
|
48
|
+
opts.on("-o", "--output_file PATH", "Output folder") do |data|
|
49
|
+
options[:output_folder] = data
|
50
|
+
end
|
51
|
+
|
52
|
+
options[:ont_col] = nil
|
53
|
+
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
54
|
+
options[:ont_col] = data
|
55
|
+
end
|
56
|
+
|
57
|
+
options[:separator] = '|'
|
58
|
+
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
59
|
+
options[:separator] = data
|
60
|
+
end
|
61
|
+
|
62
|
+
options[:start_col] = nil
|
63
|
+
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
|
64
|
+
options[:start_col] = data
|
65
|
+
end
|
66
|
+
|
67
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
68
|
+
puts opts
|
69
|
+
exit
|
70
|
+
end
|
71
|
+
|
72
|
+
end.parse!
|
73
|
+
|
74
|
+
#############################################################
|
75
|
+
## MAIN
|
76
|
+
#############################################################
|
77
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
78
|
+
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
|
79
|
+
Cohort.act_ont = :hpo
|
80
|
+
|
81
|
+
patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
|
82
|
+
rejected_hpos_C, rejected_patients_C = patient_data.check(hard=true)
|
83
|
+
patient_data.link2ont(Cohort.act_ont)
|
84
|
+
|
85
|
+
vcf_index = load_index(options[:vcf_index]) if !options[:vcf_index].nil?
|
86
|
+
patient_data.export_phenopackets(options[:output_folder], options[:genome_assembly], vcf_index: vcf_index)
|
data/bin/reg2phen.rb
CHANGED
@@ -3,14 +3,12 @@
|
|
3
3
|
#data2predict = file to predict
|
4
4
|
#training_file.txt = file with training data (association values and hpo codes).
|
5
5
|
|
6
|
-
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
7
6
|
ROOT_PATH = File.dirname(__FILE__)
|
8
7
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
8
|
|
10
9
|
require 'optparse'
|
11
|
-
require 'generalMethods.rb'
|
12
10
|
require 'semtools'
|
13
|
-
require '
|
11
|
+
require 'pets'
|
14
12
|
|
15
13
|
##########################
|
16
14
|
#OPT-PARSER
|