pets 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +79 -5
- data/bin/coPatReporter.rb +63 -156
- data/bin/comPatMondo.rb +1 -4
- data/bin/evidence_profiler.rb +38 -151
- data/bin/get_network_nodes.rb +79 -132
- data/bin/get_sorted_profs.rb +25 -36
- data/bin/install_deps.rb +7 -0
- data/bin/paco_translator.rb +29 -72
- data/bin/phen2reg.rb +1 -4
- data/bin/profiles2phenopacket.rb +110 -0
- data/bin/reg2phen.rb +1 -3
- data/example_datasets/associations_file.txt +757 -0
- data/example_datasets/example_patient.txt +6 -0
- data/example_datasets/example_patient_hpos.txt +15 -0
- data/example_datasets/genes.txt +8 -0
- data/example_datasets/hpo2ci.txt +2798 -0
- data/example_datasets/hummu_congenital_full_dataset.txt +4183 -0
- data/example_datasets/launch.sh +20 -0
- data/external_code/generate_boxpot.R +51 -21
- data/external_code/get_clusters.R +2 -2
- data/external_code/install_R_dependencies.R +11 -0
- data/external_code/plot_heatmap.R +34 -30
- data/lib/pets/coPatReporterMethods.rb +143 -441
- data/lib/pets/cohort.rb +307 -0
- data/lib/pets/constants.rb +7 -0
- data/lib/pets/generalMethods.rb +8 -317
- data/lib/pets/genomic_features.rb +144 -0
- data/lib/pets/io.rb +457 -0
- data/lib/pets/parsers/cohort_parser.rb +106 -0
- data/lib/pets/version.rb +1 -1
- data/lib/pets.rb +8 -0
- data/pets.gemspec +1 -0
- data/templates/cohort_report.erb +5 -7
- data/templates/patient_report.erb +1 -1
- metadata +34 -3
data/bin/evidence_profiler.rb
CHANGED
@@ -1,166 +1,63 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
ROOT_PATH = File.dirname(__FILE__)
|
4
|
-
REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
|
5
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
6
|
-
EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
|
7
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
|
8
4
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
5
|
|
10
6
|
require 'fileutils'
|
11
7
|
require 'optparse'
|
12
8
|
require 'report_html'
|
13
9
|
require 'semtools'
|
14
|
-
require '
|
15
|
-
|
16
|
-
|
17
|
-
class Report_html
|
18
|
-
def circular_genome(user_options = {}, &block)
|
19
|
-
default_options = {}.merge!(user_options)
|
20
|
-
coordinates = user_options[:genomic_coordinates]
|
21
|
-
html_string = canvasXpress_main(default_options, block) do |options, config, samples, vars, values, object_id, x, z|
|
22
|
-
config['graphType'] = 'Circular'
|
23
|
-
config["arcSegmentsSeparation"] = 3
|
24
|
-
config["colorScheme"] = "Tableau"
|
25
|
-
config["colors"] = ["#332288","#6699CC","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#661100","#CC6677","#AA4466","#882255","#AA4499"]
|
26
|
-
config["showIdeogram"] = true
|
27
|
-
chr = []
|
28
|
-
pos = []
|
29
|
-
tags2remove = []
|
30
|
-
vars.each_with_index do |var, i|
|
31
|
-
coord = coordinates[var]
|
32
|
-
if !coord.nil?
|
33
|
-
tag = coord.first.gsub(/[^\dXY]/,'')
|
34
|
-
if tag == 'X' || tag == 'Y' || (tag.to_i > 0 && tag.to_i <= 22)
|
35
|
-
chr << coord.first.gsub(/[^\dXY]/,'')
|
36
|
-
pos << coord.last - 1
|
37
|
-
else
|
38
|
-
tags2remove << i
|
39
|
-
end
|
40
|
-
else
|
41
|
-
tags2remove << i
|
42
|
-
end
|
43
|
-
end
|
44
|
-
tags2remove.reverse_each{|i| ent = vars.delete_at(i); warn("Feature #{ent} has not valid coordinates")} # Remove entities with invalid coordinates
|
45
|
-
z['chr'] = chr
|
46
|
-
z['pos'] = pos
|
47
|
-
end
|
48
|
-
return html_string
|
49
|
-
end
|
50
|
-
end
|
10
|
+
require 'pets'
|
51
11
|
|
52
12
|
#############################################################################################
|
53
13
|
## METHODS
|
54
14
|
############################################################################################
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
id, profile = line.chomp.split("\t")
|
60
|
-
hpos = profile.split(',').map{|a| a.to_sym}
|
61
|
-
hpos, rejected_hpos = hpo.check_ids(hpos)
|
62
|
-
if !hpos.empty?
|
63
|
-
hpos = hpo.clean_profile(hpos)
|
64
|
-
profiles[id] = hpos if !hpos.empty?
|
65
|
-
end
|
66
|
-
end
|
67
|
-
return profiles
|
68
|
-
end
|
69
|
-
|
70
|
-
def load_variants(variant_folder)
|
71
|
-
variants = {}
|
72
|
-
coordinates = {}
|
73
|
-
count = 0
|
74
|
-
all_vars = {}
|
75
|
-
Dir.glob(File.join(variant_folder, '*.tab')).each do |path|
|
76
|
-
profile_id = File.basename(path, '.tab')
|
77
|
-
vars = {}
|
78
|
-
File.open(path).each do |line|
|
79
|
-
fields = line.chomp.split("\t")
|
80
|
-
chr = fields[0]
|
81
|
-
start = fields[1].to_i
|
82
|
-
query = coordinates[chr]
|
83
|
-
if query.nil?
|
84
|
-
coordinates[chr] = [start]
|
85
|
-
count += 1
|
86
|
-
id = "var_#{count}"
|
87
|
-
else
|
88
|
-
if !query.include?(start)
|
89
|
-
query << start
|
90
|
-
count += 1
|
91
|
-
id = "var_#{count}"
|
92
|
-
else
|
93
|
-
id = all_vars.key([chr, start])
|
94
|
-
end
|
95
|
-
end
|
96
|
-
vars[id] = [chr, start]
|
97
|
-
end
|
98
|
-
all_vars.merge!(vars)
|
99
|
-
variants[profile_id] = vars
|
100
|
-
end
|
101
|
-
return variants
|
102
|
-
end
|
103
|
-
|
104
|
-
def load_evidences(evidences_path, hpo)
|
105
|
-
genomic_coordinates = {}
|
106
|
-
coord_files = Dir.glob(File.join(evidences_path, '*.coords'))
|
107
|
-
coord_files.each do |cd_f|
|
108
|
-
entity = File.basename(cd_f, '.coords')
|
109
|
-
coordinates = load_coordinates(cd_f)
|
110
|
-
genomic_coordinates[entity] = coordinates
|
111
|
-
end
|
112
|
-
evidences = {}
|
113
|
-
evidence_files = Dir.glob(File.join(evidences_path, '*_HP.txt'))
|
114
|
-
evidence_files.each do |e_f|
|
115
|
-
pair = File.basename(e_f, '.txt')
|
116
|
-
profiles, id2label = load_evidence_profiles(e_f, hpo)
|
117
|
-
evidences[pair] = {prof: profiles, id2lab: id2label}
|
118
|
-
end
|
119
|
-
return evidences, genomic_coordinates
|
15
|
+
def get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
|
16
|
+
all_coordinates = genomic_coordinates[entity]
|
17
|
+
coords = all_coordinates.select{|id, coordinates| candidates_ids.include?(id.to_sym)}
|
18
|
+
return coords
|
120
19
|
end
|
121
20
|
|
122
|
-
def
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
21
|
+
def make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs, evidences, prof_vars, template, output)
|
22
|
+
var_ids, var_coors = format_variants4report(prof_vars)
|
23
|
+
container = {
|
24
|
+
profile_id: profile_id,
|
25
|
+
candidates: all_candidates.each{|c| c[0] = c.first.to_s},
|
26
|
+
genomic_coordinates: all_genomic_coordinates.transform_values{|c| c.first(2) },
|
27
|
+
similarity_matrixs: similarity_matrixs,
|
28
|
+
evidences: evidences,
|
29
|
+
var_ids: var_ids,
|
30
|
+
var_coordinates: var_coors
|
31
|
+
}
|
32
|
+
report = Report_html.new(container, 'Evidence profile report')
|
33
|
+
report.build(template)
|
34
|
+
report.write(File.join(output, profile_id.to_s + '.html'))
|
135
35
|
end
|
136
36
|
|
137
|
-
def
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
37
|
+
def format_variants4report(var_data)
|
38
|
+
if var_data.nil?
|
39
|
+
var_ids, var_coors = nil
|
40
|
+
else
|
41
|
+
var_ids = []
|
42
|
+
var_coors = {}
|
43
|
+
count = 0
|
44
|
+
var_data.each do |chr, reg|
|
45
|
+
var_id = "var_#{count}"
|
46
|
+
var_ids << [var_id, 0]
|
47
|
+
var_coors[var_id] = [chr.to_s, reg[:start]]
|
48
|
+
count += 1
|
149
49
|
end
|
150
50
|
end
|
151
|
-
return
|
51
|
+
return var_ids, var_coors
|
152
52
|
end
|
153
53
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
coords = all_coordinates.select{|id, coordinates| candidates_ids.include?(id.to_sym)}
|
159
|
-
return coords
|
54
|
+
def get_genome_hotspots(similarity_matrixs, all_genomic_coordinates)
|
55
|
+
regions = Genomic_Feature.new(all_genomic_coordinates.values.map{|g| g[0..2]})
|
56
|
+
candidates_by_window, genome_windows = regions.generate_cluster_regions(:reg_overlap, 'A', 1)
|
57
|
+
# TODO: COMPLETE UNTIL FULL PREDICTOR
|
160
58
|
end
|
161
59
|
|
162
60
|
|
163
|
-
|
164
61
|
#############################################################################################
|
165
62
|
## OPTPARSE
|
166
63
|
############################################################################################
|
@@ -245,17 +142,7 @@ profiles.each do |profile_id, reference_prof|
|
|
245
142
|
coords = get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
|
246
143
|
all_genomic_coordinates.merge!(coords)
|
247
144
|
end
|
145
|
+
get_genome_hotspots(similarity_matrixs, all_genomic_coordinates)
|
248
146
|
prof_vars = profile_variants[profile_id]
|
249
|
-
|
250
|
-
profile_id: profile_id,
|
251
|
-
candidates: all_candidates.each{|c| c[0] = c.first.to_s},
|
252
|
-
genomic_coordinates: all_genomic_coordinates.transform_values{|c| c.first(2) },
|
253
|
-
similarity_matrixs: similarity_matrixs,
|
254
|
-
evidences: evidences,
|
255
|
-
var_ids: prof_vars.nil? ? nil : prof_vars.keys.map{|i| [i, 0]},
|
256
|
-
var_coordinates: prof_vars
|
257
|
-
}
|
258
|
-
report = Report_html.new(container, 'Evidence profile report')
|
259
|
-
report.build(template)
|
260
|
-
report.write(File.join(options[:output_folder], profile_id.to_s + '.html'))
|
147
|
+
make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs, evidences, prof_vars, template, options[:output_folder])
|
261
148
|
end
|
data/bin/get_network_nodes.rb
CHANGED
@@ -3,112 +3,34 @@
|
|
3
3
|
# Code to prepare data to get the associations between pathological phenotypes (HPO) and genomic regions (SOR)
|
4
4
|
|
5
5
|
ROOT_PATH = File.dirname(__FILE__)
|
6
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
7
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
|
8
6
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
7
|
|
10
8
|
##############################
|
11
9
|
#LIBRARIES
|
12
10
|
##############################
|
13
|
-
require 'generalMethods.rb'
|
14
11
|
require 'optparse'
|
15
|
-
require '
|
12
|
+
require 'pets'
|
16
13
|
|
17
14
|
###############################
|
18
15
|
#METHODS
|
19
16
|
###############################
|
20
17
|
|
21
|
-
def
|
22
|
-
patient2phenotype = {}
|
23
|
-
hpo_count = {}
|
24
|
-
not_found = []
|
25
|
-
patients_genomic_region_by_chr = {}
|
26
|
-
File.open(patient_file).each do |line|
|
27
|
-
line.chomp!
|
28
|
-
next if line.include?("#")
|
29
|
-
patient, chr, start, stop, phenotype_profile = line.split("\t", 5)
|
30
|
-
next if phenotype_profile.nil? #For skipping patients without phenotypes
|
31
|
-
phenotypes = phenotype_profile.split('|')
|
32
|
-
# phenotypes, rejected = hpo.translate_names2codes(phenotypes)
|
33
|
-
phenotypes, rejected = hpo.translate_names(phenotypes)
|
34
|
-
not_found = not_found | rejected
|
35
|
-
phenotypes.each do |hpo_code|
|
36
|
-
get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents) if !hpo.is_removable(hpo_code)
|
37
|
-
end
|
38
|
-
info = [patient, start.to_i, stop.to_i]
|
39
|
-
add_record(patients_genomic_region_by_chr, chr, info)
|
40
|
-
end
|
41
|
-
return patient2phenotype, hpo_count, not_found, patients_genomic_region_by_chr
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
def get_all_hpos(patient, hpo_code, patient2phenotype, hpo, hpo_count, add_parents)
|
46
|
-
add_record(hpo_count, hpo_code, patient)
|
47
|
-
add_record(patient2phenotype, patient, hpo_code)
|
48
|
-
if add_parents == 'root'
|
49
|
-
# hpo_parent_codes = hpo.get_parents(hpo_code)
|
50
|
-
hpo_parent_codes = hpo.get_ancestors(hpo_code)
|
51
|
-
hpo_parent_codes.each do |parent_code|
|
52
|
-
add_record(hpo_count, parent_code, patient)
|
53
|
-
add_record(patient2phenotype, patient, parent_code)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def build_tripartite_network(patients2hpo, hpo_stats, ic_threshold, patients_by_cluster)
|
18
|
+
def build_tripartite_network(patient_data, patients_by_cluster, add_parents, ont)
|
59
19
|
tripartite_network = []
|
60
20
|
patients_by_cluster.each do |patient, node_ids|
|
61
21
|
node_ids.each do |node_id|
|
62
22
|
tripartite_network << [node_id, patient]
|
63
23
|
end
|
64
24
|
end
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
end
|
71
|
-
end
|
25
|
+
patient_data.each_profile do |id, profile|
|
26
|
+
profile = profile.map{|term| ont.get_ancestors(term)}.flatten.uniq if add_parents == 'root'
|
27
|
+
profile.each do |term|
|
28
|
+
tripartite_network << [term, id]
|
29
|
+
end
|
72
30
|
end
|
73
31
|
return tripartite_network
|
74
32
|
end
|
75
33
|
|
76
|
-
def compute_hpo_stats(hpo_count, patient_number)
|
77
|
-
hpo_stats = {}
|
78
|
-
patient_hpo_ic = []
|
79
|
-
hpo_count.each do |hpo_code, patient_ids|
|
80
|
-
hpo_freq = patient_ids.length.fdiv(patient_number) #hpo frequency in patients
|
81
|
-
hpo_ic = -Math.log10(hpo_freq)
|
82
|
-
hpo_stats[hpo_code] = [hpo_freq, hpo_ic]
|
83
|
-
patient_ids.each do |patient_id|
|
84
|
-
patient_hpo_ic << [patient_id, hpo_code, hpo_ic]
|
85
|
-
end
|
86
|
-
end
|
87
|
-
return hpo_stats, patient_hpo_ic.sort{|a,b| a.first <=> b.first}
|
88
|
-
end
|
89
|
-
|
90
|
-
def write_hash(hash, file_path, header = [])
|
91
|
-
File.open(file_path, 'w') do |handler|
|
92
|
-
handler.puts header.join("\t") if !header.empty?
|
93
|
-
hash.each do |key, array|
|
94
|
-
handler.puts "#{key}\t#{array.join("\t")}"
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
def write_array(array, file_path)
|
100
|
-
File.open(file_path, 'w') do |handler|
|
101
|
-
array.each do |record|
|
102
|
-
if record.class == String
|
103
|
-
line = record
|
104
|
-
else
|
105
|
-
line = record.join("\t")
|
106
|
-
end
|
107
|
-
handler.puts line
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
34
|
##############################
|
113
35
|
#OPTPARSE
|
114
36
|
##############################
|
@@ -117,33 +39,75 @@ options = {}
|
|
117
39
|
OptionParser.new do |opts|
|
118
40
|
opts.banner = "Usage: #{__FILE__} [options]"
|
119
41
|
|
120
|
-
options[:
|
121
|
-
opts.on("-c", "--
|
122
|
-
options[:
|
123
|
-
end
|
42
|
+
options[:chromosome_col] = nil
|
43
|
+
opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
|
44
|
+
options[:chromosome_col] = data
|
45
|
+
end
|
124
46
|
|
125
|
-
options[:
|
126
|
-
opts.on("-
|
127
|
-
options[:
|
47
|
+
options[:id_col] = nil
|
48
|
+
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
49
|
+
options[:id_col] = data
|
128
50
|
end
|
129
51
|
|
130
|
-
options[:
|
131
|
-
opts.on("-
|
132
|
-
options[:
|
52
|
+
options[:end_col] = nil
|
53
|
+
opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
|
54
|
+
options[:end_col] = data
|
55
|
+
end
|
56
|
+
|
57
|
+
options[:ont_col] = nil
|
58
|
+
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
59
|
+
options[:ont_col] = data
|
133
60
|
end
|
134
61
|
|
135
|
-
options[:
|
136
|
-
opts.on("-
|
137
|
-
options[:
|
62
|
+
options[:start_col] = nil
|
63
|
+
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
|
64
|
+
options[:start_col] = data
|
65
|
+
end
|
66
|
+
|
67
|
+
options[:separator] = '|'
|
68
|
+
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
69
|
+
options[:separator] = data
|
70
|
+
end
|
71
|
+
|
72
|
+
options[:names] = false
|
73
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
74
|
+
options[:names] = true
|
75
|
+
end
|
76
|
+
|
77
|
+
options[:header] = true
|
78
|
+
opts.on("-H", "--header", "File has a line header. Default true") do
|
79
|
+
options[:header] = false
|
80
|
+
end
|
81
|
+
|
82
|
+
#===================================================================
|
83
|
+
|
84
|
+
options[:input_file] = nil
|
85
|
+
opts.on("-i", "--input_file PATH", "Input file with patients for parsing phenotypes to HPO codes") do |value|
|
86
|
+
options[:input_file] = value
|
138
87
|
end
|
139
88
|
|
140
89
|
options[:output_file] = 'tripartite_network.txt'
|
141
90
|
opts.on("-o", "--output_file PATH", "Output file for the tripartite network") do |value|
|
142
91
|
options[:output_file] = value
|
92
|
+
end
|
93
|
+
|
94
|
+
options[:cluster_file] = 'cluster_coords.txt'
|
95
|
+
opts.on("-u", "--cluster_file PATH", "Cluster coords output file that will be used to translate SOR nodes") do |value|
|
96
|
+
options[:cluster_file] = File.basename(value)
|
143
97
|
end
|
144
98
|
|
99
|
+
options[:excluded_hpo] = nil
|
100
|
+
opts.on("-x", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
|
101
|
+
options[:excluded_hpo] = excluded_hpo
|
102
|
+
end
|
103
|
+
|
104
|
+
options[:tag] = 'A'
|
105
|
+
opts.on("-m", "--mutation_type STRING", "Type of patient mutation, either it is a deletion (d) or duplication (D)") do |type|
|
106
|
+
options[:tag] = type
|
107
|
+
end
|
108
|
+
|
145
109
|
options[:hpo_file] = nil
|
146
|
-
opts.on("-
|
110
|
+
opts.on("-O", "--hpo_file PATH", "Input HPO file for extracting HPO codes") do |value|
|
147
111
|
options[:hpo_file] = value
|
148
112
|
end
|
149
113
|
|
@@ -152,16 +116,6 @@ OptionParser.new do |opts|
|
|
152
116
|
options[:add_parents] = value
|
153
117
|
end
|
154
118
|
|
155
|
-
options[:hpo_stat_file] = 'hpo_stats.txt'
|
156
|
-
opts.on("-s", "--hpo_stat_file PATH", "Output file with HPO codes, their frequency and CI") do |value|
|
157
|
-
options[:hpo_stat_file] = File.basename(value)
|
158
|
-
end
|
159
|
-
|
160
|
-
options[:thresold] = 0
|
161
|
-
opts.on("-t", "--info_thresold FLOAT", "IC thresold to discard non informative hpo. Default: 0.") do |thresold|
|
162
|
-
options[:thresold] = thresold.to_f
|
163
|
-
end
|
164
|
-
|
165
119
|
opts.on_tail("-h", "--help", "Show this message") do
|
166
120
|
puts opts
|
167
121
|
exit
|
@@ -176,27 +130,20 @@ output_folder = File.dirname(File.expand_path(options[:output_file]))
|
|
176
130
|
Dir.mkdir(output_folder) if !File.exists?(output_folder)
|
177
131
|
|
178
132
|
hpo_file = options[:hpo_file]
|
179
|
-
hpo_file = ENV['hpo_file'] if hpo_file.nil?
|
180
|
-
hpo_file
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
tripartite_network = build_tripartite_network(patients2hpo, hpo_stats, options[:thresold], patients_by_cluster)
|
196
|
-
|
197
|
-
# write_array(not_found - hpo.excluded_codes, File.join(output_folder, 'missing_hpo_names'))
|
198
|
-
write_array(not_found - hpo.removable_terms, File.join(output_folder, 'missing_hpo_names'))
|
133
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE if hpo_file.nil?
|
134
|
+
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
|
135
|
+
Cohort.act_ont = :hpo
|
136
|
+
hpo = Cohort.get_ontology(Cohort.act_ont)
|
137
|
+
|
138
|
+
patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
|
139
|
+
rejected_hpos_C, rejected_patients_C = patient_data.check
|
140
|
+
rejected_hpos = rejected_hpos_L | rejected_hpos_C
|
141
|
+
rejected_patients = rejected_patients_L + rejected_patients_C
|
142
|
+
patient_data.remove_incomplete_records
|
143
|
+
patient_data.index_vars
|
144
|
+
patients_by_cluster, sors = patient_data.generate_cluster_regions(:reg_overlap, options[:tag], 1)
|
145
|
+
tripartite_network = build_tripartite_network(patient_data, patients_by_cluster, options[:add_parents], hpo)
|
146
|
+
|
147
|
+
write_array(rejected_hpos, File.join(output_folder, 'missing_hpo_names'))
|
199
148
|
write_array(sors, File.join(output_folder, options[:cluster_file]))
|
200
|
-
|
201
|
-
write_array(tripartite_network, options[:output_file])
|
202
|
-
write_array(patient_hpo_ic, File.join(output_folder, 'filtered_hpo.txt'))
|
149
|
+
write_array(tripartite_network, options[:output_file])
|
data/bin/get_sorted_profs.rb
CHANGED
@@ -1,33 +1,11 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
3
|
ROOT_PATH = File.dirname(__FILE__)
|
5
|
-
REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
|
6
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
7
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.json')
|
8
4
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
5
|
|
10
6
|
require 'optparse'
|
11
7
|
require 'report_html'
|
12
|
-
require '
|
13
|
-
require 'generalMethods.rb'
|
14
|
-
|
15
|
-
#############################################################################################
|
16
|
-
## METHODS
|
17
|
-
############################################################################################
|
18
|
-
def procces_patient_data(patient_data, hpo)
|
19
|
-
clean_profiles = {}
|
20
|
-
all_hpo = []
|
21
|
-
patient_data.each do |pat_id, data|
|
22
|
-
profile = hpo.clean_profile_hard(data.first.map{|c| c.to_sym})
|
23
|
-
if !profile.empty?
|
24
|
-
clean_profiles[pat_id] = profile
|
25
|
-
all_hpo.concat(profile)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
ref_prof = hpo.clean_profile_hard(all_hpo.uniq)
|
29
|
-
return ref_prof, clean_profiles
|
30
|
-
end
|
8
|
+
require 'pets'
|
31
9
|
|
32
10
|
#############################################################################################
|
33
11
|
## OPTPARSE
|
@@ -42,9 +20,9 @@ OptionParser.new do |opts|
|
|
42
20
|
options[:chromosome_col] = data
|
43
21
|
end
|
44
22
|
|
45
|
-
options[:
|
23
|
+
options[:id_col] = nil
|
46
24
|
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
47
|
-
options[:
|
25
|
+
options[:id_col] = data
|
48
26
|
end
|
49
27
|
|
50
28
|
options[:end_col] = nil
|
@@ -67,9 +45,9 @@ OptionParser.new do |opts|
|
|
67
45
|
options[:input_file] = value
|
68
46
|
end
|
69
47
|
|
70
|
-
options[:
|
48
|
+
options[:ont_col] = nil
|
71
49
|
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
72
|
-
options[:
|
50
|
+
options[:ont_col] = data
|
73
51
|
end
|
74
52
|
|
75
53
|
options[:start_col] = nil
|
@@ -77,9 +55,14 @@ OptionParser.new do |opts|
|
|
77
55
|
options[:start_col] = data
|
78
56
|
end
|
79
57
|
|
80
|
-
options[:
|
58
|
+
options[:separator] = '|'
|
81
59
|
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
82
|
-
options[:
|
60
|
+
options[:separator] = data
|
61
|
+
end
|
62
|
+
|
63
|
+
options[:term_freq] = 0
|
64
|
+
opts.on("-f", "--general_prof_freq INTEGER", "When reference profile is not given, a general ine is computed with all profiles. If a freq is defined (0-1), all terms with freq minor than limit are removed") do |data|
|
65
|
+
options[:term_freq] = data.to_i
|
83
66
|
end
|
84
67
|
|
85
68
|
options[:matrix_limits] = [20, 40]
|
@@ -101,15 +84,21 @@ end.parse!
|
|
101
84
|
#############################################################################################
|
102
85
|
## MAIN
|
103
86
|
############################################################################################
|
104
|
-
patient_data = load_patient_cohort(options)
|
105
87
|
|
106
88
|
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
107
|
-
hpo
|
108
|
-
hpo
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
89
|
+
Cohort.load_ontology(:hpo, hpo_file)
|
90
|
+
Cohort.act_ont = :hpo
|
91
|
+
hpo = Cohort.get_ontology(Cohort.act_ont)
|
92
|
+
patient_data, _, _ = Cohort_Parser.load(options)
|
93
|
+
patient_data.check(hard=true)
|
94
|
+
|
95
|
+
clean_profiles = patient_data.profiles
|
96
|
+
if !options[:ref_prof].nil?
|
97
|
+
ref_profile = hpo.clean_profile_hard(options[:ref_prof])
|
98
|
+
else
|
99
|
+
ref_profile = patient_data.get_general_profile(options[:term_freq])
|
100
|
+
end
|
101
|
+
hpo.load_profiles({ref: ref_profile}, reset_stored: true)
|
113
102
|
|
114
103
|
similarities = hpo.compare_profiles(external_profiles: clean_profiles, sim_type: :lin, bidirectional: false)
|
115
104
|
|
data/bin/install_deps.rb
ADDED