pets 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +79 -5
- data/bin/coPatReporter.rb +63 -156
- data/bin/comPatMondo.rb +1 -4
- data/bin/evidence_profiler.rb +38 -151
- data/bin/get_network_nodes.rb +79 -132
- data/bin/get_sorted_profs.rb +25 -36
- data/bin/install_deps.rb +7 -0
- data/bin/paco_translator.rb +29 -72
- data/bin/phen2reg.rb +1 -4
- data/bin/profiles2phenopacket.rb +110 -0
- data/bin/reg2phen.rb +1 -3
- data/example_datasets/associations_file.txt +757 -0
- data/example_datasets/example_patient.txt +6 -0
- data/example_datasets/example_patient_hpos.txt +15 -0
- data/example_datasets/genes.txt +8 -0
- data/example_datasets/hpo2ci.txt +2798 -0
- data/example_datasets/hummu_congenital_full_dataset.txt +4183 -0
- data/example_datasets/launch.sh +20 -0
- data/external_code/generate_boxpot.R +51 -21
- data/external_code/get_clusters.R +2 -2
- data/external_code/install_R_dependencies.R +11 -0
- data/external_code/plot_heatmap.R +34 -30
- data/lib/pets/coPatReporterMethods.rb +143 -441
- data/lib/pets/cohort.rb +307 -0
- data/lib/pets/constants.rb +7 -0
- data/lib/pets/generalMethods.rb +8 -317
- data/lib/pets/genomic_features.rb +144 -0
- data/lib/pets/io.rb +457 -0
- data/lib/pets/parsers/cohort_parser.rb +106 -0
- data/lib/pets/version.rb +1 -1
- data/lib/pets.rb +8 -0
- data/pets.gemspec +1 -0
- data/templates/cohort_report.erb +5 -7
- data/templates/patient_report.erb +1 -1
- metadata +34 -3
data/bin/paco_translator.rb
CHANGED
@@ -1,59 +1,10 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
ROOT_PATH = File.dirname(__FILE__)
|
4
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
5
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
|
6
4
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
7
5
|
|
8
|
-
require 'generalMethods.rb'
|
9
6
|
require 'optparse'
|
10
|
-
require '
|
11
|
-
|
12
|
-
###############
|
13
|
-
#METHODS
|
14
|
-
###############
|
15
|
-
|
16
|
-
def translate_hpo(patient_data, hpo, translate)
|
17
|
-
reject_pats = []
|
18
|
-
patient_data.each do |patientID, patient_record|
|
19
|
-
hpos, chr, start, stop = patient_record
|
20
|
-
if translate == 'names'
|
21
|
-
# hpos, rejected = hpo.translate_codes2names(hpos)
|
22
|
-
hpos, rejected = hpo.translate_ids(hpos)
|
23
|
-
elsif translate =='codes'
|
24
|
-
# hpos, rejected = hpo.translate_names2codes(hpos)
|
25
|
-
hpos, rejected = hpo.translate_names(hpos)
|
26
|
-
STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
|
27
|
-
end
|
28
|
-
if hpos.empty?
|
29
|
-
reject_pats << patientID
|
30
|
-
else
|
31
|
-
patient_record[0] = hpos
|
32
|
-
end
|
33
|
-
end
|
34
|
-
reject_pats.each do | rj_pat|
|
35
|
-
patient_data.delete(rj_pat)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def save_translated_file(patients_with_hpo_names, output_file, mode)
|
40
|
-
File.open(output_file, 'w') do |f|
|
41
|
-
if mode == 'paco'
|
42
|
-
f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
|
43
|
-
end
|
44
|
-
patients_with_hpo_names.each do |id, patient_record|
|
45
|
-
hpos, chr, start, stop = patient_record
|
46
|
-
id = id.gsub(/_i[0-9]+$/,'')
|
47
|
-
if mode == 'default'
|
48
|
-
f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
|
49
|
-
elsif mode == 'paco'
|
50
|
-
f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
|
51
|
-
else
|
52
|
-
abort('Wrong save_mode] option, please try default or paco')
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
7
|
+
require 'pets'
|
57
8
|
|
58
9
|
###############
|
59
10
|
#OPTIONS
|
@@ -68,9 +19,9 @@ OptionParser.new do |opts|
|
|
68
19
|
options[:chromosome_col] = data
|
69
20
|
end
|
70
21
|
|
71
|
-
options[:
|
22
|
+
options[:id_col] = nil
|
72
23
|
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
73
|
-
options[:
|
24
|
+
options[:id_col] = data
|
74
25
|
end
|
75
26
|
|
76
27
|
options[:end_col] = nil
|
@@ -93,9 +44,9 @@ OptionParser.new do |opts|
|
|
93
44
|
options[:input_file] = value
|
94
45
|
end
|
95
46
|
|
96
|
-
options[:
|
47
|
+
options[:ont_col] = nil
|
97
48
|
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
98
|
-
options[:
|
49
|
+
options[:ont_col] = data
|
99
50
|
end
|
100
51
|
|
101
52
|
options[:start_col] = nil
|
@@ -103,33 +54,39 @@ OptionParser.new do |opts|
|
|
103
54
|
options[:start_col] = data
|
104
55
|
end
|
105
56
|
|
106
|
-
options[:
|
57
|
+
options[:separator] = '|'
|
107
58
|
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
108
|
-
options[:
|
59
|
+
options[:separator] = data
|
60
|
+
end
|
61
|
+
|
62
|
+
options[:n_phens] = nil
|
63
|
+
opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
|
64
|
+
options[:n_phens] = data.to_i
|
109
65
|
end
|
110
66
|
|
111
|
-
options[:save_mode] =
|
67
|
+
options[:save_mode] = :default
|
112
68
|
opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
|
113
|
-
options[:save_mode] = data
|
69
|
+
options[:save_mode] = data.to_sym
|
114
70
|
end
|
115
71
|
|
116
|
-
options[:
|
117
|
-
opts.on("-
|
118
|
-
options[:
|
72
|
+
options[:names] = false
|
73
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
74
|
+
options[:names] = true
|
75
|
+
end
|
76
|
+
|
77
|
+
options[:translate] = false
|
78
|
+
opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
|
79
|
+
options[:translate] = true
|
119
80
|
end
|
120
81
|
end.parse!
|
121
82
|
|
122
83
|
###############
|
123
84
|
#MAIN
|
124
85
|
###############
|
125
|
-
hpo_file = ENV['hpo_file']
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
hpo = Ontology.new(file: hpo_file, load_file: true)
|
133
|
-
translate_hpo(patient_data, hpo, options[:translate])
|
134
|
-
end
|
135
|
-
save_translated_file(patient_data, options[:output_file], options[:save_mode])
|
86
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
87
|
+
Cohort.load_ontology(:hpo, hpo_file)
|
88
|
+
Cohort.act_ont = :hpo
|
89
|
+
|
90
|
+
patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
|
91
|
+
rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens]) if !options[:n_phens].nil?
|
92
|
+
patient_data.save(options[:output_file], options[:save_mode], options[:translate])
|
data/bin/phen2reg.rb
CHANGED
@@ -2,16 +2,13 @@
|
|
2
2
|
# Rojano E. & Seoane P., September 2016
|
3
3
|
# Program to predict the position from given HPO codes, sorted by their association values.
|
4
4
|
|
5
|
-
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
6
5
|
ROOT_PATH = File.dirname(__FILE__)
|
7
6
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
8
7
|
|
9
|
-
require 'generalMethods.rb'
|
10
|
-
require 'phen2reg_methods.rb'
|
11
8
|
require 'optparse'
|
12
9
|
require 'report_html'
|
13
10
|
require 'semtools'
|
14
|
-
|
11
|
+
require 'pets'
|
15
12
|
|
16
13
|
##########################
|
17
14
|
#OPT-PARSER
|
@@ -0,0 +1,110 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'pets'
|
8
|
+
|
9
|
+
#############################
|
10
|
+
## METHODS
|
11
|
+
#############################
|
12
|
+
# Load a tab-separated index file into a Hash.
#
# Each line is split on tab characters; the first field is used as the key
# and the second field as the value (the command-line help describes these
# as a patient id pointing to a VCF path). Fields beyond the second are
# ignored, and when a key is repeated the last line wins.
#
# @param path_index [String] path of the index file to read
# @return [Hash{String => String, nil}] first field => second field
#   (the value is nil when a line has no second field)
# @raise [SystemCallError] if the file cannot be opened (e.g. Errno::ENOENT)
def load_index(path_index)
  vcf_index = {}
  # File.foreach closes the file once iteration finishes; the previous
  # File.open(path).each form left the handle open until garbage collection.
  File.foreach(path_index) do |line|
    id, path = line.chomp.split("\t")
    # A blank line splits to an empty array; skip it instead of storing nil => nil.
    next if id.nil?

    vcf_index[id] = path
  end
  vcf_index
end
|
20
|
+
|
21
|
+
|
22
|
+
##########################
|
23
|
+
#OPT-PARSER
|
24
|
+
##########################
|
25
|
+
|
26
|
+
options = {}
|
27
|
+
OptionParser.new do |opts|
|
28
|
+
opts.banner = "Usage: #{__FILE__} [options]"
|
29
|
+
|
30
|
+
options[:chromosome_col] = nil
|
31
|
+
opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
|
32
|
+
options[:chromosome_col] = data
|
33
|
+
end
|
34
|
+
|
35
|
+
options[:id_col] = nil
|
36
|
+
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
37
|
+
options[:id_col] = data
|
38
|
+
end
|
39
|
+
|
40
|
+
options[:end_col] = nil
|
41
|
+
opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
|
42
|
+
options[:end_col] = data
|
43
|
+
end
|
44
|
+
|
45
|
+
options[:genome_assembly] = 'hg38'
|
46
|
+
opts.on("-G", "--genome_assembly STRING", "Genome assembly version. Please choose between hg18, hg19 and hg38. Default hg38") do |data|
|
47
|
+
options[:genome_assembly] = data
|
48
|
+
end
|
49
|
+
|
50
|
+
options[:header] = true
|
51
|
+
#chr\tstart\tstop
|
52
|
+
opts.on("-H", "--header", "Set if the file has a line header. Default true") do
|
53
|
+
options[:header] = false
|
54
|
+
end
|
55
|
+
|
56
|
+
options[:input_file] = nil
|
57
|
+
opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|
|
58
|
+
options[:input_file] = data
|
59
|
+
end
|
60
|
+
|
61
|
+
options[:vcf_index] = nil
|
62
|
+
opts.on("-I", "--vcf_index PATH", "VCF file with patient id pointing to vcf path") do |data|
|
63
|
+
options[:vcf_index] = data
|
64
|
+
end
|
65
|
+
|
66
|
+
options[:names] = false
|
67
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
68
|
+
options[:names] = true
|
69
|
+
end
|
70
|
+
|
71
|
+
options[:output_folder] = nil
|
72
|
+
opts.on("-o", "--output_file PATH", "Output folder") do |data|
|
73
|
+
options[:output_folder] = data
|
74
|
+
end
|
75
|
+
|
76
|
+
options[:ont_col] = nil
|
77
|
+
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
78
|
+
options[:ont_col] = data
|
79
|
+
end
|
80
|
+
|
81
|
+
options[:separator] = '|'
|
82
|
+
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
83
|
+
options[:separator] = data
|
84
|
+
end
|
85
|
+
|
86
|
+
options[:start_col] = nil
|
87
|
+
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
|
88
|
+
options[:start_col] = data
|
89
|
+
end
|
90
|
+
|
91
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
92
|
+
puts opts
|
93
|
+
exit
|
94
|
+
end
|
95
|
+
|
96
|
+
end.parse!
|
97
|
+
|
98
|
+
#############################################################
|
99
|
+
## MAIN
|
100
|
+
#############################################################
|
101
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
102
|
+
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
|
103
|
+
Cohort.act_ont = :hpo
|
104
|
+
|
105
|
+
patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
|
106
|
+
rejected_hpos_C, rejected_patients_C = patient_data.check(hard=true)
|
107
|
+
patient_data.link2ont(Cohort.act_ont)
|
108
|
+
|
109
|
+
vcf_index = load_index(options[:vcf_index]) if !options[:vcf_index].nil?
|
110
|
+
patient_data.export_phenopackets(options[:output_folder], options[:genome_assembly], vcf_index: vcf_index)
|
data/bin/reg2phen.rb
CHANGED
@@ -3,14 +3,12 @@
|
|
3
3
|
#data2predict = file to predict
|
4
4
|
#training_file.txt = file with training data (association values and hpo codes).
|
5
5
|
|
6
|
-
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
7
6
|
ROOT_PATH = File.dirname(__FILE__)
|
8
7
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
8
|
|
10
9
|
require 'optparse'
|
11
|
-
require 'generalMethods.rb'
|
12
10
|
require 'semtools'
|
13
|
-
require '
|
11
|
+
require 'pets'
|
14
12
|
|
15
13
|
##########################
|
16
14
|
#OPT-PARSER
|