pets 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +79 -5
- data/bin/coPatReporter.rb +63 -156
- data/bin/comPatMondo.rb +1 -4
- data/bin/evidence_profiler.rb +38 -151
- data/bin/get_network_nodes.rb +79 -132
- data/bin/get_sorted_profs.rb +25 -36
- data/bin/install_deps.rb +7 -0
- data/bin/paco_translator.rb +29 -72
- data/bin/phen2reg.rb +1 -4
- data/bin/profiles2phenopacket.rb +110 -0
- data/bin/reg2phen.rb +1 -3
- data/example_datasets/associations_file.txt +757 -0
- data/example_datasets/example_patient.txt +6 -0
- data/example_datasets/example_patient_hpos.txt +15 -0
- data/example_datasets/genes.txt +8 -0
- data/example_datasets/hpo2ci.txt +2798 -0
- data/example_datasets/hummu_congenital_full_dataset.txt +4183 -0
- data/example_datasets/launch.sh +20 -0
- data/external_code/generate_boxpot.R +51 -21
- data/external_code/get_clusters.R +2 -2
- data/external_code/install_R_dependencies.R +11 -0
- data/external_code/plot_heatmap.R +34 -30
- data/lib/pets/coPatReporterMethods.rb +143 -441
- data/lib/pets/cohort.rb +307 -0
- data/lib/pets/constants.rb +7 -0
- data/lib/pets/generalMethods.rb +8 -317
- data/lib/pets/genomic_features.rb +144 -0
- data/lib/pets/io.rb +457 -0
- data/lib/pets/parsers/cohort_parser.rb +106 -0
- data/lib/pets/version.rb +1 -1
- data/lib/pets.rb +8 -0
- data/pets.gemspec +1 -0
- data/templates/cohort_report.erb +5 -7
- data/templates/patient_report.erb +1 -1
- metadata +34 -3
data/bin/paco_translator.rb
CHANGED
@@ -1,59 +1,10 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
ROOT_PATH = File.dirname(__FILE__)
|
4
|
-
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
5
|
-
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
|
6
4
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
7
5
|
|
8
|
-
require 'generalMethods.rb'
|
9
6
|
require 'optparse'
|
10
|
-
require '
|
11
|
-
|
12
|
-
###############
|
13
|
-
#METHODS
|
14
|
-
###############
|
15
|
-
|
16
|
-
def translate_hpo(patient_data, hpo, translate)
|
17
|
-
reject_pats = []
|
18
|
-
patient_data.each do |patientID, patient_record|
|
19
|
-
hpos, chr, start, stop = patient_record
|
20
|
-
if translate == 'names'
|
21
|
-
# hpos, rejected = hpo.translate_codes2names(hpos)
|
22
|
-
hpos, rejected = hpo.translate_ids(hpos)
|
23
|
-
elsif translate =='codes'
|
24
|
-
# hpos, rejected = hpo.translate_names2codes(hpos)
|
25
|
-
hpos, rejected = hpo.translate_names(hpos)
|
26
|
-
STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
|
27
|
-
end
|
28
|
-
if hpos.empty?
|
29
|
-
reject_pats << patientID
|
30
|
-
else
|
31
|
-
patient_record[0] = hpos
|
32
|
-
end
|
33
|
-
end
|
34
|
-
reject_pats.each do | rj_pat|
|
35
|
-
patient_data.delete(rj_pat)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def save_translated_file(patients_with_hpo_names, output_file, mode)
|
40
|
-
File.open(output_file, 'w') do |f|
|
41
|
-
if mode == 'paco'
|
42
|
-
f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
|
43
|
-
end
|
44
|
-
patients_with_hpo_names.each do |id, patient_record|
|
45
|
-
hpos, chr, start, stop = patient_record
|
46
|
-
id = id.gsub(/_i[0-9]+$/,'')
|
47
|
-
if mode == 'default'
|
48
|
-
f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
|
49
|
-
elsif mode == 'paco'
|
50
|
-
f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
|
51
|
-
else
|
52
|
-
abort('Wrong save_mode] option, please try default or paco')
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
7
|
+
require 'pets'
|
57
8
|
|
58
9
|
###############
|
59
10
|
#OPTIONS
|
@@ -68,9 +19,9 @@ OptionParser.new do |opts|
|
|
68
19
|
options[:chromosome_col] = data
|
69
20
|
end
|
70
21
|
|
71
|
-
options[:
|
22
|
+
options[:id_col] = nil
|
72
23
|
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
73
|
-
options[:
|
24
|
+
options[:id_col] = data
|
74
25
|
end
|
75
26
|
|
76
27
|
options[:end_col] = nil
|
@@ -93,9 +44,9 @@ OptionParser.new do |opts|
|
|
93
44
|
options[:input_file] = value
|
94
45
|
end
|
95
46
|
|
96
|
-
options[:
|
47
|
+
options[:ont_col] = nil
|
97
48
|
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
98
|
-
options[:
|
49
|
+
options[:ont_col] = data
|
99
50
|
end
|
100
51
|
|
101
52
|
options[:start_col] = nil
|
@@ -103,33 +54,39 @@ OptionParser.new do |opts|
|
|
103
54
|
options[:start_col] = data
|
104
55
|
end
|
105
56
|
|
106
|
-
options[:
|
57
|
+
options[:separator] = '|'
|
107
58
|
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
108
|
-
options[:
|
59
|
+
options[:separator] = data
|
60
|
+
end
|
61
|
+
|
62
|
+
options[:n_phens] = nil
|
63
|
+
opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
|
64
|
+
options[:n_phens] = data.to_i
|
109
65
|
end
|
110
66
|
|
111
|
-
options[:save_mode] =
|
67
|
+
options[:save_mode] = :default
|
112
68
|
opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
|
113
|
-
options[:save_mode] = data
|
69
|
+
options[:save_mode] = data.to_sym
|
114
70
|
end
|
115
71
|
|
116
|
-
options[:
|
117
|
-
opts.on("-
|
118
|
-
options[:
|
72
|
+
options[:names] = false
|
73
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
74
|
+
options[:names] = true
|
75
|
+
end
|
76
|
+
|
77
|
+
options[:translate] = false
|
78
|
+
opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
|
79
|
+
options[:translate] = true
|
119
80
|
end
|
120
81
|
end.parse!
|
121
82
|
|
122
83
|
###############
|
123
84
|
#MAIN
|
124
85
|
###############
|
125
|
-
hpo_file = ENV['hpo_file']
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
hpo = Ontology.new(file: hpo_file, load_file: true)
|
133
|
-
translate_hpo(patient_data, hpo, options[:translate])
|
134
|
-
end
|
135
|
-
save_translated_file(patient_data, options[:output_file], options[:save_mode])
|
86
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
87
|
+
Cohort.load_ontology(:hpo, hpo_file)
|
88
|
+
Cohort.act_ont = :hpo
|
89
|
+
|
90
|
+
patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
|
91
|
+
rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens]) if !options[:n_phens].nil?
|
92
|
+
patient_data.save(options[:output_file], options[:save_mode], options[:translate])
|
data/bin/phen2reg.rb
CHANGED
@@ -2,16 +2,13 @@
|
|
2
2
|
# Rojano E. & Seoane P., September 2016
|
3
3
|
# Program to predict the position from given HPO codes, sorted by their association values.
|
4
4
|
|
5
|
-
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
6
5
|
ROOT_PATH = File.dirname(__FILE__)
|
7
6
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
8
7
|
|
9
|
-
require 'generalMethods.rb'
|
10
|
-
require 'phen2reg_methods.rb'
|
11
8
|
require 'optparse'
|
12
9
|
require 'report_html'
|
13
10
|
require 'semtools'
|
14
|
-
|
11
|
+
require 'pets'
|
15
12
|
|
16
13
|
##########################
|
17
14
|
#OPT-PARSER
|
@@ -0,0 +1,110 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'pets'
|
8
|
+
|
9
|
+
#############################
|
10
|
+
## METHODS
|
11
|
+
#############################
|
12
|
+
def load_index(path_index)
|
13
|
+
vcf_index = {}
|
14
|
+
File.open(path_index).each do |line|
|
15
|
+
id, path = line.chomp.split("\t")
|
16
|
+
vcf_index[id] = path
|
17
|
+
end
|
18
|
+
return vcf_index
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
##########################
|
23
|
+
#OPT-PARSER
|
24
|
+
##########################
|
25
|
+
|
26
|
+
options = {}
|
27
|
+
OptionParser.new do |opts|
|
28
|
+
opts.banner = "Usage: #{__FILE__} [options]"
|
29
|
+
|
30
|
+
options[:chromosome_col] = nil
|
31
|
+
opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
|
32
|
+
options[:chromosome_col] = data
|
33
|
+
end
|
34
|
+
|
35
|
+
options[:id_col] = nil
|
36
|
+
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
|
37
|
+
options[:id_col] = data
|
38
|
+
end
|
39
|
+
|
40
|
+
options[:end_col] = nil
|
41
|
+
opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
|
42
|
+
options[:end_col] = data
|
43
|
+
end
|
44
|
+
|
45
|
+
options[:genome_assembly] = 'hg38'
|
46
|
+
opts.on("-G", "--genome_assembly STRING", "Genome assembly version. Please choose between hg18, hg19 and hg38. Default hg38") do |data|
|
47
|
+
options[:genome_assembly] = data
|
48
|
+
end
|
49
|
+
|
50
|
+
options[:header] = true
|
51
|
+
#chr\tstart\tstop
|
52
|
+
opts.on("-H", "--header", "Set if the file has a line header. Default true") do
|
53
|
+
options[:header] = false
|
54
|
+
end
|
55
|
+
|
56
|
+
options[:input_file] = nil
|
57
|
+
opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|
|
58
|
+
options[:input_file] = data
|
59
|
+
end
|
60
|
+
|
61
|
+
options[:vcf_index] = nil
|
62
|
+
opts.on("-I", "--vcf_index PATH", "VCF file with patient id pointing to vcf path") do |data|
|
63
|
+
options[:vcf_index] = data
|
64
|
+
end
|
65
|
+
|
66
|
+
options[:names] = false
|
67
|
+
opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
|
68
|
+
options[:names] = true
|
69
|
+
end
|
70
|
+
|
71
|
+
options[:output_folder] = nil
|
72
|
+
opts.on("-o", "--output_file PATH", "Output folder") do |data|
|
73
|
+
options[:output_folder] = data
|
74
|
+
end
|
75
|
+
|
76
|
+
options[:ont_col] = nil
|
77
|
+
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
|
78
|
+
options[:ont_col] = data
|
79
|
+
end
|
80
|
+
|
81
|
+
options[:separator] = '|'
|
82
|
+
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
|
83
|
+
options[:separator] = data
|
84
|
+
end
|
85
|
+
|
86
|
+
options[:start_col] = nil
|
87
|
+
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
|
88
|
+
options[:start_col] = data
|
89
|
+
end
|
90
|
+
|
91
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
92
|
+
puts opts
|
93
|
+
exit
|
94
|
+
end
|
95
|
+
|
96
|
+
end.parse!
|
97
|
+
|
98
|
+
#############################################################
|
99
|
+
## MAIN
|
100
|
+
#############################################################
|
101
|
+
hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
|
102
|
+
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
|
103
|
+
Cohort.act_ont = :hpo
|
104
|
+
|
105
|
+
patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
|
106
|
+
rejected_hpos_C, rejected_patients_C = patient_data.check(hard=true)
|
107
|
+
patient_data.link2ont(Cohort.act_ont)
|
108
|
+
|
109
|
+
vcf_index = load_index(options[:vcf_index]) if !options[:vcf_index].nil?
|
110
|
+
patient_data.export_phenopackets(options[:output_folder], options[:genome_assembly], vcf_index: vcf_index)
|
data/bin/reg2phen.rb
CHANGED
@@ -3,14 +3,12 @@
|
|
3
3
|
#data2predict = file to predict
|
4
4
|
#training_file.txt = file with training data (association values and hpo codes).
|
5
5
|
|
6
|
-
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
7
6
|
ROOT_PATH = File.dirname(__FILE__)
|
8
7
|
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
|
9
8
|
|
10
9
|
require 'optparse'
|
11
|
-
require 'generalMethods.rb'
|
12
10
|
require 'semtools'
|
13
|
-
require '
|
11
|
+
require 'pets'
|
14
12
|
|
15
13
|
##########################
|
16
14
|
#OPT-PARSER
|