pets 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,59 +1,10 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
3
  ROOT_PATH = File.dirname(__FILE__)
4
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
5
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
6
4
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
7
5
 
8
- require 'generalMethods.rb'
9
6
  require 'optparse'
10
- require 'semtools'
11
-
12
- ###############
13
- #METHODS
14
- ###############
15
-
16
# Translates the HPO profile of every patient in place and drops patients
# whose profile ends up empty.
#
# patient_data - Hash of patient id => record; the first element of each
#                record is the list of HPO terms (the rest is left untouched).
# hpo          - ontology object responding to translate_ids and
#                translate_names; each must return [translated, rejected].
# translate    - 'names' to turn codes into names, 'codes' to turn names into
#                codes; any other value leaves the terms untranslated.
#
# Returns the Array of patient ids removed from patient_data.
def translate_hpo(patient_data, hpo, translate)
  reject_pats = []
  patient_data.each do |patient_id, patient_record|
    hpos = patient_record.first
    case translate
    when 'names'
      hpos, rejected = hpo.translate_ids(hpos)
      # Report untranslatable codes too, instead of dropping them silently.
      unless rejected.nil? || rejected.empty?
        $stderr.puts(" The ontology codes '#{rejected.join(',')}' were not found")
      end
    when 'codes'
      hpos, rejected = hpo.translate_names(hpos)
      unless rejected.nil? || rejected.empty?
        $stderr.puts(" The ontology names '#{rejected.join(',')}' were not found")
      end
    end
    if hpos.empty?
      reject_pats << patient_id
    else
      patient_record[0] = hpos
    end
  end
  # Deletion is deferred so the hash is not modified while it is iterated.
  reject_pats.each { |rejected_id| patient_data.delete(rejected_id) }
end
38
-
39
# Writes the patient cohort to a tab-separated file.
#
# patients_with_hpo_names - Hash of patient id => [hpos, chr, start, stop].
# output_file             - path of the file to write (overwritten).
# mode                    - 'default': id, phenotypes, chr, start, stop (no header);
#                           'paco': header line, then id, chr, start, stop, phenotypes.
#
# Aborts the program when mode is neither 'default' nor 'paco'. The check runs
# before the file is opened, so an invalid mode never leaves a truncated or
# partially written output file behind.
def save_translated_file(patients_with_hpo_names, output_file, mode)
  unless %w[default paco].include?(mode)
    abort('Wrong save_mode option, please try default or paco')
  end
  File.open(output_file, 'w') do |f|
    f.puts "patient_id\tchr\tstart\tstop\tphenotypes" if mode == 'paco'
    patients_with_hpo_names.each do |id, patient_record|
      hpos, chr, start, stop = patient_record
      # Strip the "_i<N>" suffix from the patient id before writing it.
      id = id.gsub(/_i[0-9]+$/, '')
      coordinates = [chr, start, stop].join("\t")
      if mode == 'default'
        f.puts "#{id}\t#{hpos.join('|')}\t#{coordinates}"
      else
        f.puts "#{id}\t#{coordinates}\t#{hpos.join('|')}"
      end
    end
  end
end
7
+ require 'pets'
57
8
 
58
9
  ###############
59
10
  #OPTIONS
@@ -68,9 +19,9 @@ OptionParser.new do |opts|
68
19
  options[:chromosome_col] = data
69
20
  end
70
21
 
71
- options[:pat_id_col] = nil
22
+ options[:id_col] = nil
72
23
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
73
- options[:pat_id_col] = data
24
+ options[:id_col] = data
74
25
  end
75
26
 
76
27
  options[:end_col] = nil
@@ -93,9 +44,9 @@ OptionParser.new do |opts|
93
44
  options[:input_file] = value
94
45
  end
95
46
 
96
- options[:hpo_col] = nil
47
+ options[:ont_col] = nil
97
48
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
98
- options[:hpo_col] = data
49
+ options[:ont_col] = data
99
50
  end
100
51
 
101
52
  options[:start_col] = nil
@@ -103,33 +54,39 @@ OptionParser.new do |opts|
103
54
  options[:start_col] = data
104
55
  end
105
56
 
106
- options[:hpo_separator] = '|'
57
+ options[:separator] = '|'
107
58
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
108
- options[:hpo_separator] = data
59
+ options[:separator] = data
60
+ end
61
+
62
+ options[:n_phens] = nil
63
+ opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
64
+ options[:n_phens] = data.to_i
109
65
  end
110
66
 
111
- options[:save_mode] = 'default'
67
+ options[:save_mode] = :default
112
68
  opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
113
- options[:save_mode] = data
69
+ options[:save_mode] = data.to_sym
114
70
  end
115
71
 
116
- options[:translate] = nil
117
- opts.on("-t", "--translate STRING", "Set 'names' to translate from hpo codes to names or set 'codes' to translate from hpo names to codes. By default, ther is not translation") do |data|
118
- options[:translate] = data
72
+ options[:names] = false
73
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
74
+ options[:names] = true
75
+ end
76
+
77
+ options[:translate] = false
78
+ opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
79
+ options[:translate] = true
119
80
  end
120
81
  end.parse!
121
82
 
122
83
  ###############
123
84
  #MAIN
124
85
  ###############
125
- hpo_file = ENV['hpo_file']
126
- hpo_file = HPO_FILE if hpo_file.nil?
127
-
128
- patient_data = load_patient_cohort(options)
129
- if !options[:translate].nil?
130
- # hpo = Ontology.new
131
- # hpo.load_data(hpo_file)
132
- hpo = Ontology.new(file: hpo_file, load_file: true)
133
- translate_hpo(patient_data, hpo, options[:translate])
134
- end
135
- save_translated_file(patient_data, options[:output_file], options[:save_mode])
86
+ hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
87
+ Cohort.load_ontology(:hpo, hpo_file)
88
+ Cohort.act_ont = :hpo
89
+
90
+ patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
91
+ rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens]) if !options[:n_phens].nil?
92
+ patient_data.save(options[:output_file], options[:save_mode], options[:translate])
data/bin/phen2reg.rb CHANGED
@@ -2,16 +2,13 @@
2
2
  # Rojano E. & Seoane P., September 2016
3
3
  # Program to predict the position from given HPO codes, sorted by their association values.
4
4
 
5
- REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
6
5
  ROOT_PATH = File.dirname(__FILE__)
7
6
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
8
7
 
9
- require 'generalMethods.rb'
10
- require 'phen2reg_methods.rb'
11
8
  require 'optparse'
12
9
  require 'report_html'
13
10
  require 'semtools'
14
-
11
+ require 'pets'
15
12
 
16
13
  ##########################
17
14
  #OPT-PARSER
@@ -0,0 +1,110 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
5
+
6
+ require 'optparse'
7
+ require 'pets'
8
+
9
+ #############################
10
+ ## METHODS
11
+ #############################
12
# Loads a two-column, tab-separated index file into a Hash.
#
# path_index - path of the index file; the first column is used as key (id)
#              and the second column as value (path). Extra columns are ignored.
#
# Returns a Hash of id => path. Blank lines are skipped.
def load_index(path_index)
  vcf_index = {}
  # File.foreach closes the file when done; File.open(...).each without a
  # block would leave the handle open.
  File.foreach(path_index) do |line|
    id, path = line.chomp.split("\t")
    next if id.nil? || id.empty?

    vcf_index[id] = path
  end
  vcf_index
end
20
+
21
+
22
+ ##########################
23
+ #OPT-PARSER
24
+ ##########################
25
+
26
# Command-line interface: every option is stored in the `options` hash with
# its default assigned right before the switch that may override it.
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  options[:chromosome_col] = nil
  opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
    options[:chromosome_col] = data
  end

  options[:id_col] = nil
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
    options[:id_col] = data
  end

  options[:end_col] = nil
  opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
    options[:end_col] = data
  end

  options[:genome_assembly] = 'hg38'
  opts.on("-G", "--genome_assembly STRING", "Genome assembly version. Please choose between hg18, hg19 and hg38. Default hg38") do |data|
    options[:genome_assembly] = data
  end

  # The header is assumed to be present; passing -H switches it OFF, so the
  # help text must describe the flag as "no header".
  options[:header] = true
  #chr\tstart\tstop
  opts.on("-H", "--header", "Set if the file has no header line. By default a header line is expected") do
    options[:header] = false
  end

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|
    options[:input_file] = data
  end

  options[:vcf_index] = nil
  opts.on("-I", "--vcf_index PATH", "VCF file with patient id pointing to vcf path") do |data|
    options[:vcf_index] = data
  end

  options[:names] = false
  opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
    options[:names] = true
  end

  # The switch keeps its historical --output_file name, but the value is
  # stored as :output_folder.
  options[:output_folder] = nil
  opts.on("-o", "--output_file PATH", "Output folder") do |data|
    options[:output_folder] = data
  end

  options[:ont_col] = nil
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
    options[:ont_col] = data
  end

  options[:separator] = '|'
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
    options[:separator] = data
  end

  options[:start_col] = nil
  opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
    options[:start_col] = data
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
parser.parse!
97
+
98
+ #############################################################
99
+ ## MAIN
100
+ #############################################################
101
# The 'hpo_file' environment variable, when set, takes precedence over the
# HPO_FILE constant as the ontology location.
hpo_file = ENV['hpo_file'] || HPO_FILE
# NOTE(review): no switch in this script's option parser sets :excluded_hpo,
# so nil is passed as third argument here — confirm this is intended.
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
Cohort.act_ont = :hpo

# The rejection lists returned by the loader and by the check are not used
# further in this script.
patient_data, _rejected_hpos_l, _rejected_patients_l = Cohort_Parser.load(options)
_rejected_hpos_c, _rejected_patients_c = patient_data.check(true)
patient_data.link2ont(Cohort.act_ont)

# The VCF index is optional: without --vcf_index, nil is handed to the exporter.
vcf_index = options[:vcf_index].nil? ? nil : load_index(options[:vcf_index])
patient_data.export_phenopackets(options[:output_folder], options[:genome_assembly], vcf_index: vcf_index)
data/bin/reg2phen.rb CHANGED
@@ -3,14 +3,12 @@
3
3
  #data2predict = file to predict
4
4
  #training_file.txt = file with training data (association values and hpo codes).
5
5
 
6
- REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
7
6
  ROOT_PATH = File.dirname(__FILE__)
8
7
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
8
 
10
9
  require 'optparse'
11
- require 'generalMethods.rb'
12
10
  require 'semtools'
13
- require 'reg2phen_methods'
11
+ require 'pets'
14
12
 
15
13
  ##########################
16
14
  #OPT-PARSER