pets 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,59 +1,10 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
3
  ROOT_PATH = File.dirname(__FILE__)
4
- EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
5
- HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
6
4
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
7
5
 
8
- require 'generalMethods.rb'
9
6
  require 'optparse'
10
- require 'semtools'
11
-
12
- ###############
13
- #METHODS
14
- ###############
15
-
16
- def translate_hpo(patient_data, hpo, translate)
17
- reject_pats = []
18
- patient_data.each do |patientID, patient_record|
19
- hpos, chr, start, stop = patient_record
20
- if translate == 'names'
21
- # hpos, rejected = hpo.translate_codes2names(hpos)
22
- hpos, rejected = hpo.translate_ids(hpos)
23
- elsif translate =='codes'
24
- # hpos, rejected = hpo.translate_names2codes(hpos)
25
- hpos, rejected = hpo.translate_names(hpos)
26
- STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
27
- end
28
- if hpos.empty?
29
- reject_pats << patientID
30
- else
31
- patient_record[0] = hpos
32
- end
33
- end
34
- reject_pats.each do | rj_pat|
35
- patient_data.delete(rj_pat)
36
- end
37
- end
38
-
39
- def save_translated_file(patients_with_hpo_names, output_file, mode)
40
- File.open(output_file, 'w') do |f|
41
- if mode == 'paco'
42
- f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
43
- end
44
- patients_with_hpo_names.each do |id, patient_record|
45
- hpos, chr, start, stop = patient_record
46
- id = id.gsub(/_i[0-9]+$/,'')
47
- if mode == 'default'
48
- f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
49
- elsif mode == 'paco'
50
- f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
51
- else
52
- abort('Wrong save_mode] option, please try default or paco')
53
- end
54
- end
55
- end
56
- end
7
+ require 'pets'
57
8
 
58
9
  ###############
59
10
  #OPTIONS
@@ -68,9 +19,9 @@ OptionParser.new do |opts|
68
19
  options[:chromosome_col] = data
69
20
  end
70
21
 
71
- options[:pat_id_col] = nil
22
+ options[:id_col] = nil
72
23
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
73
- options[:pat_id_col] = data
24
+ options[:id_col] = data
74
25
  end
75
26
 
76
27
  options[:end_col] = nil
@@ -93,9 +44,9 @@ OptionParser.new do |opts|
93
44
  options[:input_file] = value
94
45
  end
95
46
 
96
- options[:hpo_col] = nil
47
+ options[:ont_col] = nil
97
48
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
98
- options[:hpo_col] = data
49
+ options[:ont_col] = data
99
50
  end
100
51
 
101
52
  options[:start_col] = nil
@@ -103,33 +54,39 @@ OptionParser.new do |opts|
103
54
  options[:start_col] = data
104
55
  end
105
56
 
106
- options[:hpo_separator] = '|'
57
+ options[:separator] = '|'
107
58
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
108
- options[:hpo_separator] = data
59
+ options[:separator] = data
60
+ end
61
+
62
+ options[:n_phens] = nil
63
+ opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
64
+ options[:n_phens] = data.to_i
109
65
  end
110
66
 
111
- options[:save_mode] = 'default'
67
+ options[:save_mode] = :default
112
68
  opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
113
- options[:save_mode] = data
69
+ options[:save_mode] = data.to_sym
114
70
  end
115
71
 
116
- options[:translate] = nil
117
- opts.on("-t", "--translate STRING", "Set 'names' to translate from hpo codes to names or set 'codes' to translate from hpo names to codes. By default, ther is not translation") do |data|
118
- options[:translate] = data
72
+ options[:names] = false
73
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
74
+ options[:names] = true
75
+ end
76
+
77
+ options[:translate] = false
78
+ opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
79
+ options[:translate] = true
119
80
  end
120
81
  end.parse!
121
82
 
122
83
  ###############
123
84
  #MAIN
124
85
  ###############
125
- hpo_file = ENV['hpo_file']
126
- hpo_file = HPO_FILE if hpo_file.nil?
127
-
128
- patient_data = load_patient_cohort(options)
129
- if !options[:translate].nil?
130
- # hpo = Ontology.new
131
- # hpo.load_data(hpo_file)
132
- hpo = Ontology.new(file: hpo_file, load_file: true)
133
- translate_hpo(patient_data, hpo, options[:translate])
134
- end
135
- save_translated_file(patient_data, options[:output_file], options[:save_mode])
86
# Ontology file: the 'hpo_file' environment variable takes precedence,
# otherwise the HPO_FILE constant is used.
# NOTE(review): HPO_FILE is no longer defined in this script; presumably
# `require 'pets'` provides it - confirm.
hpo_file = ENV['hpo_file'] || HPO_FILE
Cohort.load_ontology(:hpo, hpo_file)
Cohort.act_ont = :hpo

patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
# Filtering by number of phenotypes only runs when --n_phens was given.
unless options[:n_phens].nil?
  rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens])
end
patient_data.save(options[:output_file], options[:save_mode], options[:translate])
data/bin/phen2reg.rb CHANGED
@@ -2,16 +2,13 @@
2
2
  # Rojano E. & Seoane P., September 2016
3
3
  # Program to predict the position from given HPO codes, sorted by their association values.
4
4
 
5
- REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
6
5
  ROOT_PATH = File.dirname(__FILE__)
7
6
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
8
7
 
9
- require 'generalMethods.rb'
10
- require 'phen2reg_methods.rb'
11
8
  require 'optparse'
12
9
  require 'report_html'
13
10
  require 'semtools'
14
-
11
+ require 'pets'
15
12
 
16
13
  ##########################
17
14
  #OPT-PARSER
@@ -0,0 +1,110 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
5
+
6
+ require 'optparse'
7
+ require 'pets'
8
+
9
+ #############################
10
+ ## METHODS
11
+ #############################
12
# Load a tab-separated index that maps each patient id to the path of its VCF file.
#
# @param path_index [String] path to a tab-separated file: id in the first
#   column, VCF path in the second
# @return [Hash] id => path; for a repeated id the last line wins, and a line
#   without a second column maps its id to nil
def load_index(path_index)
  vcf_index = {}
  # File.foreach closes the handle when iteration finishes; the previous
  # File.open(...).each left it open until garbage collection.
  File.foreach(path_index) do |line|
    id, path = line.chomp.split("\t")
    next if id.nil? # blank line: nothing to index

    vcf_index[id] = path
  end
  vcf_index
end
20
+
21
+
22
+ ##########################
23
+ #OPT-PARSER
24
+ ##########################
25
+
26
# Command-line options are collected into `options`; each default is assigned
# right before the switch that may override it.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  options[:chromosome_col] = nil
  opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
    options[:chromosome_col] = data
  end

  options[:id_col] = nil
  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
    options[:id_col] = data
  end

  options[:end_col] = nil
  opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
    options[:end_col] = data
  end

  options[:genome_assembly] = 'hg38'
  opts.on("-G", "--genome_assembly STRING", "Genome assembly version. Please choose between hg18, hg19 and hg38. Default hg38") do |data|
    options[:genome_assembly] = data
  end

  # The header is assumed present by default; passing -H turns it OFF.
  # The help text used to claim the opposite ("Set if the file has a line header").
  options[:header] = true
  #chr\tstart\tstop
  opts.on("-H", "--header", "Set if the file has no header line. By default the file is assumed to have a header") do
    options[:header] = false
  end

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|
    options[:input_file] = data
  end

  options[:vcf_index] = nil
  opts.on("-I", "--vcf_index PATH", "VCF file with patient id pointing to vcf path") do |data|
    options[:vcf_index] = data
  end

  options[:names] = false
  opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
    options[:names] = true
  end

  # The long switch is named --output_file but the value is stored as the output folder.
  options[:output_folder] = nil
  opts.on("-o", "--output_file PATH", "Output folder") do |data|
    options[:output_folder] = data
  end

  options[:ont_col] = nil
  opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
    options[:ont_col] = data
  end

  options[:separator] = '|'
  opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
    options[:separator] = data
  end

  options[:start_col] = nil
  opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
    options[:start_col] = data
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end

end.parse!
97
+
98
+ #############################################################
99
+ ## MAIN
100
+ #############################################################
101
# Ontology file: the 'hpo_file' environment variable takes precedence,
# otherwise the HPO_FILE constant is used.
hpo_file = ENV['hpo_file'] || HPO_FILE
# NOTE(review): no switch in this script's option parser sets :excluded_hpo,
# so nil is passed as the third argument - confirm this is intended.
Cohort.load_ontology(:hpo, hpo_file, options[:excluded_hpo])
Cohort.act_ont = :hpo

patient_data, rejected_hpos_L, rejected_patients_L = Cohort_Parser.load(options)
# The flag is passed positionally. The former `check(hard=true)` passed the same
# value but also created a stray top-level local named `hard`.
rejected_hpos_C, rejected_patients_C = patient_data.check(true)
patient_data.link2ont(Cohort.act_ont)

# vcf_index stays nil when no index file was supplied.
vcf_index = options[:vcf_index].nil? ? nil : load_index(options[:vcf_index])
patient_data.export_phenopackets(options[:output_folder], options[:genome_assembly], vcf_index: vcf_index)
data/bin/reg2phen.rb CHANGED
@@ -3,14 +3,12 @@
3
3
  #data2predict = file to predict
4
4
  #training_file.txt = file with training data (association values and hpo codes).
5
5
 
6
- REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
7
6
  ROOT_PATH = File.dirname(__FILE__)
8
7
  $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
9
8
 
10
9
  require 'optparse'
11
- require 'generalMethods.rb'
12
10
  require 'semtools'
13
- require 'reg2phen_methods'
11
+ require 'pets'
14
12
 
15
13
  ##########################
16
14
  #OPT-PARSER