pets 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +41 -0
- data/Rakefile +6 -0
- data/bin/area_under_curve_pr.rb +118 -0
- data/bin/association_metrics_average.rb +94 -0
- data/bin/coPatReporter.rb +531 -0
- data/bin/console +14 -0
- data/bin/fmeasure_index.rb +72 -0
- data/bin/get_PR_values.rb +90 -0
- data/bin/get_clusters.R +18 -0
- data/bin/get_network_nodes.rb +197 -0
- data/bin/lines.R +77 -0
- data/bin/merge_by_cluster.rb +62 -0
- data/bin/merge_pairs.rb +138 -0
- data/bin/paco_translator.rb +102 -0
- data/bin/phen2reg.rb +385 -0
- data/bin/phen2reg_predictor_check.rb +297 -0
- data/bin/plot_area.R +71 -0
- data/bin/plot_boxplot.R +21 -0
- data/bin/plot_density.R +46 -0
- data/bin/plot_scatterplot.R +25 -0
- data/bin/reg2phen.rb +116 -0
- data/bin/region_to_patients_generator.rb +84 -0
- data/bin/relate_CI_to_association_value.rb +90 -0
- data/bin/setup +8 -0
- data/bin/standardize_scores.R +40 -0
- data/bin/xyplot_graph.R +60 -0
- data/external_data/biosystems_gene.gz +0 -0
- data/external_data/bsid2info.gz +0 -0
- data/external_data/chromosome_sizes_hg19.txt +24 -0
- data/external_data/gene_data.gz +0 -0
- data/external_data/gene_data_with_pathways.gz +0 -0
- data/external_data/gene_location.gz +0 -0
- data/external_data/hp.obo +146363 -0
- data/external_data/remove +0 -0
- data/lib/pets.rb +6 -0
- data/lib/pets/coPatReporterMethods.rb +77 -0
- data/lib/pets/generalMethods.rb +556 -0
- data/lib/pets/phen2reg_methods.rb +432 -0
- data/lib/pets/version.rb +3 -0
- data/pets.gemspec +47 -0
- data/templates/cohort_report.erb +93 -0
- data/templates/patient_report.erb +209 -0
- metadata +183 -0
@@ -0,0 +1,432 @@
|
|
1
|
+
require "statistics2"
|
2
|
+
require "terminal-table"
|
3
|
+
#require "report_html"
|
4
|
+
#require 'bigdecimal'
|
5
|
+
|
6
|
+
# Looks up every profile HPO in the association (training) data and keeps
# the ones that have an entry.
#
# info2predict - list of HPO codes to look up.
# trainingData - lookup queried with [] using the HPO code.
#
# Returns a Hash (hpo => regions) with only the HPOs whose lookup was not nil,
# in the order they appear in info2predict.
def search4HPO(info2predict, trainingData)
  info2predict.each_with_object({}) do |hpo, hpo_regions|
    regions = trainingData[hpo]
    hpo_regions[hpo] = regions unless regions.nil?
  end
end
|
17
|
+
|
18
|
+
# Re-indexes HPO-centred association data by region.
#
# hpo_regions - hpo => [[chr, start, stop, regID, score], [...]]
#
# Returns three hashes, all keyed by region ID:
#   region2hpo         - regionID => [hpo, ...]
#   regionAttributes   - regionID => [chr, start, stop, patients, length]
#   association_scores - regionID => { hpo => score }
def group_by_region(hpo_regions)
  region2hpo = {}
  regionAttributes = {}
  association_scores = {}
  hpo_regions.each do |hpo, regions|
    regions.each do |chr, start, stop, regionID, association_score|
      (region2hpo[regionID] ||= []) << hpo

      # Attributes are recorded only the first time a region is seen.
      unless regionAttributes.key?(regionID)
        # The fourth dot-separated field of the region ID is read as the
        # number of patients in the region.
        patients = regionID.split('.')[3].to_i
        regionAttributes[regionID] = [chr, start, stop, patients, stop - start]
      end

      (association_scores[regionID] ||= {})[hpo] = association_score
    end
  end
  [region2hpo, regionAttributes, association_scores]
end
|
47
|
+
|
48
|
+
# For every region, tries to replace each profile HPO that is missing from the
# region with a parental HPO that is associated with that region.
#
# patient_hpo_profile - list of HPO codes of the patient (extended in place).
# trainingData        - hpo => list of regions, passed to the parent search.
# region2hpo          - regionID => [hpo, ...] (lists extended in place).
# regionAttributes    - not used by this method.
# association_scores  - regionID => { hpo => score } (merged in place).
# hpo_metadata        - HPO metadata, passed to the parent search.
#
# Returns patient_hpo_profile after appending the distinct parental HPOs found.
def add_parentals_of_not_found_hpos_in_regions(
  patient_hpo_profile,
  trainingData,
  region2hpo,
  regionAttributes,
  association_scores,
  hpo_metadata
)
  added_hpos = region2hpo.flat_map do |regionID, hpos|
    parental_hpos = []
    hpo_scores = {}
    (patient_hpo_profile - hpos).each do |hpo|
      region, parental_hpo = get_region_with_parental_hpo(hpo, regionID, trainingData, hpo_metadata)
      next if region.nil?
      # Skip parents already collected for this region or already in the profile.
      next if parental_hpos.include?(parental_hpo) || patient_hpo_profile.include?(parental_hpo)

      parental_hpos << parental_hpo
      hpo_scores[parental_hpo] = region.last # last field of the region record is used as the score
    end
    hpos.concat(parental_hpos)
    association_scores[regionID].merge!(hpo_scores)
    parental_hpos
  end
  patient_hpo_profile.concat(added_hpos.uniq)
end
|
76
|
+
|
77
|
+
# Walks up the HPO relations of `hpo`, one level at a time, looking for the
# first related term that is associated with the region `regionID`.
#
# hpo          - HPO code whose parental terms are explored.
# regionID     - region identifier; compared against field 3 of each region.
# trainingData - hpo => list of region records.
# hpo_metadata - hpo => [main_hpo_code, phenotype, relations]; the first
#                element of each relation is taken as the related HPO code.
#
# Each related term is examined at most once, so the walk terminates even if
# the relations contain a cycle, and terms reachable through several paths
# are not expanded repeatedly.
#
# Returns [region, parental_hpo], or [nil, nil] when no related term is
# associated with the region.
def get_region_with_parental_hpo(hpo, regionID, trainingData , hpo_metadata)
  visited = {}
  current_level = [hpo]
  until current_level.empty?
    # Collect the not-yet-seen related terms of the current level, keeping
    # first-occurrence order.
    next_level = []
    current_level.each do |hp|
      hpo_data = hpo_metadata[hp]
      next if hpo_data.nil?
      _main_hpo_code, _phenotype, relations = hpo_data
      relations.each do |rel|
        related_hpo = rel.first
        next if visited[related_hpo]
        visited[related_hpo] = true
        next_level << related_hpo
      end
    end

    # The first term of this level that has the region wins.
    next_level.each do |candidate|
      regions = trainingData[candidate]
      next if regions.nil?
      region = regions.find { |reg| reg[3] == regionID }
      return region, candidate unless region.nil?
    end
    current_level = next_level
  end
  return nil, nil
end
|
106
|
+
|
107
|
+
# Builds the region x HPO score matrix used for scoring and plotting.
#
# region2hpo         - regionID => [hpo, ...]; only the keys (and their order)
#                      are used here.
# association_scores - regionID => { hpo => score }.
# info2predict       - HPO list from the user; defines the column order.
# null_value         - value stored when a region has no score for an HPO.
#
# Returns an Array of rows (one per region, in region2hpo order), each row
# holding one value per HPO in info2predict.
def generate_hpo_region_matrix(region2hpo, association_scores, info2predict, null_value=0)
  region2hpo.map do |regionID, _hpos_list|
    info2predict.map do |user_hpo|
      score = association_scores[regionID][user_hpo]
      score.nil? ? null_value : score
    end
  end
end
|
126
|
+
|
127
|
+
# Combines the per-HPO association values of each region into a single score,
# appends that score to the region's attribute array (in place) and, for some
# scoring systems, drops the regions that do not pass the cutoff.
#
# regionAttributes  - regionID => attribute array; rows of hpo_region_matrix
#                     are paired with regionAttributes.values by position.
# hpo_region_matrix - e.g. [[0, 0.4, 0, 0.4], [0, 0, 0.5, 0.4]]
# scoring_system    - 'mean', 'fisher', 'stouffer', 'geommean', 'median',
#                     'maxnum' or 'minnum'; anything else aborts.
# pvalue_cutoff     - threshold applied after scoring (see below).
# freedom_degree    - how the sample length is obtained:
#                     'prednum' (row length), 'phennum' (non-null values in
#                     the row) or 'maxnum' (largest non-null count over all
#                     rows); anything else aborts.
# null_value        - value that marks "no association" in the matrix.
#
# Filtering: 'mean', 'geommean', 'maxnum' and 'minnum' keep regions whose
# score is >= pvalue_cutoff; 'fisher' keeps regions whose combined p-value is
# <= pvalue_cutoff (smaller means a better association). 'stouffer' and
# 'median' are not filtered.
def scoring_regions(regionAttributes, hpo_region_matrix, scoring_system, pvalue_cutoff, freedom_degree, null_value=0)
  regionAttributes_array = regionAttributes.values
  max_cluster_length = nil
  if freedom_degree == 'maxnum'
    max_cluster_length = hpo_region_matrix.map { |row| row.count { |v| v != null_value } }.max
  end

  hpo_region_matrix.each_with_index do |associations, i|
    sample_length =
      case freedom_degree
      when 'prednum' then associations.length
      when 'phennum' then associations.count { |s| s != null_value }
      when 'maxnum'  then max_cluster_length
      else abort("Invalid freedom degree calculation method: #{freedom_degree}")
      end

    score =
      case scoring_system
      when 'mean'
        associations.inject(0) { |s, x| s + x } / sample_length
      when 'fisher'
        # hyper must be ln not log10 from net analyzer
        # https://en.wikipedia.org/wiki/Fisher%27s_method
        lns = associations.map { |a| Math.log(10 ** -a) } # hyper values come as log10 values
        sum = lns.inject(0) { |s, a| s + a }
        Statistics2.chi2_x(sample_length * 2, -2 * sum)
      when 'stouffer'
        associations.inject(0) { |s, x| s + x } / Math.sqrt(sample_length)
      when 'geommean'
        # NOTE: if troubles with number size, use BigDecimal
        product = associations.inject(1) { |s, x| s * x }
        product.to_f ** (sample_length ** -1)
      when 'median'
        # Middle value of the sorted row; for an even number of values the
        # average of the two central values.
        sorted = associations.sort
        middle = sorted.length / 2
        if sorted.length.even?
          (sorted[middle - 1] + sorted[middle]) / 2.0
        else
          sorted[middle]
        end
      when 'maxnum'
        associations.max
      when 'minnum'
        associations.min
      else
        abort("Invalid ranking method: #{scoring_system}")
      end
    regionAttributes_array[i] << score
  end

  case scoring_system
  when 'mean', 'geommean', 'maxnum', 'minnum'
    regionAttributes.select! { |_regionID, attributes| attributes.last >= pvalue_cutoff }
  when 'fisher'
    # Combined p-value: less value equals better association -> not due randomly.
    regionAttributes.select! { |_regionID, attributes| attributes.last <= pvalue_cutoff }
  end
end
|
196
|
+
|
197
|
+
# Merges neighbouring regions that look like pieces of the same association.
#
# regions - list of
#   [chr, start, stop, association_values.keys, association_values.values, score]
#
# Regions are sorted by chromosome and start. A region is merged into the
# previous one when all of these hold:
#   * same chromosome,
#   * previous stop and next start differ by at most 1,
#   * scores differ by at most 5% of the larger score,
#   * field 3 (the HPO list) is identical.
# A merged region keeps the later stop, the element-wise maximum of the
# association values and the maximum score. The first region record of each
# merged run is modified in place.
#
# Returns the list of merged regions; an empty input yields an empty list.
def join_regions(regions)
  return [] if regions.empty?

  sorted_regions = regions.sort_by { |reg| [reg[0], reg[1]] }
  merged_regions = [sorted_regions.first]
  sorted_regions.drop(1).each do |next_region|
    ref_reg = merged_regions.last
    mergeable = ref_reg[0] == next_region[0] &&
                (ref_reg[2] - next_region[1]).abs <= 1 &&
                (ref_reg[5] - next_region[5]).abs.fdiv([ref_reg[5], next_region[5]].max) <= 0.05 &&
                ref_reg[3] == next_region[3]
    unless mergeable
      merged_regions << next_region
      next
    end

    ref_reg[2] = next_region[2]
    next_assoc_values = next_region[4]
    ref_reg[4] = ref_reg[4].each_with_index.map do |ref_val, i|
      [ref_val, next_assoc_values[i]].max
    end
    ref_reg[5] = [ref_reg[5], next_region[5]].max
  end
  merged_regions
end
|
228
|
+
|
229
|
+
# def hpo_quality_control(prediction_data, hpo_metadata_file, information_coefficient_file)
|
230
|
+
# Builds one descriptive row per HPO of the profile.
#
# prediction_data    - [hpo1, hpo2, hpo3...]
# hpo_metadata       - hpo_code => [main_hpo_code, phenotype, relations],
#                      relations = [[hpo_code_relation, name_relation], ...]
# hpo_child_metadata - hpo_code => children (used as-is; missing => []).
# hpos_ci_values     - hpo_code => ci
#
# Each row is:
#   [hpo name, hpo code, "yes"/"no" (has a ci value), ci or "-",
#    parents found in the profile (or ["-"]), children]
#
# Returns the Array of rows, in prediction_data order.
def hpo_quality_control(prediction_data, hpo_metadata, hpo_child_metadata, hpos_ci_values)
  prediction_data.map do |hpo_code|
    ci = hpos_ci_values[hpo_code]
    _main_hpo_code, hpo_name, relations = hpo_metadata[hpo_code]

    row = [hpo_name, hpo_code]
    row.concat(ci.nil? ? ["no", "-"] : ["yes", ci])

    parent = check_parents(relations, prediction_data, hpo_metadata)
    parent << "-" if parent.empty?
    row << parent

    childs = hpo_child_metadata[hpo_code]
    row << (childs.nil? ? [] : childs)
    row
  end
end
|
266
|
+
|
267
|
+
# Recursively collects the ancestors of a term that are present in the profile.
#
# relations       - [[parent_hpo_code, parent_hpo_name], ...] of the term.
# prediction_data - list of HPO codes of the profile.
# hpo_metadata    - hpo_code => metadata array whose last element is the
#                   relations list of that term.
#
# Returns an Array of [code, name] pairs: each direct parent found in the
# profile, followed by the matches found among that parent's own ancestors.
# An ancestor reachable through several paths is listed once per path.
def check_parents(relations, prediction_data, hpo_metadata)
  relations.flat_map do |par_hpo_code, par_hpo_name|
    matches = prediction_data.include?(par_hpo_code) ? [[par_hpo_code, par_hpo_name]] : []
    grand_par_hpo = hpo_metadata[par_hpo_code]
    if grand_par_hpo.nil?
      matches
    else
      matches + check_parents(grand_par_hpo.last, prediction_data, hpo_metadata)
    end
  end
end
|
280
|
+
|
281
|
+
# Renders the patient report to an HTML file.
#
# characterised_hpos     - rows describing the profile HPOs.
# hpo_associated_regions - exposed to the template as :merged_regions.
# html_file              - path passed to the report's write call.
# hpo_metadata, genes_with_kegg_data, pathway_stats - passed through to the
#                          template container unchanged.
#
# The template 'patient_report.erb' is read from REPORT_FOLDER, which must be
# defined by the caller's environment, as must the Report_html class.
def report_data(characterised_hpos, hpo_associated_regions, html_file, hpo_metadata, genes_with_kegg_data, pathway_stats)
  container = {
    :characterised_hpos => characterised_hpos,
    :merged_regions => hpo_associated_regions,
    :hpo_metadata => hpo_metadata,
    :genes_with_kegg_data => genes_with_kegg_data,
    :pathway_stats => pathway_stats
  }
  # File.read closes the file itself; File.open(...).read left the handle
  # open until garbage collection.
  template = File.read(File.join(REPORT_FOLDER, 'patient_report.erb'))
  report = Report_html.new(container, 'Patient HPO profile summary')
  report.build(template)
  report.write(html_file)
end
|
293
|
+
|
294
|
+
##############################################################################
|
295
|
+
##############################################################################
|
296
|
+
##### OLD CODE FOR JOIN REGIONS BY BORDERS
|
297
|
+
##############################################################################
|
298
|
+
##############################################################################
|
299
|
+
|
300
|
+
# def sorting_regions_by_shared_hpos(region2hpo)
|
301
|
+
# #if regions share the same hpos, sort regions from lowest to highest
|
302
|
+
# #this method returns an array for its use in cluster_regions_by_common_hpos method
|
303
|
+
# arr_region2hpo = []
|
304
|
+
# region2hpo.each do |region, hpos|
|
305
|
+
# arr_region2hpo << [region, hpos.sort]
|
306
|
+
# end
|
307
|
+
# arr_region2hpo.sort!{|r1, r2| r1.last <=> r2.last}
|
308
|
+
# # # arr_region2hpo = [[1.1.A.1, [hpo1, hpo2, hpo3]], [1.2.A.1, [hpo1, hpo2, hpo3]]...]
|
309
|
+
# return arr_region2hpo
|
310
|
+
# end
|
311
|
+
|
312
|
+
# def cluster_regions_by_common_hpos(arr_region2hpo)
|
313
|
+
# #method for grouping hpos within different locations
|
314
|
+
# regions_by_hpos = {}
|
315
|
+
# last_hpos = []
|
316
|
+
# regions = []
|
317
|
+
# all_regions = []
|
318
|
+
# arr_region2hpo.each do |region, hpos|
|
319
|
+
# all_regions << region
|
320
|
+
# if last_hpos == hpos
|
321
|
+
# regions << region
|
322
|
+
# else
|
323
|
+
# regions_by_hpos[last_hpos] = regions if !last_hpos.empty?
|
324
|
+
# regions = [region]
|
325
|
+
# end
|
326
|
+
# last_hpos = hpos
|
327
|
+
# end
|
328
|
+
# regions_by_hpos[last_hpos] = regions
|
329
|
+
# #puts regions_by_hpos.inspect
|
330
|
+
# # #regions_by_hpos = {[hpo1, hpo2, hpo3] => [1.1.A.1, 1.2.A.4, 1.3.A.12]...}
|
331
|
+
# return regions_by_hpos
|
332
|
+
# end
|
333
|
+
|
334
|
+
# def prepare_regions_for_profile_analysis(region2hpo, regionAttributes, association_scores, weight_style)
|
335
|
+
# # region2hpo = {region => [hpo1, hpo2...]}
|
336
|
+
# # regionAttributes = {region => [chr, start, stop, patients_number, region_length, region]}
|
337
|
+
# hpo_associated_regions = []
|
338
|
+
# arr_region2hpo = sorting_regions_by_shared_hpos(region2hpo)
|
339
|
+
# regions_by_hpos = cluster_regions_by_common_hpos(arr_region2hpo)
|
340
|
+
# regions_by_hpos.each do |hpos_list, regions|
|
341
|
+
# regionIDs = []
|
342
|
+
# regions_lengths = []
|
343
|
+
# patients_numbers = []
|
344
|
+
# region_attributes = regions.map { |region| regionAttributes[region] }
|
345
|
+
# region_attributes.each do |attributes|
|
346
|
+
# cur_chr, cur_start, cur_stop, cur_patients_number, cur_region_length, cur_regionID = attributes
|
347
|
+
# add_region(hpo_associated_regions, cur_chr, cur_start, cur_stop, hpos_list, [cur_regionID], association_scores, [cur_region_length], [cur_patients_number], weight_style)
|
348
|
+
# end
|
349
|
+
# end
|
350
|
+
# #puts hpo_associated_regions.inspect
|
351
|
+
# return hpo_associated_regions
|
352
|
+
# end
|
353
|
+
|
354
|
+
# def join_regions_by_borders(region2hpo, regionAttributes, association_scores, weight_style)
|
355
|
+
# # region2hpo = {region => [hpo1, hpo2...]}
|
356
|
+
# # regionAttributes = {region => [chr, start, stop, patients_number, region_length, region]}
|
357
|
+
# joined_regions_by_borders = []
|
358
|
+
# arr_region2hpo = sorting_regions_by_shared_hpos(region2hpo)
|
359
|
+
# regions_by_hpos = cluster_regions_by_common_hpos(arr_region2hpo)
|
360
|
+
# regions_by_hpos.each do |hpos_list, regions|
|
361
|
+
# regionIDs = []
|
362
|
+
# regions_lengths = []
|
363
|
+
# patients_numbers = []
|
364
|
+
# region_attributes = regions.map { |region| regionAttributes[region] }
|
365
|
+
# region_attributes.sort! { |r1, r2| [r1[0], r1[1]] <=> [r2[0], r2[1]] }
|
366
|
+
# tmp_chr = nil
|
367
|
+
# tmp_start = nil
|
368
|
+
# tmp_stop = nil
|
369
|
+
# region_attributes.each_with_index do |attributes, counter|
|
370
|
+
# break if counter + 1 == region_attributes.length
|
371
|
+
# cur_chr, cur_start, cur_stop, cur_patients_number, cur_region_length, cur_regionID = attributes
|
372
|
+
# next_chr, next_start, next_stop, next_patients_number, next_region_length, next_regionID = region_attributes[counter + 1]
|
373
|
+
# if cur_chr == next_chr
|
374
|
+
# if cur_stop == next_start || cur_stop == next_start + 1
|
375
|
+
# tmp_chr = cur_chr
|
376
|
+
# tmp_start = cur_start if tmp_start.nil?
|
377
|
+
# tmp_stop = cur_stop
|
378
|
+
# else
|
379
|
+
# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style)
|
380
|
+
# tmp_chr = nil
|
381
|
+
# tmp_start = nil
|
382
|
+
# tmp_stop = nil
|
383
|
+
# end
|
384
|
+
# else
|
385
|
+
# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style)
|
386
|
+
# tmp_chr = nil
|
387
|
+
# tmp_start = nil
|
388
|
+
# tmp_stop = nil
|
389
|
+
# end
|
390
|
+
# regionIDs << cur_regionID if regionIDs.empty?
|
391
|
+
# regionIDs << next_regionID
|
392
|
+
# regions_lengths << cur_region_length if regions_lengths.empty?
|
393
|
+
# regions_lengths << next_region_length
|
394
|
+
# patients_numbers << cur_patients_number if patients_numbers.empty?
|
395
|
+
# patients_numbers << next_patients_number
|
396
|
+
# end
|
397
|
+
# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style)
|
398
|
+
# end
|
399
|
+
# #puts joined_regions_by_borders.inspect
|
400
|
+
# return joined_regions_by_borders
|
401
|
+
# end
|
402
|
+
|
403
|
+
# def add_region(hpo_associated_regions, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, region_lengths, patients_numbers, weight_style)
|
404
|
+
# #region_lengths = number of regions that have the same HPOs
|
405
|
+
# unless tmp_chr.nil? && tmp_start.nil? && tmp_stop.nil?
|
406
|
+
# association_values_by_region = regionIDs.map {|r| association_scores[r]}
|
407
|
+
# weighted_association_scores = []
|
408
|
+
# hpos_list.each do |hpo|
|
409
|
+
# scores = association_values_by_region.map{|hpo_scores| hpo_scores[hpo] }
|
410
|
+
# weighted_score = 0
|
411
|
+
# weight = 0
|
412
|
+
# if scores.length == 1
|
413
|
+
# weighted_score = scores.first
|
414
|
+
# weight = 1
|
415
|
+
# else
|
416
|
+
# scores.each_with_index do |s, i|
|
417
|
+
# if weight_style == 'double'
|
418
|
+
# weighted_score += s * region_lengths[i] * patients_numbers[i]
|
419
|
+
# weight += region_lengths[i] * patients_numbers[i]
|
420
|
+
# elsif weight_style == 'simple'
|
421
|
+
# weighted_score += s * region_lengths[i]
|
422
|
+
# weight += region_lengths[i]
|
423
|
+
# else
|
424
|
+
# abort("Invalid weight method: #{weight_style}")
|
425
|
+
# end
|
426
|
+
# end
|
427
|
+
# end
|
428
|
+
# weighted_association_scores << weighted_score/weight
|
429
|
+
# end
|
430
|
+
# hpo_associated_regions << [tmp_chr, tmp_start, tmp_stop, hpos_list, weighted_association_scores]
|
431
|
+
# end
|
432
|
+
# end
|
data/lib/pets/version.rb
ADDED
data/pets.gemspec
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
|
2
|
+
# Gem specification for the pets suite.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "pets/version"

Gem::Specification.new do |spec|
  spec.name          = "pets"
  spec.version       = Pets::VERSION
  # One array element per person, so each author and address is listed separately.
  spec.authors       = ["Elena Rojano", "Pedro Seoane"]
  spec.email         = ["elenarojano@uma.es", "seoanezonjic@uma.es"]

  spec.summary       = %q{Suite with predictive tools.}
  spec.description   = %q{PETS suite includes three different tools. CohortAnalyzer performs the calculation of several statistics that gives an overview of a cohort of patients to analyse. Reg2Phen uses associations between pathological phenotypes and regions of the genome (these associations can be calculated from the cohort of patients if they include genotypic & phenotypic information using NetAnalyzer, another Ruby gem) to find, for a given genomic region, which pathological phenotypes have been associated with that region. The third tool, Phen2Reg, is a predictor that using the same associations as Reg2Phen, predicts which genomic regions can be the cause of a list of pathological phenotypes observed in a patient.}
  spec.homepage      = "https://bitbucket.org/elenarojano/reg2phen/src/master/bin/reg2phen.rb"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  #
  #   spec.metadata["homepage_uri"] = spec.homepage
  #   spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
  #   spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "bin"
  # NOTE(review): bindir is "bin" but executables are collected from "exe/",
  # so this list looks like it will be empty and no script gets installed --
  # confirm whether the bin/*.rb tools are meant to be executables.
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_dependency "statistics2"
  spec.add_dependency "terminal-table"
  spec.add_dependency "report_html" # ask about this gem

end
|
47
|
+
|