pets 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +41 -0
- data/Rakefile +6 -0
- data/bin/area_under_curve_pr.rb +118 -0
- data/bin/association_metrics_average.rb +94 -0
- data/bin/coPatReporter.rb +531 -0
- data/bin/console +14 -0
- data/bin/fmeasure_index.rb +72 -0
- data/bin/get_PR_values.rb +90 -0
- data/bin/get_clusters.R +18 -0
- data/bin/get_network_nodes.rb +197 -0
- data/bin/lines.R +77 -0
- data/bin/merge_by_cluster.rb +62 -0
- data/bin/merge_pairs.rb +138 -0
- data/bin/paco_translator.rb +102 -0
- data/bin/phen2reg.rb +385 -0
- data/bin/phen2reg_predictor_check.rb +297 -0
- data/bin/plot_area.R +71 -0
- data/bin/plot_boxplot.R +21 -0
- data/bin/plot_density.R +46 -0
- data/bin/plot_scatterplot.R +25 -0
- data/bin/reg2phen.rb +116 -0
- data/bin/region_to_patients_generator.rb +84 -0
- data/bin/relate_CI_to_association_value.rb +90 -0
- data/bin/setup +8 -0
- data/bin/standardize_scores.R +40 -0
- data/bin/xyplot_graph.R +60 -0
- data/external_data/biosystems_gene.gz +0 -0
- data/external_data/bsid2info.gz +0 -0
- data/external_data/chromosome_sizes_hg19.txt +24 -0
- data/external_data/gene_data.gz +0 -0
- data/external_data/gene_data_with_pathways.gz +0 -0
- data/external_data/gene_location.gz +0 -0
- data/external_data/hp.obo +146363 -0
- data/external_data/remove +0 -0
- data/lib/pets.rb +6 -0
- data/lib/pets/coPatReporterMethods.rb +77 -0
- data/lib/pets/generalMethods.rb +556 -0
- data/lib/pets/phen2reg_methods.rb +432 -0
- data/lib/pets/version.rb +3 -0
- data/pets.gemspec +47 -0
- data/templates/cohort_report.erb +93 -0
- data/templates/patient_report.erb +209 -0
- metadata +183 -0
@@ -0,0 +1,432 @@
|
|
1
|
+
require "statistics2"
|
2
|
+
require "terminal-table"
|
3
|
+
#require "report_html"
|
4
|
+
#require 'bigdecimal'
|
5
|
+
|
6
|
+
# Look up which HPO terms of a profile are present in the association data.
#
# info2predict - enumerable of HPO codes (the profile to predict).
# trainingData - object indexed by HPO code; yields the regions associated
#                with that term, or nil when the term is unknown.
#
# Returns a Hash {hpo_code => regions} containing only the profile terms for
# which trainingData returned a non-nil value.
def search4HPO(info2predict, trainingData)
  info2predict.each_with_object({}) do |hpo, found|
    regions = trainingData[hpo]
    found[hpo] = regions unless regions.nil?
  end
end
|
17
|
+
|
18
|
+
# Re-index HPO => regions associations by region.
#
# hpo_regions - Hash hpo => [[chr, start, stop, regID, score], [...]]
#
# Returns three Hashes keyed by region ID:
#   region2hpo         - regionID => [hpo, ...]
#   regionAttributes   - regionID => [chr, start, stop, total_patients, length]
#   association_scores - regionID => {hpo => score}
def group_by_region(hpo_regions)
  region2hpo = {}
  regionAttributes = {}
  association_scores = {}
  hpo_regions.each do |hpo, regions|
    regions.each do |chr, start, stop, regionID, association_score|
      (region2hpo[regionID] ||= []) << hpo
      # Attributes are computed only the first time a region is seen. The
      # patient count is read from the fourth dot-separated field of the ID.
      regionAttributes[regionID] ||= [chr, start, stop, regionID.split('.')[3].to_i, stop - start]
      (association_scores[regionID] ||= {})[hpo] = association_score
    end
  end
  return region2hpo, regionAttributes, association_scores
end
|
47
|
+
|
48
|
+
# For every region, try to replace the profile terms that are not associated
# with it by an ancestor term that is, as found by
# get_region_with_parental_hpo.
#
# Mutates its arguments in place:
#   * region2hpo[regionID] gains the ancestor terms found for that region,
#   * association_scores[regionID] gains their scores (last field of the
#     matching training region),
#   * patient_hpo_profile gains every new ancestor term (without repeats).
#
# regionAttributes is accepted but not used here.
# Returns patient_hpo_profile (the result of Array#concat).
def add_parentals_of_not_found_hpos_in_regions(
  patient_hpo_profile,
  trainingData,
  region2hpo,
  regionAttributes,
  association_scores,
  hpo_metadata # hpo_code => [phenotype, relations]
  )
  added_parentals = []
  region2hpo.each do |regionID, hpos|
    region_parentals = []
    parental_scores = {}
    (patient_hpo_profile - hpos).each do |missing_hpo|
      region, parental_hpo = get_region_with_parental_hpo(missing_hpo, regionID, trainingData, hpo_metadata)
      next if region.nil?
      # Skip ancestors already collected for this region or already in the profile.
      next if region_parentals.include?(parental_hpo) || patient_hpo_profile.include?(parental_hpo)
      region_parentals << parental_hpo
      parental_scores[parental_hpo] = region.last
    end
    hpos.concat(region_parentals)
    added_parentals.concat(region_parentals)
    association_scores[regionID].merge!(parental_scores)
  end
  patient_hpo_profile.concat(added_parentals.uniq)
end
|
76
|
+
|
77
|
+
# Walk up the ontology from +hpo+, level by level, looking for the closest
# ancestor term that is associated with +regionID+ in the training data.
#
# hpo          - HPO code whose ancestors are explored (the term itself is
#                not tested).
# regionID     - region identifier, compared against field [3] of each
#                training region.
# trainingData - object indexed by HPO code yielding an array of regions
#                ([chr, start, stop, regID, score]) or nil.
# hpo_metadata - Hash hpo_code => [main_hpo_code, phenotype, relations],
#                where each relation's first element is a parent HPO code.
#
# Returns [region, ancestor_code] for the first match in breadth-first
# order, or [nil, nil] when no ancestor is associated with the region.
#
# Every ancestor is tested and expanded at most once. This keeps the search
# finite when the relations contain a cycle, and avoids re-exploring shared
# ancestors when a term has several parents.
def get_region_with_parental_hpo(hpo, regionID, trainingData , hpo_metadata)
  checked = {}
  current_level = [hpo]
  until current_level.empty?
    parents = []
    current_level.each do |term|
      term_data = hpo_metadata[term]
      next if term_data.nil?
      _main_hpo_code, _phenotype, relations = term_data
      relations.each do |relation|
        parent_code = relation.first
        next if checked.key?(parent_code)
        checked[parent_code] = true
        parents << parent_code
      end
    end
    parents.each do |parent_code|
      regions = trainingData[parent_code]
      next if regions.nil?
      region = regions.find { |reg| reg[3] == regionID }
      return region, parent_code unless region.nil?
    end
    current_level = parents
  end
  return nil, nil
end
|
106
|
+
|
107
|
+
# Build the region x HPO score matrix used for scoring and plotting.
#
# region2hpo         - Hash keyed by region ID; only the keys (and their
#                      order) are used, one matrix row per region.
# association_scores - Hash regionID => {hpo => score}.
# info2predict       - list of HPO codes from the user, one column per term.
# null_value         - value stored when a region has no score for a term.
#
# Returns an Array of rows, e.g. [[0, 0.4, 0, 0.4], [0, 0, 0.5, 0.4]].
def generate_hpo_region_matrix(region2hpo, association_scores, info2predict, null_value=0)
  region2hpo.map do |regionID, _hpos_list|
    info2predict.map do |user_hpo|
      score = association_scores[regionID][user_hpo]
      score.nil? ? null_value : score
    end
  end
end
|
126
|
+
|
127
|
+
# Combine the per-HPO association values of every region into one score,
# append it to the region attributes and drop the regions that do not pass
# the cutoff.
#
# regionAttributes  - Hash regionID => attributes Array. The combined score
#                     is appended to each Array and the Hash is filtered in
#                     place. Its value order must match the row order of
#                     hpo_region_matrix (row i scores the i-th value).
# hpo_region_matrix - Array of rows, e.g. [[0, 0.4, 0, 0.4], [0, 0, 0.5, 0.4]].
# scoring_system    - 'mean', 'fisher', 'stouffer', 'geommean', 'median',
#                     'maxnum' or 'minnum'; anything else aborts.
# pvalue_cutoff     - threshold applied to the combined score.
# freedom_degree    - how the sample length is obtained:
#                     'prednum' (row length), 'phennum' (non-null entries of
#                     the row) or 'maxnum' (largest non-null count over all
#                     rows); anything else aborts.
# null_value        - value that marks "no association" in the matrix.
#
# The return value is whatever the final Hash#select! yields (nil when
# nothing was removed or when no filter applies); rely on the in-place
# changes to regionAttributes instead.
def scoring_regions(regionAttributes, hpo_region_matrix, scoring_system, pvalue_cutoff, freedom_degree, null_value=0)
  regionAttributes_array = regionAttributes.values
  max_cluster_length = nil
  if freedom_degree == 'maxnum'
    max_cluster_length = hpo_region_matrix.map { |row| row.count { |score| score != null_value } }.max
  end

  hpo_region_matrix.each_with_index do |associations, i|
    sample_length =
      case freedom_degree
      when 'prednum' then associations.length
      when 'phennum' then associations.count { |score| score != null_value }
      when 'maxnum' then max_cluster_length
      else abort("Invalid freedom degree calculation method: #{freedom_degree}")
      end

    region_score =
      case scoring_system
      when 'mean'
        # fdiv keeps the fractional part even when every score is an Integer.
        associations.inject(0) { |sum, score| sum + score }.fdiv(sample_length)
      when 'fisher'
        # https://en.wikipedia.org/wiki/Fisher%27s_method
        # Hyper values come as -log10(p) and Fisher's method needs ln(p):
        # ln(10 ** -a) == -a * ln(10). Computing it this way cannot underflow
        # to ln(0) for very large association values.
        log_sum = associations.inject(0) { |sum, score| sum - score * Math.log(10) }
        Statistics2.chi2_x(sample_length * 2, -2 * log_sum)
      when 'stouffer'
        associations.inject(0) { |sum, score| sum + score } / Math.sqrt(sample_length)
      when 'geommean'
        # NOTE: if troubles with number size, use BigDecimal
        product = associations.inject(1) { |acc, score| acc * score }
        product.to_f ** (sample_length ** -1)
      when 'median'
        sorted = associations.sort
        middle = sorted.length / 2
        if sorted.empty?
          nil
        elsif sorted.length.odd?
          sorted[middle]
        else
          # Even number of values: average the two central ones.
          (sorted[middle - 1] + sorted[middle]) / 2.0
        end
      when 'maxnum' then associations.max
      when 'minnum' then associations.min
      else abort("Invalid ranking method: #{scoring_system}")
      end
    regionAttributes_array[i] << region_score
  end

  # Combined p-value (fisher): a lower value means a better association, so
  # the cutoff is an upper bound. For the other filtered systems a higher
  # score is better.
  # NOTE(review): 'stouffer' and 'median' scores are not filtered at all;
  # confirm this is intended.
  if %w[mean geommean maxnum minnum].include?(scoring_system)
    regionAttributes.select! { |_regionID, attributes| attributes.last >= pvalue_cutoff }
  elsif scoring_system == 'fisher'
    regionAttributes.select! { |_regionID, attributes| attributes.last <= pvalue_cutoff }
  end
end
|
196
|
+
|
197
|
+
# Merge consecutive regions that are contiguous and equivalent.
#
# regions - Array of
#           [chr, start, stop, association_values.keys, association_values.values, score]
#
# Regions are sorted by chromosome and start. A region is merged into the
# previous one when all of these hold:
#   * same chromosome,
#   * the previous stop and the next start differ by at most 1,
#   * the scores differ by at most 5% of the larger score,
#   * both carry the same list of terms (field [3]).
# The merged region spans up to the stop of the absorbed region and keeps,
# element by element, the larger association value and the larger score.
# The region Arrays passed in are modified in place when they absorb another.
#
# Returns the Array of merged regions; an empty input yields [].
def join_regions(regions)
  # Without this guard the result would be [nil] for an empty input.
  return [] if regions.empty?

  sorted_regions = regions.sort_by { |reg| [reg[0], reg[1]] }
  ref_reg = sorted_regions.shift
  merged_regions = []
  sorted_regions.each do |next_region|
    # NOTE(review): when both scores are 0 the relative difference is NaN and
    # the regions are never merged - confirm whether that is intended.
    if ref_reg[0] == next_region[0] &&
       (ref_reg[2] - next_region[1]).abs <= 1 &&
       (ref_reg[5] - next_region[5]).abs.fdiv([ref_reg[5], next_region[5]].max) <= 0.05 &&
       ref_reg[3] == next_region[3]

      ref_reg[2] = next_region[2]
      ref_reg[4] = ref_reg[4].zip(next_region[4]).map { |pair| pair.max }
      ref_reg[5] = [ref_reg[5], next_region[5]].max
    else
      merged_regions << ref_reg
      ref_reg = next_region
    end
  end
  merged_regions << ref_reg
end
|
228
|
+
|
229
|
+
# def hpo_quality_control(prediction_data, hpo_metadata_file, information_coefficient_file)
|
230
|
+
# Build one descriptive row per HPO term of the profile.
#
# prediction_data    - [hpo1, hpo2, hpo3...]
# hpo_metadata       - Hash hpo_code => [main_hpo_code, hpo_name, relations],
#                      relations = [[hpo_code_relation, name_relation], ...]
# hpo_child_metadata - Hash hpo_code => child terms (may lack the key).
# hpos_ci_values     - Hash hpo_code => ci value (may lack the key).
#
# Returns an Array of rows:
#   [hpo_name, hpo_code, "yes"/"no", ci or "-", parents, childs]
# where parents is the result of check_parents (or ["-"] when it is empty)
# and childs is [] when the term has no entry in hpo_child_metadata.
def hpo_quality_control(prediction_data, hpo_metadata, hpo_child_metadata, hpos_ci_values)
  prediction_data.map do |hpo_code|
    ci = hpos_ci_values[hpo_code]
    _main_hpo_code, hpo_name, relations = hpo_metadata[hpo_code]
    # Columns "exists?" and "ci value".
    ci_columns = ci.nil? ? ["no", "-"] : ["yes", ci]
    parents = check_parents(relations, prediction_data, hpo_metadata)
    parents << "-" if parents.empty?
    children = hpo_child_metadata[hpo_code]
    children = [] if children.nil?
    [hpo_name, hpo_code, *ci_columns, parents, children]
  end
end
|
266
|
+
|
267
|
+
# Collect the ancestors of a term that are also present in the profile.
#
# relations       - [[parent_code, parent_name], ...] of the term to inspect.
# prediction_data - list of HPO codes in the profile.
# hpo_metadata    - Hash hpo_code => metadata Array whose last element is
#                   that term's own relations list.
# seen            - internal bookkeeping of the codes already visited; callers
#                   should leave the default.
#
# Returns an Array of [code, name] pairs in depth-first order. Each ancestor
# is visited once, so a term reachable through several parents is reported a
# single time and cyclic relations cannot cause endless recursion.
def check_parents(relations, prediction_data, hpo_metadata, seen = {})
  parent = []
  relations.each do |par_hpo_code, par_hpo_name|
    next if seen.key?(par_hpo_code)
    seen[par_hpo_code] = true
    parent << [par_hpo_code, par_hpo_name] if prediction_data.include?(par_hpo_code)
    grand_par_hpo = hpo_metadata[par_hpo_code]
    unless grand_par_hpo.nil?
      parent.concat(check_parents(grand_par_hpo.last, prediction_data, hpo_metadata, seen))
    end
  end
  parent
end
|
280
|
+
|
281
|
+
# Render the patient report and write it to +html_file+.
#
# The arguments are packed into the container handed to Report_html under the
# keys :characterised_hpos, :merged_regions, :hpo_metadata,
# :genes_with_kegg_data and :pathway_stats. The template is
# 'patient_report.erb' inside REPORT_FOLDER.
def report_data(characterised_hpos, hpo_associated_regions, html_file, hpo_metadata, genes_with_kegg_data, pathway_stats)
  container = {
    :characterised_hpos => characterised_hpos,
    :merged_regions => hpo_associated_regions,
    :hpo_metadata => hpo_metadata,
    :genes_with_kegg_data => genes_with_kegg_data,
    :pathway_stats => pathway_stats
  }
  # File.read closes the template file once it has been read; the previous
  # File.open(...).read left the handle open.
  template = File.read(File.join(REPORT_FOLDER, 'patient_report.erb'))
  report = Report_html.new(container, 'Patient HPO profile summary')
  report.build(template)
  report.write(html_file)
end
|
293
|
+
|
294
|
+
##############################################################################
|
295
|
+
##############################################################################
|
296
|
+
##### OLD CODE FOR JOIN REGIONS BY BORDERS
|
297
|
+
##############################################################################
|
298
|
+
##############################################################################
|
299
|
+
|
300
|
+
# def sorting_regions_by_shared_hpos(region2hpo)
|
301
|
+
# #if regions share the same hpos, sort regions from lowest to highest
|
302
|
+
# #this method returns an array for its use in cluster_regions_by_common_hpos method
|
303
|
+
# arr_region2hpo = []
|
304
|
+
# region2hpo.each do |region, hpos|
|
305
|
+
# arr_region2hpo << [region, hpos.sort]
|
306
|
+
# end
|
307
|
+
# arr_region2hpo.sort!{|r1, r2| r1.last <=> r2.last}
|
308
|
+
# # # arr_region2hpo = [[1.1.A.1, [hpo1, hpo2, hpo3]], [1.2.A.1, [hpo1, hpo2, hpo3]]...]
|
309
|
+
# return arr_region2hpo
|
310
|
+
# end
|
311
|
+
|
312
|
+
# def cluster_regions_by_common_hpos(arr_region2hpo)
|
313
|
+
# #method for grouping hpos within different locations
|
314
|
+
# regions_by_hpos = {}
|
315
|
+
# last_hpos = []
|
316
|
+
# regions = []
|
317
|
+
# all_regions = []
|
318
|
+
# arr_region2hpo.each do |region, hpos|
|
319
|
+
# all_regions << region
|
320
|
+
# if last_hpos == hpos
|
321
|
+
# regions << region
|
322
|
+
# else
|
323
|
+
# regions_by_hpos[last_hpos] = regions if !last_hpos.empty?
|
324
|
+
# regions = [region]
|
325
|
+
# end
|
326
|
+
# last_hpos = hpos
|
327
|
+
# end
|
328
|
+
# regions_by_hpos[last_hpos] = regions
|
329
|
+
# #puts regions_by_hpos.inspect
|
330
|
+
# # #regions_by_hpos = {[hpo1, hpo2, hpo3] => [1.1.A.1, 1.2.A.4, 1.3.A.12]...}
|
331
|
+
# return regions_by_hpos
|
332
|
+
# end
|
333
|
+
|
334
|
+
# def prepare_regions_for_profile_analysis(region2hpo, regionAttributes, association_scores, weight_style)
|
335
|
+
# # region2hpo = {region => [hpo1, hpo2...]}
|
336
|
+
# # regionAttributes = {region => [chr, start, stop, patients_number, region_length, region]}
|
337
|
+
# hpo_associated_regions = []
|
338
|
+
# arr_region2hpo = sorting_regions_by_shared_hpos(region2hpo)
|
339
|
+
# regions_by_hpos = cluster_regions_by_common_hpos(arr_region2hpo)
|
340
|
+
# regions_by_hpos.each do |hpos_list, regions|
|
341
|
+
# regionIDs = []
|
342
|
+
# regions_lengths = []
|
343
|
+
# patients_numbers = []
|
344
|
+
# region_attributes = regions.map { |region| regionAttributes[region] }
|
345
|
+
# region_attributes.each do |attributes|
|
346
|
+
# cur_chr, cur_start, cur_stop, cur_patients_number, cur_region_length, cur_regionID = attributes
|
347
|
+
# add_region(hpo_associated_regions, cur_chr, cur_start, cur_stop, hpos_list, [cur_regionID], association_scores, [cur_region_length], [cur_patients_number], weight_style)
|
348
|
+
# end
|
349
|
+
# end
|
350
|
+
# #puts hpo_associated_regions.inspect
|
351
|
+
# return hpo_associated_regions
|
352
|
+
# end
|
353
|
+
|
354
|
+
# def join_regions_by_borders(region2hpo, regionAttributes, association_scores, weight_style)
|
355
|
+
# # region2hpo = {region => [hpo1, hpo2...]}
|
356
|
+
# # regionAttributes = {region => [chr, start, stop, patients_number, region_length, region]}
|
357
|
+
# joined_regions_by_borders = []
|
358
|
+
# arr_region2hpo = sorting_regions_by_shared_hpos(region2hpo)
|
359
|
+
# regions_by_hpos = cluster_regions_by_common_hpos(arr_region2hpo)
|
360
|
+
# regions_by_hpos.each do |hpos_list, regions|
|
361
|
+
# regionIDs = []
|
362
|
+
# regions_lengths = []
|
363
|
+
# patients_numbers = []
|
364
|
+
# region_attributes = regions.map { |region| regionAttributes[region] }
|
365
|
+
# region_attributes.sort! { |r1, r2| [r1[0], r1[1]] <=> [r2[0], r2[1]] }
|
366
|
+
# tmp_chr = nil
|
367
|
+
# tmp_start = nil
|
368
|
+
# tmp_stop = nil
|
369
|
+
# region_attributes.each_with_index do |attributes, counter|
|
370
|
+
# break if counter + 1 == region_attributes.length
|
371
|
+
# cur_chr, cur_start, cur_stop, cur_patients_number, cur_region_length, cur_regionID = attributes
|
372
|
+
# next_chr, next_start, next_stop, next_patients_number, next_region_length, next_regionID = region_attributes[counter + 1]
|
373
|
+
# if cur_chr == next_chr
|
374
|
+
# if cur_stop == next_start || cur_stop == next_start + 1
|
375
|
+
# tmp_chr = cur_chr
|
376
|
+
# tmp_start = cur_start if tmp_start.nil?
|
377
|
+
# tmp_stop = cur_stop
|
378
|
+
# else
|
379
|
+
# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style)
|
380
|
+
# tmp_chr = nil
|
381
|
+
# tmp_start = nil
|
382
|
+
# tmp_stop = nil
|
383
|
+
# end
|
384
|
+
# else
|
385
|
+
# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style)
|
386
|
+
# tmp_chr = nil
|
387
|
+
# tmp_start = nil
|
388
|
+
# tmp_stop = nil
|
389
|
+
# end
|
390
|
+
# regionIDs << cur_regionID if regionIDs.empty?
|
391
|
+
# regionIDs << next_regionID
|
392
|
+
# regions_lengths << cur_region_length if regions_lengths.empty?
|
393
|
+
# regions_lengths << next_region_length
|
394
|
+
# patients_numbers << cur_patients_number if patients_numbers.empty?
|
395
|
+
# patients_numbers << next_patients_number
|
396
|
+
# end
|
397
|
+
# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style)
|
398
|
+
# end
|
399
|
+
# #puts joined_regions_by_borders.inspect
|
400
|
+
# return joined_regions_by_borders
|
401
|
+
# end
|
402
|
+
|
403
|
+
# def add_region(hpo_associated_regions, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, region_lengths, patients_numbers, weight_style)
|
404
|
+
# #region_lengths = number of regions that have the same HPOs
|
405
|
+
# unless tmp_chr.nil? && tmp_start.nil? && tmp_stop.nil?
|
406
|
+
# association_values_by_region = regionIDs.map {|r| association_scores[r]}
|
407
|
+
# weighted_association_scores = []
|
408
|
+
# hpos_list.each do |hpo|
|
409
|
+
# scores = association_values_by_region.map{|hpo_scores| hpo_scores[hpo] }
|
410
|
+
# weighted_score = 0
|
411
|
+
# weight = 0
|
412
|
+
# if scores.length == 1
|
413
|
+
# weighted_score = scores.first
|
414
|
+
# weight = 1
|
415
|
+
# else
|
416
|
+
# scores.each_with_index do |s, i|
|
417
|
+
# if weight_style == 'double'
|
418
|
+
# weighted_score += s * region_lengths[i] * patients_numbers[i]
|
419
|
+
# weight += region_lengths[i] * patients_numbers[i]
|
420
|
+
# elsif weight_style == 'simple'
|
421
|
+
# weighted_score += s * region_lengths[i]
|
422
|
+
# weight += region_lengths[i]
|
423
|
+
# else
|
424
|
+
# abort("Invalid weight method: #{weight_style}")
|
425
|
+
# end
|
426
|
+
# end
|
427
|
+
# end
|
428
|
+
# weighted_association_scores << weighted_score/weight
|
429
|
+
# end
|
430
|
+
# hpo_associated_regions << [tmp_chr, tmp_start, tmp_stop, hpos_list, weighted_association_scores]
|
431
|
+
# end
|
432
|
+
# end
|
data/lib/pets/version.rb
ADDED
data/pets.gemspec
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
|
2
|
+
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "pets/version"

Gem::Specification.new do |spec|
  spec.name          = "pets"
  spec.version       = Pets::VERSION
  # One array entry per author / address, so each is listed separately.
  spec.authors       = ["Elena Rojano", "Pedro Seoane"]
  spec.email         = ["elenarojano@uma.es", "seoanezonjic@uma.es"]

  spec.summary       = %q{Suite with predictive tools.}
  spec.description   = %q{PETS suite includes three different tools. CohortAnalyzer performs the calculation of several statistics that gives an overview of a cohort of patients to analyse. Reg2Phen uses associations between pathological phenotypes and regions of the genome (these associations can be calculated from the cohort of patients if they include genotypic & phenotypic information using NetAnalyzer, another Ruby gem) to find, for a given genomic region, which pathological phenotypes have been associated with that region. The third tool, Phen2Reg, is a predictor that using the same associations as Reg2Phen, predicts which genomic regions can be the cause of a list of pathological phenotypes observed in a patient.}
  spec.homepage      = "https://bitbucket.org/elenarojano/reg2phen/src/master/bin/reg2phen.rb"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  #
  #   spec.metadata["homepage_uri"] = spec.homepage
  #   spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
  #   spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "bin"
  # The command-line tools live in bin/ (there is no exe/ directory), so the
  # executables must be collected from there. Only the Ruby scripts are
  # installed: bin/console and bin/setup are development helpers.
  # NOTE(review): the R scripts in bin/ are left out on purpose - confirm
  # they do not need to be on the PATH.
  spec.executables   = spec.files.grep(%r{^bin/.+\.rb$}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_dependency "statistics2"
  spec.add_dependency "terminal-table"
  spec.add_dependency "report_html" # ask about this gem

end
|
47
|
+
|