NetAnalyzer 0.1.5 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,4 +4,19 @@ class Node
4
4
  @id = id
5
5
  @type = type
6
6
  end
7
+
8
# Builds a new Node from copies of this node's id and type, so the
# returned node does not share those two objects with the receiver.
#
# @return [Node] a fresh node holding cloned id and type values.
def clone
  Node.new(@id.clone, @type.clone)
end
12
+
13
# Value equality: two nodes are equal when both id and type match.
#
# @param other [Object] object to compare against.
# @return [Boolean] true when other exposes id and type and both are
#   equal to this node's; false otherwise (including nil and objects
#   that do not respond to id/type, which previously raised NoMethodError).
def ==(other)
  return false unless other.respond_to?(:id) && other.respond_to?(:type)

  id == other.id && type == other.type
end
21
+
7
22
  end
@@ -0,0 +1,98 @@
1
+ require 'bigdecimal'
2
+
3
# Scores a set of labelled predictions against a control (reference) set,
# reporting precision and recall over a sweep of score cut-offs.
class Performancer
  def initialize
    # key => array of labels known to be associated with that key
    @control = {}
  end

  # Registers control (reference) associations.
  #
  # @param ref_array [Array<Array>] [key, label] pairs; pairs whose label
  #   is '-' are ignored.
  # @return [Hash] the accumulated control table (key => array of labels).
  def load_control(ref_array)
    ref_array.each do |node1, node2|
      next if node2 == '-'

      (@control[node1] ||= []) << node2
    end
    # The original returned an undefined `control`, raising NameError.
    @control
  end

  # Pandey 2007, Association Analysis-based Transformations for Protein Interaction Networks: A Function Prediction Case Study
  #
  # @param predictions [Array<Array>] [key, label, score] triples.
  # @param cut_number [Integer] number of intervals the score range is split
  #   into (cut_number + 1 cut-offs are evaluated).
  # @param top_number [Integer] maximum labels kept per key at each cut-off.
  # @return [Array<Array>] rows of [cut, precision, recall]; empty when no
  #   predictions were given.
  def get_pred_rec(predictions, cut_number = 100, top_number = 10000)
    preds, limits = load_prediction(predictions)
    return [] if limits.include?(nil) # no scores seen: nothing to sweep

    get_cuts(limits, cut_number).map do |cut|
      prec, rec = pred_rec(preds, cut, top_number)
      [cut, prec, rec]
    end
  end

  # Groups predictions by key and tracks the score range.
  #
  # @param pairs_array [Array<Array>] [key, label, score] triples.
  # @return [Array] [pred, [min, max]] where pred maps each key to
  #   [labels, scores] (parallel arrays); min and max are nil when
  #   pairs_array is empty.
  def load_prediction(pairs_array)
    pred = {}
    min = nil
    max = nil
    pairs_array.each do |key, label, score|
      min = score if min.nil? || score < min
      max = score if max.nil? || score > max
      labels, scores = (pred[key] ||= [[], []])
      labels << label
      scores << score
    end
    [pred, [min, max]]
  end

  # Computes precision and recall for one cut-off over every control key.
  #
  # @param preds [Hash] key => [labels, scores], as built by load_prediction.
  # @param cut [Numeric] minimum score for a label to count as predicted.
  # @param top [Integer] maximum labels kept per key.
  # @return [Array<Float>] [precision, recall]; each is 0.0 when its
  #   denominator is zero.
  def pred_rec(preds, cut, top)
    predicted_labels = 0 # m
    true_labels = 0      # n
    common_labels = 0    # k
    @control.each do |key, c_labels|
      true_labels += c_labels.length
      pred_info = preds[key]
      next if pred_info.nil?

      labels, scores = pred_info
      reliable_labels = get_reliable_labels(labels, scores, cut, top)
      predicted_labels += reliable_labels.length
      common_labels += (c_labels & reliable_labels).length
    end
    # common_labels is 0 whenever a denominator is 0, so this matches the
    # former NaN -> 0.0 replacement.
    prec = predicted_labels.zero? ? 0.0 : common_labels.fdiv(predicted_labels)
    rec = true_labels.zero? ? 0.0 : common_labels.fdiv(true_labels)
    [prec, rec]
  end

  # Builds n_cuts + 1 evenly spaced cut-offs starting at limits.first.
  #
  # @param limits [Array<Numeric>] [min, max] score range.
  # @param n_cuts [Integer] number of intervals.
  # @return [Array<Float>] the cut-off values.
  def get_cuts(limits, n_cuts)
    step = (limits.last - limits.first).abs.fdiv(n_cuts)
    step = BigDecimal(step, 10) # step kept to 10 significant digits
    start = limits.first
    Array.new(n_cuts + 1) { |n| (start + n * step).to_f }
  end

  # Selects the labels whose score reaches the cut-off, best score first.
  #
  # @param labels [Array] labels, parallel to scores.
  # @param scores [Array<Numeric>] score of each label.
  # @param cut [Numeric] minimum score (inclusive).
  # @param top [Integer] number of labels to keep; the slice is 0..top-1,
  #   so top = 0 keeps every label.
  # @return [Array] the selected labels.
  def get_reliable_labels(labels, scores, cut, top)
    passing = []
    scores.each_with_index do |score, i|
      passing << [labels[i], score] if score >= cut
    end
    ranked = passing.sort { |a, b| b.last <=> a.last }
    ranked[0..top - 1].map(&:first)
  end
end
@@ -0,0 +1,250 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'numo/narray'
4
+ require "numo/linalg"
5
+
6
# Ranks the nodes of a matrix (rows/columns indexed by @nodes) by their
# average value against one or more groups of seed nodes.
class Ranker
  attr_accessor :matrix, :nodes, :seeds, :ranking

  def initialize
    @matrix = nil
    @nodes = [] # kernel_nodes
    @seeds = {} # genes_seed
    @reference_nodes = {}
    @ranking = {} # ranked_genes
  end

  # Loads the seed groups (see load_nodes_by_group for accepted input).
  def load_seeds(node_groups, sep: ',')
    @seeds = load_nodes_by_group(node_groups, sep: sep)
  end

  # Loads the reference groups (see load_nodes_by_group for accepted input).
  def load_references(node_groups, sep: ',')
    @reference_nodes = load_nodes_by_group(node_groups, sep: sep)
  end

  # Resolves node groups from either a file or an inline list.
  #
  # @param node_groups [String] path to an existing groups file, or a
  #   sep-delimited list of node names.
  # @param sep [String] delimiter between node names.
  # @return [Hash] group name => array of node names. An inline list is
  #   stored under the single group name "seed_genes".
  def load_nodes_by_group(node_groups, sep: ',')
    if File.exist?(node_groups)
      load_node_groups_from_file(node_groups, sep: sep)
    else
      { "seed_genes" => node_groups.split(sep) }
    end
  end

  # Reads a tab-separated file with one "group<TAB>node1<sep>node2..." record
  # per line. Lines without both fields (e.g. blank lines) are skipped
  # instead of raising.
  #
  # @return [Hash] group name => array of node names.
  def load_node_groups_from_file(file, sep: ',')
    group_nodes = {}
    # File.foreach closes the file once iteration finishes.
    File.foreach(file) do |line|
      set_name, nodes = line.chomp.split("\t")
      next if set_name.nil? || nodes.nil?

      group_nodes[set_name] = nodes.split(sep)
    end
    group_nodes
  end

  # Appends one node name per line of file to @nodes.
  def load_nodes_from_file(file)
    File.foreach(file) do |line|
      # chomp (not chomp!): chomp! returns nil when the line has no trailing
      # newline, which stored nil for a final unterminated line.
      @nodes << line.chomp
    end
  end

  # Ranks every seed group and stores the result in @ranking.
  #
  # NOTE(review): Parallel is not required among the requires visible in this
  # file; presumably the caller loads the parallel gem - confirm.
  #
  # @param leave_one_out [Boolean] when true, seed groups are first expanded
  #   with get_seed_leave_one_out.
  # @param threads [Integer] forwarded to Parallel.map as in_processes.
  def do_ranking(leave_one_out: false, threads: 0)
    get_seed_leave_one_out if leave_one_out
    seed_indexes = get_seed_indexes
    seed_groups = @seeds.to_a # Array conversion needed for parallelization
    ranked_lists = Parallel.map(seed_groups, in_processes: threads) do |seed_name, seed|
      # The code in this block CANNOT modify anything outside
      if leave_one_out && @reference_nodes[seed_name].length <= 1
        rank_list = get_individual_rank(seed, @reference_nodes[seed_name][0])
      else
        rank_list = rank_by_seed(seed_indexes, seed) # Production mode
      end
      [seed_name, rank_list]
    end
    ranked_lists.each do |seed_name, rank_list| # Transfer results to hash
      @ranking[seed_name] = rank_list
    end
  end

  # Replaces each seed group by all of its leave-one-out variants, named
  # "<group>_iteration_<n>", and sets @reference_nodes for each variant to
  # the left-out seed(s) plus any references already loaded for the group.
  def get_seed_leave_one_out
    new_seeds = {}
    genes2predict = {}
    @seeds.each do |seed_name, seeds|
      group_number = seeds.length - 1
      seeds.combination(group_number).each_with_index do |one_out_seed, indx|
        seed_name_one_out = seed_name.to_s + "_iteration_" + indx.to_s
        new_seeds[seed_name_one_out] = one_out_seed
        to_predict = seeds - one_out_seed
        previous_refs = @reference_nodes[seed_name]
        to_predict += previous_refs unless previous_refs.nil?
        genes2predict[seed_name_one_out] = to_predict.uniq
      end
    end
    @seeds = new_seeds
    @reference_nodes = genes2predict
  end

  # Ranks all nodes by their mean matrix value over the given seeds.
  #
  # @param seed_indexes [Hash] node name => position in @nodes.
  # @param seeds [Array] seed node names; names missing from seed_indexes
  #   are ignored.
  # @return [Array<Array>] rows of
  #   [node_name, value, rank_percentage, rank, absolute_rank], largest
  #   value first; empty when no seed is present in seed_indexes.
  def rank_by_seed(seed_indexes, seeds)
    ordered_gene_score = []
    genes_pos = seeds.map { |s| seed_indexes[s] }.compact
    number_of_seed_genes = genes_pos.length
    number_of_all_nodes = @nodes.length

    if number_of_seed_genes > 0
      subsets_gen_values = @matrix[genes_pos, true]
      integrated_gen_values = subsets_gen_values.sum(0)
      gen_list = 1.fdiv(number_of_seed_genes) * integrated_gen_values.inplace

      ordered_indexes = gen_list.sort_index # from smallest to largest

      last_val = nil
      n_elements = ordered_indexes.shape.first
      n_elements.times do |pos|
        order_index = ordered_indexes[pos]
        val = gen_list[order_index]
        node_name = @nodes[order_index]

        rank = get_position_for_items_with_same_score(pos, val, last_val, gen_list, n_elements, ordered_gene_score) # number of items behind
        rank = n_elements - rank # number of nodes below or equal
        rank_percentage = rank.fdiv(number_of_all_nodes)

        ordered_gene_score << [node_name, val, rank_percentage, rank]
        last_val = val
      end

      ordered_gene_score = ordered_gene_score.reverse # from largest to smallest
      ordered_gene_score = add_absolute_rank_column(ordered_gene_score)
    end

    ordered_gene_score
  end

  # Number of items strictly behind the current one in an ascending walk.
  # Tied items reuse the count of the previous item, recovered from the rank
  # stored in column 3 of the last row.
  #
  # @return [Integer] 0 for the first item (prev_val nil), pos when the value
  #   increased, otherwise n_elements minus the previous row's rank.
  def get_position_for_items_with_same_score(pos, val, prev_val, gen_list, n_elements, ordered_gene_score)
    return 0 if prev_val.nil?
    return pos if prev_val < val

    n_elements - ordered_gene_score.last[3]
  end

  # Appends a dense rank (1, 2, 3... without gaps) to every row, in place.
  # The rank increases whenever column 2 grows with respect to the previous
  # row.
  #
  # @param ranking [Array<Array>] rows; each row is mutated.
  # @return [Array<Array>] the same rows, each with the rank appended.
  def add_absolute_rank_column(ranking)
    absolute_rank = 1
    ranking.each_with_index.map do |row, row_pos|
      absolute_rank += 1 if row_pos > 0 && row[2] > ranking[row_pos - 1][2]
      row << absolute_rank
    end
  end

  # Ranks a single node of interest against the mean values of the seeds.
  #
  # @return [Array<Array>] a single row
  #   [node_of_interest, value, rank_percentage, rank, absolute_rank], or an
  #   empty array when no seed or the node of interest is not in @nodes.
  def get_individual_rank(seed_genes, node_of_interest)
    genes_pos = get_nodes_indexes(seed_genes)
    node_of_interest_pos = @nodes.find_index(node_of_interest)
    ordered_gene_score = []

    if !genes_pos.empty? && !node_of_interest_pos.nil?
      subsets_gen_values = @matrix[genes_pos, true]
      integrated_gen_values = subsets_gen_values.sum(0)
      integrated_gen_values = 1.fdiv(genes_pos.length) * integrated_gen_values.inplace

      ref_value = integrated_gen_values[node_of_interest_pos]

      # Counts values greater than or equal to the reference (itself included).
      members_below_test = 0
      integrated_gen_values.each do |gen_value|
        members_below_test += 1 if gen_value >= ref_value
      end

      rank_percentage = members_below_test.fdiv(@nodes.length)
      rank = members_below_test
      rank_absolute = get_individual_absolute_rank(integrated_gen_values.to_a, ref_value)

      ordered_gene_score << [node_of_interest, ref_value, rank_percentage, rank, rank_absolute]
    end

    ordered_gene_score
  end

  # 1-based position of ref_value among the distinct values sorted from
  # largest to smallest.
  #
  # @return [Integer, nil] nil when ref_value is not in values_list.
  def get_individual_absolute_rank(values_list, ref_value)
    position = values_list.uniq.sort.reverse.index(ref_value)
    position.nil? ? nil : position + 1
  end

  # Extracts, for each ranked group, the rows belonging to its reference
  # nodes.
  #
  # @return [Hash] group name => rows [reference, *rank columns], sorted by
  #   the first rank column in descending order. Groups without references
  #   or with an empty ranking are omitted.
  def get_reference_ranks
    filtered_ranked_genes = {}
    @ranking.each do |seed_name, ranking|
      references = @reference_nodes[seed_name]
      next if references.nil? || ranking.empty?

      rank_by_node = array2hash(ranking, 0, (1..ranking[0].length))
      found = []
      references.each do |reference|
        rank = rank_by_node[reference]
        found << [reference] + rank unless rank.nil?
      end
      filtered_ranked_genes[seed_name] = found.sort_by { |row| -row[1] }
    end
    filtered_ranked_genes
  end

  # Indexes the rows of arr by one of their columns.
  #
  # @param arr [Array<Array>] rows.
  # @param key [Integer] column used as hash key (previously ignored and
  #   fixed to column 0).
  # @param values [Integer, Range] column(s) stored as the hash value.
  # @return [Hash] row[key] => row[values]; later rows win on duplicates.
  def array2hash(arr, key, values)
    h = {}
    arr.each { |els| h[els[key]] = els[values] }
    h
  end

  # First top_n rows of every ranking (slice 0..top_n-1); groups whose
  # ranking is nil are omitted.
  def get_top(top_n)
    top_ranked_genes = {}
    @ranking.each do |seed_name, ranking|
      top_ranked_genes[seed_name] = ranking[0..top_n - 1] unless ranking.nil?
    end
    top_ranked_genes
  end

  # Positions in @nodes of the given node names, skipping unknown names.
  def get_nodes_indexes(nodes)
    nodes.map { |node| @nodes.find_index(node) }.compact
  end

  # Maps every distinct seed node found in @nodes to its position there.
  #
  # @return [Hash] node name => index; seeds absent from @nodes are omitted.
  def get_seed_indexes
    indexes = {}
    @seeds.values.flatten.each do |node|
      next if indexes.include?(node)

      indx = @nodes.index(node)
      indexes[node] = indx unless indx.nil?
    end
    indexes
  end
end