NetAnalyzer 0.1.5 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/NetAnalyzer.gemspec +16 -6
- data/README.md +16 -2
- data/Rakefile +13 -9
- data/bin/NetAnalyzer.rb +176 -33
- data/bin/randomize_clustering.rb +121 -0
- data/bin/randomize_network.rb +89 -0
- data/bin/ranker_gene.rb +121 -0
- data/bin/text2binary_matrix.rb +308 -0
- data/lib/NetAnalyzer/adv_mat_calc.rb +117 -0
- data/lib/NetAnalyzer/net_parser.rb +50 -0
- data/lib/NetAnalyzer/net_plotter.rb +145 -0
- data/lib/NetAnalyzer/network.rb +723 -249
- data/lib/NetAnalyzer/nodes.rb +15 -0
- data/lib/NetAnalyzer/performancer.rb +98 -0
- data/lib/NetAnalyzer/ranker.rb +250 -0
- data/lib/NetAnalyzer/templates/ElGrapho.min.js +28 -0
- data/lib/NetAnalyzer/templates/cytoscape.erb +65 -0
- data/lib/NetAnalyzer/templates/cytoscape.min.js +32 -0
- data/lib/NetAnalyzer/templates/el_grapho.erb +89 -0
- data/lib/NetAnalyzer/templates/pako.min.js +1 -0
- data/lib/NetAnalyzer/templates/sigma.erb +132 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +7 -0
- metadata +187 -29
data/lib/NetAnalyzer/nodes.rb
CHANGED
@@ -4,4 +4,19 @@ class Node
|
|
4
4
|
@id = id
|
5
5
|
@type = type
|
6
6
|
end
|
7
|
+
|
8
|
+
# Builds an independent copy of this node: a new Node is created from clones
# of the id and the type, so the copy does not share those objects with self.
#
# @return [Node] the newly created copy
def clone
  Node.new(@id.clone, @type.clone)
end
|
12
|
+
|
13
|
+
# Value equality: two nodes are equal when both their id and their type match.
#
# Anything that is not a Node (nil, a String, ...) is simply "not equal";
# without this guard the comparison raised NoMethodError on `other.id`.
#
# @param other [Object] object to compare against
# @return [Boolean] true when other is a Node with the same id and type
def ==(other)
  return false unless other.is_a?(Node)

  self.id == other.id && self.type == other.type
end
|
21
|
+
|
7
22
|
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'bigdecimal'
|
2
|
+
|
3
|
+
# Computes precision/recall values for scored label predictions against a
# control (reference) set of node => labels associations.
class Performancer
  def initialize()
    @control = {} # node => [reference labels]
  end

  # Adds reference associations to the control set.
  #
  # @param ref_array [Array<Array>] pairs of [node, label]; pairs whose label
  #   is '-' are ignored
  # @return [Hash] the accumulated control set (node => array of labels)
  def load_control(ref_array)
    ref_array.each do |node1, node2|
      next if node2 == '-'

      (@control[node1] ||= []) << node2
    end
    # The control set lives in @control; there is no `control` reader, so the
    # instance variable has to be returned explicitly.
    return @control
  end

  # Pandey 2007, Association Analysis-based Transformations for Protein Interaction Networks: A Function Prediction Case Study
  #
  # @param predictions [Array<Array>] triplets of [node, label, score]
  # @param cut_number [Integer] number of intervals the score range is split into
  # @param top_number [Integer] maximum number of labels kept per node
  # @return [Array<Array>] one [cut, precision, recall] row per cut; empty
  #   when there are no predictions
  def get_pred_rec(predictions, cut_number = 100, top_number = 10000)
    preds, limits = load_prediction(predictions)
    performance = get_cuts(limits, cut_number).map do |cut|
      prec, rec = pred_rec(preds, cut, top_number)
      [cut, prec, rec]
    end
    return performance
  end

  # Groups predictions by node and records the score range.
  #
  # @param pairs_array [Array<Array>] triplets of [node, label, score]
  # @return [Array] the hash node => [[labels], [scores]] followed by
  #   [min_score, max_score] ([nil, nil] when pairs_array is empty)
  def load_prediction(pairs_array)
    pred = {}
    min = nil
    max = nil
    pairs_array.each do |key, label, score|
      if min.nil? || max.nil?
        min = score
        max = score
      else
        min = score if score < min
        max = score if score > max
      end
      labels, scores = (pred[key] ||= [[], []])
      labels << label
      scores << score
    end
    return pred, [min, max]
  end

  # Precision and recall of the predictions at a given score cut.
  #
  # @param preds [Hash] node => [[labels], [scores]] as built by load_prediction
  # @param cut [Numeric] minimum score for a label to be considered
  # @param top [Integer] maximum number of labels kept per node
  # @return [Array<Float>] precision and recall; 0.0 replaces an undefined (0/0) ratio
  def pred_rec(preds, cut, top)
    predicted_labels = 0 # m
    true_labels = 0 # n
    common_labels = 0 # k
    @control.each do |key, c_labels|
      true_labels += c_labels.length
      pred_info = preds[key]
      next if pred_info.nil?

      labels, scores = pred_info
      reliable_labels = get_reliable_labels(labels, scores, cut, top)
      predicted_labels += reliable_labels.length
      common_labels += (c_labels & reliable_labels).length
    end
    prec = common_labels.to_f / predicted_labels
    rec = common_labels.to_f / true_labels
    prec = 0.0 if prec.nan?
    rec = 0.0 if rec.nan?
    return prec, rec
  end

  # Evenly spaced score thresholds covering the given range.
  #
  # @param limits [Array] [min, max] scores
  # @param n_cuts [Integer] number of intervals; n_cuts + 1 thresholds are returned
  # @return [Array<Float>] thresholds starting at limits.first; empty when the
  #   limits are unknown (no predictions were loaded)
  def get_cuts(limits, n_cuts)
    cuts = []
    return cuts if limits.any?(&:nil?)

    range = (limits.last - limits.first).abs.fdiv(n_cuts)
    # The step is converted to BigDecimal before being multiplied, so each
    # threshold is computed in decimal arithmetic and only then cast to Float.
    range = BigDecimal(range, 10)
    cut = limits.first
    (n_cuts + 1).times do |n|
      cuts << (cut + n * range).to_f
    end
    return cuts
  end

  # Labels whose score reaches the cut, best score first, limited to `top`.
  #
  # @param labels [Array] labels predicted for one node
  # @param scores [Array<Numeric>] scores parallel to labels
  # @param cut [Numeric] minimum score (inclusive)
  # @param top [Integer] maximum number of labels returned
  # @return [Array] the selected labels
  def get_reliable_labels(labels, scores, cut, top)
    scored_labels = []
    scores.each_with_index do |score, i|
      scored_labels << [labels[i], score] if score >= cut
    end
    best_first = scored_labels.sort { |l1, l2| l2.last <=> l1.last }
    return best_first[0..top - 1].map { |pred| pred.first }
  end
end
|
@@ -0,0 +1,250 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
require "numo/linalg"
|
5
|
+
|
6
|
+
# Ranks the nodes of a node-by-node score matrix with respect to groups of
# seed nodes, optionally in leave-one-out mode.
#
# Ranking rows have the shape
#   [node_name, score, rank_percentage, rank, absolute_rank]
class Ranker
  attr_accessor :matrix, :nodes, :seeds, :ranking

  def initialize()
    @matrix = nil
    @nodes = [] # kernel_nodes
    @seeds = {} # genes_seed
    @reference_nodes = {}
    @ranking = {} # ranked_genes
  end

  # Loads the seed groups from a file path or from a separator-joined string.
  #
  # @param node_groups [String] path to a groups file or list of node names
  # @param sep [String] separator between node names
  # @return [Hash] group name => array of node names
  def load_seeds(node_groups, sep: ',')
    @seeds = load_nodes_by_group(node_groups, sep: sep)
  end

  # Loads the reference groups; same input conventions as load_seeds.
  #
  # @param node_groups [String] path to a groups file or list of node names
  # @param sep [String] separator between node names
  # @return [Hash] group name => array of node names
  def load_references(node_groups, sep: ',')
    @reference_nodes = load_nodes_by_group(node_groups, sep: sep)
  end

  # Interprets node_groups as a file when such a path exists; otherwise as a
  # single group, named "seed_genes", written as a sep-joined string.
  #
  # @param node_groups [String] path to a groups file or list of node names
  # @param sep [String] separator between node names
  # @return [Hash] group name => array of node names
  def load_nodes_by_group(node_groups, sep: ',')
    if File.exist?(node_groups)
      group_nodes = load_node_groups_from_file(node_groups, sep: sep)
    else
      group_nodes = {"seed_genes" => node_groups.split(sep)}
    end
    return group_nodes
  end

  # Reads a tab-separated file with one group per line: name, then the node
  # names joined by sep. Lines without a node column (e.g. blank lines) are
  # skipped instead of aborting the load.
  #
  # @param file [String] path of the groups file
  # @param sep [String] separator between node names
  # @return [Hash] group name => array of node names
  def load_node_groups_from_file(file, sep: ',')
    group_nodes = {}
    # File.foreach closes the file once the block finishes.
    File.foreach(file) do |line|
      set_name, nodes = line.chomp.split("\t")
      next if nodes.nil?

      group_nodes[set_name] = nodes.split(sep)
    end
    return group_nodes
  end

  # Appends one node name per line of the file to @nodes.
  #
  # @param file [String] path of the node list
  # @return [Array<String>] the node list after loading
  def load_nodes_from_file(file)
    File.foreach(file) do |line|
      # chomp (not chomp!): the bang version returns nil when there is no
      # line terminator to remove, which happens on an unterminated last line.
      @nodes << line.chomp
    end
    return @nodes
  end

  # Ranks every seed group and stores the result in @ranking.
  #
  # @param leave_one_out [Boolean] when true, seed groups are first replaced
  #   by their leave-one-out variants (see get_seed_leave_one_out)
  # @param threads [Integer] value forwarded to Parallel.map as in_processes
  # @return [Array<Array>] pairs of [seed group name, ranking rows]
  def do_ranking(leave_one_out: false, threads: 0)
    get_seed_leave_one_out() if leave_one_out
    seed_indexes = get_seed_indexes
    seed_groups = @seeds.to_a # Array conversion needed for parallelization
    ranked_lists = Parallel.map(seed_groups, in_processes: threads) do |seed_name, seed|
      # The code in this block must not modify anything outside of it
      if leave_one_out && @reference_nodes[seed_name].length <= 1
        rank_list = get_individual_rank(seed, @reference_nodes[seed_name][0])
      else
        rank_list = rank_by_seed(seed_indexes, seed) # Production mode
      end
      [seed_name, rank_list]
    end
    ranked_lists.each do |seed_name, rank_list| # Transfer results to hash
      @ranking[seed_name] = rank_list
    end
  end

  # Replaces every seed group by all its variants with one member removed.
  # Each variant is named "<group>_iteration_<n>" and gets as reference nodes
  # the removed member plus any reference already loaded for the group.
  #
  # @return [Hash] the new reference nodes (variant name => nodes to predict)
  def get_seed_leave_one_out()
    new_seeds = {}
    genes2predict = {}
    @seeds.each do |seed_name, seeds|
      group_number = seeds.length - 1
      one_out_seeds = seeds.combination(group_number).to_a

      one_out_seeds.each_with_index do |one_out_seed, indx|
        seed_name_one_out = seed_name.to_s + "_iteration_" + indx.to_s
        held_out = seeds - one_out_seed
        known_references = @reference_nodes[seed_name]
        held_out += known_references unless known_references.nil?
        new_seeds[seed_name_one_out] = one_out_seed
        genes2predict[seed_name_one_out] = held_out.uniq
      end
    end
    @seeds = new_seeds
    @reference_nodes = genes2predict
  end

  # Scores every node as the mean of the matrix rows of the seeds and ranks
  # all nodes by that score.
  #
  # @param seed_indexes [Hash] node name => position (see get_seed_indexes)
  # @param seeds [Array] seed node names; names absent from seed_indexes are ignored
  # @return [Array<Array>] rows [node, score, rank_percentage, rank,
  #   absolute_rank] from largest to smallest score; empty when no seed is known
  def rank_by_seed(seed_indexes, seeds)
    ordered_gene_score = []
    genes_pos = seeds.map{|s| seed_indexes[s]}.compact
    number_of_seed_genes = genes_pos.length
    number_of_all_nodes = @nodes.length

    if number_of_seed_genes > 0
      subsets_gen_values = @matrix[genes_pos,true]
      integrated_gen_values = subsets_gen_values.sum(0)
      gen_list = 1.fdiv(number_of_seed_genes) * integrated_gen_values.inplace

      ordered_indexes = gen_list.sort_index # from smallest to largest

      last_val = nil
      n_elements = ordered_indexes.shape.first
      n_elements.times do |pos|
        order_index = ordered_indexes[pos]
        val = gen_list[order_index]
        node_name = @nodes[order_index]

        rank = get_position_for_items_with_same_score(pos, val, last_val, gen_list, n_elements, ordered_gene_score) # number of items behind
        rank = n_elements - rank # number of nodes below or equal
        rank_percentage = rank.fdiv(number_of_all_nodes)

        ordered_gene_score << [node_name, val, rank_percentage, rank]
        last_val = val
      end

      ordered_gene_score = ordered_gene_score.reverse # from largest to smallest
      ordered_gene_score = add_absolute_rank_column(ordered_gene_score)
    end

    return ordered_gene_score
  end

  # Number of items strictly behind the item at `pos` while walking the
  # scores in ascending order; tied items reuse the count of the previous item.
  #
  # @param pos [Integer] position in the ascending walk
  # @param val [Numeric] score at pos
  # @param prev_val [Numeric, nil] score of the previous item (nil for the first)
  # @param gen_list [Object] unused, kept for interface compatibility
  # @param n_elements [Integer] total number of items
  # @param ordered_gene_score [Array<Array>] rows already emitted; column 3 holds the rank
  # @return [Integer] number of items behind
  def get_position_for_items_with_same_score(pos, val, prev_val, gen_list, n_elements, ordered_gene_score)
    return 0 if prev_val.nil?
    return pos if prev_val < val

    n_elements - ordered_gene_score.last[3]
  end

  # Appends a dense rank (1, 2, 3... with ties sharing a value) to every row.
  # The rank grows each time column 2 (rank percentage) increases with respect
  # to the previous row. Rows are modified in place.
  #
  # @param ranking [Array<Array>] rows ordered from best to worst
  # @return [Array<Array>] new array holding the same (extended) rows
  def add_absolute_rank_column(ranking)
    absolute_rank = 1
    ranking_with_new_column = ranking.each_with_index.map do |row, row_pos|
      absolute_rank += 1 if row_pos > 0 && row[2] > ranking[row_pos - 1][2]
      row << absolute_rank
    end
    return ranking_with_new_column
  end

  # Ranks a single node against the mean of the matrix rows of the seeds.
  #
  # @param seed_genes [Array] seed node names
  # @param node_of_interest [Object] name of the node to rank
  # @return [Array<Array>] one row [node, score, rank_percentage, rank,
  #   absolute_rank]; empty when no seed or the node is not in @nodes
  def get_individual_rank(seed_genes, node_of_interest)
    genes_pos = get_nodes_indexes(seed_genes)
    node_of_interest_pos = @nodes.find_index(node_of_interest)
    ordered_gene_score = []

    if !genes_pos.empty? && !node_of_interest_pos.nil?

      subsets_gen_values = @matrix[genes_pos,true]
      integrated_gen_values = subsets_gen_values.sum(0)
      integrated_gen_values = 1.fdiv(genes_pos.length) * integrated_gen_values.inplace

      ref_value = integrated_gen_values[node_of_interest_pos]

      # Counts the nodes scoring at least as much as the node of interest
      members_below_test = 0
      integrated_gen_values.each do |gen_value|
        members_below_test += 1 if gen_value >= ref_value
      end

      rank_percentage = members_below_test.fdiv(@nodes.length)
      rank = members_below_test
      rank_absolute = get_individual_absolute_rank(integrated_gen_values.to_a,ref_value)

      ordered_gene_score << [node_of_interest, ref_value, rank_percentage, rank, rank_absolute]
    end

    return ordered_gene_score

  end

  # 1-based position of ref_value among the distinct values sorted from
  # largest to smallest.
  #
  # @param values_list [Array<Numeric>] all scores
  # @param ref_value [Numeric] score to locate
  # @return [Integer, nil] the position, or nil when ref_value is not in the list
  def get_individual_absolute_rank(values_list,ref_value)
    distinct_desc = values_list.sort.reverse.uniq
    ref_pos = distinct_desc.index(ref_value)
    return ref_pos.nil? ? nil : ref_pos + 1
  end

  # Extracts from every ranking the rows of its reference nodes.
  #
  # @return [Hash] seed group name => rows [node, score, rank_percentage,
  #   rank, absolute_rank] of the references found, highest score first.
  #   Groups without references or with an empty ranking are omitted.
  def get_reference_ranks
    filtered_ranked_genes = {}

    @ranking.each do |seed_name, ranking|
      references = @reference_nodes[seed_name]
      next if references.nil? || ranking.empty?

      rank_by_node = array2hash(ranking, 0, (1..ranking[0].length))
      filtered_ranked_genes[seed_name] = []

      references.each do |reference|
        rank = rank_by_node[reference]
        filtered_ranked_genes[seed_name] << [reference] + rank unless rank.nil?
      end
      filtered_ranked_genes[seed_name].sort_by!{|rank| -rank[1]}
    end
    return filtered_ranked_genes
  end

  # Indexes an array of rows by one of their columns.
  #
  # @param arr [Array<Array>] rows
  # @param key [Integer] column used as hash key
  # @param values [Integer, Range] column(s) stored as hash value
  # @return [Hash] row[key] => row[values]; later rows overwrite earlier ones
  def array2hash(arr, key, values)
    h = {}
    arr.each{|els| h[els[key]] = els[values]}
    return h
  end

  # First top_n rows of every ranking.
  #
  # @param top_n [Integer] number of rows to keep per seed group
  # @return [Hash] seed group name => leading rows (nil rankings are omitted)
  def get_top(top_n)
    top_ranked_genes = {}
    @ranking.each do |seed_name, ranking|
      top_ranked_genes[seed_name] = ranking[0..top_n-1] unless ranking.nil?
    end
    return top_ranked_genes
  end

  # Positions in @nodes of the given node names; unknown names are dropped.
  #
  # @param nodes [Array] node names
  # @return [Array<Integer>] positions, in the order of the input names
  def get_nodes_indexes(nodes)
    return nodes.map { |node| @nodes.find_index(node) }.compact
  end

  # Positions in @nodes of every node used in any seed group.
  #
  # @return [Hash] node name => position; seeds absent from @nodes are omitted
  def get_seed_indexes
    indexes = {}
    @seeds.values.flatten.uniq.each do |node|
      indx = @nodes.index(node)
      indexes[node] = indx unless indx.nil?
    end
    return indexes
  end
end
|