NetAnalyzer 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NetAnalyzer.gemspec +1 -1
- data/README.md +3 -1
- data/bin/NetAnalyzer.rb +5 -15
- data/bin/randomize_clustering.rb +121 -0
- data/bin/randomize_network.rb +89 -0
- data/bin/ranker_gene.rb +121 -0
- data/bin/text2binary_matrix.rb +15 -1
- data/lib/NetAnalyzer/adv_mat_calc.rb +117 -0
- data/lib/NetAnalyzer/net_parser.rb +50 -0
- data/lib/NetAnalyzer/net_plotter.rb +145 -0
- data/lib/NetAnalyzer/network.rb +474 -564
- data/lib/NetAnalyzer/nodes.rb +15 -0
- data/lib/NetAnalyzer/performancer.rb +98 -0
- data/lib/NetAnalyzer/ranker.rb +250 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +5 -0
- metadata +19 -8
data/lib/NetAnalyzer/nodes.rb
CHANGED
@@ -4,4 +4,19 @@ class Node
|
|
4
4
|
@id = id
|
5
5
|
@type = type
|
6
6
|
end
|
7
|
+
|
8
|
+
# Builds an independent copy of this node: a new Node whose id and type are
# themselves clones of this node's id and type.
#
# Returns the new Node.
def clone
  copied_id = @id.clone
  copied_type = @type.clone
  Node.new(copied_id, copied_type)
end
|
12
|
+
|
13
|
+
# Value equality: two nodes are equal when both their id and their type match.
#
# other - the object to compare against.
#
# Returns true when other exposes `id` and `type` and both match this node,
# false otherwise. Objects that do not respond to `id`/`type` (nil, strings,
# ...) compare as not equal instead of raising NoMethodError.
def ==(other)
  return false unless other.respond_to?(:id) && other.respond_to?(:type)

  id == other.id && type == other.type
end
|
21
|
+
|
7
22
|
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'bigdecimal'
|
2
|
+
|
3
|
+
# Evaluates scored label predictions against a control (reference) set of
# node => labels associations, producing precision and recall values over a
# sweep of score cut-offs.
class Performancer
  def initialize
    @control = {} # node => array of reference labels
  end

  # Loads the reference associations used as ground truth.
  #
  # ref_array - array of [node, label] pairs. Pairs whose label is '-' are
  #             ignored.
  #
  # Returns the accumulated control hash (node => array of labels).
  def load_control(ref_array)
    ref_array.each do |node1, node2|
      next if node2 == '-'

      (@control[node1] ||= []) << node2
    end
    @control
  end

  # Pandey 2007, Association Analysis-based Transformations for Protein Interaction Networks: A Function Prediction Case Study
  #
  # predictions - array of [node, label, score] triples.
  # cut_number  - number of intervals the score range is divided into
  #               (cut_number + 1 cut-offs are evaluated).
  # top_number  - maximum number of labels kept per node at each cut-off.
  #
  # Returns an array of [cut, precision, recall] rows, one per cut-off.
  # Returns an empty array when there are no predictions.
  def get_pred_rec(predictions, cut_number = 100, top_number = 10000)
    preds, limits = load_prediction(predictions)
    get_cuts(limits, cut_number).map do |cut|
      prec, rec = pred_rec(preds, cut, top_number)
      [cut, prec, rec]
    end
  end

  # Groups predictions by node and records the score range.
  #
  # pairs_array - array of [node, label, score] triples.
  #
  # Returns [pred, [min, max]] where pred maps each node to a pair of
  # parallel arrays [labels, scores]. min and max are nil when pairs_array
  # is empty.
  def load_prediction(pairs_array)
    pred = {}
    min = nil
    max = nil
    pairs_array.each do |key, label, score|
      min = score if min.nil? || score < min
      max = score if max.nil? || score > max
      labels, scores = (pred[key] ||= [[], []])
      labels << label
      scores << score
    end
    [pred, [min, max]]
  end

  # Computes precision and recall of the predictions at a single cut-off.
  #
  # preds - hash node => [labels, scores] as built by load_prediction.
  # cut   - minimum score a label needs to be considered.
  # top   - maximum number of labels considered per node.
  #
  # Returns [precision, recall]. A 0/0 ratio is reported as 0.0.
  def pred_rec(preds, cut, top)
    predicted_labels = 0 # m
    true_labels = 0      # n
    common_labels = 0    # k
    @control.each do |key, c_labels|
      true_labels += c_labels.length
      pred_info = preds[key]
      next if pred_info.nil?

      labels, scores = pred_info
      reliable_labels = get_reliable_labels(labels, scores, cut, top)
      predicted_labels += reliable_labels.length
      common_labels += (c_labels & reliable_labels).length
    end
    prec = common_labels.to_f / predicted_labels
    rec = common_labels.to_f / true_labels
    prec = 0.0 if prec.nan?
    rec = 0.0 if rec.nan?
    [prec, rec]
  end

  # Builds the evenly spaced list of cut-offs covering the score range.
  #
  # limits - [min, max] score pair.
  # n_cuts - number of intervals; n_cuts + 1 values are produced.
  #
  # Returns an array of Floats starting at limits.first, or an empty array
  # when the limits are unknown (nil), i.e. no scores were loaded.
  def get_cuts(limits, n_cuts)
    return [] if limits.first.nil? || limits.last.nil?

    step = (limits.last - limits.first).abs.fdiv(n_cuts)
    # Each cut is computed as start + n * step in decimal arithmetic instead
    # of being accumulated with repeated Float additions.
    step = BigDecimal(step, 10)
    start = limits.first
    (0..n_cuts).map { |n| (start + n * step).to_f }
  end

  # Selects the labels of one node that pass the cut-off.
  #
  # labels - array of labels.
  # scores - array of scores, parallel to labels.
  # cut    - minimum score (inclusive).
  # top    - maximum number of labels returned.
  #
  # Returns the labels ordered from highest to lowest score.
  # NOTE(review): the slice is [0..top-1], so top == 0 yields every label
  # rather than none - confirm whether callers rely on that.
  def get_reliable_labels(labels, scores, cut, top)
    passing = []
    scores.each_with_index do |score, i|
      passing << [labels[i], score] if score >= cut
    end
    ranked = passing.sort { |l1, l2| l2.last <=> l1.last }
    ranked[0..top - 1].map(&:first)
  end
end
|
@@ -0,0 +1,250 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
require "numo/linalg"
|
5
|
+
|
6
|
+
# Ranks the nodes of a score matrix by their mean score against groups of
# seed nodes, optionally in leave-one-out mode against reference nodes.
#
# NOTE(review): @matrix is indexed as @matrix[row_indexes, true] and answers
# sum/sort_index/shape, so it is presumably a Numo::NArray whose rows and
# columns follow the order of @nodes - confirm with the code that sets it.
class Ranker
  attr_accessor :matrix, :nodes, :seeds, :ranking

  def initialize
    @matrix = nil
    @nodes = [] # kernel_nodes
    @seeds = {} # genes_seed
    @reference_nodes = {}
    @ranking = {} # ranked_genes
  end

  # Loads the seed groups (see load_nodes_by_group for accepted input).
  def load_seeds(node_groups, sep: ',')
    @seeds = load_nodes_by_group(node_groups, sep: sep)
  end

  # Loads the reference groups (see load_nodes_by_group for accepted input).
  def load_references(node_groups, sep: ',')
    @reference_nodes = load_nodes_by_group(node_groups, sep: sep)
  end

  # Builds a group name => node list hash.
  #
  # node_groups - either the path of an existing file (parsed with
  #               load_node_groups_from_file) or a sep-separated string of
  #               node names, stored under the "seed_genes" group.
  # sep         - separator between node names.
  #
  # Returns the hash of groups.
  def load_nodes_by_group(node_groups, sep: ',')
    if File.exist?(node_groups)
      load_node_groups_from_file(node_groups, sep: sep)
    else
      { "seed_genes" => node_groups.split(sep) }
    end
  end

  # Parses a tab-separated file with one group per line:
  # group name, TAB, sep-separated node names.
  # Lines without a node column (e.g. blank lines) are skipped.
  #
  # Returns a hash group name => array of node names.
  def load_node_groups_from_file(file, sep: ',')
    group_nodes = {}
    # File.foreach closes the file once the block finishes.
    File.foreach(file) do |line|
      set_name, nodes = line.chomp.split("\t")
      next if nodes.nil?

      group_nodes[set_name] = nodes.split(sep)
    end
    group_nodes
  end

  # Appends one node name per line of file to @nodes.
  #
  # Returns @nodes.
  def load_nodes_from_file(file)
    File.foreach(file) do |line|
      # Non-bang chomp: chomp! returns nil when the line has no trailing
      # newline, which would store nil for an unterminated last line.
      @nodes << line.chomp
    end
    @nodes
  end

  # Ranks every seed group and stores the result in @ranking.
  #
  # leave_one_out - when true the seed groups are first replaced by their
  #                 leave-one-out iterations (see get_seed_leave_one_out);
  #                 groups with at most one reference node are then ranked
  #                 with get_individual_rank.
  # threads       - value passed to Parallel.map as in_processes.
  #
  # NOTE(review): Parallel is not required in this class; presumably the
  # caller loads the parallel gem - confirm.
  def do_ranking(leave_one_out: false, threads: 0)
    get_seed_leave_one_out if leave_one_out
    seed_indexes = get_seed_indexes
    seed_groups = @seeds.to_a # Array conversion needed for parallelization
    ranked_lists = Parallel.map(seed_groups, in_processes: threads) do |seed_name, seed|
      # The code in this block must NOT modify anything outside of it
      rank_list =
        if leave_one_out && @reference_nodes[seed_name].length <= 1
          get_individual_rank(seed, @reference_nodes[seed_name][0])
        else
          rank_by_seed(seed_indexes, seed) # Production mode
        end
      [seed_name, rank_list]
    end
    ranked_lists.each do |seed_name, rank_list| # Transfer results to hash
      @ranking[seed_name] = rank_list
    end
  end

  # Replaces @seeds by every leave-one-out combination of each seed group,
  # named "<group>_iteration_<n>", and replaces @reference_nodes by the
  # nodes to predict for each iteration: the left-out seed(s) plus the
  # references previously loaded for the group, without duplicates.
  def get_seed_leave_one_out
    new_seeds = {}
    genes2predict = {}
    @seeds.each do |seed_name, seeds|
      seeds.combination(seeds.length - 1).each_with_index do |one_out_seed, indx|
        seed_name_one_out = "#{seed_name}_iteration_#{indx}"
        targets = seeds - one_out_seed
        references = @reference_nodes[seed_name]
        targets += references unless references.nil?
        new_seeds[seed_name_one_out] = one_out_seed
        genes2predict[seed_name_one_out] = targets.uniq
      end
    end
    @seeds = new_seeds
    @reference_nodes = genes2predict
  end

  # Ranks all nodes by their mean matrix value over the rows of the seeds.
  #
  # seed_indexes - hash node name => position in @nodes (get_seed_indexes).
  # seeds        - node names of one seed group; names missing from
  #                seed_indexes are ignored.
  #
  # Returns rows [node_name, value, rank_percentage, rank, absolute_rank]
  # ordered from largest to smallest value, or an empty array when none of
  # the seeds has an index.
  def rank_by_seed(seed_indexes, seeds)
    ordered_gene_score = []
    genes_pos = seeds.map { |s| seed_indexes[s] }.compact
    number_of_seed_genes = genes_pos.length
    number_of_all_nodes = @nodes.length

    if number_of_seed_genes > 0
      subsets_gen_values = @matrix[genes_pos, true]
      integrated_gen_values = subsets_gen_values.sum(0)
      gen_list = 1.fdiv(number_of_seed_genes) * integrated_gen_values.inplace

      ordered_indexes = gen_list.sort_index # from smallest to largest

      last_val = nil
      n_elements = ordered_indexes.shape.first
      n_elements.times do |pos|
        order_index = ordered_indexes[pos]
        val = gen_list[order_index]
        node_name = @nodes[order_index]

        rank = get_position_for_items_with_same_score(pos, val, last_val, gen_list, n_elements, ordered_gene_score) # number of items behind
        rank = n_elements - rank # number of nodes below or equal
        rank_percentage = rank.fdiv(number_of_all_nodes)

        ordered_gene_score << [node_name, val, rank_percentage, rank]
        last_val = val
      end

      ordered_gene_score = ordered_gene_score.reverse # from largest to smallest
      ordered_gene_score = add_absolute_rank_column(ordered_gene_score)
    end

    ordered_gene_score
  end

  # Number of items counted behind the item at pos while walking the values
  # from smallest to largest. The first item gets 0; a value larger than the
  # previous one gets pos; otherwise (tie) the count of the previous row is
  # reused, recovered from its stored rank.
  #
  # gen_list is accepted for interface compatibility but not used.
  def get_position_for_items_with_same_score(pos, val, prev_val, gen_list, n_elements, ordered_gene_score)
    return 0 if prev_val.nil?
    return pos if prev_val < val

    n_elements - ordered_gene_score.last[3]
  end

  # Appends an absolute rank to every row of ranking (rows are modified in
  # place). The rank starts at 1 and increases by one each time column 2
  # (rank percentage) is greater than in the previous row.
  #
  # Returns a new array holding the same, extended, rows.
  def add_absolute_rank_column(ranking)
    absolute_rank = 1
    ranking.each_with_index.map do |row, row_pos|
      absolute_rank += 1 if row_pos > 0 && row[2] > ranking[row_pos - 1][2]
      row << absolute_rank
    end
  end

  # Ranks a single node of interest against one seed group.
  #
  # Returns a one-row array
  # [[node_of_interest, value, rank_percentage, rank, absolute_rank]], or an
  # empty array when no seed or the node of interest is missing from @nodes.
  def get_individual_rank(seed_genes, node_of_interest)
    genes_pos = get_nodes_indexes(seed_genes)
    node_of_interest_pos = @nodes.find_index(node_of_interest)
    ordered_gene_score = []

    if !genes_pos.empty? && !node_of_interest_pos.nil?
      subsets_gen_values = @matrix[genes_pos, true]
      integrated_gen_values = subsets_gen_values.sum(0)
      integrated_gen_values = 1.fdiv(genes_pos.length) * integrated_gen_values.inplace

      ref_value = integrated_gen_values[node_of_interest_pos]

      # Counts the values greater than or equal to the reference value
      # (the reference itself included).
      members_below_test = 0
      integrated_gen_values.each do |gen_value|
        members_below_test += 1 if gen_value >= ref_value
      end

      rank_percentage = members_below_test.fdiv(@nodes.length)
      rank = members_below_test
      rank_absolute = get_individual_absolute_rank(integrated_gen_values.to_a, ref_value)

      ordered_gene_score << [node_of_interest, ref_value, rank_percentage, rank, rank_absolute]
    end

    ordered_gene_score
  end

  # 1-based position of ref_value among the distinct values of values_list
  # ordered from largest to smallest; nil when ref_value is not present.
  def get_individual_absolute_rank(values_list, ref_value)
    position = values_list.sort.reverse.uniq.index(ref_value)
    position.nil? ? nil : position + 1
  end

  # Extracts from @ranking the rows of the reference nodes of each group.
  #
  # Returns a hash group name => rows [node, value, ...] ordered from largest
  # to smallest value. Groups without references or with an empty ranking
  # are omitted.
  def get_reference_ranks
    filtered_ranked_genes = {}

    @ranking.each do |seed_name, ranking|
      references = @reference_nodes[seed_name]
      next if references.nil? || ranking.empty?

      ranking_by_node = array2hash(ranking, 0, (1..ranking[0].length))
      rows = references.map do |reference|
        rank = ranking_by_node[reference]
        rank.nil? ? nil : [reference] + rank
      end
      filtered_ranked_genes[seed_name] = rows.compact.sort_by { |row| -row[1] }
    end
    filtered_ranked_genes
  end

  # Indexes the rows of arr by one of their columns.
  #
  # arr    - array of rows (arrays).
  # key    - column whose value becomes the hash key.
  # values - index or range of the columns stored as the hash value.
  #
  # Returns the hash; later rows overwrite earlier ones with the same key.
  def array2hash(arr, key, values)
    arr.each_with_object({}) { |els, h| h[els[key]] = els[values] }
  end

  # Returns a hash group name => first top_n rows of its ranking.
  # NOTE(review): the slice is [0..top_n-1], so top_n == 0 yields the whole
  # ranking rather than nothing - confirm whether callers rely on that.
  def get_top(top_n)
    top_ranked_genes = {}
    @ranking.each do |seed_name, ranking|
      top_ranked_genes[seed_name] = ranking[0..top_n - 1] unless ranking.nil?
    end
    top_ranked_genes
  end

  # Positions in @nodes of the given node names; unknown names are dropped.
  def get_nodes_indexes(nodes)
    nodes.map { |node| @nodes.find_index(node) }.compact
  end

  # Returns a hash node name => position in @nodes for every seed of every
  # group that is present in @nodes.
  def get_seed_indexes
    @seeds.values.flatten.uniq.each_with_object({}) do |node, indexes|
      indx = @nodes.index(node)
      indexes[node] = indx unless indx.nil?
    end
  end
end
|
data/lib/NetAnalyzer/version.rb
CHANGED
data/lib/NetAnalyzer.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require "NetAnalyzer/version"
|
2
2
|
require "NetAnalyzer/nodes"
|
3
|
+
require "NetAnalyzer/net_parser"
|
4
|
+
require "NetAnalyzer/net_plotter"
|
5
|
+
require "NetAnalyzer/performancer"
|
6
|
+
require "NetAnalyzer/adv_mat_calc"
|
3
7
|
require "NetAnalyzer/network"
|
8
|
+
require "NetAnalyzer/ranker"
|
4
9
|
|
5
10
|
module NetAnalyzer
|
6
11
|
# Your code goes here...
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: NetAnalyzer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elena Rojano, Pedro Seoane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-09-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -220,16 +220,19 @@ dependencies:
|
|
220
220
|
- - ">="
|
221
221
|
- !ruby/object:Gem::Version
|
222
222
|
version: '0'
|
223
|
-
description:
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
the results
|
223
|
+
description: 'DEPRECATED PROJECT. MIGRATED TO PYTHON: https://github.com/seoanezonjic/NetAnalyzer.
|
224
|
+
NetAnalyzer is a useful network analysis tool developed in Ruby that can 1) analyse
|
225
|
+
any type of unweighted network, regardless of the number of layers, 2) calculate
|
226
|
+
the relationship between different layers, using various association indices (Jaccard,
|
227
|
+
Simpson, PCC, geometric, cosine and hypergeometric) and 3) validate the results'
|
228
228
|
email:
|
229
229
|
- elenarojano@uma.es, seoanezonjic@hotmail.com
|
230
230
|
executables:
|
231
231
|
- NetAnalyzer.rb
|
232
232
|
- console
|
233
|
+
- randomize_clustering.rb
|
234
|
+
- randomize_network.rb
|
235
|
+
- ranker_gene.rb
|
233
236
|
- setup
|
234
237
|
- text2binary_matrix.rb
|
235
238
|
extensions: []
|
@@ -245,11 +248,19 @@ files:
|
|
245
248
|
- Rakefile
|
246
249
|
- bin/NetAnalyzer.rb
|
247
250
|
- bin/console
|
251
|
+
- bin/randomize_clustering.rb
|
252
|
+
- bin/randomize_network.rb
|
253
|
+
- bin/ranker_gene.rb
|
248
254
|
- bin/setup
|
249
255
|
- bin/text2binary_matrix.rb
|
250
256
|
- lib/NetAnalyzer.rb
|
257
|
+
- lib/NetAnalyzer/adv_mat_calc.rb
|
258
|
+
- lib/NetAnalyzer/net_parser.rb
|
259
|
+
- lib/NetAnalyzer/net_plotter.rb
|
251
260
|
- lib/NetAnalyzer/network.rb
|
252
261
|
- lib/NetAnalyzer/nodes.rb
|
262
|
+
- lib/NetAnalyzer/performancer.rb
|
263
|
+
- lib/NetAnalyzer/ranker.rb
|
253
264
|
- lib/NetAnalyzer/templates/ElGrapho.min.js
|
254
265
|
- lib/NetAnalyzer/templates/cytoscape.erb
|
255
266
|
- lib/NetAnalyzer/templates/cytoscape.min.js
|
@@ -278,7 +289,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
278
289
|
- !ruby/object:Gem::Version
|
279
290
|
version: '0'
|
280
291
|
requirements: []
|
281
|
-
rubygems_version: 3.
|
292
|
+
rubygems_version: 3.3.7
|
282
293
|
signing_key:
|
283
294
|
specification_version: 4
|
284
295
|
summary: Network analysis tool that calculate and validate different association indices.
|