NetAnalyzer 0.1.5 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/NetAnalyzer.gemspec +15 -5
- data/README.md +14 -2
- data/Rakefile +13 -9
- data/bin/NetAnalyzer.rb +183 -30
- data/bin/text2binary_matrix.rb +294 -0
- data/lib/NetAnalyzer/network.rb +651 -87
- data/lib/NetAnalyzer/templates/ElGrapho.min.js +28 -0
- data/lib/NetAnalyzer/templates/cytoscape.erb +65 -0
- data/lib/NetAnalyzer/templates/cytoscape.min.js +32 -0
- data/lib/NetAnalyzer/templates/el_grapho.erb +89 -0
- data/lib/NetAnalyzer/templates/pako.min.js +1 -0
- data/lib/NetAnalyzer/templates/sigma.erb +132 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +2 -0
- metadata +171 -24
data/lib/NetAnalyzer/network.rb
CHANGED
@@ -1,22 +1,54 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require '
|
1
|
+
require 'rubystats'
|
2
|
+
require 'gv'
|
3
|
+
#require 'nmatrix'
|
4
|
+
#require 'nmatrix/lapacke'
|
5
|
+
require 'numo/narray'
|
6
|
+
require 'numo/linalg'
|
7
|
+
require 'parallel'
|
8
|
+
|
9
|
+
#require 'pp'
|
4
10
|
require 'bigdecimal'
|
5
11
|
require 'benchmark'
|
12
|
+
#require 'nmatrix_expansion'
|
13
|
+
|
14
|
+
|
15
|
+
#For javascrip plotting
|
16
|
+
require 'erb'
|
17
|
+
require 'base64'
|
18
|
+
require 'json'
|
19
|
+
require 'zlib'
|
20
|
+
|
21
|
+
require 'semtools'
|
22
|
+
require 'expcalc'
|
23
|
+
# Directory holding the bundled ERB/JS visualisation templates, resolved
# relative to this source file.
TEMPLATES = File.join(File.dirname(__FILE__), 'templates')
|
6
24
|
|
7
25
|
class Network
|
8
26
|
|
9
|
-
attr_accessor :association_values, :control_connections
|
27
|
+
attr_accessor :association_values, :control_connections, :kernels, :reference_nodes, :group_nodes, :threads
|
10
28
|
|
11
29
|
## BASIC METHODS
|
12
30
|
############################################################
|
13
31
|
# Builds an empty multi-layered network restricted to the given layers.
# layers: Array of layer identifiers admitted in this network.
def initialize(layers)
  @threads = 0 # extra worker processes handed to Parallel (0 = run serially)
  @nodes = {} # node_id => node object (responds to #id and #type)
  @edges = {} # node_id => Array of connected node_ids
  @reference_nodes = [] # highlighted nodes; also drive compute_group_metrics
  @group_nodes = {} # group_id => Array of member node_ids
  @adjacency_matrices = {} # layer pair => [matrix, row_ids, col_ids]
  @kernels = {} # layer pair => kernel matrix (see get_kernel)
  @layers = layers
  @association_values = {} # association method (Symbol) => relations list
  @control_connections = {}
  @compute_pairs = :conn # pair policy for get_all_pairs (:conn or :all)
  @compute_autorelations = true # pair nodes inside one layer set?
  @loaded_obos = [] # obo file paths already parsed (avoid double loads)
  @ontologies = [] # Ontology objects, parallel to @loaded_obos
  @layer_ontologies = {} # layer name => Ontology
end
|
48
|
+
|
49
|
+
# Configures how get_all_pairs enumerates node pairs.
# use_pairs: :conn (only pairs with overlapping neighbourhoods) or :all.
# get_autorelations: whether nodes of the same layer set are paired together.
def set_compute_pairs(use_pairs, get_autorelations)
  @compute_autorelations = get_autorelations
  @compute_pairs = use_pairs
end
|
21
53
|
|
22
54
|
def add_node(nodeID, nodeType = 0)
|
@@ -37,8 +69,49 @@ class Network
|
|
37
69
|
end
|
38
70
|
end
|
39
71
|
|
72
|
+
# Removes nodes from the network.
# node_list: node ids to act on.
# mode: 'd' deletes the listed nodes; 'r' retains ONLY the listed nodes.
# Edge lists touching removed nodes are pruned, and nodes left without any
# connection are dropped from @edges.
# Fix: Hash#reject!/select! yield |key, value|; the previous single-parameter
# blocks bound the whole [id, node] pair, so `node_list.include?(n)` never
# matched and @nodes was never filtered correctly. The blocks now
# destructure the pair and test the key.
def delete_nodes(node_list, mode='d')
  if mode == 'd'
    @nodes.reject!{|n, node| node_list.include?(n)}
    @edges.reject!{|n, connections| node_list.include?(n)}
    @edges.each do |n, connections|
      connections.reject!{|c| node_list.include?(c)}
    end
  elsif mode == 'r'
    @nodes.select!{|n, node| node_list.include?(n)}
    @edges.select!{|n, connections| node_list.include?(n)}
    @edges.each do |n, connections|
      connections.select!{|c| node_list.include?(c)}
    end
  end
  @edges.reject!{|n, connections| connections.empty?} # prune isolated entries
end
|
88
|
+
|
89
|
+
# Ids of node_id's neighbours that belong to the given layer.
def get_connected_nodes(node_id, from_layer)
  layer_mates = []
  @edges[node_id].each do |neigh_id|
    neigh = @nodes[neigh_id]
    layer_mates << neigh.id if neigh.type == from_layer
  end
  return layer_mates
end
|
92
|
+
|
93
|
+
# Ids of every node assigned to the given layer, in insertion order.
def get_nodes_from_layer(from_layer)
  member_ids = []
  @nodes.each_value do |node|
    member_ids << node.id if node.type == from_layer
  end
  return member_ids
end
|
96
|
+
|
97
|
+
# Builds the bipartite neighbourhood between two layers.
# For each id in from_layer_node_ids (nodes of from_layer), walks its
# neighbours and, for every neighbour belonging to to_layer, records that
# neighbour's own connections back into from_layer.
# Returns: Hash { to_layer_node_id => Array of from_layer node ids }.
# NOTE(review): assumes every id in from_layer_node_ids has an entry in
# @edges; a node with no edge list would raise on nil — confirm callers.
def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer)
  bipartite_subgraph = {}
  from_layer_node_ids.each do |from_layer_node_id|
    connected_nodes = @edges[from_layer_node_id]
    connected_nodes.each do |connected_node|
      if @nodes[connected_node].type == to_layer
        query = bipartite_subgraph[connected_node]
        if query.nil? # compute each to_layer node's neighbourhood only once
          bipartite_subgraph[connected_node] = get_connected_nodes(connected_node, from_layer)
        end
      end
    end
  end
  return bipartite_subgraph
end
|
112
|
+
|
40
113
|
def load_network_by_pairs(file, layers, split_character="\t")
|
41
|
-
File.open(file).each
|
114
|
+
File.open(file).each do |line|
|
42
115
|
line.chomp!
|
43
116
|
pair = line.split(split_character)
|
44
117
|
node1 = pair[0]
|
@@ -49,57 +122,371 @@ class Network
|
|
49
122
|
end
|
50
123
|
end
|
51
124
|
|
125
|
+
# Loads a square adjacency matrix stored in NumPy .npy binary format.
# node_file: list file with one node name per matrix row/column
# (parsed by load_input_list, defined elsewhere in this project).
# The matrix is cached in @adjacency_matrices keyed by the first element of
# each layer definition pair.
def load_network_by_bin_matrix(input_file, node_file, layers)
  node_names = load_input_list(node_file)
  @adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='npy'), node_names, node_names]
end
|
129
|
+
|
130
|
+
# Loads a square adjacency matrix stored as a plain text table.
# splitChar: column separator forwarded to Numo::NArray.load.
# node_file: list file with one node name per matrix row/column.
# Cached in @adjacency_matrices exactly like load_network_by_bin_matrix.
def load_network_by_plain_matrix(input_file, node_file, layers, splitChar)
  node_names = load_input_list(node_file)
  @adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='txt', splitChar=splitChar), node_names, node_names]
end
|
134
|
+
|
52
135
|
# Total number of edges: half the summed node degrees, since every
# undirected edge is counted once per endpoint.
def get_edge_number
  degree_sum = 0
  get_degree.each_value { |node_degree| degree_sum += node_degree }
  return degree_sum / 2
end
|
56
139
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
140
|
+
# Returns { node_id => degree } for every node present in @edges.
# zscore: when true, degrees are standardized ((d - mean) / sd) instead of
# returned as raw neighbour counts.
# Fix: the transform was guarded by `if !zscore`, so raw-degree requests got
# z-scores and vice versa (get_node_attributes expects raw counts for
# 'get_degree' and standardized values for 'get_degreeZ').
def get_degree(zscore=false)
  degree = {}
  @edges.each do |id, nodes|
    degree[id] = nodes.length
  end
  if zscore
    degree_values = degree.values
    # Array#mean / #standard_deviation are provided by expcalc extensions.
    mean_degree = degree_values.mean
    std_degree = degree_values.standard_deviation
    degree.transform_values!{|v| (v - mean_degree).fdiv(std_degree)}
  end
  return degree
end
|
153
|
+
|
154
|
+
# Collects per-node attribute columns by attribute name.
# Supported names: 'get_degree' (raw) and 'get_degreeZ' (standardized).
# Returns rows of [node_id, attr1, attr2, ...] in attr_names order.
def get_node_attributes(attr_names)
  columns = []
  attr_names.each do |attr_name|
    case attr_name
    when 'get_degree'
      columns << get_degree
    when 'get_degreeZ'
      columns << get_degree(true)
    end
  end
  node_ids = columns.first.keys
  node_ids.map do |node_id|
    [node_id].concat(columns.map { |col| col[node_id] })
  end
end
|
170
|
+
|
171
|
+
# Renders the network either through graphviz (PNG, via plot_dot) or as a
# standalone HTML page built from a bundled ERB template (el_grapho,
# cytoscape or sigma).
# options[:method] selects the backend; options[:output_file] is the output
# path prefix ('.html' is appended for the JS backends).
# NOTE(review): an unrecognised :method other than 'graphviz' leaves
# `template` nil and crashes on `template + '.erb'` — confirm the CLI
# validates the method name upstream.
def plot_network(options = {})
  if options[:method] == 'graphviz'
    plot_dot(options)
  else
    if options[:method] == 'elgrapho'
      template = 'el_grapho'
    elsif options[:method] == 'cytoscape'
      template = 'cytoscape'
    elsif options[:method] == 'sigma'
      template = 'sigma'
    end
    # The ERB template reads network state through `binding`.
    renderered_template = ERB.new(File.open(File.join(TEMPLATES, template + '.erb')).read).result(binding)
    File.open(options[:output_file] + '.html', 'w'){|f| f.puts renderered_template}
  end
end
|
186
|
+
|
187
|
+
# Draws the network as a PNG through graphviz (gv gem).
# user_options: :output_file (path prefix) and :layout (graphviz engine,
# defaults to 'sfdp').
# Layers are filled from a fixed colour palette, reference nodes are painted
# red and each node group gets a coloured border.
def plot_dot(user_options = {}) # input keys: layout
  options = {layout: "sfdp"}
  options = options.merge(user_options)
  graphviz_colors = %w[lightsteelblue1 lightyellow1 lightgray orchid2]
  palette = {} # layer => fill colour, consumed in @layers order
  @layers.each do |layer|
    palette[layer] = graphviz_colors.shift
  end
  graph = GV::Graph.open('g', type = :undirected)
  plotted_edges = {} # sorted "a_b" pair symbol => true; avoids drawing both directions
  @edges.each do |nodeID, associatedIDs|
    associatedIDs.each do |associatedID|
      pair = [nodeID, associatedID].sort.join('_').to_sym
      if !plotted_edges[pair]
        graph.edge 'e',
          graph.node(nodeID, label: '', style: 'filled', fillcolor: palette[@nodes[nodeID].type]),
          graph.node(associatedID, label: '', style: 'filled' , fillcolor: palette[@nodes[associatedID].type])
        plotted_edges[pair] = true
      end
    end
  end
  # Re-declaring a node overrides its previous styling.
  @reference_nodes.each do |nodeID|
    graph.node(nodeID, style: 'filled', fillcolor: 'firebrick1', label: '')
  end
  graphviz_border_colors = %w[blue darkorange red olivedrab4]
  @group_nodes.each do |groupID, gNodes|
    border_color = graphviz_border_colors.shift
    gNodes.each do |nodeID|
      graph.node(nodeID, color: border_color, penwidth: '10', label: '')
    end
  end
  graph[:overlap] = false
  STDERR.puts 'Save graph'
  graph.save(options[:output_file] + '.png', format='png', layout=options[:layout])
end
|
222
|
+
|
223
|
+
# Computes per-group topology metrics for every community in @group_nodes and
# writes them as a tab-separated table to output_filename.
# Columns: group, comparative_degree, avg_sht_path and, when reference nodes
# exist, node_com_assoc_by_edge / node_com_assoc_by_node (computed against
# the FIRST reference node only).
def compute_group_metrics(output_filename)
  metrics = []
  header = ['group']
  @group_nodes.keys.each do |k|
    metrics << [k] # one row per group, in @group_nodes order
  end
  header << 'comparative_degree'
  comparative_degree = communities_comparative_degree(@group_nodes)
  comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
  header << 'avg_sht_path'
  avg_sht_path = communities_avg_sht_path(@group_nodes)
  avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
  if !@reference_nodes.empty?
    header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
    node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
    # val is [[by_edge, by_node]]; the nested pair is flattened by Array#join below.
    node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
  end
  File.open(output_filename, 'w') do |f|
    f.puts header.join("\t")
    metrics.each do |gr|
      f.puts gr.join("\t")
    end
  end
end
|
247
|
+
|
248
|
+
# Formats a metric value for tabular output: nil becomes the literal 'NULL'.
def replace_nil_vals(val)
  if val.nil?
    'NULL'
  else
    val
  end
end
|
251
|
+
|
252
|
+
# Comparative degree (Girvan-Newman mixing ratio) for each community.
# coms: Hash { community_id => Array of node ids }.
# Returns an Array of values in coms order.
def communities_comparative_degree(coms)
  coms.map { |com_id, com| compute_comparative_degree(com) }
end
|
259
|
+
|
260
|
+
# Average shortest-path length for each community (paths themselves are
# discarded). Returns an Array of averages in coms order; an entry is nil
# when a community contains disconnected pairs.
def communities_avg_sht_path(coms)
  coms.map do |com_id, com|
    avg_dist, _paths = compute_avg_sht_path(com)
    avg_dist
  end
end
|
268
|
+
|
269
|
+
# Node-community association of ref_node against each community.
# Returns an Array of single-element rows [[by_edge, by_node]], one per
# community in coms order (nested so callers can concat rows).
def compute_node_com_assoc_in_precomputed_communities(coms, ref_node)
  coms.map { |com_id, com| [compute_node_com_assoc(com, ref_node)] }
end
|
276
|
+
|
277
|
+
# Girvan-Newman style mixing parameter for one community: the fraction of
# the members' edges that leave the community.
# (see Girvan-Newman Benchmark control parameter,
# http://networksciencebook.com/chapter/9#testing — communities chapter)
# Nodes without an @edges entry are ignored.
def compute_comparative_degree(com)
  internal_degree = 0
  external_degree = 0
  com.each do |member|
    neighbours = @edges[member]
    next if neighbours.nil?
    internal_degree += (neighbours & com).length
    external_degree += (neighbours - com).length
  end
  return external_degree.fdiv(external_degree + internal_degree)
end
|
289
|
+
|
290
|
+
# Average shortest-path length over every unordered node pair of a community.
# com: Array of node ids. paths: when true, the individual BFS paths are also
# reconstructed and returned.
# Returns [average_distance, all_paths]; the average is nil when at least one
# pair is disconnected (BFS returned nil for it).
# Distances from a fixed start node are computed in parallel across @threads
# extra processes (per the parallel gem, in_processes: 0 runs serially).
def compute_avg_sht_path(com, paths=false)
  path_lengths = []
  all_paths = []
  group = com.dup # consumed with shift; copy protects the caller's array
  while !group.empty?
    node_start = group.shift
    sht_paths = Parallel.map(group, in_processes: @threads) do |node_stop|
      #group.each do |node_stop|
      dist, path = shortest_path(node_start, node_stop, paths)
      [dist, path]
      #path_lengths << dist if !dist.nil?
      #all_paths << path if !path.empty?
    end
    sht_paths.each do |dist, path|
      path_lengths << dist
      all_paths << path
    end
  end
  if path_lengths.include?(nil) # some pair unreachable -> average undefined
    avg_sht_path = nil
  else
    avg_sht_path = path_lengths.inject(0){|sum,l| sum + l}.fdiv(path_lengths.length)
  end
  return avg_sht_path, all_paths
end
|
315
|
+
|
316
|
+
# https://pythoninwonderland.wordpress.com/2017/03/18/how-to-implement-breadth-first-search-in-python/
# Finds the shortest path between 2 nodes of the graph using BFS.
# start/goal: node ids. paths: when true, predecessor links are recorded so
# the concrete path can be rebuilt with build_path.
# Returns [distance, path]; distance is nil and path [] when the goal is
# unreachable. When the goal is found but paths is false, path is nil.
def bfs_shortest_path(start, goal, paths=false)
  dist = nil
  explored = {} # keep track of explored nodes
  previous = {} # child => parent, only filled when paths is requested
  queue = [[start, 0]] # pending [node, distance] frontier (unshift + pop == FIFO)
  is_goal = false
  while !queue.empty? && !is_goal # loop until all reachable nodes are checked
    node, dist = queue.pop # take the oldest frontier entry
    if !explored.include?(node)
      neighbours = @edges[node]
      explored[node] = true # mark node as explored
      next if neighbours.nil? # isolated node: nothing to expand
      dist += 1 # neighbours sit one hop further than the current node
      neighbours.each do |neighbour|
        next if explored.include?(neighbour)
        queue.unshift([neighbour, dist]) # push it into the queue
        previous[neighbour] = node if paths
        if neighbour == goal # stop as soon as the goal is enqueued
          is_goal = true
          break
        end
      end
    end
  end
  if is_goal
    path = build_path(previous, start, goal) if paths # nil path when paths is off
  else
    dist = nil
    path = []
  end
  return dist, path
end
|
350
|
+
|
351
|
+
# Rebuilds a BFS path from the predecessor map produced by
# bfs_shortest_path, walking backwards from stopNode to startNode.
# Returns the node list in reverse travel order (stopNode first).
def build_path(previous, startNode, stopNode)
  currentNode = stopNode
  path = [currentNode]
  until currentNode == startNode
    currentNode = previous[currentNode]
    path << currentNode
  end
  return path
end
|
361
|
+
|
362
|
+
# Shortest distance (and optionally the path) between two nodes.
# Currently delegates to the BFS implementation; bidirectional search was
# considered as an alternative:
#   https://betterprogramming.pub/5-ways-to-find-the-shortest-path-in-a-graph-88cfefd0030f
#   https://efficientcodeblog.wordpress.com/2017/12/13/bidirectional-search-two-end-bfs/
def shortest_path(node_start, node_stop, paths=false)
  bfs_shortest_path(node_start, node_stop, paths)
end
|
369
|
+
|
370
|
+
# Expands every node group with the intermediate nodes found on the
# shortest paths between its members.
# expand_method: only 'sht_path' is supported (BFS-based; a Dijkstra variant
# could replace it); other values yield an empty result.
# Returns { group_id => expanded node id Array }.
def expand_clusters(expand_method)
  clusters = {}
  @group_nodes.each do |cluster_id, members|
    next unless expand_method == 'sht_path'
    _dist, paths = compute_avg_sht_path(members, true)
    path_nodes = paths.flatten.uniq
    # Union keeps original members even when some pairs were not connected.
    clusters[cluster_id] = members | path_nodes
  end
  return clusters
end
|
381
|
+
|
382
|
+
# Association between a community and a reference node.
# com: Array of member node ids; ref_node: the reference node id.
# Returns [by_edge, by_node]:
#   by_edge = (direct + shared-neighbour connections) / community-specific connections
#   by_node = (direct connections + distinct shared neighbours) / distinct specific neighbours
# Fix: when ref_node has no @edges entry, `nodeIDneigh - refNneigh` raised
# TypeError (nil is not an Array); a missing neighbour list is now treated
# as empty.
def compute_node_com_assoc(com, ref_node)
  ref_cons = 0 # members directly linked to ref_node
  ref_secondary_cons = 0 # member links shared with ref_node's neighbourhood
  secondary_nodes = {}
  other_cons = 0 # member links unrelated to ref_node
  other_nodes = {}

  refNneigh = @edges[ref_node] || []
  com.each do |nodeID|
    nodeIDneigh = @edges[nodeID]
    next if nodeIDneigh.nil?
    ref_cons += 1 if nodeIDneigh.include?(ref_node)
    if !refNneigh.empty?
      common_nodes = nodeIDneigh & refNneigh
      common_nodes.each {|id| secondary_nodes[id] = true}
      ref_secondary_cons += common_nodes.length
    end
    specific_nodes = nodeIDneigh - refNneigh - [ref_node]
    specific_nodes.each {|id| other_nodes[id] = true}
    other_cons += specific_nodes.length
  end
  by_edge = (ref_cons + ref_secondary_cons).fdiv(other_cons)
  by_node = (ref_cons + secondary_nodes.length).fdiv(other_nodes.length)
  return by_edge, by_node
end
|
70
407
|
|
71
408
|
# Lengths of the neighbourhood intersections of every candidate node pair
# (pairing policy comes from get_all_pairs).
def get_all_intersections
  overlap_sizes = get_all_pairs do |node1, node2|
    intersection(node1, node2).length
  end
  return overlap_sizes
end
|
78
414
|
|
79
415
|
# Enumerates candidate node pairs, yields each pair to the caller's block and
# collects the block results.
# args[:layers] (default :all) restricts the node set — see collect_nodes.
# Pairing policy (configured through set_compute_pairs):
#   @compute_autorelations true  -> unordered pairs within one node list
#   @compute_autorelations false -> cross pairs between two layer lists
#   @compute_pairs :all  -> every pair; :conn -> only pairs whose neighbour
#   sets overlap (exist_connections?, defined elsewhere in this class).
# Pair evaluation is distributed with Parallel over @threads processes.
def get_all_pairs(args = {})
  all_pairs = []
  default = {:layers => :all}
  args = default.merge(args)
  nodeIDsA, nodeIDsB = collect_nodes(args)
  if @compute_autorelations
    if @compute_pairs == :all
      while !nodeIDsA.empty?
        node1 = nodeIDsA.shift # pair node1 with every remaining node
        pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
          yield(node1, node2)
        end
        all_pairs.concat(pairs)
      end
    elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
      while !nodeIDsA.empty?
        node1 = nodeIDsA.shift
        ids_connected_to_n1 = @edges[node1]
        pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
          result = nil
          ids_connected_to_n2 = @edges[node2]
          if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
            result = yield(node1, node2)
          end
          result
        end
        pairs.compact! # drop pairs skipped by the connectivity filter
        all_pairs.concat(pairs)
      end
    end
  else
    #MAIN METHOD
    if @compute_pairs == :conn
      all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
        ids_connected_to_n1 = @edges[node1]
        node1_pairs = []
        nodeIDsB.each do |node2|
          ids_connected_to_n2 = @edges[node2]
          if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
            node1_pairs << yield(node1, node2)
          end
        end
        node1_pairs
      end
      all_pairs.flatten!(1) # one level only: keep each block result intact
    elsif @compute_pairs == :all
      raise 'Not implemented'
    end
  end

  return all_pairs
end
|
467
|
+
|
468
|
+
# Selects the node id lists that get_all_pairs will combine.
# With autorelations: one list (all nodes, or the union of args[:layers]);
# the second slot stays nil. Without autorelations: one list per layer from
# args[:layers][0] and [1] (both nil when args[:layers] == :all).
def collect_nodes(args)
  nodeIDsA = nil
  nodeIDsB = nil
  if @compute_autorelations
    if args[:layers] == :all
      nodeIDsA = @nodes.keys
    else
      nodeIDsA = []
      args[:layers].each do |layer|
        @nodes.each do |id, node|
          nodeIDsA << id if node.type == layer
        end
      end
    end
  elsif args[:layers] != :all
    nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
    nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
  end
  return nodeIDsA, nodeIDsB
end
|
102
488
|
|
489
|
+
|
103
490
|
def get_nodes_layer(layers)
|
104
491
|
#for creating ny value in hypergeometric and pcc index
|
105
492
|
nodes = []
|
@@ -123,17 +510,16 @@ class Network
|
|
123
510
|
def generate_adjacency_matrix(layerA, layerB)
|
124
511
|
layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
|
125
512
|
layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
|
126
|
-
|
127
|
-
layerAidNodes.
|
128
|
-
layerBidNodes.
|
513
|
+
matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
|
514
|
+
layerAidNodes.each_with_index do |nodeA, i|
|
515
|
+
layerBidNodes.each_with_index do |nodeB, j|
|
129
516
|
if @edges[nodeB].include?(nodeA)
|
130
|
-
|
517
|
+
matrix[i, j] = 1
|
131
518
|
else
|
132
|
-
|
519
|
+
matrix[i, j] = 0
|
133
520
|
end
|
134
521
|
end
|
135
522
|
end
|
136
|
-
matrix = NMatrix.new([layerAidNodes.length, layerBidNodes.length], adjacency_matrix)
|
137
523
|
all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
|
138
524
|
@adjacency_matrices[[layerA, layerB]] = all_info_matrix
|
139
525
|
return all_info_matrix
|
@@ -161,6 +547,14 @@ class Network
|
|
161
547
|
relations = get_pcc_associations(layers, base_layer)
|
162
548
|
elsif meth == :hypergeometric #all networks
|
163
549
|
relations = get_hypergeometric_associations(layers, base_layer)
|
550
|
+
elsif meth == :hypergeometric_bf #all networks
|
551
|
+
relations = get_hypergeometric_associations(layers, base_layer, :bonferroni)
|
552
|
+
elsif meth == :hypergeometric_bh #all networks
|
553
|
+
relations = get_hypergeometric_associations(layers, base_layer, :benjamini_hochberg)
|
554
|
+
elsif meth == :hypergeometric_elim #tripartite networks?
|
555
|
+
relations = get_hypergeometric_associations_with_topology(layers, base_layer, :elim)
|
556
|
+
elsif meth == :hypergeometric_weight #tripartite networks?
|
557
|
+
relations = get_hypergeometric_associations_with_topology(layers, base_layer, :weight)
|
164
558
|
elsif meth == :csi #all networks
|
165
559
|
relations = get_csi_associations(layers, base_layer)
|
166
560
|
elsif meth == :transference #tripartite networks
|
@@ -173,20 +567,19 @@ class Network
|
|
173
567
|
#---------------------------------------------------------
|
174
568
|
# Alaimo 2014, doi: 10.3389/fbioe.2014.00071
|
175
569
|
# Association by transference of indirect resources across two adjacency
# matrices that share a middle layer. Alaimo 2014, doi: 10.3389/fbioe.2014.00071
# firstPairLayers / secondPairLayers: keys into @adjacency_matrices.
# lambda_value1/2: weighting parameters forwarded to graphWeights (defined
# elsewhere in this class).
# Returns the relations list and caches it under @association_values[:transference].
def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5)
  relations = []
  matrix1 = @adjacency_matrices[firstPairLayers].first
  rowIds = @adjacency_matrices[firstPairLayers][1]
  matrix2 = @adjacency_matrices[secondPairLayers].first
  colIds = @adjacency_matrices[secondPairLayers][2]
  m1rowNumber, m1colNumber = matrix1.shape
  m2rowNumber, m2colNumber = matrix2.shape
  #puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
  matrix1Weight = graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
  matrix2Weight = graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
  # W = A1 * (W1 * (A2 * W2)) — chained weighted projections across the middle layer.
  matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
  finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
  relations = matrix2relations(finalMatrix, rowIds, colIds)
  @association_values[:transference] = relations
  return relations
end
|
@@ -195,15 +588,14 @@ class Network
|
|
195
588
|
#---------------------------------------------------------
|
196
589
|
# Bass 2013, doi:10.1038/nmeth.2728
|
197
590
|
# Bass 2013, doi:10.1038/nmeth.2728
# BASE METHOD shared by the association indices: for every candidate node
# pair it gathers each node's neighbours restricted to base_layer plus their
# intersection, then delegates the actual index computation to the caller's
# block.
# Returns an Array of [node1, node2, association_value] triplets.
def get_associations(layers, base_layer) # BASE METHOD
  associations = get_all_pairs(layers: layers) do |node1, node2|
    associatedIDs_node1 = @edges[node1].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
    associatedIDs_node2 = @edges[node2].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
    intersectedIDs = associatedIDs_node1 & associatedIDs_node2
    associationValue = yield(associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2)
    [node1, node2, associationValue]
  end
  return associations
end
|
208
600
|
|
209
601
|
def get_jaccard_association(layers, base_layer)
|
@@ -246,7 +638,8 @@ class Network
|
|
246
638
|
|
247
639
|
def get_pcc_associations(layers, base_layer)
|
248
640
|
#for Ny calcule use get_nodes_layer
|
249
|
-
|
641
|
+
base_layer_nodes = get_nodes_layer([base_layer])
|
642
|
+
ny = base_layer_nodes.length
|
250
643
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
251
644
|
intersProd = intersectedIDs.length * ny
|
252
645
|
nodesProd = associatedIDs_node1.length * associatedIDs_node2.length
|
@@ -259,33 +652,95 @@ class Network
|
|
259
652
|
return relations
|
260
653
|
end
|
261
654
|
|
262
|
-
# Hypergeometric (one-tailed Fisher exact) association between node pairs,
# scored against the base_layer universe.
# pvalue_adj_method: nil (plain -log10 transform), :bonferroni or
# :benjamini_hochberg. Results are cached in @association_values under
# :hypergeometric, :hypergeometric_bf or :hypergeometric_bh respectively.
def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil)
  ny = get_nodes_layer([base_layer]).length # annotation universe size
  fet = Rubystats::FishersExactTest.new
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
    fisher = 0 # pairs with no overlap score 0 without running the test
    intersection_lengths = intersectedIDs.length
    if intersection_lengths > 0
      n1_items = associatedIDs_node1.length
      n2_items = associatedIDs_node2.length
      # 2x2 table: shared / exclusive to node1 / exclusive to node2 / in neither
      fisher = fet.calculate(
          intersection_lengths,
          n1_items - intersection_lengths,
          n2_items - intersection_lengths,
          ny - (n1_items + n2_items - intersection_lengths)
        )
      fisher = fisher[:right] # right tail tests over-representation
    end
    fisher
  end
  if pvalue_adj_method == :bonferroni
    meth = :hypergeometric_bf
    compute_adjusted_pvalue_bonferroni(relations)
  elsif pvalue_adj_method == :benjamini_hochberg
    meth = :hypergeometric_bh
    compute_adjusted_pvalue_benjaminiHochberg(relations)
  else
    meth = :hypergeometric
    compute_log_transformation(relations)
  end
  @association_values[meth] = relations
  return relations
end
|
288
687
|
|
688
|
+
# Hypergeometric association guided by an ontology topology (semtools).
# layers must contain exactly one ontology-linked layer (see link_ontology)
# and one plain reference layer; base_layer provides the shared annotations.
# mode: :elim or :weight, forwarded to Ontology#compute_relations_to_items;
# thresold: significance cutoff used by that traversal.
# Results are -log10 transformed and cached under :hypergeometric_elim or
# :hypergeometric_weight.
def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01)
  relations = []
  reference_layer = (layers - @layer_ontologies.keys).first # the NOT-ontology layer
  ontology_layer = (layers - [reference_layer]).first
  ref_nodes = get_nodes_from_layer(reference_layer) # get nodes from NOT ontology layer
  ontology = @layer_ontologies[ontology_layer]
  base_layer_length = @nodes.values.count{|n| n.type == base_layer}
  ref_nodes.each do |ref_node|
    base_nodes = get_connected_nodes(ref_node, base_layer)
    # Shared nodes between the reference node's base-layer neighbourhood and
    # the ontology layer, plus their connections back to ontology terms.
    ontology_base_subgraph = get_bipartite_subgraph(base_nodes, base_layer, ontology_layer)
    next if ontology_base_subgraph.empty?
    ontology_base_subgraph.transform_keys!{|k| k.to_sym}
    ontology.load_item_relations_to_terms(ontology_base_subgraph, remove_old_relations = true)
    term_pvals = ontology.compute_relations_to_items(base_nodes, base_layer_length, mode, thresold)
    relations.concat(term_pvals.map{|term| [ref_node, term[0], term[1]]})
  end
  compute_log_transformation(relations)
  if mode == :elim
    meth = :hypergeometric_elim
  elsif mode == :weight
    meth = :hypergeometric_weight
  end
  @association_values[meth] = relations
  return relations
end
|
713
|
+
|
714
|
+
# In-place p-value adjustment over a relations list of [p1, p2, pval] rows.
# Yields (pval, index) to the caller's block and stores the adjusted value
# back in slot 2, applying -log10 when log_val is on and the value is
# positive (zero/negative values are stored untouched).
def compute_adjusted_pvalue(relations, log_val=true)
  relations.each_with_index do |relation, idx|
    adjusted = yield(relation.last, idx)
    if log_val && adjusted > 0
      adjusted = -Math.log10(adjusted)
    end
    relation[2] = adjusted
  end
end
|
721
|
+
|
722
|
+
# -log10 transform of the raw p-values without any multiple-testing
# adjustment. Used when no adjustment method was requested.
def compute_log_transformation(relations)
  compute_adjusted_pvalue(relations) { |pval, _index| pval }
end
|
727
|
+
|
728
|
+
# Bonferroni correction: each p-value is multiplied by the number of
# comparisons and capped at 1, then -log10 transformed in place.
def compute_adjusted_pvalue_bonferroni(relations)
  comparisons = relations.length
  compute_adjusted_pvalue(relations) do |pval, _index|
    corrected = pval * comparisons
    corrected > 1 ? 1 : corrected
  end
end
|
736
|
+
|
737
|
+
# Benjamini-Hochberg FDR correction: adjusted values are precomputed by
# get_benjaminiHochberg_pvalues (defined elsewhere) and written back in
# place with the -log10 transform.
def compute_adjusted_pvalue_benjaminiHochberg(relations)
  raw_pvalues = relations.map(&:last)
  adj_pvalues = get_benjaminiHochberg_pvalues(raw_pvalues)
  compute_adjusted_pvalue(relations) { |_pval, index| adj_pvalues[index] }
end
|
743
|
+
|
289
744
|
def add_record(hash, node1, node2)
|
290
745
|
query = hash[node1]
|
291
746
|
if query.nil?
|
@@ -411,12 +866,108 @@ class Network
|
|
411
866
|
return prec, rec
|
412
867
|
end
|
413
868
|
|
869
|
+
## KERNEL METHODS
#######################################################################################
# Computes a graph kernel over the adjacency matrix stored under layer2kernel.
# kernel: 'el', 'ct', 'rf', 'ka', 'me', or prefixed variants 'vnX'/'rlX'
#   (X = penalization factor) and 'mdT' (T = diffusion steps). Any other
#   value returns the adjacency matrix unchanged (with a warning).
# normalization: apply cosine normalization to the result.
# The result is cached in @kernels[layer2kernel] (also the return value).
# NOTE(review): max_eigenvalue / min_eigenvalue / div_by_vector /
# cosine_normalization look like expcalc extensions to Numo — confirm.
def get_kernel(layer2kernel, kernel, normalization=false)
  matrix, node_names = @adjacency_matrices[layer2kernel]
  #I = identity matrix
  #D = Diagonal matrix
  #A = adjacency matrix
  #L = laplacian matrix = D − A
  matrix_result = nil
  dimension_elements = matrix.shape.last
  # In scuba code, the diagonal values of A is set to 0. In weighted matrix the kernel result is the same with or without this operation. Maybe increases the computing performance?
  # In the md kernel this operation affects the values of the final kernel
  #dimension_elements.times do |n|
  # matrix[n,n] = 0.0
  #end
  if kernel == 'el' || kernel == 'ct' || kernel == 'rf' ||
    kernel.include?('vn') || kernel.include?('rl') || kernel == 'me' # Laplacian-based family
    diagonal_matrix = matrix.sum(1).diag # get the total sum for each row, for this reason the sum method takes the 1 value. If sum colums is desired, use 0
    # Make a matrix whose diagonal is row_sum
    matrix_L = diagonal_matrix - matrix
    if kernel == 'el' #Exponential Laplacian diffusion kernel(active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
      beta = 0.02
      beta_product = matrix_L * -beta
      #matrix_result = beta_product.expm
      matrix_result = Numo::Linalg.expm(beta_product, 14)
    elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      matrix_result = Numo::Linalg.pinv(matrix_L) # Anibal saids that this kernel was normalized. Why?. Paper do not seem to describe this operation for ct, it describes for Kvn or for all kernels, it is not clear.
    elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) #Krf = (I +L ) ^ −1
    elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha ) # (I -alphaA ) ^ −1
    elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
      alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha ) # (I + alphaL ) ^ −1
    elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
      beta=0.04
      #(beta/N)*(N*I - D + A)
      id_mat = Numo::DFloat.eye(dimension_elements)
      m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix ) * (beta/dimension_elements)
      #matrix_result = m_matrix.expm
      matrix_result = Numo::Linalg.expm(m_matrix, 16)
    end
  elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
    lambda_value = matrix.min_eigenvalue
    matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs # Ka = A + lambda*I # lambda = the absolute value of the smallest eigenvalue of A
  elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
    t = kernel.gsub('md', '').to_i
    #TODO: check implementation with Numo::array
    col_sum = matrix.sum(1)
    p_mat = matrix.div_by_vector(col_sum) # row-normalized transition matrix
    p_temp_mat = p_mat.clone
    zt_mat = p_mat.clone
    (t-1).times do # accumulate P + P^2 + ... + P^t
      p_temp_mat = p_temp_mat.dot(p_mat)
      zt_mat = zt_mat + p_temp_mat
    end
    zt_mat = zt_mat * (1.0/t)
    matrix_result = zt_mat.dot(zt_mat.transpose)
  else
    matrix_result = matrix
    warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
    # This allows process a previous kernel and perform the normalization in a separated step.
  end
  matrix_result = matrix_result.cosine_normalization if normalization #TODO: check implementation with Numo::array
  @kernels[layer2kernel] = matrix_result
end
|
936
|
+
|
937
|
+
# Persist the kernel matrix previously computed for the given layer tuple.
# layer2kernel: key under which compute_kernel stored the matrix in @kernels.
# output_file: destination path handed to the matrix's own save routine.
def write_kernel(layer2kernel, output_file)
  kernel_matrix = @kernels[layer2kernel]
  kernel_matrix.save(output_file)
end
|
940
|
+
|
941
|
+
# Attach an ontology to a network layer, caching parsed OBO files so the same
# ontology file is only loaded once across layers.
# ontology_file_path: path to the OBO file backing the ontology.
# layer_name: layer identifier the ontology will be associated with.
def link_ontology(ontology_file_path, layer_name)
  obo_position = @loaded_obos.index(ontology_file_path)
  if obo_position.nil? # first request for this OBO file: parse and cache it
    ontology = Ontology.new(file: ontology_file_path, load_file: true)
    @loaded_obos << ontology_file_path
    @ontologies << ontology
  else # reuse the ontology parsed earlier for another layer
    ontology = @ontologies[obo_position]
  end
  @layer_ontologies[layer_name] = ontology
end
|
414
951
|
|
415
952
|
|
416
953
|
## AUXILIAR METHODS
|
417
954
|
#######################################################################################
|
418
955
|
private
|
419
956
|
|
957
|
+
# Reads a plain-text file and returns its lines as an array of strings with
# trailing newlines removed.
# file: path to the list file (one item per line).
# Fix: the previous `File.open(file).readlines` never closed the descriptor
# (a file-handle leak); File.readlines opens, reads and closes in one call,
# and chomp: true replaces the manual map!{ chomp } pass.
def load_input_list(file)
  return File.readlines(file, chomp: true)
end
|
960
|
+
|
961
|
+
# True when both neighbour-id lists are present and share at least one node,
# i.e. some node connects to both n1 and n2.
def exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
  return false if ids_connected_to_n1.nil? || ids_connected_to_n2.nil?
  shared_ids = ids_connected_to_n1 & ids_connected_to_n2
  return !shared_ids.empty?
end
|
970
|
+
|
420
971
|
def set_layer(layer_definitions, node_name)
|
421
972
|
layer = nil
|
422
973
|
if layer_definitions.length > 1
|
@@ -426,9 +977,11 @@ class Network
|
|
426
977
|
break
|
427
978
|
end
|
428
979
|
end
|
980
|
+
raise("The node '#{node_name}' not match with any layer regex") if layer.nil?
|
429
981
|
else
|
430
982
|
layer = layer_definitions.first.first
|
431
983
|
end
|
984
|
+
@layers << layer if !@layers.include?(layer)
|
432
985
|
return layer
|
433
986
|
end
|
434
987
|
|
@@ -453,35 +1006,46 @@ class Network
|
|
453
1006
|
end
|
454
1007
|
|
455
1008
|
# Computes a weighted row-vs-row association matrix from a bipartite
# adjacency matrix: inputMatrix is first column-normalised, projected onto
# the row space (A * (A*Ky)^T), then rescaled by a lambda-weighted product of
# row degrees (kx^lambda outer kx^(1-lambda)).
# NOTE(review): colsNumber is not used in this body — presumably kept for
# signature symmetry with callers; confirm before removing.
# rowsNumber:  number of row entities (the output is rowsNumber x rowsNumber).
# inputMatrix: Numo numeric matrix (rows x cols bipartite adjacency).
# lambdaValue: exponent splitting the row-degree penalisation (default 0.5).
# Returns the weighted matrix (also mutated in place via Numo's .inplace).
def graphWeights (rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
  ky = (1.0 / inputMatrix.sum(0)).diag #sum cols; diagonal of inverse column degrees
  weigth = Numo::Linalg.dot(inputMatrix, ky).transpose # (A * Ky)^T : column-normalised, transposed
  ky = nil #free memory
  weigth = Numo::Linalg.dot(inputMatrix, weigth) # A * (A*Ky)^T -> row x row projection

  kx = inputMatrix.sum(1) #sum rows (row degrees)

  kx_lamb = kx ** lambdaValue
  kx_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
  # Broadcast kx^lambda across columns: every row j holds the full kx_lamb vector.
  rowsNumber.times do |j|
    rowsNumber.times do |i|
      kx_lamb_mat[j,i] = kx_lamb[i]
    end
  end
  kx_lamb = nil #free memory

  kx_inv_lamb = kx ** (1 - lambdaValue)
  kx_inv_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
  # Broadcast kx^(1-lambda) across rows: every column j holds the full vector
  # (note the transposed [i, j] indexing relative to the loop above).
  rowsNumber.times do |j|
    rowsNumber.times do |i|
      kx_inv_lamb_mat[i, j] = kx_inv_lamb[i]
    end
  end
  kx_inv_lamb = nil #free memory

  # Element-wise normaliser: 1 / (kx^lambda outer kx^(1-lambda)).
  nx = 1.0/(kx_lamb_mat.inplace * kx_inv_lamb_mat).inplace # inplace marks a matrix to be used by reference, not for value
  kx_lamb_mat = nil #free memory
  kx_inv_lamb_mat = nil #free memory
  weigth.inplace * nx # scales weigth in place; the expression's value is discarded
  return weigth
end
|
468
1040
|
|
469
|
-
def
|
1041
|
+
# Converts an association matrix back into a list of pairwise relations.
# finalMatrix: 2D matrix addressable as finalMatrix[row, col].
# rowIds / colIds: identifiers for matrix rows and columns, in matrix order.
# Returns an array of [row_id, col_id, score] triplets, keeping only cells
# with a strictly positive association score.
def matrix2relations(finalMatrix, rowIds, colIds)
  rowIds.each_with_index.each_with_object([]) do |(row_id, row_index), triplets|
    colIds.each_with_index do |col_id, col_index|
      score = finalMatrix[row_index, col_index]
      triplets << [row_id, col_id, score] if score > 0
    end
  end
end
|
479
|
-
|
480
|
-
def binom(n,k)
|
481
|
-
if k > 0 && k < n
|
482
|
-
res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
|
483
|
-
else
|
484
|
-
res = 1
|
485
|
-
end
|
486
|
-
end
|
487
1051
|
end
|