NetAnalyzer 0.1.5 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/NetAnalyzer.gemspec +15 -5
- data/README.md +14 -2
- data/Rakefile +13 -9
- data/bin/NetAnalyzer.rb +183 -30
- data/bin/text2binary_matrix.rb +294 -0
- data/lib/NetAnalyzer/network.rb +651 -87
- data/lib/NetAnalyzer/templates/ElGrapho.min.js +28 -0
- data/lib/NetAnalyzer/templates/cytoscape.erb +65 -0
- data/lib/NetAnalyzer/templates/cytoscape.min.js +32 -0
- data/lib/NetAnalyzer/templates/el_grapho.erb +89 -0
- data/lib/NetAnalyzer/templates/pako.min.js +1 -0
- data/lib/NetAnalyzer/templates/sigma.erb +132 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +2 -0
- metadata +171 -24
data/lib/NetAnalyzer/network.rb
CHANGED
@@ -1,22 +1,54 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require '
|
1
|
+
require 'rubystats'
|
2
|
+
require 'gv'
|
3
|
+
#require 'nmatrix'
|
4
|
+
#require 'nmatrix/lapacke'
|
5
|
+
require 'numo/narray'
|
6
|
+
require 'numo/linalg'
|
7
|
+
require 'parallel'
|
8
|
+
|
9
|
+
#require 'pp'
|
4
10
|
require 'bigdecimal'
|
5
11
|
require 'benchmark'
|
12
|
+
#require 'nmatrix_expansion'
|
13
|
+
|
14
|
+
|
15
|
+
#For javascrip plotting
|
16
|
+
require 'erb'
|
17
|
+
require 'base64'
|
18
|
+
require 'json'
|
19
|
+
require 'zlib'
|
20
|
+
|
21
|
+
require 'semtools'
|
22
|
+
require 'expcalc'
|
23
|
+
TEMPLATES = File.join(File.dirname(__FILE__), 'templates')
|
6
24
|
|
7
25
|
class Network
|
8
26
|
|
9
|
-
attr_accessor :association_values, :control_connections
|
27
|
+
attr_accessor :association_values, :control_connections, :kernels, :reference_nodes, :group_nodes, :threads
|
10
28
|
|
11
29
|
## BASIC METHODS
|
12
30
|
############################################################
|
13
31
|
def initialize(layers)
|
14
|
-
@
|
32
|
+
@threads = 0
|
33
|
+
@nodes = {}
|
15
34
|
@edges = {}
|
35
|
+
@reference_nodes = []
|
36
|
+
@group_nodes = {}
|
16
37
|
@adjacency_matrices = {}
|
38
|
+
@kernels = {}
|
17
39
|
@layers = layers
|
18
40
|
@association_values = {}
|
19
41
|
@control_connections = {}
|
42
|
+
@compute_pairs = :conn
|
43
|
+
@compute_autorelations = true
|
44
|
+
@loaded_obos = []
|
45
|
+
@ontologies = []
|
46
|
+
@layer_ontologies = {}
|
47
|
+
end
|
48
|
+
|
49
|
+
def set_compute_pairs(use_pairs, get_autorelations)
|
50
|
+
@compute_pairs = use_pairs
|
51
|
+
@compute_autorelations = get_autorelations
|
20
52
|
end
|
21
53
|
|
22
54
|
def add_node(nodeID, nodeType = 0)
|
@@ -37,8 +69,49 @@ class Network
|
|
37
69
|
end
|
38
70
|
end
|
39
71
|
|
72
|
+
def delete_nodes(node_list, mode='d')
|
73
|
+
if mode == 'd'
|
74
|
+
@nodes.reject!{|n| node_list.include?(n)}
|
75
|
+
@edges.reject!{|n, connections| node_list.include?(n)}
|
76
|
+
@edges.each do |n, connections|
|
77
|
+
connections.reject!{|c| node_list.include?(c)}
|
78
|
+
end
|
79
|
+
elsif mode == 'r'
|
80
|
+
@nodes.select!{|n| node_list.include?(n)}
|
81
|
+
@edges.select!{|n, connections| node_list.include?(n)}
|
82
|
+
@edges.each do |n, connections|
|
83
|
+
connections.select!{|c| node_list.include?(c)}
|
84
|
+
end
|
85
|
+
end
|
86
|
+
@edges.reject!{|n, connections| connections.empty?}
|
87
|
+
end
|
88
|
+
|
89
|
+
def get_connected_nodes(node_id, from_layer)
|
90
|
+
return @edges[node_id].map{|id| @nodes[id]}.select{|node| node.type == from_layer}.map{|node| node.id}
|
91
|
+
end
|
92
|
+
|
93
|
+
def get_nodes_from_layer(from_layer)
|
94
|
+
return @nodes.values.select{|node| node.type == from_layer}.map{|node| node.id}
|
95
|
+
end
|
96
|
+
|
97
|
+
def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer)
|
98
|
+
bipartite_subgraph = {}
|
99
|
+
from_layer_node_ids.each do |from_layer_node_id|
|
100
|
+
connected_nodes = @edges[from_layer_node_id]
|
101
|
+
connected_nodes.each do |connected_node|
|
102
|
+
if @nodes[connected_node].type == to_layer
|
103
|
+
query = bipartite_subgraph[connected_node]
|
104
|
+
if query.nil?
|
105
|
+
bipartite_subgraph[connected_node] = get_connected_nodes(connected_node, from_layer)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
return bipartite_subgraph
|
111
|
+
end
|
112
|
+
|
40
113
|
def load_network_by_pairs(file, layers, split_character="\t")
|
41
|
-
File.open(file).each
|
114
|
+
File.open(file).each do |line|
|
42
115
|
line.chomp!
|
43
116
|
pair = line.split(split_character)
|
44
117
|
node1 = pair[0]
|
@@ -49,57 +122,371 @@ class Network
|
|
49
122
|
end
|
50
123
|
end
|
51
124
|
|
125
|
+
def load_network_by_bin_matrix(input_file, node_file, layers)
|
126
|
+
node_names = load_input_list(node_file)
|
127
|
+
@adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='npy'), node_names, node_names]
|
128
|
+
end
|
129
|
+
|
130
|
+
def load_network_by_plain_matrix(input_file, node_file, layers, splitChar)
|
131
|
+
node_names = load_input_list(node_file)
|
132
|
+
@adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='txt', splitChar=splitChar), node_names, node_names]
|
133
|
+
end
|
134
|
+
|
52
135
|
def get_edge_number
|
53
|
-
node_connections =
|
136
|
+
node_connections = get_degree.values.inject(0){|sum, n| sum + n}
|
54
137
|
return node_connections/2
|
55
138
|
end
|
56
139
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
140
|
+
def get_degree(zscore=false)
|
141
|
+
degree = {}
|
142
|
+
@edges.each do |id, nodes|
|
143
|
+
degree[id] = nodes.length
|
144
|
+
end
|
145
|
+
if !zscore
|
146
|
+
degree_values = degree.values
|
147
|
+
mean_degree = degree_values.mean
|
148
|
+
std_degree = degree_values.standard_deviation
|
149
|
+
degree.transform_values!{|v| (v - mean_degree).fdiv(std_degree)}
|
150
|
+
end
|
151
|
+
return degree
|
152
|
+
end
|
153
|
+
|
154
|
+
def get_node_attributes(attr_names)
|
155
|
+
attrs = []
|
156
|
+
attr_names.each do |attr_name|
|
157
|
+
if attr_name == 'get_degree'
|
158
|
+
attrs << get_degree
|
159
|
+
elsif attr_name == 'get_degreeZ'
|
160
|
+
attrs << get_degree(zscore=true)
|
161
|
+
end
|
162
|
+
end
|
163
|
+
node_ids = attrs.first.keys
|
164
|
+
node_attrs = []
|
165
|
+
node_ids.each do |n|
|
166
|
+
node_attrs << [n].concat(attrs.map{|at| at[n]})
|
167
|
+
end
|
168
|
+
return node_attrs
|
169
|
+
end
|
170
|
+
|
171
|
+
def plot_network(options = {})
|
172
|
+
if options[:method] == 'graphviz'
|
173
|
+
plot_dot(options)
|
174
|
+
else
|
175
|
+
if options[:method] == 'elgrapho'
|
176
|
+
template = 'el_grapho'
|
177
|
+
elsif options[:method] == 'cytoscape'
|
178
|
+
template = 'cytoscape'
|
179
|
+
elsif options[:method] == 'sigma'
|
180
|
+
template = 'sigma'
|
181
|
+
end
|
182
|
+
renderered_template = ERB.new(File.open(File.join(TEMPLATES, template + '.erb')).read).result(binding)
|
183
|
+
File.open(options[:output_file] + '.html', 'w'){|f| f.puts renderered_template}
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
def plot_dot(user_options = {}) # input keys: layout
|
188
|
+
options = {layout: "sfdp"}
|
189
|
+
options = options.merge(user_options)
|
190
|
+
graphviz_colors = %w[lightsteelblue1 lightyellow1 lightgray orchid2]
|
191
|
+
palette = {}
|
192
|
+
@layers.each do |layer|
|
193
|
+
palette[layer] = graphviz_colors.shift
|
194
|
+
end
|
195
|
+
graph = GV::Graph.open('g', type = :undirected)
|
196
|
+
plotted_edges = {}
|
60
197
|
@edges.each do |nodeID, associatedIDs|
|
61
198
|
associatedIDs.each do |associatedID|
|
62
|
-
|
199
|
+
pair = [nodeID, associatedID].sort.join('_').to_sym
|
200
|
+
if !plotted_edges[pair]
|
201
|
+
graph.edge 'e',
|
202
|
+
graph.node(nodeID, label: '', style: 'filled', fillcolor: palette[@nodes[nodeID].type]),
|
203
|
+
graph.node(associatedID, label: '', style: 'filled' , fillcolor: palette[@nodes[associatedID].type])
|
204
|
+
plotted_edges[pair] = true
|
205
|
+
end
|
206
|
+
end
|
207
|
+
end
|
208
|
+
@reference_nodes.each do |nodeID|
|
209
|
+
graph.node(nodeID, style: 'filled', fillcolor: 'firebrick1', label: '')
|
210
|
+
end
|
211
|
+
graphviz_border_colors = %w[blue darkorange red olivedrab4]
|
212
|
+
@group_nodes.each do |groupID, gNodes|
|
213
|
+
border_color = graphviz_border_colors.shift
|
214
|
+
gNodes.each do |nodeID|
|
215
|
+
graph.node(nodeID, color: border_color, penwidth: '10', label: '')
|
216
|
+
end
|
217
|
+
end
|
218
|
+
graph[:overlap] = false
|
219
|
+
STDERR.puts 'Save graph'
|
220
|
+
graph.save(options[:output_file] + '.png', format='png', layout=options[:layout])
|
221
|
+
end
|
222
|
+
|
223
|
+
def compute_group_metrics(output_filename)
|
224
|
+
metrics = []
|
225
|
+
header = ['group']
|
226
|
+
@group_nodes.keys.each do |k|
|
227
|
+
metrics << [k]
|
228
|
+
end
|
229
|
+
header << 'comparative_degree'
|
230
|
+
comparative_degree = communities_comparative_degree(@group_nodes)
|
231
|
+
comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
|
232
|
+
header << 'avg_sht_path'
|
233
|
+
avg_sht_path = communities_avg_sht_path(@group_nodes)
|
234
|
+
avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
|
235
|
+
if !@reference_nodes.empty?
|
236
|
+
header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
|
237
|
+
node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
|
238
|
+
node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
|
239
|
+
end
|
240
|
+
File.open(output_filename, 'w') do |f|
|
241
|
+
f.puts header.join("\t")
|
242
|
+
metrics.each do |gr|
|
243
|
+
f. puts gr.join("\t")
|
244
|
+
end
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
248
|
+
def replace_nil_vals(val)
|
249
|
+
return val.nil? ? 'NULL' : val
|
250
|
+
end
|
251
|
+
|
252
|
+
def communities_comparative_degree(coms)
|
253
|
+
comparative_degrees = []
|
254
|
+
coms.each do |com_id, com|
|
255
|
+
comparative_degrees << compute_comparative_degree(com)
|
256
|
+
end
|
257
|
+
return comparative_degrees
|
258
|
+
end
|
259
|
+
|
260
|
+
def communities_avg_sht_path(coms)
|
261
|
+
avg_sht_path = []
|
262
|
+
coms.each do |com_id, com|
|
263
|
+
dist, paths = compute_avg_sht_path(com)
|
264
|
+
avg_sht_path << dist
|
265
|
+
end
|
266
|
+
return avg_sht_path
|
267
|
+
end
|
268
|
+
|
269
|
+
def compute_node_com_assoc_in_precomputed_communities(coms, ref_node)
|
270
|
+
node_com_assoc = []
|
271
|
+
coms.each do |com_id, com|
|
272
|
+
node_com_assoc << [compute_node_com_assoc(com, ref_node)]
|
273
|
+
end
|
274
|
+
return node_com_assoc
|
275
|
+
end
|
276
|
+
|
277
|
+
def compute_comparative_degree(com) # see Girvan-Newman Benchmark control parameter in http://networksciencebook.com/chapter/9#testing (communities chapter)
|
278
|
+
internal_degree = 0
|
279
|
+
external_degree = 0
|
280
|
+
com.each do |nodeID|
|
281
|
+
nodeIDneigh = @edges[nodeID]
|
282
|
+
next if nodeIDneigh.nil?
|
283
|
+
internal_degree += (nodeIDneigh & com).length
|
284
|
+
external_degree += (nodeIDneigh - com).length
|
285
|
+
end
|
286
|
+
comparative_degree = external_degree.fdiv(external_degree + internal_degree)
|
287
|
+
return comparative_degree
|
288
|
+
end
|
289
|
+
|
290
|
+
def compute_avg_sht_path(com, paths=false)
|
291
|
+
path_lengths = []
|
292
|
+
all_paths = []
|
293
|
+
group = com.dup
|
294
|
+
while !group.empty?
|
295
|
+
node_start = group.shift
|
296
|
+
sht_paths = Parallel.map(group, in_processes: @threads) do |node_stop|
|
297
|
+
#group.each do |node_stop|
|
298
|
+
dist, path = shortest_path(node_start, node_stop, paths)
|
299
|
+
[dist, path]
|
300
|
+
#path_lengths << dist if !dist.nil?
|
301
|
+
#all_paths << path if !path.empty?
|
302
|
+
end
|
303
|
+
sht_paths.each do |dist, path|
|
304
|
+
path_lengths << dist
|
305
|
+
all_paths << path
|
306
|
+
end
|
307
|
+
end
|
308
|
+
if path_lengths.include?(nil)
|
309
|
+
avg_sht_path = nil
|
310
|
+
else
|
311
|
+
avg_sht_path = path_lengths.inject(0){|sum,l| sum + l}.fdiv(path_lengths.length)
|
312
|
+
end
|
313
|
+
return avg_sht_path, all_paths
|
314
|
+
end
|
315
|
+
|
316
|
+
# https://pythoninwonderland.wordpress.com/2017/03/18/how-to-implement-breadth-first-search-in-python/
|
317
|
+
# finds shortest path between 2 nodes of a graph using BFS
|
318
|
+
def bfs_shortest_path(start, goal, paths=false)
|
319
|
+
dist = nil
|
320
|
+
explored = {} # keep track of explored nodes
|
321
|
+
previous = {}
|
322
|
+
queue = [[start, 0]] # keep track of all the paths to be checked
|
323
|
+
is_goal = false
|
324
|
+
while !queue.empty? && !is_goal # keeps looping until all possible paths have been checked
|
325
|
+
node, dist = queue.pop # pop the first path from the queue
|
326
|
+
if !explored.include?(node) # get the last node from the path
|
327
|
+
neighbours = @edges[node]
|
328
|
+
explored[node] = true # mark node as explored
|
329
|
+
next if neighbours.nil?
|
330
|
+
dist += 1
|
331
|
+
neighbours.each do |neighbour| # go through all neighbour nodes, construct a new path
|
332
|
+
next if explored.include?(neighbour)
|
333
|
+
queue.unshift([neighbour, dist]) # push it into the queue
|
334
|
+
previous[neighbour] = node if paths
|
335
|
+
if neighbour == goal # return path if neighbour is goal
|
336
|
+
is_goal = true
|
337
|
+
break
|
338
|
+
end
|
339
|
+
end
|
340
|
+
end
|
341
|
+
end
|
342
|
+
if is_goal
|
343
|
+
path = build_path(previous, start, goal) if paths
|
344
|
+
else
|
345
|
+
dist = nil
|
346
|
+
path = []
|
347
|
+
end
|
348
|
+
return dist, path
|
349
|
+
end
|
350
|
+
|
351
|
+
def build_path(previous, startNode, stopNode)
|
352
|
+
path = []
|
353
|
+
currentNode = stopNode
|
354
|
+
path << currentNode
|
355
|
+
while currentNode != startNode
|
356
|
+
currentNode = previous[currentNode]
|
357
|
+
path << currentNode
|
358
|
+
end
|
359
|
+
return path
|
360
|
+
end
|
361
|
+
|
362
|
+
def shortest_path(node_start, node_stop, paths=false)
|
363
|
+
#https://betterprogramming.pub/5-ways-to-find-the-shortest-path-in-a-graph-88cfefd0030f
|
364
|
+
#return bidirectionalSearch(node_start, node_stop)
|
365
|
+
#https://efficientcodeblog.wordpress.com/2017/12/13/bidirectional-search-two-end-bfs/
|
366
|
+
dist, all_paths = bfs_shortest_path(node_start, node_stop, paths)
|
367
|
+
return dist, all_paths
|
368
|
+
end
|
369
|
+
|
370
|
+
def expand_clusters(expand_method)
|
371
|
+
clusters = {}
|
372
|
+
@group_nodes.each do |id, nodes|
|
373
|
+
if expand_method == 'sht_path'
|
374
|
+
dist, paths = compute_avg_sht_path(nodes, paths=true) # this uses bfs, maybe Dijkstra is the best one
|
375
|
+
new_nodes = paths.flatten.uniq
|
376
|
+
clusters[id] = nodes | new_nodes # If some node pair are not connected, recover them
|
63
377
|
end
|
64
378
|
end
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
379
|
+
return clusters
|
380
|
+
end
|
381
|
+
|
382
|
+
def compute_node_com_assoc(com, ref_node)
|
383
|
+
ref_cons = 0
|
384
|
+
ref_secondary_cons = 0
|
385
|
+
secondary_nodes = {}
|
386
|
+
other_cons = 0
|
387
|
+
other_nodes = {}
|
388
|
+
|
389
|
+
refNneigh = @edges[ref_node]
|
390
|
+
com.each do |nodeID|
|
391
|
+
nodeIDneigh = @edges[nodeID]
|
392
|
+
next if nodeIDneigh.nil?
|
393
|
+
ref_cons += 1 if nodeIDneigh.include?(ref_node)
|
394
|
+
if !refNneigh.nil?
|
395
|
+
common_nodes = nodeIDneigh & refNneigh
|
396
|
+
common_nodes.each {|id| secondary_nodes[id] = true}
|
397
|
+
ref_secondary_cons += common_nodes.length
|
398
|
+
end
|
399
|
+
specific_nodes = nodeIDneigh - refNneigh - [ref_node]
|
400
|
+
specific_nodes.each {|id| other_nodes[id] = true}
|
401
|
+
other_cons += specific_nodes.length
|
402
|
+
end
|
403
|
+
by_edge = (ref_cons + ref_secondary_cons).fdiv(other_cons)
|
404
|
+
by_node = (ref_cons + secondary_nodes.length).fdiv(other_nodes.length)
|
405
|
+
return by_edge, by_node
|
69
406
|
end
|
70
407
|
|
71
408
|
def get_all_intersections
|
72
|
-
intersection_lengths =
|
73
|
-
|
74
|
-
intersection_lengths << intersection(node1, node2).length
|
409
|
+
intersection_lengths = get_all_pairs do |node1, node2|
|
410
|
+
intersection(node1, node2).length
|
75
411
|
end
|
76
412
|
return intersection_lengths
|
77
413
|
end
|
78
414
|
|
79
415
|
def get_all_pairs(args = {})
|
80
|
-
|
416
|
+
all_pairs = []
|
417
|
+
default = {:layers => :all}
|
81
418
|
args = default.merge(args)
|
82
|
-
|
83
|
-
|
419
|
+
nodeIDsA, nodeIDsB = collect_nodes(args)
|
420
|
+
if @compute_autorelations
|
421
|
+
if @compute_pairs == :all
|
422
|
+
while !nodeIDsA.empty?
|
423
|
+
node1 = nodeIDsA.shift
|
424
|
+
pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
|
425
|
+
yield(node1, node2)
|
426
|
+
end
|
427
|
+
all_pairs.concat(pairs)
|
428
|
+
end
|
429
|
+
elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
|
430
|
+
while !nodeIDsA.empty?
|
431
|
+
node1 = nodeIDsA.shift
|
432
|
+
ids_connected_to_n1 = @edges[node1]
|
433
|
+
pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
|
434
|
+
result = nil
|
435
|
+
ids_connected_to_n2 = @edges[node2]
|
436
|
+
if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
437
|
+
result = yield(node1, node2)
|
438
|
+
end
|
439
|
+
result
|
440
|
+
end
|
441
|
+
pairs.compact!
|
442
|
+
all_pairs.concat(pairs)
|
443
|
+
end
|
444
|
+
end
|
84
445
|
else
|
85
|
-
|
86
|
-
|
87
|
-
|
446
|
+
#MAIN METHOD
|
447
|
+
if @compute_pairs == :conn
|
448
|
+
all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
|
449
|
+
ids_connected_to_n1 = @edges[node1]
|
450
|
+
node1_pairs = []
|
451
|
+
nodeIDsB.each do |node2|
|
452
|
+
ids_connected_to_n2 = @edges[node2]
|
453
|
+
if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
454
|
+
node1_pairs << yield(node1, node2)
|
455
|
+
end
|
456
|
+
end
|
457
|
+
node1_pairs
|
458
|
+
end
|
459
|
+
all_pairs.flatten!(1)
|
460
|
+
elsif @compute_pairs == :all
|
461
|
+
raise 'Not implemented'
|
88
462
|
end
|
89
463
|
end
|
90
464
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
465
|
+
return all_pairs
|
466
|
+
end
|
467
|
+
|
468
|
+
def collect_nodes(args)
|
469
|
+
nodeIDsA = nil
|
470
|
+
nodeIDsB = nil
|
471
|
+
if @compute_autorelations
|
472
|
+
if args[:layers] == :all
|
473
|
+
nodeIDsA = @nodes.keys
|
474
|
+
else
|
475
|
+
nodeIDsA = []
|
476
|
+
args[:layers].each do |layer|
|
477
|
+
nodeIDsA.concat(@nodes.select{|id, node| node.type == layer}.keys)
|
96
478
|
end
|
97
479
|
end
|
98
|
-
|
99
|
-
|
480
|
+
else
|
481
|
+
if args[:layers] != :all
|
482
|
+
nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
|
483
|
+
nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
|
484
|
+
end
|
100
485
|
end
|
486
|
+
return nodeIDsA, nodeIDsB
|
101
487
|
end
|
102
488
|
|
489
|
+
|
103
490
|
def get_nodes_layer(layers)
|
104
491
|
#for creating ny value in hypergeometric and pcc index
|
105
492
|
nodes = []
|
@@ -123,17 +510,16 @@ class Network
|
|
123
510
|
def generate_adjacency_matrix(layerA, layerB)
|
124
511
|
layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
|
125
512
|
layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
|
126
|
-
|
127
|
-
layerAidNodes.
|
128
|
-
layerBidNodes.
|
513
|
+
matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
|
514
|
+
layerAidNodes.each_with_index do |nodeA, i|
|
515
|
+
layerBidNodes.each_with_index do |nodeB, j|
|
129
516
|
if @edges[nodeB].include?(nodeA)
|
130
|
-
|
517
|
+
matrix[i, j] = 1
|
131
518
|
else
|
132
|
-
|
519
|
+
matrix[i, j] = 0
|
133
520
|
end
|
134
521
|
end
|
135
522
|
end
|
136
|
-
matrix = NMatrix.new([layerAidNodes.length, layerBidNodes.length], adjacency_matrix)
|
137
523
|
all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
|
138
524
|
@adjacency_matrices[[layerA, layerB]] = all_info_matrix
|
139
525
|
return all_info_matrix
|
@@ -161,6 +547,14 @@ class Network
|
|
161
547
|
relations = get_pcc_associations(layers, base_layer)
|
162
548
|
elsif meth == :hypergeometric #all networks
|
163
549
|
relations = get_hypergeometric_associations(layers, base_layer)
|
550
|
+
elsif meth == :hypergeometric_bf #all networks
|
551
|
+
relations = get_hypergeometric_associations(layers, base_layer, :bonferroni)
|
552
|
+
elsif meth == :hypergeometric_bh #all networks
|
553
|
+
relations = get_hypergeometric_associations(layers, base_layer, :benjamini_hochberg)
|
554
|
+
elsif meth == :hypergeometric_elim #tripartite networks?
|
555
|
+
relations = get_hypergeometric_associations_with_topology(layers, base_layer, :elim)
|
556
|
+
elsif meth == :hypergeometric_weight #tripartite networks?
|
557
|
+
relations = get_hypergeometric_associations_with_topology(layers, base_layer, :weight)
|
164
558
|
elsif meth == :csi #all networks
|
165
559
|
relations = get_csi_associations(layers, base_layer)
|
166
560
|
elsif meth == :transference #tripartite networks
|
@@ -173,20 +567,19 @@ class Network
|
|
173
567
|
#---------------------------------------------------------
|
174
568
|
# Alaimo 2014, doi: 10.3389/fbioe.2014.00071
|
175
569
|
def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5)
|
570
|
+
relations = []
|
176
571
|
matrix1 = @adjacency_matrices[firstPairLayers].first
|
177
572
|
rowIds = @adjacency_matrices[firstPairLayers][1]
|
178
573
|
matrix2 = @adjacency_matrices[secondPairLayers].first
|
179
574
|
colIds = @adjacency_matrices[secondPairLayers][2]
|
180
|
-
m1rowNumber = matrix1.
|
181
|
-
|
182
|
-
m2rowNumber = matrix2.rows
|
183
|
-
m2colNumber = matrix2.cols
|
575
|
+
m1rowNumber, m1colNumber = matrix1.shape
|
576
|
+
m2rowNumber, m2colNumber = matrix2.shape
|
184
577
|
#puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
|
185
578
|
matrix1Weight = graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
|
186
579
|
matrix2Weight = graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
|
187
|
-
matrixWeightProduct =
|
188
|
-
finalMatrix =
|
189
|
-
relations =
|
580
|
+
matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
|
581
|
+
finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
|
582
|
+
relations = matrix2relations(finalMatrix, rowIds, colIds)
|
190
583
|
@association_values[:transference] = relations
|
191
584
|
return relations
|
192
585
|
end
|
@@ -195,15 +588,14 @@ class Network
|
|
195
588
|
#---------------------------------------------------------
|
196
589
|
# Bass 2013, doi:10.1038/nmeth.2728
|
197
590
|
def get_associations(layers, base_layer) # BASE METHOD
|
198
|
-
|
199
|
-
get_all_pairs(layers: layers) do |node1, node2|
|
591
|
+
associations = get_all_pairs(layers: layers) do |node1, node2|
|
200
592
|
associatedIDs_node1 = @edges[node1].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
|
201
593
|
associatedIDs_node2 = @edges[node2].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
|
202
594
|
intersectedIDs = associatedIDs_node1 & associatedIDs_node2
|
203
595
|
associationValue = yield(associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2)
|
204
|
-
|
596
|
+
[node1, node2, associationValue]
|
205
597
|
end
|
206
|
-
return
|
598
|
+
return associations
|
207
599
|
end
|
208
600
|
|
209
601
|
def get_jaccard_association(layers, base_layer)
|
@@ -246,7 +638,8 @@ class Network
|
|
246
638
|
|
247
639
|
def get_pcc_associations(layers, base_layer)
|
248
640
|
#for Ny calcule use get_nodes_layer
|
249
|
-
|
641
|
+
base_layer_nodes = get_nodes_layer([base_layer])
|
642
|
+
ny = base_layer_nodes.length
|
250
643
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
251
644
|
intersProd = intersectedIDs.length * ny
|
252
645
|
nodesProd = associatedIDs_node1.length * associatedIDs_node2.length
|
@@ -259,33 +652,95 @@ class Network
|
|
259
652
|
return relations
|
260
653
|
end
|
261
654
|
|
262
|
-
def get_hypergeometric_associations(layers, base_layer)
|
655
|
+
def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil)
|
263
656
|
ny = get_nodes_layer([base_layer]).length
|
657
|
+
fet = Rubystats::FishersExactTest.new
|
264
658
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
265
|
-
|
659
|
+
fisher = 0
|
266
660
|
intersection_lengths = intersectedIDs.length
|
267
|
-
sum = 0
|
268
661
|
if intersection_lengths > 0
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
662
|
+
n1_items = associatedIDs_node1.length
|
663
|
+
n2_items = associatedIDs_node2.length
|
664
|
+
fisher = fet.calculate(
|
665
|
+
intersection_lengths,
|
666
|
+
n1_items - intersection_lengths,
|
667
|
+
n2_items - intersection_lengths,
|
668
|
+
ny - (n1_items + n2_items - intersection_lengths)
|
669
|
+
)
|
670
|
+
fisher = fisher[:right]
|
277
671
|
end
|
278
|
-
|
279
|
-
hypergeometricValue = 0
|
280
|
-
else
|
281
|
-
hypergeometricValue = -Math.log10(sum)
|
282
|
-
end
|
283
|
-
hypergeometricValue
|
672
|
+
fisher
|
284
673
|
end
|
285
|
-
|
674
|
+
if pvalue_adj_method == :bonferroni
|
675
|
+
meth = :hypergeometric_bf
|
676
|
+
compute_adjusted_pvalue_bonferroni(relations)
|
677
|
+
elsif pvalue_adj_method == :benjamini_hochberg
|
678
|
+
meth = :hypergeometric_bh
|
679
|
+
compute_adjusted_pvalue_benjaminiHochberg(relations)
|
680
|
+
else
|
681
|
+
meth = :hypergeometric
|
682
|
+
compute_log_transformation(relations)
|
683
|
+
end
|
684
|
+
@association_values[meth] = relations
|
286
685
|
return relations
|
287
686
|
end
|
288
687
|
|
688
|
+
def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01)
|
689
|
+
relations = []
|
690
|
+
reference_layer = (layers - @layer_ontologies.keys).first
|
691
|
+
ontology_layer = (layers - [reference_layer]).first
|
692
|
+
ref_nodes = get_nodes_from_layer(reference_layer) # get nodes from NOT ontology layer
|
693
|
+
ontology = @layer_ontologies[ontology_layer]
|
694
|
+
base_layer_length = @nodes.values.count{|n| n.type == base_layer}
|
695
|
+
ref_nodes.each do |ref_node|
|
696
|
+
base_nodes = get_connected_nodes(ref_node, base_layer)
|
697
|
+
ontology_base_subgraph = get_bipartite_subgraph(base_nodes, base_layer, ontology_layer) # get shared nodes between nodes from NOT ontology layer and ONTOLOGY layer. Also get the conections between shared nodes and ontology nodes.
|
698
|
+
next if ontology_base_subgraph.empty?
|
699
|
+
ontology_base_subgraph.transform_keys!{|k| k.to_sym}
|
700
|
+
ontology.load_item_relations_to_terms(ontology_base_subgraph, remove_old_relations = true)
|
701
|
+
term_pvals = ontology.compute_relations_to_items(base_nodes, base_layer_length, mode, thresold)
|
702
|
+
relations.concat(term_pvals.map{|term| [ref_node, term[0], term[1]]})
|
703
|
+
end
|
704
|
+
compute_log_transformation(relations)
|
705
|
+
if mode == :elim
|
706
|
+
meth = :hypergeometric_elim
|
707
|
+
elsif mode == :weight
|
708
|
+
meth = :hypergeometric_weight
|
709
|
+
end
|
710
|
+
@association_values[meth] = relations
|
711
|
+
return relations
|
712
|
+
end
|
713
|
+
|
714
|
+
def compute_adjusted_pvalue(relations, log_val=true)
|
715
|
+
relations.each_with_index do |data, i| #p1, p2, pval
|
716
|
+
pval_adj = yield(data.last, i)
|
717
|
+
pval_adj = -Math.log10(pval_adj) if log_val && pval_adj > 0
|
718
|
+
data[2] = pval_adj
|
719
|
+
end
|
720
|
+
end
|
721
|
+
|
722
|
+
def compute_log_transformation(relations) #Only perform log transform whitout adjust pvalue. Called when adjusted method is not defined
|
723
|
+
compute_adjusted_pvalue(relations) do |pval, index|
|
724
|
+
pval
|
725
|
+
end
|
726
|
+
end
|
727
|
+
|
728
|
+
def compute_adjusted_pvalue_bonferroni(relations)
|
729
|
+
n_comparations = relations.length
|
730
|
+
compute_adjusted_pvalue(relations) do |pval, index|
|
731
|
+
adj = pval * n_comparations
|
732
|
+
adj = 1 if adj > 1
|
733
|
+
adj
|
734
|
+
end
|
735
|
+
end
|
736
|
+
|
737
|
+
def compute_adjusted_pvalue_benjaminiHochberg(relations)
|
738
|
+
adj_pvalues = get_benjaminiHochberg_pvalues(relations.map{|rel| rel.last})
|
739
|
+
compute_adjusted_pvalue(relations) do |pval, index|
|
740
|
+
adj_pvalues[index]
|
741
|
+
end
|
742
|
+
end
|
743
|
+
|
289
744
|
def add_record(hash, node1, node2)
|
290
745
|
query = hash[node1]
|
291
746
|
if query.nil?
|
@@ -411,12 +866,108 @@ class Network
|
|
411
866
|
return prec, rec
|
412
867
|
end
|
413
868
|
|
869
|
+
## KERNEL METHODS
|
870
|
+
#######################################################################################
|
871
|
+
def get_kernel(layer2kernel, kernel, normalization=false)
|
872
|
+
matrix, node_names = @adjacency_matrices[layer2kernel]
|
873
|
+
#I = identity matrix
|
874
|
+
#D = Diagonal matrix
|
875
|
+
#A = adjacency matrix
|
876
|
+
#L = laplacian matrix = D − A
|
877
|
+
matrix_result = nil
|
878
|
+
dimension_elements = matrix.shape.last
|
879
|
+
# In scuba code, the diagonal values of A is set to 0. In weighted matrix the kernel result is the same with or without this operation. Maybe increases the computing performance?
|
880
|
+
# In the md kernel this operation affects the values of the final kernel
|
881
|
+
#dimension_elements.times do |n|
|
882
|
+
# matrix[n,n] = 0.0
|
883
|
+
#end
|
884
|
+
if kernel == 'el' || kernel == 'ct' || kernel == 'rf' ||
|
885
|
+
kernel.include?('vn') || kernel.include?('rl') || kernel == 'me'
|
886
|
+
diagonal_matrix = matrix.sum(1).diag # get the total sum for each row, for this reason the sum method takes the 1 value. If sum colums is desired, use 0
|
887
|
+
# Make a matrix whose diagonal is row_sum
|
888
|
+
matrix_L = diagonal_matrix - matrix
|
889
|
+
if kernel == 'el' #Exponential Laplacian diffusion kernel(active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
|
890
|
+
beta = 0.02
|
891
|
+
beta_product = matrix_L * -beta
|
892
|
+
#matrix_result = beta_product.expm
|
893
|
+
matrix_result = Numo::Linalg.expm(beta_product, 14)
|
894
|
+
elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
|
895
|
+
matrix_result = Numo::Linalg.pinv(matrix_L) # Anibal saids that this kernel was normalized. Why?. Paper do not seem to describe this operation for ct, it describes for Kvn or for all kernels, it is not clear.
|
896
|
+
elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
|
897
|
+
matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) #Krf = (I +L ) ^ −1
|
898
|
+
elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
|
899
|
+
alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
|
900
|
+
matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha ) # (I -alphaA ) ^ −1
|
901
|
+
elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
|
902
|
+
alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
|
903
|
+
matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha ) # (I + alphaL ) ^ −1
|
904
|
+
elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
|
905
|
+
beta=0.04
|
906
|
+
#(beta/N)*(N*I - D + A)
|
907
|
+
id_mat = Numo::DFloat.eye(dimension_elements)
|
908
|
+
m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix ) * (beta/dimension_elements)
|
909
|
+
#matrix_result = m_matrix.expm
|
910
|
+
matrix_result = Numo::Linalg.expm(m_matrix, 16)
|
911
|
+
end
|
912
|
+
elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
|
913
|
+
lambda_value = matrix.min_eigenvalue
|
914
|
+
matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs # Ka = A + lambda*I # lambda = the absolute value of the smallest eigenvalue of A
|
915
|
+
elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
|
916
|
+
t = kernel.gsub('md', '').to_i
|
917
|
+
#TODO: check implementation with Numo::array
|
918
|
+
col_sum = matrix.sum(1)
|
919
|
+
p_mat = matrix.div_by_vector(col_sum)
|
920
|
+
p_temp_mat = p_mat.clone
|
921
|
+
zt_mat = p_mat.clone
|
922
|
+
(t-1).times do
|
923
|
+
p_temp_mat = p_temp_mat.dot(p_mat)
|
924
|
+
zt_mat = zt_mat + p_temp_mat
|
925
|
+
end
|
926
|
+
zt_mat = zt_mat * (1.0/t)
|
927
|
+
matrix_result = zt_mat.dot(zt_mat.transpose)
|
928
|
+
else
|
929
|
+
matrix_result = matrix
|
930
|
+
warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
|
931
|
+
# This allows process a previous kernel and perform the normalization in a separated step.
|
932
|
+
end
|
933
|
+
matrix_result = matrix_result.cosine_normalization if normalization #TODO: check implementation with Numo::array
|
934
|
+
@kernels[layer2kernel] = matrix_result
|
935
|
+
end
|
936
|
+
|
937
|
+
# Persists a previously computed kernel matrix to disk.
# layer2kernel: key identifying the kernel inside @kernels (set by the kernel
#   computation step).
# output_file: destination path, forwarded verbatim to the matrix's #save.
def write_kernel(layer2kernel, output_file)
	kernel_matrix = @kernels[layer2kernel]
	kernel_matrix.save(output_file)
end
|
940
|
+
|
941
|
+
# Associates an OBO ontology with a network layer, loading the ontology file
# only the first time it is seen (subsequent calls for the same path reuse the
# cached Ontology object).
# ontology_file_path: path to the OBO file.
# layer_name: layer key under which the ontology is registered.
def link_ontology(ontology_file_path, layer_name)
	obo_position = @loaded_obos.index(ontology_file_path)
	if obo_position.nil? # First time: parse and cache the ontology
		ontology = Ontology.new(file: ontology_file_path, load_file: true)
		@loaded_obos << ontology_file_path
		@ontologies << ontology
	else # Already parsed: reuse the cached object for this layer
		ontology = @ontologies[obo_position]
	end
	@layer_ontologies[layer_name] = ontology
end
|
414
951
|
|
415
952
|
|
416
953
|
## AUXILIAR METHODS
|
417
954
|
#######################################################################################
|
418
955
|
private
|
419
956
|
|
957
|
+
# Reads a plain-text file and returns its lines with trailing newlines removed.
# file: path to the input file.
# Returns Array<String>; raises Errno::ENOENT when the file does not exist.
def load_input_list(file)
	# File.readlines opens and closes the handle itself; the previous
	# File.open(file).readlines left the descriptor open until GC.
	return File.readlines(file, chomp: true)
end
|
960
|
+
|
961
|
+
# True when both neighbour lists exist and share at least one common node,
# i.e. there is at least one node connecting n1 and n2.
# ids_connected_to_n1 / ids_connected_to_n2: Arrays of node ids, or nil when
#   the node has no recorded connections.
def exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
	return false if ids_connected_to_n1.nil? || ids_connected_to_n2.nil?
	shared_nodes = ids_connected_to_n1 & ids_connected_to_n2
	!shared_nodes.empty?
end
|
970
|
+
|
420
971
|
def set_layer(layer_definitions, node_name)
|
421
972
|
layer = nil
|
422
973
|
if layer_definitions.length > 1
|
@@ -426,9 +977,11 @@ class Network
|
|
426
977
|
break
|
427
978
|
end
|
428
979
|
end
|
980
|
+
raise("The node '#{node_name}' not match with any layer regex") if layer.nil?
|
429
981
|
else
|
430
982
|
layer = layer_definitions.first.first
|
431
983
|
end
|
984
|
+
@layers << layer if !@layers.include?(layer)
|
432
985
|
return layer
|
433
986
|
end
|
434
987
|
|
@@ -453,35 +1006,46 @@ class Network
|
|
453
1006
|
end
|
454
1007
|
|
455
1008
|
# Projects a bipartite adjacency matrix onto its row space, scoring each pair
# of row nodes by their shared column neighbours with a lambda-weighted degree
# normalisation: W = (A . Ky . A^T)[j,i] / (kx[i]**lambda * kx[j]**(1-lambda)),
# where Ky = diag(1 / column sums) and kx = row sums of A.
# rowsNumber: number of rows of inputMatrix (result is rowsNumber x rowsNumber).
# colsNumber: number of columns. NOTE(review): unused in this body — confirm
#   whether it is kept only for interface symmetry with callers.
# inputMatrix: numeric matrix — presumably a dense Numo::DFloat; TODO confirm.
# lambdaValue: degree-penalisation balance between the two row nodes (0..1).
# Returns the rowsNumber x rowsNumber weight matrix.
# NOTE(review): intermediates are nil-ed deliberately to release large arrays
#   early; keep statement order if editing.
def graphWeights (rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
	ky = (1.0 / inputMatrix.sum(0)).diag #sum cols -> Ky = diag(1 / column degree)
	weigth = Numo::Linalg.dot(inputMatrix, ky).transpose # (A . Ky)^T
	ky = nil #free memory
	weigth = Numo::Linalg.dot(inputMatrix, weigth) # A . Ky . A^T

	kx = inputMatrix.sum(1) #sum rows

	# kx_lamb_mat[j, i] = kx[i]**lambda (constant along each column i)
	kx_lamb = kx ** lambdaValue
	kx_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
	rowsNumber.times do |j|
		rowsNumber.times do |i|
			kx_lamb_mat[j,i] = kx_lamb[i]
		end
	end
	kx_lamb = nil #free memory

	# kx_inv_lamb_mat[i, j] = kx[i]**(1-lambda) (constant along each row i)
	kx_inv_lamb = kx ** (1 - lambdaValue)
	kx_inv_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
	rowsNumber.times do |j|
		rowsNumber.times do |i|
			kx_inv_lamb_mat[i, j] = kx_inv_lamb[i]
		end
	end
	kx_inv_lamb = nil #free memory

	# nx[j, i] = 1 / (kx[i]**lambda * kx[j]**(1-lambda))
	nx = 1.0/(kx_lamb_mat.inplace * kx_inv_lamb_mat).inplace # inplace marks a matrix to be used by reference, not for value
	kx_lamb_mat = nil #free memory
	kx_inv_lamb_mat = nil #free memory
	weigth.inplace * nx # elementwise scaling, mutates weigth in place
	return weigth
end
|
468
1040
|
|
469
|
-
def
|
1041
|
+
# Converts an association matrix back into a list of weighted relations.
# finalMatrix: object indexable as finalMatrix[row, col] returning a numeric
#   association value.
# rowIds / colIds: node identifiers labelling the matrix rows and columns.
# Returns an Array of [row_id, col_id, value] triplets, keeping only strictly
# positive association values.
def matrix2relations(finalMatrix, rowIds, colIds)
	rowIds.each_with_index.flat_map do |row_label, row_pos|
		colIds.each_with_index.filter_map do |col_label, col_pos|
			value = finalMatrix[row_pos, col_pos]
			[row_label, col_label, value] if value > 0
		end
	end
end
|
479
|
-
|
480
|
-
# Binomial coefficient C(n, k) ("n choose k") with exact integer arithmetic.
# n: total number of items (non-negative Integer).
# k: number of chosen items.
# Returns C(n, k). Out-of-range k (k < 0 or k > n) now returns 0, which is the
# mathematically correct value; the previous version returned 1 for those
# inputs. In-range results are unchanged.
def binom(n,k)
	return 0 if k < 0 || k > n # C(n,k) is 0 outside 0..n
	if k > 0 && k < n
		# product of the k largest factors over k!
		(1 + n - k..n).inject(:*) / (1..k).inject(:*)
	else
		1 # k == 0 or k == n
	end
end
|
487
1051
|
end
|