NetAnalyzer 0.1.2 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,56 @@
1
- require 'nodes'
2
- require 'nmatrix'
3
- require 'pp'
1
+ require 'rubystats'
2
+ require 'gv'
3
+ #require 'nmatrix'
4
+ #require 'nmatrix/lapacke'
5
+ require 'numo/narray'
6
+ require 'numo/linalg'
7
+ require 'parallel'
8
+
9
+ #require 'pp'
4
10
  require 'bigdecimal'
11
+ require 'benchmark'
12
+ #require 'nmatrix_expansion'
13
+
14
+
15
+ #For javascript plotting
16
+ require 'erb'
17
+ require 'base64'
18
+ require 'json'
19
+ require 'zlib'
20
+
21
+ require 'semtools'
22
+ require 'expcalc'
23
+ TEMPLATES = File.join(File.dirname(__FILE__), 'templates')
5
24
 
6
25
  class Network
7
- attr_reader :association_values
26
+
27
+ attr_accessor :association_values, :control_connections, :kernels, :reference_nodes, :group_nodes, :threads
8
28
 
9
29
  ## BASIC METHODS
10
30
  ############################################################
11
31
  def initialize(layers)
12
- @nodes = {}
32
+ @threads = 0
33
+ @nodes = {}
13
34
  @edges = {}
35
+ @reference_nodes = []
36
+ @group_nodes = {}
14
37
  @adjacency_matrices = {}
38
+ @kernels = {}
15
39
  @layers = layers
16
40
  @association_values = {}
17
41
  @control_connections = {}
42
+ @compute_pairs = :conn
43
+ @compute_autorelations = true
44
+ @loaded_obos = []
45
+ @ontologies = []
46
+ @layer_ontologies = {}
18
47
  end
19
-
48
+
49
+ def set_compute_pairs(use_pairs, get_autorelations)
50
+ @compute_pairs = use_pairs
51
+ @compute_autorelations = get_autorelations
52
+ end
53
+
20
54
  def add_node(nodeID, nodeType = 0)
21
55
  @nodes[nodeID] = Node.new(nodeID, nodeType)
22
56
  end
@@ -35,52 +69,424 @@ class Network
35
69
  end
36
70
  end
37
71
 
38
- def plot(output_filename, layout="dot")
39
- roboWrite = File.open(output_filename, 'w')
40
- roboWrite.puts "digraph g {"
72
+ def delete_nodes(node_list, mode='d')
73
+ if mode == 'd'
74
+ @nodes.reject!{|n| node_list.include?(n)}
75
+ @edges.reject!{|n, connections| node_list.include?(n)}
76
+ @edges.each do |n, connections|
77
+ connections.reject!{|c| node_list.include?(c)}
78
+ end
79
+ elsif mode == 'r'
80
+ @nodes.select!{|n| node_list.include?(n)}
81
+ @edges.select!{|n, connections| node_list.include?(n)}
82
+ @edges.each do |n, connections|
83
+ connections.select!{|c| node_list.include?(c)}
84
+ end
85
+ end
86
+ @edges.reject!{|n, connections| connections.empty?}
87
+ end
88
+
89
+ def get_connected_nodes(node_id, from_layer)
90
+ return @edges[node_id].map{|id| @nodes[id]}.select{|node| node.type == from_layer}.map{|node| node.id}
91
+ end
92
+
93
+ def get_nodes_from_layer(from_layer)
94
+ return @nodes.values.select{|node| node.type == from_layer}.map{|node| node.id}
95
+ end
96
+
97
+ def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer)
98
+ bipartite_subgraph = {}
99
+ from_layer_node_ids.each do |from_layer_node_id|
100
+ connected_nodes = @edges[from_layer_node_id]
101
+ connected_nodes.each do |connected_node|
102
+ if @nodes[connected_node].type == to_layer
103
+ query = bipartite_subgraph[connected_node]
104
+ if query.nil?
105
+ bipartite_subgraph[connected_node] = get_connected_nodes(connected_node, from_layer)
106
+ end
107
+ end
108
+ end
109
+ end
110
+ return bipartite_subgraph
111
+ end
112
+
113
+ def load_network_by_pairs(file, layers, split_character="\t")
114
+ File.open(file).each do |line|
115
+ line.chomp!
116
+ pair = line.split(split_character)
117
+ node1 = pair[0]
118
+ node2 = pair[1]
119
+ add_node(node1, set_layer(layers, node1))
120
+ add_node(node2, set_layer(layers, node2))
121
+ add_edge(node1, node2)
122
+ end
123
+ end
124
+
125
+ def load_network_by_bin_matrix(input_file, node_file, layers)
126
+ node_names = load_input_list(node_file)
127
+ @adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='npy'), node_names, node_names]
128
+ end
129
+
130
+ def load_network_by_plain_matrix(input_file, node_file, layers, splitChar)
131
+ node_names = load_input_list(node_file)
132
+ @adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='txt', splitChar=splitChar), node_names, node_names]
133
+ end
134
+
135
+ def get_edge_number
136
+ node_connections = get_degree.values.inject(0){|sum, n| sum + n}
137
+ return node_connections/2
138
+ end
139
+
140
+ def get_degree(zscore=false)
141
+ degree = {}
142
+ @edges.each do |id, nodes|
143
+ degree[id] = nodes.length
144
+ end
145
+ if !zscore
146
+ degree_values = degree.values
147
+ mean_degree = degree_values.mean
148
+ std_degree = degree_values.standard_deviation
149
+ degree.transform_values!{|v| (v - mean_degree).fdiv(std_degree)}
150
+ end
151
+ return degree
152
+ end
153
+
154
+ def get_node_attributes(attr_names)
155
+ attrs = []
156
+ attr_names.each do |attr_name|
157
+ if attr_name == 'get_degree'
158
+ attrs << get_degree
159
+ elsif attr_name == 'get_degreeZ'
160
+ attrs << get_degree(zscore=true)
161
+ end
162
+ end
163
+ node_ids = attrs.first.keys
164
+ node_attrs = []
165
+ node_ids.each do |n|
166
+ node_attrs << [n].concat(attrs.map{|at| at[n]})
167
+ end
168
+ return node_attrs
169
+ end
170
+
171
+ def plot_network(options = {})
172
+ if options[:method] == 'graphviz'
173
+ plot_dot(options)
174
+ else
175
+ if options[:method] == 'elgrapho'
176
+ template = 'el_grapho'
177
+ elsif options[:method] == 'cytoscape'
178
+ template = 'cytoscape'
179
+ elsif options[:method] == 'sigma'
180
+ template = 'sigma'
181
+ end
182
+ renderered_template = ERB.new(File.open(File.join(TEMPLATES, template + '.erb')).read).result(binding)
183
+ File.open(options[:output_file] + '.html', 'w'){|f| f.puts renderered_template}
184
+ end
185
+ end
186
+
187
+ def plot_dot(user_options = {}) # input keys: layout
188
+ options = {layout: "sfdp"}
189
+ options = options.merge(user_options)
190
+ graphviz_colors = %w[lightsteelblue1 lightyellow1 lightgray orchid2]
191
+ palette = {}
192
+ @layers.each do |layer|
193
+ palette[layer] = graphviz_colors.shift
194
+ end
195
+ graph = GV::Graph.open('g', type = :undirected)
196
+ plotted_edges = {}
41
197
  @edges.each do |nodeID, associatedIDs|
42
198
  associatedIDs.each do |associatedID|
43
- roboWrite.puts "\"#{nodeID}\"->\"#{associatedID}\";"
199
+ pair = [nodeID, associatedID].sort.join('_').to_sym
200
+ if !plotted_edges[pair]
201
+ graph.edge 'e',
202
+ graph.node(nodeID, label: '', style: 'filled', fillcolor: palette[@nodes[nodeID].type]),
203
+ graph.node(associatedID, label: '', style: 'filled' , fillcolor: palette[@nodes[associatedID].type])
204
+ plotted_edges[pair] = true
205
+ end
206
+ end
207
+ end
208
+ @reference_nodes.each do |nodeID|
209
+ graph.node(nodeID, style: 'filled', fillcolor: 'firebrick1', label: '')
210
+ end
211
+ graphviz_border_colors = %w[blue darkorange red olivedrab4]
212
+ @group_nodes.each do |groupID, gNodes|
213
+ border_color = graphviz_border_colors.shift
214
+ gNodes.each do |nodeID|
215
+ graph.node(nodeID, color: border_color, penwidth: '10', label: '')
216
+ end
217
+ end
218
+ graph[:overlap] = false
219
+ STDERR.puts 'Save graph'
220
+ graph.save(options[:output_file] + '.png', format='png', layout=options[:layout])
221
+ end
222
+
223
+ def compute_group_metrics(output_filename)
224
+ metrics = []
225
+ header = ['group']
226
+ @group_nodes.keys.each do |k|
227
+ metrics << [k]
228
+ end
229
+ header << 'comparative_degree'
230
+ comparative_degree = communities_comparative_degree(@group_nodes)
231
+ comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
232
+ header << 'avg_sht_path'
233
+ avg_sht_path = communities_avg_sht_path(@group_nodes)
234
+ avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
235
+ if !@reference_nodes.empty?
236
+ header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
237
+ node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
238
+ node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
239
+ end
240
+ File.open(output_filename, 'w') do |f|
241
+ f.puts header.join("\t")
242
+ metrics.each do |gr|
243
+ f. puts gr.join("\t")
44
244
  end
45
245
  end
46
- roboWrite.puts "}"
47
- roboWrite.close
48
- cmd = "#{layout} -Tpng #{output_filename} -o #{output_filename}.png"
49
- system(cmd)
246
+ end
247
+
248
+ def replace_nil_vals(val)
249
+ return val.nil? ? 'NULL' : val
250
+ end
251
+
252
+ def communities_comparative_degree(coms)
253
+ comparative_degrees = []
254
+ coms.each do |com_id, com|
255
+ comparative_degrees << compute_comparative_degree(com)
256
+ end
257
+ return comparative_degrees
258
+ end
259
+
260
+ def communities_avg_sht_path(coms)
261
+ avg_sht_path = []
262
+ coms.each do |com_id, com|
263
+ dist, paths = compute_avg_sht_path(com)
264
+ avg_sht_path << dist
265
+ end
266
+ return avg_sht_path
267
+ end
268
+
269
+ def compute_node_com_assoc_in_precomputed_communities(coms, ref_node)
270
+ node_com_assoc = []
271
+ coms.each do |com_id, com|
272
+ node_com_assoc << [compute_node_com_assoc(com, ref_node)]
273
+ end
274
+ return node_com_assoc
275
+ end
276
+
277
+ def compute_comparative_degree(com) # see Girvan-Newman Benchmark control parameter in http://networksciencebook.com/chapter/9#testing (communities chapter)
278
+ internal_degree = 0
279
+ external_degree = 0
280
+ com.each do |nodeID|
281
+ nodeIDneigh = @edges[nodeID]
282
+ next if nodeIDneigh.nil?
283
+ internal_degree += (nodeIDneigh & com).length
284
+ external_degree += (nodeIDneigh - com).length
285
+ end
286
+ comparative_degree = external_degree.fdiv(external_degree + internal_degree)
287
+ return comparative_degree
288
+ end
289
+
290
+ def compute_avg_sht_path(com, paths=false)
291
+ path_lengths = []
292
+ all_paths = []
293
+ group = com.dup
294
+ while !group.empty?
295
+ node_start = group.shift
296
+ sht_paths = Parallel.map(group, in_processes: @threads) do |node_stop|
297
+ #group.each do |node_stop|
298
+ dist, path = shortest_path(node_start, node_stop, paths)
299
+ [dist, path]
300
+ #path_lengths << dist if !dist.nil?
301
+ #all_paths << path if !path.empty?
302
+ end
303
+ sht_paths.each do |dist, path|
304
+ path_lengths << dist
305
+ all_paths << path
306
+ end
307
+ end
308
+ if path_lengths.include?(nil)
309
+ avg_sht_path = nil
310
+ else
311
+ avg_sht_path = path_lengths.inject(0){|sum,l| sum + l}.fdiv(path_lengths.length)
312
+ end
313
+ return avg_sht_path, all_paths
314
+ end
315
+
316
+ # https://pythoninwonderland.wordpress.com/2017/03/18/how-to-implement-breadth-first-search-in-python/
317
+ # finds shortest path between 2 nodes of a graph using BFS
318
+ def bfs_shortest_path(start, goal, paths=false)
319
+ dist = nil
320
+ explored = {} # keep track of explored nodes
321
+ previous = {}
322
+ queue = [[start, 0]] # keep track of all the paths to be checked
323
+ is_goal = false
324
+ while !queue.empty? && !is_goal # keeps looping until all possible paths have been checked
325
+ node, dist = queue.pop # pop the first path from the queue
326
+ if !explored.include?(node) # get the last node from the path
327
+ neighbours = @edges[node]
328
+ explored[node] = true # mark node as explored
329
+ next if neighbours.nil?
330
+ dist += 1
331
+ neighbours.each do |neighbour| # go through all neighbour nodes, construct a new path
332
+ next if explored.include?(neighbour)
333
+ queue.unshift([neighbour, dist]) # push it into the queue
334
+ previous[neighbour] = node if paths
335
+ if neighbour == goal # return path if neighbour is goal
336
+ is_goal = true
337
+ break
338
+ end
339
+ end
340
+ end
341
+ end
342
+ if is_goal
343
+ path = build_path(previous, start, goal) if paths
344
+ else
345
+ dist = nil
346
+ path = []
347
+ end
348
+ return dist, path
349
+ end
350
+
351
+ def build_path(previous, startNode, stopNode)
352
+ path = []
353
+ currentNode = stopNode
354
+ path << currentNode
355
+ while currentNode != startNode
356
+ currentNode = previous[currentNode]
357
+ path << currentNode
358
+ end
359
+ return path
360
+ end
361
+
362
+ def shortest_path(node_start, node_stop, paths=false)
363
+ #https://betterprogramming.pub/5-ways-to-find-the-shortest-path-in-a-graph-88cfefd0030f
364
+ #return bidirectionalSearch(node_start, node_stop)
365
+ #https://efficientcodeblog.wordpress.com/2017/12/13/bidirectional-search-two-end-bfs/
366
+ dist, all_paths = bfs_shortest_path(node_start, node_stop, paths)
367
+ return dist, all_paths
368
+ end
369
+
370
+ def expand_clusters(expand_method)
371
+ clusters = {}
372
+ @group_nodes.each do |id, nodes|
373
+ if expand_method == 'sht_path'
374
+ dist, paths = compute_avg_sht_path(nodes, paths=true) # this uses bfs, maybe Dijkstra is the best one
375
+ new_nodes = paths.flatten.uniq
376
+ clusters[id] = nodes | new_nodes # If some node pair are not connected, recover them
377
+ end
378
+ end
379
+ return clusters
380
+ end
381
+
382
+ def compute_node_com_assoc(com, ref_node)
383
+ ref_cons = 0
384
+ ref_secondary_cons = 0
385
+ secondary_nodes = {}
386
+ other_cons = 0
387
+ other_nodes = {}
388
+
389
+ refNneigh = @edges[ref_node]
390
+ com.each do |nodeID|
391
+ nodeIDneigh = @edges[nodeID]
392
+ next if nodeIDneigh.nil?
393
+ ref_cons += 1 if nodeIDneigh.include?(ref_node)
394
+ if !refNneigh.nil?
395
+ common_nodes = nodeIDneigh & refNneigh
396
+ common_nodes.each {|id| secondary_nodes[id] = true}
397
+ ref_secondary_cons += common_nodes.length
398
+ end
399
+ specific_nodes = nodeIDneigh - refNneigh - [ref_node]
400
+ specific_nodes.each {|id| other_nodes[id] = true}
401
+ other_cons += specific_nodes.length
402
+ end
403
+ by_edge = (ref_cons + ref_secondary_cons).fdiv(other_cons)
404
+ by_node = (ref_cons + secondary_nodes.length).fdiv(other_nodes.length)
405
+ return by_edge, by_node
50
406
  end
51
407
 
52
408
  def get_all_intersections
53
- intersection_lengths = []
54
- get_all_pairs do |node1, node2|
55
- intersection_lengths << intersection(node1, node2).length
409
+ intersection_lengths = get_all_pairs do |node1, node2|
410
+ intersection(node1, node2).length
56
411
  end
57
412
  return intersection_lengths
58
413
  end
59
414
 
60
415
  def get_all_pairs(args = {})
61
- default = {:meth => :all, :layers => :all}
416
+ all_pairs = []
417
+ default = {:layers => :all}
62
418
  args = default.merge(args)
63
- if args[:layers] == :all
64
- nodeIDs = @nodes.keys
419
+ nodeIDsA, nodeIDsB = collect_nodes(args)
420
+ if @compute_autorelations
421
+ if @compute_pairs == :all
422
+ while !nodeIDsA.empty?
423
+ node1 = nodeIDsA.shift
424
+ pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
425
+ yield(node1, node2)
426
+ end
427
+ all_pairs.concat(pairs)
428
+ end
429
+ elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
430
+ while !nodeIDsA.empty?
431
+ node1 = nodeIDsA.shift
432
+ ids_connected_to_n1 = @edges[node1]
433
+ pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
434
+ result = nil
435
+ ids_connected_to_n2 = @edges[node2]
436
+ if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
437
+ result = yield(node1, node2)
438
+ end
439
+ result
440
+ end
441
+ pairs.compact!
442
+ all_pairs.concat(pairs)
443
+ end
444
+ end
65
445
  else
66
- nodeIDs = []
67
- args[:layers].each do |layer|
68
- nodeIDs.concat(@nodes.select{|id, node| node.type == layer}.keys)
446
+ #MAIN METHOD
447
+ if @compute_pairs == :conn
448
+ all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
449
+ ids_connected_to_n1 = @edges[node1]
450
+ node1_pairs = []
451
+ nodeIDsB.each do |node2|
452
+ ids_connected_to_n2 = @edges[node2]
453
+ if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
454
+ node1_pairs << yield(node1, node2)
455
+ end
456
+ end
457
+ node1_pairs
458
+ end
459
+ all_pairs.flatten!(1)
460
+ elsif @compute_pairs == :all
461
+ raise 'Not implemented'
69
462
  end
70
463
  end
71
464
 
72
- if args[:meth] == :all
73
- while !nodeIDs.empty?
74
- node1 = nodeIDs.shift
75
- nodeIDs.each do |node2|
76
- yield(node1, node2)
465
+ return all_pairs
466
+ end
467
+
468
+ def collect_nodes(args)
469
+ nodeIDsA = nil
470
+ nodeIDsB = nil
471
+ if @compute_autorelations
472
+ if args[:layers] == :all
473
+ nodeIDsA = @nodes.keys
474
+ else
475
+ nodeIDsA = []
476
+ args[:layers].each do |layer|
477
+ nodeIDsA.concat(@nodes.select{|id, node| node.type == layer}.keys)
77
478
  end
78
479
  end
79
- #elsif args[:meth] == :conn
80
-
480
+ else
481
+ if args[:layers] != :all
482
+ nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
483
+ nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
484
+ end
81
485
  end
486
+ return nodeIDsA, nodeIDsB
82
487
  end
83
488
 
489
+
84
490
  def get_nodes_layer(layers)
85
491
  #for creating ny value in hypergeometric and pcc index
86
492
  nodes = []
@@ -104,17 +510,16 @@ class Network
104
510
  def generate_adjacency_matrix(layerA, layerB)
105
511
  layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
106
512
  layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
107
- adjacency_matrix = []
108
- layerAidNodes.each do |nodeA|
109
- layerBidNodes.each do |nodeB|
513
+ matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
514
+ layerAidNodes.each_with_index do |nodeA, i|
515
+ layerBidNodes.each_with_index do |nodeB, j|
110
516
  if @edges[nodeB].include?(nodeA)
111
- adjacency_matrix << 1
517
+ matrix[i, j] = 1
112
518
  else
113
- adjacency_matrix << 0
519
+ matrix[i, j] = 0
114
520
  end
115
521
  end
116
522
  end
117
- matrix = NMatrix.new([layerAidNodes.length, layerBidNodes.length], adjacency_matrix)
118
523
  all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
119
524
  @adjacency_matrices[[layerA, layerB]] = all_info_matrix
120
525
  return all_info_matrix
@@ -142,6 +547,14 @@ class Network
142
547
  relations = get_pcc_associations(layers, base_layer)
143
548
  elsif meth == :hypergeometric #all networks
144
549
  relations = get_hypergeometric_associations(layers, base_layer)
550
+ elsif meth == :hypergeometric_bf #all networks
551
+ relations = get_hypergeometric_associations(layers, base_layer, :bonferroni)
552
+ elsif meth == :hypergeometric_bh #all networks
553
+ relations = get_hypergeometric_associations(layers, base_layer, :benjamini_hochberg)
554
+ elsif meth == :hypergeometric_elim #tripartite networks?
555
+ relations = get_hypergeometric_associations_with_topology(layers, base_layer, :elim)
556
+ elsif meth == :hypergeometric_weight #tripartite networks?
557
+ relations = get_hypergeometric_associations_with_topology(layers, base_layer, :weight)
145
558
  elsif meth == :csi #all networks
146
559
  relations = get_csi_associations(layers, base_layer)
147
560
  elsif meth == :transference #tripartite networks
@@ -154,20 +567,19 @@ class Network
154
567
  #---------------------------------------------------------
155
568
  # Alaimo 2014, doi: 10.3389/fbioe.2014.00071
156
569
  def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5)
570
+ relations = []
157
571
  matrix1 = @adjacency_matrices[firstPairLayers].first
158
572
  rowIds = @adjacency_matrices[firstPairLayers][1]
159
573
  matrix2 = @adjacency_matrices[secondPairLayers].first
160
574
  colIds = @adjacency_matrices[secondPairLayers][2]
161
- m1rowNumber = matrix1.rows
162
- m1colNumber = matrix1.cols
163
- m2rowNumber = matrix2.rows
164
- m2colNumber = matrix2.cols
575
+ m1rowNumber, m1colNumber = matrix1.shape
576
+ m2rowNumber, m2colNumber = matrix2.shape
165
577
  #puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
166
578
  matrix1Weight = graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
167
579
  matrix2Weight = graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
168
- matrixWeightProduct = matrix1Weight.dot(matrix2.dot(matrix2Weight))
169
- finalMatrix = matrix1.dot(matrixWeightProduct)
170
- relations = nmatrix2relations(finalMatrix, rowIds, colIds)
580
+ matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
581
+ finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
582
+ relations = matrix2relations(finalMatrix, rowIds, colIds)
171
583
  @association_values[:transference] = relations
172
584
  return relations
173
585
  end
@@ -176,15 +588,14 @@ class Network
176
588
  #---------------------------------------------------------
177
589
  # Bass 2013, doi:10.1038/nmeth.2728
178
590
  def get_associations(layers, base_layer) # BASE METHOD
179
- relations = []
180
- get_all_pairs(layers: layers) do |node1, node2|
591
+ associations = get_all_pairs(layers: layers) do |node1, node2|
181
592
  associatedIDs_node1 = @edges[node1].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
182
593
  associatedIDs_node2 = @edges[node2].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
183
594
  intersectedIDs = associatedIDs_node1 & associatedIDs_node2
184
595
  associationValue = yield(associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2)
185
- relations << [node1, node2, associationValue]
596
+ [node1, node2, associationValue]
186
597
  end
187
- return relations
598
+ return associations
188
599
  end
189
600
 
190
601
  def get_jaccard_association(layers, base_layer)
@@ -227,7 +638,8 @@ class Network
227
638
 
228
639
  def get_pcc_associations(layers, base_layer)
229
640
  #for Ny calcule use get_nodes_layer
230
- ny = get_nodes_layer(layers).length
641
+ base_layer_nodes = get_nodes_layer([base_layer])
642
+ ny = base_layer_nodes.length
231
643
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
232
644
  intersProd = intersectedIDs.length * ny
233
645
  nodesProd = associatedIDs_node1.length * associatedIDs_node2.length
@@ -240,84 +652,145 @@ class Network
240
652
  return relations
241
653
  end
242
654
 
243
- def get_hypergeometric_associations(layers, base_layer)
244
- ny = get_nodes_layer(layers).length
655
+ def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil)
656
+ ny = get_nodes_layer([base_layer]).length
657
+ fet = Rubystats::FishersExactTest.new
245
658
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
246
- minLength = [associatedIDs_node1.length, associatedIDs_node2.length].min
659
+ fisher = 0
247
660
  intersection_lengths = intersectedIDs.length
248
- sum = 0
249
- nA = associatedIDs_node1.length
250
- nB = associatedIDs_node2.length
251
- #Using index from A layer proyected to B
252
- (intersection_lengths..minLength).each do |i|
253
- binom_product = binom(nA, i) * binom(ny - nA, nB - i)
254
- sum += binom_product.fdiv(binom(ny, nB))
255
- # binom_product_float = binom_product.to_f
256
- # to_f = false
257
- # if binom_product_float.infinite? # Handle bignum coercition to bigdecimal to avoid infinity values on float class.
258
- # binom_product_float = BigDecimal.new(binom_product)
259
- # to_f = true
260
- # end
261
- # sum += binom_product_float / binom(ny, nB)
262
- # sum = sum.to_f if to_f # once the operation has finished, sum is corced from bigdecimal to float
661
+ if intersection_lengths > 0
662
+ n1_items = associatedIDs_node1.length
663
+ n2_items = associatedIDs_node2.length
664
+ fisher = fet.calculate(
665
+ intersection_lengths,
666
+ n1_items - intersection_lengths,
667
+ n2_items - intersection_lengths,
668
+ ny - (n1_items + n2_items - intersection_lengths)
669
+ )
670
+ fisher = fisher[:right]
263
671
  end
264
- if sum == 0
265
- hypergeometricValue = 0
266
- else
267
- hypergeometricValue = -Math.log10(sum)
268
- end
269
- hypergeometricValue
672
+ fisher
673
+ end
674
+ if pvalue_adj_method == :bonferroni
675
+ meth = :hypergeometric_bf
676
+ compute_adjusted_pvalue_bonferroni(relations)
677
+ elsif pvalue_adj_method == :benjamini_hochberg
678
+ meth = :hypergeometric_bh
679
+ compute_adjusted_pvalue_benjaminiHochberg(relations)
680
+ else
681
+ meth = :hypergeometric
682
+ compute_log_transformation(relations)
270
683
  end
271
- @association_values[:hypergeometric] = relations
684
+ @association_values[meth] = relations
272
685
  return relations
273
686
  end
274
687
 
275
- def add_record(hash, key, key2, value)
276
- query = hash[key]
688
+ def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01)
689
+ relations = []
690
+ reference_layer = (layers - @layer_ontologies.keys).first
691
+ ontology_layer = (layers - [reference_layer]).first
692
+ ref_nodes = get_nodes_from_layer(reference_layer) # get nodes from NOT ontology layer
693
+ ontology = @layer_ontologies[ontology_layer]
694
+ base_layer_length = @nodes.values.count{|n| n.type == base_layer}
695
+ ref_nodes.each do |ref_node|
696
+ base_nodes = get_connected_nodes(ref_node, base_layer)
697
+ ontology_base_subgraph = get_bipartite_subgraph(base_nodes, base_layer, ontology_layer) # get shared nodes between nodes from NOT ontology layer and ONTOLOGY layer. Also get the conections between shared nodes and ontology nodes.
698
+ next if ontology_base_subgraph.empty?
699
+ ontology_base_subgraph.transform_keys!{|k| k.to_sym}
700
+ ontology.load_item_relations_to_terms(ontology_base_subgraph, remove_old_relations = true)
701
+ term_pvals = ontology.compute_relations_to_items(base_nodes, base_layer_length, mode, thresold)
702
+ relations.concat(term_pvals.map{|term| [ref_node, term[0], term[1]]})
703
+ end
704
+ compute_log_transformation(relations)
705
+ if mode == :elim
706
+ meth = :hypergeometric_elim
707
+ elsif mode == :weight
708
+ meth = :hypergeometric_weight
709
+ end
710
+ @association_values[meth] = relations
711
+ return relations
712
+ end
713
+
714
+ def compute_adjusted_pvalue(relations, log_val=true)
715
+ relations.each_with_index do |data, i| #p1, p2, pval
716
+ pval_adj = yield(data.last, i)
717
+ pval_adj = -Math.log10(pval_adj) if log_val && pval_adj > 0
718
+ data[2] = pval_adj
719
+ end
720
+ end
721
+
722
+ def compute_log_transformation(relations) #Only perform log transform whitout adjust pvalue. Called when adjusted method is not defined
723
+ compute_adjusted_pvalue(relations) do |pval, index|
724
+ pval
725
+ end
726
+ end
727
+
728
+ def compute_adjusted_pvalue_bonferroni(relations)
729
+ n_comparations = relations.length
730
+ compute_adjusted_pvalue(relations) do |pval, index|
731
+ adj = pval * n_comparations
732
+ adj = 1 if adj > 1
733
+ adj
734
+ end
735
+ end
736
+
737
+ def compute_adjusted_pvalue_benjaminiHochberg(relations)
738
+ adj_pvalues = get_benjaminiHochberg_pvalues(relations.map{|rel| rel.last})
739
+ compute_adjusted_pvalue(relations) do |pval, index|
740
+ adj_pvalues[index]
741
+ end
742
+ end
743
+
744
+ def add_record(hash, node1, node2)
745
+ query = hash[node1]
277
746
  if query.nil?
278
- hash[key]={key2 => value}
747
+ hash[node1] = [node2]
748
+ else
749
+ query << node2
750
+ end
751
+ end
752
+
753
+ def add_nested_record(hash, node1, node2, val)
754
+ query_node1 = hash[node1]
755
+ if query_node1.nil?
756
+ hash[node1] = {node2 => val}
279
757
  else
280
- query[key2] = value
758
+ query_node1[node2] = val
281
759
  end
282
760
  end
283
761
 
762
+
284
763
  def get_csi_associations(layers, base_layer)
285
764
  pcc_relations = get_pcc_associations(layers, base_layer)
286
- indexed_pcc_relations = {}
765
+ clean_autorelations_on_association_values if layers.length > 1
766
+ nx = get_nodes_layer(layers).length
767
+ pcc_vals = {}
768
+ node_rels = {}
287
769
  pcc_relations.each do |node1, node2, assoc_index|
288
- if assoc_index > 0
289
- add_record(indexed_pcc_relations, node1, node2, assoc_index)
290
- add_record(indexed_pcc_relations, node2, node1, assoc_index)
291
- end
770
+ add_nested_record(pcc_vals, node1, node2, assoc_index.abs)
771
+ add_nested_record(pcc_vals, node2, node1, assoc_index.abs)
772
+ add_record(node_rels, node1, node2)
773
+ add_record(node_rels, node2, node1)
292
774
  end
293
- ny = get_nodes_layer(layers).length
294
- relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
295
- query = indexed_pcc_relations[node1]
296
- if query.nil?
297
- valid_A_nodes = []
298
- pccAB = -0.05
299
- else
300
- nested_query = query[node2]
301
- if nested_query.nil?
302
- pccAB = -0.05
303
- else
304
- pccAB = nested_query - 0.05
305
- end
306
- valid_A_nodes = query.select{|node_id, pcc| pcc>= pccAB}.keys
775
+ relations = []
776
+ pcc_relations.each do |node1, node2 ,assoc_index|
777
+ pccAB = assoc_index - 0.05
778
+ valid_nodes = 0
779
+ node_rels[node1].each do |node|
780
+ valid_nodes += 1 if pcc_vals[node1][node] >= pccAB
307
781
  end
308
- query2 = indexed_pcc_relations[node2]
309
- if query2.nil?
310
- valid_B_nodes = []
311
- else
312
- valid_B_nodes = query2.select{|node_id, pcc| pcc>= pccAB}.keys
782
+ node_rels[node2].each do |node|
783
+ valid_nodes += 1 if pcc_vals[node2][node] >= pccAB
313
784
  end
314
- valid_connections = valid_A_nodes | valid_B_nodes
315
- csiValue = 1 - valid_connections.length.to_f/ny
785
+ csiValue = 1 - (valid_nodes-1).fdiv(nx)
786
+ # valid_nodes-1 is done due to the connection node1-node2 is counted twice (one for each loop)
787
+ relations << [node1, node2, csiValue]
316
788
  end
317
789
  @association_values[:csi] = relations
318
790
  return relations
319
791
  end
320
792
 
793
+
321
794
  ## PERFORMANCE METHODS
322
795
  ############################################################
323
796
  def load_control(ref_array)
@@ -381,11 +854,11 @@ class Network
381
854
  if !pred_info.nil?
382
855
  labels, scores = pred_info
383
856
  reliable_labels = get_reliable_labels(labels, scores, cut, top)
384
-
385
857
  predicted_labels += reliable_labels.length #m
386
858
  common_labels += (c_labels & reliable_labels).length #k
387
859
  end
388
860
  end
861
+ #puts "cut: #{cut} trueL: #{true_labels} predL: #{predicted_labels} commL: #{common_labels}"
389
862
  prec = common_labels.to_f/predicted_labels
390
863
  rec = common_labels.to_f/true_labels
391
864
  prec = 0.0 if prec.nan?
@@ -393,19 +866,132 @@ class Network
393
866
  return prec, rec
394
867
  end
395
868
 
869
+ ## KERNEL METHODS
870
+ #######################################################################################
871
# Computes a graph kernel from the adjacency matrix of one layer and stores it in @kernels.
#
# layer2kernel:: key of @adjacency_matrices; its entry is a [matrix, node_names] pair.
# kernel:: kernel code. 'el', 'ct', 'rf', 'me' and 'ka' are matched exactly; 'vn', 'rl' and 'md'
#          are matched as substrings and the rest of the string is parsed as their numeric
#          parameter (e.g. 'vn0.5', 'md2'). nil or any other value falls back to the plain
#          adjacency matrix, with a warning.
# normalization:: when true, cosine_normalization is applied to the result.
#
# Returns the resulting matrix, which is also saved in @kernels[layer2kernel].
# Raises ArgumentError when an 'md' kernel does not carry a positive number of steps.
#
# Notation used in the comments below:
#   I = identity matrix, D = diagonal matrix, A = adjacency matrix, L = laplacian matrix = D - A
def get_kernel(layer2kernel, kernel, normalization=false)
  # A nil (unspecified) kernel must reach the fallback branch instead of failing on nil.include?
  kernel = kernel.to_s
  matrix, _node_names = @adjacency_matrices[layer2kernel]
  matrix_result = nil
  dimension_elements = matrix.shape.last
  # In scuba code, the diagonal values of A are set to 0. In a weighted matrix the kernel result is
  # the same with or without this operation. In the md kernel this operation affects the final values.
  #dimension_elements.times do |n|
  #  matrix[n,n] = 0.0
  #end
  laplacian_based = %w[el ct rf me].include?(kernel) || kernel.include?('vn') || kernel.include?('rl')
  if laplacian_based
    # sum(1) gives the total of each row (use 0 for columns); diag places those sums on a diagonal matrix
    diagonal_matrix = matrix.sum(1).diag
    matrix_L = diagonal_matrix - matrix
    if kernel == 'el' # Exponential Laplacian diffusion kernel (active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
      beta = 0.02
      beta_product = matrix_L * -beta
      matrix_result = Numo::Linalg.expm(beta_product, 14)
    elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      # NOTE(review): it is unclear whether the reference applies a normalization to this kernel; none is applied here.
      matrix_result = Numo::Linalg.pinv(matrix_L)
    elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) # Krf = (I + L)^-1
    elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      # alpha = penalization factor taken from the kernel name (1, 0.5 or 0.1) multiplied by the inverse of the max eigenvalue of A
      alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha) # (I - alpha*A)^-1
    elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
      # alpha is computed exactly as in the 'vn' kernel
      alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha) # (I + alpha*L)^-1
    elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5
      beta = 0.04
      # (beta/N)*(N*I - D + A)
      id_mat = Numo::DFloat.eye(dimension_elements)
      m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix) * (beta/dimension_elements)
      matrix_result = Numo::Linalg.expm(m_matrix, 16)
    end
  elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
    lambda_value = matrix.min_eigenvalue
    # Ka = A + lambda*I, lambda = absolute value of the smallest eigenvalue of A
    matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs
  elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5
    t = kernel.gsub('md', '').to_i
    # Without a positive step count the average below divides by zero and yields a non-finite kernel
    raise ArgumentError, "The 'md' kernel needs a positive number of steps (e.g. 'md2'), got '#{kernel}'" if t < 1
    #TODO: check implementation with Numo::array
    row_sum = matrix.sum(1)
    p_mat = matrix.div_by_vector(row_sum)
    p_temp_mat = p_mat.clone
    zt_mat = p_mat.clone
    # Accumulate P + P^2 + ... + P^t
    (t-1).times do
      p_temp_mat = p_temp_mat.dot(p_mat)
      zt_mat = zt_mat + p_temp_mat
    end
    zt_mat = zt_mat * (1.0/t)
    matrix_result = zt_mat.dot(zt_mat.transpose)
  else
    # Returning the adjacency matrix allows processing a previously computed kernel and
    # performing only the normalization in a separate step.
    matrix_result = matrix
    warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
  end
  matrix_result = matrix_result.cosine_normalization if normalization #TODO: check implementation with Numo::array
  @kernels[layer2kernel] = matrix_result
end
936
+
937
# Saves the kernel previously stored for +layer2kernel+ into +output_file+
# by delegating to the kernel object's own save method.
def write_kernel(layer2kernel, output_file)
  kernel_matrix = @kernels[layer2kernel]
  kernel_matrix.save(output_file)
end
940
+
941
# Associates an ontology with a layer, loading each ontology file only once.
#
# ontology_file_path:: path handed to Ontology.new; it is also the key used to detect
#                      already loaded ontologies.
# layer_name:: key under which the ontology is registered in @layer_ontologies.
#
# Returns the ontology linked to the layer.
def link_ontology(ontology_file_path, layer_name)
  # @loaded_obos and @ontologies are parallel lists: the same position holds a path and its ontology
  loaded_index = @loaded_obos.index(ontology_file_path)
  if loaded_index.nil? # Load new ontology
    ontology = Ontology.new(file: ontology_file_path, load_file: true)
    @loaded_obos << ontology_file_path
    @ontologies << ontology
  else # Link loaded ontology to current layer
    ontology = @ontologies[loaded_index]
  end
  @layer_ontologies[layer_name] = ontology
end
396
951
 
397
952
 
398
953
  ## AUXILIARY METHODS
399
954
  #######################################################################################
400
955
  private
401
956
 
957
# Reads +file+ and returns its lines as an array of strings without line terminators.
# File.readlines opens and closes the file itself, so no file handle is left open.
def load_input_list(file)
  return File.readlines(file).map(&:chomp)
end
960
+
961
# Tells whether two nodes share at least one connected node.
#
# ids_connected_to_n1, ids_connected_to_n2:: lists of node ids connected to each node; either may be nil.
#
# Returns true only when both lists are present and their intersection is not empty.
def exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
  return false if ids_connected_to_n1.nil? || ids_connected_to_n2.nil?

  # At least one node must be connected to both n1 and n2
  !(ids_connected_to_n1 & ids_connected_to_n2).empty?
end
970
+
971
# Resolves the layer a node belongs to and registers that layer in @layers.
#
# layer_definitions:: collection of (layer_name, regexp) pairs. With a single definition its layer
#                     is used without checking the regexp; with several, the first pair whose
#                     regexp matches +node_name+ wins.
# node_name:: node identifier tested against the regexps.
#
# Returns the layer name.
# Raises ArgumentError when no definitions are given, and RuntimeError when no regexp matches.
def set_layer(layer_definitions, node_name)
  raise ArgumentError, 'No layer definitions were given' if layer_definitions.empty?

  if layer_definitions.length > 1
    layer, _regexp = layer_definitions.find { |_layer_name, regexp| regexp.match?(node_name) }
    raise("The node '#{node_name}' not match with any layer regex") if layer.nil?
  else
    layer = layer_definitions.first.first
  end
  @layers << layer unless @layers.include?(layer)
  return layer
end
987
+
402
988
  def get_cuts(limits, n_cuts)
403
989
  cuts = []
404
- range = (limits.last - limits.first).to_f/n_cuts
990
+ range = (limits.last - limits.first).abs.fdiv(n_cuts)
991
+ range = BigDecimal(range, 10)
405
992
  cut = limits.first
406
- n_cuts.times do
407
- cuts << cut
408
- cut += range
993
+ (n_cuts + 1).times do |n|
994
+ cuts << (cut + n * range).to_f
409
995
  end
410
996
  return cuts
411
997
  end
@@ -420,35 +1006,46 @@ class Network
420
1006
  end
421
1007
 
422
1008
  def graphWeights (rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
423
- invMatrix = inputMatrix.sum(0).map{|e| 1.0/ e}
424
- diagonalColSums = NMatrix.diag(invMatrix)
425
- rowsSums = inputMatrix.sum(1).to_flat_a
426
- ky = NMatrix.new([rowsNumber, rowsNumber], rowsSums).map{|e| e ** lambdaValue }
427
- invertLambdaVal = (1 - lambdaValue)
428
- kx = NMatrix.new([rowsNumber, rowsNumber], rowsSums).transpose.map{|e| e ** invertLambdaVal }
429
- nx = (ky * kx).map{|e| 1.0/ e}
430
- weigth = (inputMatrix.dot(diagonalColSums)).transpose
431
- weigth = inputMatrix.dot(weigth)
432
- weigth = nx * weigth
1009
+ ky = (1.0 / inputMatrix.sum(0)).diag #sum cols
1010
+ weigth = Numo::Linalg.dot(inputMatrix, ky).transpose
1011
+ ky = nil #free memory
1012
+ weigth = Numo::Linalg.dot(inputMatrix, weigth)
1013
+
1014
+ kx = inputMatrix.sum(1) #sum rows
1015
+
1016
+ kx_lamb = kx ** lambdaValue
1017
+ kx_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
1018
+ rowsNumber.times do |j|
1019
+ rowsNumber.times do |i|
1020
+ kx_lamb_mat[j,i] = kx_lamb[i]
1021
+ end
1022
+ end
1023
+ kx_lamb = nil #free memory
1024
+
1025
+ kx_inv_lamb = kx ** (1 - lambdaValue)
1026
+ kx_inv_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
1027
+ rowsNumber.times do |j|
1028
+ rowsNumber.times do |i|
1029
+ kx_inv_lamb_mat[i, j] = kx_inv_lamb[i]
1030
+ end
1031
+ end
1032
+ kx_inv_lamb = nil #free memory
1033
+
1034
+ nx = 1.0/(kx_lamb_mat.inplace * kx_inv_lamb_mat).inplace # inplace marks a matrix to be used by reference, not for value
1035
+ kx_lamb_mat = nil #free memory
1036
+ kx_inv_lamb_mat = nil #free memory
1037
+ weigth.inplace * nx
433
1038
  return weigth
434
1039
  end
435
1040
 
436
- def nmatrix2relations(finalMatrix, rowIds, colIds)
1041
# Flattens a matrix into [row id, column id, value] triples, row by row,
# keeping only the cells whose value is greater than zero.
def matrix2relations(finalMatrix, rowIds, colIds)
  rowIds.each_with_index.with_object([]) do |(row_id, row_pos), triples|
    colIds.each_with_index do |col_id, col_pos|
      value = finalMatrix[row_pos, col_pos]
      next unless value > 0

      triples << [row_id, col_id, value]
    end
  end
end
446
-
447
- def binom(n,k)
448
- if k > 0 && k < n
449
- res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
450
- else
451
- res = 1
452
- end
453
- end
454
1051
  end