NetAnalyzer 0.1.5 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,22 +1,54 @@
1
- require 'nodes'
2
- require 'nmatrix'
3
- require 'pp'
1
+ require 'rubystats'
2
+ require 'gv'
3
+ #require 'nmatrix'
4
+ #require 'nmatrix/lapacke'
5
+ require 'numo/narray'
6
+ require 'numo/linalg'
7
+ require 'parallel'
8
+
9
+ #require 'pp'
4
10
  require 'bigdecimal'
5
11
  require 'benchmark'
12
+ #require 'nmatrix_expansion'
13
+
14
+
15
+ #For javascrip plotting
16
+ require 'erb'
17
+ require 'base64'
18
+ require 'json'
19
+ require 'zlib'
20
+
21
+ require 'semtools'
22
+ require 'expcalc'
23
+ TEMPLATES = File.join(File.dirname(__FILE__), 'templates')
6
24
 
7
25
  class Network
8
26
 
9
- attr_accessor :association_values, :control_connections
27
+ attr_accessor :association_values, :control_connections, :kernels, :reference_nodes, :group_nodes, :threads
10
28
 
11
29
  ## BASIC METHODS
12
30
  ############################################################
13
31
  def initialize(layers)
14
- @nodes = {}
32
+ @threads = 0
33
+ @nodes = {}
15
34
  @edges = {}
35
+ @reference_nodes = []
36
+ @group_nodes = {}
16
37
  @adjacency_matrices = {}
38
+ @kernels = {}
17
39
  @layers = layers
18
40
  @association_values = {}
19
41
  @control_connections = {}
42
+ @compute_pairs = :conn
43
+ @compute_autorelations = true
44
+ @loaded_obos = []
45
+ @ontologies = []
46
+ @layer_ontologies = {}
47
+ end
48
+
49
+ def set_compute_pairs(use_pairs, get_autorelations)
50
+ @compute_pairs = use_pairs
51
+ @compute_autorelations = get_autorelations
20
52
  end
21
53
 
22
54
  def add_node(nodeID, nodeType = 0)
@@ -37,8 +69,49 @@ class Network
37
69
  end
38
70
  end
39
71
 
72
+ def delete_nodes(node_list, mode='d')
73
+ if mode == 'd'
74
+ @nodes.reject!{|n| node_list.include?(n)}
75
+ @edges.reject!{|n, connections| node_list.include?(n)}
76
+ @edges.each do |n, connections|
77
+ connections.reject!{|c| node_list.include?(c)}
78
+ end
79
+ elsif mode == 'r'
80
+ @nodes.select!{|n| node_list.include?(n)}
81
+ @edges.select!{|n, connections| node_list.include?(n)}
82
+ @edges.each do |n, connections|
83
+ connections.select!{|c| node_list.include?(c)}
84
+ end
85
+ end
86
+ @edges.reject!{|n, connections| connections.empty?}
87
+ end
88
+
89
+ def get_connected_nodes(node_id, from_layer)
90
+ return @edges[node_id].map{|id| @nodes[id]}.select{|node| node.type == from_layer}.map{|node| node.id}
91
+ end
92
+
93
+ def get_nodes_from_layer(from_layer)
94
+ return @nodes.values.select{|node| node.type == from_layer}.map{|node| node.id}
95
+ end
96
+
97
+ def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer)
98
+ bipartite_subgraph = {}
99
+ from_layer_node_ids.each do |from_layer_node_id|
100
+ connected_nodes = @edges[from_layer_node_id]
101
+ connected_nodes.each do |connected_node|
102
+ if @nodes[connected_node].type == to_layer
103
+ query = bipartite_subgraph[connected_node]
104
+ if query.nil?
105
+ bipartite_subgraph[connected_node] = get_connected_nodes(connected_node, from_layer)
106
+ end
107
+ end
108
+ end
109
+ end
110
+ return bipartite_subgraph
111
+ end
112
+
40
113
  def load_network_by_pairs(file, layers, split_character="\t")
41
- File.open(file).each("\n") do |line|
114
+ File.open(file).each do |line|
42
115
  line.chomp!
43
116
  pair = line.split(split_character)
44
117
  node1 = pair[0]
@@ -49,57 +122,371 @@ class Network
49
122
  end
50
123
  end
51
124
 
125
+ def load_network_by_bin_matrix(input_file, node_file, layers)
126
+ node_names = load_input_list(node_file)
127
+ @adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='npy'), node_names, node_names]
128
+ end
129
+
130
+ def load_network_by_plain_matrix(input_file, node_file, layers, splitChar)
131
+ node_names = load_input_list(node_file)
132
+ @adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='txt', splitChar=splitChar), node_names, node_names]
133
+ end
134
+
52
135
  def get_edge_number
53
- node_connections = @edges.values.map{|connections| connections.length}.inject(0){|sum, n| sum + n}
136
+ node_connections = get_degree.values.inject(0){|sum, n| sum + n}
54
137
  return node_connections/2
55
138
  end
56
139
 
57
- def plot(output_filename, layout="dot")
58
- roboWrite = File.open(output_filename, 'w')
59
- roboWrite.puts "digraph g {"
140
+ def get_degree(zscore=false)
141
+ degree = {}
142
+ @edges.each do |id, nodes|
143
+ degree[id] = nodes.length
144
+ end
145
+ if !zscore
146
+ degree_values = degree.values
147
+ mean_degree = degree_values.mean
148
+ std_degree = degree_values.standard_deviation
149
+ degree.transform_values!{|v| (v - mean_degree).fdiv(std_degree)}
150
+ end
151
+ return degree
152
+ end
153
+
154
+ def get_node_attributes(attr_names)
155
+ attrs = []
156
+ attr_names.each do |attr_name|
157
+ if attr_name == 'get_degree'
158
+ attrs << get_degree
159
+ elsif attr_name == 'get_degreeZ'
160
+ attrs << get_degree(zscore=true)
161
+ end
162
+ end
163
+ node_ids = attrs.first.keys
164
+ node_attrs = []
165
+ node_ids.each do |n|
166
+ node_attrs << [n].concat(attrs.map{|at| at[n]})
167
+ end
168
+ return node_attrs
169
+ end
170
+
171
+ def plot_network(options = {})
172
+ if options[:method] == 'graphviz'
173
+ plot_dot(options)
174
+ else
175
+ if options[:method] == 'elgrapho'
176
+ template = 'el_grapho'
177
+ elsif options[:method] == 'cytoscape'
178
+ template = 'cytoscape'
179
+ elsif options[:method] == 'sigma'
180
+ template = 'sigma'
181
+ end
182
+ renderered_template = ERB.new(File.open(File.join(TEMPLATES, template + '.erb')).read).result(binding)
183
+ File.open(options[:output_file] + '.html', 'w'){|f| f.puts renderered_template}
184
+ end
185
+ end
186
+
187
+ def plot_dot(user_options = {}) # input keys: layout
188
+ options = {layout: "sfdp"}
189
+ options = options.merge(user_options)
190
+ graphviz_colors = %w[lightsteelblue1 lightyellow1 lightgray orchid2]
191
+ palette = {}
192
+ @layers.each do |layer|
193
+ palette[layer] = graphviz_colors.shift
194
+ end
195
+ graph = GV::Graph.open('g', type = :undirected)
196
+ plotted_edges = {}
60
197
  @edges.each do |nodeID, associatedIDs|
61
198
  associatedIDs.each do |associatedID|
62
- roboWrite.puts "\"#{nodeID}\"->\"#{associatedID}\";"
199
+ pair = [nodeID, associatedID].sort.join('_').to_sym
200
+ if !plotted_edges[pair]
201
+ graph.edge 'e',
202
+ graph.node(nodeID, label: '', style: 'filled', fillcolor: palette[@nodes[nodeID].type]),
203
+ graph.node(associatedID, label: '', style: 'filled' , fillcolor: palette[@nodes[associatedID].type])
204
+ plotted_edges[pair] = true
205
+ end
206
+ end
207
+ end
208
+ @reference_nodes.each do |nodeID|
209
+ graph.node(nodeID, style: 'filled', fillcolor: 'firebrick1', label: '')
210
+ end
211
+ graphviz_border_colors = %w[blue darkorange red olivedrab4]
212
+ @group_nodes.each do |groupID, gNodes|
213
+ border_color = graphviz_border_colors.shift
214
+ gNodes.each do |nodeID|
215
+ graph.node(nodeID, color: border_color, penwidth: '10', label: '')
216
+ end
217
+ end
218
+ graph[:overlap] = false
219
+ STDERR.puts 'Save graph'
220
+ graph.save(options[:output_file] + '.png', format='png', layout=options[:layout])
221
+ end
222
+
223
+ def compute_group_metrics(output_filename)
224
+ metrics = []
225
+ header = ['group']
226
+ @group_nodes.keys.each do |k|
227
+ metrics << [k]
228
+ end
229
+ header << 'comparative_degree'
230
+ comparative_degree = communities_comparative_degree(@group_nodes)
231
+ comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
232
+ header << 'avg_sht_path'
233
+ avg_sht_path = communities_avg_sht_path(@group_nodes)
234
+ avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
235
+ if !@reference_nodes.empty?
236
+ header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
237
+ node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
238
+ node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
239
+ end
240
+ File.open(output_filename, 'w') do |f|
241
+ f.puts header.join("\t")
242
+ metrics.each do |gr|
243
+ f. puts gr.join("\t")
244
+ end
245
+ end
246
+ end
247
+
248
+ def replace_nil_vals(val)
249
+ return val.nil? ? 'NULL' : val
250
+ end
251
+
252
+ def communities_comparative_degree(coms)
253
+ comparative_degrees = []
254
+ coms.each do |com_id, com|
255
+ comparative_degrees << compute_comparative_degree(com)
256
+ end
257
+ return comparative_degrees
258
+ end
259
+
260
+ def communities_avg_sht_path(coms)
261
+ avg_sht_path = []
262
+ coms.each do |com_id, com|
263
+ dist, paths = compute_avg_sht_path(com)
264
+ avg_sht_path << dist
265
+ end
266
+ return avg_sht_path
267
+ end
268
+
269
+ def compute_node_com_assoc_in_precomputed_communities(coms, ref_node)
270
+ node_com_assoc = []
271
+ coms.each do |com_id, com|
272
+ node_com_assoc << [compute_node_com_assoc(com, ref_node)]
273
+ end
274
+ return node_com_assoc
275
+ end
276
+
277
+ def compute_comparative_degree(com) # see Girvan-Newman Benchmark control parameter in http://networksciencebook.com/chapter/9#testing (communities chapter)
278
+ internal_degree = 0
279
+ external_degree = 0
280
+ com.each do |nodeID|
281
+ nodeIDneigh = @edges[nodeID]
282
+ next if nodeIDneigh.nil?
283
+ internal_degree += (nodeIDneigh & com).length
284
+ external_degree += (nodeIDneigh - com).length
285
+ end
286
+ comparative_degree = external_degree.fdiv(external_degree + internal_degree)
287
+ return comparative_degree
288
+ end
289
+
290
+ def compute_avg_sht_path(com, paths=false)
291
+ path_lengths = []
292
+ all_paths = []
293
+ group = com.dup
294
+ while !group.empty?
295
+ node_start = group.shift
296
+ sht_paths = Parallel.map(group, in_processes: @threads) do |node_stop|
297
+ #group.each do |node_stop|
298
+ dist, path = shortest_path(node_start, node_stop, paths)
299
+ [dist, path]
300
+ #path_lengths << dist if !dist.nil?
301
+ #all_paths << path if !path.empty?
302
+ end
303
+ sht_paths.each do |dist, path|
304
+ path_lengths << dist
305
+ all_paths << path
306
+ end
307
+ end
308
+ if path_lengths.include?(nil)
309
+ avg_sht_path = nil
310
+ else
311
+ avg_sht_path = path_lengths.inject(0){|sum,l| sum + l}.fdiv(path_lengths.length)
312
+ end
313
+ return avg_sht_path, all_paths
314
+ end
315
+
316
+ # https://pythoninwonderland.wordpress.com/2017/03/18/how-to-implement-breadth-first-search-in-python/
317
+ # finds shortest path between 2 nodes of a graph using BFS
318
+ def bfs_shortest_path(start, goal, paths=false)
319
+ dist = nil
320
+ explored = {} # keep track of explored nodes
321
+ previous = {}
322
+ queue = [[start, 0]] # keep track of all the paths to be checked
323
+ is_goal = false
324
+ while !queue.empty? && !is_goal # keeps looping until all possible paths have been checked
325
+ node, dist = queue.pop # pop the first path from the queue
326
+ if !explored.include?(node) # get the last node from the path
327
+ neighbours = @edges[node]
328
+ explored[node] = true # mark node as explored
329
+ next if neighbours.nil?
330
+ dist += 1
331
+ neighbours.each do |neighbour| # go through all neighbour nodes, construct a new path
332
+ next if explored.include?(neighbour)
333
+ queue.unshift([neighbour, dist]) # push it into the queue
334
+ previous[neighbour] = node if paths
335
+ if neighbour == goal # return path if neighbour is goal
336
+ is_goal = true
337
+ break
338
+ end
339
+ end
340
+ end
341
+ end
342
+ if is_goal
343
+ path = build_path(previous, start, goal) if paths
344
+ else
345
+ dist = nil
346
+ path = []
347
+ end
348
+ return dist, path
349
+ end
350
+
351
+ def build_path(previous, startNode, stopNode)
352
+ path = []
353
+ currentNode = stopNode
354
+ path << currentNode
355
+ while currentNode != startNode
356
+ currentNode = previous[currentNode]
357
+ path << currentNode
358
+ end
359
+ return path
360
+ end
361
+
362
+ def shortest_path(node_start, node_stop, paths=false)
363
+ #https://betterprogramming.pub/5-ways-to-find-the-shortest-path-in-a-graph-88cfefd0030f
364
+ #return bidirectionalSearch(node_start, node_stop)
365
+ #https://efficientcodeblog.wordpress.com/2017/12/13/bidirectional-search-two-end-bfs/
366
+ dist, all_paths = bfs_shortest_path(node_start, node_stop, paths)
367
+ return dist, all_paths
368
+ end
369
+
370
+ def expand_clusters(expand_method)
371
+ clusters = {}
372
+ @group_nodes.each do |id, nodes|
373
+ if expand_method == 'sht_path'
374
+ dist, paths = compute_avg_sht_path(nodes, paths=true) # this uses bfs, maybe Dijkstra is the best one
375
+ new_nodes = paths.flatten.uniq
376
+ clusters[id] = nodes | new_nodes # If some node pair are not connected, recover them
63
377
  end
64
378
  end
65
- roboWrite.puts "}"
66
- roboWrite.close
67
- cmd = "#{layout} -Tpng #{output_filename} -o #{output_filename}.png"
68
- system(cmd)
379
+ return clusters
380
+ end
381
+
382
+ def compute_node_com_assoc(com, ref_node)
383
+ ref_cons = 0
384
+ ref_secondary_cons = 0
385
+ secondary_nodes = {}
386
+ other_cons = 0
387
+ other_nodes = {}
388
+
389
+ refNneigh = @edges[ref_node]
390
+ com.each do |nodeID|
391
+ nodeIDneigh = @edges[nodeID]
392
+ next if nodeIDneigh.nil?
393
+ ref_cons += 1 if nodeIDneigh.include?(ref_node)
394
+ if !refNneigh.nil?
395
+ common_nodes = nodeIDneigh & refNneigh
396
+ common_nodes.each {|id| secondary_nodes[id] = true}
397
+ ref_secondary_cons += common_nodes.length
398
+ end
399
+ specific_nodes = nodeIDneigh - refNneigh - [ref_node]
400
+ specific_nodes.each {|id| other_nodes[id] = true}
401
+ other_cons += specific_nodes.length
402
+ end
403
+ by_edge = (ref_cons + ref_secondary_cons).fdiv(other_cons)
404
+ by_node = (ref_cons + secondary_nodes.length).fdiv(other_nodes.length)
405
+ return by_edge, by_node
69
406
  end
70
407
 
71
408
  def get_all_intersections
72
- intersection_lengths = []
73
- get_all_pairs do |node1, node2|
74
- intersection_lengths << intersection(node1, node2).length
409
+ intersection_lengths = get_all_pairs do |node1, node2|
410
+ intersection(node1, node2).length
75
411
  end
76
412
  return intersection_lengths
77
413
  end
78
414
 
79
415
  def get_all_pairs(args = {})
80
- default = {:meth => :all, :layers => :all}
416
+ all_pairs = []
417
+ default = {:layers => :all}
81
418
  args = default.merge(args)
82
- if args[:layers] == :all
83
- nodeIDs = @nodes.keys
419
+ nodeIDsA, nodeIDsB = collect_nodes(args)
420
+ if @compute_autorelations
421
+ if @compute_pairs == :all
422
+ while !nodeIDsA.empty?
423
+ node1 = nodeIDsA.shift
424
+ pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
425
+ yield(node1, node2)
426
+ end
427
+ all_pairs.concat(pairs)
428
+ end
429
+ elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
430
+ while !nodeIDsA.empty?
431
+ node1 = nodeIDsA.shift
432
+ ids_connected_to_n1 = @edges[node1]
433
+ pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
434
+ result = nil
435
+ ids_connected_to_n2 = @edges[node2]
436
+ if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
437
+ result = yield(node1, node2)
438
+ end
439
+ result
440
+ end
441
+ pairs.compact!
442
+ all_pairs.concat(pairs)
443
+ end
444
+ end
84
445
  else
85
- nodeIDs = []
86
- args[:layers].each do |layer|
87
- nodeIDs.concat(@nodes.select{|id, node| node.type == layer}.keys)
446
+ #MAIN METHOD
447
+ if @compute_pairs == :conn
448
+ all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
449
+ ids_connected_to_n1 = @edges[node1]
450
+ node1_pairs = []
451
+ nodeIDsB.each do |node2|
452
+ ids_connected_to_n2 = @edges[node2]
453
+ if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
454
+ node1_pairs << yield(node1, node2)
455
+ end
456
+ end
457
+ node1_pairs
458
+ end
459
+ all_pairs.flatten!(1)
460
+ elsif @compute_pairs == :all
461
+ raise 'Not implemented'
88
462
  end
89
463
  end
90
464
 
91
- if args[:meth] == :all
92
- while !nodeIDs.empty?
93
- node1 = nodeIDs.shift
94
- nodeIDs.each do |node2|
95
- yield(node1, node2)
465
+ return all_pairs
466
+ end
467
+
468
+ def collect_nodes(args)
469
+ nodeIDsA = nil
470
+ nodeIDsB = nil
471
+ if @compute_autorelations
472
+ if args[:layers] == :all
473
+ nodeIDsA = @nodes.keys
474
+ else
475
+ nodeIDsA = []
476
+ args[:layers].each do |layer|
477
+ nodeIDsA.concat(@nodes.select{|id, node| node.type == layer}.keys)
96
478
  end
97
479
  end
98
- #elsif args[:meth] == :conn
99
-
480
+ else
481
+ if args[:layers] != :all
482
+ nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
483
+ nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
484
+ end
100
485
  end
486
+ return nodeIDsA, nodeIDsB
101
487
  end
102
488
 
489
+
103
490
  def get_nodes_layer(layers)
104
491
  #for creating ny value in hypergeometric and pcc index
105
492
  nodes = []
@@ -123,17 +510,16 @@ class Network
123
510
  def generate_adjacency_matrix(layerA, layerB)
124
511
  layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
125
512
  layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
126
- adjacency_matrix = []
127
- layerAidNodes.each do |nodeA|
128
- layerBidNodes.each do |nodeB|
513
+ matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
514
+ layerAidNodes.each_with_index do |nodeA, i|
515
+ layerBidNodes.each_with_index do |nodeB, j|
129
516
  if @edges[nodeB].include?(nodeA)
130
- adjacency_matrix << 1
517
+ matrix[i, j] = 1
131
518
  else
132
- adjacency_matrix << 0
519
+ matrix[i, j] = 0
133
520
  end
134
521
  end
135
522
  end
136
- matrix = NMatrix.new([layerAidNodes.length, layerBidNodes.length], adjacency_matrix)
137
523
  all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
138
524
  @adjacency_matrices[[layerA, layerB]] = all_info_matrix
139
525
  return all_info_matrix
@@ -161,6 +547,14 @@ class Network
161
547
  relations = get_pcc_associations(layers, base_layer)
162
548
  elsif meth == :hypergeometric #all networks
163
549
  relations = get_hypergeometric_associations(layers, base_layer)
550
+ elsif meth == :hypergeometric_bf #all networks
551
+ relations = get_hypergeometric_associations(layers, base_layer, :bonferroni)
552
+ elsif meth == :hypergeometric_bh #all networks
553
+ relations = get_hypergeometric_associations(layers, base_layer, :benjamini_hochberg)
554
+ elsif meth == :hypergeometric_elim #tripartite networks?
555
+ relations = get_hypergeometric_associations_with_topology(layers, base_layer, :elim)
556
+ elsif meth == :hypergeometric_weight #tripartite networks?
557
+ relations = get_hypergeometric_associations_with_topology(layers, base_layer, :weight)
164
558
  elsif meth == :csi #all networks
165
559
  relations = get_csi_associations(layers, base_layer)
166
560
  elsif meth == :transference #tripartite networks
@@ -173,20 +567,19 @@ class Network
173
567
  #---------------------------------------------------------
174
568
  # Alaimo 2014, doi: 10.3389/fbioe.2014.00071
175
569
  def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5)
570
+ relations = []
176
571
  matrix1 = @adjacency_matrices[firstPairLayers].first
177
572
  rowIds = @adjacency_matrices[firstPairLayers][1]
178
573
  matrix2 = @adjacency_matrices[secondPairLayers].first
179
574
  colIds = @adjacency_matrices[secondPairLayers][2]
180
- m1rowNumber = matrix1.rows
181
- m1colNumber = matrix1.cols
182
- m2rowNumber = matrix2.rows
183
- m2colNumber = matrix2.cols
575
+ m1rowNumber, m1colNumber = matrix1.shape
576
+ m2rowNumber, m2colNumber = matrix2.shape
184
577
  #puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
185
578
  matrix1Weight = graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
186
579
  matrix2Weight = graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
187
- matrixWeightProduct = matrix1Weight.dot(matrix2.dot(matrix2Weight))
188
- finalMatrix = matrix1.dot(matrixWeightProduct)
189
- relations = nmatrix2relations(finalMatrix, rowIds, colIds)
580
+ matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
581
+ finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
582
+ relations = matrix2relations(finalMatrix, rowIds, colIds)
190
583
  @association_values[:transference] = relations
191
584
  return relations
192
585
  end
@@ -195,15 +588,14 @@ class Network
195
588
  #---------------------------------------------------------
196
589
  # Bass 2013, doi:10.1038/nmeth.2728
197
590
  def get_associations(layers, base_layer) # BASE METHOD
198
- relations = []
199
- get_all_pairs(layers: layers) do |node1, node2|
591
+ associations = get_all_pairs(layers: layers) do |node1, node2|
200
592
  associatedIDs_node1 = @edges[node1].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
201
593
  associatedIDs_node2 = @edges[node2].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
202
594
  intersectedIDs = associatedIDs_node1 & associatedIDs_node2
203
595
  associationValue = yield(associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2)
204
- relations << [node1, node2, associationValue]
596
+ [node1, node2, associationValue]
205
597
  end
206
- return relations
598
+ return associations
207
599
  end
208
600
 
209
601
  def get_jaccard_association(layers, base_layer)
@@ -246,7 +638,8 @@ class Network
246
638
 
247
639
  def get_pcc_associations(layers, base_layer)
248
640
  #for Ny calcule use get_nodes_layer
249
- ny = get_nodes_layer([base_layer]).length
641
+ base_layer_nodes = get_nodes_layer([base_layer])
642
+ ny = base_layer_nodes.length
250
643
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
251
644
  intersProd = intersectedIDs.length * ny
252
645
  nodesProd = associatedIDs_node1.length * associatedIDs_node2.length
@@ -259,33 +652,95 @@ class Network
259
652
  return relations
260
653
  end
261
654
 
262
- def get_hypergeometric_associations(layers, base_layer)
655
+ def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil)
263
656
  ny = get_nodes_layer([base_layer]).length
657
+ fet = Rubystats::FishersExactTest.new
264
658
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
265
- minLength = [associatedIDs_node1.length, associatedIDs_node2.length].min
659
+ fisher = 0
266
660
  intersection_lengths = intersectedIDs.length
267
- sum = 0
268
661
  if intersection_lengths > 0
269
- nA = associatedIDs_node1.length
270
- nB = associatedIDs_node2.length
271
- #Using index from A layer proyected to B
272
- hyper_denom = binom(ny, nB)
273
- (intersection_lengths..minLength).each do |i|
274
- binom_product = binom(nA, i) * binom(ny - nA, nB - i)
275
- sum += binom_product.fdiv(hyper_denom)
276
- end
662
+ n1_items = associatedIDs_node1.length
663
+ n2_items = associatedIDs_node2.length
664
+ fisher = fet.calculate(
665
+ intersection_lengths,
666
+ n1_items - intersection_lengths,
667
+ n2_items - intersection_lengths,
668
+ ny - (n1_items + n2_items - intersection_lengths)
669
+ )
670
+ fisher = fisher[:right]
277
671
  end
278
- if sum == 0
279
- hypergeometricValue = 0
280
- else
281
- hypergeometricValue = -Math.log10(sum)
282
- end
283
- hypergeometricValue
672
+ fisher
284
673
  end
285
- @association_values[:hypergeometric] = relations
674
+ if pvalue_adj_method == :bonferroni
675
+ meth = :hypergeometric_bf
676
+ compute_adjusted_pvalue_bonferroni(relations)
677
+ elsif pvalue_adj_method == :benjamini_hochberg
678
+ meth = :hypergeometric_bh
679
+ compute_adjusted_pvalue_benjaminiHochberg(relations)
680
+ else
681
+ meth = :hypergeometric
682
+ compute_log_transformation(relations)
683
+ end
684
+ @association_values[meth] = relations
286
685
  return relations
287
686
  end
288
687
 
688
+ def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01)
689
+ relations = []
690
+ reference_layer = (layers - @layer_ontologies.keys).first
691
+ ontology_layer = (layers - [reference_layer]).first
692
+ ref_nodes = get_nodes_from_layer(reference_layer) # get nodes from NOT ontology layer
693
+ ontology = @layer_ontologies[ontology_layer]
694
+ base_layer_length = @nodes.values.count{|n| n.type == base_layer}
695
+ ref_nodes.each do |ref_node|
696
+ base_nodes = get_connected_nodes(ref_node, base_layer)
697
+ ontology_base_subgraph = get_bipartite_subgraph(base_nodes, base_layer, ontology_layer) # get shared nodes between nodes from NOT ontology layer and ONTOLOGY layer. Also get the conections between shared nodes and ontology nodes.
698
+ next if ontology_base_subgraph.empty?
699
+ ontology_base_subgraph.transform_keys!{|k| k.to_sym}
700
+ ontology.load_item_relations_to_terms(ontology_base_subgraph, remove_old_relations = true)
701
+ term_pvals = ontology.compute_relations_to_items(base_nodes, base_layer_length, mode, thresold)
702
+ relations.concat(term_pvals.map{|term| [ref_node, term[0], term[1]]})
703
+ end
704
+ compute_log_transformation(relations)
705
+ if mode == :elim
706
+ meth = :hypergeometric_elim
707
+ elsif mode == :weight
708
+ meth = :hypergeometric_weight
709
+ end
710
+ @association_values[meth] = relations
711
+ return relations
712
+ end
713
+
714
+ def compute_adjusted_pvalue(relations, log_val=true)
715
+ relations.each_with_index do |data, i| #p1, p2, pval
716
+ pval_adj = yield(data.last, i)
717
+ pval_adj = -Math.log10(pval_adj) if log_val && pval_adj > 0
718
+ data[2] = pval_adj
719
+ end
720
+ end
721
+
722
+ def compute_log_transformation(relations) #Only perform log transform whitout adjust pvalue. Called when adjusted method is not defined
723
+ compute_adjusted_pvalue(relations) do |pval, index|
724
+ pval
725
+ end
726
+ end
727
+
728
+ def compute_adjusted_pvalue_bonferroni(relations)
729
+ n_comparations = relations.length
730
+ compute_adjusted_pvalue(relations) do |pval, index|
731
+ adj = pval * n_comparations
732
+ adj = 1 if adj > 1
733
+ adj
734
+ end
735
+ end
736
+
737
+ def compute_adjusted_pvalue_benjaminiHochberg(relations)
738
+ adj_pvalues = get_benjaminiHochberg_pvalues(relations.map{|rel| rel.last})
739
+ compute_adjusted_pvalue(relations) do |pval, index|
740
+ adj_pvalues[index]
741
+ end
742
+ end
743
+
289
744
  def add_record(hash, node1, node2)
290
745
  query = hash[node1]
291
746
  if query.nil?
@@ -411,12 +866,108 @@ class Network
411
866
  return prec, rec
412
867
  end
413
868
 
869
+ ## KERNEL METHODS
870
+ #######################################################################################
871
+ def get_kernel(layer2kernel, kernel, normalization=false)
872
+ matrix, node_names = @adjacency_matrices[layer2kernel]
873
+ #I = identity matrix
874
+ #D = Diagonal matrix
875
+ #A = adjacency matrix
876
+ #L = laplacian matrix = D − A
877
+ matrix_result = nil
878
+ dimension_elements = matrix.shape.last
879
+ # In scuba code, the diagonal values of A is set to 0. In weighted matrix the kernel result is the same with or without this operation. Maybe increases the computing performance?
880
+ # In the md kernel this operation affects the values of the final kernel
881
+ #dimension_elements.times do |n|
882
+ # matrix[n,n] = 0.0
883
+ #end
884
+ if kernel == 'el' || kernel == 'ct' || kernel == 'rf' ||
885
+ kernel.include?('vn') || kernel.include?('rl') || kernel == 'me'
886
+ diagonal_matrix = matrix.sum(1).diag # get the total sum for each row, for this reason the sum method takes the 1 value. If sum colums is desired, use 0
887
+ # Make a matrix whose diagonal is row_sum
888
+ matrix_L = diagonal_matrix - matrix
889
+ if kernel == 'el' #Exponential Laplacian diffusion kernel(active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
890
+ beta = 0.02
891
+ beta_product = matrix_L * -beta
892
+ #matrix_result = beta_product.expm
893
+ matrix_result = Numo::Linalg.expm(beta_product, 14)
894
+ elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
895
+ matrix_result = Numo::Linalg.pinv(matrix_L) # Anibal saids that this kernel was normalized. Why?. Paper do not seem to describe this operation for ct, it describes for Kvn or for all kernels, it is not clear.
896
+ elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
897
+ matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) #Krf = (I +L ) ^ −1
898
+ elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
899
+ alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
900
+ matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha ) # (I -alphaA ) ^ −1
901
+ elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
902
+ alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
903
+ matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha ) # (I + alphaL ) ^ −1
904
+ elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
905
+ beta=0.04
906
+ #(beta/N)*(N*I - D + A)
907
+ id_mat = Numo::DFloat.eye(dimension_elements)
908
+ m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix ) * (beta/dimension_elements)
909
+ #matrix_result = m_matrix.expm
910
+ matrix_result = Numo::Linalg.expm(m_matrix, 16)
911
+ end
912
+ elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
913
+ lambda_value = matrix.min_eigenvalue
914
+ matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs # Ka = A + lambda*I # lambda = the absolute value of the smallest eigenvalue of A
915
+ elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
916
+ t = kernel.gsub('md', '').to_i
917
+ #TODO: check implementation with Numo::array
918
+ col_sum = matrix.sum(1)
919
+ p_mat = matrix.div_by_vector(col_sum)
920
+ p_temp_mat = p_mat.clone
921
+ zt_mat = p_mat.clone
922
+ (t-1).times do
923
+ p_temp_mat = p_temp_mat.dot(p_mat)
924
+ zt_mat = zt_mat + p_temp_mat
925
+ end
926
+ zt_mat = zt_mat * (1.0/t)
927
+ matrix_result = zt_mat.dot(zt_mat.transpose)
928
+ else
929
+ matrix_result = matrix
930
+ warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
931
+ # This allows process a previous kernel and perform the normalization in a separated step.
932
+ end
933
+ matrix_result = matrix_result.cosine_normalization if normalization #TODO: check implementation with Numo::array
934
+ @kernels[layer2kernel] = matrix_result
935
+ end
936
+
937
+ def write_kernel(layer2kernel, output_file)
938
+ @kernels[layer2kernel].save(output_file)
939
+ end
940
+
941
+ def link_ontology(ontology_file_path, layer_name)
942
+ if !@loaded_obos.include?(ontology_file_path) #Load new ontology
943
+ ontology = Ontology.new(file: ontology_file_path, load_file: true)
944
+ @loaded_obos << ontology_file_path
945
+ @ontologies << ontology
946
+ else #Link loaded ontology to current layer
947
+ ontology = @ontologies[@loaded_obos.index(ontology_file_path)]
948
+ end
949
+ @layer_ontologies[layer_name] = ontology
950
+ end
414
951
 
415
952
 
416
953
  ## AUXILIAR METHODS
417
954
  #######################################################################################
418
955
  private
419
956
 
957
+ def load_input_list(file)
958
+ return File.open(file).readlines.map!{|line| line.chomp}
959
+ end
960
+
961
+ def exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
962
+ res = false
963
+ if !ids_connected_to_n1.nil? &&
964
+ !ids_connected_to_n2.nil? &&
965
+ !(ids_connected_to_n1 & ids_connected_to_n2).empty? # check that at least exists one node that connect to n1 and n2
966
+ res = true
967
+ end
968
+ return res
969
+ end
970
+
420
971
  def set_layer(layer_definitions, node_name)
421
972
  layer = nil
422
973
  if layer_definitions.length > 1
@@ -426,9 +977,11 @@ class Network
426
977
  break
427
978
  end
428
979
  end
980
+ raise("The node '#{node_name}' not match with any layer regex") if layer.nil?
429
981
  else
430
982
  layer = layer_definitions.first.first
431
983
  end
984
+ @layers << layer if !@layers.include?(layer)
432
985
  return layer
433
986
  end
434
987
 
@@ -453,35 +1006,46 @@ class Network
453
1006
  end
454
1007
 
455
1008
  def graphWeights (rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
456
- invMatrix = inputMatrix.sum(0).map{|e| 1.0/ e}
457
- diagonalColSums = NMatrix.diag(invMatrix)
458
- rowsSums = inputMatrix.sum(1).to_flat_a
459
- ky = NMatrix.new([rowsNumber, rowsNumber], rowsSums).map{|e| e ** lambdaValue }
460
- invertLambdaVal = (1 - lambdaValue)
461
- kx = NMatrix.new([rowsNumber, rowsNumber], rowsSums).transpose.map{|e| e ** invertLambdaVal }
462
- nx = (ky * kx).map{|e| 1.0/ e}
463
- weigth = (inputMatrix.dot(diagonalColSums)).transpose
464
- weigth = inputMatrix.dot(weigth)
465
- weigth = nx * weigth
1009
+ ky = (1.0 / inputMatrix.sum(0)).diag #sum cols
1010
+ weigth = Numo::Linalg.dot(inputMatrix, ky).transpose
1011
+ ky = nil #free memory
1012
+ weigth = Numo::Linalg.dot(inputMatrix, weigth)
1013
+
1014
+ kx = inputMatrix.sum(1) #sum rows
1015
+
1016
+ kx_lamb = kx ** lambdaValue
1017
+ kx_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
1018
+ rowsNumber.times do |j|
1019
+ rowsNumber.times do |i|
1020
+ kx_lamb_mat[j,i] = kx_lamb[i]
1021
+ end
1022
+ end
1023
+ kx_lamb = nil #free memory
1024
+
1025
+ kx_inv_lamb = kx ** (1 - lambdaValue)
1026
+ kx_inv_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
1027
+ rowsNumber.times do |j|
1028
+ rowsNumber.times do |i|
1029
+ kx_inv_lamb_mat[i, j] = kx_inv_lamb[i]
1030
+ end
1031
+ end
1032
+ kx_inv_lamb = nil #free memory
1033
+
1034
+ nx = 1.0/(kx_lamb_mat.inplace * kx_inv_lamb_mat).inplace # inplace marks a matrix to be used by reference, not for value
1035
+ kx_lamb_mat = nil #free memory
1036
+ kx_inv_lamb_mat = nil #free memory
1037
+ weigth.inplace * nx
466
1038
  return weigth
467
1039
  end
468
1040
 
469
- def nmatrix2relations(finalMatrix, rowIds, colIds)
1041
+ def matrix2relations(finalMatrix, rowIds, colIds)
470
1042
  relations = []
471
1043
  rowIds.each_with_index do |rowId, rowPos|
472
1044
  colIds.each_with_index do |colId, colPos|
473
1045
  associationValue = finalMatrix[rowPos, colPos]
474
- relations << [rowId, colId, associationValue]
1046
+ relations << [rowId, colId, associationValue] if associationValue > 0
475
1047
  end
476
1048
  end
477
1049
  return relations
478
1050
  end
479
-
480
- def binom(n,k)
481
- if k > 0 && k < n
482
- res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
483
- else
484
- res = 1
485
- end
486
- end
487
1051
  end