NetAnalyzer 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,34 +1,26 @@
1
1
  require 'rubystats'
2
- require 'gv'
3
2
  #require 'nmatrix'
4
3
  #require 'nmatrix/lapacke'
5
4
  require 'numo/narray'
6
5
  require 'numo/linalg'
6
+ require 'npy'
7
7
  require 'parallel'
8
8
 
9
9
  #require 'pp'
10
- require 'bigdecimal'
11
10
  require 'benchmark'
12
11
  #require 'nmatrix_expansion'
13
12
 
14
-
15
- #For javascrip plotting
16
- require 'erb'
17
- require 'base64'
18
- require 'json'
19
- require 'zlib'
20
-
21
13
  require 'semtools'
22
14
  require 'expcalc'
23
- TEMPLATES = File.join(File.dirname(__FILE__), 'templates')
24
15
 
25
- class Network
26
16
 
27
- attr_accessor :association_values, :control_connections, :kernels, :reference_nodes, :group_nodes, :threads
17
+ class Network
18
+
19
+ attr_accessor :adjacency_matrices, :association_values, :control_connections, :kernels, :reference_nodes, :group_nodes, :threads, :nodes, :edges, :compute_pairs, :compute_autorelations
28
20
 
29
21
  ## BASIC METHODS
30
22
  ############################################################
31
- def initialize(layers)
23
+ def initialize(layers) # DONE
32
24
  @threads = 0
33
25
  @nodes = {}
34
26
  @edges = {}
@@ -46,21 +38,57 @@ class Network
46
38
  @layer_ontologies = {}
47
39
  end
48
40
 
49
- def set_compute_pairs(use_pairs, get_autorelations)
41
+ def clone # DONE
42
+ network_clone = Network.new(@layers.clone)
43
+ network_clone.threads = @threads.clone
44
+ network_clone.nodes = @nodes.clone
45
+ network_clone.edges = @edges.clone
46
+ network_clone.reference_nodes = @reference_nodes.clone
47
+ network_clone.group_nodes = @group_nodes.clone
48
+ network_clone.adjacency_matrices = @adjacency_matrices.clone
49
+ network_clone.kernels = @kernels.clone
50
+ network_clone.association_values = @association_values.clone
51
+ network_clone.control_connections = @control_connections.clone
52
+ network_clone.set_compute_pairs(@compute_pairs.clone, @compute_autorelations.clone)
53
+ #network_clone.loaded_obos = @loaded_obos.clone
54
+ #network_clone.ontologies = @ontologies.clone
55
+ #network_clone.layer_ontologies = @layer_ontologies.clone
56
+
57
+ return network_clone
58
+ end
59
+
60
+ def ==(other) # DONE
61
+ are_equal = true
62
+ if self.threads != other.threads ||
63
+ self.nodes != other.nodes ||
64
+ self.edges != other.edges ||
65
+ self.reference_nodes != other.reference_nodes ||
66
+ self.group_nodes != other.group_nodes ||
67
+ self.adjacency_matrices != other.adjacency_matrices ||
68
+ self.association_values != other.association_values ||
69
+ self.control_connections != other.control_connections ||
70
+ self.compute_pairs != other.compute_pairs ||
71
+ self.compute_autorelations != other.compute_autorelations
72
+ are_equal = false
73
+ end
74
+ return are_equal
75
+ end
76
+
77
+ def set_compute_pairs(use_pairs, get_autorelations) #DONE
50
78
  @compute_pairs = use_pairs
51
79
  @compute_autorelations = get_autorelations
52
80
  end
53
81
 
54
- def add_node(nodeID, nodeType = 0)
82
+ def add_node(nodeID, nodeType = 0) # DONE
55
83
  @nodes[nodeID] = Node.new(nodeID, nodeType)
56
84
  end
57
85
 
58
- def add_edge(nodeID1, nodeID2)
59
- query_edge(nodeID1, nodeID2)
60
- query_edge(nodeID2, nodeID1)
86
+ def add_edge(nodeID1, nodeID2) # DONE
87
+ add_edge2hash(nodeID1, nodeID2)
88
+ add_edge2hash(nodeID2, nodeID1)
61
89
  end
62
90
 
63
- def query_edge(nodeA, nodeB)
91
+ def add_edge2hash(nodeA, nodeB) # NOT
64
92
  query = @edges[nodeA]
65
93
  if query.nil?
66
94
  @edges[nodeA] = [nodeB]
@@ -69,7 +97,48 @@ class Network
69
97
  end
70
98
  end
71
99
 
72
- def delete_nodes(node_list, mode='d')
100
+ def set_layer(layer_definitions, node_name) # DONE
101
+ layer = nil
102
+ if layer_definitions.length > 1
103
+ layer_definitions.each do |layer_name, regexp|
104
+ if node_name =~ regexp
105
+ layer = layer_name
106
+ break
107
+ end
108
+ end
109
+ raise("The node '#{node_name}' not match with any layer regex") if layer.nil?
110
+ else
111
+ layer = layer_definitions.first.first
112
+ end
113
+ @layers << layer if !@layers.include?(layer)
114
+ return layer
115
+ end
116
+
117
+ def generate_adjacency_matrix(layerA, layerB) # DONE
118
+ layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
119
+ layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
120
+ matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
121
+ layerAidNodes.each_with_index do |nodeA, i|
122
+ layerBidNodes.each_with_index do |nodeB, j|
123
+ if @edges[nodeB].include?(nodeA)
124
+ matrix[i, j] = 1
125
+ else
126
+ matrix[i, j] = 0
127
+ end
128
+ end
129
+ end
130
+ all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
131
+
132
+ if layerA == layerB
133
+ @adjacency_matrices[[layerA]] = all_info_matrix
134
+ else
135
+ @adjacency_matrices[[layerA, layerB]] = all_info_matrix
136
+ end
137
+ return all_info_matrix
138
+ end
139
+
140
+
141
+ def delete_nodes(node_list, mode='d') #DONE
73
142
  if mode == 'd'
74
143
  @nodes.reject!{|n| node_list.include?(n)}
75
144
  @edges.reject!{|n, connections| node_list.include?(n)}
@@ -86,21 +155,17 @@ class Network
86
155
  @edges.reject!{|n, connections| connections.empty?}
87
156
  end
88
157
 
89
- def get_connected_nodes(node_id, from_layer)
158
+ def get_connected_nodes(node_id, from_layer) # DONE
90
159
  return @edges[node_id].map{|id| @nodes[id]}.select{|node| node.type == from_layer}.map{|node| node.id}
91
160
  end
92
161
 
93
- def get_nodes_from_layer(from_layer)
94
- return @nodes.values.select{|node| node.type == from_layer}.map{|node| node.id}
95
- end
96
-
97
- def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer)
162
+ def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer) # DONE
98
163
  bipartite_subgraph = {}
99
164
  from_layer_node_ids.each do |from_layer_node_id|
100
165
  connected_nodes = @edges[from_layer_node_id]
101
- connected_nodes.each do |connected_node|
166
+ connected_nodes.each do |connected_node|
102
167
  if @nodes[connected_node].type == to_layer
103
- query = bipartite_subgraph[connected_node]
168
+ query = bipartite_subgraph[connected_node]
104
169
  if query.nil?
105
170
  bipartite_subgraph[connected_node] = get_connected_nodes(connected_node, from_layer)
106
171
  end
@@ -110,39 +175,18 @@ class Network
110
175
  return bipartite_subgraph
111
176
  end
112
177
 
113
- def load_network_by_pairs(file, layers, split_character="\t")
114
- File.open(file).each do |line|
115
- line.chomp!
116
- pair = line.split(split_character)
117
- node1 = pair[0]
118
- node2 = pair[1]
119
- add_node(node1, set_layer(layers, node1))
120
- add_node(node2, set_layer(layers, node2))
121
- add_edge(node1, node2)
122
- end
123
- end
124
-
125
- def load_network_by_bin_matrix(input_file, node_file, layers)
126
- node_names = load_input_list(node_file)
127
- @adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='npy'), node_names, node_names]
128
- end
129
-
130
- def load_network_by_plain_matrix(input_file, node_file, layers, splitChar)
131
- node_names = load_input_list(node_file)
132
- @adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='txt', splitChar=splitChar), node_names, node_names]
133
- end
134
178
 
135
- def get_edge_number
136
- node_connections = get_degree.values.inject(0){|sum, n| sum + n}
179
+ def get_edge_number # DONE
180
+ node_connections = get_degree(zscore = false).values.inject(0){|sum, n| sum + n}
137
181
  return node_connections/2
138
182
  end
139
183
 
140
- def get_degree(zscore=false)
184
+ def get_degree(zscore=true) # DONE
141
185
  degree = {}
142
186
  @edges.each do |id, nodes|
143
187
  degree[id] = nodes.length
144
188
  end
145
- if !zscore
189
+ if zscore
146
190
  degree_values = degree.values
147
191
  mean_degree = degree_values.mean
148
192
  std_degree = degree_values.standard_deviation
@@ -151,154 +195,114 @@ class Network
151
195
  return degree
152
196
  end
153
197
 
154
- def get_node_attributes(attr_names)
155
- attrs = []
156
- attr_names.each do |attr_name|
157
- if attr_name == 'get_degree'
158
- attrs << get_degree
159
- elsif attr_name == 'get_degreeZ'
160
- attrs << get_degree(zscore=true)
161
- end
162
- end
163
- node_ids = attrs.first.keys
164
- node_attrs = []
165
- node_ids.each do |n|
166
- node_attrs << [n].concat(attrs.map{|at| at[n]})
198
+ def get_all_intersections(args = {}) # DONE
199
+ intersection_lengths = get_all_pairs(args) do |node1, node2|
200
+ intersection(node1, node2).length
167
201
  end
168
- return node_attrs
202
+ return intersection_lengths
169
203
  end
170
204
 
171
- def plot_network(options = {})
172
- if options[:method] == 'graphviz'
173
- plot_dot(options)
174
- else
175
- if options[:method] == 'elgrapho'
176
- template = 'el_grapho'
177
- elsif options[:method] == 'cytoscape'
178
- template = 'cytoscape'
179
- elsif options[:method] == 'sigma'
180
- template = 'sigma'
181
- end
182
- renderered_template = ERB.new(File.open(File.join(TEMPLATES, template + '.erb')).read).result(binding)
183
- File.open(options[:output_file] + '.html', 'w'){|f| f.puts renderered_template}
184
- end
185
- end
186
-
187
- def plot_dot(user_options = {}) # input keys: layout
188
- options = {layout: "sfdp"}
189
- options = options.merge(user_options)
190
- graphviz_colors = %w[lightsteelblue1 lightyellow1 lightgray orchid2]
191
- palette = {}
192
- @layers.each do |layer|
193
- palette[layer] = graphviz_colors.shift
194
- end
195
- graph = GV::Graph.open('g', type = :undirected)
196
- plotted_edges = {}
197
- @edges.each do |nodeID, associatedIDs|
198
- associatedIDs.each do |associatedID|
199
- pair = [nodeID, associatedID].sort.join('_').to_sym
200
- if !plotted_edges[pair]
201
- graph.edge 'e',
202
- graph.node(nodeID, label: '', style: 'filled', fillcolor: palette[@nodes[nodeID].type]),
203
- graph.node(associatedID, label: '', style: 'filled' , fillcolor: palette[@nodes[associatedID].type])
204
- plotted_edges[pair] = true
205
+ def get_all_pairs(args = {}) # DONE
206
+ all_pairs = [] #lo que se devolvera
207
+ default = {:layers => :all}
208
+ args = default.merge(args)
209
+ nodeIDsA, nodeIDsB = collect_nodes(args)
210
+ if @compute_autorelations
211
+ if @compute_pairs == :all
212
+ while !nodeIDsA.empty?
213
+ node1 = nodeIDsA.shift
214
+ pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
215
+ yield(node1, node2)
216
+ end
217
+ all_pairs.concat(pairs)
218
+ end
219
+ elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
220
+ while !nodeIDsA.empty?
221
+ node1 = nodeIDsA.shift
222
+ ids_connected_to_n1 = @edges[node1]
223
+ pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
224
+ result = nil
225
+ ids_connected_to_n2 = @edges[node2]
226
+ if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
227
+ result = yield(node1, node2)
228
+ end
229
+ result
230
+ end
231
+ pairs.compact!
232
+ all_pairs.concat(pairs)
205
233
  end
206
234
  end
207
- end
208
- @reference_nodes.each do |nodeID|
209
- graph.node(nodeID, style: 'filled', fillcolor: 'firebrick1', label: '')
210
- end
211
- graphviz_border_colors = %w[blue darkorange red olivedrab4]
212
- @group_nodes.each do |groupID, gNodes|
213
- border_color = graphviz_border_colors.shift
214
- gNodes.each do |nodeID|
215
- graph.node(nodeID, color: border_color, penwidth: '10', label: '')
216
- end
217
- end
218
- graph[:overlap] = false
219
- STDERR.puts 'Save graph'
220
- graph.save(options[:output_file] + '.png', format='png', layout=options[:layout])
221
- end
222
-
223
- def compute_group_metrics(output_filename)
224
- metrics = []
225
- header = ['group']
226
- @group_nodes.keys.each do |k|
227
- metrics << [k]
228
- end
229
- header << 'comparative_degree'
230
- comparative_degree = communities_comparative_degree(@group_nodes)
231
- comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
232
- header << 'avg_sht_path'
233
- avg_sht_path = communities_avg_sht_path(@group_nodes)
234
- avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
235
- if !@reference_nodes.empty?
236
- header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
237
- node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
238
- node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
239
- end
240
- File.open(output_filename, 'w') do |f|
241
- f.puts header.join("\t")
242
- metrics.each do |gr|
243
- f. puts gr.join("\t")
235
+ else
236
+ #MAIN METHOD
237
+ if @compute_pairs == :conn
238
+ all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
239
+ ids_connected_to_n1 = @edges[node1]
240
+ node1_pairs = []
241
+ nodeIDsB.each do |node2|
242
+ ids_connected_to_n2 = @edges[node2]
243
+ if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
244
+ node1_pairs << yield(node1, node2)
245
+ end
246
+ end
247
+ node1_pairs
248
+ end
249
+ all_pairs.flatten!(1)
250
+ elsif @compute_pairs == :all
251
+ raise 'Not implemented'
244
252
  end
245
253
  end
246
- end
247
-
248
- def replace_nil_vals(val)
249
- return val.nil? ? 'NULL' : val
250
- end
251
254
 
252
- def communities_comparative_degree(coms)
253
- comparative_degrees = []
254
- coms.each do |com_id, com|
255
- comparative_degrees << compute_comparative_degree(com)
256
- end
257
- return comparative_degrees
255
+ return all_pairs
258
256
  end
259
257
 
260
- def communities_avg_sht_path(coms)
261
- avg_sht_path = []
262
- coms.each do |com_id, com|
263
- dist, paths = compute_avg_sht_path(com)
264
- avg_sht_path << dist
258
+ def collect_nodes(args) # DONE
259
+ nodeIDsA = nil
260
+ nodeIDsB = nil
261
+ if @compute_autorelations
262
+ if args[:layers] == :all
263
+ nodeIDsA = @nodes.keys
264
+ else
265
+ nodeIDsA = []
266
+ args[:layers].each do |layer|
267
+ nodeIDsA.concat(@nodes.select{|id, node| node.type == layer}.keys)
268
+ end
269
+ end
270
+ else
271
+ if args[:layers] != :all
272
+ nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
273
+ nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
274
+ end
265
275
  end
266
- return avg_sht_path
276
+ return nodeIDsA, nodeIDsB
267
277
  end
268
278
 
269
- def compute_node_com_assoc_in_precomputed_communities(coms, ref_node)
270
- node_com_assoc = []
271
- coms.each do |com_id, com|
272
- node_com_assoc << [compute_node_com_assoc(com, ref_node)]
279
+ def get_nodes_layer(layers) # DONE
280
+ #for creating ny value in hypergeometric and pcc index
281
+ nodes = []
282
+ layers.each do |layer|
283
+ nodes.concat(@nodes.select{|nodeId, node| node.type == layer}.values)
273
284
  end
274
- return node_com_assoc
285
+ return nodes
275
286
  end
276
287
 
277
- def compute_comparative_degree(com) # see Girvan-Newman Benchmark control parameter in http://networksciencebook.com/chapter/9#testing (communities chapter)
278
- internal_degree = 0
279
- external_degree = 0
280
- com.each do |nodeID|
281
- nodeIDneigh = @edges[nodeID]
282
- next if nodeIDneigh.nil?
283
- internal_degree += (nodeIDneigh & com).length
284
- external_degree += (nodeIDneigh - com).length
288
+ def intersection(node1, node2) # DONE
289
+ shared_nodes = []
290
+ intersectedIDs = @edges[node1] & @edges[node2]
291
+ intersectedIDs.each do |id|
292
+ shared_nodes << @nodes[id]
285
293
  end
286
- comparative_degree = external_degree.fdiv(external_degree + internal_degree)
287
- return comparative_degree
294
+ return shared_nodes
288
295
  end
289
296
 
290
- def compute_avg_sht_path(com, paths=false)
297
+ def compute_avg_sht_path(com, paths=false) # DONE
291
298
  path_lengths = []
292
299
  all_paths = []
293
300
  group = com.dup
294
301
  while !group.empty?
295
302
  node_start = group.shift
296
303
  sht_paths = Parallel.map(group, in_processes: @threads) do |node_stop|
297
- #group.each do |node_stop|
298
304
  dist, path = shortest_path(node_start, node_stop, paths)
299
305
  [dist, path]
300
- #path_lengths << dist if !dist.nil?
301
- #all_paths << path if !path.empty?
302
306
  end
303
307
  sht_paths.each do |dist, path|
304
308
  path_lengths << dist
@@ -315,7 +319,7 @@ class Network
315
319
 
316
320
  # https://pythoninwonderland.wordpress.com/2017/03/18/how-to-implement-breadth-first-search-in-python/
317
321
  # finds shortest path between 2 nodes of a graph using BFS
318
- def bfs_shortest_path(start, goal, paths=false)
322
+ def bfs_shortest_path(start, goal, paths=false) # NOT
319
323
  dist = nil
320
324
  explored = {} # keep track of explored nodes
321
325
  previous = {}
@@ -348,7 +352,7 @@ class Network
348
352
  return dist, path
349
353
  end
350
354
 
351
- def build_path(previous, startNode, stopNode)
355
+ def build_path(previous, startNode, stopNode) # NOT
352
356
  path = []
353
357
  currentNode = stopNode
354
358
  path << currentNode
@@ -359,7 +363,7 @@ class Network
359
363
  return path
360
364
  end
361
365
 
362
- def shortest_path(node_start, node_stop, paths=false)
366
+ def shortest_path(node_start, node_stop, paths=false) # DONE
363
367
  #https://betterprogramming.pub/5-ways-to-find-the-shortest-path-in-a-graph-88cfefd0030f
364
368
  #return bidirectionalSearch(node_start, node_stop)
365
369
  #https://efficientcodeblog.wordpress.com/2017/12/13/bidirectional-search-two-end-bfs/
@@ -367,19 +371,100 @@ class Network
367
371
  return dist, all_paths
368
372
  end
369
373
 
370
- def expand_clusters(expand_method)
371
- clusters = {}
372
- @group_nodes.each do |id, nodes|
373
- if expand_method == 'sht_path'
374
- dist, paths = compute_avg_sht_path(nodes, paths=true) # this uses bfs, maybe Dijkstra is the best one
375
- new_nodes = paths.flatten.uniq
376
- clusters[id] = nodes | new_nodes # If some node pair are not connected, recover them
374
+ def get_node_attributes(attr_names)
375
+ attrs = []
376
+ attr_names.each do |attr_name|
377
+ if attr_name == 'get_degree'
378
+ attrs << get_degree(zscore=false)
379
+ elsif attr_name == 'get_degreeZ'
380
+ attrs << get_degree
381
+ end
382
+ end
383
+ node_ids = attrs.first.keys
384
+ node_attrs = []
385
+ node_ids.each do |n|
386
+ node_attrs << [n].concat(attrs.map{|at| at[n]})
387
+ end
388
+ return node_attrs
389
+ end
390
+
391
+ def plot_network(options = {})
392
+ net_data = {
393
+ group_nodes: @group_nodes,
394
+ reference_nodes: @reference_nodes,
395
+ nodes: @nodes,
396
+ edges: @edges,
397
+ layers: @layers
398
+ }
399
+ Net_plotter.new(net_data, options)
400
+ end
401
+
402
+ # Compute communities/group properties
403
+ #----------------------------------------------
404
+ def compute_group_metrics(output_filename) # DONE
405
+ metrics = []
406
+ header = ['group']
407
+ @group_nodes.keys.each do |k|
408
+ metrics << [k]
409
+ end
410
+ header << 'comparative_degree'
411
+ comparative_degree = communities_comparative_degree(@group_nodes)
412
+ comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
413
+ header << 'avg_sht_path'
414
+ avg_sht_path = communities_avg_sht_path(@group_nodes)
415
+ avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
416
+ if !@reference_nodes.empty?
417
+ header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
418
+ node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
419
+ node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
420
+ end
421
+ File.open(output_filename, 'w') do |f|
422
+ f.puts header.join("\t")
423
+ metrics.each do |gr|
424
+ f. puts gr.join("\t")
377
425
  end
378
426
  end
379
- return clusters
380
427
  end
381
428
 
382
- def compute_node_com_assoc(com, ref_node)
429
+ def communities_comparative_degree(coms) # DONE
430
+ comparative_degrees = []
431
+ coms.each do |com_id, com|
432
+ comparative_degrees << compute_comparative_degree(com)
433
+ end
434
+ return comparative_degrees
435
+ end
436
+
437
+ def communities_avg_sht_path(coms) # DONE
438
+ avg_sht_path = []
439
+ coms.each do |com_id, com|
440
+ dist, paths = compute_avg_sht_path(com)
441
+ avg_sht_path << dist
442
+ end
443
+ return avg_sht_path
444
+ end
445
+
446
+ def compute_node_com_assoc_in_precomputed_communities(coms, ref_node) # DONE
447
+ node_com_assoc = []
448
+ coms.each do |com_id, com|
449
+ node_com_assoc << [compute_node_com_assoc(com, ref_node)]
450
+ end
451
+ return node_com_assoc
452
+ end
453
+
454
+ def compute_comparative_degree(com) # DONE # see Girvan-Newman Benchmark control parameter in http://networksciencebook.com/chapter/9#testing (communities chapter)
455
+ internal_degree = 0
456
+ external_degree = 0
457
+ com.each do |nodeID|
458
+ nodeIDneigh = @edges[nodeID]
459
+ next if nodeIDneigh.nil?
460
+ internal_degree += (nodeIDneigh & com).length
461
+ external_degree += (nodeIDneigh - com).length
462
+ end
463
+ comparative_degree = external_degree.fdiv(external_degree + internal_degree)
464
+ return comparative_degree
465
+ end
466
+
467
+ def compute_node_com_assoc(com, ref_node) # DONE
383
468
  ref_cons = 0
384
469
  ref_secondary_cons = 0
385
470
  secondary_nodes = {}
@@ -405,137 +490,32 @@ class Network
405
490
  return by_edge, by_node
406
491
  end
407
492
 
408
- def get_all_intersections
409
- intersection_lengths = get_all_pairs do |node1, node2|
410
- intersection(node1, node2).length
411
- end
412
- return intersection_lengths
413
- end
414
-
415
- def get_all_pairs(args = {})
416
- all_pairs = []
417
- default = {:layers => :all}
418
- args = default.merge(args)
419
- nodeIDsA, nodeIDsB = collect_nodes(args)
420
- if @compute_autorelations
421
- if @compute_pairs == :all
422
- while !nodeIDsA.empty?
423
- node1 = nodeIDsA.shift
424
- pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
425
- yield(node1, node2)
426
- end
427
- all_pairs.concat(pairs)
428
- end
429
- elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
430
- while !nodeIDsA.empty?
431
- node1 = nodeIDsA.shift
432
- ids_connected_to_n1 = @edges[node1]
433
- pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
434
- result = nil
435
- ids_connected_to_n2 = @edges[node2]
436
- if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
437
- result = yield(node1, node2)
438
- end
439
- result
440
- end
441
- pairs.compact!
442
- all_pairs.concat(pairs)
443
- end
444
- end
445
- else
446
- #MAIN METHOD
447
- if @compute_pairs == :conn
448
- all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
449
- ids_connected_to_n1 = @edges[node1]
450
- node1_pairs = []
451
- nodeIDsB.each do |node2|
452
- ids_connected_to_n2 = @edges[node2]
453
- if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
454
- node1_pairs << yield(node1, node2)
455
- end
456
- end
457
- node1_pairs
458
- end
459
- all_pairs.flatten!(1)
460
- elsif @compute_pairs == :all
461
- raise 'Not implemented'
462
- end
463
- end
464
-
465
- return all_pairs
466
- end
467
-
468
- def collect_nodes(args)
469
- nodeIDsA = nil
470
- nodeIDsB = nil
471
- if @compute_autorelations
472
- if args[:layers] == :all
473
- nodeIDsA = @nodes.keys
474
- else
475
- nodeIDsA = []
476
- args[:layers].each do |layer|
477
- nodeIDsA.concat(@nodes.select{|id, node| node.type == layer}.keys)
478
- end
479
- end
480
- else
481
- if args[:layers] != :all
482
- nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
483
- nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
493
+ def expand_clusters(expand_method) # DONE
494
+ clusters = {}
495
+ @group_nodes.each do |id, nodes|
496
+ if expand_method == 'sht_path'
497
+ dist, paths = compute_avg_sht_path(nodes, paths=true) # this uses bfs, maybe Dijkstra is the best one
498
+ new_nodes = paths.flatten.uniq
499
+ clusters[id] = nodes | new_nodes # If some node pair are not connected, recover them
484
500
  end
485
501
  end
486
- return nodeIDsA, nodeIDsB
502
+ return clusters
487
503
  end
488
504
 
489
505
 
490
- def get_nodes_layer(layers)
491
- #for creating ny value in hypergeometric and pcc index
492
- nodes = []
493
- layers.each do |layer|
494
- nodes.concat(@nodes.select{|nodeId, node| node.type == layer}.values)
495
- end
496
- return nodes
497
- end
498
-
499
- def intersection(node1, node2)
500
- shared_nodes = []
501
- associatedIDs_node1 = @edges[node1]
502
- associatedIDs_node2 = @edges[node2]
503
- intersectedIDs = associatedIDs_node1 & associatedIDs_node2
504
- intersectedIDs.each do |id|
505
- shared_nodes << @nodes[id]
506
- end
507
- return shared_nodes
508
- end
509
-
510
- def generate_adjacency_matrix(layerA, layerB)
511
- layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
512
- layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
513
- matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
514
- layerAidNodes.each_with_index do |nodeA, i|
515
- layerBidNodes.each_with_index do |nodeB, j|
516
- if @edges[nodeB].include?(nodeA)
517
- matrix[i, j] = 1
518
- else
519
- matrix[i, j] = 0
520
- end
521
- end
522
- end
523
- all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
524
- @adjacency_matrices[[layerA, layerB]] = all_info_matrix
525
- return all_info_matrix
526
- end
527
-
528
- def clean_autorelations_on_association_values
506
+ ## ASSOCIATION METHODS
507
+ ############################################################
508
+ def clean_autorelations_on_association_values # DONE
529
509
  @association_values.each do |meth, values|
530
510
  values.select!{|relation| @nodes[relation[0]].type != @nodes[relation[1]].type}
531
511
  end
532
512
  end
533
513
 
534
- ## ASSOCIATION METHODS
535
- ############################################################
536
- def get_association_values(layers, base_layer, meth)
514
+ def get_association_values(layers, base_layer, meth) # DONE
537
515
  relations = [] #node A, node B, val
538
- if meth == :jaccard #all networks
516
+ if meth == :counts
517
+ relations = get_counts_association(layers, base_layer)
518
+ elsif meth == :jaccard #all networks
539
519
  relations = get_jaccard_association(layers, base_layer)
540
520
  elsif meth == :simpson #all networks
541
521
  relations = get_simpson_association(layers, base_layer)
@@ -565,20 +545,13 @@ class Network
565
545
 
566
546
  ## association methods adjacency matrix based
567
547
  #---------------------------------------------------------
568
- # Alaimo 2014, doi: 10.3389/fbioe.2014.00071
569
- def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5)
548
+ def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5) # DONE
570
549
  relations = []
571
550
  matrix1 = @adjacency_matrices[firstPairLayers].first
572
- rowIds = @adjacency_matrices[firstPairLayers][1]
573
551
  matrix2 = @adjacency_matrices[secondPairLayers].first
552
+ finalMatrix = Adv_mat_calc.tranference_resources(matrix1, matrix2, lambda_value1 = lambda_value1, lambda_value2 = lambda_value2)
553
+ rowIds = @adjacency_matrices[firstPairLayers][1]
574
554
  colIds = @adjacency_matrices[secondPairLayers][2]
575
- m1rowNumber, m1colNumber = matrix1.shape
576
- m2rowNumber, m2colNumber = matrix2.shape
577
- #puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
578
- matrix1Weight = graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
579
- matrix2Weight = graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
580
- matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
581
- finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
582
555
  relations = matrix2relations(finalMatrix, rowIds, colIds)
583
556
  @association_values[:transference] = relations
584
557
  return relations
@@ -587,7 +560,7 @@ class Network
587
560
  ## association methods node pairs based
588
561
  #---------------------------------------------------------
589
562
  # Bass 2013, doi:10.1038/nmeth.2728
590
- def get_associations(layers, base_layer) # BASE METHOD
563
+ def get_associations(layers, base_layer) # DONE BASE METHOD
591
564
  associations = get_all_pairs(layers: layers) do |node1, node2|
592
565
  associatedIDs_node1 = @edges[node1].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
593
566
  associatedIDs_node2 = @edges[node2].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
@@ -598,7 +571,15 @@ class Network
598
571
  return associations
599
572
  end
600
573
 
601
- def get_jaccard_association(layers, base_layer)
574
+ def get_counts_association(layers, base_layer) # DONE
575
+ relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
576
+ countValue = intersectedIDs.length
577
+ end
578
+ @association_values[:counts] = relations
579
+ return relations
580
+ end
581
+
582
+ def get_jaccard_association(layers, base_layer) # DONE
602
583
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
603
584
  unionIDS = associatedIDs_node1 | associatedIDs_node2
604
585
  jaccValue = intersectedIDs.length.to_f/unionIDS.length
@@ -607,7 +588,7 @@ class Network
607
588
  return relations
608
589
  end
609
590
 
610
- def get_simpson_association(layers, base_layer)
591
+ def get_simpson_association(layers, base_layer) # DONE
611
592
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
612
593
  minLength = [associatedIDs_node1.length, associatedIDs_node2.length].min
613
594
  simpsonValue = intersectedIDs.length.to_f/minLength
@@ -616,7 +597,7 @@ class Network
616
597
  return relations
617
598
  end
618
599
 
619
- def get_geometric_associations(layers, base_layer)
600
+ def get_geometric_associations(layers, base_layer) # DONE
620
601
  #wang 2016 method
621
602
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
622
603
  intersectedIDs = intersectedIDs.length**2
@@ -627,7 +608,7 @@ class Network
627
608
  return relations
628
609
  end
629
610
 
630
- def get_cosine_associations(layers, base_layer)
611
+ def get_cosine_associations(layers, base_layer) # DONE
631
612
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
632
613
  productLength = Math.sqrt(associatedIDs_node1.length * associatedIDs_node2.length)
633
614
  cosineValue = intersectedIDs.length/productLength
@@ -636,7 +617,7 @@ class Network
636
617
  return relations
637
618
  end
638
619
 
639
- def get_pcc_associations(layers, base_layer)
620
+ def get_pcc_associations(layers, base_layer) # DONE
640
621
  #for Ny calcule use get_nodes_layer
641
622
  base_layer_nodes = get_nodes_layer([base_layer])
642
623
  ny = base_layer_nodes.length
@@ -652,7 +633,7 @@ class Network
652
633
  return relations
653
634
  end
654
635
 
655
- def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil)
636
+ def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil) # DONE
656
637
  ny = get_nodes_layer([base_layer]).length
657
638
  fet = Rubystats::FishersExactTest.new
658
639
  relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
@@ -685,11 +666,11 @@ class Network
685
666
  return relations
686
667
  end
687
668
 
688
- def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01)
669
+ def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01) # NOT
689
670
  relations = []
690
671
  reference_layer = (layers - @layer_ontologies.keys).first
691
672
  ontology_layer = (layers - [reference_layer]).first
692
- ref_nodes = get_nodes_from_layer(reference_layer) # get nodes from NOT ontology layer
673
+ ref_nodes = get_nodes_layer([reference_layer]).map{|n| n.id} # get nodes from NOT ontology layer
693
674
  ontology = @layer_ontologies[ontology_layer]
694
675
  base_layer_length = @nodes.values.count{|n| n.type == base_layer}
695
676
  ref_nodes.each do |ref_node|
@@ -711,7 +692,7 @@ class Network
711
692
  return relations
712
693
  end
713
694
 
714
- def compute_adjusted_pvalue(relations, log_val=true)
695
+ def compute_adjusted_pvalue(relations, log_val=true) # DONE
715
696
  relations.each_with_index do |data, i| #p1, p2, pval
716
697
  pval_adj = yield(data.last, i)
717
698
  pval_adj = -Math.log10(pval_adj) if log_val && pval_adj > 0
@@ -719,13 +700,13 @@ class Network
719
700
  end
720
701
  end
721
702
 
722
- def compute_log_transformation(relations) #Only perform log transform whitout adjust pvalue. Called when adjusted method is not defined
703
# Performs only the log transformation of the p-values, without adjusting
# them. Called when no adjustment method is defined.
#
# relations - records handed to compute_adjusted_pvalue, which reads the
#             p-value from the last position of each record.
#
# Delegates to compute_adjusted_pvalue (log_val left at its default) with a
# block that gives every p-value back untouched. Returns whatever
# compute_adjusted_pvalue returns.
def compute_log_transformation(relations)
  compute_adjusted_pvalue(relations) { |raw_pvalue, _position| raw_pvalue }
end
727
708
 
728
- def compute_adjusted_pvalue_bonferroni(relations)
709
+ def compute_adjusted_pvalue_bonferroni(relations) # DONE
729
710
  n_comparations = relations.length
730
711
  compute_adjusted_pvalue(relations) do |pval, index|
731
712
  adj = pval * n_comparations
@@ -734,34 +715,16 @@ class Network
734
715
  end
735
716
  end
736
717
 
737
- def compute_adjusted_pvalue_benjaminiHochberg(relations)
718
# Replaces the p-value of each relation with its Benjamini-Hochberg corrected
# value.
#
# relations - records whose last element is the raw p-value.
#
# The corrected values are computed once, for the whole list, by
# get_benjaminiHochberg_pvalues; compute_adjusted_pvalue then receives, for the
# record at each position, the corrected value found at that same position.
def compute_adjusted_pvalue_benjaminiHochberg(relations)
  raw_pvalues = relations.map { |relation| relation.last }
  corrected_pvalues = get_benjaminiHochberg_pvalues(raw_pvalues)
  compute_adjusted_pvalue(relations) { |_raw_pvalue, position| corrected_pvalues[position] }
end
743
724
 
744
- def add_record(hash, node1, node2)
745
- query = hash[node1]
746
- if query.nil?
747
- hash[node1] = [node2]
748
- else
749
- query << node2
750
- end
751
- end
752
-
753
- def add_nested_record(hash, node1, node2, val)
754
- query_node1 = hash[node1]
755
- if query_node1.nil?
756
- hash[node1] = {node2 => val}
757
- else
758
- query_node1[node2] = val
759
- end
760
- end
761
-
762
-
763
- def get_csi_associations(layers, base_layer)
725
+ def get_csi_associations(layers, base_layer) # DONE
764
726
  pcc_relations = get_pcc_associations(layers, base_layer)
727
+ pcc_relations.select!{|row| !row[2].nan?}
765
728
  clean_autorelations_on_association_values if layers.length > 1
766
729
  nx = get_nodes_layer(layers).length
767
730
  pcc_vals = {}
@@ -776,177 +739,161 @@ class Network
776
739
  pcc_relations.each do |node1, node2 ,assoc_index|
777
740
  pccAB = assoc_index - 0.05
778
741
  valid_nodes = 0
779
- node_rels[node1].each do |node|
780
- valid_nodes += 1 if pcc_vals[node1][node] >= pccAB
781
- end
782
- node_rels[node2].each do |node|
783
- valid_nodes += 1 if pcc_vals[node2][node] >= pccAB
784
- end
785
- csiValue = 1 - (valid_nodes-1).fdiv(nx)
786
- # valid_nodes-1 is done due to the connection node1-node2 is counted twice (one for each loop)
742
+
743
+ significant_nodes_from_node1 = node_rels[node1].select{|node| pcc_vals[node1][node] >= pccAB}
744
+ significant_nodes_from_node2 = node_rels[node2].select{|node| pcc_vals[node2][node] >= pccAB}
745
+ all_significant_nodes = significant_nodes_from_node2 | significant_nodes_from_node1
746
+ all_nodes = node_rels[node1] | node_rels[node2]
747
+
748
+ csiValue = 1 - (all_significant_nodes.length).fdiv(all_nodes.length)
787
749
  relations << [node1, node2, csiValue]
788
750
  end
789
751
  @association_values[:csi] = relations
790
752
  return relations
791
753
  end
792
754
 
755
# Computes a kernel from the adjacency matrix stored for layer2kernel and
# stores the result in @kernels under the same key.
#
# layer2kernel  - key used both in @adjacency_matrices and in @kernels.
# kernel        - kernel identifier, forwarded untouched to Adv_mat_calc.get_kernel.
# normalization - forwarded as the fourth positional argument (default: false).
#
# Returns the value produced by Adv_mat_calc.get_kernel.
def get_kernel(layer2kernel, kernel, normalization=false)
  # Only the first two elements of the stored entry are needed here.
  matrix, node_names = @adjacency_matrices[layer2kernel]
  # Ruby has no `name=value` call syntax: the former `normalization=normalization`
  # was a self-assignment whose value travelled positionally, so pass it plainly.
  @kernels[layer2kernel] = Adv_mat_calc.get_kernel(matrix, node_names, kernel, normalization)
end
793
760
 
794
- ## PERFORMANCE METHODS
795
- ############################################################
796
- def load_control(ref_array)
797
- control = {}
798
- ref_array.each do |node1, node2|
799
- if node2 != '-'
800
- query = control[node1]
801
- if query.nil?
802
- control[node1] = [node2]
803
- else
804
- query << node2
805
- end
806
- end
761
# Writes a kernel to disk by calling #save(output_file) on the object stored
# in @kernels for layer2kernel. Returns whatever that #save call returns.
def write_kernel(layer2kernel, output_file)
  stored_kernel = @kernels[layer2kernel]
  stored_kernel.save(output_file)
end
764
+
765
# Associates an ontology with a layer, loading each ontology file only once.
#
# ontology_file_path - path given to Ontology.new(file: ..., load_file: true).
# layer_name         - key under which the ontology is stored in @layer_ontologies.
#
# @loaded_obos and @ontologies are kept as parallel lists: the ontology built
# from the path at position i of @loaded_obos lives at position i of @ontologies.
# Returns the ontology linked to the layer.
def link_ontology(ontology_file_path, layer_name)
  cached_position = @loaded_obos.index(ontology_file_path)
  if cached_position.nil?
    # First time this file is requested: load it and remember it.
    ontology = Ontology.new(file: ontology_file_path, load_file: true)
    @loaded_obos << ontology_file_path
    @ontologies << ontology
  else
    # Already loaded: reuse the same ontology object for this layer.
    ontology = @ontologies[cached_position]
  end
  @layer_ontologies[layer_name] = ontology
end
811
775
 
812
- def load_prediction(pairs_array)
813
- pred = {}
814
- min = nil
815
- max = nil
816
- pairs_array.each do |key, label, score|
817
- query = pred[key]
818
- if !min.nil? && !max.nil?
819
- min = score if score < min
820
- max = score if score > max
821
- else
822
- min = score; max = score
823
- end
824
- if query.nil?
825
- pred[key] = [[label], [score]]
826
- else
827
- query.first << label
828
- query.last << score
776
+ ## RANDOMIZATION METHODS
777
+ ############################################################
778
# Shuffles the node labels attached to the adjacency matrix stored under the
# @layers key.
#
# The network is cloned BEFORE anything is modified and that clone is the
# return value; the shuffle itself is applied to the receiver's own
# @adjacency_matrices and @edges.
# NOTE(review): returning the pre-shuffle clone while mutating self looks
# unintended; Network#clone copies its containers with #clone, so the in-place
# shuffle may also be visible through the returned object -- confirm with callers.
def randomize_monopartite_net_by_nodes
  random_network = clone
  # Build the matrix from the edge list when it has not been generated yet.
  @adjacency_matrices[@layers] = @edges.to_bmatrix if @adjacency_matrices[@layers].nil?
  matrix_entry = @adjacency_matrices[@layers]
  shuffled_ids = matrix_entry[1]
  shuffled_ids.shuffle! # in place, so slot 1 already holds the shuffled labels
  matrix_entry[2] = shuffled_ids # columns are given the very same label array as rows
  # NOTE(review): edges are rebuilt only when @edges is empty, whereas
  # randomize_bipartite_net_by_nodes rebuilds only when it is NOT empty;
  # one of the two guards is probably inverted.
  @edges = matrix_entry.first.bmatrix_squared_to_hash(shuffled_ids) if @edges.empty?
  return random_network
end
791
+
792
# Shuffles the row labels of the adjacency matrix stored under the @layers key,
# leaving the column labels as they are.
#
# The network is cloned BEFORE anything is modified and that clone is the
# return value; the shuffle is applied to the receiver's own state.
# NOTE(review): same clone-then-mutate-self pattern as
# randomize_monopartite_net_by_nodes -- confirm it is what callers expect.
def randomize_bipartite_net_by_nodes
  random_network = clone
  # Build the matrix from the edge list when it has not been generated yet.
  @adjacency_matrices[@layers] = @edges.to_bmatrix if @adjacency_matrices[@layers].nil?
  matrix_entry = @adjacency_matrices[@layers]
  row_ids = matrix_entry[1]
  col_ids = matrix_entry[2]
  row_ids.shuffle! # in place, so the stored entry already sees the new order
  # NOTE(review): edges are rebuilt only when @edges is NOT empty, the opposite
  # of the guard used by randomize_monopartite_net_by_nodes.
  @edges = matrix_entry.first.bmatrix_rectangular_to_hash(row_ids, col_ids) if !@edges.empty?
  return random_network
end
806
+
807
# Rewires the edges of the first layer: the relations read from the adjacency
# matrix are split into their two endpoints, the second endpoints are shuffled
# and each first endpoint is paired with the first remaining shuffled endpoint
# that differs from it (so no self-loop is created).
#
# Resets @edges, re-adds the rewired edges through add_edge and finally calls
# generate_adjacency_matrix, whose value is returned. Unlike the *_by_nodes
# variants, no clone is made and the adjacency matrix must already exist.
# NOTE(review): generate_adjacency_matrix receives the one-element array
# `layer` twice, while randomize_bipartite_net_by_links passes plain layer
# names -- confirm which form that method expects.
def randomize_monopartite_net_by_links
  layer = [@layers.first]
  matrix_entry = @adjacency_matrices[layer]
  # Original note (translated): change to the function created in numo_expansion.
  relations = diagonal2relations(matrix_entry.first, matrix_entry[1], matrix_entry[2])
  nodesA = relations.map { |relation| relation[0] }
  nodesB = relations.map { |relation| relation[1] }
  nodesB.shuffle!
  @edges = {}
  nodesA.each do |nodeA|
    partner_pos = nodesB.index { |candidate| nodeA != candidate }
    # Every remaining endpoint equals nodeA: the old index scan ran past the end
    # of the list here and added an edge to nil. Leave this endpoint unpaired.
    next if partner_pos.nil?
    add_edge(nodeA, nodesB.delete_at(partner_pos))
  end
  generate_adjacency_matrix(layer, layer)
end
833
829
 
834
830
 
835
- # Pandey 2007, Association Analysis-based Transformations for Protein Interaction Networks: A Function Prediction Case Study
836
- def get_pred_rec(meth, cut_number = 100, top_number = 10000)
837
- performance = [] #cut, pred, rec
838
- preds, limits = load_prediction(@association_values[meth])
839
- cuts = get_cuts(limits, cut_number)
840
- cuts.each do |cut|
841
- prec, rec = pred_rec(preds, cut, top_number)
842
- performance << [cut, prec, rec]
831
# Rewires the edges between two layers: the relations read from the adjacency
# matrix are split into their two endpoints, the second endpoints are shuffled
# and re-paired position by position with the first endpoints.
#
# layers - key of the matrix in @adjacency_matrices; its first two elements are
#          the arguments given to generate_adjacency_matrix.
#
# Resets @edges, re-adds the rewired edges through add_edge and returns the
# value of generate_adjacency_matrix.
def randomize_bipartite_net_by_links(layers)
  # Check whether the matrix exists; build it from the edges otherwise.
  @adjacency_matrices[layers] = @edges.to_bmatrix() if @adjacency_matrices[layers].nil?
  matrix_entry = @adjacency_matrices[layers]
  relations = matrix2relations(matrix_entry.first, matrix_entry[1], matrix_entry[2])
  sources = relations.map { |relation| relation[0] }
  targets = relations.map { |relation| relation[1] }
  targets.shuffle!
  @edges = {}
  sources.zip(targets) { |source, target| add_edge(source, target) }
  generate_adjacency_matrix(layers[0], layers[1])
end
846
852
 
847
- def pred_rec(preds, cut, top)
848
- predicted_labels = 0 #m
849
- true_labels = 0 #n
850
- common_labels = 0 # k
851
- @control_connections.each do |key, c_labels|
852
- true_labels += c_labels.length #n
853
- pred_info = preds[key]
854
- if !pred_info.nil?
855
- labels, scores = pred_info
856
- reliable_labels = get_reliable_labels(labels, scores, cut, top)
857
- predicted_labels += reliable_labels.length #m
858
- common_labels += (c_labels & reliable_labels).length #k
853
# Dispatches to the randomization routine that matches the requested strategy
# and the number of layers in the network.
#
# random_type - 'nodes' or 'links'; any other value aborts the program with an
#               error message.
#
# Returns the value of the selected routine, or nil when the network has
# neither one nor two layers.
# NOTE(review): the *_by_nodes routines return a Network clone, while the
# *_by_links routines return the value of generate_adjacency_matrix -- callers
# should not assume a Network in both cases.
def randomize_network(random_type)
  random_network = nil
  case random_type
  when 'nodes'
    if @layers.length == 1
      random_network = randomize_monopartite_net_by_nodes
    elsif @layers.length == 2
      random_network = randomize_bipartite_net_by_nodes
    end
  when 'links'
    if @layers.length == 1
      random_network = randomize_monopartite_net_by_links
    elsif @layers.length == 2
      # randomize_bipartite_net_by_links requires the layer pair; calling it
      # without arguments raised ArgumentError.
      random_network = randomize_bipartite_net_by_links(@layers)
    end
  else
    abort("ERROR: The randomization is not available for #{random_type} types of nodes")
  end
  return random_network
end
871
+
868
872
 
869
- ## KERNEL METHODS
870
- #######################################################################################
871
- def get_kernel(layer2kernel, kernel, normalization=false)
872
- matrix, node_names = @adjacency_matrices[layer2kernel]
873
- #I = identity matrix
874
- #D = Diagonal matrix
875
- #A = adjacency matrix
876
- #L = laplacian matrix = D − A
877
- matrix_result = nil
878
- dimension_elements = matrix.shape.last
879
- # In scuba code, the diagonal values of A is set to 0. In weighted matrix the kernel result is the same with or without this operation. Maybe increases the computing performance?
880
- # In the md kernel this operation affects the values of the final kernel
881
- #dimension_elements.times do |n|
882
- # matrix[n,n] = 0.0
883
- #end
884
- if kernel == 'el' || kernel == 'ct' || kernel == 'rf' ||
885
- kernel.include?('vn') || kernel.include?('rl') || kernel == 'me'
886
- diagonal_matrix = matrix.sum(1).diag # get the total sum for each row, for this reason the sum method takes the 1 value. If sum colums is desired, use 0
887
- # Make a matrix whose diagonal is row_sum
888
- matrix_L = diagonal_matrix - matrix
889
- if kernel == 'el' #Exponential Laplacian diffusion kernel(active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
890
- beta = 0.02
891
- beta_product = matrix_L * -beta
892
- #matrix_result = beta_product.expm
893
- matrix_result = Numo::Linalg.expm(beta_product, 14)
894
- elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
895
- matrix_result = Numo::Linalg.pinv(matrix_L) # Anibal saids that this kernel was normalized. Why?. Paper do not seem to describe this operation for ct, it describes for Kvn or for all kernels, it is not clear.
896
- elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
897
- matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) #Krf = (I +L ) ^ −1
898
- elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
899
- alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
900
- matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha ) # (I -alphaA ) ^ −1
901
- elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
902
- alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
903
- matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha ) # (I + alphaL ) ^ −1
904
- elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
905
- beta=0.04
906
- #(beta/N)*(N*I - D + A)
907
- id_mat = Numo::DFloat.eye(dimension_elements)
908
- m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix ) * (beta/dimension_elements)
909
- #matrix_result = m_matrix.expm
910
- matrix_result = Numo::Linalg.expm(m_matrix, 16)
911
- end
912
- elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
913
- lambda_value = matrix.min_eigenvalue
914
- matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs # Ka = A + lambda*I # lambda = the absolute value of the smallest eigenvalue of A
915
- elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
916
- t = kernel.gsub('md', '').to_i
917
- #TODO: check implementation with Numo::array
918
- col_sum = matrix.sum(1)
919
- p_mat = matrix.div_by_vector(col_sum)
920
- p_temp_mat = p_mat.clone
921
- zt_mat = p_mat.clone
922
- (t-1).times do
923
- p_temp_mat = p_temp_mat.dot(p_mat)
924
- zt_mat = zt_mat + p_temp_mat
925
- end
926
- zt_mat = zt_mat * (1.0/t)
927
- matrix_result = zt_mat.dot(zt_mat.transpose)
928
- else
929
- matrix_result = matrix
930
- warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
931
- # This allows process a previous kernel and perform the normalization in a separated step.
873
# Saves an adjacency matrix with Npy.save and writes a companion
# "<output_file>.lst" text file with node ids, one per line.
#
# layerA, layerB - layers that identify the matrix: the lookup key is [layerA]
#                  when both are equal and [layerA, layerB] otherwise.
# output_file    - path of the matrix file; the list file adds the '.lst' suffix.
#
# NOTE(review): the list is built from every node in @nodes, not from the
# row/column ids stored next to the matrix -- confirm both orders agree.
def save_adjacency_matrix(layerA, layerB, output_file)
  layers = layerA == layerB ? [layerA] : [layerA, layerB]
  # The lookup used an undefined local `layer` (NameError); `layers` is the key built above.
  Npy.save(output_file, @adjacency_matrices[layers].first)
  node_names = @nodes.values.map { |node| node.id }
  File.open(output_file+'.lst', 'w'){|f| f.print node_names.join("\n")}
end
936
883
 
937
- def write_kernel(layer2kernel, output_file)
938
- @kernels[layer2kernel].save(output_file)
884
# Registers one node for every distinct id found among the row and column
# labels of a stored adjacency matrix.
#
# layers_network          - layer definitions handed to set_layer to resolve the
#                           layer of each id.
# layers_adjacency_matrix - key of the matrix entry in @adjacency_matrices.
#
# Returns the array of distinct ids that were added.
def build_nodes_from_adjacency_matrix(layers_network, layers_adjacency_matrix)
  matrix_entry = @adjacency_matrices[layers_adjacency_matrix]
  # `+` builds a new array. The former `concat` appended the column ids to the
  # stored row-id array itself (doubling it when rows and columns share one array).
  nodes_ids = (matrix_entry[1] + matrix_entry[2]).uniq
  nodes_ids.each do |node_id|
    add_node(node_id, set_layer(layers_network, node_id))
  end
end
940
890
 
941
- def link_ontology(ontology_file_path, layer_name)
942
- if !@loaded_obos.include?(ontology_file_path) #Load new ontology
943
- ontology = Ontology.new(file: ontology_file_path, load_file: true)
944
- @loaded_obos << ontology_file_path
945
- @ontologies << ontology
946
- else #Link loaded ontology to current layer
947
- ontology = @ontologies[@loaded_obos.index(ontology_file_path)]
891
# Rebuilds @edges from a stored adjacency matrix.
#
# layer - key of the matrix entry in @adjacency_matrices.
#
# @edges is emptied first; then every relation reported by matrix2relations
# is re-added with add_edge (the association value itself is not used).
# Returns the list of relations that was iterated.
def build_edges_from_adjacency_matrix(layer)
  @edges = {}
  matrix_entry = @adjacency_matrices[layer]
  found_relations = matrix2relations(matrix_entry.first, matrix_entry[1], matrix_entry[2])
  found_relations.each do |source, target, _value|
    add_edge(source, target)
  end
end
951
898
 
952
899
 
@@ -954,8 +901,26 @@ class Network
954
901
  #######################################################################################
955
902
  private
956
903
 
957
- def load_input_list(file)
958
- return File.open(file).readlines.map!{|line| line.chomp}
904
# Returns the string 'NULL' in place of nil; any other value (false included)
# is returned unchanged.
def replace_nil_vals(val)
  return 'NULL' if val.nil?
  val
end
907
+
908
# Appends node2 to the list kept under hash[node1], creating the list when the
# key has no value yet. Returns the list that now holds node2.
def add_record(hash, node1, node2)
  bucket = hash[node1]
  return (hash[node1] = [node2]) if bucket.nil?
  bucket << node2
end
916
+
917
# Stores val under hash[node1][node2], creating the inner hash when node1 has
# no value yet. Returns the new inner hash when it had to be created, and val
# otherwise.
def add_nested_record(hash, node1, node2, val)
  inner = hash[node1]
  return (hash[node1] = { node2 => val }) if inner.nil?
  inner[node2] = val
end
960
925
 
961
926
  def exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
@@ -968,84 +933,29 @@ class Network
968
933
  return res
969
934
  end
970
935
 
971
- def set_layer(layer_definitions, node_name)
972
- layer = nil
973
- if layer_definitions.length > 1
974
- layer_definitions.each do |layer_name, regexp|
975
- if node_name =~ regexp
976
- layer = layer_name
977
- break
978
- end
936
# Lists the cells of a matrix whose value is greater than zero.
#
# finalMatrix - object indexed as finalMatrix[row_position, column_position].
# rowIds      - labels of the rows, in matrix order.
# colIds      - labels of the columns, in matrix order.
#
# Returns an array of [row_id, col_id, value] triples, scanned row by row and,
# inside each row, column by column.
def matrix2relations(finalMatrix, rowIds, colIds)
  rowIds.each_with_index.with_object([]) do |(row_id, row_pos), relations|
    colIds.each_with_index do |col_id, col_pos|
      cell_value = finalMatrix[row_pos, col_pos]
      relations << [row_id, col_id, cell_value] if cell_value > 0
    end
  end
end
998
946
 
999
- def get_reliable_labels(labels, scores, cut, top)
1000
- reliable_labels = []
1001
- scores.each_with_index do |score, i|
1002
- reliable_labels << [labels[i], score] if score >= cut
1003
- end
1004
- reliable_labels = reliable_labels.sort!{|l1,l2| l2.last <=> l1.last}[0..top-1].map{|pred| pred.first}
1005
- return reliable_labels
1006
- end
1007
-
1008
- def graphWeights (rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
1009
- ky = (1.0 / inputMatrix.sum(0)).diag #sum cols
1010
- weigth = Numo::Linalg.dot(inputMatrix, ky).transpose
1011
- ky = nil #free memory
1012
- weigth = Numo::Linalg.dot(inputMatrix, weigth)
1013
-
1014
- kx = inputMatrix.sum(1) #sum rows
1015
-
1016
- kx_lamb = kx ** lambdaValue
1017
- kx_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
1018
- rowsNumber.times do |j|
1019
- rowsNumber.times do |i|
1020
- kx_lamb_mat[j,i] = kx_lamb[i]
1021
- end
1022
- end
1023
- kx_lamb = nil #free memory
1024
-
1025
- kx_inv_lamb = kx ** (1 - lambdaValue)
1026
- kx_inv_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
1027
- rowsNumber.times do |j|
1028
- rowsNumber.times do |i|
1029
- kx_inv_lamb_mat[i, j] = kx_inv_lamb[i]
1030
- end
1031
- end
1032
- kx_inv_lamb = nil #free memory
1033
-
1034
- nx = 1.0/(kx_lamb_mat.inplace * kx_inv_lamb_mat).inplace # inplace marks a matrix to be used by reference, not for value
1035
- kx_lamb_mat = nil #free memory
1036
- kx_inv_lamb_mat = nil #free memory
1037
- weigth.inplace * nx
1038
- return weigth
1039
- end
1040
-
1041
- def matrix2relations(finalMatrix, rowIds, colIds)
947
# Lists the cells above the main diagonal of a matrix whose value is greater
# than zero, so each pair of positions is visited once and the diagonal itself
# is skipped.
#
# finalMatrix - object indexed as finalMatrix[row_position, column_position].
# rowIds      - labels of the rows, in matrix order.
# colIds      - labels of the columns, in matrix order.
#
# Returns an array of [row_id, col_id, value] triples, row by row.
def diagonal2relations(finalMatrix, rowIds, colIds)
  relations = []
  total_cols = colIds.length
  rowIds.each_with_index do |row_id, row_pos|
    # Walk straight over the columns to the right of the diagonal instead of
    # scanning every column and discarding the out-of-range offsets.
    ((row_pos + 1)...total_cols).each do |col_pos|
      cell_value = finalMatrix[row_pos, col_pos]
      relations << [row_id, colIds[col_pos], cell_value] if cell_value > 0
    end
  end
  return relations
end
960
+
1051
961
  end