NetAnalyzer 0.1.2 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/NetAnalyzer.gemspec +16 -6
- data/README.md +48 -10
- data/Rakefile +14 -3
- data/bin/NetAnalyzer.rb +182 -54
- data/bin/text2binary_matrix.rb +294 -0
- data/lib/NetAnalyzer/network.rb +725 -128
- data/lib/NetAnalyzer/templates/ElGrapho.min.js +28 -0
- data/lib/NetAnalyzer/templates/cytoscape.erb +65 -0
- data/lib/NetAnalyzer/templates/cytoscape.min.js +32 -0
- data/lib/NetAnalyzer/templates/el_grapho.erb +89 -0
- data/lib/NetAnalyzer/templates/pako.min.js +1 -0
- data/lib/NetAnalyzer/templates/sigma.erb +132 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +2 -0
- data/old_spec/NetAnalyzer_spec.rb +11 -0
- data/old_spec/spec_helper.rb +2 -0
- metadata +174 -25
data/lib/NetAnalyzer/network.rb
CHANGED
|
@@ -1,22 +1,56 @@
|
|
|
1
|
-
require '
|
|
2
|
-
require '
|
|
3
|
-
require '
|
|
1
|
+
require 'rubystats'
|
|
2
|
+
require 'gv'
|
|
3
|
+
#require 'nmatrix'
|
|
4
|
+
#require 'nmatrix/lapacke'
|
|
5
|
+
require 'numo/narray'
|
|
6
|
+
require 'numo/linalg'
|
|
7
|
+
require 'parallel'
|
|
8
|
+
|
|
9
|
+
#require 'pp'
|
|
4
10
|
require 'bigdecimal'
|
|
11
|
+
require 'benchmark'
|
|
12
|
+
#require 'nmatrix_expansion'
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
#For javascrip plotting
|
|
16
|
+
require 'erb'
|
|
17
|
+
require 'base64'
|
|
18
|
+
require 'json'
|
|
19
|
+
require 'zlib'
|
|
20
|
+
|
|
21
|
+
require 'semtools'
|
|
22
|
+
require 'expcalc'
|
|
23
|
+
TEMPLATES = File.join(File.dirname(__FILE__), 'templates')
|
|
5
24
|
|
|
6
25
|
class Network
|
|
7
|
-
|
|
26
|
+
|
|
27
|
+
attr_accessor :association_values, :control_connections, :kernels, :reference_nodes, :group_nodes, :threads
|
|
8
28
|
|
|
9
29
|
## BASIC METHODS
|
|
10
30
|
############################################################
|
|
11
31
|
def initialize(layers)
|
|
12
|
-
@
|
|
32
|
+
@threads = 0
|
|
33
|
+
@nodes = {}
|
|
13
34
|
@edges = {}
|
|
35
|
+
@reference_nodes = []
|
|
36
|
+
@group_nodes = {}
|
|
14
37
|
@adjacency_matrices = {}
|
|
38
|
+
@kernels = {}
|
|
15
39
|
@layers = layers
|
|
16
40
|
@association_values = {}
|
|
17
41
|
@control_connections = {}
|
|
42
|
+
@compute_pairs = :conn
|
|
43
|
+
@compute_autorelations = true
|
|
44
|
+
@loaded_obos = []
|
|
45
|
+
@ontologies = []
|
|
46
|
+
@layer_ontologies = {}
|
|
18
47
|
end
|
|
19
|
-
|
|
48
|
+
|
|
49
|
+
def set_compute_pairs(use_pairs, get_autorelations)
|
|
50
|
+
@compute_pairs = use_pairs
|
|
51
|
+
@compute_autorelations = get_autorelations
|
|
52
|
+
end
|
|
53
|
+
|
|
20
54
|
def add_node(nodeID, nodeType = 0)
|
|
21
55
|
@nodes[nodeID] = Node.new(nodeID, nodeType)
|
|
22
56
|
end
|
|
@@ -35,52 +69,424 @@ class Network
|
|
|
35
69
|
end
|
|
36
70
|
end
|
|
37
71
|
|
|
38
|
-
def
|
|
39
|
-
|
|
40
|
-
|
|
72
|
+
def delete_nodes(node_list, mode='d')
|
|
73
|
+
if mode == 'd'
|
|
74
|
+
@nodes.reject!{|n| node_list.include?(n)}
|
|
75
|
+
@edges.reject!{|n, connections| node_list.include?(n)}
|
|
76
|
+
@edges.each do |n, connections|
|
|
77
|
+
connections.reject!{|c| node_list.include?(c)}
|
|
78
|
+
end
|
|
79
|
+
elsif mode == 'r'
|
|
80
|
+
@nodes.select!{|n| node_list.include?(n)}
|
|
81
|
+
@edges.select!{|n, connections| node_list.include?(n)}
|
|
82
|
+
@edges.each do |n, connections|
|
|
83
|
+
connections.select!{|c| node_list.include?(c)}
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
@edges.reject!{|n, connections| connections.empty?}
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def get_connected_nodes(node_id, from_layer)
|
|
90
|
+
return @edges[node_id].map{|id| @nodes[id]}.select{|node| node.type == from_layer}.map{|node| node.id}
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def get_nodes_from_layer(from_layer)
|
|
94
|
+
return @nodes.values.select{|node| node.type == from_layer}.map{|node| node.id}
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer)
|
|
98
|
+
bipartite_subgraph = {}
|
|
99
|
+
from_layer_node_ids.each do |from_layer_node_id|
|
|
100
|
+
connected_nodes = @edges[from_layer_node_id]
|
|
101
|
+
connected_nodes.each do |connected_node|
|
|
102
|
+
if @nodes[connected_node].type == to_layer
|
|
103
|
+
query = bipartite_subgraph[connected_node]
|
|
104
|
+
if query.nil?
|
|
105
|
+
bipartite_subgraph[connected_node] = get_connected_nodes(connected_node, from_layer)
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
return bipartite_subgraph
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def load_network_by_pairs(file, layers, split_character="\t")
|
|
114
|
+
File.open(file).each do |line|
|
|
115
|
+
line.chomp!
|
|
116
|
+
pair = line.split(split_character)
|
|
117
|
+
node1 = pair[0]
|
|
118
|
+
node2 = pair[1]
|
|
119
|
+
add_node(node1, set_layer(layers, node1))
|
|
120
|
+
add_node(node2, set_layer(layers, node2))
|
|
121
|
+
add_edge(node1, node2)
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def load_network_by_bin_matrix(input_file, node_file, layers)
|
|
126
|
+
node_names = load_input_list(node_file)
|
|
127
|
+
@adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='npy'), node_names, node_names]
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def load_network_by_plain_matrix(input_file, node_file, layers, splitChar)
|
|
131
|
+
node_names = load_input_list(node_file)
|
|
132
|
+
@adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='txt', splitChar=splitChar), node_names, node_names]
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def get_edge_number
|
|
136
|
+
node_connections = get_degree.values.inject(0){|sum, n| sum + n}
|
|
137
|
+
return node_connections/2
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def get_degree(zscore=false)
|
|
141
|
+
degree = {}
|
|
142
|
+
@edges.each do |id, nodes|
|
|
143
|
+
degree[id] = nodes.length
|
|
144
|
+
end
|
|
145
|
+
if !zscore
|
|
146
|
+
degree_values = degree.values
|
|
147
|
+
mean_degree = degree_values.mean
|
|
148
|
+
std_degree = degree_values.standard_deviation
|
|
149
|
+
degree.transform_values!{|v| (v - mean_degree).fdiv(std_degree)}
|
|
150
|
+
end
|
|
151
|
+
return degree
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def get_node_attributes(attr_names)
|
|
155
|
+
attrs = []
|
|
156
|
+
attr_names.each do |attr_name|
|
|
157
|
+
if attr_name == 'get_degree'
|
|
158
|
+
attrs << get_degree
|
|
159
|
+
elsif attr_name == 'get_degreeZ'
|
|
160
|
+
attrs << get_degree(zscore=true)
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
node_ids = attrs.first.keys
|
|
164
|
+
node_attrs = []
|
|
165
|
+
node_ids.each do |n|
|
|
166
|
+
node_attrs << [n].concat(attrs.map{|at| at[n]})
|
|
167
|
+
end
|
|
168
|
+
return node_attrs
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def plot_network(options = {})
|
|
172
|
+
if options[:method] == 'graphviz'
|
|
173
|
+
plot_dot(options)
|
|
174
|
+
else
|
|
175
|
+
if options[:method] == 'elgrapho'
|
|
176
|
+
template = 'el_grapho'
|
|
177
|
+
elsif options[:method] == 'cytoscape'
|
|
178
|
+
template = 'cytoscape'
|
|
179
|
+
elsif options[:method] == 'sigma'
|
|
180
|
+
template = 'sigma'
|
|
181
|
+
end
|
|
182
|
+
renderered_template = ERB.new(File.open(File.join(TEMPLATES, template + '.erb')).read).result(binding)
|
|
183
|
+
File.open(options[:output_file] + '.html', 'w'){|f| f.puts renderered_template}
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
def plot_dot(user_options = {}) # input keys: layout
|
|
188
|
+
options = {layout: "sfdp"}
|
|
189
|
+
options = options.merge(user_options)
|
|
190
|
+
graphviz_colors = %w[lightsteelblue1 lightyellow1 lightgray orchid2]
|
|
191
|
+
palette = {}
|
|
192
|
+
@layers.each do |layer|
|
|
193
|
+
palette[layer] = graphviz_colors.shift
|
|
194
|
+
end
|
|
195
|
+
graph = GV::Graph.open('g', type = :undirected)
|
|
196
|
+
plotted_edges = {}
|
|
41
197
|
@edges.each do |nodeID, associatedIDs|
|
|
42
198
|
associatedIDs.each do |associatedID|
|
|
43
|
-
|
|
199
|
+
pair = [nodeID, associatedID].sort.join('_').to_sym
|
|
200
|
+
if !plotted_edges[pair]
|
|
201
|
+
graph.edge 'e',
|
|
202
|
+
graph.node(nodeID, label: '', style: 'filled', fillcolor: palette[@nodes[nodeID].type]),
|
|
203
|
+
graph.node(associatedID, label: '', style: 'filled' , fillcolor: palette[@nodes[associatedID].type])
|
|
204
|
+
plotted_edges[pair] = true
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
@reference_nodes.each do |nodeID|
|
|
209
|
+
graph.node(nodeID, style: 'filled', fillcolor: 'firebrick1', label: '')
|
|
210
|
+
end
|
|
211
|
+
graphviz_border_colors = %w[blue darkorange red olivedrab4]
|
|
212
|
+
@group_nodes.each do |groupID, gNodes|
|
|
213
|
+
border_color = graphviz_border_colors.shift
|
|
214
|
+
gNodes.each do |nodeID|
|
|
215
|
+
graph.node(nodeID, color: border_color, penwidth: '10', label: '')
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
graph[:overlap] = false
|
|
219
|
+
STDERR.puts 'Save graph'
|
|
220
|
+
graph.save(options[:output_file] + '.png', format='png', layout=options[:layout])
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
def compute_group_metrics(output_filename)
|
|
224
|
+
metrics = []
|
|
225
|
+
header = ['group']
|
|
226
|
+
@group_nodes.keys.each do |k|
|
|
227
|
+
metrics << [k]
|
|
228
|
+
end
|
|
229
|
+
header << 'comparative_degree'
|
|
230
|
+
comparative_degree = communities_comparative_degree(@group_nodes)
|
|
231
|
+
comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
|
|
232
|
+
header << 'avg_sht_path'
|
|
233
|
+
avg_sht_path = communities_avg_sht_path(@group_nodes)
|
|
234
|
+
avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
|
|
235
|
+
if !@reference_nodes.empty?
|
|
236
|
+
header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
|
|
237
|
+
node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
|
|
238
|
+
node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
|
|
239
|
+
end
|
|
240
|
+
File.open(output_filename, 'w') do |f|
|
|
241
|
+
f.puts header.join("\t")
|
|
242
|
+
metrics.each do |gr|
|
|
243
|
+
f. puts gr.join("\t")
|
|
44
244
|
end
|
|
45
245
|
end
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
def replace_nil_vals(val)
|
|
249
|
+
return val.nil? ? 'NULL' : val
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
def communities_comparative_degree(coms)
|
|
253
|
+
comparative_degrees = []
|
|
254
|
+
coms.each do |com_id, com|
|
|
255
|
+
comparative_degrees << compute_comparative_degree(com)
|
|
256
|
+
end
|
|
257
|
+
return comparative_degrees
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def communities_avg_sht_path(coms)
|
|
261
|
+
avg_sht_path = []
|
|
262
|
+
coms.each do |com_id, com|
|
|
263
|
+
dist, paths = compute_avg_sht_path(com)
|
|
264
|
+
avg_sht_path << dist
|
|
265
|
+
end
|
|
266
|
+
return avg_sht_path
|
|
267
|
+
end
|
|
268
|
+
|
|
269
|
+
def compute_node_com_assoc_in_precomputed_communities(coms, ref_node)
|
|
270
|
+
node_com_assoc = []
|
|
271
|
+
coms.each do |com_id, com|
|
|
272
|
+
node_com_assoc << [compute_node_com_assoc(com, ref_node)]
|
|
273
|
+
end
|
|
274
|
+
return node_com_assoc
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
def compute_comparative_degree(com) # see Girvan-Newman Benchmark control parameter in http://networksciencebook.com/chapter/9#testing (communities chapter)
|
|
278
|
+
internal_degree = 0
|
|
279
|
+
external_degree = 0
|
|
280
|
+
com.each do |nodeID|
|
|
281
|
+
nodeIDneigh = @edges[nodeID]
|
|
282
|
+
next if nodeIDneigh.nil?
|
|
283
|
+
internal_degree += (nodeIDneigh & com).length
|
|
284
|
+
external_degree += (nodeIDneigh - com).length
|
|
285
|
+
end
|
|
286
|
+
comparative_degree = external_degree.fdiv(external_degree + internal_degree)
|
|
287
|
+
return comparative_degree
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
def compute_avg_sht_path(com, paths=false)
|
|
291
|
+
path_lengths = []
|
|
292
|
+
all_paths = []
|
|
293
|
+
group = com.dup
|
|
294
|
+
while !group.empty?
|
|
295
|
+
node_start = group.shift
|
|
296
|
+
sht_paths = Parallel.map(group, in_processes: @threads) do |node_stop|
|
|
297
|
+
#group.each do |node_stop|
|
|
298
|
+
dist, path = shortest_path(node_start, node_stop, paths)
|
|
299
|
+
[dist, path]
|
|
300
|
+
#path_lengths << dist if !dist.nil?
|
|
301
|
+
#all_paths << path if !path.empty?
|
|
302
|
+
end
|
|
303
|
+
sht_paths.each do |dist, path|
|
|
304
|
+
path_lengths << dist
|
|
305
|
+
all_paths << path
|
|
306
|
+
end
|
|
307
|
+
end
|
|
308
|
+
if path_lengths.include?(nil)
|
|
309
|
+
avg_sht_path = nil
|
|
310
|
+
else
|
|
311
|
+
avg_sht_path = path_lengths.inject(0){|sum,l| sum + l}.fdiv(path_lengths.length)
|
|
312
|
+
end
|
|
313
|
+
return avg_sht_path, all_paths
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
# https://pythoninwonderland.wordpress.com/2017/03/18/how-to-implement-breadth-first-search-in-python/
|
|
317
|
+
# finds shortest path between 2 nodes of a graph using BFS
|
|
318
|
+
def bfs_shortest_path(start, goal, paths=false)
|
|
319
|
+
dist = nil
|
|
320
|
+
explored = {} # keep track of explored nodes
|
|
321
|
+
previous = {}
|
|
322
|
+
queue = [[start, 0]] # keep track of all the paths to be checked
|
|
323
|
+
is_goal = false
|
|
324
|
+
while !queue.empty? && !is_goal # keeps looping until all possible paths have been checked
|
|
325
|
+
node, dist = queue.pop # pop the first path from the queue
|
|
326
|
+
if !explored.include?(node) # get the last node from the path
|
|
327
|
+
neighbours = @edges[node]
|
|
328
|
+
explored[node] = true # mark node as explored
|
|
329
|
+
next if neighbours.nil?
|
|
330
|
+
dist += 1
|
|
331
|
+
neighbours.each do |neighbour| # go through all neighbour nodes, construct a new path
|
|
332
|
+
next if explored.include?(neighbour)
|
|
333
|
+
queue.unshift([neighbour, dist]) # push it into the queue
|
|
334
|
+
previous[neighbour] = node if paths
|
|
335
|
+
if neighbour == goal # return path if neighbour is goal
|
|
336
|
+
is_goal = true
|
|
337
|
+
break
|
|
338
|
+
end
|
|
339
|
+
end
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
if is_goal
|
|
343
|
+
path = build_path(previous, start, goal) if paths
|
|
344
|
+
else
|
|
345
|
+
dist = nil
|
|
346
|
+
path = []
|
|
347
|
+
end
|
|
348
|
+
return dist, path
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
def build_path(previous, startNode, stopNode)
|
|
352
|
+
path = []
|
|
353
|
+
currentNode = stopNode
|
|
354
|
+
path << currentNode
|
|
355
|
+
while currentNode != startNode
|
|
356
|
+
currentNode = previous[currentNode]
|
|
357
|
+
path << currentNode
|
|
358
|
+
end
|
|
359
|
+
return path
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
def shortest_path(node_start, node_stop, paths=false)
|
|
363
|
+
#https://betterprogramming.pub/5-ways-to-find-the-shortest-path-in-a-graph-88cfefd0030f
|
|
364
|
+
#return bidirectionalSearch(node_start, node_stop)
|
|
365
|
+
#https://efficientcodeblog.wordpress.com/2017/12/13/bidirectional-search-two-end-bfs/
|
|
366
|
+
dist, all_paths = bfs_shortest_path(node_start, node_stop, paths)
|
|
367
|
+
return dist, all_paths
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
def expand_clusters(expand_method)
|
|
371
|
+
clusters = {}
|
|
372
|
+
@group_nodes.each do |id, nodes|
|
|
373
|
+
if expand_method == 'sht_path'
|
|
374
|
+
dist, paths = compute_avg_sht_path(nodes, paths=true) # this uses bfs, maybe Dijkstra is the best one
|
|
375
|
+
new_nodes = paths.flatten.uniq
|
|
376
|
+
clusters[id] = nodes | new_nodes # If some node pair are not connected, recover them
|
|
377
|
+
end
|
|
378
|
+
end
|
|
379
|
+
return clusters
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
def compute_node_com_assoc(com, ref_node)
|
|
383
|
+
ref_cons = 0
|
|
384
|
+
ref_secondary_cons = 0
|
|
385
|
+
secondary_nodes = {}
|
|
386
|
+
other_cons = 0
|
|
387
|
+
other_nodes = {}
|
|
388
|
+
|
|
389
|
+
refNneigh = @edges[ref_node]
|
|
390
|
+
com.each do |nodeID|
|
|
391
|
+
nodeIDneigh = @edges[nodeID]
|
|
392
|
+
next if nodeIDneigh.nil?
|
|
393
|
+
ref_cons += 1 if nodeIDneigh.include?(ref_node)
|
|
394
|
+
if !refNneigh.nil?
|
|
395
|
+
common_nodes = nodeIDneigh & refNneigh
|
|
396
|
+
common_nodes.each {|id| secondary_nodes[id] = true}
|
|
397
|
+
ref_secondary_cons += common_nodes.length
|
|
398
|
+
end
|
|
399
|
+
specific_nodes = nodeIDneigh - refNneigh - [ref_node]
|
|
400
|
+
specific_nodes.each {|id| other_nodes[id] = true}
|
|
401
|
+
other_cons += specific_nodes.length
|
|
402
|
+
end
|
|
403
|
+
by_edge = (ref_cons + ref_secondary_cons).fdiv(other_cons)
|
|
404
|
+
by_node = (ref_cons + secondary_nodes.length).fdiv(other_nodes.length)
|
|
405
|
+
return by_edge, by_node
|
|
50
406
|
end
|
|
51
407
|
|
|
52
408
|
def get_all_intersections
|
|
53
|
-
intersection_lengths =
|
|
54
|
-
|
|
55
|
-
intersection_lengths << intersection(node1, node2).length
|
|
409
|
+
intersection_lengths = get_all_pairs do |node1, node2|
|
|
410
|
+
intersection(node1, node2).length
|
|
56
411
|
end
|
|
57
412
|
return intersection_lengths
|
|
58
413
|
end
|
|
59
414
|
|
|
60
415
|
def get_all_pairs(args = {})
|
|
61
|
-
|
|
416
|
+
all_pairs = []
|
|
417
|
+
default = {:layers => :all}
|
|
62
418
|
args = default.merge(args)
|
|
63
|
-
|
|
64
|
-
|
|
419
|
+
nodeIDsA, nodeIDsB = collect_nodes(args)
|
|
420
|
+
if @compute_autorelations
|
|
421
|
+
if @compute_pairs == :all
|
|
422
|
+
while !nodeIDsA.empty?
|
|
423
|
+
node1 = nodeIDsA.shift
|
|
424
|
+
pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
|
|
425
|
+
yield(node1, node2)
|
|
426
|
+
end
|
|
427
|
+
all_pairs.concat(pairs)
|
|
428
|
+
end
|
|
429
|
+
elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
|
|
430
|
+
while !nodeIDsA.empty?
|
|
431
|
+
node1 = nodeIDsA.shift
|
|
432
|
+
ids_connected_to_n1 = @edges[node1]
|
|
433
|
+
pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
|
|
434
|
+
result = nil
|
|
435
|
+
ids_connected_to_n2 = @edges[node2]
|
|
436
|
+
if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
|
437
|
+
result = yield(node1, node2)
|
|
438
|
+
end
|
|
439
|
+
result
|
|
440
|
+
end
|
|
441
|
+
pairs.compact!
|
|
442
|
+
all_pairs.concat(pairs)
|
|
443
|
+
end
|
|
444
|
+
end
|
|
65
445
|
else
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
446
|
+
#MAIN METHOD
|
|
447
|
+
if @compute_pairs == :conn
|
|
448
|
+
all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
|
|
449
|
+
ids_connected_to_n1 = @edges[node1]
|
|
450
|
+
node1_pairs = []
|
|
451
|
+
nodeIDsB.each do |node2|
|
|
452
|
+
ids_connected_to_n2 = @edges[node2]
|
|
453
|
+
if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
|
454
|
+
node1_pairs << yield(node1, node2)
|
|
455
|
+
end
|
|
456
|
+
end
|
|
457
|
+
node1_pairs
|
|
458
|
+
end
|
|
459
|
+
all_pairs.flatten!(1)
|
|
460
|
+
elsif @compute_pairs == :all
|
|
461
|
+
raise 'Not implemented'
|
|
69
462
|
end
|
|
70
463
|
end
|
|
71
464
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
465
|
+
return all_pairs
|
|
466
|
+
end
|
|
467
|
+
|
|
468
|
+
def collect_nodes(args)
|
|
469
|
+
nodeIDsA = nil
|
|
470
|
+
nodeIDsB = nil
|
|
471
|
+
if @compute_autorelations
|
|
472
|
+
if args[:layers] == :all
|
|
473
|
+
nodeIDsA = @nodes.keys
|
|
474
|
+
else
|
|
475
|
+
nodeIDsA = []
|
|
476
|
+
args[:layers].each do |layer|
|
|
477
|
+
nodeIDsA.concat(@nodes.select{|id, node| node.type == layer}.keys)
|
|
77
478
|
end
|
|
78
479
|
end
|
|
79
|
-
|
|
80
|
-
|
|
480
|
+
else
|
|
481
|
+
if args[:layers] != :all
|
|
482
|
+
nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
|
|
483
|
+
nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
|
|
484
|
+
end
|
|
81
485
|
end
|
|
486
|
+
return nodeIDsA, nodeIDsB
|
|
82
487
|
end
|
|
83
488
|
|
|
489
|
+
|
|
84
490
|
def get_nodes_layer(layers)
|
|
85
491
|
#for creating ny value in hypergeometric and pcc index
|
|
86
492
|
nodes = []
|
|
@@ -104,17 +510,16 @@ class Network
|
|
|
104
510
|
def generate_adjacency_matrix(layerA, layerB)
|
|
105
511
|
layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
|
|
106
512
|
layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
|
|
107
|
-
|
|
108
|
-
layerAidNodes.
|
|
109
|
-
layerBidNodes.
|
|
513
|
+
matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
|
|
514
|
+
layerAidNodes.each_with_index do |nodeA, i|
|
|
515
|
+
layerBidNodes.each_with_index do |nodeB, j|
|
|
110
516
|
if @edges[nodeB].include?(nodeA)
|
|
111
|
-
|
|
517
|
+
matrix[i, j] = 1
|
|
112
518
|
else
|
|
113
|
-
|
|
519
|
+
matrix[i, j] = 0
|
|
114
520
|
end
|
|
115
521
|
end
|
|
116
522
|
end
|
|
117
|
-
matrix = NMatrix.new([layerAidNodes.length, layerBidNodes.length], adjacency_matrix)
|
|
118
523
|
all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
|
|
119
524
|
@adjacency_matrices[[layerA, layerB]] = all_info_matrix
|
|
120
525
|
return all_info_matrix
|
|
@@ -142,6 +547,14 @@ class Network
|
|
|
142
547
|
relations = get_pcc_associations(layers, base_layer)
|
|
143
548
|
elsif meth == :hypergeometric #all networks
|
|
144
549
|
relations = get_hypergeometric_associations(layers, base_layer)
|
|
550
|
+
elsif meth == :hypergeometric_bf #all networks
|
|
551
|
+
relations = get_hypergeometric_associations(layers, base_layer, :bonferroni)
|
|
552
|
+
elsif meth == :hypergeometric_bh #all networks
|
|
553
|
+
relations = get_hypergeometric_associations(layers, base_layer, :benjamini_hochberg)
|
|
554
|
+
elsif meth == :hypergeometric_elim #tripartite networks?
|
|
555
|
+
relations = get_hypergeometric_associations_with_topology(layers, base_layer, :elim)
|
|
556
|
+
elsif meth == :hypergeometric_weight #tripartite networks?
|
|
557
|
+
relations = get_hypergeometric_associations_with_topology(layers, base_layer, :weight)
|
|
145
558
|
elsif meth == :csi #all networks
|
|
146
559
|
relations = get_csi_associations(layers, base_layer)
|
|
147
560
|
elsif meth == :transference #tripartite networks
|
|
@@ -154,20 +567,19 @@ class Network
|
|
|
154
567
|
#---------------------------------------------------------
|
|
155
568
|
# Alaimo 2014, doi: 10.3389/fbioe.2014.00071
|
|
156
569
|
def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5)
|
|
570
|
+
relations = []
|
|
157
571
|
matrix1 = @adjacency_matrices[firstPairLayers].first
|
|
158
572
|
rowIds = @adjacency_matrices[firstPairLayers][1]
|
|
159
573
|
matrix2 = @adjacency_matrices[secondPairLayers].first
|
|
160
574
|
colIds = @adjacency_matrices[secondPairLayers][2]
|
|
161
|
-
m1rowNumber = matrix1.
|
|
162
|
-
|
|
163
|
-
m2rowNumber = matrix2.rows
|
|
164
|
-
m2colNumber = matrix2.cols
|
|
575
|
+
m1rowNumber, m1colNumber = matrix1.shape
|
|
576
|
+
m2rowNumber, m2colNumber = matrix2.shape
|
|
165
577
|
#puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
|
|
166
578
|
matrix1Weight = graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
|
|
167
579
|
matrix2Weight = graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
|
|
168
|
-
matrixWeightProduct =
|
|
169
|
-
finalMatrix =
|
|
170
|
-
relations =
|
|
580
|
+
matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
|
|
581
|
+
finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
|
|
582
|
+
relations = matrix2relations(finalMatrix, rowIds, colIds)
|
|
171
583
|
@association_values[:transference] = relations
|
|
172
584
|
return relations
|
|
173
585
|
end
|
|
@@ -176,15 +588,14 @@ class Network
|
|
|
176
588
|
#---------------------------------------------------------
|
|
177
589
|
# Bass 2013, doi:10.1038/nmeth.2728
|
|
178
590
|
def get_associations(layers, base_layer) # BASE METHOD
|
|
179
|
-
|
|
180
|
-
get_all_pairs(layers: layers) do |node1, node2|
|
|
591
|
+
associations = get_all_pairs(layers: layers) do |node1, node2|
|
|
181
592
|
associatedIDs_node1 = @edges[node1].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
|
|
182
593
|
associatedIDs_node2 = @edges[node2].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
|
|
183
594
|
intersectedIDs = associatedIDs_node1 & associatedIDs_node2
|
|
184
595
|
associationValue = yield(associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2)
|
|
185
|
-
|
|
596
|
+
[node1, node2, associationValue]
|
|
186
597
|
end
|
|
187
|
-
return
|
|
598
|
+
return associations
|
|
188
599
|
end
|
|
189
600
|
|
|
190
601
|
def get_jaccard_association(layers, base_layer)
|
|
@@ -227,7 +638,8 @@ class Network
|
|
|
227
638
|
|
|
228
639
|
def get_pcc_associations(layers, base_layer)
|
|
229
640
|
#for Ny calcule use get_nodes_layer
|
|
230
|
-
|
|
641
|
+
base_layer_nodes = get_nodes_layer([base_layer])
|
|
642
|
+
ny = base_layer_nodes.length
|
|
231
643
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
|
232
644
|
intersProd = intersectedIDs.length * ny
|
|
233
645
|
nodesProd = associatedIDs_node1.length * associatedIDs_node2.length
|
|
@@ -240,84 +652,145 @@ class Network
|
|
|
240
652
|
return relations
|
|
241
653
|
end
|
|
242
654
|
|
|
243
|
-
def get_hypergeometric_associations(layers, base_layer)
|
|
244
|
-
ny = get_nodes_layer(
|
|
655
|
+
def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil)
|
|
656
|
+
ny = get_nodes_layer([base_layer]).length
|
|
657
|
+
fet = Rubystats::FishersExactTest.new
|
|
245
658
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
|
246
|
-
|
|
659
|
+
fisher = 0
|
|
247
660
|
intersection_lengths = intersectedIDs.length
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
# binom_product_float = BigDecimal.new(binom_product)
|
|
259
|
-
# to_f = true
|
|
260
|
-
# end
|
|
261
|
-
# sum += binom_product_float / binom(ny, nB)
|
|
262
|
-
# sum = sum.to_f if to_f # once the operation has finished, sum is corced from bigdecimal to float
|
|
661
|
+
if intersection_lengths > 0
|
|
662
|
+
n1_items = associatedIDs_node1.length
|
|
663
|
+
n2_items = associatedIDs_node2.length
|
|
664
|
+
fisher = fet.calculate(
|
|
665
|
+
intersection_lengths,
|
|
666
|
+
n1_items - intersection_lengths,
|
|
667
|
+
n2_items - intersection_lengths,
|
|
668
|
+
ny - (n1_items + n2_items - intersection_lengths)
|
|
669
|
+
)
|
|
670
|
+
fisher = fisher[:right]
|
|
263
671
|
end
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
672
|
+
fisher
|
|
673
|
+
end
|
|
674
|
+
if pvalue_adj_method == :bonferroni
|
|
675
|
+
meth = :hypergeometric_bf
|
|
676
|
+
compute_adjusted_pvalue_bonferroni(relations)
|
|
677
|
+
elsif pvalue_adj_method == :benjamini_hochberg
|
|
678
|
+
meth = :hypergeometric_bh
|
|
679
|
+
compute_adjusted_pvalue_benjaminiHochberg(relations)
|
|
680
|
+
else
|
|
681
|
+
meth = :hypergeometric
|
|
682
|
+
compute_log_transformation(relations)
|
|
270
683
|
end
|
|
271
|
-
@association_values[
|
|
684
|
+
@association_values[meth] = relations
|
|
272
685
|
return relations
|
|
273
686
|
end
|
|
274
687
|
|
|
275
|
-
def
|
|
276
|
-
|
|
688
|
+
def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01)
|
|
689
|
+
relations = []
|
|
690
|
+
reference_layer = (layers - @layer_ontologies.keys).first
|
|
691
|
+
ontology_layer = (layers - [reference_layer]).first
|
|
692
|
+
ref_nodes = get_nodes_from_layer(reference_layer) # get nodes from NOT ontology layer
|
|
693
|
+
ontology = @layer_ontologies[ontology_layer]
|
|
694
|
+
base_layer_length = @nodes.values.count{|n| n.type == base_layer}
|
|
695
|
+
ref_nodes.each do |ref_node|
|
|
696
|
+
base_nodes = get_connected_nodes(ref_node, base_layer)
|
|
697
|
+
ontology_base_subgraph = get_bipartite_subgraph(base_nodes, base_layer, ontology_layer) # get shared nodes between nodes from NOT ontology layer and ONTOLOGY layer. Also get the conections between shared nodes and ontology nodes.
|
|
698
|
+
next if ontology_base_subgraph.empty?
|
|
699
|
+
ontology_base_subgraph.transform_keys!{|k| k.to_sym}
|
|
700
|
+
ontology.load_item_relations_to_terms(ontology_base_subgraph, remove_old_relations = true)
|
|
701
|
+
term_pvals = ontology.compute_relations_to_items(base_nodes, base_layer_length, mode, thresold)
|
|
702
|
+
relations.concat(term_pvals.map{|term| [ref_node, term[0], term[1]]})
|
|
703
|
+
end
|
|
704
|
+
compute_log_transformation(relations)
|
|
705
|
+
if mode == :elim
|
|
706
|
+
meth = :hypergeometric_elim
|
|
707
|
+
elsif mode == :weight
|
|
708
|
+
meth = :hypergeometric_weight
|
|
709
|
+
end
|
|
710
|
+
@association_values[meth] = relations
|
|
711
|
+
return relations
|
|
712
|
+
end
|
|
713
|
+
|
|
714
|
+
def compute_adjusted_pvalue(relations, log_val=true)
|
|
715
|
+
relations.each_with_index do |data, i| #p1, p2, pval
|
|
716
|
+
pval_adj = yield(data.last, i)
|
|
717
|
+
pval_adj = -Math.log10(pval_adj) if log_val && pval_adj > 0
|
|
718
|
+
data[2] = pval_adj
|
|
719
|
+
end
|
|
720
|
+
end
|
|
721
|
+
|
|
722
|
+
def compute_log_transformation(relations) #Only perform log transform whitout adjust pvalue. Called when adjusted method is not defined
|
|
723
|
+
compute_adjusted_pvalue(relations) do |pval, index|
|
|
724
|
+
pval
|
|
725
|
+
end
|
|
726
|
+
end
|
|
727
|
+
|
|
728
|
+
def compute_adjusted_pvalue_bonferroni(relations)
|
|
729
|
+
n_comparations = relations.length
|
|
730
|
+
compute_adjusted_pvalue(relations) do |pval, index|
|
|
731
|
+
adj = pval * n_comparations
|
|
732
|
+
adj = 1 if adj > 1
|
|
733
|
+
adj
|
|
734
|
+
end
|
|
735
|
+
end
|
|
736
|
+
|
|
737
|
+
def compute_adjusted_pvalue_benjaminiHochberg(relations)
|
|
738
|
+
adj_pvalues = get_benjaminiHochberg_pvalues(relations.map{|rel| rel.last})
|
|
739
|
+
compute_adjusted_pvalue(relations) do |pval, index|
|
|
740
|
+
adj_pvalues[index]
|
|
741
|
+
end
|
|
742
|
+
end
|
|
743
|
+
|
|
744
|
+
def add_record(hash, node1, node2)
|
|
745
|
+
query = hash[node1]
|
|
277
746
|
if query.nil?
|
|
278
|
-
hash[
|
|
747
|
+
hash[node1] = [node2]
|
|
748
|
+
else
|
|
749
|
+
query << node2
|
|
750
|
+
end
|
|
751
|
+
end
|
|
752
|
+
|
|
753
|
+
def add_nested_record(hash, node1, node2, val)
|
|
754
|
+
query_node1 = hash[node1]
|
|
755
|
+
if query_node1.nil?
|
|
756
|
+
hash[node1] = {node2 => val}
|
|
279
757
|
else
|
|
280
|
-
|
|
758
|
+
query_node1[node2] = val
|
|
281
759
|
end
|
|
282
760
|
end
|
|
283
761
|
|
|
762
|
+
|
|
284
763
|
# Derives a CSI association value for every pair returned by get_pcc_associations
# and stores the resulting list in @association_values[:csi].
#
# For each pair (A, B) with PCC value p, the neighbours of A and of B whose stored
# (absolute) PCC is >= p - 0.05 are counted, and
#   csi = 1 - (count - 1) / nx
# where nx is the number of nodes returned by get_nodes_layer(layers).
#
# layers     - layers forwarded to get_pcc_associations / get_nodes_layer.
# base_layer - base layer forwarded to get_pcc_associations.
#
# Returns an Array of [node1, node2, csi_value] triples.
def get_csi_associations(layers, base_layer)
  pcc_relations = get_pcc_associations(layers, base_layer)
  clean_autorelations_on_association_values if layers.length > 1
  nx = get_nodes_layer(layers).length

  # Symmetric lookup tables: absolute PCC per pair and neighbours per node.
  pcc_vals = {}
  node_rels = {}
  pcc_relations.each do |node_a, node_b, pcc|
    abs_pcc = pcc.abs
    add_nested_record(pcc_vals, node_a, node_b, abs_pcc)
    add_nested_record(pcc_vals, node_b, node_a, abs_pcc)
    add_record(node_rels, node_a, node_b)
    add_record(node_rels, node_b, node_a)
  end

  relations = pcc_relations.map do |node_a, node_b, pcc|
    # NOTE(review): the threshold uses the signed PCC while the stored values are
    # absolute; confirm this asymmetry is intended for negative correlations.
    threshold = pcc - 0.05
    around_a = node_rels[node_a].count { |node| pcc_vals[node_a][node] >= threshold }
    around_b = node_rels[node_b].count { |node| pcc_vals[node_b][node] >= threshold }
    # One unit is subtracted because the A-B connection itself is counted twice
    # (once from each side).
    [node_a, node_b, 1 - (around_a + around_b - 1).fdiv(nx)]
  end

  @association_values[:csi] = relations
  relations
end
|
|
320
792
|
|
|
793
|
+
|
|
321
794
|
## PERFORMANCE METHODS
|
|
322
795
|
############################################################
|
|
323
796
|
def load_control(ref_array)
|
|
@@ -381,11 +854,11 @@ class Network
|
|
|
381
854
|
if !pred_info.nil?
|
|
382
855
|
labels, scores = pred_info
|
|
383
856
|
reliable_labels = get_reliable_labels(labels, scores, cut, top)
|
|
384
|
-
|
|
385
857
|
predicted_labels += reliable_labels.length #m
|
|
386
858
|
common_labels += (c_labels & reliable_labels).length #k
|
|
387
859
|
end
|
|
388
860
|
end
|
|
861
|
+
#puts "cut: #{cut} trueL: #{true_labels} predL: #{predicted_labels} commL: #{common_labels}"
|
|
389
862
|
prec = common_labels.to_f/predicted_labels
|
|
390
863
|
rec = common_labels.to_f/true_labels
|
|
391
864
|
prec = 0.0 if prec.nan?
|
|
@@ -393,19 +866,132 @@ class Network
|
|
|
393
866
|
return prec, rec
|
|
394
867
|
end
|
|
395
868
|
|
|
869
|
+
## KERNEL METHODS
|
|
870
|
+
#######################################################################################
|
|
871
|
+
# Computes a kernel matrix from the adjacency matrix stored for +layer2kernel+ and
# caches it in @kernels[layer2kernel].
#
# Notation used below: I = identity matrix, A = adjacency matrix,
# D = diagonal matrix built from the row sums of A, L = D - A (Laplacian).
#
# Kernel selectors:
#   'el'     exponential Laplacian diffusion kernel, expm(-beta * L) with beta = 0.02
#   'ct'     commute time kernel, pseudo-inverse of L
#   'rf'     random forest kernel, (I + L)^-1
#   '<x>vn'  von Neumann diffusion kernel, (I - alpha * A)^-1
#   '<x>rl'  regularized Laplacian kernel, (I + alpha * L)^-1
#   'me'     Markov exponential diffusion kernel, expm((beta / N) * (N * I - D + A)), beta = 0.04
#   'ka'     kernelized adjacency matrix, A + |lambda_min| * I
#   '<t>md'  Markov diffusion kernel with t steps
# For 'vn' and 'rl', alpha is the numeric part of the selector divided by the value
# returned by matrix.max_eigenvalue. The numeric part is whatever remains after
# removing the 'vn'/'rl'/'md' tag, so its position in the string does not matter.
#
# layer2kernel  - key of @adjacency_matrices (and of @kernels for the result).
# kernel        - kernel selector. nil, a Symbol or an unknown value is accepted;
#                 an unrecognised selector returns the adjacency matrix with a warning.
# normalization - when true, the result is passed through cosine_normalization.
#
# Returns the computed kernel matrix.
def get_kernel(layer2kernel, kernel, normalization=false)
  matrix, _node_names = @adjacency_matrices[layer2kernel]
  # A missing selector (nil) must reach the warning fallback below instead of
  # failing on String-only calls such as #include?; Symbols are accepted too.
  kernel = kernel.to_s
  matrix_result = nil
  dimension_elements = matrix.shape.last
  # In scuba code, the diagonal values of A are set to 0. In a weighted matrix the
  # kernel result is the same with or without this operation; in the md kernel it
  # does affect the values of the final kernel. It is deliberately not done here.
  if kernel == 'el' || kernel == 'ct' || kernel == 'rf' ||
     kernel.include?('vn') || kernel.include?('rl') || kernel == 'me'
    # sum(1) gives the total of each row (sum(0) would give the column totals);
    # diag turns that vector into a diagonal matrix.
    diagonal_matrix = matrix.sum(1).diag
    matrix_L = diagonal_matrix - matrix
    if kernel == 'el' # Exponential Laplacian diffusion kernel (active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
      beta = 0.02
      beta_product = matrix_L * -beta
      matrix_result = Numo::Linalg.expm(beta_product, 14)
    elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      # NOTE(review): it has been suggested that this kernel should be normalized;
      # the paper is not clear on whether that applies to ct, to Kvn or to all kernels.
      matrix_result = Numo::Linalg.pinv(matrix_L)
    elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) # Krf = (I + L)^-1
    elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      # alpha = impact of penalization (1, 0.5 or 0.1) / max eigenvalue of A
      alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha) # (I - alpha * A)^-1
    elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
      # alpha = impact of penalization (1, 0.5 or 0.1) / max eigenvalue of A
      alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1
      matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha) # (I + alpha * L)^-1
    elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
      beta = 0.04
      # (beta / N) * (N * I - D + A)
      id_mat = Numo::DFloat.eye(dimension_elements)
      m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix) * (beta / dimension_elements)
      matrix_result = Numo::Linalg.expm(m_matrix, 16)
    end
  elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
    # Ka = A + lambda * I, lambda = absolute value of the smallest eigenvalue of A
    lambda_value = matrix.min_eigenvalue
    matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs
  elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
    t = kernel.gsub('md', '').to_i
    # TODO: check implementation with Numo::array
    row_sum = matrix.sum(1)
    p_mat = matrix.div_by_vector(row_sum)
    p_temp_mat = p_mat.clone
    zt_mat = p_mat.clone
    # Accumulate P + P^2 + ... + P^t, then average over the t steps.
    (t - 1).times do
      p_temp_mat = p_temp_mat.dot(p_mat)
      zt_mat = zt_mat + p_temp_mat
    end
    zt_mat = zt_mat * (1.0 / t)
    matrix_result = zt_mat.dot(zt_mat.transpose)
  else
    # Returning the input untouched allows processing a previously computed kernel
    # and performing only the normalization as a separate step.
    matrix_result = matrix
    warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
  end
  matrix_result = matrix_result.cosine_normalization if normalization # TODO: check implementation with Numo::array
  @kernels[layer2kernel] = matrix_result
end
|
|
936
|
+
|
|
937
|
+
# Saves the kernel previously computed for +layer2kernel+ by calling #save on it.
#
# layer2kernel - key of @kernels identifying the kernel to write.
# output_file  - destination passed to the kernel's #save method.
#
# Raises ArgumentError when no kernel is stored for +layer2kernel+.
# Returns whatever the kernel's #save method returns.
def write_kernel(layer2kernel, output_file)
  kernel_matrix = @kernels[layer2kernel]
  if kernel_matrix.nil?
    raise ArgumentError, "No kernel has been computed for layer '#{layer2kernel}'"
  end
  kernel_matrix.save(output_file)
end
|
|
940
|
+
|
|
941
|
+
# Associates an ontology with a layer, loading the ontology file only the first
# time its path is seen.
#
# @loaded_obos and @ontologies are parallel lists: the ontology built from
# @loaded_obos[i] lives at @ontologies[i].
#
# ontology_file_path - path handed to Ontology.new when the file is not loaded yet.
# layer_name         - key under which the ontology is stored in @layer_ontologies.
#
# Returns the ontology linked to the layer.
def link_ontology(ontology_file_path, layer_name)
  position = @loaded_obos.index(ontology_file_path)
  if position.nil?
    # First time this file is requested: load it and remember it.
    ontology = Ontology.new(file: ontology_file_path, load_file: true)
    @loaded_obos << ontology_file_path
    @ontologies << ontology
  else
    # Already loaded: reuse the existing object for the current layer.
    ontology = @ontologies[position]
  end
  @layer_ontologies[layer_name] = ontology
end
|
|
396
951
|
|
|
397
952
|
|
|
398
953
|
## AUXILIARY METHODS
|
|
399
954
|
#######################################################################################
|
|
400
955
|
private
|
|
401
956
|
|
|
957
|
+
# Reads +file+ and returns its lines with the trailing line terminator removed.
#
# File.readlines opens and closes the file itself, so no file handle is left
# open (the previous File.open(...).readlines never closed it).
#
# file - path of the text file to read.
#
# Returns an Array of Strings, one per line.
def load_input_list(file)
  File.readlines(file).map(&:chomp)
end
|
|
960
|
+
|
|
961
|
+
# Tells whether two nodes share at least one connected node.
#
# ids_connected_to_n1 - collection of ids connected to the first node, or nil.
# ids_connected_to_n2 - collection of ids connected to the second node, or nil.
#
# Returns true when both collections are present and their intersection is not
# empty, false otherwise.
def exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
  return false if ids_connected_to_n1.nil? || ids_connected_to_n2.nil?

  !(ids_connected_to_n1 & ids_connected_to_n2).empty?
end
|
|
970
|
+
|
|
971
|
+
# Resolves the layer a node belongs to and registers that layer in @layers.
#
# layer_definitions - non-empty collection of [layer_name, regexp] pairs.
# node_name         - node identifier matched against each regexp.
#
# With several definitions, the first pair whose regexp matches +node_name+ wins.
# With a single definition, its layer is used without testing the regexp.
#
# Raises ArgumentError when +layer_definitions+ is empty.
# Raises RuntimeError when several definitions are given and none matches.
# Returns the layer name.
def set_layer(layer_definitions, node_name)
  raise ArgumentError, 'At least one layer definition is required' if layer_definitions.empty?

  if layer_definitions.length > 1
    matched = layer_definitions.find { |_layer_name, regexp| node_name =~ regexp }
    layer = matched && matched.first
    raise("The node '#{node_name}' not match with any layer regex") if layer.nil?
  else
    layer = layer_definitions.first.first
  end
  @layers << layer unless @layers.include?(layer)
  layer
end
|
|
987
|
+
|
|
402
988
|
# Builds n_cuts + 1 evenly spaced cut points starting at limits.first.
#
# The step is |limits.last - limits.first| / n_cuts, held as a BigDecimal limited
# to 10 significant digits so that each cut is computed as first + n * step
# rather than by repeatedly adding a binary Float step.
#
# limits - collection whose first and last elements are the numeric bounds.
# n_cuts - number of intervals.
#
# Returns an Array of Floats.
def get_cuts(limits, n_cuts)
  first_cut = limits.first
  step = BigDecimal((limits.last - first_cut).abs.fdiv(n_cuts), 10)
  (0..n_cuts).map { |n| (first_cut + n * step).to_f }
end
|
|
@@ -420,35 +1006,46 @@ class Network
|
|
|
420
1006
|
end
|
|
421
1007
|
|
|
422
1008
|
# Computes a square weight matrix from +inputMatrix+:
#
#   W = A . (A . diag(1 / column_sums))^T
#   result[r, c] = W[r, c] / (kx[r] ** (1 - lambda) * kx[c] ** lambda)
#
# where A is +inputMatrix+ and kx holds its row sums.
#
# The normalisation matrix is built as a broadcast outer product instead of
# filling two dense matrices cell by cell in Ruby loops.
#
# rowsNumber  - number of rows of +inputMatrix+ (kept for interface compatibility;
#               the size is taken from the matrix itself).
# colsNumber  - number of columns of +inputMatrix+ (unused, kept for compatibility).
# inputMatrix - Numo matrix. NOTE(review): rows or columns summing to 0 give
#               divisions by zero; confirm callers never pass them.
# lambdaValue - exponent balancing the row/column contribution (default 0.5).
#
# Returns the weight matrix (rows x rows).
def graphWeights (rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
  ky = (1.0 / inputMatrix.sum(0)).diag # inverse of the column sums as a diagonal matrix
  weight = Numo::Linalg.dot(inputMatrix, ky).transpose
  ky = nil # free memory
  weight = Numo::Linalg.dot(inputMatrix, weight)

  kx = inputMatrix.sum(1) # row sums

  # Cast to DFloat so the normalisation is computed in double precision, as it
  # was when the values were stored in DFloat matrices.
  kx_lamb = Numo::DFloat.cast(kx ** lambdaValue)
  kx_inv_lamb = Numo::DFloat.cast(kx ** (1 - lambdaValue))

  # (n, 1) * (1, n) broadcasts to the n x n matrix kx[r]**(1-lambda) * kx[c]**lambda.
  nx = 1.0 / (kx_inv_lamb.expand_dims(1) * kx_lamb.expand_dims(0))
  kx_lamb = nil # free memory
  kx_inv_lamb = nil # free memory

  weight.inplace * nx # inplace makes the product overwrite weight instead of allocating a new matrix
  return weight
end
|
|
435
1040
|
|
|
436
|
-
def
|
|
1041
|
+
# Converts a matrix into a list of relations, keeping only positive values.
#
# finalMatrix - object answering finalMatrix[row_position, col_position].
# rowIds      - identifiers of the rows, in matrix order.
# colIds      - identifiers of the columns, in matrix order.
#
# Returns an Array of [row_id, col_id, value] triples for every cell whose value
# is greater than 0, in row-major order.
def matrix2relations(finalMatrix, rowIds, colIds)
  rowIds.each_with_index.with_object([]) do |(row_id, row_pos), relations|
    colIds.each_with_index do |col_id, col_pos|
      value = finalMatrix[row_pos, col_pos]
      relations << [row_id, col_id, value] if value > 0
    end
  end
end
|
|
446
|
-
|
|
447
|
-
def binom(n,k)
|
|
448
|
-
if k > 0 && k < n
|
|
449
|
-
res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
|
|
450
|
-
else
|
|
451
|
-
res = 1
|
|
452
|
-
end
|
|
453
|
-
end
|
|
454
1051
|
end
|