NetAnalyzer 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NetAnalyzer.gemspec +1 -1
- data/README.md +3 -1
- data/bin/NetAnalyzer.rb +5 -15
- data/bin/randomize_clustering.rb +121 -0
- data/bin/randomize_network.rb +89 -0
- data/bin/ranker_gene.rb +121 -0
- data/bin/text2binary_matrix.rb +15 -1
- data/lib/NetAnalyzer/adv_mat_calc.rb +117 -0
- data/lib/NetAnalyzer/net_parser.rb +50 -0
- data/lib/NetAnalyzer/net_plotter.rb +145 -0
- data/lib/NetAnalyzer/network.rb +474 -564
- data/lib/NetAnalyzer/nodes.rb +15 -0
- data/lib/NetAnalyzer/performancer.rb +98 -0
- data/lib/NetAnalyzer/ranker.rb +250 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +5 -0
- metadata +19 -8
data/lib/NetAnalyzer/network.rb
CHANGED
@@ -1,34 +1,26 @@
|
|
1
1
|
require 'rubystats'
|
2
|
-
require 'gv'
|
3
2
|
#require 'nmatrix'
|
4
3
|
#require 'nmatrix/lapacke'
|
5
4
|
require 'numo/narray'
|
6
5
|
require 'numo/linalg'
|
6
|
+
require 'npy'
|
7
7
|
require 'parallel'
|
8
8
|
|
9
9
|
#require 'pp'
|
10
|
-
require 'bigdecimal'
|
11
10
|
require 'benchmark'
|
12
11
|
#require 'nmatrix_expansion'
|
13
12
|
|
14
|
-
|
15
|
-
#For javascrip plotting
|
16
|
-
require 'erb'
|
17
|
-
require 'base64'
|
18
|
-
require 'json'
|
19
|
-
require 'zlib'
|
20
|
-
|
21
13
|
require 'semtools'
|
22
14
|
require 'expcalc'
|
23
|
-
TEMPLATES = File.join(File.dirname(__FILE__), 'templates')
|
24
15
|
|
25
|
-
class Network
|
26
16
|
|
27
|
-
|
17
|
+
class Network
|
18
|
+
|
19
|
+
attr_accessor :adjacency_matrices, :association_values, :control_connections, :kernels, :reference_nodes, :group_nodes, :threads, :nodes, :edges, :compute_pairs, :compute_autorelations
|
28
20
|
|
29
21
|
## BASIC METHODS
|
30
22
|
############################################################
|
31
|
-
def initialize(layers)
|
23
|
+
def initialize(layers) # DONE
|
32
24
|
@threads = 0
|
33
25
|
@nodes = {}
|
34
26
|
@edges = {}
|
@@ -46,21 +38,57 @@ class Network
|
|
46
38
|
@layer_ontologies = {}
|
47
39
|
end
|
48
40
|
|
49
|
-
def
|
41
|
+
def clone # DONE
|
42
|
+
network_clone = Network.new(@layers.clone)
|
43
|
+
network_clone.threads = @threads.clone
|
44
|
+
network_clone.nodes = @nodes.clone
|
45
|
+
network_clone.edges = @edges.clone
|
46
|
+
network_clone.reference_nodes = @reference_nodes.clone
|
47
|
+
network_clone.group_nodes = @group_nodes.clone
|
48
|
+
network_clone.adjacency_matrices = @adjacency_matrices.clone
|
49
|
+
network_clone.kernels = @kernels.clone
|
50
|
+
network_clone.association_values = @association_values.clone
|
51
|
+
network_clone.control_connections = @control_connections.clone
|
52
|
+
network_clone.set_compute_pairs(@compute_pairs.clone, @compute_autorelations.clone)
|
53
|
+
#network_clone.loaded_obos = @loaded_obos.clone
|
54
|
+
#network_clone.ontologies = @ontologies.clone
|
55
|
+
#network_clone.layer_ontologies = @layer_ontologies.clone
|
56
|
+
|
57
|
+
return network_clone
|
58
|
+
end
|
59
|
+
|
60
|
+
def ==(other) # DONE
|
61
|
+
are_equal = true
|
62
|
+
if self.threads != other.threads ||
|
63
|
+
self.nodes != other.nodes ||
|
64
|
+
self.edges != other.edges ||
|
65
|
+
self.reference_nodes != other.reference_nodes ||
|
66
|
+
self.group_nodes != other.group_nodes ||
|
67
|
+
self.adjacency_matrices != other.adjacency_matrices ||
|
68
|
+
self.association_values != other.association_values ||
|
69
|
+
self.control_connections != other.control_connections ||
|
70
|
+
self.compute_pairs != other.compute_pairs ||
|
71
|
+
self.compute_autorelations != other.compute_autorelations
|
72
|
+
are_equal = false
|
73
|
+
end
|
74
|
+
return are_equal
|
75
|
+
end
|
76
|
+
|
77
|
+
def set_compute_pairs(use_pairs, get_autorelations) #DONE
|
50
78
|
@compute_pairs = use_pairs
|
51
79
|
@compute_autorelations = get_autorelations
|
52
80
|
end
|
53
81
|
|
54
|
-
def add_node(nodeID, nodeType = 0)
|
82
|
+
def add_node(nodeID, nodeType = 0) # DONE
|
55
83
|
@nodes[nodeID] = Node.new(nodeID, nodeType)
|
56
84
|
end
|
57
85
|
|
58
|
-
def add_edge(nodeID1, nodeID2)
|
59
|
-
|
60
|
-
|
86
|
+
def add_edge(nodeID1, nodeID2) # DONE
|
87
|
+
add_edge2hash(nodeID1, nodeID2)
|
88
|
+
add_edge2hash(nodeID2, nodeID1)
|
61
89
|
end
|
62
90
|
|
63
|
-
def
|
91
|
+
def add_edge2hash(nodeA, nodeB) # NOT
|
64
92
|
query = @edges[nodeA]
|
65
93
|
if query.nil?
|
66
94
|
@edges[nodeA] = [nodeB]
|
@@ -69,7 +97,48 @@ class Network
|
|
69
97
|
end
|
70
98
|
end
|
71
99
|
|
72
|
-
def
|
100
|
+
def set_layer(layer_definitions, node_name) # DONE
|
101
|
+
layer = nil
|
102
|
+
if layer_definitions.length > 1
|
103
|
+
layer_definitions.each do |layer_name, regexp|
|
104
|
+
if node_name =~ regexp
|
105
|
+
layer = layer_name
|
106
|
+
break
|
107
|
+
end
|
108
|
+
end
|
109
|
+
raise("The node '#{node_name}' not match with any layer regex") if layer.nil?
|
110
|
+
else
|
111
|
+
layer = layer_definitions.first.first
|
112
|
+
end
|
113
|
+
@layers << layer if !@layers.include?(layer)
|
114
|
+
return layer
|
115
|
+
end
|
116
|
+
|
117
|
+
def generate_adjacency_matrix(layerA, layerB) # DONE
|
118
|
+
layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
|
119
|
+
layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
|
120
|
+
matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
|
121
|
+
layerAidNodes.each_with_index do |nodeA, i|
|
122
|
+
layerBidNodes.each_with_index do |nodeB, j|
|
123
|
+
if @edges[nodeB].include?(nodeA)
|
124
|
+
matrix[i, j] = 1
|
125
|
+
else
|
126
|
+
matrix[i, j] = 0
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
|
131
|
+
|
132
|
+
if layerA == layerB
|
133
|
+
@adjacency_matrices[[layerA]] = all_info_matrix
|
134
|
+
else
|
135
|
+
@adjacency_matrices[[layerA, layerB]] = all_info_matrix
|
136
|
+
end
|
137
|
+
return all_info_matrix
|
138
|
+
end
|
139
|
+
|
140
|
+
|
141
|
+
def delete_nodes(node_list, mode='d') #DONE
|
73
142
|
if mode == 'd'
|
74
143
|
@nodes.reject!{|n| node_list.include?(n)}
|
75
144
|
@edges.reject!{|n, connections| node_list.include?(n)}
|
@@ -86,21 +155,17 @@ class Network
|
|
86
155
|
@edges.reject!{|n, connections| connections.empty?}
|
87
156
|
end
|
88
157
|
|
89
|
-
def get_connected_nodes(node_id, from_layer)
|
158
|
+
def get_connected_nodes(node_id, from_layer) # DONE
|
90
159
|
return @edges[node_id].map{|id| @nodes[id]}.select{|node| node.type == from_layer}.map{|node| node.id}
|
91
160
|
end
|
92
161
|
|
93
|
-
def
|
94
|
-
return @nodes.values.select{|node| node.type == from_layer}.map{|node| node.id}
|
95
|
-
end
|
96
|
-
|
97
|
-
def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer)
|
162
|
+
def get_bipartite_subgraph(from_layer_node_ids, from_layer, to_layer) # DONE
|
98
163
|
bipartite_subgraph = {}
|
99
164
|
from_layer_node_ids.each do |from_layer_node_id|
|
100
165
|
connected_nodes = @edges[from_layer_node_id]
|
101
|
-
connected_nodes.each do |connected_node|
|
166
|
+
connected_nodes.each do |connected_node|
|
102
167
|
if @nodes[connected_node].type == to_layer
|
103
|
-
query = bipartite_subgraph[connected_node]
|
168
|
+
query = bipartite_subgraph[connected_node]
|
104
169
|
if query.nil?
|
105
170
|
bipartite_subgraph[connected_node] = get_connected_nodes(connected_node, from_layer)
|
106
171
|
end
|
@@ -110,39 +175,18 @@ class Network
|
|
110
175
|
return bipartite_subgraph
|
111
176
|
end
|
112
177
|
|
113
|
-
def load_network_by_pairs(file, layers, split_character="\t")
|
114
|
-
File.open(file).each do |line|
|
115
|
-
line.chomp!
|
116
|
-
pair = line.split(split_character)
|
117
|
-
node1 = pair[0]
|
118
|
-
node2 = pair[1]
|
119
|
-
add_node(node1, set_layer(layers, node1))
|
120
|
-
add_node(node2, set_layer(layers, node2))
|
121
|
-
add_edge(node1, node2)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def load_network_by_bin_matrix(input_file, node_file, layers)
|
126
|
-
node_names = load_input_list(node_file)
|
127
|
-
@adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='npy'), node_names, node_names]
|
128
|
-
end
|
129
|
-
|
130
|
-
def load_network_by_plain_matrix(input_file, node_file, layers, splitChar)
|
131
|
-
node_names = load_input_list(node_file)
|
132
|
-
@adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, type='txt', splitChar=splitChar), node_names, node_names]
|
133
|
-
end
|
134
178
|
|
135
|
-
def get_edge_number
|
136
|
-
node_connections = get_degree.values.inject(0){|sum, n| sum + n}
|
179
|
+
def get_edge_number # DONE
|
180
|
+
node_connections = get_degree(zscore = false).values.inject(0){|sum, n| sum + n}
|
137
181
|
return node_connections/2
|
138
182
|
end
|
139
183
|
|
140
|
-
def get_degree(zscore=
|
184
|
+
def get_degree(zscore=true) # DONE
|
141
185
|
degree = {}
|
142
186
|
@edges.each do |id, nodes|
|
143
187
|
degree[id] = nodes.length
|
144
188
|
end
|
145
|
-
if
|
189
|
+
if zscore
|
146
190
|
degree_values = degree.values
|
147
191
|
mean_degree = degree_values.mean
|
148
192
|
std_degree = degree_values.standard_deviation
|
@@ -151,154 +195,114 @@ class Network
|
|
151
195
|
return degree
|
152
196
|
end
|
153
197
|
|
154
|
-
def
|
155
|
-
|
156
|
-
|
157
|
-
if attr_name == 'get_degree'
|
158
|
-
attrs << get_degree
|
159
|
-
elsif attr_name == 'get_degreeZ'
|
160
|
-
attrs << get_degree(zscore=true)
|
161
|
-
end
|
162
|
-
end
|
163
|
-
node_ids = attrs.first.keys
|
164
|
-
node_attrs = []
|
165
|
-
node_ids.each do |n|
|
166
|
-
node_attrs << [n].concat(attrs.map{|at| at[n]})
|
198
|
+
def get_all_intersections(args = {}) # DONE
|
199
|
+
intersection_lengths = get_all_pairs(args) do |node1, node2|
|
200
|
+
intersection(node1, node2).length
|
167
201
|
end
|
168
|
-
return
|
202
|
+
return intersection_lengths
|
169
203
|
end
|
170
204
|
|
171
|
-
def
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
pair = [nodeID, associatedID].sort.join('_').to_sym
|
200
|
-
if !plotted_edges[pair]
|
201
|
-
graph.edge 'e',
|
202
|
-
graph.node(nodeID, label: '', style: 'filled', fillcolor: palette[@nodes[nodeID].type]),
|
203
|
-
graph.node(associatedID, label: '', style: 'filled' , fillcolor: palette[@nodes[associatedID].type])
|
204
|
-
plotted_edges[pair] = true
|
205
|
+
def get_all_pairs(args = {}) # DONE
|
206
|
+
all_pairs = [] #lo que se devolvera
|
207
|
+
default = {:layers => :all}
|
208
|
+
args = default.merge(args)
|
209
|
+
nodeIDsA, nodeIDsB = collect_nodes(args)
|
210
|
+
if @compute_autorelations
|
211
|
+
if @compute_pairs == :all
|
212
|
+
while !nodeIDsA.empty?
|
213
|
+
node1 = nodeIDsA.shift
|
214
|
+
pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
|
215
|
+
yield(node1, node2)
|
216
|
+
end
|
217
|
+
all_pairs.concat(pairs)
|
218
|
+
end
|
219
|
+
elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
|
220
|
+
while !nodeIDsA.empty?
|
221
|
+
node1 = nodeIDsA.shift
|
222
|
+
ids_connected_to_n1 = @edges[node1]
|
223
|
+
pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
|
224
|
+
result = nil
|
225
|
+
ids_connected_to_n2 = @edges[node2]
|
226
|
+
if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
227
|
+
result = yield(node1, node2)
|
228
|
+
end
|
229
|
+
result
|
230
|
+
end
|
231
|
+
pairs.compact!
|
232
|
+
all_pairs.concat(pairs)
|
205
233
|
end
|
206
234
|
end
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
metrics = []
|
225
|
-
header = ['group']
|
226
|
-
@group_nodes.keys.each do |k|
|
227
|
-
metrics << [k]
|
228
|
-
end
|
229
|
-
header << 'comparative_degree'
|
230
|
-
comparative_degree = communities_comparative_degree(@group_nodes)
|
231
|
-
comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
|
232
|
-
header << 'avg_sht_path'
|
233
|
-
avg_sht_path = communities_avg_sht_path(@group_nodes)
|
234
|
-
avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
|
235
|
-
if !@reference_nodes.empty?
|
236
|
-
header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
|
237
|
-
node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
|
238
|
-
node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
|
239
|
-
end
|
240
|
-
File.open(output_filename, 'w') do |f|
|
241
|
-
f.puts header.join("\t")
|
242
|
-
metrics.each do |gr|
|
243
|
-
f. puts gr.join("\t")
|
235
|
+
else
|
236
|
+
#MAIN METHOD
|
237
|
+
if @compute_pairs == :conn
|
238
|
+
all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
|
239
|
+
ids_connected_to_n1 = @edges[node1]
|
240
|
+
node1_pairs = []
|
241
|
+
nodeIDsB.each do |node2|
|
242
|
+
ids_connected_to_n2 = @edges[node2]
|
243
|
+
if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
244
|
+
node1_pairs << yield(node1, node2)
|
245
|
+
end
|
246
|
+
end
|
247
|
+
node1_pairs
|
248
|
+
end
|
249
|
+
all_pairs.flatten!(1)
|
250
|
+
elsif @compute_pairs == :all
|
251
|
+
raise 'Not implemented'
|
244
252
|
end
|
245
253
|
end
|
246
|
-
end
|
247
|
-
|
248
|
-
def replace_nil_vals(val)
|
249
|
-
return val.nil? ? 'NULL' : val
|
250
|
-
end
|
251
254
|
|
252
|
-
|
253
|
-
comparative_degrees = []
|
254
|
-
coms.each do |com_id, com|
|
255
|
-
comparative_degrees << compute_comparative_degree(com)
|
256
|
-
end
|
257
|
-
return comparative_degrees
|
255
|
+
return all_pairs
|
258
256
|
end
|
259
257
|
|
260
|
-
def
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
258
|
+
def collect_nodes(args) # DONE
|
259
|
+
nodeIDsA = nil
|
260
|
+
nodeIDsB = nil
|
261
|
+
if @compute_autorelations
|
262
|
+
if args[:layers] == :all
|
263
|
+
nodeIDsA = @nodes.keys
|
264
|
+
else
|
265
|
+
nodeIDsA = []
|
266
|
+
args[:layers].each do |layer|
|
267
|
+
nodeIDsA.concat(@nodes.select{|id, node| node.type == layer}.keys)
|
268
|
+
end
|
269
|
+
end
|
270
|
+
else
|
271
|
+
if args[:layers] != :all
|
272
|
+
nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
|
273
|
+
nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
|
274
|
+
end
|
265
275
|
end
|
266
|
-
return
|
276
|
+
return nodeIDsA, nodeIDsB
|
267
277
|
end
|
268
278
|
|
269
|
-
def
|
270
|
-
|
271
|
-
|
272
|
-
|
279
|
+
def get_nodes_layer(layers) # DONE
|
280
|
+
#for creating ny value in hypergeometric and pcc index
|
281
|
+
nodes = []
|
282
|
+
layers.each do |layer|
|
283
|
+
nodes.concat(@nodes.select{|nodeId, node| node.type == layer}.values)
|
273
284
|
end
|
274
|
-
return
|
285
|
+
return nodes
|
275
286
|
end
|
276
287
|
|
277
|
-
def
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
next if nodeIDneigh.nil?
|
283
|
-
internal_degree += (nodeIDneigh & com).length
|
284
|
-
external_degree += (nodeIDneigh - com).length
|
288
|
+
def intersection(node1, node2) # DONE
|
289
|
+
shared_nodes = []
|
290
|
+
intersectedIDs = @edges[node1] & @edges[node2]
|
291
|
+
intersectedIDs.each do |id|
|
292
|
+
shared_nodes << @nodes[id]
|
285
293
|
end
|
286
|
-
|
287
|
-
return comparative_degree
|
294
|
+
return shared_nodes
|
288
295
|
end
|
289
296
|
|
290
|
-
def compute_avg_sht_path(com, paths=false)
|
297
|
+
def compute_avg_sht_path(com, paths=false) # DONE
|
291
298
|
path_lengths = []
|
292
299
|
all_paths = []
|
293
300
|
group = com.dup
|
294
301
|
while !group.empty?
|
295
302
|
node_start = group.shift
|
296
303
|
sht_paths = Parallel.map(group, in_processes: @threads) do |node_stop|
|
297
|
-
#group.each do |node_stop|
|
298
304
|
dist, path = shortest_path(node_start, node_stop, paths)
|
299
305
|
[dist, path]
|
300
|
-
#path_lengths << dist if !dist.nil?
|
301
|
-
#all_paths << path if !path.empty?
|
302
306
|
end
|
303
307
|
sht_paths.each do |dist, path|
|
304
308
|
path_lengths << dist
|
@@ -315,7 +319,7 @@ class Network
|
|
315
319
|
|
316
320
|
# https://pythoninwonderland.wordpress.com/2017/03/18/how-to-implement-breadth-first-search-in-python/
|
317
321
|
# finds shortest path between 2 nodes of a graph using BFS
|
318
|
-
def bfs_shortest_path(start, goal, paths=false)
|
322
|
+
def bfs_shortest_path(start, goal, paths=false) # NOT
|
319
323
|
dist = nil
|
320
324
|
explored = {} # keep track of explored nodes
|
321
325
|
previous = {}
|
@@ -348,7 +352,7 @@ class Network
|
|
348
352
|
return dist, path
|
349
353
|
end
|
350
354
|
|
351
|
-
def build_path(previous, startNode, stopNode)
|
355
|
+
def build_path(previous, startNode, stopNode) # NOT
|
352
356
|
path = []
|
353
357
|
currentNode = stopNode
|
354
358
|
path << currentNode
|
@@ -359,7 +363,7 @@ class Network
|
|
359
363
|
return path
|
360
364
|
end
|
361
365
|
|
362
|
-
def shortest_path(node_start, node_stop, paths=false)
|
366
|
+
def shortest_path(node_start, node_stop, paths=false) # DONE
|
363
367
|
#https://betterprogramming.pub/5-ways-to-find-the-shortest-path-in-a-graph-88cfefd0030f
|
364
368
|
#return bidirectionalSearch(node_start, node_stop)
|
365
369
|
#https://efficientcodeblog.wordpress.com/2017/12/13/bidirectional-search-two-end-bfs/
|
@@ -367,19 +371,100 @@ class Network
|
|
367
371
|
return dist, all_paths
|
368
372
|
end
|
369
373
|
|
370
|
-
def
|
371
|
-
|
372
|
-
|
373
|
-
if
|
374
|
-
|
375
|
-
|
376
|
-
|
374
|
+
def get_node_attributes(attr_names)
|
375
|
+
attrs = []
|
376
|
+
attr_names.each do |attr_name|
|
377
|
+
if attr_name == 'get_degree'
|
378
|
+
attrs << get_degree(zscore=false)
|
379
|
+
elsif attr_name == 'get_degreeZ'
|
380
|
+
attrs << get_degree
|
381
|
+
end
|
382
|
+
end
|
383
|
+
node_ids = attrs.first.keys
|
384
|
+
node_attrs = []
|
385
|
+
node_ids.each do |n|
|
386
|
+
node_attrs << [n].concat(attrs.map{|at| at[n]})
|
387
|
+
end
|
388
|
+
return node_attrs
|
389
|
+
end
|
390
|
+
|
391
|
+
def plot_network(options = {})
|
392
|
+
net_data = {
|
393
|
+
group_nodes: @group_nodes,
|
394
|
+
reference_nodes: @reference_nodes,
|
395
|
+
nodes: @nodes,
|
396
|
+
edges: @edges,
|
397
|
+
layers: @layers
|
398
|
+
}
|
399
|
+
Net_plotter.new(net_data, options)
|
400
|
+
end
|
401
|
+
|
402
|
+
# Compute communities/group properties
|
403
|
+
#----------------------------------------------
|
404
|
+
def compute_group_metrics(output_filename) # DONE
|
405
|
+
metrics = []
|
406
|
+
header = ['group']
|
407
|
+
@group_nodes.keys.each do |k|
|
408
|
+
metrics << [k]
|
409
|
+
end
|
410
|
+
header << 'comparative_degree'
|
411
|
+
comparative_degree = communities_comparative_degree(@group_nodes)
|
412
|
+
comparative_degree.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
|
413
|
+
header << 'avg_sht_path'
|
414
|
+
avg_sht_path = communities_avg_sht_path(@group_nodes)
|
415
|
+
avg_sht_path.each_with_index{|val,i| metrics[i] << replace_nil_vals(val)}
|
416
|
+
if !@reference_nodes.empty?
|
417
|
+
header.concat(%w[node_com_assoc_by_edge node_com_assoc_by_node])
|
418
|
+
node_com_assoc = compute_node_com_assoc_in_precomputed_communities(@group_nodes, @reference_nodes.first)
|
419
|
+
node_com_assoc.each_with_index{|val,i| metrics[i].concat(val)}
|
420
|
+
end
|
421
|
+
File.open(output_filename, 'w') do |f|
|
422
|
+
f.puts header.join("\t")
|
423
|
+
metrics.each do |gr|
|
424
|
+
f. puts gr.join("\t")
|
377
425
|
end
|
378
426
|
end
|
379
|
-
return clusters
|
380
427
|
end
|
381
428
|
|
382
|
-
def
|
429
|
+
def communities_comparative_degree(coms) # DONE
|
430
|
+
comparative_degrees = []
|
431
|
+
coms.each do |com_id, com|
|
432
|
+
comparative_degrees << compute_comparative_degree(com)
|
433
|
+
end
|
434
|
+
return comparative_degrees
|
435
|
+
end
|
436
|
+
|
437
|
+
def communities_avg_sht_path(coms) # DONE
|
438
|
+
avg_sht_path = []
|
439
|
+
coms.each do |com_id, com|
|
440
|
+
dist, paths = compute_avg_sht_path(com)
|
441
|
+
avg_sht_path << dist
|
442
|
+
end
|
443
|
+
return avg_sht_path
|
444
|
+
end
|
445
|
+
|
446
|
+
def compute_node_com_assoc_in_precomputed_communities(coms, ref_node) # DONE
|
447
|
+
node_com_assoc = []
|
448
|
+
coms.each do |com_id, com|
|
449
|
+
node_com_assoc << [compute_node_com_assoc(com, ref_node)]
|
450
|
+
end
|
451
|
+
return node_com_assoc
|
452
|
+
end
|
453
|
+
|
454
|
+
def compute_comparative_degree(com) # DONE # see Girvan-Newman Benchmark control parameter in http://networksciencebook.com/chapter/9#testing (communities chapter)
|
455
|
+
internal_degree = 0
|
456
|
+
external_degree = 0
|
457
|
+
com.each do |nodeID|
|
458
|
+
nodeIDneigh = @edges[nodeID]
|
459
|
+
next if nodeIDneigh.nil?
|
460
|
+
internal_degree += (nodeIDneigh & com).length
|
461
|
+
external_degree += (nodeIDneigh - com).length
|
462
|
+
end
|
463
|
+
comparative_degree = external_degree.fdiv(external_degree + internal_degree)
|
464
|
+
return comparative_degree
|
465
|
+
end
|
466
|
+
|
467
|
+
def compute_node_com_assoc(com, ref_node) # DONE
|
383
468
|
ref_cons = 0
|
384
469
|
ref_secondary_cons = 0
|
385
470
|
secondary_nodes = {}
|
@@ -405,137 +490,32 @@ class Network
|
|
405
490
|
return by_edge, by_node
|
406
491
|
end
|
407
492
|
|
408
|
-
def
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
def get_all_pairs(args = {})
|
416
|
-
all_pairs = []
|
417
|
-
default = {:layers => :all}
|
418
|
-
args = default.merge(args)
|
419
|
-
nodeIDsA, nodeIDsB = collect_nodes(args)
|
420
|
-
if @compute_autorelations
|
421
|
-
if @compute_pairs == :all
|
422
|
-
while !nodeIDsA.empty?
|
423
|
-
node1 = nodeIDsA.shift
|
424
|
-
pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
|
425
|
-
yield(node1, node2)
|
426
|
-
end
|
427
|
-
all_pairs.concat(pairs)
|
428
|
-
end
|
429
|
-
elsif @compute_pairs == :conn # TODO: Review this case to avoid return nil values
|
430
|
-
while !nodeIDsA.empty?
|
431
|
-
node1 = nodeIDsA.shift
|
432
|
-
ids_connected_to_n1 = @edges[node1]
|
433
|
-
pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node2|
|
434
|
-
result = nil
|
435
|
-
ids_connected_to_n2 = @edges[node2]
|
436
|
-
if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
437
|
-
result = yield(node1, node2)
|
438
|
-
end
|
439
|
-
result
|
440
|
-
end
|
441
|
-
pairs.compact!
|
442
|
-
all_pairs.concat(pairs)
|
443
|
-
end
|
444
|
-
end
|
445
|
-
else
|
446
|
-
#MAIN METHOD
|
447
|
-
if @compute_pairs == :conn
|
448
|
-
all_pairs = Parallel.map(nodeIDsA, in_processes: @threads) do |node1|
|
449
|
-
ids_connected_to_n1 = @edges[node1]
|
450
|
-
node1_pairs = []
|
451
|
-
nodeIDsB.each do |node2|
|
452
|
-
ids_connected_to_n2 = @edges[node2]
|
453
|
-
if exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
454
|
-
node1_pairs << yield(node1, node2)
|
455
|
-
end
|
456
|
-
end
|
457
|
-
node1_pairs
|
458
|
-
end
|
459
|
-
all_pairs.flatten!(1)
|
460
|
-
elsif @compute_pairs == :all
|
461
|
-
raise 'Not implemented'
|
462
|
-
end
|
463
|
-
end
|
464
|
-
|
465
|
-
return all_pairs
|
466
|
-
end
|
467
|
-
|
468
|
-
def collect_nodes(args)
|
469
|
-
nodeIDsA = nil
|
470
|
-
nodeIDsB = nil
|
471
|
-
if @compute_autorelations
|
472
|
-
if args[:layers] == :all
|
473
|
-
nodeIDsA = @nodes.keys
|
474
|
-
else
|
475
|
-
nodeIDsA = []
|
476
|
-
args[:layers].each do |layer|
|
477
|
-
nodeIDsA.concat(@nodes.select{|id, node| node.type == layer}.keys)
|
478
|
-
end
|
479
|
-
end
|
480
|
-
else
|
481
|
-
if args[:layers] != :all
|
482
|
-
nodeIDsA = @nodes.select{|id, node| node.type == args[:layers][0]}.keys
|
483
|
-
nodeIDsB = @nodes.select{|id, node| node.type == args[:layers][1]}.keys
|
493
|
+
def expand_clusters(expand_method) # DONE
|
494
|
+
clusters = {}
|
495
|
+
@group_nodes.each do |id, nodes|
|
496
|
+
if expand_method == 'sht_path'
|
497
|
+
dist, paths = compute_avg_sht_path(nodes, paths=true) # this uses bfs, maybe Dijkstra is the best one
|
498
|
+
new_nodes = paths.flatten.uniq
|
499
|
+
clusters[id] = nodes | new_nodes # If some node pair are not connected, recover them
|
484
500
|
end
|
485
501
|
end
|
486
|
-
return
|
502
|
+
return clusters
|
487
503
|
end
|
488
504
|
|
489
505
|
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
layers.each do |layer|
|
494
|
-
nodes.concat(@nodes.select{|nodeId, node| node.type == layer}.values)
|
495
|
-
end
|
496
|
-
return nodes
|
497
|
-
end
|
498
|
-
|
499
|
-
def intersection(node1, node2)
|
500
|
-
shared_nodes = []
|
501
|
-
associatedIDs_node1 = @edges[node1]
|
502
|
-
associatedIDs_node2 = @edges[node2]
|
503
|
-
intersectedIDs = associatedIDs_node1 & associatedIDs_node2
|
504
|
-
intersectedIDs.each do |id|
|
505
|
-
shared_nodes << @nodes[id]
|
506
|
-
end
|
507
|
-
return shared_nodes
|
508
|
-
end
|
509
|
-
|
510
|
-
def generate_adjacency_matrix(layerA, layerB)
|
511
|
-
layerAidNodes = @nodes.select{|id, node| node.type == layerA}.keys
|
512
|
-
layerBidNodes = @nodes.select{|id, node| node.type == layerB}.keys
|
513
|
-
matrix = Numo::DFloat.zeros(layerAidNodes.length, layerBidNodes.length)
|
514
|
-
layerAidNodes.each_with_index do |nodeA, i|
|
515
|
-
layerBidNodes.each_with_index do |nodeB, j|
|
516
|
-
if @edges[nodeB].include?(nodeA)
|
517
|
-
matrix[i, j] = 1
|
518
|
-
else
|
519
|
-
matrix[i, j] = 0
|
520
|
-
end
|
521
|
-
end
|
522
|
-
end
|
523
|
-
all_info_matrix = [matrix, layerAidNodes, layerBidNodes]
|
524
|
-
@adjacency_matrices[[layerA, layerB]] = all_info_matrix
|
525
|
-
return all_info_matrix
|
526
|
-
end
|
527
|
-
|
528
|
-
def clean_autorelations_on_association_values
|
506
|
+
## ASSOCIATION METHODS
|
507
|
+
############################################################
|
508
|
+
def clean_autorelations_on_association_values # DONE
|
529
509
|
@association_values.each do |meth, values|
|
530
510
|
values.select!{|relation| @nodes[relation[0]].type != @nodes[relation[1]].type}
|
531
511
|
end
|
532
512
|
end
|
533
513
|
|
534
|
-
|
535
|
-
############################################################
|
536
|
-
def get_association_values(layers, base_layer, meth)
|
514
|
+
def get_association_values(layers, base_layer, meth) # DONE
|
537
515
|
relations = [] #node A, node B, val
|
538
|
-
if meth == :
|
516
|
+
if meth == :counts
|
517
|
+
relations = get_counts_association(layers, base_layer)
|
518
|
+
elsif meth == :jaccard #all networks
|
539
519
|
relations = get_jaccard_association(layers, base_layer)
|
540
520
|
elsif meth == :simpson #all networks
|
541
521
|
relations = get_simpson_association(layers, base_layer)
|
@@ -565,20 +545,13 @@ class Network
|
|
565
545
|
|
566
546
|
## association methods adjacency matrix based
|
567
547
|
#---------------------------------------------------------
|
568
|
-
|
569
|
-
def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5)
|
548
|
+
def get_association_by_transference_resources(firstPairLayers, secondPairLayers, lambda_value1 = 0.5, lambda_value2 = 0.5) # DONE
|
570
549
|
relations = []
|
571
550
|
matrix1 = @adjacency_matrices[firstPairLayers].first
|
572
|
-
rowIds = @adjacency_matrices[firstPairLayers][1]
|
573
551
|
matrix2 = @adjacency_matrices[secondPairLayers].first
|
552
|
+
finalMatrix = Adv_mat_calc.tranference_resources(matrix1, matrix2, lambda_value1 = lambda_value1, lambda_value2 = lambda_value2)
|
553
|
+
rowIds = @adjacency_matrices[firstPairLayers][1]
|
574
554
|
colIds = @adjacency_matrices[secondPairLayers][2]
|
575
|
-
m1rowNumber, m1colNumber = matrix1.shape
|
576
|
-
m2rowNumber, m2colNumber = matrix2.shape
|
577
|
-
#puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
|
578
|
-
matrix1Weight = graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
|
579
|
-
matrix2Weight = graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
|
580
|
-
matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
|
581
|
-
finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
|
582
555
|
relations = matrix2relations(finalMatrix, rowIds, colIds)
|
583
556
|
@association_values[:transference] = relations
|
584
557
|
return relations
|
@@ -587,7 +560,7 @@ class Network
|
|
587
560
|
## association methods node pairs based
|
588
561
|
#---------------------------------------------------------
|
589
562
|
# Bass 2013, doi:10.1038/nmeth.2728
|
590
|
-
def get_associations(layers, base_layer) # BASE METHOD
|
563
|
+
def get_associations(layers, base_layer) # DONE BASE METHOD
|
591
564
|
associations = get_all_pairs(layers: layers) do |node1, node2|
|
592
565
|
associatedIDs_node1 = @edges[node1].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
|
593
566
|
associatedIDs_node2 = @edges[node2].map{|id| @nodes[id]}.select{|node| node.type == base_layer}.map{|node| node.id}
|
@@ -598,7 +571,15 @@ class Network
|
|
598
571
|
return associations
|
599
572
|
end
|
600
573
|
|
601
|
-
def
|
574
|
+
def get_counts_association(layers, base_layer) # DONE
|
575
|
+
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
576
|
+
countValue = intersectedIDs.length
|
577
|
+
end
|
578
|
+
@association_values[:counts] = relations
|
579
|
+
return relations
|
580
|
+
end
|
581
|
+
|
582
|
+
def get_jaccard_association(layers, base_layer) # DONE
|
602
583
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
603
584
|
unionIDS = associatedIDs_node1 | associatedIDs_node2
|
604
585
|
jaccValue = intersectedIDs.length.to_f/unionIDS.length
|
@@ -607,7 +588,7 @@ class Network
|
|
607
588
|
return relations
|
608
589
|
end
|
609
590
|
|
610
|
-
def get_simpson_association(layers, base_layer)
|
591
|
+
def get_simpson_association(layers, base_layer) # DONE
|
611
592
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
612
593
|
minLength = [associatedIDs_node1.length, associatedIDs_node2.length].min
|
613
594
|
simpsonValue = intersectedIDs.length.to_f/minLength
|
@@ -616,7 +597,7 @@ class Network
|
|
616
597
|
return relations
|
617
598
|
end
|
618
599
|
|
619
|
-
def get_geometric_associations(layers, base_layer)
|
600
|
+
def get_geometric_associations(layers, base_layer) # DONE
|
620
601
|
#wang 2016 method
|
621
602
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
622
603
|
intersectedIDs = intersectedIDs.length**2
|
@@ -627,7 +608,7 @@ class Network
|
|
627
608
|
return relations
|
628
609
|
end
|
629
610
|
|
630
|
-
def get_cosine_associations(layers, base_layer)
|
611
|
+
def get_cosine_associations(layers, base_layer) # DONE
|
631
612
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
632
613
|
productLength = Math.sqrt(associatedIDs_node1.length * associatedIDs_node2.length)
|
633
614
|
cosineValue = intersectedIDs.length/productLength
|
@@ -636,7 +617,7 @@ class Network
|
|
636
617
|
return relations
|
637
618
|
end
|
638
619
|
|
639
|
-
def get_pcc_associations(layers, base_layer)
|
620
|
+
def get_pcc_associations(layers, base_layer) # DONE
|
640
621
|
#for Ny calcule use get_nodes_layer
|
641
622
|
base_layer_nodes = get_nodes_layer([base_layer])
|
642
623
|
ny = base_layer_nodes.length
|
@@ -652,7 +633,7 @@ class Network
|
|
652
633
|
return relations
|
653
634
|
end
|
654
635
|
|
655
|
-
def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil)
|
636
|
+
def get_hypergeometric_associations(layers, base_layer, pvalue_adj_method= nil) # DONE
|
656
637
|
ny = get_nodes_layer([base_layer]).length
|
657
638
|
fet = Rubystats::FishersExactTest.new
|
658
639
|
relations = get_associations(layers, base_layer) do |associatedIDs_node1, associatedIDs_node2, intersectedIDs, node1, node2|
|
@@ -685,11 +666,11 @@ class Network
|
|
685
666
|
return relations
|
686
667
|
end
|
687
668
|
|
688
|
-
def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01)
|
669
|
+
def get_hypergeometric_associations_with_topology(layers, base_layer, mode, thresold = 0.01) # NOT
|
689
670
|
relations = []
|
690
671
|
reference_layer = (layers - @layer_ontologies.keys).first
|
691
672
|
ontology_layer = (layers - [reference_layer]).first
|
692
|
-
ref_nodes =
|
673
|
+
ref_nodes = get_nodes_layer([reference_layer]).map{|n| n.id} # get nodes from NOT ontology layer
|
693
674
|
ontology = @layer_ontologies[ontology_layer]
|
694
675
|
base_layer_length = @nodes.values.count{|n| n.type == base_layer}
|
695
676
|
ref_nodes.each do |ref_node|
|
@@ -711,7 +692,7 @@ class Network
|
|
711
692
|
return relations
|
712
693
|
end
|
713
694
|
|
714
|
-
def compute_adjusted_pvalue(relations, log_val=true)
|
695
|
+
def compute_adjusted_pvalue(relations, log_val=true) # DONE
|
715
696
|
relations.each_with_index do |data, i| #p1, p2, pval
|
716
697
|
pval_adj = yield(data.last, i)
|
717
698
|
pval_adj = -Math.log10(pval_adj) if log_val && pval_adj > 0
|
@@ -719,13 +700,13 @@ class Network
|
|
719
700
|
end
|
720
701
|
end
|
721
702
|
|
722
|
-
def compute_log_transformation(relations) #Only perform log transform whitout adjust pvalue. Called when adjusted method is not defined
|
703
|
+
# Only performs the log transformation of the p-values, without adjusting them.
# Called when no adjustment method is defined.
# The identity block hands every p-value back unchanged, so the only thing
# compute_adjusted_pvalue can apply is its own (default-enabled) log step.
def compute_log_transformation(relations) # NOT
  compute_adjusted_pvalue(relations) { |pval, _index| pval }
end
|
727
708
|
|
728
|
-
def compute_adjusted_pvalue_bonferroni(relations)
|
709
|
+
def compute_adjusted_pvalue_bonferroni(relations) # DONE
|
729
710
|
n_comparations = relations.length
|
730
711
|
compute_adjusted_pvalue(relations) do |pval, index|
|
731
712
|
adj = pval * n_comparations
|
@@ -734,34 +715,16 @@ class Network
|
|
734
715
|
end
|
735
716
|
end
|
736
717
|
|
737
|
-
def compute_adjusted_pvalue_benjaminiHochberg(relations)
|
718
|
+
# Replaces the p-value of every relation with its Benjamini-Hochberg adjusted
# value. The p-value is taken from the last element of each relation; the
# adjusted values are computed once for the whole list and then looked up by
# position while compute_adjusted_pvalue walks the relations.
def compute_adjusted_pvalue_benjaminiHochberg(relations) # DONE
  raw_pvalues = relations.map(&:last)
  corrected = get_benjaminiHochberg_pvalues(raw_pvalues)
  compute_adjusted_pvalue(relations) { |_pval, position| corrected[position] }
end
|
743
724
|
|
744
|
-
def
|
745
|
-
query = hash[node1]
|
746
|
-
if query.nil?
|
747
|
-
hash[node1] = [node2]
|
748
|
-
else
|
749
|
-
query << node2
|
750
|
-
end
|
751
|
-
end
|
752
|
-
|
753
|
-
def add_nested_record(hash, node1, node2, val)
|
754
|
-
query_node1 = hash[node1]
|
755
|
-
if query_node1.nil?
|
756
|
-
hash[node1] = {node2 => val}
|
757
|
-
else
|
758
|
-
query_node1[node2] = val
|
759
|
-
end
|
760
|
-
end
|
761
|
-
|
762
|
-
|
763
|
-
def get_csi_associations(layers, base_layer)
|
725
|
+
def get_csi_associations(layers, base_layer) # DONE
|
764
726
|
pcc_relations = get_pcc_associations(layers, base_layer)
|
727
|
+
pcc_relations.select!{|row| !row[2].nan?}
|
765
728
|
clean_autorelations_on_association_values if layers.length > 1
|
766
729
|
nx = get_nodes_layer(layers).length
|
767
730
|
pcc_vals = {}
|
@@ -776,177 +739,161 @@ class Network
|
|
776
739
|
pcc_relations.each do |node1, node2 ,assoc_index|
|
777
740
|
pccAB = assoc_index - 0.05
|
778
741
|
valid_nodes = 0
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
csiValue = 1 - (
|
786
|
-
# valid_nodes-1 is done due to the connection node1-node2 is counted twice (one for each loop)
|
742
|
+
|
743
|
+
significant_nodes_from_node1 = node_rels[node1].select{|node| pcc_vals[node1][node] >= pccAB}
|
744
|
+
significant_nodes_from_node2 = node_rels[node2].select{|node| pcc_vals[node2][node] >= pccAB}
|
745
|
+
all_significant_nodes = significant_nodes_from_node2 | significant_nodes_from_node1
|
746
|
+
all_nodes = node_rels[node1] | node_rels[node2]
|
747
|
+
|
748
|
+
csiValue = 1 - (all_significant_nodes.length).fdiv(all_nodes.length)
|
787
749
|
relations << [node1, node2, csiValue]
|
788
750
|
end
|
789
751
|
@association_values[:csi] = relations
|
790
752
|
return relations
|
791
753
|
end
|
792
754
|
|
755
|
+
# Computes a kernel from the adjacency matrix stored under +layer2kernel+ and
# keeps the result in @kernels under the same key.
# layer2kernel:: key of the entry in @adjacency_matrices
# kernel:: kernel identifier, forwarded untouched to Adv_mat_calc.get_kernel
# normalization:: forwarded untouched as the fourth positional argument
# Returns the value produced by Adv_mat_calc.get_kernel.
def get_kernel(layer2kernel, kernel, normalization=false) # DONE
  # Only the first two elements of the stored entry are used here.
  adjacency, node_names = @adjacency_matrices[layer2kernel]
  @kernels[layer2kernel] = Adv_mat_calc.get_kernel(adjacency, node_names, kernel, normalization)
end
|
793
760
|
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
end
|
806
|
-
end
|
761
|
+
# Writes the kernel stored under +layer2kernel+ by calling +save+ on it with
# +output_file+. Returns whatever that +save+ call returns.
def write_kernel(layer2kernel, output_file) # DONE
  kernel_matrix = @kernels[layer2kernel]
  kernel_matrix.save(output_file)
end
|
764
|
+
|
765
|
+
# Associates an ontology with +layer_name+ in @layer_ontologies.
# An ontology file is loaded only once: @loaded_obos keeps the paths already
# loaded and @ontologies keeps the matching Ontology objects at the same
# positions, so a repeated path reuses the object loaded earlier.
# Returns the ontology linked to the layer.
def link_ontology(ontology_file_path, layer_name) # NOT until semtools is migrated
  loaded_position = @loaded_obos.index(ontology_file_path)
  if loaded_position.nil? # Load new ontology
    ontology = Ontology.new(file: ontology_file_path, load_file: true)
    @loaded_obos << ontology_file_path
    @ontologies << ontology
  else # Link loaded ontology to current layer
    ontology = @ontologies[loaded_position]
  end
  @layer_ontologies[layer_name] = ontology
end
|
811
775
|
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
776
|
+
## RANDOMIZATION METHODS
|
777
|
+
############################################################
|
778
|
+
# Randomizes a single-layer network by shuffling, in place, the node id list
# stored next to the adjacency matrix under the @layers key. The shuffled list
# is then set as both the row ids and the column ids of that entry.
# The matrix entry is built from @edges first when it does not exist yet.
# Returns the clone of this network created at the start of the method.
# NOTE(review): the clone is taken before any change and #clone is shallow, so
# it shares @adjacency_matrices but not a reassigned @edges - confirm that the
# returned object is what callers expect.
# NOTE(review): @edges is rebuilt only when it is empty, whereas
# randomize_bipartite_net_by_nodes rebuilds it only when it is NOT empty -
# confirm which condition is intended.
def randomize_monopartite_net_by_nodes # DONE
  random_network = clone
  @adjacency_matrices[@layers] = @edges.to_bmatrix if @adjacency_matrices[@layers].nil?
  stored = @adjacency_matrices[@layers]
  shuffled_ids = stored[1].shuffle!
  stored[1] = shuffled_ids
  stored[2] = shuffled_ids
  @edges = stored.first.bmatrix_squared_to_hash(shuffled_ids) if @edges.empty?
  random_network
end
|
791
|
+
|
792
|
+
# Randomizes a two-layer network by shuffling, in place, the row id list
# stored next to the adjacency matrix under the @layers key; the column ids
# are left as they are.
# The matrix entry is built from @edges first when it does not exist yet.
# Returns the clone of this network created at the start of the method.
# NOTE(review): the clone is taken before any change and #clone is shallow, so
# it shares @adjacency_matrices but not a reassigned @edges - confirm that the
# returned object is what callers expect.
# NOTE(review): @edges is rebuilt only when it is NOT empty, the opposite of
# randomize_monopartite_net_by_nodes - confirm which condition is intended.
def randomize_bipartite_net_by_nodes
  random_network = clone
  @adjacency_matrices[@layers] = @edges.to_bmatrix if @adjacency_matrices[@layers].nil?
  stored = @adjacency_matrices[@layers]
  col_ids = stored[2]
  row_ids = stored[1].shuffle!
  stored[1] = row_ids
  @edges = stored.first.bmatrix_rectangular_to_hash(row_ids, col_ids) unless @edges.empty?
  random_network
end
|
806
|
+
|
807
|
+
# Randomizes a single-layer network by re-pairing the end points of its links.
# The links are read from the stored adjacency matrix with diagonal2relations,
# the list of second end points is shuffled, and every first end point is
# paired with the first remaining partner that is not itself. @edges is
# rebuilt from those pairs and the adjacency matrix is regenerated.
# Returns the value of generate_adjacency_matrix.
# NOTE(review): when the only partners left are equal to the current node, the
# search runs past the end of the list and nil is passed to add_edge - confirm
# how that case should be handled.
# NOTE(review): the layer is handed to generate_adjacency_matrix wrapped in an
# array, while randomize_bipartite_net_by_links passes bare layer names -
# confirm the expected argument type.
def randomize_monopartite_net_by_links # DONE
  layer = [@layers.first]
  # changed to the function created in numo_expansion
  stored = @adjacency_matrices[layer]
  relations = diagonal2relations(stored.first, stored[1], stored[2])
  sources = relations.map { |relation| relation[0] }
  targets = relations.map { |relation| relation[1] }
  targets.shuffle!
  @edges = {}
  sources.each do |source|
    pick = 0
    pick += 1 while source == targets[pick] # skip partners equal to the node itself
    add_edge(source, targets.delete_at(pick))
  end
  generate_adjacency_matrix(layer, layer)
end
|
833
829
|
|
834
830
|
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
prec, rec = pred_rec(preds, cut, top_number)
|
842
|
-
performance << [cut, prec, rec]
|
831
|
+
# Randomizes a two-layer network by re-pairing the end points of its links.
# layers:: key of the adjacency matrix entry; its first two elements are also
#          the arguments given to generate_adjacency_matrix
# The links are read from the adjacency matrix with matrix2relations, the list
# of second end points is shuffled and paired position by position with the
# first end points. @edges is rebuilt from those pairs and the adjacency
# matrix is regenerated.
# Returns the value of generate_adjacency_matrix.
def randomize_bipartite_net_by_links(layers)
  # check whether the matrix exists
  @adjacency_matrices[layers] = @edges.to_bmatrix() if @adjacency_matrices[layers].nil?
  stored = @adjacency_matrices[layers]
  relations = matrix2relations(stored.first, stored[1], stored[2])
  sources = relations.map { |relation| relation[0] }
  targets = relations.map { |relation| relation[1] }
  targets.shuffle!
  @edges = {}
  sources.zip(targets) { |source, target| add_edge(source, target) }
  generate_adjacency_matrix(layers[0], layers[1])
end
|
846
852
|
|
847
|
-
def
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
853
|
+
# Dispatches to the randomization method matching +random_type+ and the number
# of layers of the network.
# random_type:: 'nodes' or 'links'; any other value aborts the program
# Returns the value of the selected randomization method, or nil when the
# network has neither one nor two layers.
def randomize_network(random_type)
  random_network = nil
  case random_type
  when 'nodes'
    case @layers.length
    when 1 then random_network = randomize_monopartite_net_by_nodes
    when 2 then random_network = randomize_bipartite_net_by_nodes
    end
  when 'links'
    case @layers.length
    when 1 then random_network = randomize_monopartite_net_by_links
    # randomize_bipartite_net_by_links requires the layers as argument;
    # calling it without one raised ArgumentError.
    when 2 then random_network = randomize_bipartite_net_by_links(@layers)
    end
  else
    abort("ERROR: The randomization is not available for #{random_type} types of nodes")
  end
  random_network
end
|
871
|
+
|
868
872
|
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
#D = Diagonal matrix
|
875
|
-
#A = adjacency matrix
|
876
|
-
#L = laplacian matrix = D − A
|
877
|
-
matrix_result = nil
|
878
|
-
dimension_elements = matrix.shape.last
|
879
|
-
# In scuba code, the diagonal values of A is set to 0. In weighted matrix the kernel result is the same with or without this operation. Maybe increases the computing performance?
|
880
|
-
# In the md kernel this operation affects the values of the final kernel
|
881
|
-
#dimension_elements.times do |n|
|
882
|
-
# matrix[n,n] = 0.0
|
883
|
-
#end
|
884
|
-
if kernel == 'el' || kernel == 'ct' || kernel == 'rf' ||
|
885
|
-
kernel.include?('vn') || kernel.include?('rl') || kernel == 'me'
|
886
|
-
diagonal_matrix = matrix.sum(1).diag # get the total sum for each row, for this reason the sum method takes the 1 value. If sum colums is desired, use 0
|
887
|
-
# Make a matrix whose diagonal is row_sum
|
888
|
-
matrix_L = diagonal_matrix - matrix
|
889
|
-
if kernel == 'el' #Exponential Laplacian diffusion kernel(active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
|
890
|
-
beta = 0.02
|
891
|
-
beta_product = matrix_L * -beta
|
892
|
-
#matrix_result = beta_product.expm
|
893
|
-
matrix_result = Numo::Linalg.expm(beta_product, 14)
|
894
|
-
elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
|
895
|
-
matrix_result = Numo::Linalg.pinv(matrix_L) # Anibal saids that this kernel was normalized. Why?. Paper do not seem to describe this operation for ct, it describes for Kvn or for all kernels, it is not clear.
|
896
|
-
elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
|
897
|
-
matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) #Krf = (I +L ) ^ −1
|
898
|
-
elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
|
899
|
-
alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
|
900
|
-
matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha ) # (I -alphaA ) ^ −1
|
901
|
-
elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
|
902
|
-
alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (1, 0.5 or 0.1) * spectral radius of A. spectral radius of A = absolute value of max eigenvalue of A
|
903
|
-
matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha ) # (I + alphaL ) ^ −1
|
904
|
-
elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
|
905
|
-
beta=0.04
|
906
|
-
#(beta/N)*(N*I - D + A)
|
907
|
-
id_mat = Numo::DFloat.eye(dimension_elements)
|
908
|
-
m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix ) * (beta/dimension_elements)
|
909
|
-
#matrix_result = m_matrix.expm
|
910
|
-
matrix_result = Numo::Linalg.expm(m_matrix, 16)
|
911
|
-
end
|
912
|
-
elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
|
913
|
-
lambda_value = matrix.min_eigenvalue
|
914
|
-
matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs # Ka = A + lambda*I # lambda = the absolute value of the smallest eigenvalue of A
|
915
|
-
elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
|
916
|
-
t = kernel.gsub('md', '').to_i
|
917
|
-
#TODO: check implementation with Numo::array
|
918
|
-
col_sum = matrix.sum(1)
|
919
|
-
p_mat = matrix.div_by_vector(col_sum)
|
920
|
-
p_temp_mat = p_mat.clone
|
921
|
-
zt_mat = p_mat.clone
|
922
|
-
(t-1).times do
|
923
|
-
p_temp_mat = p_temp_mat.dot(p_mat)
|
924
|
-
zt_mat = zt_mat + p_temp_mat
|
925
|
-
end
|
926
|
-
zt_mat = zt_mat * (1.0/t)
|
927
|
-
matrix_result = zt_mat.dot(zt_mat.transpose)
|
928
|
-
else
|
929
|
-
matrix_result = matrix
|
930
|
-
warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
|
931
|
-
# This allows process a previous kernel and perform the normalization in a separated step.
|
873
|
+
# Saves the adjacency matrix of the given layer pair with Npy.save and writes
# the node ids, one per line, to "<output_file>.lst".
# layerA, layerB:: layer names; when both are equal the matrix is looked up
#                  under [layerA], otherwise under [layerA, layerB]
# output_file:: path given to Npy.save; the id list goes to the same path
#               with '.lst' appended
# NOTE(review): the .lst file lists the id of every node in @nodes, not only
# the ids that index the saved matrix - confirm this is intended.
def save_adjacency_matrix(layerA, layerB, output_file) # NOT
  layers = layerA == layerB ? [layerA] : [layerA, layerB]
  # The lookup key is +layers+; the previous code used an undefined local
  # named +layer+ here and raised NameError.
  Npy.save(output_file, @adjacency_matrices[layers].first)
  node_names = @nodes.values.map { |node| node.id }
  File.open(output_file + '.lst', 'w') { |f| f.print node_names.join("\n") }
end
|
936
883
|
|
937
|
-
def
|
938
|
-
@
|
884
|
+
# Adds to the network every node id found among the row and column ids of a
# stored adjacency matrix.
# layers_network:: forwarded to set_layer to obtain the layer of each node
# layers_adjacency_matrix:: key of the entry in @adjacency_matrices
# Returns the list of distinct node ids that were processed.
def build_nodes_from_adjacency_matrix(layers_network, layers_adjacency_matrix) # NOT
  _matrix, row_ids, col_ids = @adjacency_matrices[layers_adjacency_matrix]
  # A non-mutating union is used on purpose: Array#concat appended the column
  # ids to the stored row id list itself, corrupting the matrix labels (and
  # doubling the list when rows and columns share the same array).
  (row_ids | col_ids).each do |node_id|
    add_node(node_id, set_layer(layers_network, node_id))
  end
end
|
940
890
|
|
941
|
-
def
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
else #Link loaded ontology to current layer
|
947
|
-
ontology = @ontologies[@loaded_obos.index(ontology_file_path)]
|
891
|
+
# Rebuilds @edges from the adjacency matrix stored under +layer+.
# @edges is emptied first; then one edge is added for every relation that
# matrix2relations extracts from the matrix and its row and column ids.
# Returns the list of relations that was iterated.
def build_edges_from_adjacency_matrix(layer) # NOT
  @edges = {}
  stored = @adjacency_matrices[layer]
  matrix2relations(stored.first, stored[1], stored[2]).each do |source, target, _value|
    add_edge(source, target)
  end
end
|
951
898
|
|
952
899
|
|
@@ -954,8 +901,26 @@ class Network
|
|
954
901
|
#######################################################################################
|
955
902
|
private
|
956
903
|
|
957
|
-
def
|
958
|
-
return
|
904
|
+
# Returns the string 'NULL' in place of nil; any other value (false included)
# is returned untouched.
def replace_nil_vals(val) # DONE
  return 'NULL' if val.nil?

  val
end
|
907
|
+
|
908
|
+
# Appends +node2+ to the list stored in +hash+ under +node1+, creating the
# list when the key holds nothing yet. Returns that list.
def add_record(hash, node1, node2) # DONE
  existing = hash[node1]
  return hash[node1] = [node2] if existing.nil?

  existing << node2
end
|
916
|
+
|
917
|
+
# Stores +val+ in the two-level hash at hash[node1][node2], creating the inner
# hash when +node1+ holds nothing yet.
# Returns the new inner hash when it had to be created, +val+ otherwise.
def add_nested_record(hash, node1, node2, val) # DONE
  inner = hash[node1]
  return hash[node1] = { node2 => val } if inner.nil?

  inner[node2] = val
end
|
960
925
|
|
961
926
|
def exist_connections?(ids_connected_to_n1, ids_connected_to_n2)
|
@@ -968,84 +933,29 @@ class Network
|
|
968
933
|
return res
|
969
934
|
end
|
970
935
|
|
971
|
-
def
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
break
|
978
|
-
end
|
936
|
+
# Converts a matrix into a list of [row_id, col_id, value] relations.
# finalMatrix:: object indexed as finalMatrix[row_position, col_position]
# rowIds, colIds:: ids labelling the rows and the columns, by position
# Only cells whose value is greater than 0 are kept; relations come out in
# row-major order.
def matrix2relations(finalMatrix, rowIds, colIds) # DONE
  rowIds.each_with_index.flat_map do |row_id, row_pos|
    row_cells = colIds.each_with_index.map do |col_id, col_pos|
      [row_id, col_id, finalMatrix[row_pos, col_pos]]
    end
    row_cells.select { |cell| cell.last > 0 }
  end
end
|
998
946
|
|
999
|
-
def
|
1000
|
-
reliable_labels = []
|
1001
|
-
scores.each_with_index do |score, i|
|
1002
|
-
reliable_labels << [labels[i], score] if score >= cut
|
1003
|
-
end
|
1004
|
-
reliable_labels = reliable_labels.sort!{|l1,l2| l2.last <=> l1.last}[0..top-1].map{|pred| pred.first}
|
1005
|
-
return reliable_labels
|
1006
|
-
end
|
1007
|
-
|
1008
|
-
def graphWeights (rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
|
1009
|
-
ky = (1.0 / inputMatrix.sum(0)).diag #sum cols
|
1010
|
-
weigth = Numo::Linalg.dot(inputMatrix, ky).transpose
|
1011
|
-
ky = nil #free memory
|
1012
|
-
weigth = Numo::Linalg.dot(inputMatrix, weigth)
|
1013
|
-
|
1014
|
-
kx = inputMatrix.sum(1) #sum rows
|
1015
|
-
|
1016
|
-
kx_lamb = kx ** lambdaValue
|
1017
|
-
kx_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
|
1018
|
-
rowsNumber.times do |j|
|
1019
|
-
rowsNumber.times do |i|
|
1020
|
-
kx_lamb_mat[j,i] = kx_lamb[i]
|
1021
|
-
end
|
1022
|
-
end
|
1023
|
-
kx_lamb = nil #free memory
|
1024
|
-
|
1025
|
-
kx_inv_lamb = kx ** (1 - lambdaValue)
|
1026
|
-
kx_inv_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
|
1027
|
-
rowsNumber.times do |j|
|
1028
|
-
rowsNumber.times do |i|
|
1029
|
-
kx_inv_lamb_mat[i, j] = kx_inv_lamb[i]
|
1030
|
-
end
|
1031
|
-
end
|
1032
|
-
kx_inv_lamb = nil #free memory
|
1033
|
-
|
1034
|
-
nx = 1.0/(kx_lamb_mat.inplace * kx_inv_lamb_mat).inplace # inplace marks a matrix to be used by reference, not for value
|
1035
|
-
kx_lamb_mat = nil #free memory
|
1036
|
-
kx_inv_lamb_mat = nil #free memory
|
1037
|
-
weigth.inplace * nx
|
1038
|
-
return weigth
|
1039
|
-
end
|
1040
|
-
|
1041
|
-
def matrix2relations(finalMatrix, rowIds, colIds)
|
947
|
+
# Converts the part of a matrix located strictly above its main diagonal into
# a list of [row_id, col_id, value] relations.
# finalMatrix:: object indexed as finalMatrix[row_position, col_position]
# rowIds, colIds:: ids labelling the rows and the columns, by position
# For each row only the columns to the right of the diagonal are visited, and
# only cells whose value is greater than 0 are kept.
def diagonal2relations(finalMatrix, rowIds, colIds)
  relations = []
  rowIds.each_with_index do |row_id, row_pos|
    ((row_pos + 1)...colIds.length).each do |col_pos|
      value = finalMatrix[row_pos, col_pos]
      relations << [row_id, colIds[col_pos], value] if value > 0
    end
  end
  relations
end
|
960
|
+
|
1051
961
|
end
|