NetAnalyzer 0.1.5 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/NetAnalyzer.gemspec +16 -6
- data/README.md +16 -2
- data/Rakefile +13 -9
- data/bin/NetAnalyzer.rb +176 -33
- data/bin/randomize_clustering.rb +121 -0
- data/bin/randomize_network.rb +89 -0
- data/bin/ranker_gene.rb +121 -0
- data/bin/text2binary_matrix.rb +308 -0
- data/lib/NetAnalyzer/adv_mat_calc.rb +117 -0
- data/lib/NetAnalyzer/net_parser.rb +50 -0
- data/lib/NetAnalyzer/net_plotter.rb +145 -0
- data/lib/NetAnalyzer/network.rb +723 -249
- data/lib/NetAnalyzer/nodes.rb +15 -0
- data/lib/NetAnalyzer/performancer.rb +98 -0
- data/lib/NetAnalyzer/ranker.rb +250 -0
- data/lib/NetAnalyzer/templates/ElGrapho.min.js +28 -0
- data/lib/NetAnalyzer/templates/cytoscape.erb +65 -0
- data/lib/NetAnalyzer/templates/cytoscape.min.js +32 -0
- data/lib/NetAnalyzer/templates/el_grapho.erb +89 -0
- data/lib/NetAnalyzer/templates/pako.min.js +1 -0
- data/lib/NetAnalyzer/templates/sigma.erb +132 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +7 -0
- metadata +187 -29
data/bin/ranker_gene.rb
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'npy'
|
8
|
+
require 'parallel'
|
9
|
+
require 'NetAnalyzer'
|
10
|
+
|
11
|
+
|
12
|
+
########################### METHODS ########################
|
13
|
+
#############################################################
|
14
|
+
|
15
|
+
# Write every ranked row to +file+ as one tab-separated line, appending the
# name of the seed group the row belongs to as the last column.
#
# file         - path of the output file; it is opened in 'w' mode.
# ranking_list - collection yielding (seed_name, ranking) pairs; each ranking
#                is a list of rows, and each row must respond to #join.
#
# Returns whatever the File.open block evaluates to (the result of
# ranking_list.each).
def write_ranking(file, ranking_list)
  File.open(file, 'w') do |out|
    ranking_list.each do |seed_name, ranking|
      ranking.each { |row| out.puts "#{row.join("\t")}\t#{seed_name}" }
    end
  end
end
|
24
|
+
|
25
|
+
|
26
|
+
########################### OPTPARSE ########################
|
27
|
+
#############################################################
|
28
|
+
|
29
|
+
# Command line options. Every default is stored in `options` before its switch
# is declared, so a key is always present even when the flag is not given.
options = {}
optparse = OptionParser.new do |opts|

  options[:kernel_file] = nil
  opts.on("-k","--input_kernels KER", "The roots from each kernel to integrate") do |ker|
    options[:kernel_file] = ker
  end

  options[:node_file] = nil
  opts.on("-n","--input_nodes NODE", "The list of node for each kernel in lst format") do |node_file|
    options[:node_file] = node_file
  end

  options[:genes_seed] = nil
  opts.on("-s","--genes_seed SEED", "The name of the gene to look for backups") do |genes_seed|
    options[:genes_seed] = genes_seed
  end

  options[:seed_genes_sep] = ","
  opts.on("-S","--genes_seed_sep SEP", "Separator of seed genes. Only use when -s point to a file") do |sep|
    # Fixed: this handler used to store the separator under :genes_seed,
    # clobbering the -s value and leaving :seed_genes_sep at its default.
    options[:seed_genes_sep] = sep
  end

  options[:filter] = nil
  opts.on("-f","--filter NAME", "PATH to file with seed_name and genes to keep in output") do |file|
    options[:filter] = file
  end

  options[:leave_one_out] = false
  opts.on("-l","--leave_one_out", "Perform leave one out from a seed genes group") do
    options[:leave_one_out] = true
  end

  options[:top_n] = nil
  opts.on("-t","--top_n INT", "Top N genes to print in output") do |str|
    options[:top_n] = str.to_i
  end

  options[:output_top] = nil
  opts.on("--output_top PATH", "File to save Top N genes") do |path|
    options[:output_top] = path
  end

  options[:output_name] = "ranked_genes"
  opts.on("-o","--output_name NAME", "The name of the ranked file") do |output_name|
    options[:output_name] = output_name
  end

  options[:threads] = 0
  opts.on( '-T', '--threads INTEGER', 'Number of threads to use in computation, one thread will be reserved as manager.' ) do |opt|
    # One of the requested threads is kept for the manager, hence the -1.
    options[:threads] = opt.to_i - 1
  end
end

# Consume the recognised switches from ARGV.
optparse.parse!
|
82
|
+
|
83
|
+
########################### MAIN ############################
|
84
|
+
#############################################################
|
85
|
+
|
86
|
+
# Load the kernel matrix, its node list and the seed groups, then rank.
ranker = Ranker.new
ranker.matrix = Npy.load(options[:kernel_file])
ranker.load_nodes_from_file(options[:node_file])
ranker.load_seeds(options[:genes_seed], sep: options[:seed_genes_sep])
ranker.load_references(options[:filter], sep: ",") unless options[:filter].nil?
ranker.do_ranking(leave_one_out: options[:leave_one_out], threads: options[:threads])
rankings = ranker.ranking

# Seed groups whose ranking came back empty are reported in a side file.
discarded_seeds = rankings.select { |_seed_name, ranks| ranks.empty? }.keys
unless discarded_seeds.empty?
  File.open("#{options[:output_name]}_discarded", 'w') do |f|
    discarded_seeds.each do |seed_name|
      seed_members = ranker.seeds[seed_name].join(options[:seed_genes_sep])
      f.puts "#{seed_name}\t#{seed_members}"
    end
  end
end

# With --top_n, the top list either goes to its own file (--output_top) or
# replaces the full ranking that is written below.
unless options[:top_n].nil?
  top_n = ranker.get_top(options[:top_n])
  if options[:output_top].nil?
    rankings = top_n
  else
    write_ranking(options[:output_top], top_n)
  end
end

# A reference filter takes precedence over whatever was selected above.
rankings = ranker.get_reference_ranks unless options[:filter].nil?

write_ranking("#{options[:output_name]}_all_candidates", rankings) unless rankings.empty?
|
119
|
+
|
120
|
+
|
121
|
+
|
@@ -0,0 +1,308 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'optparse'
|
5
|
+
#require 'nmatrix'
|
6
|
+
require 'numo/narray'
|
7
|
+
require 'numo/linalg'
|
8
|
+
require 'npy'
|
9
|
+
require 'expcalc'
|
10
|
+
|
11
|
+
#require 'pp'
|
12
|
+
#############################################################################
|
13
|
+
## METHODS
|
14
|
+
##############################################################################
|
15
|
+
|
16
|
+
# Read a delimited text matrix into a Numo::DFloat.
#
# input_file - either a path, or an already opened IO-like object (anything
#              responding to #gets, e.g. STDIN or a File). The main script
#              hands over an open stream, which the previous implementation
#              tried to File.open again (failing for STDIN and leaking the
#              handle it created).
# splitChar  - column separator, tab by default.
#
# The matrix is allocated as row.length x row.length from the first line, so
# the input is expected to be square. Every cell is converted with #to_f.
#
# Returns the filled matrix, or nil when the input has no lines.
def load_matrix_file(input_file, splitChar = "\t")
  matrix = nil
  fill_rows = lambda do |io|
    io.each_line.with_index do |line, row_index|
      row = line.chomp.split(splitChar).map { |cell| cell.to_f }
      matrix ||= Numo::DFloat.zeros(row.length, row.length)
      row.each_with_index { |val, col_index| matrix[row_index, col_index] = val }
    end
  end

  if input_file.respond_to?(:gets)
    # Caller owns the stream: read it but do not close it.
    fill_rows.call(input_file)
  else
    # Block form closes the handle as soon as the rows are read.
    File.open(input_file) { |io| fill_rows.call(io) }
  end
  matrix
end
|
32
|
+
|
33
|
+
# Build a weighted matrix from a tab-separated pair list.
#
# source      - object yielding lines through #each; each line holds
#               node_a, node_b and an optional weight.
# byte_format - accepted for interface compatibility; not used in this body.
#
# A missing weight column counts as 1.0. Every pair is registered in both
# directions through add_pair. The nested hash is then converted with
# Hash#to_wmatrix, which is not defined in this file (presumably supplied by
# the 'expcalc' dependency - TODO confirm).
#
# Returns [matrix, names] as produced by to_wmatrix.
def load_pair_file(source, byte_format = :float32)
  connections = {}
  source.each do |line|
    node_a, node_b, raw_weight = line.chomp.split("\t")
    weight = raw_weight.nil? ? 1.0 : raw_weight.to_f
    [[node_a, node_b], [node_b, node_a]].each do |from, to|
      add_pair(from, to, weight, connections)
    end
  end
  matrix, names = connections.to_wmatrix
  [matrix, names]
end
|
44
|
+
|
45
|
+
# Record the directed connection node_a -> node_b with the given weight.
#
# connections - hash of hashes, mutated in place. The inner hash for a new
#               node_a is created with a default value of 0.0.
#
# Returns the weight when node_a was already known, otherwise the inner hash
# that was just created.
def add_pair(node_a, node_b, weight, connections)
  neighbours = connections[node_a]
  return neighbours[node_b] = weight unless neighbours.nil?

  connections[node_a] = Hash.new(0.0).tap { |fresh| fresh[node_b] = weight }
end
|
55
|
+
|
56
|
+
# Collect the printable statistics of a matrix as [label, value] rows.
#
# Two families of figures are reported, both computed by get_primary_stats:
#   * "Matrix"/"Weigth" rows describe the values of +matrix+ itself;
#   * "Node"/"Edges" rows describe the per-column connection counts returned
#     by get_connection_number(matrix).
#
# Labels (including the historical "Weigth" spelling) are emitted exactly as
# before, since they are part of the program output.
#
# Fixed: the "Edges - Min Non Zero" row used to read the value from the
# weight statistics instead of the connection statistics.
#
# Returns an Array of two-element Arrays, in a fixed order.
def get_stats(matrix)
  # Label suffix -> key in the hash returned by get_primary_stats.
  distribution_keys = [
    ['Max', :max],
    ['Min', :min],
    ['Average', :average],
    ['Variance', :variance],
    ['Standard Deviation', :standardDeviation],
    ['Q1', :q1],
    ['Median', :median],
    ['Q3', :q3],
    ['Min Non Zero', :minNonZero],
    ['Average Non Zero', :averageNonZero],
    ['Variance Non Zero', :varianceNonZero],
    ['Standard Deviation Non Zero', :standardDeviationNonZero],
    ['Q1 Non Zero', :q1NonZero],
    ['Median Non Zero', :medianNonZero],
    ['Q3 Non Zero', :q3NonZero]
  ]

  stats = []
  #TODO: transform to Numo::Array operations
  primary_stats = get_primary_stats(matrix)
  stats << ['Matrix - Dimensions', matrix.shape.join('x')]
  stats << ['Matrix - Elements', primary_stats[:count]]
  stats << ['Matrix - Elements Non Zero', primary_stats[:countNonZero]]
  stats << ['Matrix - Non Zero Density', primary_stats[:countNonZero].fdiv(primary_stats[:count])]
  distribution_keys.each do |label, key|
    stats << ["Weigth - #{label}", primary_stats[key]]
  end

  connection_stats = get_primary_stats(get_connection_number(matrix))
  stats << ['Node - Elements', connection_stats[:count]]
  stats << ['Node - Elements Non Zero', connection_stats[:countNonZero]]
  stats << ['Node - Non Zero Density', connection_stats[:countNonZero].fdiv(connection_stats[:count])]
  distribution_keys.each do |label, key|
    stats << ["Edges - #{label}", connection_stats[key]]
  end
  stats
end
|
102
|
+
|
103
|
+
# Count, for every column of +matrix+, how many entries are different from
# zero, and store that count minus one in a 1 x cols Numo::DFloat.
#
# The "minus one" drops the connection of a node with itself.
# NOTE(review): the subtraction is unconditional, so it presumes a non-zero
# main diagonal - confirm that callers always set it.
#
# Returns the 1 x cols matrix of counts.
def get_connection_number(matrix)
  _rows, cols = matrix.shape
  connections = Numo::DFloat.zeros(1, cols)
  (0...cols).each do |col_index|
    non_zero = 0
    matrix[true, col_index].each { |value| non_zero += 1 unless value == 0 }
    connections[0, col_index] = non_zero - 1
  end
  connections
end
|
116
|
+
|
117
|
+
# Return a new hash holding the same values as +hash+, with every key
# replaced by the result of yielding it to the given block. When two keys map
# to the same new key, the later entry wins. +hash+ itself is not modified.
def transform_keys(hash)
  hash.each_with_object({}) do |(key, val), renamed|
    renamed[yield(key)] = val
  end
end
|
125
|
+
|
126
|
+
# Compute the basic descriptive statistics of +matrix+.
#
# matrix - object supporting matrix[0, 0], #each over every value and #to_a
#          (a Numo matrix in this script).
#
# The returned hash (default value 0) contains :count, :countNonZero, :sum,
# :max, :min, :minNonZero, the quartile keys produced by get_quartiles (for
# all values, and with a 'NonZero' suffix for the non-zero ones) plus
# whatever get_composed_stats adds.
#
# Fixed: the smallest non-zero value used to be compared against the running
# overall minimum, which has already been lowered to the current value, so
# :minNonZero never moved away from matrix[0, 0]. It now tracks its own
# running minimum and is nil when the matrix has no non-zero value.
def get_primary_stats(matrix)
  stats = Hash.new(0)
  max = matrix[0, 0]
  min = matrix[0, 0]
  min_non_zero = nil
  matrix.each do |value|
    stats[:count] += 1
    stats[:sum] += value
    max = value if value > max
    min = value if value < min
    next if value == 0

    stats[:countNonZero] += 1
    min_non_zero = value if min_non_zero.nil? || value < min_non_zero
  end
  stats[:max] = max
  stats[:min] = min
  stats[:minNonZero] = min_non_zero

  # Quartiles need the values as one flat, sorted list.
  values = matrix.to_a.flatten.sort
  quartile_stats = get_quartiles(values, stats[:count])
  stats.merge!(transform_keys(quartile_stats){|k| k.to_sym})

  non_zero_values = values.reject { |v| v == 0 }
  quartile_stats_non_zero = get_quartiles(non_zero_values, stats[:countNonZero])
  stats.merge!(transform_keys(quartile_stats_non_zero){|k| (k + 'NonZero').to_sym})

  get_composed_stats(stats, matrix)
  stats
end
|
153
|
+
|
154
|
+
# Pick the first quartile, the median and the third quartile out of an
# already sorted list.
#
# values  - sorted list of numbers.
# n_items - number of items to consider (the callers pass the list size).
#
# The zero-based coordinates are n_items * 0.25/0.5/0.75 - 1. For an even
# n_items each figure is the mean of the element at the truncated coordinate
# and the next one; for an odd n_items it is the element at the coordinate
# rounded up.
#
# Returns a hash with the string keys 'q1', 'median' and 'q3'.
def get_quartiles(values, n_items)
  coordinates = {
    'q1' => n_items * 0.25 - 1,
    'median' => n_items * 0.5 - 1,
    'q3' => n_items * 0.75 - 1
  }
  even_count = (n_items % 2 == 0)
  coordinates.each_with_object({}) do |(label, coord), quartiles|
    quartiles[label] =
      if even_count
        lower = coord.to_i
        (values[lower] + values[lower + 1]).fdiv(2)
      else
        values[coord.ceil]
      end
  end
end
|
170
|
+
|
171
|
+
# Add the derived figures (averages, variances and standard deviations) to
# +stats+, in place.
#
# stats  - hash that already holds :sum, :count and :countNonZero.
# matrix - object yielding every value through #each.
#
# Keys written: :average, :averageNonZero, :sumDevs, :sumDevsNonZero,
# :variance, :varianceNonZero, :standardDeviation, :standardDeviationNonZero.
# Variances divide by the number of items (population variance).
#
# Fixed: the squared deviations used to be assigned instead of accumulated,
# so both variances only reflected the last value visited.
def get_composed_stats(stats, matrix)
  average = stats[:sum].fdiv(stats[:count])
  average_non_zero = stats[:sum].fdiv(stats[:countNonZero])
  stats[:average] = average
  stats[:averageNonZero] = average_non_zero

  sum_devs = 0.0
  sum_devs_non_zero = 0.0
  matrix.each do |value|
    sum_devs += (value - average) ** 2
    sum_devs_non_zero += (value - average_non_zero) ** 2 if value != 0
  end
  stats[:sumDevs] = sum_devs
  stats[:sumDevsNonZero] = sum_devs_non_zero

  stats[:variance] = stats[:sumDevs].fdiv(stats[:count])
  stats[:varianceNonZero] = stats[:sumDevsNonZero].fdiv(stats[:countNonZero])
  stats[:standardDeviation] = stats[:variance] ** 0.5
  stats[:standardDeviationNonZero] = stats[:varianceNonZero] ** 0.5
end
|
185
|
+
|
186
|
+
|
187
|
+
#############################################################################
|
188
|
+
## OPTPARSE
|
189
|
+
##############################################################################
|
190
|
+
# Defaults for every switch; the handlers below overwrite them when the
# corresponding flag is present on the command line.
options = {
  input_file: nil,
  output_matrix_file: nil,
  byte_format: :float64,
  input_type: 'pair',
  set_diagonal: false,
  binarize: nil,
  cutoff: nil,
  stats: false,
  output_type: 'bin'
}

optparse = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] \n\n"

  opts.on( '-i', '--input_file PATH', 'Input file' ) { |opt| options[:input_file] = opt }

  opts.on( '-o', '--output_matrix_file PATH', 'Output matrix file' ) { |opt| options[:output_matrix_file] = opt }

  opts.on( '-b', '--byte_format STRING', 'Format of the numeric values stored in matrix. Default: float64, warning set this to less precission can modify computation results using this matrix.' ) { |opt| options[:byte_format] = opt.to_sym }

  opts.on( '-t', '--input_type STRING', 'Set input format file. "pair" or "matrix"' ) { |opt| options[:input_type] = opt }

  opts.on( '-d', '--set_diagonal', 'Set to 1.0 the main diagonal' ) { options[:set_diagonal] = true }

  opts.on( '-B', '--binarize FLOAT', 'Binarize matrix changin x >= thr to one and any other to zero into matrix given' ) { |opt| options[:binarize] = opt.to_f }

  opts.on( '-c', '--cutoff FLOAT', 'Cutoff matrix values keeping just x >= and setting any other to zero into matrix given' ) { |opt| options[:cutoff] = opt.to_f }

  opts.on( '-s', '--get_stats', 'Get stats from the processed matrix' ) { options[:stats] = true }

  opts.on( '-O', '--output_type STRING', 'Set output format file. "bin" for binary (default) or "mat" for tabulated text file matrix' ) { |opt| options[:output_type] = opt }

  # Print the generated usage text and stop.
  opts.on( '-h', '--help', 'Display this screen' ) do
    puts opts
    exit
  end
end

optparse.parse!
|
247
|
+
|
248
|
+
################################################################################
|
249
|
+
## MAIN
|
250
|
+
###############################################################################
|
251
|
+
# '-' selects standard input; anything else is opened as a file path.
source = options[:input_file] == '-' ? STDIN : File.open(options[:input_file])

# Load the matrix according to the declared input format. An unrecognised
# format leaves `matrix` as nil.
case options[:input_type]
when 'bin'
  # Npy.load is given the path itself rather than the opened stream.
  matrix = Npy.load(options[:input_file])
when 'matrix'
  matrix = load_matrix_file(source)
when 'pair'
  matrix, names = load_pair_file(source, options[:byte_format])
  # Node names go to a sibling file, one per line.
  File.open(options[:output_matrix_file]+'.lst', 'w') do |f|
    f.print names.join("\n")
  end
end

if options[:set_diagonal]
  diagonal_size = matrix.shape.last
  (0...diagonal_size).each { |n| matrix[n, n] = 1.0 }
end

binarize_thr = options[:binarize]
cutoff_thr = options[:cutoff]

# Binarization and cutoff are only applied when exactly one of them is set.
if cutoff_thr.nil? && !binarize_thr.nil?
  side = matrix.shape.last
  side.times do |i|
    side.times do |j|
      matrix[i,j] = matrix[i,j] >= binarize_thr ? 1 : 0
    end
  end
end

if binarize_thr.nil? && !cutoff_thr.nil?
  side = matrix.shape.last
  side.times do |i|
    side.times do |j|
      matrix[i,j] = matrix[i,j] >= cutoff_thr ? matrix[i,j] : 0
    end
  end
end

# Statistics are printed to standard output as tab-separated rows.
get_stats(matrix).each { |stat| puts stat.join("\t") } if options[:stats]

case options[:output_type]
when 'bin'
  Npy.save(options[:output_matrix_file], matrix)
when 'mat'
  File.open(options[:output_matrix_file], 'w') do |f|
    matrix.each_over_axis(0) { |r| f.puts r.to_a.join("\t") }
  end
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'numo/narray'
|
2
|
+
require 'numo/linalg'
|
3
|
+
|
4
|
+
# Advanced matrix calculations: graph kernels computed from an adjacency
# matrix, and a resource transference score between two matrices.
# All methods are class methods and work on Numo matrices.
class Adv_mat_calc # Advanced matrix calculations
  ## KERNEL METHODS
  #######################################################################################

  # Compute the kernel selected by +kernel+ from the adjacency matrix.
  #
  # matrix        - adjacency matrix; its last dimension is taken as the
  #                 number of nodes.
  # node_names    - not used in this method body.
  # kernel        - kernel code: 'el', 'ct', 'rf', 'me', 'ka', or a string
  #                 containing 'vn', 'rl' or 'md' together with a number
  #                 (what remains after removing the code is parsed as the
  #                 numeric parameter). Any other value returns +matrix+
  #                 unchanged after printing a warning.
  # normalization - when truthy, cosine_normalization is applied to the result.
  #
  # NOTE(review): max_eigenvalue, min_eigenvalue, div_by_vector and
  # cosine_normalization are not defined in this file; presumably they come
  # from an extension of Numo arrays loaded elsewhere - confirm.
  #
  # Returns the resulting matrix.
  def self.get_kernel(matrix, node_names, kernel, normalization=false)
    #I = identity matrix
    #D = Diagonal matrix
    #A = adjacency matrix
    #L = laplacian matrix = D − A
    matrix_result = nil
    dimension_elements = matrix.shape.last
    # In scuba code, the diagonal values of A are set to 0. In a weighted matrix the kernel result is the same with or without this operation. Maybe it increases the computing performance?
    # In the md kernel this operation affects the values of the final kernel
    #dimension_elements.times do |n|
    #	matrix[n,n] = 0.0
    #end
    # All kernels of this first group are built from the Laplacian L = D - A.
    if kernel == 'el' || kernel == 'ct' || kernel == 'rf' ||
      kernel.include?('vn') || kernel.include?('rl') || kernel == 'me'
      diagonal_matrix = matrix.sum(1).diag 	# get the total sum for each row, for this reason the sum method takes the 1 value. If summing columns is desired, use 0
                        # Make a matrix whose diagonal is row_sum
      matrix_L = diagonal_matrix - matrix
      if kernel == 'el' #Exponential Laplacian diffusion kernel(active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
        beta = 0.02
        beta_product = matrix_L * -beta
        #matrix_result = beta_product.expm
        matrix_result = Numo::Linalg.expm(beta_product, 14)
      elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
        matrix_result = Numo::Linalg.pinv(matrix_L) # Anibal says that this kernel was normalized. Why? The paper does not seem to describe this operation for ct; it describes it for Kvn or for all kernels, it is not clear.
      elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
        matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) #Krf = (I +L ) ^ −1
      elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
        alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1  # alpha = impact_of_penalization (number embedded in the kernel code, e.g. 1, 0.5 or 0.1) multiplied by the inverse of max_eigenvalue of A (** binds tighter than *)
        matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha ) #  (I -alphaA ) ^ −1
      elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
        alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1 # alpha = impact_of_penalization (number embedded in the kernel code, e.g. 1, 0.5 or 0.1) multiplied by the inverse of max_eigenvalue of A (** binds tighter than *)
        matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha ) #  (I + alphaL ) ^ −1
      elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
        beta=0.04
        #(beta/N)*(N*I - D + A)
        id_mat = Numo::DFloat.eye(dimension_elements)
        m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix ) * (beta/dimension_elements)
        #matrix_result = m_matrix.expm
        matrix_result = Numo::Linalg.expm(m_matrix, 16)
      end
    elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      lambda_value = matrix.min_eigenvalue
      matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs # Ka = A + lambda*I # lambda = the absolute value of the smallest eigenvalue of A
    elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
      # Number of steps: the integer left after removing 'md' from the code.
      t = kernel.gsub('md', '').to_i
      #TODO: check implementation with Numo::array
      # Despite the variable name, this is the same sum(1) call that is described as the per-row total in the Laplacian branch above.
      col_sum = matrix.sum(1)
      p_mat = matrix.div_by_vector(col_sum)
      p_temp_mat = p_mat.clone
      zt_mat = p_mat.clone
      # Accumulate p_mat + p_mat^2 + ... + p_mat^t in zt_mat.
      (t-1).times do
        p_temp_mat = p_temp_mat.dot(p_mat)
        zt_mat = zt_mat + p_temp_mat
      end
      # Average over the t steps (a code without a positive number makes t zero, so this divides by zero).
      zt_mat = zt_mat * (1.0/t)
      matrix_result = zt_mat.dot(zt_mat.transpose)
    else
      matrix_result = matrix
      warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
      # This allows processing a previous kernel and performing the normalization in a separate step.
    end
    matrix_result = matrix_result.cosine_normalization if normalization #TODO: check implementation with Numo::array
    return matrix_result
  end

  # Alaimo 2014, doi: 10.3389/fbioe.2014.00071
  #
  # Combine two matrices through their graphWeights matrices:
  #   matrix1 . (W1 . (matrix2 . W2))
  # where W1 and W2 are computed by graphWeights from the transposed inputs
  # with lambda_value1 and lambda_value2 respectively.
  #
  # Returns the final product matrix.
  def self.tranference_resources(matrix1, matrix2, lambda_value1 = 0.5, lambda_value2 = 0.5)
    m1rowNumber, m1colNumber = matrix1.shape
    m2rowNumber, m2colNumber = matrix2.shape
    #puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
    # graphWeights receives the transposed matrix, hence the swapped row/column counts.
    matrix1Weight = self.graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
    matrix2Weight = self.graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
    matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
    finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
    return finalMatrix
  end

  # Build the rowsNumber x rowsNumber weight matrix of +inputMatrix+.
  #
  # rowsNumber  - number of rows of inputMatrix; sizes the auxiliary matrices.
  # colsNumber  - not used in this method body.
  # inputMatrix - matrix whose row and column sums drive the weighting.
  # lambdaValue - exponent splitting the row-sum normalization between the
  #               two auxiliary matrices (lambdaValue and 1 - lambdaValue).
  #
  # Returns the weight matrix.
  def self.graphWeights(rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
    ky = (1.0 / inputMatrix.sum(0)).diag #sum cols
    weigth = Numo::Linalg.dot(inputMatrix, ky).transpose
    ky = nil #free memory
    weigth = Numo::Linalg.dot(inputMatrix, weigth)

    kx = inputMatrix.sum(1) #sum rows

    # Every row of kx_lamb_mat is a copy of the vector kx ** lambdaValue.
    kx_lamb = kx ** lambdaValue
    kx_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
    rowsNumber.times do |j|
      rowsNumber.times do |i|
        kx_lamb_mat[j,i] = kx_lamb[i]
      end
    end
    kx_lamb = nil #free memory

    # Every column of kx_inv_lamb_mat is a copy of the vector kx ** (1 - lambdaValue).
    kx_inv_lamb = kx ** (1 - lambdaValue)
    kx_inv_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
    rowsNumber.times do |j|
      rowsNumber.times do |i|
        kx_inv_lamb_mat[i, j] = kx_inv_lamb[i]
      end
    end
    kx_inv_lamb = nil #free memory

    nx = 1.0/(kx_lamb_mat.inplace * kx_inv_lamb_mat).inplace # inplace marks a matrix to be used by reference, not by value
    kx_lamb_mat = nil #free memory
    kx_inv_lamb_mat = nil #free memory
    # NOTE(review): the product is not assigned; this relies on the inplace flag storing the result back into weigth - confirm against Numo's inplace semantics.
    weigth.inplace * nx
    return weigth
  end

end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'expcalc'
|
2
|
+
|
3
|
+
# Builds Network objects from the supported input formats.
class Net_parser
  # Dispatch on options[:input_format] ('pair', 'bin' or 'matrix').
  #
  # options - hash with :input_format, :input_file and :layers, plus
  #           :node_file for the matrix formats and the column separator.
  #
  # The separator is looked up under both :split_char and :splitChar: the
  # 'pair' branch used to read the former and the 'matrix' branch the latter,
  # so one of them always ignored the value the caller had set. The key each
  # branch originally read keeps priority. When neither key is set, 'pair'
  # still passes nil (as before) and 'matrix' falls back to a tab.
  #
  # Returns the loaded Network.
  # Raises RuntimeError for an unknown format.
  def self.load(options)
    case options[:input_format]
    when 'pair'
      split_char = options[:split_char] || options[:splitChar]
      load_network_by_pairs(options[:input_file], options[:layers], split_char)
    when 'bin'
      load_network_by_bin_matrix(options[:input_file], options[:node_file], options[:layers])
    when 'matrix'
      split_char = options[:splitChar] || options[:split_char] || "\t"
      load_network_by_plain_matrix(options[:input_file], options[:node_file], options[:layers], split_char)
    else
      raise("ERROR: The format #{options[:input_format]} is not defined")
    end
  end

  # Read one edge per line (two node names separated by +split_character+),
  # registering both nodes, with the layer Network#set_layer assigns them,
  # and the edge between them.
  def self.load_network_by_pairs(file, layers, split_character="\t")
    net = Network.new(layers.map{|layer| layer.first})
    # File.foreach closes the file once every line has been read.
    File.foreach(file) do |line|
      node1, node2 = line.chomp.split(split_character)
      net.add_node(node1, net.set_layer(layers, node1))
      net.add_node(node2, net.set_layer(layers, node2))
      net.add_edge(node1, node2)
    end
    return net
  end

  # Load an adjacency matrix stored in npy format. The matrix is registered
  # under the list of layer names, together with the node names of
  # +node_file+ given twice (once per axis).
  def self.load_network_by_bin_matrix(input_file, node_file, layers)
    net = Network.new(layers.map{|layer| layer.first})
    node_names = load_input_list(node_file)
    # Arguments are positional; the former `type='npy'` only created a stray local.
    net.adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, 'npy'), node_names, node_names]
    return net
  end

  # Same as load_network_by_bin_matrix for a delimited text matrix.
  def self.load_network_by_plain_matrix(input_file, node_file, layers, splitChar="\t")
    net = Network.new(layers.map{|layer| layer.first})
    node_names = load_input_list(node_file)
    net.adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, 'txt', splitChar), node_names, node_names]
    return net
  end

  # Return the lines of +file+ without their line terminators.
  #
  # This method was preceded by a bare `private`, which does not apply to
  # methods defined with `def self.`; it has therefore always been callable
  # from outside and is kept public so existing callers keep working.
  def self.load_input_list(file)
    return File.readlines(file).map{|line| line.chomp}
  end
end
|