NetAnalyzer 0.1.5 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/NetAnalyzer.gemspec +15 -5
- data/README.md +14 -2
- data/Rakefile +13 -9
- data/bin/NetAnalyzer.rb +183 -30
- data/bin/text2binary_matrix.rb +294 -0
- data/lib/NetAnalyzer/network.rb +651 -87
- data/lib/NetAnalyzer/templates/ElGrapho.min.js +28 -0
- data/lib/NetAnalyzer/templates/cytoscape.erb +65 -0
- data/lib/NetAnalyzer/templates/cytoscape.min.js +32 -0
- data/lib/NetAnalyzer/templates/el_grapho.erb +89 -0
- data/lib/NetAnalyzer/templates/pako.min.js +1 -0
- data/lib/NetAnalyzer/templates/sigma.erb +132 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +2 -0
- metadata +171 -24
@@ -0,0 +1,294 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'optparse'
|
5
|
+
#require 'nmatrix'
|
6
|
+
require 'numo/narray'
|
7
|
+
require 'numo/linalg'
|
8
|
+
require 'npy'
|
9
|
+
require 'expcalc'
|
10
|
+
|
11
|
+
#require 'pp'
|
12
|
+
#############################################################################
|
13
|
+
## METHODS
|
14
|
+
##############################################################################
|
15
|
+
|
16
|
+
# Loads a delimiter-separated text matrix into a Numo::DFloat.
#
# The matrix is allocated as square, sized by the number of fields found in the
# first line, so the input is expected to describe an N x N matrix.
#
# input_file:: path of the matrix file, or an already open stream (File, STDIN,
#              StringIO...) that is read line by line and left open for the caller.
# splitChar::  field separator used on every line (tab by default).
#
# Returns the loaded matrix, or nil when the input has no lines.
def load_matrix_file(input_file, splitChar = "\t")
  matrix = nil
  # Streams expose #gets; plain path strings do not. File.foreach closes the
  # file when iteration finishes, unlike File.open(path).each, which leaked it.
  lines = input_file.respond_to?(:gets) ? input_file.each_line : File.foreach(input_file)
  lines.each_with_index do |line, row_index|
    row = line.chomp.split(splitChar).map { |field| field.to_f }
    matrix = Numo::DFloat.zeros(row.length, row.length) if matrix.nil?
    row.each_with_index do |val, col_index|
      matrix[row_index, col_index] = val
    end
  end
  matrix
end
|
32
|
+
|
33
|
+
# Builds a weighted, undirected network from tab-separated "node_a node_b [weight]" lines.
#
# source::      any object yielding lines through #each (open file, STDIN, array of strings).
# byte_format:: currently unused by this method; kept so existing callers keep working.
#
# Every pair is stored in both directions. A missing third column means weight 1.0.
# Returns the result of Hash#to_wmatrix on the collected connections as [matrix, names].
def load_pair_file(source, byte_format = :float32)
  connections = {}
  source.each do |line|
    node_a, node_b, weight = line.chomp.split("\t")
    # Blank or single-column lines describe no edge; loading them would add a nil node.
    next if node_a.nil? || node_b.nil?
    weight = weight.nil? ? 1.0 : weight.to_f
    add_pair(node_a, node_b, weight, connections)
    add_pair(node_b, node_a, weight, connections)
  end
  matrix, names = connections.to_wmatrix
  return matrix, names
end
|
44
|
+
|
45
|
+
# Records the directed edge node_a -> node_b with the given weight inside
# +connections+ (a hash of hashes). The inner hash of a node seen for the first
# time is created with 0.0 as default value for unknown neighbours.
def add_pair(node_a, node_b, weight, connections)
  neighbours = connections[node_a]
  if neighbours.nil?
    connections[node_a] = Hash.new(0.0).tap { |fresh| fresh[node_b] = weight }
  else
    neighbours[node_b] = weight
  end
end
|
55
|
+
|
56
|
+
# Builds the [label, value] rows of the distribution figures that are reported
# both for the matrix weights and for the per-node edge counts, in output order.
def distribution_stat_rows(prefix, summary)
  [
    ['Max', :max], ['Min', :min], ['Average', :average], ['Variance', :variance],
    ['Standard Deviation', :standardDeviation],
    ['Q1', :q1], ['Median', :median], ['Q3', :q3],
    ['Min Non Zero', :minNonZero], ['Average Non Zero', :averageNonZero],
    ['Variance Non Zero', :varianceNonZero],
    ['Standard Deviation Non Zero', :standardDeviationNonZero],
    ['Q1 Non Zero', :q1NonZero], ['Median Non Zero', :medianNonZero], ['Q3 Non Zero', :q3NonZero]
  ].map { |suffix, key| ["#{prefix} - #{suffix}", summary[key]] }
end

# Collects the summary report of a matrix as an array of [label, value] rows:
# first the figures about the matrix cells (weights), then the figures about
# the number of connections of each node as given by get_connection_number.
#
# matrix:: object answering #shape and accepted by get_primary_stats and
#          get_connection_number.
def get_stats(matrix)
  # TODO: transform to Numo array operations
  primary_stats = get_primary_stats(matrix)
  # Disabled in the original: stats << ['Matrix - Symmetric?', matrix.symmetric?]
  stats = [
    ['Matrix - Dimensions', matrix.shape.join('x')],
    ['Matrix - Elements', primary_stats[:count]],
    ['Matrix - Elements Non Zero', primary_stats[:countNonZero]],
    ['Matrix - Non Zero Density', primary_stats[:countNonZero].fdiv(primary_stats[:count])]
  ]
  stats.concat(distribution_stat_rows('Weigth', primary_stats))

  connection_stats = get_primary_stats(get_connection_number(matrix))
  stats << ['Node - Elements', connection_stats[:count]]
  stats << ['Node - Elements Non Zero', connection_stats[:countNonZero]]
  stats << ['Node - Non Zero Density', connection_stats[:countNonZero].fdiv(connection_stats[:count])]
  # Every "Edges" row must come from connection_stats; "Min Non Zero" used to
  # read the weight stats by mistake.
  stats.concat(distribution_stat_rows('Edges', connection_stats))
  stats
end
|
102
|
+
|
103
|
+
# Counts, for every column of the matrix, how many cells are non zero, leaving
# out the connection of the node with itself.
#
# matrix:: two dimensional Numo matrix.
#
# Returns a 1 x columns Numo::DFloat holding the count of each column.
def get_connection_number(matrix)
  rows, cols = matrix.shape
  connections = Numo::DFloat.zeros(1, cols)
  cols.times do |i|
    count = 0
    matrix[true, i].each do |value|
      count += 1 if value != 0
    end
    # Remove the self connection only when the diagonal cell is actually set:
    # subtracting it unconditionally under-counted matrices with a zero diagonal
    # (an isolated node ended up with -1 connections).
    count -= 1 if i < rows && matrix[i, i] != 0
    connections[0, i] = count
  end
  connections
end
|
116
|
+
|
117
|
+
# Returns a new hash with the values of +hash+ stored under the keys produced
# by passing each original key to the given block. The input hash is untouched.
def transform_keys(hash)
  hash.each_with_object({}) do |(key, val), renamed|
    renamed[yield(key)] = val
  end
end
|
125
|
+
|
126
|
+
# Computes the descriptive statistics of every value held in +matrix+.
#
# matrix:: object answering #[] with two indexes, #each (yielding every value)
#          and #to_a.
#
# Returns a Hash (default value 0) with :count, :countNonZero, :sum, :max, :min,
# :minNonZero (nil when no value differs from zero), the quartile keys produced
# by get_quartiles for all values and, suffixed with NonZero, for the non zero
# ones, plus whatever get_composed_stats adds to the hash.
def get_primary_stats(matrix)
  stats = Hash.new(0)
  max = min = matrix[0, 0] # Seed the extremes with the first cell
  min_non_zero = nil       # Unknown until the first non zero value shows up
  matrix.each do |value|
    stats[:count] += 1
    stats[:sum] += value
    max = value if value > max
    min = value if value < min
    next if value == 0
    stats[:countNonZero] += 1
    # Compare against the running non zero minimum. The previous code compared
    # against +min+, which had just been updated, so the value never changed.
    min_non_zero = value if min_non_zero.nil? || value < min_non_zero
  end
  stats[:max] = max
  stats[:min] = min
  stats[:minNonZero] = min_non_zero

  values = matrix.to_a
  values.flatten! if values.first.is_a?(Array)
  values.sort!
  quartile_stats = get_quartiles(values, stats[:count])
  stats.merge!(transform_keys(quartile_stats) { |k| k.to_sym })
  values.select! { |v| v != 0 }
  quartile_stats_non_zero = get_quartiles(values, stats[:countNonZero])
  stats.merge!(transform_keys(quartile_stats_non_zero) { |k| (k + 'NonZero').to_sym })
  get_composed_stats(stats, matrix)
  stats
end
|
153
|
+
|
154
|
+
# Picks the first quartile, median and third quartile out of a sorted list.
#
# values::  values sorted in ascending order.
# n_items:: number of values to take into account.
#
# With an even number of items each figure is the mean of the two values around
# the cut position; with an odd number it is the value at the rounded-up position.
# Returns a Hash with the string keys 'q1', 'median' and 'q3'; all three are nil
# when there are no items.
def get_quartiles(values, n_items)
  # An empty list has no quartiles. Indexing it used to fail with
  # NoMethodError (nil + nil), e.g. for a matrix without non zero values.
  return { 'q1' => nil, 'median' => nil, 'q3' => nil } if n_items.zero?

  pick =
    if n_items.even?
      ->(coor) { (values[coor.to_i] + values[coor.to_i + 1]).fdiv(2) }
    else
      ->(coor) { values[coor.ceil] }
    end
  {
    'q1' => pick.call(n_items * 0.25 - 1),
    'median' => pick.call(n_items * 0.5 - 1),
    'q3' => pick.call(n_items * 0.75 - 1)
  }
end
|
170
|
+
|
171
|
+
# Adds to +stats+ the figures derived from the totals already stored in it:
# :average, :variance and :standardDeviation over every value, and the same
# three with the NonZero suffix over the values different from zero. The sums
# of squared deviations are also stored as :sumDevs and :sumDevsNonZero.
#
# stats::  Hash holding :sum, :count and :countNonZero; it is modified in place.
# matrix:: object yielding every value through #each.
def get_composed_stats(stats, matrix)
  average = stats[:sum].fdiv(stats[:count])
  average_non_zero = stats[:sum].fdiv(stats[:countNonZero])
  stats[:average] = average
  stats[:averageNonZero] = average_non_zero

  # The squared deviations have to be accumulated over all the values; the
  # previous plain assignment kept only the deviation of the last one.
  sum_devs = 0.0
  sum_devs_non_zero = 0.0
  matrix.each do |value|
    sum_devs += (value - average)**2
    sum_devs_non_zero += (value - average_non_zero)**2 if value != 0
  end
  stats[:sumDevs] = sum_devs
  stats[:sumDevsNonZero] = sum_devs_non_zero

  stats[:variance] = stats[:sumDevs].fdiv(stats[:count])
  stats[:varianceNonZero] = stats[:sumDevsNonZero].fdiv(stats[:countNonZero])
  stats[:standardDeviation] = stats[:variance]**0.5
  stats[:standardDeviationNonZero] = stats[:varianceNonZero]**0.5
end
|
185
|
+
|
186
|
+
|
187
|
+
#############################################################################
|
188
|
+
## OPTPARSE
|
189
|
+
##############################################################################
|
190
|
+
# Default value of every command line option; the handlers below overwrite
# them with what the user supplies.
options = {
  input_file: nil,
  output_matrix_file: nil,
  byte_format: :float64,
  input_type: 'pair',
  set_diagonal: false,
  binarize: nil,
  stats: false,
  output_type: 'bin'
}

optparse = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] \n\n"

  opts.on( '-i', '--input_file PATH', 'Input file' ) do |opt|
    options[:input_file] = opt
  end

  opts.on( '-o', '--output_matrix_file PATH', 'Output matrix file' ) do |opt|
    options[:output_matrix_file] = opt
  end

  opts.on( '-b', '--byte_format STRING', 'Format of the numeric values stored in matrix. Default: float64, warning set this to less precission can modify computation results using this matrix.' ) do |opt|
    options[:byte_format] = opt.to_sym
  end

  # The script also reads binary matrices ("bin"), which the help text did not mention.
  opts.on( '-t', '--input_type STRING', 'Set input format file. "pair" (default), "matrix" or "bin"' ) do |opt|
    options[:input_type] = opt
  end

  opts.on( '-d', '--set_diagonal', 'Set to 1.0 the main diagonal' ) do
    options[:set_diagonal] = true
  end

  opts.on( '-B', '--binarize FLOAT', 'Binarize matrix changin x >= thr to one and any other to zero into matrix given' ) do |opt|
    options[:binarize] = opt.to_f
  end

  opts.on( '-s', '--get_stats', 'Get stats from the processed matrix' ) do
    options[:stats] = true
  end

  opts.on( '-O', '--output_type STRING', 'Set output format file. "bin" for binary (default) or "mat" for tabulated text file matrix' ) do |opt|
    options[:output_type] = opt
  end

  opts.on( '-h', '--help', 'Display this screen' ) do
    puts opts
    exit
  end
end

optparse.parse!
|
242
|
+
|
243
|
+
################################################################################
|
244
|
+
## MAIN
|
245
|
+
###############################################################################
|
246
|
+
# Stop early with a readable message instead of an obscure TypeError/NoMethodError later on.
if options[:input_file].nil?
  abort('ERROR: an input file is required (-i PATH, or "-" to read from STDIN)')
end
# The output path is used for the node list of "pair" inputs and by both known output types.
needs_output = options[:input_type] == 'pair' || %w[bin mat].include?(options[:output_type])
if needs_output && options[:output_matrix_file].nil?
  abort('ERROR: an output matrix file is required (-o PATH)')
end

matrix = nil
case options[:input_type]
when 'bin'
  # Binary matrices are loaded straight from the path, so no text stream is opened.
  matrix = Npy.load(options[:input_file])
when 'matrix', 'pair'
  source = options[:input_file] == '-' ? STDIN : File.open(options[:input_file])
  begin
    if options[:input_type] == 'matrix'
      matrix = load_matrix_file(source)
    else
      matrix, names = load_pair_file(source, options[:byte_format])
      # Node names, one per line, are written next to the output matrix.
      File.write("#{options[:output_matrix_file]}.lst", names.join("\n"))
    end
  ensure
    # Release the handle opened above; STDIN is left alone.
    source.close unless source.equal?(STDIN)
  end
else
  abort("ERROR: unknown input type \"#{options[:input_type]}\". Use \"pair\", \"matrix\" or \"bin\"")
end

if options[:set_diagonal]
  # The main diagonal has as many cells as the smallest dimension.
  matrix.shape.min.times do |n|
    matrix[n, n] = 1.0
  end
end

unless options[:binarize].nil?
  # Walk rows and columns with their own sizes so the matrix need not be square.
  rows, cols = matrix.shape
  rows.times do |i|
    cols.times do |j|
      matrix[i, j] = matrix[i, j] >= options[:binarize] ? 1 : 0
    end
  end
end

if options[:stats]
  get_stats(matrix).each do |stat|
    puts stat.join("\t")
  end
end

# Any other output type writes no matrix file, as before.
case options[:output_type]
when 'bin'
  Npy.save(options[:output_matrix_file], matrix)
when 'mat'
  File.open(options[:output_matrix_file], 'w') do |f|
    matrix.each_over_axis(0) do |r|
      f.puts r.to_a.join("\t")
    end
  end
end
|