NetAnalyzer 0.1.5 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/NetAnalyzer.gemspec +15 -5
- data/README.md +14 -2
- data/Rakefile +13 -9
- data/bin/NetAnalyzer.rb +183 -30
- data/bin/text2binary_matrix.rb +294 -0
- data/lib/NetAnalyzer/network.rb +651 -87
- data/lib/NetAnalyzer/templates/ElGrapho.min.js +28 -0
- data/lib/NetAnalyzer/templates/cytoscape.erb +65 -0
- data/lib/NetAnalyzer/templates/cytoscape.min.js +32 -0
- data/lib/NetAnalyzer/templates/el_grapho.erb +89 -0
- data/lib/NetAnalyzer/templates/pako.min.js +1 -0
- data/lib/NetAnalyzer/templates/sigma.erb +132 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +2 -0
- metadata +171 -24
@@ -0,0 +1,294 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'optparse'
|
5
|
+
#require 'nmatrix'
|
6
|
+
require 'numo/narray'
|
7
|
+
require 'numo/linalg'
|
8
|
+
require 'npy'
|
9
|
+
require 'expcalc'
|
10
|
+
|
11
|
+
#require 'pp'
|
12
|
+
#############################################################################
|
13
|
+
## METHODS
|
14
|
+
##############################################################################
|
15
|
+
|
16
|
+
# Loads a delimiter-separated text matrix into a Numo::DFloat.
#
# The matrix is allocated as a square whose side is the number of fields found
# on the first line, so the input is expected to describe a square matrix.
# Fields are converted with String#to_f (non-numeric text becomes 0.0).
#
# @param input_file [String, #gets] path of the file to read, or an already
#   opened stream (File, STDIN, StringIO, ...). A stream is read from its
#   current position and is left open: closing it is up to the caller.
# @param splitChar [String] field separator, a tab by default.
# @return [Numo::DFloat, nil] the loaded matrix, or nil when the input has no lines.
def load_matrix_file(input_file, splitChar = "\t")
  # Streams are consumed directly (File.open cannot reopen STDIN); paths go
  # through File.foreach, which closes its handle once the iteration finishes.
  lines = input_file.respond_to?(:gets) ? input_file.each_line : File.foreach(input_file)
  matrix = nil
  lines.each_with_index do |line, row_index|
    row = line.chomp.split(splitChar).map { |c| c.to_f }
    matrix ||= Numo::DFloat.zeros(row.length, row.length)
    row.each_with_index { |val, col_index| matrix[row_index, col_index] = val }
  end
  matrix
end
|
32
|
+
|
33
|
+
# Builds a weighted matrix from a tab-separated list of node pairs.
#
# Each line holds "node_a<TAB>node_b[<TAB>weight]"; a missing weight counts as
# 1.0. Every pair is registered in both directions before the nested hash is
# converted with Hash#to_wmatrix (not defined in this file; presumably provided
# by the expcalc gem).
#
# @param source [#each] line source (an open file, STDIN, an array of lines...).
# @param byte_format [Symbol] accepted for interface compatibility; not used here.
# @return [Array] two-element array: the matrix and the node names.
def load_pair_file(source, byte_format = :float32)
  connections = {}
  source.each do |line|
    node_a, node_b, raw_weight = line.chomp.split("\t")
    weight = raw_weight.nil? ? 1.0 : raw_weight.to_f
    [[node_a, node_b], [node_b, node_a]].each do |from, to|
      add_pair(from, to, weight, connections)
    end
  end
  matrix, names = connections.to_wmatrix
  [matrix, names]
end
|
44
|
+
|
45
|
+
# Records the directed edge node_a -> node_b with the given weight.
#
# connections maps each source node to a hash of target => weight. The inner
# hash is created on first use with a default of 0.0 for unknown targets.
#
# @param node_a [Object] source node.
# @param node_b [Object] target node.
# @param weight [Numeric] edge weight; replaces any previous value.
# @param connections [Hash] adjacency structure, updated in place.
def add_pair(node_a, node_b, weight, connections)
  neighbours = connections[node_a]
  return neighbours[node_b] = weight unless neighbours.nil?

  connections[node_a] = Hash.new(0.0).tap { |fresh| fresh[node_b] = weight }
end
|
55
|
+
|
56
|
+
# Builds the report printed by the --get_stats option.
#
# Two groups of figures are produced: one over the matrix cells ("Matrix" and
# "Weigth" rows) and one over the per-node connection counts returned by
# get_connection_number ("Node" and "Edges" rows). Labels are kept exactly as
# they have always been printed, including the "Weigth" spelling.
#
# @param matrix [Numo::NArray] matrix to summarise.
# @return [Array<Array>] list of [label, value] rows, in print order.
def get_stats(matrix)
  # TODO: transform to Numo::Array operations
  distribution_rows = [
    ['Max', :max],
    ['Min', :min],
    ['Average', :average],
    ['Variance', :variance],
    ['Standard Deviation', :standardDeviation],
    ['Q1', :q1],
    ['Median', :median],
    ['Q3', :q3],
    ['Min Non Zero', :minNonZero],
    ['Average Non Zero', :averageNonZero],
    ['Variance Non Zero', :varianceNonZero],
    ['Standard Deviation Non Zero', :standardDeviationNonZero],
    ['Q1 Non Zero', :q1NonZero],
    ['Median Non Zero', :medianNonZero],
    ['Q3 Non Zero', :q3NonZero]
  ]
  counts = lambda do |prefix, source|
    [
      ["#{prefix} - Elements", source[:count]],
      ["#{prefix} - Elements Non Zero", source[:countNonZero]],
      ["#{prefix} - Non Zero Density", source[:countNonZero].fdiv(source[:count])]
    ]
  end
  # Every row reads from the hash given here, so the "Edges" group can no
  # longer pick a value from the matrix-level stats by mistake (the original
  # 'Edges - Min Non Zero' row did).
  distribution = lambda do |prefix, source|
    distribution_rows.map { |label, key| ["#{prefix} - #{label}", source[key]] }
  end

  primary_stats = get_primary_stats(matrix)
  connection_stats = get_primary_stats(get_connection_number(matrix))

  stats = []
  # stats << ['Matrix - Symmetric?', matrix.symmetric?]
  stats << ['Matrix - Dimensions', matrix.shape.join('x')]
  stats.concat(counts.call('Matrix', primary_stats))
  stats.concat(distribution.call('Weigth', primary_stats))
  stats.concat(counts.call('Node', connection_stats))
  stats.concat(distribution.call('Edges', connection_stats))
  stats
end
|
102
|
+
|
103
|
+
# Counts, for every column of the matrix, how many cells are non-zero,
# leaving out the node's connection with itself.
#
# The self connection is discounted only when the diagonal cell is actually
# non-zero; subtracting it unconditionally reports one connection too few for
# matrices whose diagonal has not been set.
#
# @param matrix [Numo::NArray] two-dimensional matrix.
# @return [Numo::DFloat] 1 x columns matrix with one count per column.
def get_connection_number(matrix)
  rows, cols = matrix.shape
  connections = Numo::DFloat.zeros(1, cols)
  cols.times do |i|
    degree = 0
    matrix[true, i].each { |value| degree += 1 if value != 0 }
    # The i < rows guard keeps the diagonal lookup in range for wide matrices.
    degree -= 1 if i < rows && matrix[i, i] != 0
    connections[0, i] = degree
  end
  connections
end
|
116
|
+
|
117
|
+
# Returns a new hash holding the same values under keys rewritten by the block.
#
# @param hash [Hash] source hash; it is not modified.
# @yieldparam key [Object] each original key.
# @yieldreturn [Object] the key to use in the new hash.
# @return [Hash] new hash; when two keys map to the same new key the later one wins.
def transform_keys(hash)
  hash.map { |key, val| [yield(key), val] }.to_h
end
|
125
|
+
|
126
|
+
# Computes descriptive statistics over every cell of a matrix.
#
# Filled keys: :count, :countNonZero, :sum, :min, :max, :minNonZero, the
# quartiles (:q1, :median, :q3 and their *NonZero variants) and, through
# get_composed_stats, the averages, variances and standard deviations.
#
# :minNonZero is taken from the non-zero values themselves. The previous
# running comparison was seeded with the first cell and compared against the
# overall minimum, so it returned 0 (or a wrong value) whenever the matrix
# contained a zero. When there is no non-zero value, :minNonZero and the
# non-zero quartiles are nil.
#
# @param matrix [#to_a, #each] matrix (or plain list) of numbers.
# @return [Hash] statistics keyed by symbol; unknown keys default to 0.
def get_primary_stats(matrix)
  stats = Hash.new(0)
  values = matrix.to_a
  values.flatten! if values.first.is_a?(Array)
  # Accumulate in the original cell order, before sorting, so the floating
  # point sum is the same one a plain pass over the matrix would give.
  values.each do |value|
    stats[:count] += 1
    stats[:countNonZero] += 1 if value != 0
    stats[:sum] += value
  end
  values.sort!
  non_zero = values.reject { |v| v == 0 }
  stats[:max] = values.last
  stats[:min] = values.first
  stats[:minNonZero] = non_zero.first
  quartile_stats = get_quartiles(values, stats[:count])
  stats.merge!(transform_keys(quartile_stats) { |k| k.to_sym })
  if non_zero.empty?
    # Nothing to rank: report the non-zero quartiles as missing.
    %i[q1NonZero medianNonZero q3NonZero].each { |key| stats[key] = nil }
  else
    quartile_stats_non_zero = get_quartiles(non_zero, stats[:countNonZero])
    stats.merge!(transform_keys(quartile_stats_non_zero) { |k| (k + 'NonZero').to_sym })
  end
  get_composed_stats(stats, matrix)
  stats
end
|
153
|
+
|
154
|
+
# Picks the first quartile, median and third quartile out of a sorted list.
#
# Positions are n_items * {0.25, 0.5, 0.75} - 1. With an even n_items each
# figure is the mean of the value at the truncated position and the next one;
# with an odd n_items it is the value at the position rounded up.
#
# @param values [Array<Numeric>] values sorted in ascending order.
# @param n_items [Integer] number of values to consider.
# @return [Hash{String=>Numeric, nil}] 'q1', 'median' and 'q3'; all nil when
#   there are no values (this used to raise NoMethodError).
def get_quartiles(values, n_items)
  labels = %w[q1 median q3]
  return labels.map { |label| [label, nil] }.to_h if n_items.zero? || values.empty?

  picks = [0.25, 0.5, 0.75].map do |fraction|
    position = n_items * fraction - 1
    if n_items.even?
      lower = position.to_i
      (values[lower] + values[lower + 1]).fdiv(2)
    else
      values[position.ceil]
    end
  end
  labels.zip(picks).to_h
end
|
170
|
+
|
171
|
+
# Adds averages, variances and standard deviations to stats.
#
# Reads :sum, :count and :countNonZero from stats and writes :average,
# :averageNonZero, :sumDevs, :sumDevsNonZero, :variance, :varianceNonZero,
# :standardDeviation and :standardDeviationNonZero. Variances are population
# variances (sum of squared deviations divided by the element count).
#
# The squared deviations are accumulated over every value; assigning them
# instead of adding left only the last element's deviation in the result.
#
# @param stats [Hash] statistics hash, updated in place.
# @param matrix [#each] the values the stats were computed from.
def get_composed_stats(stats, matrix)
  average = stats[:sum].fdiv(stats[:count])
  average_non_zero = stats[:sum].fdiv(stats[:countNonZero])
  stats[:average] = average
  stats[:averageNonZero] = average_non_zero
  sum_devs = 0.0
  sum_devs_non_zero = 0.0
  matrix.each do |value|
    sum_devs += (value - average)**2
    sum_devs_non_zero += (value - average_non_zero)**2 if value != 0
  end
  stats[:sumDevs] = sum_devs
  stats[:sumDevsNonZero] = sum_devs_non_zero
  stats[:variance] = sum_devs.fdiv(stats[:count])
  stats[:varianceNonZero] = sum_devs_non_zero.fdiv(stats[:countNonZero])
  stats[:standardDeviation] = stats[:variance]**0.5
  stats[:standardDeviationNonZero] = stats[:varianceNonZero]**0.5
end
|
185
|
+
|
186
|
+
|
187
|
+
#############################################################################
|
188
|
+
## OPTPARSE
|
189
|
+
##############################################################################
|
190
|
+
# Command line options, pre-loaded with their defaults; the parser below
# overwrites the entries for the flags actually given.
options = {
  input_file: nil,
  output_matrix_file: nil,
  byte_format: :float64,
  input_type: 'pair',
  set_diagonal: false,
  binarize: nil,
  stats: false,
  output_type: 'bin'
}

optparse = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] \n\n"

  opts.on('-i', '--input_file PATH', 'Input file') { |path| options[:input_file] = path }

  opts.on('-o', '--output_matrix_file PATH', 'Output matrix file') do |path|
    options[:output_matrix_file] = path
  end

  opts.on('-b', '--byte_format STRING', 'Format of the numeric values stored in matrix. Default: float64, warning set this to less precission can modify computation results using this matrix.') do |format|
    options[:byte_format] = format.to_sym
  end

  opts.on('-t', '--input_type STRING', 'Set input format file. "pair" or "matrix"') do |type|
    options[:input_type] = type
  end

  opts.on('-d', '--set_diagonal', 'Set to 1.0 the main diagonal') { options[:set_diagonal] = true }

  opts.on('-B', '--binarize FLOAT', 'Binarize matrix changin x >= thr to one and any other to zero into matrix given') do |threshold|
    options[:binarize] = threshold.to_f
  end

  opts.on('-s', '--get_stats', 'Get stats from the processed matrix') { options[:stats] = true }

  opts.on('-O', '--output_type STRING', 'Set output format file. "bin" for binary (default) or "mat" for tabulated text file matrix') do |type|
    options[:output_type] = type
  end

  # Prints the generated help text and stops the script.
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end

optparse.parse!
|
242
|
+
|
243
|
+
################################################################################
|
244
|
+
## MAIN
|
245
|
+
###############################################################################
|
246
|
+
# Fail early, with a readable message, on arguments the rest of the script
# cannot work without (they used to surface as nil errors further down).
abort('ERROR: an input file is required (-i PATH, or "-" for STDIN)') if options[:input_file].nil?
writes_matrix = %w[bin mat].include?(options[:output_type])
if options[:output_matrix_file].nil? && (writes_matrix || options[:input_type] == 'pair')
  abort('ERROR: an output matrix file is required (-o PATH)')
end

# Load the matrix. Text inputs are read from STDIN when the input file is "-";
# a file opened here is always closed again, even if loading fails.
matrix = nil
case options[:input_type]
when 'bin'
  matrix = Npy.load(options[:input_file])
  #matrix = Marshal.load(File.binread(options[:input_file])) # the method needs a path not a IO object
when 'matrix', 'pair'
  from_stdin = options[:input_file] == '-'
  source = from_stdin ? STDIN : File.open(options[:input_file])
  begin
    if options[:input_type] == 'matrix'
      matrix = load_matrix_file(source)
    else
      matrix, names = load_pair_file(source, options[:byte_format])
      # Node names go to a companion file next to the matrix, one per line.
      File.open(options[:output_matrix_file] + '.lst', 'w') { |f| f.print names.join("\n") }
    end
  ensure
    source.close unless from_stdin
  end
else
  abort("ERROR: unknown input type '#{options[:input_type]}' (expected \"pair\", \"matrix\" or \"bin\")")
end
abort('ERROR: no matrix could be loaded from the input') if matrix.nil?

if options[:set_diagonal]
  # shape.min keeps the walk along the diagonal in range for non-square matrices.
  matrix.shape.min.times do |n|
    matrix[n, n] = 1.0
  end
end

if !options[:binarize].nil?
  rows, cols = matrix.shape
  rows.times do |i|
    cols.times do |j|
      matrix[i, j] = matrix[i, j] >= options[:binarize] ? 1 : 0
    end
  end
end

if options[:stats]
  get_stats(matrix).each do |stat|
    puts stat.join("\t")
  end
end

if options[:output_type] == 'bin'
  #File.binwrite(options[:output_matrix_file], Marshal.dump(matrix))
  Npy.save(options[:output_matrix_file], matrix)
elsif options[:output_type] == 'mat'
  File.open(options[:output_matrix_file], 'w') do |f|
    matrix.each_over_axis(0) do |r|
      f.puts r.to_a.join("\t")
    end
  end
end
|