NetAnalyzer 0.1.5 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,294 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'benchmark'
4
+ require 'optparse'
5
+ #require 'nmatrix'
6
+ require 'numo/narray'
7
+ require 'numo/linalg'
8
+ require 'npy'
9
+ require 'expcalc'
10
+
11
+ #require 'pp'
12
+ #############################################################################
13
+ ## METHODS
14
+ ##############################################################################
15
+
16
# Loads a dense numeric matrix from a delimited text source.
#
# Every line is one matrix row; cells are separated by +splitChar+ and parsed
# with String#to_f. The matrix is allocated as N x N, where N is the number of
# cells found on the first line, so the input is expected to be square.
#
# @param input_file [String, IO] path of the file to read, or an already open
#   IO-like object (anything that responds to +gets+, such as STDIN)
# @param splitChar [String] cell separator, a tab by default
# @return [Numo::DFloat, nil] the loaded matrix, or nil when there are no lines
def load_matrix_file(input_file, splitChar = "\t")
  fill = lambda do |io|
    matrix = nil
    io.each_line.with_index do |line, row_index|
      cells = line.chomp.split(splitChar).map(&:to_f)
      # The first row fixes the (square) dimensions of the matrix.
      matrix ||= Numo::DFloat.zeros(cells.length, cells.length)
      cells.each_with_index { |val, col_index| matrix[row_index, col_index] = val }
    end
    matrix
  end

  # Already-open streams are consumed as they are; the caller owns them.
  return fill.call(input_file) if input_file.respond_to?(:gets)

  # Block form closes the handle even when parsing raises.
  File.open(input_file) { |file| fill.call(file) }
end
32
+
33
# Builds a symmetric weighted adjacency structure from a tab-separated edge
# list and converts it to a matrix.
#
# Each line is "node_a<TAB>node_b[<TAB>weight]"; a missing weight means 1.0.
# Every edge is registered in both directions. Lines without a second column
# (for instance blank lines) are skipped so they do not create a nil node.
#
# @param source [#each] line source, such as an open File or STDIN
# @param byte_format [Symbol] accepted for interface compatibility; this
#   method does not use it
# @return [Array] the matrix and the node names, as returned by
#   Hash#to_wmatrix (not defined in this file; presumably provided by the
#   'expcalc' gem -- confirm)
def load_pair_file(source, byte_format = :float32)
  connections = {}
  source.each do |line|
    node_a, node_b, weight = line.chomp.split("\t")
    next if node_b.nil? # fewer than two columns: not an edge

    weight = weight.nil? ? 1.0 : weight.to_f
    add_pair(node_a, node_b, weight, connections)
    add_pair(node_b, node_a, weight, connections)
  end
  matrix, names = connections.to_wmatrix
  return matrix, names
end
44
+
45
# Records a directed, weighted connection node_a -> node_b.
#
# @param node_a [Object] origin node identifier
# @param node_b [Object] destination node identifier
# @param weight [Numeric] weight stored for the connection
# @param connections [Hash] nested hash {node => {neighbour => weight}},
#   modified in place; inner hashes answer 0.0 for unknown neighbours
# @return [Numeric] the stored weight
def add_pair(node_a, node_b, weight, connections)
  neighbours = (connections[node_a] ||= Hash.new(0.0))
  neighbours[node_b] = weight
end
55
+
56
# Builds the report rows describing a matrix: its dimensions, the distribution
# of its values ("Weigth" rows) and the distribution of the per-column
# connection counts ("Node"/"Edges" rows).
#
# Labels are emitted verbatim, including the historical "Weigth" spelling,
# because they are part of the program output.
#
# @param matrix [Numo::NArray] matrix to describe
# @return [Array<Array>] ordered [label, value] pairs
def get_stats(matrix)
  # TODO: transform to Numo::NArray operations
  distribution_fields = [
    ['Max', :max],
    ['Min', :min],
    ['Average', :average],
    ['Variance', :variance],
    ['Standard Deviation', :standardDeviation],
    ['Q1', :q1],
    ['Median', :median],
    ['Q3', :q3],
    ['Min Non Zero', :minNonZero],
    ['Average Non Zero', :averageNonZero],
    ['Variance Non Zero', :varianceNonZero],
    ['Standard Deviation Non Zero', :standardDeviationNonZero],
    ['Q1 Non Zero', :q1NonZero],
    ['Median Non Zero', :medianNonZero],
    ['Q3 Non Zero', :q3NonZero]
  ]
  # Every distribution row is read from the stats hash it is labelled for.
  describe = lambda do |prefix, source|
    distribution_fields.map { |label, key| ["#{prefix} - #{label}", source[key]] }
  end

  primary_stats = get_primary_stats(matrix)
  connection_stats = get_primary_stats(get_connection_number(matrix))

  stats = []
  #stats << ['Matrix - Symmetric?', matrix.symmetric?]
  stats << ['Matrix - Dimensions', matrix.shape.join('x')]
  stats << ['Matrix - Elements', primary_stats[:count]]
  stats << ['Matrix - Elements Non Zero', primary_stats[:countNonZero]]
  stats << ['Matrix - Non Zero Density', primary_stats[:countNonZero].fdiv(primary_stats[:count])]
  stats.concat(describe.call('Weigth', primary_stats))
  stats << ['Node - Elements', connection_stats[:count]]
  stats << ['Node - Elements Non Zero', connection_stats[:countNonZero]]
  stats << ['Node - Non Zero Density', connection_stats[:countNonZero].fdiv(connection_stats[:count])]
  stats.concat(describe.call('Edges', connection_stats))
  stats
end
102
+
103
# Counts, for every column, how many non-zero entries it has, not counting the
# connection of the node with itself.
#
# The diagonal entry is discounted only when it is actually non-zero, so a
# matrix with an empty diagonal is not under-counted.
#
# @param matrix [Numo::NArray] two-dimensional matrix
# @return [Numo::DFloat] 1 x cols matrix holding the count of each column
def get_connection_number(matrix)
  rows, cols = matrix.shape
  connections = Numo::DFloat.zeros(1, cols)
  cols.times do |i|
    degree = matrix[true, i].to_a.count { |value| value != 0 }
    # Remove the self connection only if this column really has one.
    degree -= 1 if i < rows && matrix[i, i] != 0
    connections[0, i] = degree
  end
  connections
end
116
+
117
# Returns a copy of +hash+ whose keys are replaced by the result of the block.
#
# @param hash [Hash] hash to copy; it is not modified
# @yieldparam key [Object] each original key
# @yieldreturn [Object] the key to use in the returned hash
# @return [Hash] new hash with the same values under the new keys
def transform_keys(hash)
  hash.each_with_object({}) { |(key, val), renamed| renamed[yield(key)] = val }
end
125
+
126
# Computes descriptive statistics over every element of a matrix.
#
# The returned hash holds :count, :countNonZero, :sum, :max, :min and
# :minNonZero, the quartile keys produced by get_quartiles (also suffixed with
# "NonZero" for the non-zero values), plus whatever get_composed_stats adds.
#
# :minNonZero is the smallest value different from zero, or nil when every
# value is zero.
#
# @param matrix [Numo::NArray] matrix to summarise
# @return [Hash] statistics keyed by symbol; unknown keys answer 0
def get_primary_stats(matrix)
  stats = Hash.new(0)
  values = matrix.to_a
  values.flatten! if values.first.is_a?(Array)
  non_zero = values.select { |v| v != 0 }

  stats[:count] = values.length
  stats[:countNonZero] = non_zero.length
  # Plain left-to-right addition, in element order.
  stats[:sum] = values.reduce(0) { |acc, v| acc + v }
  stats[:max] = values.max
  stats[:min] = values.min
  stats[:minNonZero] = non_zero.min

  sorted = values.sort
  quartile_stats = get_quartiles(sorted, stats[:count])
  stats.merge!(transform_keys(quartile_stats) { |k| k.to_sym })

  sorted_non_zero = sorted.select { |v| v != 0 }
  quartile_stats_non_zero = get_quartiles(sorted_non_zero, stats[:countNonZero])
  stats.merge!(transform_keys(quartile_stats_non_zero) { |k| (k + 'NonZero').to_sym })

  get_composed_stats(stats, matrix)
  stats
end
153
+
154
# Computes first quartile, median and third quartile of sorted values.
#
# The position of each statistic is n_items * fraction - 1. With an even
# number of items the two values around that position are averaged; with an
# odd number the value at the rounded-up position is taken.
#
# @param values [Array<Numeric>] values sorted in ascending order
# @param n_items [Integer] number of values to consider
# @return [Hash{String => Numeric, nil}] 'q1', 'median' and 'q3'; all nil when
#   n_items is zero
def get_quartiles(values, n_items)
  fractions = { 'q1' => 0.25, 'median' => 0.5, 'q3' => 0.75 }
  # Nothing to summarise: there is no element to index.
  return fractions.keys.each_with_object({}) { |name, empty| empty[name] = nil } if n_items.zero?

  fractions.each_with_object({}) do |(name, fraction), stats|
    coor = n_items * fraction - 1
    stats[name] =
      if n_items.even?
        lower = coor.to_i
        (values[lower] + values[lower + 1]).fdiv(2)
      else
        values[coor.ceil]
      end
  end
end
170
+
171
# Adds averages, variances and standard deviations to +stats+, both over all
# values and over the non-zero values only.
#
# Squared deviations are accumulated over every element, so the variances are
# population variances (divided by the element counts).
#
# @param stats [Hash] must already hold :sum, :count and :countNonZero;
#   modified in place
# @param matrix [#each] collection yielding every value
# @return [Float] the non-zero standard deviation (last value stored)
def get_composed_stats(stats, matrix)
  average = stats[:sum].fdiv(stats[:count])
  average_non_zero = stats[:sum].fdiv(stats[:countNonZero])
  stats[:average] = average
  stats[:averageNonZero] = average_non_zero

  sum_devs = 0.0
  sum_devs_non_zero = 0.0
  matrix.each do |value|
    sum_devs += (value - average)**2
    sum_devs_non_zero += (value - average_non_zero)**2 if value != 0
  end
  stats[:sumDevs] = sum_devs
  stats[:sumDevsNonZero] = sum_devs_non_zero

  stats[:variance] = sum_devs.fdiv(stats[:count])
  stats[:varianceNonZero] = sum_devs_non_zero.fdiv(stats[:countNonZero])
  stats[:standardDeviation] = stats[:variance]**0.5
  stats[:standardDeviationNonZero] = stats[:varianceNonZero]**0.5
end
185
+
186
+
187
+ #############################################################################
188
+ ## OPTPARSE
189
+ ##############################################################################
190
# Default value of every command line option; the handlers below overwrite
# the entries the user sets.
options = {
  input_file: nil,
  output_matrix_file: nil,
  byte_format: :float64,
  input_type: 'pair',
  set_diagonal: false,
  binarize: nil,
  stats: false,
  output_type: 'bin'
}

optparse = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] \n\n"

  opts.on( '-i', '--input_file PATH', 'Input file' ) do |opt|
    options[:input_file] = opt
  end

  opts.on( '-o', '--output_matrix_file PATH', 'Output matrix file' ) do |opt|
    options[:output_matrix_file] = opt
  end

  opts.on( '-b', '--byte_format STRING', 'Format of the numeric values stored in matrix. Default: float64, warning set this to less precission can modify computation results using this matrix.' ) do |opt|
    options[:byte_format] = opt.to_sym
  end

  # The main section also accepts "bin", so the help text lists it.
  opts.on( '-t', '--input_type STRING', 'Set input format file. "pair", "matrix" or "bin"' ) do |opt|
    options[:input_type] = opt
  end

  opts.on( '-d', '--set_diagonal', 'Set to 1.0 the main diagonal' ) do
    options[:set_diagonal] = true
  end

  # Float coercion makes OptionParser reject a non numeric threshold instead
  # of silently reading it as 0.0.
  opts.on( '-B', '--binarize FLOAT', Float, 'Binarize matrix changin x >= thr to one and any other to zero into matrix given' ) do |opt|
    options[:binarize] = opt
  end

  opts.on( '-s', '--get_stats', 'Get stats from the processed matrix' ) do
    options[:stats] = true
  end

  opts.on( '-O', '--output_type STRING', 'Set output format file. "bin" for binary (default) or "mat" for tabulated text file matrix' ) do |opt|
    options[:output_type] = opt
  end

  opts.on( '-h', '--help', 'Display this screen' ) do
    puts opts
    exit
  end
end

optparse.parse!
242
+
243
+ ################################################################################
244
+ ## MAIN
245
+ ###############################################################################
246
# Paths are validated before any work is done, so a missing option produces a
# readable message instead of a TypeError/NoMethodError later on.
input_path = options[:input_file]
output_path = options[:output_matrix_file]
abort('ERROR: an input file is required (-i PATH, or "-" for standard input)') if input_path.nil?
# The pair loader writes "<output>.lst" and both output formats write the matrix.
needs_output = options[:input_type] == 'pair' || %w[bin mat].include?(options[:output_type])
abort('ERROR: an output matrix file is required (-o PATH)') if needs_output && output_path.nil?

matrix = nil
case options[:input_type]
when 'bin'
  # Binary input is loaded from the path itself; no text handle is opened.
  matrix = Npy.load(input_path)
  #matrix = Marshal.load(File.binread(options[:input_file])) # the method needs a path not a IO object
when 'matrix', 'pair'
  source = input_path == '-' ? STDIN : File.open(input_path)
  begin
    if options[:input_type] == 'matrix'
      matrix = load_matrix_file(source)
    else
      matrix, names = load_pair_file(source, options[:byte_format])
      File.open(output_path + '.lst', 'w') { |f| f.print names.join("\n") }
    end
  ensure
    # Release the handle opened above; standard input is left alone.
    source.close unless source.equal?(STDIN)
  end
else
  abort("ERROR: unknown input type '#{options[:input_type]}' (expected \"pair\", \"matrix\" or \"bin\")")
end

if options[:set_diagonal]
  # Only positions that exist on the main diagonal are written.
  matrix.shape.min.times do |n|
    matrix[n, n] = 1.0
  end
end

unless options[:binarize].nil?
  threshold = options[:binarize]
  rows, cols = matrix.shape
  rows.times do |i|
    cols.times do |j|
      matrix[i, j] = matrix[i, j] >= threshold ? 1 : 0
    end
  end
end

if options[:stats]
  get_stats(matrix).each do |stat|
    puts stat.join("\t")
  end
end

if options[:output_type] == 'bin'
  #File.binwrite(options[:output_matrix_file], Marshal.dump(matrix))
  Npy.save(output_path, matrix)
elsif options[:output_type] == 'mat'
  File.open(output_path, 'w') do |f|
    matrix.each_over_axis(0) do |r|
      f.puts r.to_a.join("\t")
    end
  end
end