NetAnalyzer 0.1.5 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,294 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'benchmark'
4
+ require 'optparse'
5
+ #require 'nmatrix'
6
+ require 'numo/narray'
7
+ require 'numo/linalg'
8
+ require 'npy'
9
+ require 'expcalc'
10
+
11
+ #require 'pp'
12
+ #############################################################################
13
+ ## METHODS
14
+ ##############################################################################
15
+
16
# Load a numeric matrix from a delimited text source.
#
# The matrix is allocated when the first line is read, as a square
# Numo::DFloat whose side is the number of columns of that first line.
#
# @param input_file [String, IO] path of the file to read, or an already
#   opened IO-like object (anything responding to +gets+, such as STDIN or
#   a File). IO objects are read from their current position and are NOT
#   closed here; the caller keeps ownership of them.
# @param splitChar [String] column separator (tab by default)
# @return [Numo::DFloat, nil] the loaded matrix, or nil when the source
#   contains no lines
def load_matrix_file(input_file, splitChar = "\t")
  matrix = nil
  counter = 0
  store_row = proc do |line|
    row = line.chomp.split(splitChar).map { |c| c.to_f }
    matrix ||= Numo::DFloat.zeros(row.length, row.length)
    row.each_with_index { |val, i| matrix[counter, i] = val }
    counter += 1
  end
  if input_file.respond_to?(:gets)
    # Already an open stream: iterate it directly.
    input_file.each_line(&store_row)
  else
    # File.foreach closes the underlying handle when iteration finishes,
    # even if a line raises.
    File.foreach(input_file, &store_row)
  end
  matrix
end
32
+
33
# Build a weighted matrix from a tab-separated edge list.
#
# Each line holds "node_a<TAB>node_b[<TAB>weight]"; a missing weight counts
# as 1.0. Every pair is registered in both directions through add_pair.
#
# @param source [#each] object yielding one text line per iteration
# @param byte_format [Symbol] accepted for interface compatibility; it is
#   not referenced in this method's body
# @return [Array] the first two values given by Hash#to_wmatrix on the
#   collected connections, unpacked here as matrix and node names
def load_pair_file(source, byte_format = :float32)
  connections = {}
  source.each do |line|
    first, second, raw_weight = line.chomp.split("\t")
    weight = raw_weight.nil? ? 1.0 : raw_weight.to_f
    [[first, second], [second, first]].each do |from, to|
      add_pair(from, to, weight, connections)
    end
  end
  matrix, names = connections.to_wmatrix
  [matrix, names]
end
44
+
45
# Record a directed, weighted link node_a -> node_b in a nested hash.
#
# @param node_a [Object] key of the outer hash
# @param node_b [Object] key of the inner hash
# @param weight [Numeric] value stored for the pair
# @param connections [Hash] outer hash, modified in place; inner hashes
#   are created on demand with a default value of 0.0
def add_pair(node_a, node_b, weight, connections)
  neighbours = connections[node_a]
  if neighbours.nil?
    # First time node_a is seen: create its neighbour table.
    connections[node_a] = Hash.new(0.0).tap { |fresh| fresh[node_b] = weight }
  else
    neighbours[node_b] = weight
  end
end
55
+
56
# Collect the report rows describing a matrix: global element counts, the
# distribution of its values ("Weigth" rows) and the distribution of the
# per-column connection counts given by get_connection_number ("Edges" rows).
#
# The "Weigth" spelling of the labels is kept as-is because the labels are
# part of the program's printed output.
#
# @param matrix [Numo::NArray] matrix to describe (must respond to +shape+)
# @return [Array<Array>] list of [label, value] pairs in report order
def get_stats(matrix)
  # Shared by the "Weigth" and "Edges" groups, so both always read the
  # same keys from their own stats hash.
  distribution_fields = [
    ['Max', :max],
    ['Min', :min],
    ['Average', :average],
    ['Variance', :variance],
    ['Standard Deviation', :standardDeviation],
    ['Q1', :q1],
    ['Median', :median],
    ['Q3', :q3],
    ['Min Non Zero', :minNonZero],
    ['Average Non Zero', :averageNonZero],
    ['Variance Non Zero', :varianceNonZero],
    ['Standard Deviation Non Zero', :standardDeviationNonZero],
    ['Q1 Non Zero', :q1NonZero],
    ['Median Non Zero', :medianNonZero],
    ['Q3 Non Zero', :q3NonZero]
  ]

  stats = []
  #TODO: transform to Numo::Array operations
  primary_stats = get_primary_stats(matrix)
  stats << ['Matrix - Dimensions', matrix.shape.join('x')]
  stats << ['Matrix - Elements', primary_stats[:count]]
  stats << ['Matrix - Elements Non Zero', primary_stats[:countNonZero]]
  stats << ['Matrix - Non Zero Density', primary_stats[:countNonZero].fdiv(primary_stats[:count])]
  distribution_fields.each do |label, key|
    stats << ["Weigth - #{label}", primary_stats[key]]
  end

  connection_stats = get_primary_stats(get_connection_number(matrix))
  stats << ['Node - Elements', connection_stats[:count]]
  stats << ['Node - Elements Non Zero', connection_stats[:countNonZero]]
  stats << ['Node - Non Zero Density', connection_stats[:countNonZero].fdiv(connection_stats[:count])]
  # Every "Edges" row, including 'Min Non Zero', comes from the
  # connection-count stats rather than from the weight stats.
  distribution_fields.each do |label, key|
    stats << ["Edges - #{label}", connection_stats[key]]
  end
  stats
end
102
+
103
# Count, for every column, how many non-zero entries it holds outside the
# main diagonal (i.e. links to other nodes; the self link is never counted).
#
# The diagonal cell is skipped explicitly instead of subtracting one from
# the total, so the result is also right when the diagonal is zero.
#
# @param matrix [Numo::NArray] two-dimensional matrix
# @return [Numo::DFloat] 1 x cols matrix with the count for each column
def get_connection_number(matrix)
  rows, cols = matrix.shape
  connections = Numo::DFloat.zeros(1, cols)
  cols.times do |col|
    count = 0
    rows.times do |row|
      next if row == col # the connection with self is not counted
      count += 1 if matrix[row, col] != 0
    end
    connections[0, col] = count
  end
  connections
end
116
+
117
# Return a new hash whose keys are the result of passing each original key
# to the given block; values are carried over untouched and the input hash
# is left unmodified. When two keys map to the same new key, the later
# entry wins.
#
# @param hash [Hash] source hash
# @yieldparam key [Object] an original key
# @yieldreturn [Object] the key to use in the new hash
# @return [Hash] hash with the renamed keys
def transform_keys(hash)
  hash.each_with_object({}) do |(old_key, value), renamed|
    renamed[yield(old_key)] = value
  end
end
125
+
126
# Compute descriptive statistics over every element of a matrix.
#
# Gathers count, sum, max and min over all values plus count and minimum
# over the non-zero values, then merges in the quartiles from get_quartiles
# (computed over all values and over the non-zero values, the latter with a
# 'NonZero' key suffix) and the figures added by get_composed_stats.
#
# @param matrix [Numo::NArray] matrix supporting +[0, 0]+, +each+ and +to_a+
# @return [Hash{Symbol=>Numeric}] stats hash; keys never assigned read as 0
def get_primary_stats(matrix)
  stats = Hash.new(0)
  max = matrix[0, 0] # Initialize max value
  min = matrix[0, 0] # Initialize min value
  # Tracked separately from min and seeded with nil, so that a zero in the
  # first cell can never be reported as the smallest non-zero value.
  min_non_zero = nil
  matrix.each do |value|
    stats[:count] += 1
    stats[:sum] += value
    max = value if value > max
    min = value if value < min
    next if value == 0
    stats[:countNonZero] += 1
    min_non_zero = value if min_non_zero.nil? || value < min_non_zero
  end
  stats[:max] = max
  stats[:min] = min
  # Falls back to 0 when the matrix has no non-zero value at all.
  stats[:minNonZero] = min_non_zero || 0

  values = matrix.to_a.flatten.sort
  quartile_stats = get_quartiles(values, stats[:count])
  stats.merge!(transform_keys(quartile_stats) { |k| k.to_sym })

  non_zero_values = values.reject { |v| v == 0 }
  quartile_stats_non_zero = get_quartiles(non_zero_values, stats[:countNonZero])
  stats.merge!(transform_keys(quartile_stats_non_zero) { |k| (k + 'NonZero').to_sym })

  get_composed_stats(stats, matrix)
  stats
end
153
+
154
# Compute first quartile, median and third quartile of a sorted list.
#
# Positions are taken at 25%, 50% and 75% of n_items (minus one, as indexes
# are zero-based). For an even n_items each figure is the mean of the value
# at the truncated position and the next one; for an odd n_items it is the
# value at the position rounded up.
#
# @param values [Array<Numeric>] values sorted in ascending order
# @param n_items [Integer] number of items in +values+
# @return [Hash{String=>Float,Numeric,nil}] hash with 'q1', 'median' and
#   'q3' keys; all three are nil when there are no items
def get_quartiles(values, n_items)
  # Nothing to summarise: avoid indexing into an empty list.
  return { 'q1' => nil, 'median' => nil, 'q3' => nil } if n_items.zero?

  fractions = { 'q1' => 0.25, 'median' => 0.5, 'q3' => 0.75 }
  fractions.each_with_object({}) do |(name, fraction), stats|
    coor = n_items * fraction - 1
    stats[name] =
      if n_items % 2 == 0
        (values[coor.to_i] + values[coor.to_i + 1]).fdiv(2)
      else
        values[coor.ceil]
      end
  end
end
170
+
171
# Add averages, variances and standard deviations to a stats hash.
#
# Reads :sum, :count and :countNonZero from +stats+ and writes :average,
# :averageNonZero, :sumDevs, :sumDevsNonZero, :variance, :varianceNonZero,
# :standardDeviation and :standardDeviationNonZero into it. Variances are
# population variances (sum of squared deviations divided by the count).
#
# @param stats [Hash] stats hash, modified in place
# @param matrix [#each] collection yielding every numeric value
def get_composed_stats(stats, matrix)
  average = stats[:sum].fdiv(stats[:count])
  average_non_zero = stats[:sum].fdiv(stats[:countNonZero])
  stats[:average] = average
  stats[:averageNonZero] = average_non_zero

  # Squared deviations have to be accumulated over ALL values; start from
  # zero explicitly so the result does not depend on the hash's default.
  sum_devs = 0.0
  sum_devs_non_zero = 0.0
  matrix.each do |value|
    sum_devs += (value - average) ** 2
    sum_devs_non_zero += (value - average_non_zero) ** 2 if value != 0
  end
  stats[:sumDevs] = sum_devs
  stats[:sumDevsNonZero] = sum_devs_non_zero

  stats[:variance] = stats[:sumDevs].fdiv(stats[:count])
  stats[:varianceNonZero] = stats[:sumDevsNonZero].fdiv(stats[:countNonZero])
  stats[:standardDeviation] = stats[:variance] ** 0.5
  stats[:standardDeviationNonZero] = stats[:varianceNonZero] ** 0.5
end
185
+
186
+
187
+ #############################################################################
188
+ ## OPTPARSE
189
+ ##############################################################################
190
# Command line options, pre-loaded with their default values. The option
# handlers below overwrite the matching entry when a flag is given.
options = {
  input_file: nil,
  output_matrix_file: nil,
  byte_format: :float64,
  input_type: 'pair',
  set_diagonal: false,
  binarize: nil,
  stats: false,
  output_type: 'bin'
}

optparse = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] \n\n"

  opts.on( '-i', '--input_file PATH', 'Input file' ) do |opt|
    options[:input_file] = opt
  end

  opts.on( '-o', '--output_matrix_file PATH', 'Output matrix file' ) do |opt|
    options[:output_matrix_file] = opt
  end

  opts.on( '-b', '--byte_format STRING', 'Format of the numeric values stored in matrix. Default: float64, warning set this to less precission can modify computation results using this matrix.' ) do |opt|
    options[:byte_format] = opt.to_sym
  end

  # The help text lists every input type the main section handles,
  # including the binary one.
  opts.on( '-t', '--input_type STRING', 'Set input format file. "pair" (default), "matrix" or "bin"' ) do |opt|
    options[:input_type] = opt
  end

  opts.on( '-d', '--set_diagonal', 'Set to 1.0 the main diagonal' ) do
    options[:set_diagonal] = true
  end

  opts.on( '-B', '--binarize FLOAT', 'Binarize matrix changin x >= thr to one and any other to zero into matrix given' ) do |opt|
    options[:binarize] = opt.to_f
  end

  opts.on( '-s', '--get_stats', 'Get stats from the processed matrix' ) do
    options[:stats] = true
  end

  opts.on( '-O', '--output_type STRING', 'Set output format file. "bin" for binary (default) or "mat" for tabulated text file matrix' ) do |opt|
    options[:output_type] = opt
  end

  opts.on( '-h', '--help', 'Display this screen' ) do
    puts opts
    exit
  end
end

# Consumes the recognised flags from ARGV.
optparse.parse!
242
+
243
+ ################################################################################
244
+ ## MAIN
245
+ ###############################################################################
246
# Fail early with a readable message instead of a TypeError from File.open.
if options[:input_file].nil?
  abort('ERROR: no input given. Use -i/--input_file PATH, or "-" to read from STDIN')
end

# Load the matrix according to the declared input format.
matrix = nil
case options[:input_type]
when 'bin'
  # Npy.load needs a path, so no stream is opened for this format.
  matrix = Npy.load(options[:input_file])
  #matrix = Marshal.load(File.binread(options[:input_file])) # the method needs a path not a IO object
when 'matrix', 'pair'
  read_stdin = options[:input_file] == '-'
  source = read_stdin ? STDIN : File.open(options[:input_file])
  begin
    if options[:input_type] == 'matrix'
      matrix = load_matrix_file(source)
    else
      matrix, names = load_pair_file(source, options[:byte_format])
      # Node names are written next to the output matrix, one per line.
      File.open(options[:output_matrix_file]+'.lst', 'w'){|f| f.print names.join("\n")}
    end
  ensure
    # Only close what was opened here; STDIN is left alone.
    source.close unless read_stdin
  end
else
  abort("ERROR: unknown input type '#{options[:input_type]}'. Use \"pair\", \"matrix\" or \"bin\"")
end

if options[:set_diagonal]
  # Bounded by the shorter axis so a non-square matrix cannot be indexed
  # out of range; identical to the full diagonal for square matrices.
  matrix.shape.min.times do |n|
    matrix[n, n] = 1.0
  end
end

if !options[:binarize].nil?
  rows, cols = matrix.shape
  rows.times do |i|
    cols.times do |j|
      matrix[i,j] = matrix[i,j] >= options[:binarize] ? 1 : 0
    end
  end
end

if options[:stats]
  get_stats(matrix).each do |stat|
    puts stat.join("\t")
  end
end

# Any other output type writes nothing, as before.
if options[:output_type] == 'bin'
  #File.binwrite(options[:output_matrix_file], Marshal.dump(matrix))
  Npy.save(options[:output_matrix_file], matrix)
elsif options[:output_type] == 'mat'
  File.open(options[:output_matrix_file], 'w') do |f|
    matrix.each_over_axis(0) do |r|
      f.puts r.to_a.join("\t")
    end
  end
end