geoptima 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/csv_chart +167 -17
- data/bin/csv_merge +204 -30
- data/bin/show_geoptima +128 -35
- data/examples/csv_chart.rb +167 -17
- data/examples/csv_merge.rb +204 -30
- data/examples/show_geoptima.rb +128 -35
- data/geoptima.gemspec +1 -1
- data/lib/geoptima/data.rb +40 -37
- data/lib/geoptima/version.rb +1 -1
- metadata +5 -4
data/bin/csv_chart
CHANGED
@@ -10,7 +10,7 @@ require 'geoptima/options'
|
|
10
10
|
require 'fileutils'
|
11
11
|
require 'geoptima/daterange'
|
12
12
|
|
13
|
-
Geoptima::assert_version("0.1.
|
13
|
+
Geoptima::assert_version("0.1.4")
|
14
14
|
Geoptima::Chart.available? || puts("No charting libraries available") || exit(-1)
|
15
15
|
|
16
16
|
$export_dir = '.'
|
@@ -26,9 +26,7 @@ $files = Geoptima::Options.process_args do |option|
|
|
26
26
|
option.S {$specfile = ARGV.shift}
|
27
27
|
option.P {$diversity = ARGV.shift.to_f}
|
28
28
|
option.W {$chart_width = ARGV.shift.to_i}
|
29
|
-
option.T
|
30
|
-
$time_range = Geoptima::DateRange.from ARGV.shift
|
31
|
-
end
|
29
|
+
option.T {$time_range = Geoptima::DateRange.from ARGV.shift}
|
32
30
|
end
|
33
31
|
|
34
32
|
FileUtils.mkdir_p $export_dir
|
@@ -148,8 +146,10 @@ class StatsManager
|
|
148
146
|
end
|
149
147
|
|
150
148
|
module Geoptima
|
149
|
+
|
150
|
+
# Class for original stats approach of creating a new 'column' from simple combinations of other columns
|
151
151
|
class StatSpec
|
152
|
-
attr_reader :header, :event, :index, :indices, :fields, :options, :proc, :groups
|
152
|
+
attr_reader :header, :event, :index, :indices, :fields, :options, :proc, :groups, :values
|
153
153
|
def initialize(header,*fields,&block)
|
154
154
|
@header = header
|
155
155
|
@fields = fields
|
@@ -184,13 +184,13 @@ module Geoptima
|
|
184
184
|
key = @group.call(time)
|
185
185
|
ghead = "#{header} #{key}"
|
186
186
|
@groups[key] = ghead
|
187
|
-
stats_manager.add(
|
187
|
+
stats_manager.add(map_fields(fields),ghead,nil)
|
188
188
|
end
|
189
189
|
rescue ArgumentError
|
190
190
|
puts "Error: Unable to process time field[#{time}]: #{$!}"
|
191
191
|
end
|
192
192
|
end
|
193
|
-
stats_manager.add(
|
193
|
+
stats_manager.add(map_fields(fields),header,index)
|
194
194
|
end
|
195
195
|
def div
|
196
196
|
unless @div
|
@@ -224,18 +224,30 @@ module Geoptima
|
|
224
224
|
val
|
225
225
|
end
|
226
226
|
end
|
227
|
-
def
|
227
|
+
def prepare_values(values)
|
228
|
+
@values = []
|
228
229
|
if @indices
|
229
230
|
puts "StatSpec[#{self}]: #{options.inspect}" if($debug)
|
230
|
-
|
231
|
-
puts "\tVALUES: #{
|
232
|
-
|
231
|
+
@values = @indices.map{|i| values[i]}
|
232
|
+
puts "\tVALUES: #{values.inspect}" if($debug)
|
233
|
+
end
|
234
|
+
@values
|
235
|
+
end
|
236
|
+
def vals_for(values,filter={})
|
237
|
+
if @indices
|
238
|
+
prepare_values(values)
|
239
|
+
(options[:filter] || filter).each do |field,expected|
|
233
240
|
puts "\t\tChecking if field #{field} is #{expected}" if($debug)
|
234
241
|
puts "\t\tLooking for #{field} or #{event}.#{field} in #{@fields.inspect}" if($debug)
|
235
242
|
hi = @fields.index(field.to_s) || @fields.index("#{event}.#{field}")
|
236
|
-
puts "\t\t#{field} -> #{hi} -> #{hi &&
|
237
|
-
return nil unless(hi &&
|
243
|
+
puts "\t\t#{field} -> #{hi} -> #{hi && values[hi]}" if($debug)
|
244
|
+
return nil unless(hi && values[hi] && (expected === values[hi].downcase || values[hi].downcase === expected.to_s.downcase))
|
238
245
|
end
|
246
|
+
values
|
247
|
+
end
|
248
|
+
end
|
249
|
+
def map_fields(values,filter={})
|
250
|
+
if vals = vals_for(values,filter)
|
239
251
|
val = proc.nil? ? vals[0] : proc.call(*vals)
|
240
252
|
puts "\tBLOCK MAP: #{vals.inspect} --> #{val.inspect}" if($debug)
|
241
253
|
if options[:div]
|
@@ -266,6 +278,128 @@ module Geoptima
|
|
266
278
|
"#{header}[#{index}]<-#{fields.inspect}(#{indices && indices.join(',')})"
|
267
279
|
end
|
268
280
|
end
|
281
|
+
|
282
|
+
# A single grouping dimension used by KPISpec.
#
# A group has a name, an options hash and a block that converts an input into
# the key for that group. When the :is_time option is set, the block receives
# the time passed to #call; otherwise it receives the entry of the values
# array found at #index.
class Group
  attr_reader :name, :options, :proc, :is_time, :index
  # name:: identifier of the group
  # options:: hash of options; only :is_time is read here
  # block:: converts the time (or the indexed value) into the group key
  def initialize(name,options={},&block)
    @name = name
    @options = options
    @proc = block
    @is_time = options[:is_time]
  end
  # Records which position of the values array this group reads from.
  def index= (ind)
    puts "Set group header index=#{ind} for group '#{name}'"
    @index = ind
  end
  # Returns the group key for one record.
  #
  # Time groups are always keyed from +time+, even when the block returns
  # nil or false (the previous '&& ||' chain fell through to the value lookup
  # in that case). Value groups with no index pass nil to the block instead
  # of raising a TypeError on values[nil].
  def call(time,values)
    if is_time
      @proc.call(time)
    else
      @proc.call(index && values[index])
    end
  end
end
|
298
|
+
|
299
|
+
# The KPI class allows for complex statistics called 'Key Performance Indicators'.
|
300
|
+
# These are specified using four functions:
|
301
|
+
# filter: how to choose rows to include in the statistics (default is '!map.nil?')
|
302
|
+
# map: how to convert a row into the internal stats (default is input columns)
|
303
|
+
# aggregate: how to aggregate internal stats to higher levels (eg. daily, default is count)
|
304
|
+
# reduce: how to extract presentable stats from internal stats (eg. avg=total/count, default is internal stats)
|
305
|
+
#
|
306
|
+
# The KPI is defined with a name and set of columns to use, followed by the block
|
307
|
+
# defining the four functions above. For example:
|
308
|
+
#
|
309
|
+
# kpi 'DNS Success', 'dnsLookup.address', 'dnsLookup.error', 'dnsLookup.interface' do |f|
|
310
|
+
# f.filter {|addr,err,int| addr =~/\w/}
|
311
|
+
# f.map {|addr,err,int| err.length==0 ? [1,1] : [1,0]}
|
312
|
+
# f.aggregate {|a,v| a[0]+=v[0];a[1]+=v[1];a}
|
313
|
+
# f.reduce {|a| 100.0*a[1].to_f/a[0].to_f}
|
314
|
+
# end
|
315
|
+
#
|
316
|
+
# Currently this class extends StatSpec for access to the prepare_indices method.
|
317
|
+
# We should consider moving that to a mixin, or deprecating the StatSpec class
|
318
|
+
# entirely since KPISpec should provide a superset of features.
|
319
|
+
# Specification of a single KPI built from filter/map/aggregate/reduce blocks.
#
# The trailing Hash in +fields+ (if any) is taken as options; the :group
# option names one or more groupings (:months, :weeks, :days, :hours, or a
# field name). Extends StatSpec for prepare_indices, prepare_values and the
# attribute readers.
class KPISpec < StatSpec
  # header:: name of the KPI
  # fields:: field names, optionally followed by an options Hash
  # block:: receives this spec so it can register filter/map/aggregate/reduce
  #         (and group_by) blocks
  def initialize(header,*fields,&block)
    @header = header
    @fields = fields
    @event = @fields[0].split(/\./)[0]
    # Grouping state must exist before the block runs, otherwise any group_by
    # call made inside the block would be discarded by a later reset.
    @group_procs = []
    @groups = {}
    block.call self unless(block.nil?)
    if @fields[-1].is_a?(Hash)
      @options = @fields.pop
    else
      @options = {}
    end
    if @options[:group]
      [@options[:group]].flatten.compact.sort.uniq.each do |group_name|
        gname = group_name.to_s.intern
        case gname
        when :months
          group_by(gname,true) {|t| t.strftime("%Y-%m")}
        when :weeks
          group_by(gname,true) {|t| t.strftime("%Y w%W")}
        when :days
          group_by(gname,true) {|t| t.strftime("%Y-%m-%d")}
        when :hours
          group_by(gname,true) {|t| t.strftime("%Y-%m-%d %H")}
        else
          group_by(gname) {|f| f}
        end
      end
    end
    puts "Created StatSpec: #{self}"
  end
  # Registers an additional grouping. Appends to the list; the previous code
  # replaced the list with a single Group, which broke #add and
  # #prepare_indices (both treat @group_procs as an Array).
  def group_by(field,is_time=false,&block)
    @group_procs << Group.new(field,:is_time => is_time,&block)
  end
  # Stores the block that decides whether a row is included.
  def filter(&block)
    @filter_proc = block
  end
  # Stores the block that converts a row into the internal stats.
  def map(&block)
    @map_proc = block
  end
  # Stores the aggregation block (not invoked within this class).
  def aggregate(&block)
    @aggregate_proc = block
  end
  # Stores the reduction block (not invoked within this class).
  def reduce(&block)
    @reduce_proc = block
  end
  # Adds one row to the stats manager: once per combined group key (when
  # groups are defined and the row time is within $time_range) and once under
  # the plain header.
  def add(stats_manager,values)
    # The filter/map blocks take the selected column values, not the field
    # names, so pass on what prepare_values extracted from the row.
    vals = prepare_values(values)
    if @group_procs.length > 0
      begin
        time = DateTime.parse(values[stats_manager.time_index])
        if $time_range.nil? || $time_range.include?(time)
          # NOTE(review): group indices are positions within +fields+, so the
          # groups are given the prepared values; this assumes @indices is
          # parallel to +fields+ -- TODO confirm against StatSpec#prepare_indices
          key = @group_procs.map{|group| group.call(time,vals)}.join(' ')
          ghead = "#{header} #{key}"
          @groups[key] = ghead
          stats_manager.add(map_fields(vals),ghead,nil)
        end
      rescue ArgumentError
        puts "Error: Unable to process time field[#{time}]: #{$!}"
      end
    end
    stats_manager.add(map_fields(vals),header,index)
  end
  # Applies the filter and map blocks to the given values. Returns nil when
  # +values+ is nil or the filter rejects the row; falls back to the first
  # value when there is no map block or it returns nil/false.
  # The +filter+ argument is unused here.
  def map_fields(values,filter=nil)
    if values
      if @filter_proc.nil? || @filter_proc.call(*values)
        val = @map_proc && @map_proc.call(*values) || values[0]
        puts "\tBLOCK MAP: #{values.inspect} --> #{val.inspect}" if($debug)
      end
      val
    end
  end
  # Runs the inherited index preparation, then resolves each group's position
  # within +fields+. Group names are symbols while fields are strings, so the
  # name is converted before the lookup.
  def prepare_indices(stats_manager,headers)
    super(stats_manager,headers)
    @group_procs.each do |g|
      g.index = fields.index(g.name.to_s)
    end
  end
end
|
401
|
+
|
402
|
+
# Class for specifications of individual charts
|
269
403
|
class ChartSpec
|
270
404
|
attr_reader :chart_type, :header, :options
|
271
405
|
def initialize(header,options={})
|
@@ -328,14 +462,15 @@ module Geoptima
|
|
328
462
|
g.write("#{$export_dir}/Chart_#{stats_manager.name}_#{header}_#{chart_type}_distribution.png")
|
329
463
|
end
|
330
464
|
def to_s
|
331
|
-
"#{chart_type.upcase}-#{header}"
|
465
|
+
"#{chart_type.to_s.upcase}-#{header}"
|
332
466
|
end
|
333
467
|
end
|
334
468
|
class StatsSpecs
|
335
|
-
attr_reader :chart_specs, :stat_specs
|
469
|
+
attr_reader :chart_specs, :stat_specs, :kpi_specs
|
336
470
|
def initialize(specfile)
|
337
471
|
@chart_specs = []
|
338
472
|
@stat_specs = []
|
473
|
+
@kpi_specs = []
|
339
474
|
instance_eval(File.open(specfile).read)
|
340
475
|
end
|
341
476
|
def category_chart(header,options={})
|
@@ -353,10 +488,16 @@ module Geoptima
|
|
353
488
|
def stats(header,*fields,&block)
|
354
489
|
@stat_specs << StatSpec.new(header,*fields,&block)
|
355
490
|
end
|
491
|
+
def kpi(header,*fields,&block)
|
492
|
+
@kpi_specs << KPISpec.new(header,*fields,&block)
|
493
|
+
end
|
356
494
|
def add_stats(stats_manager,headers)
|
357
495
|
stat_specs.each do |stat_spec|
|
358
496
|
stat_spec.prepare_indices(stats_manager,headers)
|
359
497
|
end
|
498
|
+
kpi_specs.each do |kpi_spec|
|
499
|
+
kpi_spec.prepare_indices(stats_manager,headers)
|
500
|
+
end
|
360
501
|
end
|
361
502
|
def add_fields(stats_manager,fields)
|
362
503
|
puts "Adding fields to #{stat_specs.length} StatSpec's" if($debug)
|
@@ -364,9 +505,14 @@ module Geoptima
|
|
364
505
|
puts "Adding fields to StatSpec: #{stat_spec}" if($debug)
|
365
506
|
stat_spec.add(stats_manager,fields)
|
366
507
|
end
|
508
|
+
puts "Adding fields to #{kpi_specs.length} KPISpec's" if($debug)
|
509
|
+
kpi_specs.each do |kpi_spec|
|
510
|
+
puts "Adding fields to KPISpec: #{kpi_spec}" if($debug)
|
511
|
+
kpi_spec.add(stats_manager,fields)
|
512
|
+
end
|
367
513
|
end
|
368
514
|
def to_s
|
369
|
-
"Stats[#{@stat_specs.join(', ')}] AND Charts[#{@chart_specs.join(', ')}]"
|
515
|
+
"Stats[#{@stat_specs.join(', ')}] AND KPIs[#{@kpi_specs.join(', ')}] AND Charts[#{@chart_specs.join(', ')}]"
|
370
516
|
end
|
371
517
|
end
|
372
518
|
end
|
@@ -468,7 +614,11 @@ end
|
|
468
614
|
$stats_managers.each do |name,stats_manager|
|
469
615
|
if $specs
|
470
616
|
$specs.chart_specs.each do |chart_spec|
|
471
|
-
|
617
|
+
begin
|
618
|
+
chart_spec.process(stats_manager)
|
619
|
+
rescue NoMethodError
|
620
|
+
puts "Failed to process chart '#{chart_spec}': #{$!}"
|
621
|
+
end
|
472
622
|
end
|
473
623
|
end
|
474
624
|
if $create_all
|
data/bin/csv_merge
CHANGED
@@ -9,20 +9,24 @@ require 'geoptima/options'
|
|
9
9
|
require 'fileutils'
|
10
10
|
require 'geoptima/daterange'
|
11
11
|
|
12
|
-
Geoptima::assert_version("0.1.
|
12
|
+
Geoptima::assert_version("0.1.4")
|
13
13
|
|
14
14
|
$export_dir = '.'
|
15
15
|
$export_name = 'merged.csv'
|
16
|
+
$split_by = :days
|
16
17
|
|
17
18
|
$files = Geoptima::Options.process_args do |option|
|
18
19
|
option.t {$time_split = true}
|
20
|
+
option.m {$low_memory = true}
|
19
21
|
option.D {$export_dir = ARGV.shift}
|
20
22
|
option.N {$export_name = ARGV.shift}
|
21
|
-
option.
|
22
|
-
$
|
23
|
-
|
24
|
-
|
23
|
+
option.S do
|
24
|
+
$split_by = case ARGV.shift.downcase.intern
|
25
|
+
when :days ; :days
|
26
|
+
else :days
|
27
|
+
end
|
25
28
|
end
|
29
|
+
option.T {$time_range = Geoptima::DateRange.from ARGV.shift}
|
26
30
|
end
|
27
31
|
|
28
32
|
FileUtils.mkdir_p $export_dir
|
@@ -30,46 +34,216 @@ FileUtils.mkdir_p $export_dir
|
|
30
34
|
$help = true unless($files.length>0)
|
31
35
|
if $help
|
32
36
|
puts <<EOHELP
|
33
|
-
Usage:
|
37
|
+
Usage: csv_merge <-dhtm> <-N name> <-D dir> <-T range> <-S split_by> files...
|
34
38
|
-d debug mode #{cw $debug}
|
35
39
|
-h print this help #{cw $help}
|
36
|
-
-t merge and split by time (
|
37
|
-
-
|
40
|
+
-t merge and split by time (#{$split_by}) #{cw $time_split}
|
41
|
+
-m use low memory, temporarily storing to intermediate files #{cw $low_memory}
|
42
|
+
-N use specified name for merged dataset: #{$export_name}
|
38
43
|
-D export to specified directory: #{$export_dir}
|
44
|
+
-S time units to split exports by: #{$split_by}
|
39
45
|
-T set time-range filter: #{$time_range}
|
40
46
|
Files to import: #{$files.join(', ')}
|
41
47
|
EOHELP
|
42
48
|
exit
|
43
49
|
end
|
44
50
|
|
45
|
-
class
|
46
|
-
attr_reader :
|
47
|
-
def initialize(
|
51
|
+
# One parsed row of a tab-separated input file, together with its timestamp.
class CSVRecord
  attr_reader :time, :fields, :day
  # fields:: array of column values for the row
  # time_index:: position of the timestamp column (defaults to the first)
  # Raises whatever DateTime.parse raises for an unparseable timestamp.
  def initialize(fields,time_index=0)
    @fields = fields
    @time = DateTime.parse(fields[time_index])
    @day = @time.strftime("%Y-%m-%d")
  end
  # Returns the column value at the given position.
  def [](index)
    fields[index]
  end
  # Orders records by timestamp. The previous version compared the timestamp
  # with the other record object itself, which yields nil and breaks sorting.
  # Bare time values are still accepted for backward compatibility.
  def <=>(other)
    time <=> (other.respond_to?(:time) ? other.time : other)
  end
  # True when no range is given or the range includes this record's time.
  def within(time_range)
    time_range.nil? || time_range.include?(time)
  end
end
|
50
68
|
|
51
|
-
|
52
|
-
lines
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
69
|
+
# The contents of one tab-separated input file: its headers, a per-day record
# count and (unless $low_memory is set) the parsed records themselves.
class CSVDataset
  attr_reader :filename, :headers, :day_map, :lines, :count, :record_creation_duration
  # Reads the whole file immediately, counting and (optionally) keeping records.
  def initialize(filename)
    @filename = filename
    @lines = []
    @day_map = {}
    @record_creation_duration = 0
    @count = 0
    @headers = nil
    read_file do |fields|
      add fields
    end
  end
  # Yields the tab-split fields of every line after the first. The first line
  # supplies the headers; a first line with fewer than two columns causes the
  # file to be rejected. The file is opened in block form so the handle is
  # closed even when reading stops early.
  def read_file
    line_number = 0
    File.open(filename) do |io|
      io.each_line do |line|
        fields=line.chomp.split(/\t/)
        if line_number > 0
          puts "Processing line: #{line}" if($debug)
          yield fields
        else
          if fields.length<2
            # was "#{file}", an undefined name that raised NameError here
            puts "Too few headers, rejecting #{filename}"
            break
          end
          @headers ||= fields
        end
        line_number += 1
      end
    end
    @export_headers ||= @headers
  end
  # Builds a record from the fields and, when it is accepted, updates the
  # per-day counts, the stored lines and the timing total. Returns the record
  # or nil.
  def add(fields)
    start_time = Time.new
    line = create_line(fields)
    if line
      @day_map[line.day] ||= 0
      @day_map[line.day] += 1
      @lines << line unless($low_memory)
      @count += 1
      @record_creation_duration += Time.new - start_time
    end
    line
  end
  # Returns a CSVRecord for the fields, or nil when the record lies outside
  # $time_range or its timestamp raises ArgumentError while parsing.
  def create_line(fields)
    begin
      line = CSVRecord.new(fields,0)
      if(line.within($time_range))
        line
      else
        nil
      end
    rescue ArgumentError
      puts "Failed to parse line with timestamp='#{fields[0]}': #{$!}"
      nil
    end
  end
  # Returns, for each export header, the position of that header in this
  # dataset (nil when the dataset lacks it). Passing +eh+ replaces the export
  # headers and rebuilds the map. Tolerates a dataset with no headers.
  def header_map(eh=nil)
    if eh
      @export_headers = eh
      @header_map = nil
    end
    unless @header_map
      own_headers = @headers || []
      @header_map = (@export_headers || own_headers).map do |head|
        own_headers.index(head)
      end
    end
    @header_map
  end
  # Rearranges one record into export-header order, with nil for missing columns.
  def map_line(line)
    header_map.map do |index|
      index && line[index]
    end
  end
  # Sorted list of the days seen in this dataset.
  def days
    @day_map.keys.sort
  end
  # Yields (day, mapped fields) for every accepted record. In low-memory mode
  # the file is read again; rows that are filtered out or fail to parse give
  # no record and are skipped rather than dereferenced.
  def each(eh=nil)
    header_map(eh)
    if $low_memory
      read_file do |fields|
        line = create_line fields
        yield line.day,map_line(line) if line
      end
    else
      (@lines || []).each do |line|
        yield line.day,map_line(line)
      end
    end
  end
  # Datasets are ordered by filename.
  def <=>(other)
    self.filename <=> other.filename
  end
  # Number of accepted records.
  def length
    count
  end
end
|
75
165
|
|
166
|
+
# The collection of loaded datasets, with the two export strategies:
# one file per day, or a single merged file.
class CSVDatasets
  attr_reader :datasets
  def initialize
    @datasets = []
  end
  # Loads one file as a CSVDataset and keeps it when it contains records.
  # Returns the dataset either way.
  def add_file(file)
    puts "About to read file #{file}"
    dataset = CSVDataset.new(file)
    @datasets << dataset if(dataset && dataset.length>0)
    dataset
  end
  # Writes one file per day into $export_dir, named after $export_name with
  # the day appended. Every file starts with the combined header row. The
  # output files are closed even if writing fails part-way.
  def export_days
    headers = @datasets.map{|d| d.headers}.flatten.uniq
    days = @datasets.map{|d| d.days}.flatten.sort.uniq
    day_files = {}
    day_names = {}
    count = {}
    duration = {}
    begin
      days.each do |day|
        filename = "#{$export_dir}/#{$export_name.gsub(/\.csv$/,'')}_#{day}.csv"
        puts "Exporting #{filename} for #{day}"
        day_names[day] = filename
        day_files[day] = File.open(filename,'w')
        day_files[day].puts headers.join("\t")
        count[day] = 0
        duration[day] = 0
      end
      @datasets.sort.each do |dataset|
        dataset.each(headers) do |day,line|
          start_time = Time.new
          day_files[day].puts line.join("\t")
          duration[day] += Time.new - start_time
          count[day] += 1
        end
      end
    ensure
      day_files.each_value do |out|
        out.close unless out.closed?
      end
    end
    day_files.each_key do |day|
      puts "\tExported #{count[day]} records to #{day_names[day]} in #{duration[day]} seconds"
    end
    day_files
  end
  # Writes all datasets into the single file $export_dir/$export_name using
  # the sorted union of their headers. The headers belong on the dataset
  # iteration; previously they were passed to Array#each, which takes no
  # arguments and raised ArgumentError.
  def export_merged
    headers = @datasets.map{|d| d.headers}.flatten.sort.uniq
    filename = "#{$export_dir}/#{$export_name}"
    File.open(filename,'w') do |out|
      out.puts headers.join("\t")
      @datasets.sort.each do |dataset|
        dataset.each(headers) do |day,line|
          out.puts line.join("\t")
        end
      end
    end
  end
end
|
224
|
+
|
225
|
+
# Load every input file, reporting timing and the per-day record distribution,
# then export either one file per day or a single merged file.
$datasets = CSVDatasets.new

$files.each do |file|
  start_time = Time.new
  ds = $datasets.add_file(file)
  duration = Time.new - start_time
  puts "\tLoaded #{file} in #{duration} seconds"
  # With a zero elapsed time the ratio is NaN, and NaN.to_i raises
  # FloatDomainError, so report 0% in that case.
  creation_percent = duration.to_f > 0 ? (100.0 * ds.record_creation_duration.to_f/duration.to_f).to_i : 0
  puts "\t#{creation_percent}% = #{ds.record_creation_duration}/#{duration} was spent creating records"
  puts "\tFile contained #{ds.length} events for #{ds.days.length} days:"
  ds.days.each do |day|
    puts "\t\t#{day}: \t#{(100.0 * ds.day_map[day].to_f/ds.length.to_f).to_i}%\t#{ds.day_map[day]} records"
  end
end

start_time = Time.new

if $time_split
  $datasets.export_days
else
  $datasets.export_merged
end

duration = Time.new - start_time
puts "Exported in #{duration} seconds"
|
249
|
+
|