geoptima 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/csv_chart +167 -17
- data/bin/csv_merge +204 -30
- data/bin/show_geoptima +128 -35
- data/examples/csv_chart.rb +167 -17
- data/examples/csv_merge.rb +204 -30
- data/examples/show_geoptima.rb +128 -35
- data/geoptima.gemspec +1 -1
- data/lib/geoptima/data.rb +40 -37
- data/lib/geoptima/version.rb +1 -1
- metadata +5 -4
data/bin/csv_chart
CHANGED
@@ -10,7 +10,7 @@ require 'geoptima/options'
 require 'fileutils'
 require 'geoptima/daterange'
 
-Geoptima::assert_version("0.1.3")
+Geoptima::assert_version("0.1.4")
 Geoptima::Chart.available? || puts("No charting libraries available") || exit(-1)
 
 $export_dir = '.'
@@ -26,9 +26,7 @@ $files = Geoptima::Options.process_args do |option|
   option.S {$specfile = ARGV.shift}
   option.P {$diversity = ARGV.shift.to_f}
   option.W {$chart_width = ARGV.shift.to_i}
-  option.T do
-    $time_range = Geoptima::DateRange.from ARGV.shift
-  end
+  option.T {$time_range = Geoptima::DateRange.from ARGV.shift}
 end
 
 FileUtils.mkdir_p $export_dir
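
Note: the `option.T` handler is now a one-liner matching the other options; behaviour is unchanged. The range-string format accepted by `Geoptima::DateRange.from` is defined in geoptima/daterange and not shown in this diff, so the literal in this sketch is a hypothetical placeholder; the `include?` test is the same pattern used by `KPISpec#add` and `CSVRecord#within` elsewhere in this release:

    require 'date'
    require 'geoptima/daterange'

    # Hypothetical range string; see geoptima/daterange for the real format.
    range = Geoptima::DateRange.from "2012-01-01..2012-02-01"
    time  = DateTime.parse("2012-01-26T12:00:00")
    keep  = range.nil? || range.include?(time)   # the -T filtering pattern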
@@ -148,8 +146,10 @@ class StatsManager
 end
 
 module Geoptima
+
+  # Class for original stats approach of creating a new 'column' from simple combinations of other columns
   class StatSpec
-    attr_reader :header, :event, :index, :indices, :fields, :options, :proc, :groups
+    attr_reader :header, :event, :index, :indices, :fields, :options, :proc, :groups, :values
     def initialize(header,*fields,&block)
       @header = header
       @fields = fields
@@ -184,13 +184,13 @@ module Geoptima
            key = @group.call(time)
            ghead = "#{header} #{key}"
            @groups[key] = ghead
-           stats_manager.add(
+           stats_manager.add(map_fields(fields),ghead,nil)
          end
        rescue ArgumentError
          puts "Error: Unable to process time field[#{time}]: #{$!}"
        end
      end
-     stats_manager.add(
+     stats_manager.add(map_fields(fields),header,index)
    end
    def div
      unless @div
@@ -224,18 +224,30 @@ module Geoptima
         val
       end
     end
-    def
+    def prepare_values(values)
+      @values = []
       if @indices
         puts "StatSpec[#{self}]: #{options.inspect}" if($debug)
-
-        puts "\tVALUES: #{
-
+        @values = @indices.map{|i| values[i]}
+        puts "\tVALUES: #{values.inspect}" if($debug)
+      end
+      @values
+    end
+    def vals_for(values,filter={})
+      if @indices
+        prepare_values(values)
+        (options[:filter] || filter).each do |field,expected|
          puts "\t\tChecking if field #{field} is #{expected}" if($debug)
          puts "\t\tLooking for #{field} or #{event}.#{field} in #{@fields.inspect}" if($debug)
          hi = @fields.index(field.to_s) || @fields.index("#{event}.#{field}")
-         puts "\t\t#{field} -> #{hi} -> #{hi &&
-         return nil unless(hi &&
+          puts "\t\t#{field} -> #{hi} -> #{hi && values[hi]}" if($debug)
+          return nil unless(hi && values[hi] && (expected === values[hi].downcase || values[hi].downcase === expected.to_s.downcase))
        end
+        values
+      end
+    end
+    def map_fields(values,filter={})
+      if vals = vals_for(values,filter)
        val = proc.nil? ? vals[0] : proc.call(*vals)
        puts "\tBLOCK MAP: #{vals.inspect} --> #{val.inspect}" if($debug)
        if options[:div]
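
Note: this refactor splits the old mapping method into `prepare_values`, `vals_for` and `map_fields`, and `vals_for` now honours an `options[:filter]` hash that rejects a row unless the named field matches the expected value (compared with `===`, so Regexps work, falling back to a case-insensitive string match). A hypothetical spec-file entry using the new option, with field names borrowed from the KPI example later in this diff:

    # Hypothetical: count DNS lookups, keeping only rows whose
    # dnsLookup.error field is empty; the :error key resolves through
    # the "#{event}.#{field}" lookup shown above.
    stats 'DNS Lookups', 'dnsLookup.address', 'dnsLookup.error',
          :filter => {:error => /^$/}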
@@ -266,6 +278,128 @@ module Geoptima
       "#{header}[#{index}]<-#{fields.inspect}(#{indices && indices.join(',')})"
     end
   end
+
+  class Group
+    attr_reader :name, :options, :proc, :is_time, :index
+    def initialize(name,options={},&block)
+      @name = name
+      @options = options
+      @proc = block
+      @is_time = options[:is_time]
+    end
+    def index= (ind)
+      puts "Set group header index=#{ind} for group '#{name}'"
+      @index = ind
+    end
+    def call(time,values)
+      is_time && @proc.call(time) || @proc.call(values[index])
+    end
+  end
+
+  # The KPI class allows for complex statistics called 'Key Performance Indicators'.
+  # These are specified using four functions:
+  #   filter: how to choose rows to include in the statistics (default is '!map.nil?')
+  #   map: how to convert a row into the internal stats (default is input columns)
+  #   aggregate: how to aggregate internal stats to higher levels (eg. daily, default is count)
+  #   reduce: how to extract presentable stats from internal stats (eg. avg=total/count, default is internal stats)
+  #
+  # The KPI is defined with a name and set of columns to use, followed by the block
+  # defining the four functions above. For example:
+  #
+  #   kpi 'DNS Success', 'dnsLookup.address', 'dnsLookup.error', 'dnsLookup.interface' do |f|
+  #     f.filter {|addr,err,int| addr =~/\w/}
+  #     f.map {|addr,err,int| err.length==0 ? [1,1] : [1,0]}
+  #     f.aggregate {|a,v| a[0]+=v[0];a[1]+=v[1];a}
+  #     f.reduce {|a| 100.0*a[1].to_f/a[0].to_f}
+  #   end
+  #
+  # Currently this class extends StatSpec for access to the prepare_indices method.
+  # We should consider moving that to a mixin, or depreciating the StatSpec class
+  # entirely since KPISpec should provide a superset of features.
+  class KPISpec < StatSpec
+    def initialize(header,*fields,&block)
+      @header = header
+      @fields = fields
+      @event = @fields[0].split(/\./)[0]
+      block.call self unless(block.nil?)
+      if @fields[-1].is_a?(Hash)
+        @options = @fields.pop
+      else
+        @options = {}
+      end
+      @group_procs = []
+      @groups = {}
+      if @options[:group]
+        [@options[:group]].flatten.compact.sort.uniq.each do |group_name|
+          gname = group_name.to_s.intern
+          case gname
+          when :months
+            group_by(gname,true) {|t| t.strftime("%Y-%m")}
+          when :weeks
+            group_by(gname,true) {|t| t.strftime("%Y w%W")}
+          when :days
+            group_by(gname,true) {|t| t.strftime("%Y-%m-%d")}
+          when :hours
+            group_by(gname,true) {|t| t.strftime("%Y-%m-%d %H")}
+          else
+            group_by(gname) {|f| f}
+          end
+        end
+      end
+      puts "Created StatSpec: #{self}"
+    end
+    def group_by(field,is_time=false,&block)
+      @group_procs = Group.new(field,:is_time => is_time,&block)
+    end
+    def filter(&block)
+      @filter_proc = block
+    end
+    def map(&block)
+      @map_proc = block
+    end
+    def aggregate(&block)
+      @aggregate_proc = block
+    end
+    def reduce(&block)
+      @reduce_proc = block
+    end
+    def add(stats_manager,values)
+      prepare_values(values)
+      if @group_procs.length > 0
+        begin
+          time = DateTime.parse(values[stats_manager.time_index])
+          if $time_range.nil? || $time_range.include?(time)
+            key = @group_procs.inject(header) do |ghead,group|
+              key = @group.call(time,values)
+              ghead += " #{key}"
+            end
+            @groups[key] = ghead
+            stats_manager.add(map_fields(fields),ghead,nil)
+          end
+        rescue ArgumentError
+          puts "Error: Unable to process time field[#{time}]: #{$!}"
+        end
+      end
+      stats_manager.add(map_fields(fields),header,index)
+    end
+    def map_fields(values,filter=nil)
+      if values
+        if @filter_proc.nil? || @filter_proc.call(*values)
+          val = @map_proc && @map_proc.call(*values) || values[0]
+          puts "\tBLOCK MAP: #{values.inspect} --> #{values.inspect}" if($debug)
+        end
+        val
+      end
+    end
+    def prepare_indices(stats_manager,headers)
+      super(stats_manager,headers)
+      @group_procs.each do |g|
+        g.index = fields.index(g.name)
+      end
+    end
+  end
+
+  # Class for specifications of individual charts
   class ChartSpec
     attr_reader :chart_type, :header, :options
     def initialize(header,options={})
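
Note: spelled out as a spec file, the four-function model reads as below. The entry is lifted from the comment block in this hunk; it is wired up by the `kpi` method added to `StatsSpecs` two hunks further on:

    kpi 'DNS Success', 'dnsLookup.address', 'dnsLookup.error', 'dnsLookup.interface' do |f|
      f.filter    {|addr,err,int| addr =~ /\w/}                  # keep rows that have an address
      f.map       {|addr,err,int| err.length==0 ? [1,1] : [1,0]} # [attempts, successes]
      f.aggregate {|a,v| a[0]+=v[0]; a[1]+=v[1]; a}              # sum the pairs per group
      f.reduce    {|a| 100.0*a[1].to_f/a[0].to_f}                # success percentage
    end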
@@ -328,14 +462,15 @@ module Geoptima
       g.write("#{$export_dir}/Chart_#{stats_manager.name}_#{header}_#{chart_type}_distribution.png")
     end
     def to_s
-      "#{chart_type.upcase}-#{header}"
+      "#{chart_type.to_s.upcase}-#{header}"
     end
   end
   class StatsSpecs
-    attr_reader :chart_specs, :stat_specs
+    attr_reader :chart_specs, :stat_specs, :kpi_specs
     def initialize(specfile)
       @chart_specs = []
       @stat_specs = []
+      @kpi_specs = []
       instance_eval(File.open(specfile).read)
     end
     def category_chart(header,options={})
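
Note: the added `to_s` presumably guards against Symbol chart types: `:line.to_s.upcase` works on any Ruby, while `Symbol#upcase` only arrived in Ruby 1.9, so `:line.upcase` raises NoMethodError on 1.8 (the same exception the chart loop at the end of this file now rescues). A minimal illustration with a hypothetical chart type and header:

    chart_type = :line
    puts "#{chart_type.to_s.upcase}-rssi"   # => LINE-rssi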
@@ -353,10 +488,16 @@ module Geoptima
     def stats(header,*fields,&block)
       @stat_specs << StatSpec.new(header,*fields,&block)
     end
+    def kpi(header,*fields,&block)
+      @kpi_specs << KPISpec.new(header,*fields,&block)
+    end
     def add_stats(stats_manager,headers)
       stat_specs.each do |stat_spec|
         stat_spec.prepare_indices(stats_manager,headers)
       end
+      kpi_specs.each do |kpi_spec|
+        kpi_spec.prepare_indices(stats_manager,headers)
+      end
     end
     def add_fields(stats_manager,fields)
       puts "Adding fields to #{stat_specs.length} StatSpec's" if($debug)
@@ -364,9 +505,14 @@ module Geoptima
         puts "Adding fields to StatSpec: #{stat_spec}" if($debug)
         stat_spec.add(stats_manager,fields)
       end
+      puts "Adding fields to #{kpi_specs.length} KPISpec's" if($debug)
+      kpi_specs.each do |kpi_spec|
+        puts "Adding fields to KPISpec: #{kpi_spec}" if($debug)
+        kpi_spec.add(stats_manager,fields)
+      end
     end
     def to_s
-      "Stats[#{@stat_specs.join(', ')}] AND Charts[#{@chart_specs.join(', ')}]"
+      "Stats[#{@stat_specs.join(', ')}] AND KPIs[#{@kpi_specs.join(', ')}] AND Charts[#{@chart_specs.join(', ')}]"
     end
   end
 end
@@ -468,7 +614,11 @@ end
 $stats_managers.each do |name,stats_manager|
   if $specs
     $specs.chart_specs.each do |chart_spec|
-      chart_spec.process(stats_manager)
+      begin
+        chart_spec.process(stats_manager)
+      rescue NoMethodError
+        puts "Failed to process chart '#{chart_spec}': #{$!}"
+      end
     end
   end
   if $create_all
data/bin/csv_merge
CHANGED
@@ -9,20 +9,24 @@ require 'geoptima/options'
 require 'fileutils'
 require 'geoptima/daterange'
 
-Geoptima::assert_version("0.1.3")
+Geoptima::assert_version("0.1.4")
 
 $export_dir = '.'
 $export_name = 'merged.csv'
+$split_by = :days
 
 $files = Geoptima::Options.process_args do |option|
   option.t {$time_split = true}
+  option.m {$low_memory = true}
   option.D {$export_dir = ARGV.shift}
   option.N {$export_name = ARGV.shift}
-  option.
-    $
-
-
+  option.S do
+    $split_by = case ARGV.shift.downcase.intern
+                when :days ; :days
+                else :days
+                end
   end
+  option.T {$time_range = Geoptima::DateRange.from ARGV.shift}
 end
 
 FileUtils.mkdir_p $export_dir
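
Note: `-S` currently accepts only `days`; any other value falls through the `case` to the `:days` default, so the flag is effectively a placeholder for future split units. `-m` trades speed for memory: with `$low_memory` set, `CSVDataset` (added in the next hunk) does not cache parsed records, and its `each` re-reads the source file on export. Condensed from `CSVDataset#each` in the next hunk (not new API):

    if $low_memory
      read_file do |fields|                 # second pass over the file
        line = create_line fields
        yield line.day, map_line(line)
      end
    else
      (@lines || []).each do |line|         # records cached on first pass
        yield line.day, map_line(line)
      end
    end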
@@ -30,46 +34,216 @@ FileUtils.mkdir_p $export_dir
 $help = true unless($files.length>0)
 if $help
   puts <<EOHELP
-Usage:
+Usage: csv_merge <-dhtm> <-N name> <-D dir> <-T range> <-S split_by> files...
 -d debug mode #{cw $debug}
 -h print this help #{cw $help}
--t merge and split by time (
--
+-t merge and split by time (#{$split_by}) #{cw $time_split}
+-m use low memory, temporarily storing to intermediate files #{cw $low_memory}
+-N use specified name for merged dataset: #{$export_name}
 -D export to specified directory: #{$export_dir}
+-S time units to split exports by: #{$split_by}
 -T set time-range filter: #{$time_range}
 Files to import: #{$files.join(', ')}
 EOHELP
   exit
 end
 
-class
-  attr_reader :
-  def initialize(
+class CSVRecord
+  attr_reader :time, :fields, :day
+  def initialize(fields,time_index=0)
+    @fields = fields
+    @time = DateTime.parse(fields[time_index])
+    @day = @time.strftime("%Y-%m-%d")
+  end
+  def [](index)
+    fields[index]
+  end
+  def <=>(other)
+    time <=> other
+  end
+  def within(time_range)
+    time_range.nil? || time_range.include?(time)
   end
 end
 
-
-  lines
-
-
-
-
-
-
-
-
-
-
-
-
+class CSVDataset
+  attr_reader :filename, :headers, :day_map, :lines, :count, :record_creation_duration
+  def initialize(filename)
+    @filename = filename
+    @lines = []
+    @day_map = {}
+    @record_creation_duration = 0
+    @count = 0
+    @headers = nil
+    read_file do |fields|
+      add fields
+    end
+  end
+  def read_file
+    lines = 0
+    File.open(filename).each do |line|
+      fields=line.chomp.split(/\t/)
+      if lines > 0
+        puts "Processing line: #{line}" if($debug)
+        yield fields
+      else
+        if fields.length<2
+          puts "Too few headers, rejecting #{file}"
+          break
+        end
+        @headers ||= fields
+      end
+      lines += 1
+    end
+    @export_headers ||= @headers
+  end
+  def add(fields)
+    start_time = Time.new
+    line = create_line(fields)
+    if line
+      @day_map[line.day] ||= 0
+      @day_map[line.day] += 1
+      @lines << line unless($low_memory)
+      @count += 1
+      @record_creation_duration += Time.new - start_time
+    end
+    line
+  end
+  def create_line(fields)
+    begin
+      line = CSVRecord.new(fields,0)
+      if(line.within($time_range))
+        line
+      else
+        nil
+      end
+    rescue ArgumentError
+      puts "Failed to parse line with timestamp='#{fields[0]}': #{$!}"
+    end
+  end
+  def header_map(eh=nil)
+    if eh
+      @export_headers = eh
+      @header_map = nil
+    end
+    unless @header_map
+      @header_map = []
+      (@export_headers || @headers).each do |head|
+        @header_map << @headers.index(head)
+      end
+    end
+    @header_map
+  end
+  def map_line(line)
+    @header_map.map do |index|
+      index && line[index]
+    end
+  end
+  def days
+    @day_map.keys.sort
+  end
+  def each(eh=nil)
+    header_map(eh)
+    if $low_memory
+      read_file do |fields|
+        line = create_line fields
+        yield line.day,map_line(line)
+      end
     else
-
-
-      puts "Too few headers, rejecting #{file}"
-      break
+      (@lines || []).each do |line|
+        yield line.day,map_line(line)
       end
-  $stats_managers[name].set_headers(headers)
     end
   end
+  def <=>(other)
+    self.filename <=> other.filename
+  end
+  def length
+    count
+  end
 end
 
+class CSVDatasets
+  attr_reader :datasets
+  def initialize
+    @datasets = []
+  end
+  def add_file(file)
+    lines = 0
+    dataset = nil
+    filename = File.basename(file)
+    (names = filename.split(/[_\.]/)).pop
+    name = names.join('_')
+    puts "About to read file #{file}"
+    dataset = CSVDataset.new(file)
+    @datasets << dataset if(dataset && dataset.length>0)
+    dataset
+  end
+  def export_days
+    headers = @datasets.map{|d| d.headers}.flatten.uniq
+    days = @datasets.map{|d| d.days}.flatten.sort.uniq
+    day_files = {}
+    day_names = {}
+    count = {}
+    duration = {}
+    days.each do |day|
+      filename = "#{$export_dir}/#{$export_name.gsub(/\.csv$/,'')}_#{day}.csv"
+      puts "Exporting #{filename} for #{day}"
+      day_names[day] = filename
+      day_files[day] = File.open(filename,'w')
+      day_files[day].puts headers.join("\t")
+      count[day] = 0
+      duration[day] = 0
+    end
+    @datasets.sort.each do |dataset|
+      dataset.each(headers) do |day,line|
+        start_time = Time.new
+        day_files[day].puts line.join("\t")
+        duration[day] += Time.new - start_time
+        count[day] += 1
+      end
+    end
+    day_files.each do |day,out|
+      out.close
+      puts "\tExported #{count[day]} records to #{day_names[day]} in #{duration[day]} seconds"
+    end
+  end
+  def export_merged
+    headers = @datasets.map{|d| d.headers}.flatten.sort.uniq
+    filename = "#{$export_dir}/#{$export_name}"
+    File.open(filename,'w') do |out|
+      out.puts headers.join("\t")
+      @datasets.sort.each(headers) do |dataset|
+        dataset.each do |day,line|
+          out.puts line.join("\t")
+        end
+      end
+    end
+  end
+end
+
+$datasets = CSVDatasets.new
+
+$files.each do |file|
+  start_time = Time.new
+  ds = $datasets.add_file(file)
+  duration = Time.new - start_time
+  puts "\tLoaded #{file} in #{duration} seconds"
+  puts "\t#{(100.0 * ds.record_creation_duration.to_f/duration.to_f).to_i}% = #{ds.record_creation_duration}/#{duration} was spent creating records"
+  puts "\tFile contained #{ds.length} events for #{ds.days.length} days:"
+  ds.days.each do |day|
+    puts "\t\t#{day}: \t#{(100.0 * ds.day_map[day].to_f/ds.length.to_f).to_i}%\t#{ds.day_map[day]} records"
+  end
+end
+
+start_time = Time.new
+
+if $time_split
+  $datasets.export_days
+else
+  $datasets.export_merged
+end
+
+duration = Time.new - start_time
+puts "Exported in #{duration} seconds"
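
Note: with `-t`, `export_days` strips a trailing `.csv` from the merged name and appends each day key, so the defaults yield one file per calendar day. A minimal sketch using the same expression as above (the day value is a hypothetical example):

    export_dir  = '.'
    export_name = 'merged.csv'
    day         = '2012-01-26'   # CSVRecord#day format: %Y-%m-%d
    puts "#{export_dir}/#{export_name.gsub(/\.csv$/,'')}_#{day}.csv"
    # => ./merged_2012-01-26.csv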