geoptima 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/csv_chart +167 -17
- data/bin/csv_merge +204 -30
- data/bin/show_geoptima +128 -35
- data/examples/csv_chart.rb +167 -17
- data/examples/csv_merge.rb +204 -30
- data/examples/show_geoptima.rb +128 -35
- data/geoptima.gemspec +1 -1
- data/lib/geoptima/data.rb +40 -37
- data/lib/geoptima/version.rb +1 -1
- metadata +5 -4
data/examples/csv_merge.rb
CHANGED
@@ -9,20 +9,24 @@ require 'geoptima/options'
|
|
9
9
|
require 'fileutils'
|
10
10
|
require 'geoptima/daterange'
|
11
11
|
|
12
|
-
Geoptima::assert_version("0.1.
|
12
|
+
Geoptima::assert_version("0.1.4")
|
13
13
|
|
14
14
|
$export_dir = '.'
|
15
15
|
$export_name = 'merged.csv'
|
16
|
+
$split_by = :days
|
16
17
|
|
17
18
|
$files = Geoptima::Options.process_args do |option|
|
18
19
|
option.t {$time_split = true}
|
20
|
+
option.m {$low_memory = true}
|
19
21
|
option.D {$export_dir = ARGV.shift}
|
20
22
|
option.N {$export_name = ARGV.shift}
|
21
|
-
option.
|
22
|
-
$
|
23
|
-
|
24
|
-
|
23
|
+
option.S do
|
24
|
+
$split_by = case ARGV.shift.downcase.intern
|
25
|
+
when :days ; :days
|
26
|
+
else :days
|
27
|
+
end
|
25
28
|
end
|
29
|
+
option.T {$time_range = Geoptima::DateRange.from ARGV.shift}
|
26
30
|
end
|
27
31
|
|
28
32
|
FileUtils.mkdir_p $export_dir
|
@@ -30,46 +34,216 @@ FileUtils.mkdir_p $export_dir
|
|
30
34
|
$help = true unless($files.length>0)
|
31
35
|
if $help
|
32
36
|
puts <<EOHELP
|
33
|
-
Usage:
|
37
|
+
Usage: csv_merge <-dhtm> <-N name> <-D dir> <-T range> <-S split_by> files...
|
34
38
|
-d debug mode #{cw $debug}
|
35
39
|
-h print this help #{cw $help}
|
36
|
-
-t merge and split by time (
|
37
|
-
-
|
40
|
+
-t merge and split by time (#{$split_by}) #{cw $time_split}
|
41
|
+
-m use low memory, temporarily storing to intermediate files #{cw $low_memory}
|
42
|
+
-N use specified name for merged dataset: #{$export_name}
|
38
43
|
-D export to specified directory: #{$export_dir}
|
44
|
+
-S time units to split exports by: #{$split_by}
|
39
45
|
-T set time-range filter: #{$time_range}
|
40
46
|
Files to import: #{$files.join(', ')}
|
41
47
|
EOHELP
|
42
48
|
exit
|
43
49
|
end
|
44
50
|
|
45
|
-
class
|
46
|
-
attr_reader :
|
47
|
-
def initialize(
|
51
|
+
# One row of a tab-separated input file together with its parsed timestamp.
#
# The timestamp is read from a single field (by default the first one) and
# is exposed both as a DateTime (+time+) and as a "YYYY-MM-DD" string (+day+).
class CSVRecord
  attr_reader :time, :fields, :day

  # fields::     array of string fields of the row
  # time_index:: index of the field that holds the timestamp
  #
  # Raises ArgumentError when the timestamp field is missing or cannot be
  # parsed (a missing field is converted to "" so that DateTime.parse reports
  # it as an invalid date instead of raising TypeError on nil).
  def initialize(fields,time_index=0)
    @fields = fields
    @time = DateTime.parse(fields[time_index].to_s)
    @day = @time.strftime("%Y-%m-%d")
  end

  # Field access by column index.
  def [](index)
    fields[index]
  end

  # Orders records by timestamp. +other+ may be another record (anything
  # responding to +time+) or a bare date/time value.
  def <=>(other)
    time <=> (other.respond_to?(:time) ? other.time : other)
  end

  # True when no range is given, or when the range includes this record's time.
  def within(time_range)
    time_range.nil? || time_range.include?(time)
  end
end
|
50
68
|
|
51
|
-
|
52
|
-
lines
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
69
|
+
# One tab-separated input file.
#
# On construction the file is read once: the first line becomes the headers,
# every following line is turned into a CSVRecord, counted per day and (unless
# $low_memory is set) kept in memory. +each+ later replays the rows mapped
# onto a caller-supplied list of export headers; in low-memory mode it does so
# by re-reading the file.
class CSVDataset
  attr_reader :filename, :headers, :day_map, :lines, :count, :record_creation_duration

  def initialize(filename)
    @filename = filename
    @lines = []
    @day_map = {}
    @record_creation_duration = 0
    @count = 0
    @headers = nil
    read_file { |fields| add fields }
  end

  # Yields the split fields of every data line (all lines after the first).
  # The first line is stored as the headers; a header line with fewer than two
  # fields causes the file to be rejected and reading to stop.
  def read_file
    line_number = 0
    # Block form so the handle is closed even when we break out or a caller's
    # block raises.
    File.open(filename) do |io|
      io.each_line do |line|
        fields = line.chomp.split(/\t/)
        if line_number > 0
          puts "Processing line: #{line}" if($debug)
          yield fields
        else
          if fields.length<2
            puts "Too few headers, rejecting #{filename}"
            break
          end
          @headers ||= fields
        end
        line_number += 1
      end
    end
    @export_headers ||= @headers
  end

  # Creates a record for +fields+ and, when it is accepted, updates the
  # per-day counts, the total count and the accumulated creation time.
  # Returns the record, or nil when the row was rejected.
  def add(fields)
    start_time = Time.new
    line = create_line(fields)
    if line
      @day_map[line.day] ||= 0
      @day_map[line.day] += 1
      @lines << line unless($low_memory)
      @count += 1
      @record_creation_duration += Time.new - start_time
    end
    line
  end

  # Builds a CSVRecord from +fields+. Returns nil when the record falls
  # outside $time_range or when its timestamp cannot be parsed (the failure
  # is reported on stdout).
  def create_line(fields)
    line = CSVRecord.new(fields,0)
    line.within($time_range) ? line : nil
  rescue ArgumentError, TypeError
    puts "Failed to parse line with timestamp='#{fields[0]}': #{$!}"
    nil
  end

  # Returns, for every export header, the index of that header in this
  # file's own headers (nil when this file has no such column). Passing +eh+
  # replaces the export headers and rebuilds the mapping.
  def header_map(eh=nil)
    if eh
      @export_headers = eh
      @header_map = nil
    end
    @header_map ||= (@export_headers || @headers || []).map do |head|
      @headers && @headers.index(head)
    end
  end

  # Re-orders the fields of +line+ to match the export headers; columns this
  # file does not have become nil.
  def map_line(line)
    header_map.map do |index|
      index && line[index]
    end
  end

  # Sorted list of the days for which records were counted.
  def days
    @day_map.keys.sort
  end

  # Yields (day, mapped_fields) for every accepted record, optionally
  # switching to export headers +eh+ first.
  def each(eh=nil)
    header_map(eh)
    if $low_memory
      read_file do |fields|
        line = create_line fields
        # Rows rejected by the time filter or the parser come back as nil.
        yield line.day,map_line(line) if line
      end
    else
      (@lines || []).each do |line|
        yield line.day,map_line(line)
      end
    end
  end

  # Datasets are ordered by file name.
  def <=>(other)
    self.filename <=> other.filename
  end

  # Number of accepted records.
  def length
    count
  end
end
|
75
165
|
|
166
|
+
# The collection of all loaded input files, with the two export modes:
# one output file per day, or a single merged output file.
class CSVDatasets
  attr_reader :datasets

  def initialize
    @datasets = []
  end

  # Loads +file+ as a CSVDataset. The dataset is kept only when it contains
  # at least one record, but it is returned either way so the caller can
  # report on it.
  def add_file(file)
    puts "About to read file #{file}"
    dataset = CSVDataset.new(file)
    @datasets << dataset if(dataset && dataset.length>0)
    dataset
  end

  # Writes one file per day, named "<export name without .csv>_<day>.csv" in
  # $export_dir, each with the union of all dataset headers (in first-seen
  # order), then prints a per-day summary.
  def export_days
    headers = @datasets.map{|d| d.headers}.flatten.uniq
    days = @datasets.map{|d| d.days}.flatten.sort.uniq
    day_files = {}
    day_names = {}
    count = {}
    duration = {}
    begin
      days.each do |day|
        filename = "#{$export_dir}/#{$export_name.gsub(/\.csv$/,'')}_#{day}.csv"
        puts "Exporting #{filename} for #{day}"
        day_names[day] = filename
        day_files[day] = File.open(filename,'w')
        day_files[day].puts headers.join("\t")
        count[day] = 0
        duration[day] = 0
      end
      @datasets.sort.each do |dataset|
        dataset.each(headers) do |day,line|
          start_time = Time.new
          day_files[day].puts line.join("\t")
          duration[day] += Time.new - start_time
          count[day] += 1
        end
      end
    ensure
      # Close every file that was opened, even if an export step failed.
      day_files.each_value { |out| out.close unless out.closed? }
    end
    day_names.each do |day,name|
      puts "\tExported #{count[day]} records to #{name} in #{duration[day]} seconds"
    end
  end

  # Writes all records of all datasets (in file-name order) to a single file
  # $export_dir/$export_name, using the sorted union of all headers.
  def export_merged
    headers = @datasets.map{|d| d.headers}.flatten.sort.uniq
    filename = "#{$export_dir}/#{$export_name}"
    File.open(filename,'w') do |out|
      out.puts headers.join("\t")
      @datasets.sort.each do |dataset|
        # The header list belongs to the dataset iteration, which maps each
        # row onto it; Array#each itself takes no arguments.
        dataset.each(headers) do |day,line|
          out.puts line.join("\t")
        end
      end
    end
  end
end
|
224
|
+
|
225
|
+
$datasets = CSVDatasets.new

# Integer percentage of +part+ in +whole+. Returns 0 when +whole+ is zero,
# because converting the resulting NaN/Infinity with to_i would raise
# FloatDomainError (possible when a load completes within timer resolution).
percent_of = lambda do |part, whole|
  whole.to_f > 0 ? (100.0 * part.to_f / whole.to_f).to_i : 0
end

# Load every input file and report how long it took and how its records are
# distributed over days.
$files.each do |file|
  start_time = Time.new
  ds = $datasets.add_file(file)
  duration = Time.new - start_time
  puts "\tLoaded #{file} in #{duration} seconds"
  puts "\t#{percent_of.call(ds.record_creation_duration, duration)}% = #{ds.record_creation_duration}/#{duration} was spent creating records"
  puts "\tFile contained #{ds.length} events for #{ds.days.length} days:"
  ds.days.each do |day|
    puts "\t\t#{day}: \t#{percent_of.call(ds.day_map[day], ds.length)}%\t#{ds.day_map[day]} records"
  end
end

start_time = Time.new

# Either one output file per day, or a single merged file.
if $time_split
  $datasets.export_days
else
  $datasets.export_merged
end

duration = Time.new - start_time
puts "Exported in #{duration} seconds"
|
249
|
+
|
data/examples/show_geoptima.rb
CHANGED
@@ -6,8 +6,9 @@ $: << '../lib'
|
|
6
6
|
|
7
7
|
require 'date'
|
8
8
|
require 'geoptima'
|
9
|
+
require 'geoptima/options'
|
9
10
|
|
10
|
-
Geoptima::assert_version("0.1.
|
11
|
+
Geoptima::assert_version("0.1.4")
|
11
12
|
|
12
13
|
$debug=false
|
13
14
|
|
@@ -24,29 +25,89 @@ $files = Geoptima::Options.process_args do |option|
|
|
24
25
|
option.m {$map_headers = true}
|
25
26
|
option.a {$combine_all = true}
|
26
27
|
option.l {$more_headers = true}
|
28
|
+
option.P {$export_prefix = ARGV.shift}
|
27
29
|
option.E {$event_names += ARGV.shift.split(/[\,\;\:\.]+/)}
|
28
|
-
option.T
|
29
|
-
$time_range = Geoptima::DateRange.from ARGV.shift
|
30
|
-
end
|
30
|
+
option.T {$time_range = Geoptima::DateRange.from ARGV.shift}
|
31
31
|
option.L {$print_limit = [1,ARGV.shift.to_i].max}
|
32
|
-
|
33
|
-
option.t {$time_split = true}
|
34
|
-
option.D {$export_dir = ARGV.shift}
|
35
|
-
option.N {$merged_name = ARGV.shift}
|
36
|
-
option.S {$specfile = ARGV.shift}
|
37
|
-
option.P {$diversity = ARGV.shift.to_f}
|
38
|
-
option.W {$chart_width = ARGV.shift.to_i}
|
39
|
-
option.T do
|
40
|
-
$time_range = Geoptima::DateRange.from ARGV.shift
|
41
|
-
end
|
32
|
+
option.M {$mapfile = ARGV.shift}
|
42
33
|
end.map do |file|
|
43
34
|
File.exist?(file) ? file : puts("No such file: #{file}")
|
44
35
|
end.compact
|
45
36
|
|
37
|
+
# One section of a map file: a named list of output columns for a single
# event type. Each entry of +columns+ is an array whose first element is the
# output column name and whose second element (when present) names the
# source header the value is taken from.
class HeaderMap
  attr_reader :prefix, :name, :event
  attr_accessor :columns

  def initialize(prefix,name,event)
    @prefix = prefix
    @name = name
    @event = event
    @columns = []
  end

  # Builds the lookup from source header name to its position in +header+.
  # Entries without a source name are skipped.
  def mk_known(header)
    puts "Creating column mappings for headers: #{header}" if($debug)
    @col_indices = {}
    columns.each do |col|
      c = source_name(col)
      if c.length>0
        @col_indices[c] = header.index(c)
        puts "\tMade column mapping: #{c} --> #{header.index(c)}" if($debug)
      end
    end
  end

  # Returns one value per mapped column, taken from +fields+ at the position
  # the source header has in +header+ (nil when the source is unknown). A
  # source name matching SCENARIO_COUNTER yields a running counter that is
  # incremented on every call instead.
  #
  # The header positions are computed on the first call and reused afterwards.
  def map_fields(header,fields)
    @scenario_counter ||= 0
    mk_known(header) unless @col_indices
    @columns.map do |column|
      if column[1] =~ /SCENARIO_COUNTER/
        @scenario_counter += 1
      else
        # Look up under the same normalized name mk_known stored, so that
        # source names written with a '?' resolve as well.
        index = @col_indices[source_name(column)]
        puts "Found mapping #{column} -> #{index} -> #{index && fields[index]}" if($debug)
        index && fields[index]
      end
    end
  end

  private

  # Source header name of a column entry with any '?' characters removed;
  # "" when the entry has no source name.
  def source_name(column)
    (column[1] && column[1].gsub(/\?/,'')).to_s
  end
end
|
71
|
+
|
72
|
+
# Parse the map file given with -M into $header_maps.
#
# Lines starting with '#' and lines shorter than two characters are ignored.
# A line of the form "[name]<TAB>event" starts a new HeaderMap; every other
# line adds its first two tab-separated fields as a column of the current map.
if $mapfile
  $header_maps = []
  current_map = nil
  # Use only the file name for the prefix: it becomes part of the export file
  # names, so directory components of the map file path must not leak into it.
  prefix = File.basename($mapfile).split(/\./)[0]
  File.open($mapfile) do |io|
    io.each_line do |line|
      line.chomp!
      next if line =~ /^\s*#/
      next if line.length < 2
      if line =~ /^\[(\w+)\]\t(\w+)/
        current_map = HeaderMap.new(prefix,$1,$2)
        $header_maps << current_map
      elsif current_map
        current_map.columns << line.split(/\t/)[0..1]
      else
        puts "Invalid header map line: #{line}"
      end
    end
  end
end
|
90
|
+
|
91
|
+
# Prints a summary of the loaded header maps, if any: one line per map with
# its name and event, followed by either the output column names (normal
# mode) or every column pair padded to a fixed width (debug mode).
def show_header_maps
  return unless $header_maps
  puts "Using #{$header_maps.length} header maps:"
  $header_maps.each do |header_map|
    puts "\t[#{header_map.name}] (#{header_map.event})"
    unless $debug
      puts "\t\t#{header_map.columns.map{|pair| pair[0]}.join(', ')}"
      next
    end
    header_map.columns.each do |pair|
      padded = pair.map { |cell| (cell+' '*30)[0..30] }
      puts "\t\t#{padded.join("\t-->\t")}"
    end
  end
end
|
106
|
+
|
46
107
|
$help = true if($files.length < 1)
|
47
108
|
if $help
|
48
109
|
puts <<EOHELP
|
49
|
-
Usage: show_geoptima <-dpvxomlsah> <-L limit> <-E types> <-T min,max> file <files>
|
110
|
+
Usage: show_geoptima <-dpvxomlsah> <-L limit> <-E types> <-T min,max> <-M mapfile> file <files>
|
50
111
|
-d debug mode (output more context during processing) #{cw $debug}
|
51
112
|
-p print mode (print out final results to console) #{cw $print}
|
52
113
|
-v verbose mode (output extra information to console) #{cw $verbose}
|
@@ -57,14 +118,19 @@ Usage: show_geoptima <-dpvxomlsah> <-L limit> <-E types> <-T min,max> file <file
|
|
57
118
|
-s seperate the export files by event type #{cw $seperate}
|
58
119
|
-a combine all IMEI's into a single dataset #{cw $combine_all}
|
59
120
|
-h show this help
|
121
|
+
-P prefix for exported files (default: ''; current: #{$export_prefix})
|
60
122
|
-E comma-seperated list of event types to show and export (default: all; current: #{$event_names.join(',')})
|
61
123
|
-T time range to limit results to (default: all; current: #{$time_range})
|
62
124
|
-L limit verbose output to specific number of lines #{cw $print_limit}
|
125
|
+
-M mapfile of normal->altered header names: #{$mapfile}
|
63
126
|
EOHELP
|
127
|
+
show_header_maps
|
64
128
|
exit 0
|
65
129
|
end
|
66
130
|
|
67
131
|
$verbose = $verbose || $debug
|
132
|
+
show_header_maps if($verbose)
|
133
|
+
|
68
134
|
$datasets = Geoptima::Dataset.make_datasets($files, :locate => true, :time_range => $time_range, :combine_all => $combine_all)
|
69
135
|
|
70
136
|
class Export
|
@@ -74,13 +140,18 @@ class Export
|
|
74
140
|
@imei = imei
|
75
141
|
@names = names
|
76
142
|
if $export
|
77
|
-
if $
|
143
|
+
if $header_maps
|
144
|
+
@files = $header_maps.inject({}) do |a,hm|
|
145
|
+
a[hm.event] = File.open("#{$export_prefix}#{imei}_#{hm.prefix}_#{hm.name}.csv",'w')
|
146
|
+
a
|
147
|
+
end
|
148
|
+
elsif $seperate
|
78
149
|
@files = names.inject({}) do |a,name|
|
79
|
-
a[name] = File.open("#{imei}_#{name}.csv",'w')
|
150
|
+
a[name] = File.open("#{$export_prefix}#{imei}_#{name}.csv",'w')
|
80
151
|
a
|
81
152
|
end
|
82
153
|
else
|
83
|
-
@files={nil => File.open("#{imei}.csv",'w')}
|
154
|
+
@files={nil => File.open("#{$export_prefix}#{imei}.csv",'w')}
|
84
155
|
end
|
85
156
|
end
|
86
157
|
@headers = names.inject({}) do |a,name|
|
@@ -91,7 +162,11 @@ class Export
|
|
91
162
|
end
|
92
163
|
@headers[nil] = @headers.values.flatten.sort
|
93
164
|
files && files.each do |key,file|
|
94
|
-
|
165
|
+
if $header_maps
|
166
|
+
file.puts $header_maps.find{|hm| hm.event == key}.columns.map{|c| c[0]}.join("\t")
|
167
|
+
else
|
168
|
+
file.puts map_headers(base_headers+more_headers+header(key)).join("\t")
|
169
|
+
end
|
95
170
|
end
|
96
171
|
if $debug || $verbose
|
97
172
|
@headers.each do |name,head|
|
@@ -108,7 +183,7 @@ class Export
|
|
108
183
|
end
|
109
184
|
def more_headers
|
110
185
|
$more_headers ?
|
111
|
-
['IMSI','MSISDN','MCC','MNC','LAC','CI','LAC-CI','RSSI','Platform','Model','OS','Operator'] :
|
186
|
+
['IMSI','MSISDN','MCC','MNC','LAC','CI','LAC-CI','RSSI','Platform','Model','OS','Operator','Battery'] :
|
112
187
|
[]
|
113
188
|
end
|
114
189
|
def base_fields(event)
|
@@ -127,15 +202,17 @@ class Export
|
|
127
202
|
when 'LAC-CI'
|
128
203
|
"#{dataset.recent(event,'service.lac')}-#{dataset.recent(event,'service.cell_id')}"
|
129
204
|
when 'MCC'
|
130
|
-
|
205
|
+
event.file[h] || dataset.recent(event,'service.mcc')
|
131
206
|
when 'MNC'
|
132
|
-
|
207
|
+
event.file[h] || dataset.recent(event,'service.mnc')
|
208
|
+
when 'Battery'
|
209
|
+
dataset.recent(event,'batteryState.state',600)
|
133
210
|
when 'Operator'
|
134
|
-
|
211
|
+
event.file['carrierName']
|
135
212
|
when 'IMSI'
|
136
|
-
|
213
|
+
event.file['imsi']
|
137
214
|
else
|
138
|
-
|
215
|
+
event.file[h]
|
139
216
|
end
|
140
217
|
end
|
141
218
|
end
|
@@ -164,10 +241,10 @@ class Export
|
|
164
241
|
end || hnames
|
165
242
|
end
|
166
243
|
def export_stats(stats)
|
167
|
-
File.open("#{imei}_stats.csv",'w') do |out|
|
244
|
+
File.open("#{$export_prefix}#{imei}_stats.csv",'w') do |out|
|
168
245
|
stats.keys.sort.each do |header|
|
169
246
|
out.puts header
|
170
|
-
values = stats[header].keys.sort
|
247
|
+
values = stats[header].keys.sort{|a,b| b.to_s<=>a.to_s}
|
171
248
|
out.puts values.join("\t")
|
172
249
|
out.puts values.map{|v| stats[header][v]}.join("\t")
|
173
250
|
out.puts
|
@@ -178,7 +255,7 @@ class Export
|
|
178
255
|
@headers[name]
|
179
256
|
end
|
180
257
|
def puts_to(line,name)
|
181
|
-
name = nil unless($seperate)
|
258
|
+
name = nil unless($seperate || $header_maps)
|
182
259
|
files[name].puts(line) if($export && files[name])
|
183
260
|
end
|
184
261
|
def puts_to_all(line)
|
@@ -222,13 +299,29 @@ $datasets.keys.sort.each do |imei|
|
|
222
299
|
names = dataset.events_names if(names.length<1)
|
223
300
|
export = Export.new(imei,names,dataset)
|
224
301
|
export.export_stats(dataset.stats) if($export_stats)
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
302
|
+
if $header_maps && $header_maps.length > 0
|
303
|
+
$header_maps.each do |hm|
|
304
|
+
puts "Searching for events for header_maps '#{hm.event}'"
|
305
|
+
events.each do |event|
|
306
|
+
if event.name == hm.event
|
307
|
+
header = export.header(event.name)
|
308
|
+
fields = header.map{|h| event[h]}
|
309
|
+
b_header = export.base_headers + export.more_headers
|
310
|
+
b_fields = export.base_fields(event) + export.more_fields(event,dataset)
|
311
|
+
all_fields = hm.map_fields(b_header + header, b_fields + fields)
|
312
|
+
export.puts_to all_fields.join("\t"), event.name
|
313
|
+
end
|
314
|
+
end
|
315
|
+
end
|
316
|
+
else
|
317
|
+
events.each do |event|
|
318
|
+
names.each do |name|
|
319
|
+
if event.name === name
|
320
|
+
fields = export.header($seperate ? name : nil).map{|h| event[h]}
|
321
|
+
b_fields = export.base_fields(event) + export.more_fields(event,dataset)
|
322
|
+
export.puts_to "#{b_fields.join("\t")}\t#{fields.join("\t")}", name
|
323
|
+
if_le{puts "#{b_fields.join("\t")}\t#{event.fields.inspect}"}
|
324
|
+
end
|
232
325
|
end
|
233
326
|
end
|
234
327
|
end
|
data/geoptima.gemspec
CHANGED
@@ -25,7 +25,7 @@ EOF
|
|
25
25
|
s.files = Dir.glob("{bin,lib,rdoc}/**/*").reject{|x| x=~/(tmp|target|test-data)/ || x=~/~$/} +
|
26
26
|
Dir.glob("examples/*rb") + Dir.glob("examples/sample*json") +
|
27
27
|
%w(README.rdoc CHANGELOG CONTRIBUTORS Gemfile geoptima.gemspec)
|
28
|
-
s.executables = ['show_geoptima','geoptima_file_time','csv_chart','csv_stats']
|
28
|
+
s.executables = ['show_geoptima','geoptima_file_time','csv_chart','csv_stats','csv_merge']
|
29
29
|
|
30
30
|
s.extra_rdoc_files = %w( README.rdoc )
|
31
31
|
s.rdoc_options = ["--quiet", "--title", "Geoptima.rb", "--line-numbers", "--main", "README.rdoc", "--inline-source"]
|
data/lib/geoptima/data.rb
CHANGED
@@ -55,7 +55,7 @@ module Geoptima
|
|
55
55
|
@header = header
|
56
56
|
@data = data
|
57
57
|
@fields = @header.inject({}) do |a,v|
|
58
|
-
a[v] = @data[a.length]
|
58
|
+
a[v] = check_field(@data[a.length])
|
59
59
|
a
|
60
60
|
end
|
61
61
|
@time = start + (@fields['timeoffset'].to_f / MSPERDAY.to_f)
|
@@ -65,6 +65,9 @@ module Geoptima
|
|
65
65
|
end
|
66
66
|
puts "Created Event: #{self}" if($debug)
|
67
67
|
end
|
68
|
+
# Converts string values that use a decimal comma (a digit, a comma, a digit
# somewhere in the text) to a Float by replacing commas with dots; every
# other value is returned unchanged.
#
# Only values that behave like strings are inspected: arrays and hashes also
# respond to +length+ but do not support regexp matching on current Ruby.
# NOTE(review): the pattern is unanchored, so any text containing
# "digit,digit" is converted via to_f - confirm that is intended.
def check_field(field)
  (field.respond_to?(:gsub) && field =~ /\d\,\d/) ? field.gsub(/\,/,'.').to_f : field
end
|
68
71
|
def utc
|
69
72
|
time.new_offset(0)
|
70
73
|
end
|
@@ -135,7 +138,7 @@ module Geoptima
|
|
135
138
|
@start ||= subscriber['start'] && DateTime.parse(subscriber['start'].gsub(/Asia\/Bangkok/,'GMT+7'))#.gsub(/Mar 17 2044/,'Feb 14 2012'))
|
136
139
|
end
|
137
140
|
def valid?
|
138
|
-
start && start
|
141
|
+
start && start >= (Data.min_start-1) && start < Data.max_start
|
139
142
|
end
|
140
143
|
def self.min_start
|
141
144
|
@@min_start ||= MIN_DATETIME
|
@@ -180,30 +183,28 @@ module Geoptima
|
|
180
183
|
unless header
|
181
184
|
puts "No header found for '#{event_type}', trying known Geoptima headers"
|
182
185
|
header = Event::KNOWN_HEADERS[event_type]
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
else
|
192
|
-
puts "No data found for event type '#{event_type}'"
|
193
|
-
header = nil
|
194
|
-
end
|
186
|
+
puts "Found known header '#{event_type}' => #{header.inspect}" if(header)
|
187
|
+
end
|
188
|
+
# Double-check the header length matches a multiple of the data length
|
189
|
+
if header
|
190
|
+
mismatch = events.length % header.length
|
191
|
+
if mismatch != 0
|
192
|
+
puts "'#{event_type}' header length #{header.length} incompatible with data length #{events.length}"
|
193
|
+
header = nil
|
195
194
|
end
|
195
|
+
else
|
196
|
+
puts "No header found for event type: #{event_type}"
|
196
197
|
end
|
197
198
|
# Now process the single long data array into a list of events with timestamps
|
198
199
|
if header
|
199
200
|
events_data[event_type] = (0...data[event_type].to_i).inject([]) do |a,block|
|
200
201
|
index = header.length * block
|
201
|
-
|
202
|
-
if
|
202
|
+
record = events[index...(index+header.length)]
|
203
|
+
if record && record.length == header.length
|
203
204
|
@count += 1
|
204
|
-
a << Event.new(self,start,event_type,header,
|
205
|
+
a << Event.new(self,start,event_type,header,record)
|
205
206
|
else
|
206
|
-
puts "Invalid '#{event_type}' data block #{block}: #{
|
207
|
+
puts "Invalid '#{event_type}' data block #{block}: #{record.inspect}"
|
207
208
|
break a
|
208
209
|
end
|
209
210
|
end
|
@@ -214,8 +215,6 @@ module Geoptima
|
|
214
215
|
puts "\t#{d.data.join("\t")}"
|
215
216
|
end
|
216
217
|
end
|
217
|
-
else
|
218
|
-
puts "No header found for event type: #{event_type}"
|
219
218
|
end
|
220
219
|
end
|
221
220
|
find_first_and_last(events_data)
|
@@ -227,8 +226,8 @@ module Geoptima
|
|
227
226
|
events_data.each do |event_type,data|
|
228
227
|
@first ||= data[0]
|
229
228
|
@last ||= data[-1]
|
230
|
-
@first = data[0] if(@first.time > data[0].time)
|
231
|
-
@last = data[-1] if(@last.time < data[-1].time)
|
229
|
+
@first = data[0] if(@first && @first.time > data[0].time)
|
230
|
+
@last = data[-1] if(@last && @last.time < data[-1].time)
|
232
231
|
end
|
233
232
|
if $debug
|
234
233
|
puts "For data: #{self}"
|
@@ -276,24 +275,28 @@ module Geoptima
|
|
276
275
|
end.compact.uniq
|
277
276
|
end
|
278
277
|
|
279
|
-
def recent(event,key)
|
278
|
+
def recent(event,key,seconds=60)
|
280
279
|
unless event[key]
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
280
|
+
if imei = event.file.imei
|
281
|
+
puts "Searching for recent values for '#{key}' starting at event #{event}" if($debug)
|
282
|
+
ev,prop=key.split(/\./)
|
283
|
+
ar=sorted
|
284
|
+
puts "\tSearching through #{ar && ar.length} events for event type #{ev} and property #{prop}" if($debug)
|
285
|
+
if i=ar.index(event)
|
286
|
+
afe = while(i>0)
|
287
|
+
fe = ar[i-=1]
|
288
|
+
puts "\t\tTesting event[#{i}]: #{fe}" if($debug)
|
289
|
+
break(fe) if(fe.nil? || (event.time - fe.time) * SPERDAY > seconds || (fe.name == ev && fe.file.imei == imei))
|
290
|
+
end
|
291
|
+
if afe && afe.name == ev
|
292
|
+
puts "\t\tFound event[#{i}] with #{prop} => #{afe[prop]} and time gap of #{(event.time - fe.time) * SPERDAY} seconds" if($debug)
|
293
|
+
event[key] = afe[prop]
|
294
|
+
end
|
295
|
+
else
|
296
|
+
puts "Event not found in search for recent '#{key}': #{event}"
|
294
297
|
end
|
295
298
|
else
|
296
|
-
puts "
|
299
|
+
puts "Not searching for correlated data without imei: #{event}"
|
297
300
|
end
|
298
301
|
end
|
299
302
|
# @recent[key] ||= ''
|