geoptima 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/csv_chart +167 -17
- data/bin/csv_merge +204 -30
- data/bin/show_geoptima +128 -35
- data/examples/csv_chart.rb +167 -17
- data/examples/csv_merge.rb +204 -30
- data/examples/show_geoptima.rb +128 -35
- data/geoptima.gemspec +1 -1
- data/lib/geoptima/data.rb +40 -37
- data/lib/geoptima/version.rb +1 -1
- metadata +5 -4
data/examples/csv_merge.rb
CHANGED
@@ -9,20 +9,24 @@ require 'geoptima/options'
 require 'fileutils'
 require 'geoptima/daterange'
 
-Geoptima::assert_version("0.1.3")
+Geoptima::assert_version("0.1.4")
 
 $export_dir = '.'
 $export_name = 'merged.csv'
+$split_by = :days
 
 $files = Geoptima::Options.process_args do |option|
   option.t {$time_split = true}
+  option.m {$low_memory = true}
   option.D {$export_dir = ARGV.shift}
   option.N {$export_name = ARGV.shift}
-  option.T do
-    $time_range = Geoptima::DateRange.from ARGV.shift
+  option.S do
+    $split_by = case ARGV.shift.downcase.intern
+      when :days ; :days
+      else :days
+    end
   end
+  option.T {$time_range = Geoptima::DateRange.from ARGV.shift}
 end
 
 FileUtils.mkdir_p $export_dir
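A note on the new -S option: the case expression above recognizes only days, so any other unit argument currently falls back to :days. A standalone sketch of that normalization (the unit symbols beyond :days are hypothetical inputs):

    [:days, :hours, :weeks].map do |unit|
      case unit
        when :days ; :days
        else :days
      end
    end
    # => [:days, :days, :days] (every unit normalizes to :days for now)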
@@ -30,46 +34,216 @@ FileUtils.mkdir_p $export_dir
 
 $help = true unless($files.length>0)
 if $help
   puts <<EOHELP
-Usage:
+Usage: csv_merge <-dhtm> <-N name> <-D dir> <-T range> <-S split_by> files...
   -d  debug mode #{cw $debug}
   -h  print this help #{cw $help}
-  -t  merge and split by time (
+  -t  merge and split by time (#{$split_by}) #{cw $time_split}
+  -m  use low memory, temporarily storing to intermediate files #{cw $low_memory}
+  -N  use specified name for merged dataset: #{$export_name}
   -D  export to specified directory: #{$export_dir}
+  -S  time units to split exports by: #{$split_by}
   -T  set time-range filter: #{$time_range}
 Files to import: #{$files.join(', ')}
 EOHELP
   exit
 end
 
-class
-  attr_reader :
-  def initialize(
+class CSVRecord
+  attr_reader :time, :fields, :day
+  def initialize(fields,time_index=0)
+    @fields = fields
+    @time = DateTime.parse(fields[time_index])
+    @day = @time.strftime("%Y-%m-%d")
+  end
+  def [](index)
+    fields[index]
+  end
+  def <=>(other)
+    time <=> other
+  end
+  def within(time_range)
+    time_range.nil? || time_range.include?(time)
   end
 end
 
-lines = 0
+class CSVDataset
+  attr_reader :filename, :headers, :day_map, :lines, :count, :record_creation_duration
+  def initialize(filename)
+    @filename = filename
+    @lines = []
+    @day_map = {}
+    @record_creation_duration = 0
+    @count = 0
+    @headers = nil
+    read_file do |fields|
+      add fields
+    end
+  end
+  def read_file
+    lines = 0
+    File.open(filename).each do |line|
+      fields=line.chomp.split(/\t/)
+      if lines > 0
+        puts "Processing line: #{line}" if($debug)
+        yield fields
+      else
+        if fields.length<2
+          puts "Too few headers, rejecting #{file}"
+          break
+        end
+        @headers ||= fields
+      end
+      lines += 1
+    end
+    @export_headers ||= @headers
+  end
+  def add(fields)
+    start_time = Time.new
+    line = create_line(fields)
+    if line
+      @day_map[line.day] ||= 0
+      @day_map[line.day] += 1
+      @lines << line unless($low_memory)
+      @count += 1
+      @record_creation_duration += Time.new - start_time
+    end
+    line
+  end
+  def create_line(fields)
+    begin
+      line = CSVRecord.new(fields,0)
+      if(line.within($time_range))
+        line
+      else
+        nil
+      end
+    rescue ArgumentError
+      puts "Failed to parse line with timestamp='#{fields[0]}': #{$!}"
+    end
+  end
+  def header_map(eh=nil)
+    if eh
+      @export_headers = eh
+      @header_map = nil
+    end
+    unless @header_map
+      @header_map = []
+      (@export_headers || @headers).each do |head|
+        @header_map << @headers.index(head)
+      end
+    end
+    @header_map
+  end
+  def map_line(line)
+    @header_map.map do |index|
+      index && line[index]
+    end
+  end
+  def days
+    @day_map.keys.sort
+  end
+  def each(eh=nil)
+    header_map(eh)
+    if $low_memory
+      read_file do |fields|
+        line = create_line fields
+        yield line.day,map_line(line)
+      end
     else
-      puts "Too few headers, rejecting #{file}"
-      break
+      (@lines || []).each do |line|
+        yield line.day,map_line(line)
       end
-      $stats_managers[name].set_headers(headers)
     end
   end
+  def <=>(other)
+    self.filename <=> other.filename
+  end
+  def length
+    count
+  end
 end
 
+class CSVDatasets
+  attr_reader :datasets
+  def initialize
+    @datasets = []
+  end
+  def add_file(file)
+    lines = 0
+    dataset = nil
+    filename = File.basename(file)
+    (names = filename.split(/[_\.]/)).pop
+    name = names.join('_')
+    puts "About to read file #{file}"
+    dataset = CSVDataset.new(file)
+    @datasets << dataset if(dataset && dataset.length>0)
+    dataset
+  end
+  def export_days
+    headers = @datasets.map{|d| d.headers}.flatten.uniq
+    days = @datasets.map{|d| d.days}.flatten.sort.uniq
+    day_files = {}
+    day_names = {}
+    count = {}
+    duration = {}
+    days.each do |day|
+      filename = "#{$export_dir}/#{$export_name.gsub(/\.csv$/,'')}_#{day}.csv"
+      puts "Exporting #{filename} for #{day}"
+      day_names[day] = filename
+      day_files[day] = File.open(filename,'w')
+      day_files[day].puts headers.join("\t")
+      count[day] = 0
+      duration[day] = 0
+    end
+    @datasets.sort.each do |dataset|
+      dataset.each(headers) do |day,line|
+        start_time = Time.new
+        day_files[day].puts line.join("\t")
+        duration[day] += Time.new - start_time
+        count[day] += 1
+      end
+    end
+    day_files.each do |day,out|
+      out.close
+      puts "\tExported #{count[day]} records to #{day_names[day]} in #{duration[day]} seconds"
+    end
+  end
+  def export_merged
+    headers = @datasets.map{|d| d.headers}.flatten.sort.uniq
+    filename = "#{$export_dir}/#{$export_name}"
+    File.open(filename,'w') do |out|
+      out.puts headers.join("\t")
+      @datasets.sort.each(headers) do |dataset|
+        dataset.each do |day,line|
+          out.puts line.join("\t")
+        end
+      end
+    end
+  end
+end
+
+$datasets = CSVDatasets.new
+
+$files.each do |file|
+  start_time = Time.new
+  ds = $datasets.add_file(file)
+  duration = Time.new - start_time
+  puts "\tLoaded #{file} in #{duration} seconds"
+  puts "\t#{(100.0 * ds.record_creation_duration.to_f/duration.to_f).to_i}% = #{ds.record_creation_duration}/#{duration} was spent creating records"
+  puts "\tFile contained #{ds.length} events for #{ds.days.length} days:"
+  ds.days.each do |day|
+    puts "\t\t#{day}: \t#{(100.0 * ds.day_map[day].to_f/ds.length.to_f).to_i}%\t#{ds.day_map[day]} records"
+  end
+end
+
+start_time = Time.new
+
+if $time_split
+  $datasets.export_days
+else
+  $datasets.export_merged
+end
+
+duration = Time.new - start_time
+puts "Exported in #{duration} seconds"
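The rewritten csv_merge is built around the two classes above: CSVRecord parses the first field of each tab-separated row as its timestamp and derives a YYYY-MM-DD day key, and CSVDataset tallies records per day in day_map so that export_days can write one merged file per day; with -m, rows are not retained in memory and are re-read from disk in a second read_file pass at export time. A minimal, self-contained sketch of the day-bucketing step (the sample row is hypothetical):

    require 'date'

    # A tab-separated row whose first column is the timestamp, as CSVRecord assumes
    fields = "2012-02-14T09:30:00\tcall\tdropped".split(/\t/)
    time = DateTime.parse(fields[0])
    day  = time.strftime("%Y-%m-%d")   # => "2012-02-14"
    # with the default $export_name, export_days would append this row to merged_2012-02-14.csv

A plausible invocation given the option block above (file names hypothetical): csv_merge -t -m -S days -N merged.csv -D out a.csv b.csv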
data/examples/show_geoptima.rb
CHANGED
@@ -6,8 +6,9 @@ $: << '../lib'
 
 require 'date'
 require 'geoptima'
+require 'geoptima/options'
 
-Geoptima::assert_version("0.1.3")
+Geoptima::assert_version("0.1.4")
 
 $debug=false
 
@@ -24,29 +25,89 @@ $files = Geoptima::Options.process_args do |option|
 
   option.m {$map_headers = true}
   option.a {$combine_all = true}
   option.l {$more_headers = true}
+  option.P {$export_prefix = ARGV.shift}
   option.E {$event_names += ARGV.shift.split(/[\,\;\:\.]+/)}
-  option.T do
-    $time_range = Geoptima::DateRange.from ARGV.shift
-  end
+  option.T {$time_range = Geoptima::DateRange.from ARGV.shift}
   option.L {$print_limit = [1,ARGV.shift.to_i].max}
-
-  option.t {$time_split = true}
-  option.D {$export_dir = ARGV.shift}
-  option.N {$merged_name = ARGV.shift}
-  option.S {$specfile = ARGV.shift}
-  option.P {$diversity = ARGV.shift.to_f}
-  option.W {$chart_width = ARGV.shift.to_i}
-  option.T do
-    $time_range = Geoptima::DateRange.from ARGV.shift
-  end
+  option.M {$mapfile = ARGV.shift}
 end.map do |file|
   File.exist?(file) ? file : puts("No such file: #{file}")
 end.compact
 
+class HeaderMap
+  attr_reader :prefix, :name, :event
+  attr_accessor :columns
+  def initialize(prefix,name,event)
+    @prefix = prefix
+    @name = name
+    @event = event
+    @columns = []
+  end
+  def mk_known(header)
+    puts "Creating column mappings for headers: #{header}" if($debug)
+    @col_indices = {}
+    columns.each do |col|
+      c = (col[1] && col[1].gsub(/\?/,'')).to_s
+      if c.length>0
+        @col_indices[c] = header.index(c)
+        puts "\tMade column mapping: #{c} --> #{header.index(c)}" if($debug)
+      end
+    end
+  end
+  def map_fields(header,fields)
+    @scenario_counter ||= 0
+    mk_known(header) unless @col_indices
+    @columns.map do |column|
+      if column[1] =~ /SCENARIO_COUNTER/
+        @scenario_counter += 1
+      else
+        index = @col_indices[column[1]]
+        puts "Found mapping #{column} -> #{index} -> #{index && fields[index]}" if($debug)
+        index && fields[index]
+      end
+    end
+  end
+end
+
+if $mapfile
+  $header_maps = []
+  current_map = nil
+  prefix = $mapfile.split(/\./)[0]
+  File.open($mapfile).each do |line|
+    line.chomp!
+    next if line =~ /^\s*#/
+    next if line.length < 2
+    if line =~ /^\[(\w+)\]\t(\w+)/
+      current_map = HeaderMap.new(prefix,$1,$2)
+      $header_maps << current_map
+    elsif current_map
+      current_map.columns << line.chomp.split(/\t/)[0..1]
+    else
+      puts "Invalid header map line: #{line}"
+    end
+  end
+end
+
+def show_header_maps
+  if $header_maps
+    puts "Using #{$header_maps.length} header maps:"
+    $header_maps.each do |hm|
+      puts "\t[#{hm.name}] (#{hm.event})"
+      if $debug
+        hm.columns.each do |hc|
+          puts "\t\t#{hc.map{|c| (c+' '*30)[0..30]}.join("\t-->\t")}"
+        end
+      else
+        puts "\t\t#{hm.columns.map{|hc| hc[0]}.join(', ')}"
+      end
+    end
+  end
+end
+
 $help = true if($files.length < 1)
 if $help
   puts <<EOHELP
-Usage: show_geoptima <-dpvxomlsah> <-L limit> <-E types> <-T min,max> file <files>
+Usage: show_geoptima <-dpvxomlsah> <-L limit> <-E types> <-T min,max> <-M mapfile> file <files>
   -d  debug mode (output more context during processing) #{cw $debug}
   -p  print mode (print out final results to console) #{cw $print}
   -v  verbose mode (output extra information to console) #{cw $verbose}
@@ -57,14 +118,19 @@ Usage: show_geoptima <-dpvxomlsah> <-L limit> <-E types> <-T min,max> file <file
   -s  seperate the export files by event type #{cw $seperate}
   -a  combine all IMEI's into a single dataset #{cw $combine_all}
   -h  show this help
+  -P  prefix for exported files (default: ''; current: #{$export_prefix})
   -E  comma-seperated list of event types to show and export (default: all; current: #{$event_names.join(',')})
   -T  time range to limit results to (default: all; current: #{$time_range})
   -L  limit verbose output to specific number of lines #{cw $print_limit}
+  -M  mapfile of normal->altered header names: #{$mapfile}
 EOHELP
+  show_header_maps
   exit 0
 end
 
 $verbose = $verbose || $debug
+show_header_maps if($verbose)
+
 $datasets = Geoptima::Dataset.make_datasets($files, :locate => true, :time_range => $time_range, :combine_all => $combine_all)
 
 class Export
@@ -74,13 +140,18 @@ class Export
     @imei = imei
     @names = names
     if $export
-      if $seperate
+      if $header_maps
+        @files = $header_maps.inject({}) do |a,hm|
+          a[hm.event] = File.open("#{$export_prefix}#{imei}_#{hm.prefix}_#{hm.name}.csv",'w')
+          a
+        end
+      elsif $seperate
         @files = names.inject({}) do |a,name|
-          a[name] = File.open("#{imei}_#{name}.csv",'w')
+          a[name] = File.open("#{$export_prefix}#{imei}_#{name}.csv",'w')
           a
         end
       else
-        @files={nil => File.open("#{imei}.csv",'w')}
+        @files={nil => File.open("#{$export_prefix}#{imei}.csv",'w')}
       end
     end
     @headers = names.inject({}) do |a,name|
@@ -91,7 +162,11 @@ class Export
     end
     @headers[nil] = @headers.values.flatten.sort
     files && files.each do |key,file|
-      file.puts map_headers(base_headers+more_headers+header(key)).join("\t")
+      if $header_maps
+        file.puts $header_maps.find{|hm| hm.event == key}.columns.map{|c| c[0]}.join("\t")
+      else
+        file.puts map_headers(base_headers+more_headers+header(key)).join("\t")
+      end
     end
     if $debug || $verbose
       @headers.each do |name,head|
@@ -108,7 +183,7 @@ class Export
   end
   def more_headers
     $more_headers ?
-      ['IMSI','MSISDN','MCC','MNC','LAC','CI','LAC-CI','RSSI','Platform','Model','OS','Operator'] :
+      ['IMSI','MSISDN','MCC','MNC','LAC','CI','LAC-CI','RSSI','Platform','Model','OS','Operator','Battery'] :
       []
   end
   def base_fields(event)
@@ -127,15 +202,17 @@ class Export
       when 'LAC-CI'
         "#{dataset.recent(event,'service.lac')}-#{dataset.recent(event,'service.cell_id')}"
       when 'MCC'
-        dataset.recent(event,'service.mcc')
+        event.file[h] || dataset.recent(event,'service.mcc')
       when 'MNC'
-        dataset.recent(event,'service.mnc')
+        event.file[h] || dataset.recent(event,'service.mnc')
+      when 'Battery'
+        dataset.recent(event,'batteryState.state',600)
      when 'Operator'
+        event.file['carrierName']
      when 'IMSI'
+        event.file['imsi']
      else
+        event.file[h]
      end
    end
  end
@@ -164,10 +241,10 @@ class Export
     end || hnames
   end
   def export_stats(stats)
-    File.open("#{imei}_stats.csv",'w') do |out|
+    File.open("#{$export_prefix}#{imei}_stats.csv",'w') do |out|
       stats.keys.sort.each do |header|
         out.puts header
-        values = stats[header].keys.sort
+        values = stats[header].keys.sort{|a,b| b.to_s<=>a.to_s}
         out.puts values.join("\t")
         out.puts values.map{|v| stats[header][v]}.join("\t")
         out.puts
@@ -178,7 +255,7 @@ class Export
     @headers[name]
   end
   def puts_to(line,name)
-    name = nil unless($seperate)
+    name = nil unless($seperate || $header_maps)
     files[name].puts(line) if($export && files[name])
   end
   def puts_to_all(line)
@@ -222,13 +299,29 @@ $datasets.keys.sort.each do |imei|
   names = dataset.events_names if(names.length<1)
   export = Export.new(imei,names,dataset)
   export.export_stats(dataset.stats) if($export_stats)
+  if $header_maps && $header_maps.length > 0
+    $header_maps.each do |hm|
+      puts "Searching for events for header_maps '#{hm.event}'"
+      events.each do |event|
+        if event.name == hm.event
+          header = export.header(event.name)
+          fields = header.map{|h| event[h]}
+          b_header = export.base_headers + export.more_headers
+          b_fields = export.base_fields(event) + export.more_fields(event,dataset)
+          all_fields = hm.map_fields(b_header + header, b_fields + fields)
+          export.puts_to all_fields.join("\t"), event.name
+        end
+      end
+    end
+  else
+    events.each do |event|
+      names.each do |name|
+        if event.name === name
+          fields = export.header($seperate ? name : nil).map{|h| event[h]}
+          b_fields = export.base_fields(event) + export.more_fields(event,dataset)
+          export.puts_to "#{b_fields.join("\t")}\t#{fields.join("\t")}", name
+          if_le{puts "#{b_fields.join("\t")}\t#{event.fields.inspect}"}
+        end
      end
    end
  end
data/geoptima.gemspec
CHANGED
@@ -25,7 +25,7 @@ EOF
   s.files = Dir.glob("{bin,lib,rdoc}/**/*").reject{|x| x=~/(tmp|target|test-data)/ || x=~/~$/} +
     Dir.glob("examples/*rb") + Dir.glob("examples/sample*json") +
     %w(README.rdoc CHANGELOG CONTRIBUTORS Gemfile geoptima.gemspec)
-  s.executables = ['show_geoptima','geoptima_file_time','csv_chart','csv_stats']
+  s.executables = ['show_geoptima','geoptima_file_time','csv_chart','csv_stats','csv_merge']
 
   s.extra_rdoc_files = %w( README.rdoc )
   s.rdoc_options = ["--quiet", "--title", "Geoptima.rb", "--line-numbers", "--main", "README.rdoc", "--inline-source"]
data/lib/geoptima/data.rb
CHANGED
@@ -55,7 +55,7 @@ module Geoptima
       @header = header
       @data = data
       @fields = @header.inject({}) do |a,v|
-        a[v] = @data[a.length]
+        a[v] = check_field(@data[a.length])
         a
       end
       @time = start + (@fields['timeoffset'].to_f / MSPERDAY.to_f)
@@ -65,6 +65,9 @@ module Geoptima
       end
       puts "Created Event: #{self}" if($debug)
     end
+    def check_field(field)
+      (field && field.respond_to?('length') && field =~ /\d\,\d/) ? field.gsub(/\,/,'.').to_f : field
+    end
     def utc
       time.new_offset(0)
     end
@@ -135,7 +138,7 @@ module Geoptima
       @start ||= subscriber['start'] && DateTime.parse(subscriber['start'].gsub(/Asia\/Bangkok/,'GMT+7'))#.gsub(/Mar 17 2044/,'Feb 14 2012'))
     end
     def valid?
-      start && start
+      start && start >= (Data.min_start-1) && start < Data.max_start
     end
     def self.min_start
       @@min_start ||= MIN_DATETIME
@@ -180,30 +183,28 @@ module Geoptima
       unless header
         puts "No header found for '#{event_type}', trying known Geoptima headers"
         header = Event::KNOWN_HEADERS[event_type]
-      else
-        puts "No data found for event type '#{event_type}'"
-        header = nil
-      end
+        puts "Found known header '#{event_type}' => #{header.inspect}" if(header)
+      end
+      # Double-check the header length matches a multiple of the data length
+      if header
+        mismatch = events.length % header.length
+        if mismatch != 0
+          puts "'#{event_type}' header length #{header.length} incompatible with data length #{events.length}"
+          header = nil
        end
+      else
+        puts "No header found for event type: #{event_type}"
      end
      # Now process the single long data array into a list of events with timestamps
      if header
        events_data[event_type] = (0...data[event_type].to_i).inject([]) do |a,block|
          index = header.length * block
-          if
+          record = events[index...(index+header.length)]
+          if record && record.length == header.length
            @count += 1
-            a << Event.new(self,start,event_type,header,
+            a << Event.new(self,start,event_type,header,record)
          else
-            puts "Invalid '#{event_type}' data block #{block}: #{
+            puts "Invalid '#{event_type}' data block #{block}: #{record.inspect}"
            break a
          end
        end
@@ -214,8 +215,6 @@ module Geoptima
           puts "\t#{d.data.join("\t")}"
         end
       end
-      else
-        puts "No header found for event type: #{event_type}"
     end
   end
   find_first_and_last(events_data)
@@ -227,8 +226,8 @@ module Geoptima
       events_data.each do |event_type,data|
         @first ||= data[0]
         @last ||= data[-1]
-        @first = data[0] if(@first.time > data[0].time)
-        @last = data[-1] if(@last.time < data[-1].time)
+        @first = data[0] if(@first && @first.time > data[0].time)
+        @last = data[-1] if(@last && @last.time < data[-1].time)
       end
       if $debug
         puts "For data: #{self}"
@@ -276,24 +275,28 @@ module Geoptima
     end.compact.uniq
   end
 
-    def recent(event,key)
+    def recent(event,key,seconds=60)
       unless event[key]
+        if imei = event.file.imei
+          puts "Searching for recent values for '#{key}' starting at event #{event}" if($debug)
+          ev,prop=key.split(/\./)
+          ar=sorted
+          puts "\tSearching through #{ar && ar.length} events for event type #{ev} and property #{prop}" if($debug)
+          if i=ar.index(event)
+            afe = while(i>0)
+              fe = ar[i-=1]
+              puts "\t\tTesting event[#{i}]: #{fe}" if($debug)
+              break(fe) if(fe.nil? || (event.time - fe.time) * SPERDAY > seconds || (fe.name == ev && fe.file.imei == imei))
+            end
+            if afe && afe.name == ev
+              puts "\t\tFound event[#{i}] with #{prop} => #{afe[prop]} and time gap of #{(event.time - fe.time) * SPERDAY} seconds" if($debug)
+              event[key] = afe[prop]
+            end
+          else
+            puts "Event not found in search for recent '#{key}': #{event}"
          end
        else
-          puts "
+          puts "Not searching for correlated data without imei: #{event}"
        end
      end
      # @recent[key] ||= ''