embulk-input-mixpanel 0.5.15 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +4 -1
- data/embulk-input-mixpanel.gemspec +1 -1
- data/lib/embulk/input/mixpanel.rb +24 -342
- data/lib/embulk/input/mixpanel_api/client.rb +48 -9
- data/lib/embulk/input/service/base_service.rb +122 -0
- data/lib/embulk/input/service/export_service.rb +284 -0
- data/lib/embulk/input/service/jql_service.rb +276 -0
- data/lib/timezone_validator.rb +1 -1
- data/test/embulk/input/mixpanel_api/test_client.rb +4 -22
- data/test/embulk/input/{test_mixpanel.rb → service/test_export_service.rb} +62 -25
- data/test/embulk/input/service/test_jql_service.rb +745 -0
- data/test/test_range_generator.rb +1 -1
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8fc75a11eeef6fc6e9831e11a5b3eeae414d7799
|
4
|
+
data.tar.gz: 8176825bc52368dd5abd8eea00f394dec3061d37
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d13fa15449b9900e4179260b6609a536e058de90e96ae691a88ebd757a13b6ddd8972a2715802a8d18557e30157220d1703b933e97a32e2d7766c347fc09c05
|
7
|
+
data.tar.gz: 42c5a4bada8075467e6b1195c7a232f78dc77e71f65a9b573b5e17152af53405a11595f43b4f6456db6f45b54267668a8ed6a6dc6c6db5e59bceb8fb78fec6fb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.6.1 - 2020-04-06
|
2
|
+
|
3
|
+
* [enhancement] Support JQL script for Profile [#66](https://github.com/treasure-data/embulk-input-mixpanel/pull/66)
|
4
|
+
|
5
|
+
## 0.6.0 - 2020-03-30
|
6
|
+
|
7
|
+
* [enhancement] Support JQL script [#65](https://github.com/treasure-data/embulk-input-mixpanel/pull/65)
|
8
|
+
|
1
9
|
## 0.5.15 - 2020-01-22
|
2
10
|
|
3
11
|
* [enhancement] Update the authentication method to latest [#63](https://github.com/treasure-data/embulk-input-mixpanel/pull/63)
|
data/README.md
CHANGED
@@ -33,13 +33,16 @@ To get it, you should log in mixpanel website, and click gear icon at the lower
|
|
33
33
|
|
34
34
|
- **api_secret**: project API Secret (string, required)
|
35
35
|
- **export_endpoint**: the Data Export API's endpoint (string, default to "http://data.mixpanel.com/api/2.0/export")
|
36
|
+
- **jql_endpoint**: the JQL API's endpoint (string, default to "https://mixpanel.com/api/2.0/jql/")
|
37
|
+
- **jql_mode**: using JQL or export endpoint (boolean, default to false)
|
38
|
+
- **jql_script**: JQL script sent to the JQL endpoint (string)
|
36
39
|
- **timezone**: project timezone(string, required)
|
37
40
|
- **from_date**: From date to export (string, optional, default: today - 2)
|
38
41
|
- NOTE: Mixpanel API supports to export data from at least 2 days before to at most the previous day.
|
39
42
|
- **fetch_days**: Count of days range for exporting (integer, optional, default: from_date - (today - 1))
|
40
43
|
- NOTE: Mixpanel doesn't support from_date > today - 2
|
41
44
|
- **incremental**: Run incremental mode or not (boolean, optional, default: true)
|
42
|
-
- **incremental_column**: Column to be add to where query as a constraint for incremental time. Only data that have incremental_column timestamp > than previous latest_fetched_time will be return (string, optional, default:
|
45
|
+
- **incremental_column**: Column to be added to the where query as a constraint for incremental time. Only data whose incremental_column timestamp is greater than the previous latest_fetched_time will be returned (string, optional, default: time)
|
43
46
|
- **back_fill_time**: Amount of time that will be subtracted from `from_date` to calculate the final `from_date` that will be use for API Request. This is due to Mixpanel caching data on user devices before sending it to Mixpanel server (integer, optional, default: 5)
|
44
47
|
- NOTE: Only have effect when incremental is true and incremental_column is specified
|
45
48
|
- **incremental_column_upper_limit_delay_in_seconds**: When query with incremental column, plugin will lock the upper limit of incremental column query with the job start time, in order to avoid issue with data that commit when the job is running
|
data/lib/embulk/input/mixpanel.rb
CHANGED
@@ -1,88 +1,16 @@
|
|
1
|
-
require "
|
2
|
-
require "
|
3
|
-
require "embulk/input/mixpanel_api/client"
|
4
|
-
require "embulk/input/mixpanel_api/exceptions"
|
5
|
-
require "range_generator"
|
6
|
-
require "timezone_validator"
|
7
|
-
require "active_support/core_ext/time"
|
1
|
+
require "embulk/input/service/jql_service"
|
2
|
+
require "embulk/input/service/export_service"
|
8
3
|
|
9
4
|
module Embulk
|
10
5
|
module Input
|
11
6
|
class Mixpanel < InputPlugin
|
12
7
|
Plugin.register_input("mixpanel", self)
|
13
|
-
|
14
|
-
NOT_PROPERTY_COLUMN = "event".freeze
|
15
|
-
|
16
|
-
# https://mixpanel.com/help/questions/articles/special-or-reserved-properties
|
17
|
-
# https://mixpanel.com/help/questions/articles/what-properties-do-mixpanels-libraries-store-by-default
|
18
|
-
#
|
19
|
-
# JavaScript to extract key names from HTML: run it on Chrome Devtool when opening their document
|
20
|
-
# > Array.from(document.querySelectorAll("strong")).map(function(s){ return s.textContent.match(/[A-Z]/) ? s.parentNode.textContent.match(/\((.*?)\)/)[1] : s.textContent.split(",").join(" ") }).join(" ")
|
21
|
-
# > Array.from(document.querySelectorAll("li")).map(function(s){ m = s.textContent.match(/\((.*?)\)/); return m && m[1] }).filter(function(k) { return k && !k.match("utm") }).join(" ")
|
22
|
-
KNOWN_KEYS = %W(
|
23
|
-
#{NOT_PROPERTY_COLUMN}
|
24
|
-
distinct_id ip mp_name_tag mp_note token time mp_country_code length campaign_id $email $phone $distinct_id $ios_devices $android_devices $first_name $last_name $name $city $region $country_code $timezone $unsubscribed
|
25
|
-
$city $region mp_country_code $browser $browser_version $device $current_url $initial_referrer $initial_referring_domain $os $referrer $referring_domain $screen_height $screen_width $search_engine $city $region $mp_country_code $timezone $browser_version $browser $initial_referrer $initial_referring_domain $os $last_seen $city $region mp_country_code $app_release $app_version $carrier $ios_ifa $os_version $manufacturer $lib_version $model $os $screen_height $screen_width $wifi $city $region $mp_country_code $timezone $ios_app_release $ios_app_version $ios_device_model $ios_lib_version $ios_version $ios_ifa $last_seen $city $region mp_country_code $app_version $bluetooth_enabled $bluetooth_version $brand $carrier $has_nfc $has_telephone $lib_version $manufacturer $model $os $os_version $screen_dpi $screen_height $screen_width $wifi $google_play_services $city $region mp_country_code $timezone $android_app_version $android_app_version_code $android_lib_version $android_os $android_os_version $android_brand $android_model $android_manufacturer $last_seen
|
26
|
-
).uniq.freeze
|
27
|
-
|
28
|
-
|
29
|
-
DEFAULT_FETCH_DAYS = 7
|
30
|
-
DEFAULT_TIME_COLUMN = 'time'
|
31
|
-
|
8
|
+
|
32
9
|
def self.transaction(config, &control)
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
fetch_days = config.param(:fetch_days, :integer, default: nil)
|
38
|
-
|
39
|
-
|
40
|
-
fetch_unknown_columns = config.param(:fetch_unknown_columns, :bool, default: false)
|
41
|
-
|
42
|
-
incremental_column = config.param(:incremental_column, :string, default: nil)
|
43
|
-
incremental = config.param(:incremental, :bool, default: true)
|
44
|
-
latest_fetched_time = config.param(:latest_fetched_time, :integer, default: 0)
|
45
|
-
|
46
|
-
# Backfill from date if incremental and an incremental field is set and we are in incremental run
|
47
|
-
if incremental && incremental_column && latest_fetched_time !=0
|
48
|
-
back_fill_days = config.param(:back_fill_days, :integer, default: 5)
|
49
|
-
Embulk.logger.info "Backfill days #{back_fill_days}"
|
50
|
-
from_date = (Date.parse(from_date) - back_fill_days).to_s
|
51
|
-
fetch_days = fetch_days.nil? ? nil : fetch_days + back_fill_days
|
52
|
-
end
|
53
|
-
|
54
|
-
range = RangeGenerator.new(from_date, fetch_days, timezone).generate_range
|
55
|
-
Embulk.logger.info "Try to fetch data from #{range.first} to #{range.last}"
|
56
|
-
job_start_time = Time.now.to_i*1000
|
57
|
-
upper_limit_delay = config.param(:incremental_column_upper_limit_delay_in_seconds, :integer, default: 0)
|
58
|
-
incremental_column_upper_limit = job_start_time - (upper_limit_delay * 1000)
|
59
|
-
task = {
|
60
|
-
params: export_params(config),
|
61
|
-
dates: range,
|
62
|
-
timezone: timezone,
|
63
|
-
export_endpoint: export_endpoint(config),
|
64
|
-
api_secret: config.param(:api_secret, :string),
|
65
|
-
schema: config.param(:columns, :array),
|
66
|
-
fetch_unknown_columns: fetch_unknown_columns,
|
67
|
-
fetch_custom_properties: config.param(:fetch_custom_properties, :bool, default: true),
|
68
|
-
retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
|
69
|
-
incremental_column: incremental_column,
|
70
|
-
retry_limit: config.param(:retry_limit, :integer, default: 5),
|
71
|
-
latest_fetched_time: latest_fetched_time,
|
72
|
-
incremental: incremental,
|
73
|
-
slice_range: config.param(:slice_range, :integer, default: 7),
|
74
|
-
job_start_time: job_start_time,
|
75
|
-
incremental_column_upper_limit: incremental_column_upper_limit,
|
76
|
-
allow_partial_import: config.param(:allow_partial_import,:bool, default: true)
|
77
|
-
}
|
78
|
-
|
79
|
-
if !incremental_column.nil? && !latest_fetched_time.nil? && (incremental_column_upper_limit <= latest_fetched_time)
|
80
|
-
raise Embulk::ConfigError.new("Incremental column upper limit (job_start_time - incremental_column_upper_limit_delay_in_seconds) can't be smaller or equal latest fetched time #{latest_fetched_time}")
|
81
|
-
end
|
82
|
-
|
83
|
-
if task[:fetch_unknown_columns] && task[:fetch_custom_properties]
|
84
|
-
raise Embulk::ConfigError.new("Don't set true both `fetch_unknown_columns` and `fetch_custom_properties`.")
|
85
|
-
end
|
10
|
+
service = service(config)
|
11
|
+
service.validate_config
|
12
|
+
task = service.create_task
|
13
|
+
Embulk.logger.info "Try to fetch data from #{task[:dates].first} to #{task[:dates].last}"
|
86
14
|
|
87
15
|
columns = task[:schema].map do |column|
|
88
16
|
name = column["name"]
|
@@ -91,15 +19,15 @@ module Embulk
|
|
91
19
|
Column.new(nil, name, type, column["format"])
|
92
20
|
end
|
93
21
|
|
94
|
-
if fetch_unknown_columns
|
95
|
-
Embulk.logger.warn "Deprecated `unknown_columns`. Use `fetch_custom_properties` instead."
|
96
|
-
columns << Column.new(nil, "unknown_columns", :json)
|
97
|
-
end
|
98
|
-
|
99
22
|
if task[:fetch_custom_properties]
|
100
23
|
columns << Column.new(nil, "custom_properties", :json)
|
101
24
|
end
|
102
25
|
|
26
|
+
if task[:fetch_unknown_columns]
|
27
|
+
Embulk.logger.warn "Deprecated `unknown_columns`. Use `fetch_custom_properties` instead."
|
28
|
+
columns << Column.new(nil, "unknown_columns", :json)
|
29
|
+
end
|
30
|
+
|
103
31
|
resume(task, columns, 1, &control)
|
104
32
|
end
|
105
33
|
|
@@ -110,283 +38,37 @@ module Embulk
|
|
110
38
|
# implementation is terrible.
|
111
39
|
if task[:incremental]
|
112
40
|
task_report = task_reports.first
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
from_date: next_to_date.to_s,
|
117
|
-
latest_fetched_time: task_report[:latest_fetched_time],
|
118
|
-
}
|
119
|
-
return next_config_diff
|
41
|
+
service = service(task)
|
42
|
+
next_from_date = service.next_from_date(task_report)
|
43
|
+
return next_from_date
|
120
44
|
end
|
121
45
|
return {}
|
122
46
|
end
|
123
47
|
|
124
48
|
def self.guess(config)
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
|
129
|
-
retry_limit: config.param(:retry_limit, :integer, default: 5),
|
130
|
-
})
|
131
|
-
client = MixpanelApi::Client.new(config.param(:api_secret, :string),
|
132
|
-
retryer,
|
133
|
-
export_endpoint(config))
|
134
|
-
|
135
|
-
range = guess_range(config)
|
136
|
-
Embulk.logger.info "Guessing schema using #{range.first}..#{range.last} records"
|
137
|
-
|
138
|
-
params = export_params(config).merge(
|
139
|
-
"from_date" => range.first,
|
140
|
-
"to_date" => range.last,
|
141
|
-
)
|
142
|
-
columns = guess_from_records(client.export_for_small_dataset(params))
|
143
|
-
return {"columns" => columns}
|
144
|
-
end
|
145
|
-
|
146
|
-
def self.perfect_retry(task)
|
147
|
-
PerfectRetry.new do |config|
|
148
|
-
config.limit = task[:retry_limit]
|
149
|
-
config.sleep = proc{|n| task[:retry_initial_wait_sec] * (2 * (n - 1)) }
|
150
|
-
config.dont_rescues = [Embulk::ConfigError,MixpanelApi::IncompleteExportResponseError]
|
151
|
-
config.rescues = [RuntimeError]
|
152
|
-
config.log_level = nil
|
153
|
-
config.logger = Embulk.logger
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
def self.export_endpoint(config)
|
158
|
-
config.param(:export_endpoint, :string, default: Embulk::Input::MixpanelApi::Client::DEFAULT_EXPORT_ENDPOINT)
|
49
|
+
service = service(config)
|
50
|
+
service.validate_config
|
51
|
+
return {"columns"=>service.guess_columns}
|
159
52
|
end
|
160
53
|
|
161
54
|
def init
|
162
|
-
@export_endpoint = task[:export_endpoint]
|
163
55
|
@api_secret = task[:api_secret]
|
164
|
-
@params = task[:params]
|
165
|
-
@timezone = task[:timezone]
|
166
|
-
@schema = task[:schema]
|
167
|
-
@dates = task[:dates]
|
168
|
-
@fetch_unknown_columns = task[:fetch_unknown_columns]
|
169
|
-
@incremental_column = task[:incremental_column]
|
170
|
-
@incremental = task[:incremental]
|
171
56
|
end
|
172
57
|
|
173
58
|
def run
|
174
|
-
|
175
|
-
self.class.giveup_when_mixpanel_is_down(task[:export_endpoint])
|
176
|
-
prev_latest_fetched_time = task[:latest_fetched_time] || 0
|
177
|
-
prev_latest_fetched_time_format = Time.at(prev_latest_fetched_time).strftime("%F %T %z")
|
178
|
-
current_latest_fetched_time = prev_latest_fetched_time
|
179
|
-
@dates.each_slice(task[:slice_range]) do |slice_dates|
|
180
|
-
ignored_fetched_record_count = 0
|
181
|
-
# There is the issue with Mixpanel time field during the transition from standard to daylight saving time
|
182
|
-
# in the US timezone i.e. 11 Mar 2018 2AM - 2:59AM, time within that period must not be existed,
|
183
|
-
# due to daylight saving, time will be forwarded 1 hour from 2AM to 3AM.
|
184
|
-
#
|
185
|
-
# All of records with wrong timezone will be ignored instead of throw exception out
|
186
|
-
ignored_wrong_daylight_tz_record_count = 0
|
187
|
-
unless preview?
|
188
|
-
Embulk.logger.info "Fetching data from #{slice_dates.first} to #{slice_dates.last} ..."
|
189
|
-
end
|
190
|
-
record_time_column=@incremental_column || DEFAULT_TIME_COLUMN
|
191
|
-
begin
|
192
|
-
fetch(slice_dates, prev_latest_fetched_time).each do |record|
|
193
|
-
if @incremental
|
194
|
-
if !record["properties"].include?(record_time_column)
|
195
|
-
raise Embulk::ConfigError.new("Incremental column not exists in fetched data #{record_time_column}")
|
196
|
-
end
|
197
|
-
record_time = record["properties"][record_time_column]
|
198
|
-
if @incremental_column.nil?
|
199
|
-
if record_time <= prev_latest_fetched_time
|
200
|
-
ignored_fetched_record_count += 1
|
201
|
-
next
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
current_latest_fetched_time= [
|
206
|
-
current_latest_fetched_time,
|
207
|
-
record_time,
|
208
|
-
].max
|
209
|
-
end
|
210
|
-
begin
|
211
|
-
values = extract_values(record)
|
212
|
-
if @fetch_unknown_columns
|
213
|
-
unknown_values = extract_unknown_values(record)
|
214
|
-
values << unknown_values.to_json
|
215
|
-
end
|
216
|
-
if task[:fetch_custom_properties]
|
217
|
-
values << collect_custom_properties(record)
|
218
|
-
end
|
219
|
-
page_builder.add(values)
|
220
|
-
rescue TZInfo::PeriodNotFound
|
221
|
-
ignored_wrong_daylight_tz_record_count += 1
|
222
|
-
end
|
223
|
-
end
|
224
|
-
rescue MixpanelApi::IncompleteExportResponseError
|
225
|
-
if !task[:allow_partial_import]
|
226
|
-
# re raise the exception if we don't allow partial import
|
227
|
-
raise
|
228
|
-
end
|
229
|
-
end
|
230
|
-
if ignored_fetched_record_count > 0
|
231
|
-
Embulk.logger.warn "Skipped already loaded #{ignored_fetched_record_count} records. These record times are older or equal than previous fetched record time (#{prev_latest_fetched_time} @ #{prev_latest_fetched_time_format})."
|
232
|
-
end
|
233
|
-
if ignored_wrong_daylight_tz_record_count > 0
|
234
|
-
Embulk.logger.warn "Skipped #{ignored_wrong_daylight_tz_record_count} records due to corrupted Mixpanel time transition from standard to daylight saving"
|
235
|
-
end
|
236
|
-
break if preview?
|
237
|
-
end
|
238
|
-
page_builder.finish
|
239
|
-
task_report = {
|
240
|
-
latest_fetched_time: current_latest_fetched_time,
|
241
|
-
to_date: @dates.last || today(@timezone) - 1,
|
242
|
-
}
|
243
|
-
task_report
|
59
|
+
Mixpanel::service(DataSource[task.to_a]).ingest(task, page_builder)
|
244
60
|
end
|
245
61
|
|
246
62
|
private
|
247
63
|
|
248
|
-
def self.
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
end
|
253
|
-
|
254
|
-
def extract_values(record)
|
255
|
-
@schema.map do |column|
|
256
|
-
extract_value(record, column["name"])
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
def extract_value(record, name)
|
261
|
-
case name
|
262
|
-
when NOT_PROPERTY_COLUMN
|
263
|
-
record[NOT_PROPERTY_COLUMN]
|
264
|
-
when "time"
|
265
|
-
time = record["properties"]["time"]
|
266
|
-
adjust_timezone(time)
|
64
|
+
def self.service(config)
|
65
|
+
jql_mode = config[:jql_mode]
|
66
|
+
if jql_mode
|
67
|
+
Service::JqlService.new(config)
|
267
68
|
else
|
268
|
-
|
269
|
-
end
|
270
|
-
end
|
271
|
-
|
272
|
-
def collect_custom_properties(record)
|
273
|
-
specified_columns = @schema.map{|col| col["name"]}
|
274
|
-
custom_keys = record["properties"].keys.find_all{|key| !KNOWN_KEYS.include?(key.to_s) && !specified_columns.include?(key.to_s) }
|
275
|
-
custom_keys.inject({}) do |result, key|
|
276
|
-
result.merge({
|
277
|
-
key => record["properties"][key]
|
278
|
-
})
|
69
|
+
Service::ExportService.new(config)
|
279
70
|
end
|
280
71
|
end
|
281
|
-
|
282
|
-
def extract_unknown_values(record)
|
283
|
-
record_keys = record["properties"].keys + [NOT_PROPERTY_COLUMN]
|
284
|
-
schema_keys = @schema.map {|column| column["name"]}
|
285
|
-
unknown_keys = record_keys - schema_keys
|
286
|
-
|
287
|
-
unless unknown_keys.empty?
|
288
|
-
Embulk.logger.warn("Unknown columns exists in record: #{unknown_keys.join(', ')}")
|
289
|
-
end
|
290
|
-
|
291
|
-
unknown_keys.inject({}) do |result, key|
|
292
|
-
result[key] = extract_value(record, key)
|
293
|
-
result
|
294
|
-
end
|
295
|
-
end
|
296
|
-
|
297
|
-
def fetch(dates, last_fetch_time, &block)
|
298
|
-
from_date = dates.first
|
299
|
-
to_date = dates.last
|
300
|
-
params = @params.merge(
|
301
|
-
"from_date" => from_date,
|
302
|
-
"to_date" => to_date
|
303
|
-
)
|
304
|
-
if !@incremental_column.nil? # can't do filter on time column, time column need to be filter manually.
|
305
|
-
params = params.merge(
|
306
|
-
"where" => "#{params['where'].nil? ? '' : "(#{params['where']}) and " }properties[\"#{@incremental_column}\"] > #{last_fetch_time || 0} and properties[\"#{@incremental_column}\"] < #{task[:incremental_column_upper_limit]}"
|
307
|
-
)
|
308
|
-
end
|
309
|
-
Embulk.logger.info "Where params is #{params["where"]}"
|
310
|
-
client = MixpanelApi::Client.new(@api_secret, self.class.perfect_retry(task), @export_endpoint)
|
311
|
-
|
312
|
-
if preview?
|
313
|
-
client.export_for_small_dataset(params)
|
314
|
-
else
|
315
|
-
Enumerator.new do |y|
|
316
|
-
client.export(params) do |record|
|
317
|
-
y << record
|
318
|
-
end
|
319
|
-
end
|
320
|
-
end
|
321
|
-
end
|
322
|
-
|
323
|
-
def adjust_timezone(epoch)
|
324
|
-
# Adjust timezone offset to get UTC time
|
325
|
-
# c.f. https://mixpanel.com/docs/api-documentation/exporting-raw-data-you-inserted-into-mixpanel#export
|
326
|
-
tz = TZInfo::Timezone.get(@timezone)
|
327
|
-
offset = tz.period_for_local(epoch, true).offset.utc_total_offset
|
328
|
-
epoch - offset
|
329
|
-
end
|
330
|
-
|
331
|
-
def preview?
|
332
|
-
begin
|
333
|
-
org.embulk.spi.Exec.isPreview()
|
334
|
-
rescue java.lang.NullPointerException => e
|
335
|
-
false
|
336
|
-
end
|
337
|
-
end
|
338
|
-
|
339
|
-
def self.export_params(config)
|
340
|
-
event = config.param(:event, :array, default: nil)
|
341
|
-
event = event.nil? ? nil : event.to_json
|
342
|
-
{
|
343
|
-
event: event,
|
344
|
-
where: config.param(:where, :string, default: nil),
|
345
|
-
bucket: config.param(:bucket, :string, default: nil),
|
346
|
-
}
|
347
|
-
end
|
348
|
-
|
349
|
-
def self.default_guess_start_date(timezone)
|
350
|
-
today(timezone) - DEFAULT_FETCH_DAYS - 1
|
351
|
-
end
|
352
|
-
|
353
|
-
def self.guess_range(config)
|
354
|
-
time_zone = config.param(:timezone, :string, default: "")
|
355
|
-
from_date = config.param(:from_date, :string, default: default_guess_start_date(time_zone).to_s)
|
356
|
-
fetch_days = config.param(:fetch_days, :integer, default: DEFAULT_FETCH_DAYS)
|
357
|
-
range = RangeGenerator.new(from_date, fetch_days, time_zone).generate_range
|
358
|
-
if range.empty?
|
359
|
-
return default_guess_start_date(time_zone)..(today(time_zone) - 1)
|
360
|
-
end
|
361
|
-
range
|
362
|
-
end
|
363
|
-
|
364
|
-
def self.guess_from_records(records)
|
365
|
-
sample_props = records.map {|r| r["properties"]}
|
366
|
-
schema = Guess::SchemaGuess.from_hash_records(sample_props)
|
367
|
-
columns = schema.map do |col|
|
368
|
-
next if col.name == "time"
|
369
|
-
result = {
|
370
|
-
name: col.name,
|
371
|
-
type: col.type,
|
372
|
-
}
|
373
|
-
result[:format] = col.format if col.format
|
374
|
-
result
|
375
|
-
end.compact
|
376
|
-
columns.unshift(name: NOT_PROPERTY_COLUMN, type: :string)
|
377
|
-
# Shift incremental column to top
|
378
|
-
columns.unshift(name: "time", type: :long)
|
379
|
-
end
|
380
|
-
|
381
|
-
def self.today(timezone)
|
382
|
-
if timezone.nil?
|
383
|
-
Date.today
|
384
|
-
else
|
385
|
-
zone = ActiveSupport::TimeZone[timezone]
|
386
|
-
zone.nil? ? Date.today : zone.today
|
387
|
-
end
|
388
|
-
end
|
389
|
-
|
390
72
|
end
|
391
73
|
end
|
392
74
|
end
|