event_meter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +1081 -0
- data/exe/event_meter +5 -0
- data/lib/event_meter/auto_cleanup.rb +93 -0
- data/lib/event_meter/cli.rb +124 -0
- data/lib/event_meter/configuration.rb +244 -0
- data/lib/event_meter/errors.rb +9 -0
- data/lib/event_meter/event.rb +180 -0
- data/lib/event_meter/event_payload.rb +103 -0
- data/lib/event_meter/hash_input.rb +20 -0
- data/lib/event_meter/index_key.rb +19 -0
- data/lib/event_meter/keys.rb +63 -0
- data/lib/event_meter/path_name.rb +37 -0
- data/lib/event_meter/processor.rb +305 -0
- data/lib/event_meter/rails.rb +79 -0
- data/lib/event_meter/report_definition.rb +184 -0
- data/lib/event_meter/reports.rb +143 -0
- data/lib/event_meter/rollup.rb +148 -0
- data/lib/event_meter/stores/cleanup_helpers.rb +76 -0
- data/lib/event_meter/stores/file_helpers.rb +47 -0
- data/lib/event_meter/stores/lock_refresher.rb +75 -0
- data/lib/event_meter/stores/namespace.rb +14 -0
- data/lib/event_meter/stores/redis_lock.rb +77 -0
- data/lib/event_meter/stores/rollup/active_record_postgres.rb +135 -0
- data/lib/event_meter/stores/rollup/file.rb +736 -0
- data/lib/event_meter/stores/rollup/postgres.rb +813 -0
- data/lib/event_meter/stores/rollup/redis.rb +349 -0
- data/lib/event_meter/stores/stream/file.rb +98 -0
- data/lib/event_meter/stores/stream/redis.rb +79 -0
- data/lib/event_meter/time_buckets.rb +56 -0
- data/lib/event_meter/version.rb +3 -0
- data/lib/event_meter/write_result.rb +26 -0
- data/lib/event_meter.rb +150 -0
- data/lib/generators/event_meter/install_generator.rb +57 -0
- data/lib/generators/event_meter/templates/create_event_meter_tables.rb.erb +12 -0
- data/lib/generators/event_meter/templates/event_meter.rb.erb +12 -0
- metadata +156 -0
data/lib/event_meter/stores/rollup/file.rb
@@ -0,0 +1,736 @@
+require "digest"
+require "fileutils"
+require "json"
+require "time"
+
+require_relative "../../rollup"
+require_relative "../cleanup_helpers"
+require_relative "../file_helpers"
+require_relative "../namespace"
+
+module EventMeter
+  module Stores
+    module Rollup
+      class File
+        include CleanupHelpers
+        include FileHelpers
+        include Namespace
+
+        APPLIED_KEY = "_applied"
+        BATCHES_KEY = "batches"
+        PROCESSED_IDS_KEY = "processed_ids"
+        STREAM_FILE_KEY = "stream_file"
+
+        attr_reader :path, :namespace, :report_name, :version
+
+        def initialize(path:, namespace: nil, report_name: nil, version: nil)
+          @path = normalize_file_store_path(path)
+          @namespace = normalize_namespace(namespace) if namespace
+          @report_name = report_name
+          @version = version
+
+          FileUtils.mkdir_p(rollup_path) if scoped?
+        end
+
+        def for_namespace(namespace)
+          namespace = normalize_namespace(namespace)
+          return self if self.namespace == namespace
+
+          if self.namespace
+            raise ConfigurationError, "file rollup storage namespace #{self.namespace.inspect} does not match #{namespace.inspect}"
+          end
+
+          @namespace = namespace
+          FileUtils.mkdir_p(rollup_path) if scoped?
+          self
+        end
+
+        def for_report(name:, version:)
+          self.class.new(
+            path: path,
+            namespace: namespace,
+            report_name: name.to_s,
+            version: version
+          )
+        end
+
+        def ensure_definition(definition)
+          update_json_file(definition_path, {}) do |stored|
+            if stored.empty?
+              stored.merge!(definition.to_h)
+            else
+              ensure_same_definition!(stored, definition)
+            end
+          end
+        end
+
+        def report_definition(name:, version:)
+          return nil unless scoped_for?(name, version)
+
+          hash_value(read_json_file(definition_path))
+        end
+
+        def processed_ids(ids)
+          ensure_scoped!
+
+          ids.select do |id|
+            processed_sidecar_for(id).processed?(id)
+          end
+        end
+
+        def apply(batch)
+          ensure_scoped!
+          return if batch.empty?
+
+          batch_id = transaction_id(batch.entry_ids)
+          applied_paths = []
+          applied_paths.concat(apply_rollups(batch_id, batch))
+          applied_paths.concat(apply_string_updates(batch_id, batch))
+          mark_processed_entries(batch, batch_id, applied_paths.uniq)
+        end
+
+        def forget_processed_ids(ids)
+          ensure_scoped!
+
+          sidecars = ids.map { |id| processed_sidecar_for(id) }.uniq(&:path)
+          applied_paths_by_batch = {}
+
+          sidecars.each do |sidecar|
+            sidecar.batch_paths.each do |batch_id, paths|
+              applied_paths_by_batch[batch_id] ||= []
+              applied_paths_by_batch[batch_id].concat(paths)
+            end
+
+            sidecar.delete
+          end
+
+          applied_paths_by_batch.each do |batch_id, relative_paths|
+            forget_applied_marker(batch_id, relative_paths.uniq)
+          end
+        end
+
+        def hgetall_many(keys)
+          ensure_scoped!
+
+          keys.map do |key|
+            rollup = rollup_key_parts(key)
+            next {} unless rollup
+
+            data = read_json_file(rollup_bucket_path(rollup.fetch(:every), rollup.fetch(:bucket)))
+            hash_value(data[rollup.fetch(:index)]).dup
+          end
+        end
+
+        def keys_matching(pattern, limit: nil)
+          ensure_scoped!
+
+          keys = rollup_keys.select { |key| key_matches?(key, pattern) }.sort
+          limit ? keys.first(positive_integer(limit, "limit")) : keys
+        end
+
+        def get(key)
+          ensure_scoped!
+
+          read_json_file(shard_path("strings", key))[key]
+        end
+
+        def cleanup_watermark(key)
+          read_json_file(cleanup_state_path)[key]
+        end
+
+        def write_cleanup_watermark(key, value)
+          update_json_file(cleanup_state_path, {}) do |data|
+            data[key] = value.to_s
+          end
+        end
+
+        def with_lock(ttl:)
+          FileUtils.mkdir_p(::File.dirname(lock_path))
+
+          ::File.open(lock_path, ::File::RDWR | ::File::CREAT, 0o600) do |file|
+            return false unless file.flock(::File::LOCK_EX | ::File::LOCK_NB)
+
+            yield
+            true
+          ensure
+            file&.flock(::File::LOCK_UN)
+          end
+        end
+
+        def cleanup_history(before:, events:, interval_state:)
+          ensure_namespace!
+          return cleanup_all_report_histories(before: before, events: events, interval_state: interval_state) unless scoped?
+
+          cleanup_scoped_history(before: before, events: events, interval_state: interval_state)
+        end
+
+        private
+
+        def apply_rollups(batch_id, batch)
+          batch.rollups.group_by { |key, _rollup| rollup_bucket_file_for_key(key) }.filter_map do |file, entries|
+            next unless file
+
+            apply_once(file, batch_id) do |data|
+              entries.each do |key, rollup|
+                parts = rollup_key_parts(key)
+                next unless parts
+
+                index = parts.fetch(:index)
+                merged = EventMeter::Rollup.from_hash(hash_value(data[index])).merge!(rollup)
+                data[index] = merged.fields.transform_values(&:to_s)
+              end
+            end
+          end
+        end
+
+        def apply_string_updates(batch_id, batch)
+          batch.state_updates.group_by { |key, _value| shard_path("strings", key) }.map do |file, entries|
+            apply_once(file, batch_id) do |data|
+              entries.each do |key, value|
+                data[key] = [integer_value(data[key]), integer_value(value)].compact.max.to_s
+              end
+            end
+          end
+        end
+
+        def apply_once(file, batch_id)
+          update_json_file(file, {}) do |data|
+            applied = applied_hash(data)
+            next if applied.key?(batch_id)
+
+            yield data
+            applied[batch_id] = current_time.iso8601(6)
+            data[APPLIED_KEY] = applied
+          end
+
+          relative_path(file)
+        end
+
+        def mark_processed_entries(batch, batch_id, applied_paths)
+          timestamp = current_time.iso8601(6)
+
+          batch.entry_ids.group_by { |id| processed_sidecar_for(id) }.each do |sidecar, ids|
+            sidecar.mark(ids, batch_id: batch_id, applied_paths: applied_paths, timestamp: timestamp)
+          end
+        end
+
+        def forget_applied_marker(batch_id, relative_paths)
+          relative_paths.each do |relative_path|
+            file = absolute_rollup_file(relative_path)
+            next unless file.start_with?("#{rollup_path}/")
+            next unless ::File.exist?(file)
+
+            update_json_file(file, {}) do |data|
+              applied = applied_hash(data)
+              applied.delete(batch_id)
+
+              if applied.empty?
+                data.delete(APPLIED_KEY)
+              else
+                data[APPLIED_KEY] = applied
+              end
+            end
+          end
+        end
+
+        def cleanup_scoped_history(before:, events:, interval_state:)
+          filter = event_filter(events)
+          result = {
+            rollup_keys_deleted: cleanup_rollups(before, filter),
+            interval_state_keys_deleted: interval_state ? cleanup_interval_state(before, filter) : 0,
+            processed_entries_deleted: cleanup_processed_sidecars(before, filter)
+          }
+          cleanup_old_applied_markers(before)
+
+          result
+        end
+
+        def cleanup_all_report_histories(before:, events:, interval_state:)
+          definition_files.each_with_object(empty_cleanup_result) do |definition_file, total|
+            definition = read_json_file(definition_file)
+            next if definition.empty?
+
+            result = self.class.new(
+              path: path,
+              namespace: namespace,
+              report_name: definition.fetch("name"),
+              version: definition.fetch("version")
+            ).cleanup_history(before: before, events: events, interval_state: interval_state)
+
+            merge_cleanup_result(total, result)
+          rescue KeyError, ArgumentError, TypeError
+            total
+          end
+        end
+
+        def cleanup_rollups(before, event_filter)
+          return 0 if filtered_out?(event_filter)
+
+          deleted = 0
+
+          rollup_bucket_files.each do |file|
+            next unless rollup_file_old?(file, before)
+
+            data = read_json_file(file)
+            deleted += data.keys.reject { |key| metadata_key?(key) }.length
+            FileUtils.rm_f(file)
+            FileUtils.rm_f(lock_file_path(file))
+          end
+
+          deleted
+        end
+
+        def cleanup_interval_state(before, event_filter)
+          before_ms = (before.to_f * 1000).to_i
+          deleted = 0
+
+          shard_files("strings").each do |file|
+            update_json_file(file, {}) do |data|
+              data.keys.grep(/\A#{Regexp.escape(namespace)}:state:/).each do |key|
+                if state_key_old?(key, before_ms, event_filter, data[key])
+                  data.delete(key)
+                  deleted += 1
+                end
+              end
+            end
+          end
+
+          deleted
+        end
+
+        def cleanup_processed_sidecars(before, event_filter)
+          return 0 if filtered_out?(event_filter)
+
+          sidecar_files.sum do |file|
+            sidecar = ProcessedSidecar.new(path: file)
+            next 0 unless sidecar.old?(before)
+
+            count = sidecar.processed_count
+            sidecar.delete
+            count
+          end
+        end
+
+        def cleanup_old_applied_markers(before)
+          each_data_file do |file|
+            update_json_file(file, {}) do |data|
+              applied = applied_hash(data)
+              applied.delete_if { |_batch_id, timestamp| processed_entry_old?(timestamp, before) }
+
+              if applied.empty?
+                data.delete(APPLIED_KEY)
+              else
+                data[APPLIED_KEY] = applied
+              end
+            end
+          end
+        end
+
+        def processed_entry_old?(timestamp, before)
+          Time.parse(timestamp).utc < before
+        rescue ArgumentError, TypeError, RangeError
+          true
+        end
+
+        def rollup_keys
+          rollup_bucket_files.flat_map do |file|
+            parts = rollup_file_parts(file)
+            next [] unless parts
+
+            read_json_file(file).keys.filter_map do |index|
+              next if metadata_key?(index)
+
+              Keys.rollup(
+                namespace: namespace,
+                name: report_name,
+                version: version,
+                every: parts.fetch(:every),
+                bucket: parts.fetch(:time),
+                index: IndexStruct.new(index)
+              )
+            end
+          end
+        end
+
+        def rollup_file_old?(file, before)
+          parts = rollup_file_parts(file)
+          return false unless parts
+
+          bucket_end_time(parts.fetch(:every).to_s, parts.fetch(:bucket)) <= before
+        rescue ArgumentError, TypeError
+          false
+        end
+
+        def rollup_file_parts(file)
+          relative = relative_path(file)
+          match = relative.match(%r{\Ahashes/(minute|hour)/(\d+)\.json\z})
+          return unless match
+
+          every = match[1].to_sym
+          bucket = match[2]
+
+          {
+            every: every,
+            bucket: bucket,
+            time: bucket_time(every, bucket)
+          }
+        end
+
+        def bucket_time(every, bucket)
+          case every.to_s
+          when "minute"
+            minute_bucket_time(bucket)
+          when "hour"
+            hour_bucket_time(bucket)
+          else
+            raise ArgumentError, "unsupported rollup bucket: #{every.inspect}"
+          end
+        end
+
+        def rollup_bucket_file_for_key(key)
+          parts = rollup_key_parts(key)
+          return unless parts
+
+          rollup_bucket_path(parts.fetch(:every), parts.fetch(:bucket))
+        end
+
+        def rollup_key_parts(key)
+          prefix = "#{namespace}:rollup:"
+          return unless key.start_with?(prefix)
+
+          event_name, version_key, every, bucket, index = key.delete_prefix(prefix).split(":", 5)
+          return unless event_name == Keys.event_name(report_name)
+          return unless version_key == Keys.version_key(version)
+          return unless %w[minute hour].include?(every)
+          return unless bucket && index
+
+          {
+            every: every.to_sym,
+            bucket: bucket,
+            index: index
+          }
+        end
+
+        def update_json_file(file, default)
+          FileUtils.mkdir_p(::File.dirname(file))
+
+          ::File.open(lock_file_path(file), ::File::RDWR | ::File::CREAT, 0o600) do |lock|
+            lock.flock(::File::LOCK_EX)
+            data = read_json_file(file, default)
+            yield data
+            write_or_remove_json(file, data)
+          ensure
+            lock&.flock(::File::LOCK_UN)
+          end
+        end
+
+        def read_json_file(file, default = {})
+          return default.dup unless ::File.exist?(file)
+
+          data = JSON.parse(::File.read(file))
+          data.is_a?(Hash) ? data : default.dup
+        rescue JSON::ParserError, Errno::ENOENT, SystemCallError, IOError
+          default.dup
+        end
+
+        def write_or_remove_json(file, data)
+          if data.empty?
+            FileUtils.rm_f(file)
+          else
+            atomic_write_json(file, data)
+          end
+        end
+
+        def applied_hash(data)
+          hash = hash_value(data[APPLIED_KEY])
+          data[APPLIED_KEY] = hash
+          hash
+        end
+
+        def hash_value(value)
+          value.is_a?(Hash) ? value : {}
+        end
+
+        def integer_value(value)
+          Integer(value)
+        rescue ArgumentError, TypeError, RangeError
+          nil
+        end
+
+        def positive_integer(value, name)
+          integer = Integer(value)
+          return integer if integer.positive?
+
+          raise ArgumentError, "#{name} must be positive"
+        rescue ArgumentError, TypeError, RangeError
+          raise ArgumentError, "#{name} must be positive"
+        end
+
+        def transaction_id(entry_ids)
+          Digest::SHA256.hexdigest(entry_ids.map(&:to_s).sort.join("\n"))
+        end
+
+        def processed_sidecar_for(entry_id)
+          ProcessedSidecar.new(path: processed_sidecar_path(stream_file_name_for(entry_id)))
+        end
+
+        def stream_file_name_for(entry_id)
+          value = entry_id.to_s
+          separator = value.index(":")
+          return value[0...separator] if separator&.positive?
+
+          "entries-#{Digest::SHA256.hexdigest(value)[0, 16]}"
+        end
+
+        def safe_file_name(value)
+          value = value.to_s
+          return value if value.match?(/\A[a-zA-Z0-9._-]+\z/)
+
+          IndexKey.escape(value)
+        end
+
+        def processed_sidecar_path(stream_file_name)
+          ::File.join(processed_path, "#{safe_file_name(stream_file_name)}.processed.json")
+        end
+
+        def shard_path(section, key)
+          ::File.join(rollup_path, section, "shards", "#{shard_id(key)}.json")
+        end
+
+        def shard_id(key)
+          Digest::SHA256.hexdigest(key.to_s)[0, 2]
+        end
+
+        def rollup_bucket_path(every, bucket)
+          ::File.join(rollup_path, "hashes", every.to_s, "#{bucket}.json")
+        end
+
+        def rollup_bucket_files
+          Dir.glob(::File.join(rollup_path, "hashes", "*", "*.json")).sort
+        end
+
+        def shard_files(section)
+          Dir.glob(::File.join(rollup_path, section, "shards", "*.json")).sort
+        end
+
+        def sidecar_files
+          Dir.glob(::File.join(processed_path, "*.processed.json")).sort
+        end
+
+        def each_data_file(&block)
+          (rollup_bucket_files + shard_files("strings")).each(&block)
+        end
+
+        def definition_files
+          Dir.glob(::File.join(path, "rollups", "*", "*", "v*", "definition.json")).sort
+        end
+
+        def filtered_out?(event_filter)
+          event_filter && !event_filter.include?(Keys.event_name(report_name))
+        end
+
+        def metadata_key?(key)
+          key.to_s.start_with?("_")
+        end
+
+        def current_time
+          Time.now.utc
+        end
+
+        def lock_file_path(file)
+          "#{file}.lock"
+        end
+
+        def cleanup_state_path
+          @cleanup_state_path ||= ::File.join(path, "cleanup.json")
+        end
+
+        def relative_path(file)
+          file.delete_prefix("#{rollup_path}/")
+        end
+
+        def absolute_rollup_file(relative_path)
+          ::File.expand_path(::File.join(rollup_path, relative_path.to_s))
+        end
+
+        def empty_cleanup_result
+          {
+            rollup_keys_deleted: 0,
+            interval_state_keys_deleted: 0,
+            processed_entries_deleted: 0
+          }
+        end
+
+        def merge_cleanup_result(total, result)
+          total.each_key do |key|
+            total[key] += result.fetch(key, 0)
+          end
+
+          total
+        end
+
+        def ensure_same_definition!(stored, definition)
+          stored_definition = ReportDefinition.from_h(stored)
+          return if stored_definition.fingerprint == definition.fingerprint
+
+          raise DefinitionChangedError, "#{definition.name} v#{definition.version} changed; bump version"
+        end
+
+        def key_matches?(key, pattern)
+          prefix = "#{namespace}:"
+          return ::File.fnmatch?(pattern, key) unless pattern.start_with?(prefix)
+          return false unless key.start_with?(prefix)
+
+          ::File.fnmatch?(pattern.delete_prefix(prefix), key.delete_prefix(prefix))
+        end
+
+        def rollup_path
+          @rollup_path ||= ::File.join(
+            path,
+            "rollups",
+            PathName.event(namespace),
+            PathName.event(report_name),
+            PathName.version(version)
+          )
+        end
+
+        def processed_path
+          @processed_path ||= ::File.join(rollup_path, "processed")
+        end
+
+        def scoped?
+          namespace && report_name && version
+        end
+
+        def scoped_for?(name, version)
+          scoped? && report_name == name.to_s && self.version.to_i == version.to_i
+        end
+
+        def ensure_scoped!
+          ensure_namespace!
+          return if scoped?
+
+          raise ConfigurationError, "file rollup storage must be scoped with for_report"
+        end
+
+        def ensure_namespace!
+          return if namespace
+
+          raise ConfigurationError, "file rollup storage must be configured with namespace"
+        end
+
+        def definition_path
+          @definition_path ||= ::File.join(rollup_path, "definition.json")
+        end
+
+        def lock_path
+          @lock_path ||= if scoped?
+            ::File.join(rollup_path, "process.lock")
+          else
+            ::File.join(path, "process.lock")
+          end
+        end
+
+        IndexStruct = Struct.new(:key)
+
+        class ProcessedSidecar
+          include FileHelpers
+
+          attr_reader :path
+
+          def initialize(path:)
+            @path = path
+          end
+
+          def processed?(entry_id)
+            hash_value(read[PROCESSED_IDS_KEY]).key?(entry_id.to_s)
+          end
+
+          def mark(entry_ids, batch_id:, applied_paths:, timestamp:)
+            update do |data|
+              data[STREAM_FILE_KEY] ||= stream_file
+              processed = hash_value(data[PROCESSED_IDS_KEY])
+              entry_ids.each { |id| processed[id.to_s] = timestamp }
+              data[PROCESSED_IDS_KEY] = processed
+
+              batches = hash_value(data[BATCHES_KEY])
+              batches[batch_id] = {
+                "processed_at" => timestamp,
+                "applied_paths" => applied_paths
+              }
+              data[BATCHES_KEY] = batches
+            end
+          end
+
+          def batch_paths
+            hash_value(read[BATCHES_KEY]).transform_values do |batch|
+              Array(hash_value(batch)["applied_paths"]).map(&:to_s)
+            end
+          end
+
+          def processed_count
+            hash_value(read[PROCESSED_IDS_KEY]).length
+          end
+
+          def old?(before)
+            timestamps = hash_value(read[PROCESSED_IDS_KEY]).values
+            return false if timestamps.empty?
+
+            timestamps.all? { |timestamp| processed_entry_old?(timestamp, before) }
+          end
+
+          def delete
+            ::FileUtils.rm_f(path)
+            ::FileUtils.rm_f(lock_path)
+          end
+
+          private
+
+          def update
+            ::FileUtils.mkdir_p(::File.dirname(path))
+
+            ::File.open(lock_path, ::File::RDWR | ::File::CREAT, 0o600) do |lock|
+              lock.flock(::File::LOCK_EX)
+              data = read
+              yield data
+              write(data)
+            ensure
+              lock&.flock(::File::LOCK_UN)
+            end
+          end
+
+          def read
+            return {} unless ::File.exist?(path)
+
+            data = ::JSON.parse(::File.read(path))
+            data.is_a?(Hash) ? data : {}
+          rescue ::JSON::ParserError, Errno::ENOENT, SystemCallError, IOError
+            {}
+          end
+
+          def write(data)
+            atomic_write_json(path, data)
+          end
+
+          def hash_value(value)
+            value.is_a?(Hash) ? value : {}
+          end
+
+          def stream_file
+            ::File.basename(path).delete_suffix(".processed.json")
+          end
+
+          def lock_path
+            "#{path}.lock"
+          end
+
+          def processed_entry_old?(timestamp, before)
+            ::Time.parse(timestamp).utc < before
+          rescue ArgumentError, TypeError, RangeError
+            true
+          end
+        end
+      end
+    end
+  end
+end
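
A minimal sketch of driving the store above by hand. The path, namespace, and report name are invented, and in normal use EventMeter's configuration presumably constructs and scopes the store itself; only the method names and signatures are taken from the diff.

```ruby
require "event_meter"

# The store must be namespaced and then scoped to one report version;
# most read/write methods call ensure_scoped! and raise
# ConfigurationError otherwise.
store = EventMeter::Stores::Rollup::File
        .new(path: "/var/tmp/event_meter")
        .for_namespace("myapp")
        .for_report(name: :page_views, version: 1)

# with_lock takes a non-blocking exclusive flock on process.lock and
# returns false when another process already holds it. The ttl: keyword
# is accepted but unused by the file-backed lock, presumably for parity
# with the Redis store.
acquired = store.with_lock(ttl: 60) do
  # read the stream and call store.apply(batch) here
end
warn "another worker holds the lock" unless acquired
```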
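Replay safety comes from two pieces of bookkeeping: each batch gets a deterministic id (transaction_id, a SHA-256 over the sorted entry ids), and every JSON data file records the batch ids it has already applied under the "_applied" metadata key, which apply_once checks before yielding. A standalone copy of the derivation shows the order-independence property:

```ruby
require "digest"

# Same derivation as File#transaction_id above: sorting first makes the
# id independent of delivery order, so a redelivered batch matches the
# "_applied" marker and apply_once skips the merge.
def transaction_id(entry_ids)
  Digest::SHA256.hexdigest(entry_ids.map(&:to_s).sort.join("\n"))
end

puts transaction_id(%w[events-1:5 events-1:6]) ==
     transaction_id(%w[events-1:6 events-1:5]) # => true
```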
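The on-disk layout follows from the path helpers: per-report data lives under <path>/rollups/<namespace>/<report>/v<version>/, rollup buckets under hashes/minute/ and hashes/hour/, processed-id sidecars under processed/, and string state fanned out across at most 256 shard files named by the first two hex characters of the key's SHA-256. A small sketch of that shard fan-out (the key is made up):

```ruby
require "digest"

# First two hex characters of SHA-256, as in File#shard_id: a fixed
# fan-out of at most 256 shard files regardless of key volume.
def shard_id(key)
  Digest::SHA256.hexdigest(key.to_s)[0, 2]
end

key = "myapp:state:page_views"
puts File.join("strings", "shards", "#{shard_id(key)}.json")
# => e.g. "strings/shards/3f.json"
```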