purplelight 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +1 -1
- data/lib/purplelight/partitioner.rb +80 -22
- data/lib/purplelight/snapshot.rb +39 -6
- data/lib/purplelight/telemetry.rb +51 -0
- data/lib/purplelight/version.rb +1 -1
- data/lib/purplelight/writer_csv.rb +6 -2
- data/lib/purplelight/writer_jsonl.rb +6 -2
- data/lib/purplelight/writer_parquet.rb +5 -0
- metadata +27 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a650fdd2113129b151396a1a90a83a6f1ede97eb5c34c60e028eb7639d5cc4fd
|
4
|
+
data.tar.gz: 1ab1bc421ddaf1c457639ae2ac5968245c6141b8504897ab762a49bb69c51a69
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 506e52dce7c474998c8bc4b9afa9f5140349e8e2eed2eed7cccbacac0bd9d9f41528b234f1b99ad8407674791471368ee5b99d93b7ab058522311f2642006a20
|
7
|
+
data.tar.gz: 5c17e387f0d67a21d1351cf4e1e69beaa7beecdf5b9f8011e16bd740e990902abf32c54b02257cdead9c296241557571608b6004446cca5d429675fda07ff61a
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Alexander Nicholson
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
CHANGED
data/lib/purplelight/partitioner.rb
CHANGED
@@ -11,7 +11,86 @@ module Purplelight
|
|
11
11
|
class Partitioner
|
12
12
|
# Builds contiguous _id range filters for N partitions.
|
13
13
|
# For ObjectId _id, we sample quantiles to split into near-equal document counts.
|
14
|
-
def self.object_id_partitions(collection:, query:, partitions:)
|
14
|
+
def self.object_id_partitions(collection:, query:, partitions:, mode: nil, telemetry: nil)
|
15
|
+
# Choose planning mode: :timestamp (fast), :cursor (legacy)
|
16
|
+
chosen_mode = (mode || ENV['PL_PARTITIONER_MODE'] || :timestamp).to_sym
|
17
|
+
telemetry ||= (defined?(Telemetry) ? Telemetry::NULL : nil)
|
18
|
+
|
19
|
+
return cursor_sampling_partitions(collection: collection, query: query, partitions: partitions) if chosen_mode == :cursor
|
20
|
+
|
21
|
+
timestamp_partitions(collection: collection, query: query, partitions: partitions, telemetry: telemetry)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.simple_ranges(collection:, query:, partitions:)
|
25
|
+
# Split by _id quantiles using min/max endpoints
|
26
|
+
min_id = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
|
27
|
+
max_id = collection.find(query || {}).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
|
28
|
+
return [{ filter: query || {}, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?
|
29
|
+
|
30
|
+
# Create numeric-ish interpolation by sampling
|
31
|
+
ids = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(partitions - 1).to_a.map { |d| d['_id'] }
|
32
|
+
boundaries = [min_id] + ids + [max_id]
|
33
|
+
ranges = []
|
34
|
+
boundaries.each_cons(2) do |a, b|
|
35
|
+
ranges << build_range(a, b)
|
36
|
+
end
|
37
|
+
|
38
|
+
ranges.map do |r|
|
39
|
+
filter = query ? query.dup : {}
|
40
|
+
filter['_id'] = r
|
41
|
+
{ filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
# Faster planning using ObjectId timestamps: O(partitions) indexed lookups
|
46
|
+
def self.timestamp_partitions(collection:, query:, partitions:, telemetry: nil)
|
47
|
+
t_minmax = telemetry&.start(:plan_minmax_time)
|
48
|
+
min_id = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
|
49
|
+
max_id = collection.find(query || {}).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
|
50
|
+
telemetry&.finish(:plan_minmax_time, t_minmax)
|
51
|
+
|
52
|
+
return [{ filter: query || {}, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?
|
53
|
+
|
54
|
+
min_ts = min_id.respond_to?(:generation_time) ? min_id.generation_time.to_i : nil
|
55
|
+
max_ts = max_id.respond_to?(:generation_time) ? max_id.generation_time.to_i : nil
|
56
|
+
|
57
|
+
# Fallback to cursor sampling if _id isn't an ObjectId
|
58
|
+
return cursor_sampling_partitions(collection: collection, query: query, partitions: partitions) if min_ts.nil? || max_ts.nil? || max_ts <= min_ts
|
59
|
+
|
60
|
+
step = [(max_ts - min_ts) / partitions, 1].max
|
61
|
+
inner_boundaries = []
|
62
|
+
t_boundaries = telemetry&.start(:plan_boundary_queries_time)
|
63
|
+
1.upto(partitions - 1) do |i|
|
64
|
+
target_ts = min_ts + (step * i)
|
65
|
+
candidate = BSON::ObjectId.from_time(Time.at(target_ts))
|
66
|
+
f = query ? query.dup : {}
|
67
|
+
f['_id'] = { '$gt' => candidate }
|
68
|
+
b = collection.find(f).projection(_id: 1).sort(_id: 1).hint(_id: 1).limit(1).first&.dig('_id')
|
69
|
+
inner_boundaries << b if b
|
70
|
+
end
|
71
|
+
telemetry&.finish(:plan_boundary_queries_time, t_boundaries)
|
72
|
+
|
73
|
+
# Build ranges: first range has nil lower bound to include min_id,
|
74
|
+
# middle ranges are (prev, current], and last is (last, +inf)
|
75
|
+
ranges = []
|
76
|
+
t_ranges = telemetry&.start(:plan_ranges_build_time)
|
77
|
+
prev = nil
|
78
|
+
inner_boundaries.each do |b|
|
79
|
+
ranges << build_range(prev, b)
|
80
|
+
prev = b
|
81
|
+
end
|
82
|
+
ranges << build_range(prev, nil)
|
83
|
+
telemetry&.finish(:plan_ranges_build_time, t_ranges)
|
84
|
+
|
85
|
+
ranges.map do |r|
|
86
|
+
filter = query ? query.dup : {}
|
87
|
+
filter['_id'] = r
|
88
|
+
{ filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Legacy cursor sampling planner
|
93
|
+
def self.cursor_sampling_partitions(collection:, query:, partitions:)
|
15
94
|
# Ensure sort order for sampling
|
16
95
|
base_query = collection.find(query || {}, {}.merge(sort: { _id: 1 }))
|
17
96
|
|
@@ -49,27 +128,6 @@ module Purplelight
|
|
49
128
|
end
|
50
129
|
end
|
51
130
|
|
52
|
-
def self.simple_ranges(collection:, query:, partitions:)
|
53
|
-
# Split by _id quantiles using min/max endpoints
|
54
|
-
min_id = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
|
55
|
-
max_id = collection.find(query || {}).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
|
56
|
-
return [{ filter: query || {}, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?
|
57
|
-
|
58
|
-
# Create numeric-ish interpolation by sampling
|
59
|
-
ids = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(partitions - 1).to_a.map { |d| d['_id'] }
|
60
|
-
boundaries = [min_id] + ids + [max_id]
|
61
|
-
ranges = []
|
62
|
-
boundaries.each_cons(2) do |a, b|
|
63
|
-
ranges << build_range(a, b)
|
64
|
-
end
|
65
|
-
|
66
|
-
ranges.map do |r|
|
67
|
-
filter = query ? query.dup : {}
|
68
|
-
filter['_id'] = r
|
69
|
-
{ filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
131
|
def self.build_range(from_id, to_id)
|
74
132
|
if from_id && to_id
|
75
133
|
{ '$gt' => from_id, '$lte' => to_id }
|
data/lib/purplelight/snapshot.rb
CHANGED
@@ -10,6 +10,7 @@ require_relative 'writer_csv'
|
|
10
10
|
require_relative 'writer_parquet'
|
11
11
|
require_relative 'manifest'
|
12
12
|
require_relative 'errors'
|
13
|
+
require_relative 'telemetry'
|
13
14
|
|
14
15
|
module Purplelight
|
15
16
|
# Snapshot orchestrates partition planning, parallel reads, and writing.
|
@@ -37,7 +38,7 @@ module Purplelight
|
|
37
38
|
resume: { enabled: true, state_path: nil, overwrite_incompatible: false },
|
38
39
|
sharding: { mode: :by_size, part_bytes: DEFAULTS[:rotate_bytes], prefix: nil },
|
39
40
|
logger: nil, on_progress: nil, read_concern: DEFAULTS[:read_concern], read_preference: DEFAULTS[:read_preference],
|
40
|
-
no_cursor_timeout: DEFAULTS[:no_cursor_timeout])
|
41
|
+
no_cursor_timeout: DEFAULTS[:no_cursor_timeout], telemetry: nil)
|
41
42
|
@client = client
|
42
43
|
@collection = client[collection]
|
43
44
|
@output = output
|
@@ -60,6 +61,10 @@ module Purplelight
|
|
60
61
|
@no_cursor_timeout = no_cursor_timeout
|
61
62
|
|
62
63
|
@running = true
|
64
|
+
@telemetry_enabled = telemetry ? telemetry.enabled? : (ENV['PL_TELEMETRY'] == '1')
|
65
|
+
@telemetry = telemetry || (
|
66
|
+
@telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
|
67
|
+
)
|
63
68
|
end
|
64
69
|
|
65
70
|
# rubocop:disable Naming/PredicateMethod
|
@@ -90,8 +95,10 @@ module Purplelight
|
|
90
95
|
manifest.ensure_partitions!(@partitions)
|
91
96
|
|
92
97
|
# Plan partitions
|
98
|
+
t_plan = @telemetry.start(:partition_plan_time)
|
93
99
|
partition_filters = Partitioner.object_id_partitions(collection: @collection, query: @query,
|
94
|
-
partitions: @partitions)
|
100
|
+
partitions: @partitions, telemetry: @telemetry)
|
101
|
+
@telemetry.finish(:partition_plan_time, t_plan)
|
95
102
|
|
96
103
|
# Reader queue
|
97
104
|
queue = ByteQueue.new(max_bytes: @queue_size_bytes)
|
@@ -116,12 +123,17 @@ module Purplelight
|
|
116
123
|
# Start reader threads
|
117
124
|
readers = partition_filters.each_with_index.map do |pf, idx|
|
118
125
|
Thread.new do
|
119
|
-
|
126
|
+
local_telemetry = @telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
|
127
|
+
read_partition(idx: idx, filter_spec: pf, queue: queue, batch_size: @batch_size, manifest: manifest, telemetry: local_telemetry)
|
128
|
+
# Merge per-thread telemetry
|
129
|
+
@telemetry.merge!(local_telemetry) if @telemetry_enabled
|
120
130
|
end
|
121
131
|
end
|
122
132
|
|
123
133
|
# Writer loop
|
134
|
+
writer_telemetry = @telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
|
124
135
|
writer_thread = Thread.new do
|
136
|
+
Thread.current[:pl_telemetry] = writer_telemetry if @telemetry_enabled
|
125
137
|
loop do
|
126
138
|
batch = queue.pop
|
127
139
|
break if batch.nil?
|
@@ -146,8 +158,22 @@ module Purplelight
|
|
146
158
|
readers.each(&:join)
|
147
159
|
queue.close
|
148
160
|
writer_thread.join
|
161
|
+
@telemetry.merge!(writer_telemetry) if @telemetry_enabled
|
149
162
|
@running = false
|
150
163
|
progress_thread.join
|
164
|
+
if @telemetry_enabled
|
165
|
+
total = @telemetry.timers.values.sum
|
166
|
+
breakdown = @telemetry.timers
|
167
|
+
.sort_by { |_k, v| -v }
|
168
|
+
.map { |k, v| [k, v, total.zero? ? 0 : ((v / total) * 100.0)] }
|
169
|
+
if @logger
|
170
|
+
@logger.info('Telemetry (seconds and % of timed work):')
|
171
|
+
breakdown.each { |k, v, pct| @logger.info(" #{k}: #{v.round(3)}s (#{pct.round(1)}%)") }
|
172
|
+
else
|
173
|
+
puts 'Telemetry (seconds and % of timed work):'
|
174
|
+
breakdown.each { |k, v, pct| puts " #{k}: #{v.round(3)}s (#{pct.round(1)}%)" }
|
175
|
+
end
|
176
|
+
end
|
151
177
|
true
|
152
178
|
end
|
153
179
|
# rubocop:enable Naming/PredicateMethod
|
@@ -167,7 +193,7 @@ module Purplelight
|
|
167
193
|
[dir, prefix]
|
168
194
|
end
|
169
195
|
|
170
|
-
def read_partition(idx:, filter_spec:, queue:, batch_size:, manifest:)
|
196
|
+
def read_partition(idx:, filter_spec:, queue:, batch_size:, manifest:, telemetry: Telemetry::NULL)
|
171
197
|
filter = filter_spec[:filter]
|
172
198
|
sort = filter_spec[:sort] || { _id: 1 }
|
173
199
|
hint = @hint || filter_spec[:hint] || { _id: 1 }
|
@@ -202,25 +228,32 @@ module Purplelight
|
|
202
228
|
cursor.each do |doc|
|
203
229
|
last_id = doc['_id']
|
204
230
|
doc = @mapper.call(doc) if @mapper
|
231
|
+
t_ser = telemetry.start(:serialize_time)
|
205
232
|
if encode_lines
|
206
|
-
line = "#{
|
233
|
+
line = "#{JSON.generate(doc)}\n"
|
234
|
+
telemetry.finish(:serialize_time, t_ser)
|
207
235
|
bytes = line.bytesize
|
208
236
|
buffer << line
|
209
237
|
else
|
210
238
|
# For CSV/Parquet keep raw docs to allow schema/row building
|
211
|
-
bytes = (
|
239
|
+
bytes = (JSON.generate(doc).bytesize + 1)
|
240
|
+
telemetry.finish(:serialize_time, t_ser)
|
212
241
|
buffer << doc
|
213
242
|
end
|
214
243
|
buffer_bytes += bytes
|
215
244
|
next unless buffer.length >= batch_size || buffer_bytes >= 1_000_000
|
216
245
|
|
246
|
+
t_q = telemetry.start(:queue_wait_time)
|
217
247
|
queue.push(buffer, bytes: buffer_bytes)
|
248
|
+
telemetry.finish(:queue_wait_time, t_q)
|
218
249
|
manifest.update_partition_checkpoint!(idx, last_id)
|
219
250
|
buffer = []
|
220
251
|
buffer_bytes = 0
|
221
252
|
end
|
222
253
|
unless buffer.empty?
|
254
|
+
t_q = telemetry.start(:queue_wait_time)
|
223
255
|
queue.push(buffer, bytes: buffer_bytes)
|
256
|
+
telemetry.finish(:queue_wait_time, t_q)
|
224
257
|
manifest.update_partition_checkpoint!(idx, last_id)
|
225
258
|
buffer = []
|
226
259
|
buffer_bytes = 0
|
data/lib/purplelight/telemetry.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Purplelight
|
4
|
+
# Lightweight, low-overhead timing and counters with a ticket API.
|
5
|
+
class Telemetry
|
6
|
+
def initialize(enabled: true)
|
7
|
+
@enabled = enabled
|
8
|
+
@counters = Hash.new(0)
|
9
|
+
@timers = Hash.new(0.0)
|
10
|
+
@mutex = Mutex.new
|
11
|
+
end
|
12
|
+
|
13
|
+
def enabled?
|
14
|
+
@enabled
|
15
|
+
end
|
16
|
+
|
17
|
+
# Start a timer. Returns a ticket (Float) when enabled, or nil when disabled.
|
18
|
+
def start(_key)
|
19
|
+
return nil unless @enabled
|
20
|
+
|
21
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Finish a timer using a ticket from start. No-ops if ticket is nil.
|
25
|
+
def finish(key, ticket)
|
26
|
+
return unless @enabled && ticket
|
27
|
+
|
28
|
+
dt = Process.clock_gettime(Process::CLOCK_MONOTONIC) - ticket
|
29
|
+
@timers[key] += dt
|
30
|
+
end
|
31
|
+
|
32
|
+
def add(key, count = 1)
|
33
|
+
return unless @enabled
|
34
|
+
|
35
|
+
@counters[key] += count
|
36
|
+
end
|
37
|
+
|
38
|
+
def merge!(other)
|
39
|
+
return self unless @enabled
|
40
|
+
|
41
|
+
other.counters.each { |k, v| @counters[k] += v }
|
42
|
+
other.timers.each { |k, v| @timers[k] += v }
|
43
|
+
self
|
44
|
+
end
|
45
|
+
|
46
|
+
attr_reader :counters, :timers
|
47
|
+
|
48
|
+
# A disabled singleton for zero overhead checks if needed.
|
49
|
+
NULL = new(enabled: false)
|
50
|
+
end
|
51
|
+
end
|
data/lib/purplelight/version.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'csv'
|
4
|
-
require 'oj'
|
4
|
+
require 'json'
|
5
5
|
require 'zlib'
|
6
6
|
require 'fileutils'
|
7
7
|
|
@@ -81,8 +81,10 @@ module Purplelight
|
|
81
81
|
|
82
82
|
@csv&.flush
|
83
83
|
if @io
|
84
|
+
t = Thread.current[:pl_telemetry]&.start(:rotate_time)
|
84
85
|
finalize_current_part!
|
85
86
|
@io.close
|
87
|
+
Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
|
86
88
|
end
|
87
89
|
@closed = true
|
88
90
|
end
|
@@ -122,8 +124,10 @@ module Purplelight
|
|
122
124
|
def rotate!
|
123
125
|
return unless @io
|
124
126
|
|
127
|
+
t = Thread.current[:pl_telemetry]&.start(:rotate_time)
|
125
128
|
finalize_current_part!
|
126
129
|
@io.close
|
130
|
+
Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
|
127
131
|
@io = nil
|
128
132
|
@csv = nil
|
129
133
|
ensure_open!
|
@@ -173,7 +177,7 @@ module Purplelight
|
|
173
177
|
val = doc[key] || doc[key.to_sym]
|
174
178
|
case val
|
175
179
|
when Hash, Array
|
176
|
-
|
180
|
+
JSON.generate(val)
|
177
181
|
else
|
178
182
|
val
|
179
183
|
end
|
data/lib/purplelight/writer_jsonl.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'oj'
|
3
|
+
require 'json'
|
4
4
|
require 'zlib'
|
5
5
|
require 'fileutils'
|
6
6
|
|
@@ -44,7 +44,7 @@ module Purplelight
|
|
44
44
|
buffer = if array_of_docs.first.is_a?(String)
|
45
45
|
array_of_docs.join
|
46
46
|
else
|
47
|
-
array_of_docs.map { |doc| "#{
|
47
|
+
array_of_docs.map { |doc| "#{JSON.generate(doc)}\n" }.join
|
48
48
|
end
|
49
49
|
rows = array_of_docs.size
|
50
50
|
write_buffer(buffer)
|
@@ -106,7 +106,9 @@ module Purplelight
|
|
106
106
|
end
|
107
107
|
|
108
108
|
def write_buffer(buffer)
|
109
|
+
t = Thread.current[:pl_telemetry]&.start(:write_time)
|
109
110
|
@io.write(buffer)
|
111
|
+
Thread.current[:pl_telemetry]&.finish(:write_time, t)
|
110
112
|
@bytes_written += buffer.bytesize
|
111
113
|
rotate_if_needed
|
112
114
|
end
|
@@ -114,8 +116,10 @@ module Purplelight
|
|
114
116
|
def rotate!
|
115
117
|
return unless @io
|
116
118
|
|
119
|
+
t = Thread.current[:pl_telemetry]&.start(:rotate_time)
|
117
120
|
finalize_current_part!
|
118
121
|
@io.close
|
122
|
+
Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
|
119
123
|
@io = nil
|
120
124
|
ensure_open!
|
121
125
|
end
|
data/lib/purplelight/writer_parquet.rb
CHANGED
@@ -44,8 +44,13 @@ module Purplelight
|
|
44
44
|
|
45
45
|
ensure_open!
|
46
46
|
unless @buffer_docs.empty?
|
47
|
+
t_tbl = Thread.current[:pl_telemetry]&.start(:parquet_table_build_time)
|
47
48
|
table = build_table(@buffer_docs)
|
49
|
+
Thread.current[:pl_telemetry]&.finish(:parquet_table_build_time, t_tbl)
|
50
|
+
|
51
|
+
t_w = Thread.current[:pl_telemetry]&.start(:parquet_write_time)
|
48
52
|
write_table(table, @writer_path, append: false)
|
53
|
+
Thread.current[:pl_telemetry]&.finish(:parquet_write_time, t_w)
|
49
54
|
end
|
50
55
|
finalize_current_part!
|
51
56
|
@closed = true
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: purplelight
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Nicholson
|
@@ -13,58 +13,62 @@ dependencies:
|
|
13
13
|
name: csv
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '3.3'
|
16
19
|
- - ">="
|
17
20
|
- !ruby/object:Gem::Version
|
18
|
-
version:
|
21
|
+
version: 3.3.5
|
19
22
|
type: :runtime
|
20
23
|
prerelease: false
|
21
24
|
version_requirements: !ruby/object:Gem::Requirement
|
22
25
|
requirements:
|
26
|
+
- - "~>"
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
version: '3.3'
|
23
29
|
- - ">="
|
24
30
|
- !ruby/object:Gem::Version
|
25
|
-
version:
|
31
|
+
version: 3.3.5
|
26
32
|
- !ruby/object:Gem::Dependency
|
27
33
|
name: logger
|
28
34
|
requirement: !ruby/object:Gem::Requirement
|
29
35
|
requirements:
|
36
|
+
- - "~>"
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '1.7'
|
30
39
|
- - ">="
|
31
40
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
41
|
+
version: 1.7.0
|
33
42
|
type: :runtime
|
34
43
|
prerelease: false
|
35
44
|
version_requirements: !ruby/object:Gem::Requirement
|
36
45
|
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '1.7'
|
37
49
|
- - ">="
|
38
50
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
51
|
+
version: 1.7.0
|
40
52
|
- !ruby/object:Gem::Dependency
|
41
53
|
name: mongo
|
42
54
|
requirement: !ruby/object:Gem::Requirement
|
43
55
|
requirements:
|
44
|
-
- - "
|
56
|
+
- - "~>"
|
45
57
|
- !ruby/object:Gem::Version
|
46
|
-
version: '2.
|
47
|
-
type: :runtime
|
48
|
-
prerelease: false
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
50
|
-
requirements:
|
58
|
+
version: '2.21'
|
51
59
|
- - ">="
|
52
60
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
54
|
-
- !ruby/object:Gem::Dependency
|
55
|
-
name: oj
|
56
|
-
requirement: !ruby/object:Gem::Requirement
|
57
|
-
requirements:
|
58
|
-
- - ">="
|
59
|
-
- !ruby/object:Gem::Version
|
60
|
-
version: '3.16'
|
61
|
+
version: 2.21.3
|
61
62
|
type: :runtime
|
62
63
|
prerelease: false
|
63
64
|
version_requirements: !ruby/object:Gem::Requirement
|
64
65
|
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2.21'
|
65
69
|
- - ">="
|
66
70
|
- !ruby/object:Gem::Version
|
67
|
-
version:
|
71
|
+
version: 2.21.3
|
68
72
|
description: High-throughput, resumable snapshots of MongoDB collections with partitioning,
|
69
73
|
multi-threaded readers, and size-based sharded outputs.
|
70
74
|
email:
|
@@ -74,6 +78,7 @@ executables:
|
|
74
78
|
extensions: []
|
75
79
|
extra_rdoc_files: []
|
76
80
|
files:
|
81
|
+
- LICENSE
|
77
82
|
- README.md
|
78
83
|
- Rakefile
|
79
84
|
- bin/purplelight
|
@@ -83,15 +88,16 @@ files:
|
|
83
88
|
- lib/purplelight/partitioner.rb
|
84
89
|
- lib/purplelight/queue.rb
|
85
90
|
- lib/purplelight/snapshot.rb
|
91
|
+
- lib/purplelight/telemetry.rb
|
86
92
|
- lib/purplelight/version.rb
|
87
93
|
- lib/purplelight/writer_csv.rb
|
88
94
|
- lib/purplelight/writer_jsonl.rb
|
89
95
|
- lib/purplelight/writer_parquet.rb
|
96
|
+
homepage: https://github.com/alexandernicholson/purplelight
|
90
97
|
licenses:
|
91
98
|
- MIT
|
92
99
|
metadata:
|
93
100
|
rubygems_mfa_required: 'true'
|
94
|
-
homepage_uri: https://github.com/alexandernicholson/purplelight
|
95
101
|
source_code_uri: https://github.com/alexandernicholson/purplelight
|
96
102
|
changelog_uri: https://github.com/alexandernicholson/purplelight/releases
|
97
103
|
rdoc_options: []
|