purplelight 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/purplelight/partitioner.rb +80 -22
- data/lib/purplelight/snapshot.rb +37 -4
- data/lib/purplelight/telemetry.rb +51 -0
- data/lib/purplelight/version.rb +1 -1
- data/lib/purplelight/writer_csv.rb +4 -0
- data/lib/purplelight/writer_jsonl.rb +4 -0
- data/lib/purplelight/writer_parquet.rb +5 -0
- metadata +27 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a650fdd2113129b151396a1a90a83a6f1ede97eb5c34c60e028eb7639d5cc4fd
|
4
|
+
data.tar.gz: 1ab1bc421ddaf1c457639ae2ac5968245c6141b8504897ab762a49bb69c51a69
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 506e52dce7c474998c8bc4b9afa9f5140349e8e2eed2eed7cccbacac0bd9d9f41528b234f1b99ad8407674791471368ee5b99d93b7ab058522311f2642006a20
|
7
|
+
data.tar.gz: 5c17e387f0d67a21d1351cf4e1e69beaa7beecdf5b9f8011e16bd740e990902abf32c54b02257cdead9c296241557571608b6004446cca5d429675fda07ff61a
|
data/README.md
CHANGED
@@ -11,7 +11,86 @@ module Purplelight
|
|
11
11
|
class Partitioner
|
12
12
|
# Builds contiguous _id range filters for N partitions.
|
13
13
|
# For ObjectId _id, we sample quantiles to split into near-equal document counts.
|
14
|
-
def self.object_id_partitions(collection:, query:, partitions:)
|
14
|
+
def self.object_id_partitions(collection:, query:, partitions:, mode: nil, telemetry: nil)
|
15
|
+
# Choose planning mode: :timestamp (fast), :cursor (legacy)
|
16
|
+
chosen_mode = (mode || ENV['PL_PARTITIONER_MODE'] || :timestamp).to_sym
|
17
|
+
telemetry ||= (defined?(Telemetry) ? Telemetry::NULL : nil)
|
18
|
+
|
19
|
+
return cursor_sampling_partitions(collection: collection, query: query, partitions: partitions) if chosen_mode == :cursor
|
20
|
+
|
21
|
+
timestamp_partitions(collection: collection, query: query, partitions: partitions, telemetry: telemetry)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.simple_ranges(collection:, query:, partitions:)
|
25
|
+
# Split by _id quantiles using min/max endpoints
|
26
|
+
min_id = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
|
27
|
+
max_id = collection.find(query || {}).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
|
28
|
+
return [{ filter: query || {}, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?
|
29
|
+
|
30
|
+
# Create numeric-ish interpolation by sampling
|
31
|
+
ids = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(partitions - 1).to_a.map { |d| d['_id'] }
|
32
|
+
boundaries = [min_id] + ids + [max_id]
|
33
|
+
ranges = []
|
34
|
+
boundaries.each_cons(2) do |a, b|
|
35
|
+
ranges << build_range(a, b)
|
36
|
+
end
|
37
|
+
|
38
|
+
ranges.map do |r|
|
39
|
+
filter = query ? query.dup : {}
|
40
|
+
filter['_id'] = r
|
41
|
+
{ filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
# Faster planning using ObjectId timestamps: O(partitions) indexed lookups
|
46
|
+
def self.timestamp_partitions(collection:, query:, partitions:, telemetry: nil)
|
47
|
+
t_minmax = telemetry&.start(:plan_minmax_time)
|
48
|
+
min_id = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
|
49
|
+
max_id = collection.find(query || {}).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
|
50
|
+
telemetry&.finish(:plan_minmax_time, t_minmax)
|
51
|
+
|
52
|
+
return [{ filter: query || {}, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?
|
53
|
+
|
54
|
+
min_ts = min_id.respond_to?(:generation_time) ? min_id.generation_time.to_i : nil
|
55
|
+
max_ts = max_id.respond_to?(:generation_time) ? max_id.generation_time.to_i : nil
|
56
|
+
|
57
|
+
# Fallback to cursor sampling if _id isn't an ObjectId
|
58
|
+
return cursor_sampling_partitions(collection: collection, query: query, partitions: partitions) if min_ts.nil? || max_ts.nil? || max_ts <= min_ts
|
59
|
+
|
60
|
+
step = [(max_ts - min_ts) / partitions, 1].max
|
61
|
+
inner_boundaries = []
|
62
|
+
t_boundaries = telemetry&.start(:plan_boundary_queries_time)
|
63
|
+
1.upto(partitions - 1) do |i|
|
64
|
+
target_ts = min_ts + (step * i)
|
65
|
+
candidate = BSON::ObjectId.from_time(Time.at(target_ts))
|
66
|
+
f = query ? query.dup : {}
|
67
|
+
f['_id'] = { '$gt' => candidate }
|
68
|
+
b = collection.find(f).projection(_id: 1).sort(_id: 1).hint(_id: 1).limit(1).first&.dig('_id')
|
69
|
+
inner_boundaries << b if b
|
70
|
+
end
|
71
|
+
telemetry&.finish(:plan_boundary_queries_time, t_boundaries)
|
72
|
+
|
73
|
+
# Build ranges: first range has nil lower bound to include min_id,
|
74
|
+
# middle ranges are (prev, current], and last is (last, +inf)
|
75
|
+
ranges = []
|
76
|
+
t_ranges = telemetry&.start(:plan_ranges_build_time)
|
77
|
+
prev = nil
|
78
|
+
inner_boundaries.each do |b|
|
79
|
+
ranges << build_range(prev, b)
|
80
|
+
prev = b
|
81
|
+
end
|
82
|
+
ranges << build_range(prev, nil)
|
83
|
+
telemetry&.finish(:plan_ranges_build_time, t_ranges)
|
84
|
+
|
85
|
+
ranges.map do |r|
|
86
|
+
filter = query ? query.dup : {}
|
87
|
+
filter['_id'] = r
|
88
|
+
{ filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Legacy cursor sampling planner
|
93
|
+
def self.cursor_sampling_partitions(collection:, query:, partitions:)
|
15
94
|
# Ensure sort order for sampling
|
16
95
|
base_query = collection.find(query || {}, {}.merge(sort: { _id: 1 }))
|
17
96
|
|
@@ -49,27 +128,6 @@ module Purplelight
|
|
49
128
|
end
|
50
129
|
end
|
51
130
|
|
52
|
-
def self.simple_ranges(collection:, query:, partitions:)
|
53
|
-
# Split by _id quantiles using min/max endpoints
|
54
|
-
min_id = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
|
55
|
-
max_id = collection.find(query || {}).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
|
56
|
-
return [{ filter: query || {}, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?
|
57
|
-
|
58
|
-
# Create numeric-ish interpolation by sampling
|
59
|
-
ids = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(partitions - 1).to_a.map { |d| d['_id'] }
|
60
|
-
boundaries = [min_id] + ids + [max_id]
|
61
|
-
ranges = []
|
62
|
-
boundaries.each_cons(2) do |a, b|
|
63
|
-
ranges << build_range(a, b)
|
64
|
-
end
|
65
|
-
|
66
|
-
ranges.map do |r|
|
67
|
-
filter = query ? query.dup : {}
|
68
|
-
filter['_id'] = r
|
69
|
-
{ filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
131
|
def self.build_range(from_id, to_id)
|
74
132
|
if from_id && to_id
|
75
133
|
{ '$gt' => from_id, '$lte' => to_id }
|
data/lib/purplelight/snapshot.rb
CHANGED
@@ -10,6 +10,7 @@ require_relative 'writer_csv'
|
|
10
10
|
require_relative 'writer_parquet'
|
11
11
|
require_relative 'manifest'
|
12
12
|
require_relative 'errors'
|
13
|
+
require_relative 'telemetry'
|
13
14
|
|
14
15
|
module Purplelight
|
15
16
|
# Snapshot orchestrates partition planning, parallel reads, and writing.
|
@@ -37,7 +38,7 @@ module Purplelight
|
|
37
38
|
resume: { enabled: true, state_path: nil, overwrite_incompatible: false },
|
38
39
|
sharding: { mode: :by_size, part_bytes: DEFAULTS[:rotate_bytes], prefix: nil },
|
39
40
|
logger: nil, on_progress: nil, read_concern: DEFAULTS[:read_concern], read_preference: DEFAULTS[:read_preference],
|
40
|
-
no_cursor_timeout: DEFAULTS[:no_cursor_timeout])
|
41
|
+
no_cursor_timeout: DEFAULTS[:no_cursor_timeout], telemetry: nil)
|
41
42
|
@client = client
|
42
43
|
@collection = client[collection]
|
43
44
|
@output = output
|
@@ -60,6 +61,10 @@ module Purplelight
|
|
60
61
|
@no_cursor_timeout = no_cursor_timeout
|
61
62
|
|
62
63
|
@running = true
|
64
|
+
@telemetry_enabled = telemetry ? telemetry.enabled? : (ENV['PL_TELEMETRY'] == '1')
|
65
|
+
@telemetry = telemetry || (
|
66
|
+
@telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
|
67
|
+
)
|
63
68
|
end
|
64
69
|
|
65
70
|
# rubocop:disable Naming/PredicateMethod
|
@@ -90,8 +95,10 @@ module Purplelight
|
|
90
95
|
manifest.ensure_partitions!(@partitions)
|
91
96
|
|
92
97
|
# Plan partitions
|
98
|
+
t_plan = @telemetry.start(:partition_plan_time)
|
93
99
|
partition_filters = Partitioner.object_id_partitions(collection: @collection, query: @query,
|
94
|
-
partitions: @partitions)
|
100
|
+
partitions: @partitions, telemetry: @telemetry)
|
101
|
+
@telemetry.finish(:partition_plan_time, t_plan)
|
95
102
|
|
96
103
|
# Reader queue
|
97
104
|
queue = ByteQueue.new(max_bytes: @queue_size_bytes)
|
@@ -116,12 +123,17 @@ module Purplelight
|
|
116
123
|
# Start reader threads
|
117
124
|
readers = partition_filters.each_with_index.map do |pf, idx|
|
118
125
|
Thread.new do
|
119
|
-
|
126
|
+
local_telemetry = @telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
|
127
|
+
read_partition(idx: idx, filter_spec: pf, queue: queue, batch_size: @batch_size, manifest: manifest, telemetry: local_telemetry)
|
128
|
+
# Merge per-thread telemetry
|
129
|
+
@telemetry.merge!(local_telemetry) if @telemetry_enabled
|
120
130
|
end
|
121
131
|
end
|
122
132
|
|
123
133
|
# Writer loop
|
134
|
+
writer_telemetry = @telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
|
124
135
|
writer_thread = Thread.new do
|
136
|
+
Thread.current[:pl_telemetry] = writer_telemetry if @telemetry_enabled
|
125
137
|
loop do
|
126
138
|
batch = queue.pop
|
127
139
|
break if batch.nil?
|
@@ -146,8 +158,22 @@ module Purplelight
|
|
146
158
|
readers.each(&:join)
|
147
159
|
queue.close
|
148
160
|
writer_thread.join
|
161
|
+
@telemetry.merge!(writer_telemetry) if @telemetry_enabled
|
149
162
|
@running = false
|
150
163
|
progress_thread.join
|
164
|
+
if @telemetry_enabled
|
165
|
+
total = @telemetry.timers.values.sum
|
166
|
+
breakdown = @telemetry.timers
|
167
|
+
.sort_by { |_k, v| -v }
|
168
|
+
.map { |k, v| [k, v, total.zero? ? 0 : ((v / total) * 100.0)] }
|
169
|
+
if @logger
|
170
|
+
@logger.info('Telemetry (seconds and % of timed work):')
|
171
|
+
breakdown.each { |k, v, pct| @logger.info(" #{k}: #{v.round(3)}s (#{pct.round(1)}%)") }
|
172
|
+
else
|
173
|
+
puts 'Telemetry (seconds and % of timed work):'
|
174
|
+
breakdown.each { |k, v, pct| puts " #{k}: #{v.round(3)}s (#{pct.round(1)}%)" }
|
175
|
+
end
|
176
|
+
end
|
151
177
|
true
|
152
178
|
end
|
153
179
|
# rubocop:enable Naming/PredicateMethod
|
@@ -167,7 +193,7 @@ module Purplelight
|
|
167
193
|
[dir, prefix]
|
168
194
|
end
|
169
195
|
|
170
|
-
def read_partition(idx:, filter_spec:, queue:, batch_size:, manifest:)
|
196
|
+
def read_partition(idx:, filter_spec:, queue:, batch_size:, manifest:, telemetry: Telemetry::NULL)
|
171
197
|
filter = filter_spec[:filter]
|
172
198
|
sort = filter_spec[:sort] || { _id: 1 }
|
173
199
|
hint = @hint || filter_spec[:hint] || { _id: 1 }
|
@@ -202,25 +228,32 @@ module Purplelight
|
|
202
228
|
cursor.each do |doc|
|
203
229
|
last_id = doc['_id']
|
204
230
|
doc = @mapper.call(doc) if @mapper
|
231
|
+
t_ser = telemetry.start(:serialize_time)
|
205
232
|
if encode_lines
|
206
233
|
line = "#{JSON.generate(doc)}\n"
|
234
|
+
telemetry.finish(:serialize_time, t_ser)
|
207
235
|
bytes = line.bytesize
|
208
236
|
buffer << line
|
209
237
|
else
|
210
238
|
# For CSV/Parquet keep raw docs to allow schema/row building
|
211
239
|
bytes = (JSON.generate(doc).bytesize + 1)
|
240
|
+
telemetry.finish(:serialize_time, t_ser)
|
212
241
|
buffer << doc
|
213
242
|
end
|
214
243
|
buffer_bytes += bytes
|
215
244
|
next unless buffer.length >= batch_size || buffer_bytes >= 1_000_000
|
216
245
|
|
246
|
+
t_q = telemetry.start(:queue_wait_time)
|
217
247
|
queue.push(buffer, bytes: buffer_bytes)
|
248
|
+
telemetry.finish(:queue_wait_time, t_q)
|
218
249
|
manifest.update_partition_checkpoint!(idx, last_id)
|
219
250
|
buffer = []
|
220
251
|
buffer_bytes = 0
|
221
252
|
end
|
222
253
|
unless buffer.empty?
|
254
|
+
t_q = telemetry.start(:queue_wait_time)
|
223
255
|
queue.push(buffer, bytes: buffer_bytes)
|
256
|
+
telemetry.finish(:queue_wait_time, t_q)
|
224
257
|
manifest.update_partition_checkpoint!(idx, last_id)
|
225
258
|
buffer = []
|
226
259
|
buffer_bytes = 0
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Purplelight
|
4
|
+
# Lightweight, low-overhead timing and counters with a ticket API.
|
5
|
+
class Telemetry
|
6
|
+
def initialize(enabled: true)
|
7
|
+
@enabled = enabled
|
8
|
+
@counters = Hash.new(0)
|
9
|
+
@timers = Hash.new(0.0)
|
10
|
+
@mutex = Mutex.new
|
11
|
+
end
|
12
|
+
|
13
|
+
def enabled?
|
14
|
+
@enabled
|
15
|
+
end
|
16
|
+
|
17
|
+
# Start a timer. Returns a ticket (Float) when enabled, or nil when disabled.
|
18
|
+
def start(_key)
|
19
|
+
return nil unless @enabled
|
20
|
+
|
21
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Finish a timer using a ticket from start. No-ops if ticket is nil.
|
25
|
+
def finish(key, ticket)
|
26
|
+
return unless @enabled && ticket
|
27
|
+
|
28
|
+
dt = Process.clock_gettime(Process::CLOCK_MONOTONIC) - ticket
|
29
|
+
@timers[key] += dt
|
30
|
+
end
|
31
|
+
|
32
|
+
def add(key, count = 1)
|
33
|
+
return unless @enabled
|
34
|
+
|
35
|
+
@counters[key] += count
|
36
|
+
end
|
37
|
+
|
38
|
+
def merge!(other)
|
39
|
+
return self unless @enabled
|
40
|
+
|
41
|
+
other.counters.each { |k, v| @counters[k] += v }
|
42
|
+
other.timers.each { |k, v| @timers[k] += v }
|
43
|
+
self
|
44
|
+
end
|
45
|
+
|
46
|
+
attr_reader :counters, :timers
|
47
|
+
|
48
|
+
# A disabled singleton for zero overhead checks if needed.
|
49
|
+
NULL = new(enabled: false)
|
50
|
+
end
|
51
|
+
end
|
data/lib/purplelight/version.rb
CHANGED
@@ -81,8 +81,10 @@ module Purplelight
|
|
81
81
|
|
82
82
|
@csv&.flush
|
83
83
|
if @io
|
84
|
+
t = Thread.current[:pl_telemetry]&.start(:rotate_time)
|
84
85
|
finalize_current_part!
|
85
86
|
@io.close
|
87
|
+
Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
|
86
88
|
end
|
87
89
|
@closed = true
|
88
90
|
end
|
@@ -122,8 +124,10 @@ module Purplelight
|
|
122
124
|
def rotate!
|
123
125
|
return unless @io
|
124
126
|
|
127
|
+
t = Thread.current[:pl_telemetry]&.start(:rotate_time)
|
125
128
|
finalize_current_part!
|
126
129
|
@io.close
|
130
|
+
Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
|
127
131
|
@io = nil
|
128
132
|
@csv = nil
|
129
133
|
ensure_open!
|
@@ -106,7 +106,9 @@ module Purplelight
|
|
106
106
|
end
|
107
107
|
|
108
108
|
def write_buffer(buffer)
|
109
|
+
t = Thread.current[:pl_telemetry]&.start(:write_time)
|
109
110
|
@io.write(buffer)
|
111
|
+
Thread.current[:pl_telemetry]&.finish(:write_time, t)
|
110
112
|
@bytes_written += buffer.bytesize
|
111
113
|
rotate_if_needed
|
112
114
|
end
|
@@ -114,8 +116,10 @@ module Purplelight
|
|
114
116
|
def rotate!
|
115
117
|
return unless @io
|
116
118
|
|
119
|
+
t = Thread.current[:pl_telemetry]&.start(:rotate_time)
|
117
120
|
finalize_current_part!
|
118
121
|
@io.close
|
122
|
+
Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
|
119
123
|
@io = nil
|
120
124
|
ensure_open!
|
121
125
|
end
|
@@ -44,8 +44,13 @@ module Purplelight
|
|
44
44
|
|
45
45
|
ensure_open!
|
46
46
|
unless @buffer_docs.empty?
|
47
|
+
t_tbl = Thread.current[:pl_telemetry]&.start(:parquet_table_build_time)
|
47
48
|
table = build_table(@buffer_docs)
|
49
|
+
Thread.current[:pl_telemetry]&.finish(:parquet_table_build_time, t_tbl)
|
50
|
+
|
51
|
+
t_w = Thread.current[:pl_telemetry]&.start(:parquet_write_time)
|
48
52
|
write_table(table, @writer_path, append: false)
|
53
|
+
Thread.current[:pl_telemetry]&.finish(:parquet_write_time, t_w)
|
49
54
|
end
|
50
55
|
finalize_current_part!
|
51
56
|
@closed = true
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: purplelight
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Nicholson
|
@@ -13,44 +13,62 @@ dependencies:
|
|
13
13
|
name: csv
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '3.3'
|
16
19
|
- - ">="
|
17
20
|
- !ruby/object:Gem::Version
|
18
|
-
version:
|
21
|
+
version: 3.3.5
|
19
22
|
type: :runtime
|
20
23
|
prerelease: false
|
21
24
|
version_requirements: !ruby/object:Gem::Requirement
|
22
25
|
requirements:
|
26
|
+
- - "~>"
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
version: '3.3'
|
23
29
|
- - ">="
|
24
30
|
- !ruby/object:Gem::Version
|
25
|
-
version:
|
31
|
+
version: 3.3.5
|
26
32
|
- !ruby/object:Gem::Dependency
|
27
33
|
name: logger
|
28
34
|
requirement: !ruby/object:Gem::Requirement
|
29
35
|
requirements:
|
36
|
+
- - "~>"
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '1.7'
|
30
39
|
- - ">="
|
31
40
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
41
|
+
version: 1.7.0
|
33
42
|
type: :runtime
|
34
43
|
prerelease: false
|
35
44
|
version_requirements: !ruby/object:Gem::Requirement
|
36
45
|
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '1.7'
|
37
49
|
- - ">="
|
38
50
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
51
|
+
version: 1.7.0
|
40
52
|
- !ruby/object:Gem::Dependency
|
41
53
|
name: mongo
|
42
54
|
requirement: !ruby/object:Gem::Requirement
|
43
55
|
requirements:
|
56
|
+
- - "~>"
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '2.21'
|
44
59
|
- - ">="
|
45
60
|
- !ruby/object:Gem::Version
|
46
|
-
version:
|
61
|
+
version: 2.21.3
|
47
62
|
type: :runtime
|
48
63
|
prerelease: false
|
49
64
|
version_requirements: !ruby/object:Gem::Requirement
|
50
65
|
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2.21'
|
51
69
|
- - ">="
|
52
70
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
71
|
+
version: 2.21.3
|
54
72
|
description: High-throughput, resumable snapshots of MongoDB collections with partitioning,
|
55
73
|
multi-threaded readers, and size-based sharded outputs.
|
56
74
|
email:
|
@@ -70,15 +88,16 @@ files:
|
|
70
88
|
- lib/purplelight/partitioner.rb
|
71
89
|
- lib/purplelight/queue.rb
|
72
90
|
- lib/purplelight/snapshot.rb
|
91
|
+
- lib/purplelight/telemetry.rb
|
73
92
|
- lib/purplelight/version.rb
|
74
93
|
- lib/purplelight/writer_csv.rb
|
75
94
|
- lib/purplelight/writer_jsonl.rb
|
76
95
|
- lib/purplelight/writer_parquet.rb
|
96
|
+
homepage: https://github.com/alexandernicholson/purplelight
|
77
97
|
licenses:
|
78
98
|
- MIT
|
79
99
|
metadata:
|
80
100
|
rubygems_mfa_required: 'true'
|
81
|
-
homepage_uri: https://github.com/alexandernicholson/purplelight
|
82
101
|
source_code_uri: https://github.com/alexandernicholson/purplelight
|
83
102
|
changelog_uri: https://github.com/alexandernicholson/purplelight/releases
|
84
103
|
rdoc_options: []
|