purplelight 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5964231e634be4a743207679e349623c275e0b20771b492bbc54c4261238e352
4
- data.tar.gz: 82448e1f4b5ffb8e9846938653b16a4d7008aa29e722d9417863b80362370168
3
+ metadata.gz: a650fdd2113129b151396a1a90a83a6f1ede97eb5c34c60e028eb7639d5cc4fd
4
+ data.tar.gz: 1ab1bc421ddaf1c457639ae2ac5968245c6141b8504897ab762a49bb69c51a69
5
5
  SHA512:
6
- metadata.gz: 4b8ac2ba82501978bcd1ae1b9db888f7dab387de76a7e473c5b0993a526398a727a029a0597a64c77bbbed976980e6adaad7a08f7e3967c9cfe6c7afa2d996ac
7
- data.tar.gz: 15f536cfc05a0b70f7cdeb6bc7985e56b2e6dad6b2db53922e7aab12a821b8bb1294217390ff00c095b2cacb46c23e027d24d90a083e9e415a0df6e26fec9b59
6
+ metadata.gz: 506e52dce7c474998c8bc4b9afa9f5140349e8e2eed2eed7cccbacac0bd9d9f41528b234f1b99ad8407674791471368ee5b99d93b7ab058522311f2642006a20
7
+ data.tar.gz: 5c17e387f0d67a21d1351cf4e1e69beaa7beecdf5b9f8011e16bd740e990902abf32c54b02257cdead9c296241557571608b6004446cca5d429675fda07ff61a
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Alexander Nicholson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md CHANGED
@@ -9,7 +9,7 @@ Purplelight is published on RubyGems: [purplelight on RubyGems](https://rubygems
9
9
  Add to your Gemfile:
10
10
 
11
11
  ```ruby
12
- gem 'purplelight', '~> 0.1.3'
12
+ gem 'purplelight', '~> 0.1.5'
13
13
  ```
14
14
 
15
15
  Or install directly:
@@ -11,7 +11,86 @@ module Purplelight
11
11
  class Partitioner
12
12
  # Builds contiguous _id range filters for N partitions.
13
13
  # For ObjectId _id, the default planner splits on ObjectId timestamps; the legacy :cursor mode samples quantiles to split into near-equal document counts.
14
- def self.object_id_partitions(collection:, query:, partitions:)
# Entry point for planning `_id` partitions.
#
# The planner is selected from, in order of precedence: the `mode:` argument,
# the PL_PARTITIONER_MODE environment variable, then the :timestamp default.
# Only :cursor selects the legacy cursor-sampling planner; every other value
# is handled by the timestamp planner.
#
# @param collection [Object] collection handle passed through to the planner
# @param query [Hash, nil] base filter passed through to the planner
# @param partitions [Integer] requested number of partitions
# @param mode [Symbol, String, nil] optional planner override
# @param telemetry [Object, nil] timing sink; falls back to Telemetry::NULL
#   when the Telemetry constant is defined, otherwise stays nil
# @return [Array<Hash>] whatever the selected planner returns
def self.object_id_partitions(collection:, query:, partitions:, mode: nil, telemetry: nil)
  planner = (mode || ENV['PL_PARTITIONER_MODE'] || :timestamp).to_sym
  telemetry ||= (defined?(Telemetry) ? Telemetry::NULL : nil)

  if planner == :cursor
    cursor_sampling_partitions(collection: collection, query: query, partitions: partitions)
  else
    timestamp_partitions(collection: collection, query: query, partitions: partitions, telemetry: telemetry)
  end
end
23
+
# Builds contiguous `_id` ranges from the collection's smallest ids.
#
# Boundaries are the first `partitions - 1` ids in ascending order plus the
# maximum id, so the ranges are NOT balanced by document count: all but the
# last range cover the very start of the collection.
#
# Fixes over the previous version:
# * the first range has no lower bound, so the document holding the minimum
#   `_id` is included (it used to be excluded by `$gt min_id`);
# * the sampling query is skipped when `partitions <= 1`, because MongoDB
#   treats `limit(0)` as "no limit" and would return every `_id`;
# * the maximum id is not appended twice when the sample already ends on it,
#   which used to yield an empty trailing range.
#
# @param collection [Object] collection handle responding to `find`
# @param query [Hash, nil] base filter; never mutated
# @param partitions [Integer] requested number of partitions
# @return [Array<Hash>] partition specs with :filter, :sort and (when ranges
#   were built) :hint
def self.simple_ranges(collection:, query:, partitions:)
  base = query || {}
  min_id = collection.find(base).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
  max_id = collection.find(base).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
  return [{ filter: base, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?

  sample_size = partitions - 1
  upper_bounds =
    if sample_size.positive?
      collection.find(base).projection(_id: 1).sort(_id: 1).limit(sample_size).to_a.map { |d| d['_id'] }
    else
      []
    end
  # The sample is sorted ascending, so only its last element can equal max_id.
  upper_bounds << max_id unless upper_bounds.last == max_id

  # NOTE(review): relies on build_range accepting a nil lower bound, as the
  # timestamp planner in this file also does.
  lower = nil
  ranges = upper_bounds.map do |upper|
    range = build_range(lower, upper)
    lower = upper
    range
  end

  ranges.map do |r|
    filter = query ? query.dup : {}
    filter['_id'] = r
    { filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
  end
end
44
+
# Faster planning using ObjectId timestamps: O(partitions) indexed lookups.
#
# Looks up the minimum and maximum `_id`, walks evenly spaced timestamps
# between their generation times, and for each one finds the first `_id`
# greater than the synthetic ObjectId for that timestamp. Those ids become
# the inner boundaries of contiguous ranges.
#
# Falls back to `cursor_sampling_partitions` when the endpoint ids do not
# respond to `generation_time`, or when their timestamps (in whole seconds)
# do not increase from minimum to maximum.
#
# Fixes over the previous version:
# * consecutive probes that resolve to the same `_id` (sparse data) no longer
#   produce duplicate boundaries, which created empty `(b, b]` partitions;
# * probing stops once the target timestamp passes the maximum timestamp,
#   since no id at or below the maximum can match such a probe.
#
# The result may therefore hold fewer entries than `partitions`.
#
# @param collection [Object] collection handle responding to `find`
# @param query [Hash, nil] base filter; never mutated
# @param partitions [Integer] requested number of partitions
# @param telemetry [Object, nil] optional timing sink with `start`/`finish`
# @return [Array<Hash>] partition specs with :filter, :sort and (when ranges
#   were built) :hint
def self.timestamp_partitions(collection:, query:, partitions:, telemetry: nil)
  base = query || {}

  t_minmax = telemetry&.start(:plan_minmax_time)
  min_id = collection.find(base).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
  max_id = collection.find(base).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
  telemetry&.finish(:plan_minmax_time, t_minmax)

  return [{ filter: base, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?

  min_ts = min_id.respond_to?(:generation_time) ? min_id.generation_time.to_i : nil
  max_ts = max_id.respond_to?(:generation_time) ? max_id.generation_time.to_i : nil

  # Fall back to cursor sampling if _id isn't an ObjectId or the timestamps
  # leave no span to divide.
  if min_ts.nil? || max_ts.nil? || max_ts <= min_ts
    return cursor_sampling_partitions(collection: collection, query: query, partitions: partitions)
  end

  step = [(max_ts - min_ts) / partitions, 1].max
  inner_boundaries = []
  t_boundaries = telemetry&.start(:plan_boundary_queries_time)
  1.upto(partitions - 1) do |i|
    target_ts = min_ts + (step * i)
    # Nothing at or below max_id can sort after an id minted past max_ts.
    break if target_ts > max_ts

    candidate = BSON::ObjectId.from_time(Time.at(target_ts))
    probe = query ? query.dup : {}
    probe['_id'] = { '$gt' => candidate }
    boundary = collection.find(probe).projection(_id: 1).sort(_id: 1).hint(_id: 1).limit(1).first&.dig('_id')
    # Probes are ascending, so a repeated boundary can only repeat the last one.
    inner_boundaries << boundary if boundary && inner_boundaries.last != boundary
  end
  telemetry&.finish(:plan_boundary_queries_time, t_boundaries)

  # Build ranges: first range has nil lower bound to include min_id,
  # middle ranges are (prev, current], and last is (last, +inf)
  t_ranges = telemetry&.start(:plan_ranges_build_time)
  ranges = []
  prev = nil
  inner_boundaries.each do |b|
    ranges << build_range(prev, b)
    prev = b
  end
  ranges << build_range(prev, nil)
  telemetry&.finish(:plan_ranges_build_time, t_ranges)

  ranges.map do |r|
    filter = query ? query.dup : {}
    filter['_id'] = r
    { filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
  end
end
91
+
92
+ # Legacy cursor sampling planner
93
+ def self.cursor_sampling_partitions(collection:, query:, partitions:)
15
94
  # Ensure sort order for sampling
16
95
  base_query = collection.find(query || {}, {}.merge(sort: { _id: 1 }))
17
96
 
@@ -49,27 +128,6 @@ module Purplelight
49
128
  end
50
129
  end
51
130
 
52
- def self.simple_ranges(collection:, query:, partitions:)
53
- # Split by _id quantiles using min/max endpoints
54
- min_id = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(1).first&.dig('_id')
55
- max_id = collection.find(query || {}).projection(_id: 1).sort(_id: -1).limit(1).first&.dig('_id')
56
- return [{ filter: query || {}, sort: { _id: 1 } }] if min_id.nil? || max_id.nil?
57
-
58
- # Create numeric-ish interpolation by sampling
59
- ids = collection.find(query || {}).projection(_id: 1).sort(_id: 1).limit(partitions - 1).to_a.map { |d| d['_id'] }
60
- boundaries = [min_id] + ids + [max_id]
61
- ranges = []
62
- boundaries.each_cons(2) do |a, b|
63
- ranges << build_range(a, b)
64
- end
65
-
66
- ranges.map do |r|
67
- filter = query ? query.dup : {}
68
- filter['_id'] = r
69
- { filter: filter, sort: { _id: 1 }, hint: { _id: 1 } }
70
- end
71
- end
72
-
73
131
  def self.build_range(from_id, to_id)
74
132
  if from_id && to_id
75
133
  { '$gt' => from_id, '$lte' => to_id }
@@ -10,6 +10,7 @@ require_relative 'writer_csv'
10
10
  require_relative 'writer_parquet'
11
11
  require_relative 'manifest'
12
12
  require_relative 'errors'
13
+ require_relative 'telemetry'
13
14
 
14
15
  module Purplelight
15
16
  # Snapshot orchestrates partition planning, parallel reads, and writing.
@@ -37,7 +38,7 @@ module Purplelight
37
38
  resume: { enabled: true, state_path: nil, overwrite_incompatible: false },
38
39
  sharding: { mode: :by_size, part_bytes: DEFAULTS[:rotate_bytes], prefix: nil },
39
40
  logger: nil, on_progress: nil, read_concern: DEFAULTS[:read_concern], read_preference: DEFAULTS[:read_preference],
40
- no_cursor_timeout: DEFAULTS[:no_cursor_timeout])
41
+ no_cursor_timeout: DEFAULTS[:no_cursor_timeout], telemetry: nil)
41
42
  @client = client
42
43
  @collection = client[collection]
43
44
  @output = output
@@ -60,6 +61,10 @@ module Purplelight
60
61
  @no_cursor_timeout = no_cursor_timeout
61
62
 
62
63
  @running = true
64
+ @telemetry_enabled = telemetry ? telemetry.enabled? : (ENV['PL_TELEMETRY'] == '1')
65
+ @telemetry = telemetry || (
66
+ @telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
67
+ )
63
68
  end
64
69
 
65
70
  # rubocop:disable Naming/PredicateMethod
@@ -90,8 +95,10 @@ module Purplelight
90
95
  manifest.ensure_partitions!(@partitions)
91
96
 
92
97
  # Plan partitions
98
+ t_plan = @telemetry.start(:partition_plan_time)
93
99
  partition_filters = Partitioner.object_id_partitions(collection: @collection, query: @query,
94
- partitions: @partitions)
100
+ partitions: @partitions, telemetry: @telemetry)
101
+ @telemetry.finish(:partition_plan_time, t_plan)
95
102
 
96
103
  # Reader queue
97
104
  queue = ByteQueue.new(max_bytes: @queue_size_bytes)
@@ -116,12 +123,17 @@ module Purplelight
116
123
  # Start reader threads
117
124
  readers = partition_filters.each_with_index.map do |pf, idx|
118
125
  Thread.new do
119
- read_partition(idx: idx, filter_spec: pf, queue: queue, batch_size: @batch_size, manifest: manifest)
126
+ local_telemetry = @telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
127
+ read_partition(idx: idx, filter_spec: pf, queue: queue, batch_size: @batch_size, manifest: manifest, telemetry: local_telemetry)
128
+ # Merge per-thread telemetry
129
+ @telemetry.merge!(local_telemetry) if @telemetry_enabled
120
130
  end
121
131
  end
122
132
 
123
133
  # Writer loop
134
+ writer_telemetry = @telemetry_enabled ? Telemetry.new(enabled: true) : Telemetry::NULL
124
135
  writer_thread = Thread.new do
136
+ Thread.current[:pl_telemetry] = writer_telemetry if @telemetry_enabled
125
137
  loop do
126
138
  batch = queue.pop
127
139
  break if batch.nil?
@@ -146,8 +158,22 @@ module Purplelight
146
158
  readers.each(&:join)
147
159
  queue.close
148
160
  writer_thread.join
161
+ @telemetry.merge!(writer_telemetry) if @telemetry_enabled
149
162
  @running = false
150
163
  progress_thread.join
164
+ if @telemetry_enabled
165
+ total = @telemetry.timers.values.sum
166
+ breakdown = @telemetry.timers
167
+ .sort_by { |_k, v| -v }
168
+ .map { |k, v| [k, v, total.zero? ? 0 : ((v / total) * 100.0)] }
169
+ if @logger
170
+ @logger.info('Telemetry (seconds and % of timed work):')
171
+ breakdown.each { |k, v, pct| @logger.info(" #{k}: #{v.round(3)}s (#{pct.round(1)}%)") }
172
+ else
173
+ puts 'Telemetry (seconds and % of timed work):'
174
+ breakdown.each { |k, v, pct| puts " #{k}: #{v.round(3)}s (#{pct.round(1)}%)" }
175
+ end
176
+ end
151
177
  true
152
178
  end
153
179
  # rubocop:enable Naming/PredicateMethod
@@ -167,7 +193,7 @@ module Purplelight
167
193
  [dir, prefix]
168
194
  end
169
195
 
170
- def read_partition(idx:, filter_spec:, queue:, batch_size:, manifest:)
196
+ def read_partition(idx:, filter_spec:, queue:, batch_size:, manifest:, telemetry: Telemetry::NULL)
171
197
  filter = filter_spec[:filter]
172
198
  sort = filter_spec[:sort] || { _id: 1 }
173
199
  hint = @hint || filter_spec[:hint] || { _id: 1 }
@@ -202,25 +228,32 @@ module Purplelight
202
228
  cursor.each do |doc|
203
229
  last_id = doc['_id']
204
230
  doc = @mapper.call(doc) if @mapper
231
+ t_ser = telemetry.start(:serialize_time)
205
232
  if encode_lines
206
- line = "#{Oj.dump(doc, mode: :compat)}\n"
233
+ line = "#{JSON.generate(doc)}\n"
234
+ telemetry.finish(:serialize_time, t_ser)
207
235
  bytes = line.bytesize
208
236
  buffer << line
209
237
  else
210
238
  # For CSV/Parquet keep raw docs to allow schema/row building
211
- bytes = (Oj.dump(doc, mode: :compat).bytesize + 1)
239
+ bytes = (JSON.generate(doc).bytesize + 1)
240
+ telemetry.finish(:serialize_time, t_ser)
212
241
  buffer << doc
213
242
  end
214
243
  buffer_bytes += bytes
215
244
  next unless buffer.length >= batch_size || buffer_bytes >= 1_000_000
216
245
 
246
+ t_q = telemetry.start(:queue_wait_time)
217
247
  queue.push(buffer, bytes: buffer_bytes)
248
+ telemetry.finish(:queue_wait_time, t_q)
218
249
  manifest.update_partition_checkpoint!(idx, last_id)
219
250
  buffer = []
220
251
  buffer_bytes = 0
221
252
  end
222
253
  unless buffer.empty?
254
+ t_q = telemetry.start(:queue_wait_time)
223
255
  queue.push(buffer, bytes: buffer_bytes)
256
+ telemetry.finish(:queue_wait_time, t_q)
224
257
  manifest.update_partition_checkpoint!(idx, last_id)
225
258
  buffer = []
226
259
  buffer_bytes = 0
@@ -0,0 +1,51 @@
# frozen_string_literal: true

module Purplelight
  # Lightweight, low-overhead timing and counters with a ticket API.
  #
  # Timers accumulate elapsed monotonic-clock seconds per key; counters
  # accumulate integer counts per key. A disabled instance ignores every call.
  #
  # All mutations go through an internal mutex, so one instance can safely
  # receive `merge!` calls from several threads at once (the mutex used to be
  # allocated but never taken).
  class Telemetry
    # @return [Hash] live counter totals keyed by name (default value 0)
    # @return [Hash] live timer totals in seconds keyed by name (default 0.0)
    attr_reader :counters, :timers

    # @param enabled [Boolean] when false, every recording method is a no-op
    def initialize(enabled: true)
      @enabled = enabled
      @counters = Hash.new(0)
      @timers = Hash.new(0.0)
      @mutex = Mutex.new
    end

    # @return [Boolean] whether this instance records anything
    def enabled?
      @enabled
    end

    # Start a timer. Returns a ticket (Float) when enabled, or nil when disabled.
    # The key is accepted for call-site symmetry with `finish` but is not used.
    def start(_key)
      return nil unless @enabled

      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # Finish a timer using a ticket from start. No-ops if ticket is nil.
    #
    # @param key [Object] timer name to accumulate into
    # @param ticket [Float, nil] value previously returned by `start`
    def finish(key, ticket)
      return unless @enabled && ticket

      dt = Process.clock_gettime(Process::CLOCK_MONOTONIC) - ticket
      @mutex.synchronize { @timers[key] += dt }
    end

    # Adds `count` to the named counter.
    #
    # @param key [Object] counter name
    # @param count [Numeric] increment, 1 by default
    def add(key, count = 1)
      return unless @enabled

      @mutex.synchronize { @counters[key] += count }
    end

    # Folds another instance's counters and timers into this one.
    #
    # @param other [Telemetry] source of the totals to add
    # @return [Telemetry] self, for chaining
    def merge!(other)
      return self unless @enabled

      # Snapshot outside the lock so merging an instance into itself cannot
      # deadlock on the non-reentrant mutex.
      incoming_counters = other.counters.to_a
      incoming_timers = other.timers.to_a
      @mutex.synchronize do
        incoming_counters.each { |k, v| @counters[k] += v }
        incoming_timers.each { |k, v| @timers[k] += v }
      end
      self
    end

    # A disabled singleton for zero overhead checks if needed.
    NULL = new(enabled: false)
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Purplelight
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.5'
5
5
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'csv'
4
- require 'oj'
4
+ require 'json'
5
5
  require 'zlib'
6
6
  require 'fileutils'
7
7
 
@@ -81,8 +81,10 @@ module Purplelight
81
81
 
82
82
  @csv&.flush
83
83
  if @io
84
+ t = Thread.current[:pl_telemetry]&.start(:rotate_time)
84
85
  finalize_current_part!
85
86
  @io.close
87
+ Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
86
88
  end
87
89
  @closed = true
88
90
  end
@@ -122,8 +124,10 @@ module Purplelight
122
124
  def rotate!
123
125
  return unless @io
124
126
 
127
+ t = Thread.current[:pl_telemetry]&.start(:rotate_time)
125
128
  finalize_current_part!
126
129
  @io.close
130
+ Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
127
131
  @io = nil
128
132
  @csv = nil
129
133
  ensure_open!
@@ -173,7 +177,7 @@ module Purplelight
173
177
  val = doc[key] || doc[key.to_sym]
174
178
  case val
175
179
  when Hash, Array
176
- Oj.dump(val, mode: :compat)
180
+ JSON.generate(val)
177
181
  else
178
182
  val
179
183
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'oj'
3
+ require 'json'
4
4
  require 'zlib'
5
5
  require 'fileutils'
6
6
 
@@ -44,7 +44,7 @@ module Purplelight
44
44
  buffer = if array_of_docs.first.is_a?(String)
45
45
  array_of_docs.join
46
46
  else
47
- array_of_docs.map { |doc| "#{Oj.dump(doc, mode: :compat)}\n" }.join
47
+ array_of_docs.map { |doc| "#{JSON.generate(doc)}\n" }.join
48
48
  end
49
49
  rows = array_of_docs.size
50
50
  write_buffer(buffer)
@@ -106,7 +106,9 @@ module Purplelight
106
106
  end
107
107
 
108
108
  def write_buffer(buffer)
109
+ t = Thread.current[:pl_telemetry]&.start(:write_time)
109
110
  @io.write(buffer)
111
+ Thread.current[:pl_telemetry]&.finish(:write_time, t)
110
112
  @bytes_written += buffer.bytesize
111
113
  rotate_if_needed
112
114
  end
@@ -114,8 +116,10 @@ module Purplelight
114
116
  def rotate!
115
117
  return unless @io
116
118
 
119
+ t = Thread.current[:pl_telemetry]&.start(:rotate_time)
117
120
  finalize_current_part!
118
121
  @io.close
122
+ Thread.current[:pl_telemetry]&.finish(:rotate_time, t)
119
123
  @io = nil
120
124
  ensure_open!
121
125
  end
@@ -44,8 +44,13 @@ module Purplelight
44
44
 
45
45
  ensure_open!
46
46
  unless @buffer_docs.empty?
47
+ t_tbl = Thread.current[:pl_telemetry]&.start(:parquet_table_build_time)
47
48
  table = build_table(@buffer_docs)
49
+ Thread.current[:pl_telemetry]&.finish(:parquet_table_build_time, t_tbl)
50
+
51
+ t_w = Thread.current[:pl_telemetry]&.start(:parquet_write_time)
48
52
  write_table(table, @writer_path, append: false)
53
+ Thread.current[:pl_telemetry]&.finish(:parquet_write_time, t_w)
49
54
  end
50
55
  finalize_current_part!
51
56
  @closed = true
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purplelight
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Nicholson
@@ -13,58 +13,62 @@ dependencies:
13
13
  name: csv
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '3.3'
16
19
  - - ">="
17
20
  - !ruby/object:Gem::Version
18
- version: '0'
21
+ version: 3.3.5
19
22
  type: :runtime
20
23
  prerelease: false
21
24
  version_requirements: !ruby/object:Gem::Requirement
22
25
  requirements:
26
+ - - "~>"
27
+ - !ruby/object:Gem::Version
28
+ version: '3.3'
23
29
  - - ">="
24
30
  - !ruby/object:Gem::Version
25
- version: '0'
31
+ version: 3.3.5
26
32
  - !ruby/object:Gem::Dependency
27
33
  name: logger
28
34
  requirement: !ruby/object:Gem::Requirement
29
35
  requirements:
36
+ - - "~>"
37
+ - !ruby/object:Gem::Version
38
+ version: '1.7'
30
39
  - - ">="
31
40
  - !ruby/object:Gem::Version
32
- version: '1.6'
41
+ version: 1.7.0
33
42
  type: :runtime
34
43
  prerelease: false
35
44
  version_requirements: !ruby/object:Gem::Requirement
36
45
  requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '1.7'
37
49
  - - ">="
38
50
  - !ruby/object:Gem::Version
39
- version: '1.6'
51
+ version: 1.7.0
40
52
  - !ruby/object:Gem::Dependency
41
53
  name: mongo
42
54
  requirement: !ruby/object:Gem::Requirement
43
55
  requirements:
44
- - - ">="
56
+ - - "~>"
45
57
  - !ruby/object:Gem::Version
46
- version: '2.19'
47
- type: :runtime
48
- prerelease: false
49
- version_requirements: !ruby/object:Gem::Requirement
50
- requirements:
58
+ version: '2.21'
51
59
  - - ">="
52
60
  - !ruby/object:Gem::Version
53
- version: '2.19'
54
- - !ruby/object:Gem::Dependency
55
- name: oj
56
- requirement: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- version: '3.16'
61
+ version: 2.21.3
61
62
  type: :runtime
62
63
  prerelease: false
63
64
  version_requirements: !ruby/object:Gem::Requirement
64
65
  requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.21'
65
69
  - - ">="
66
70
  - !ruby/object:Gem::Version
67
- version: '3.16'
71
+ version: 2.21.3
68
72
  description: High-throughput, resumable snapshots of MongoDB collections with partitioning,
69
73
  multi-threaded readers, and size-based sharded outputs.
70
74
  email:
@@ -74,6 +78,7 @@ executables:
74
78
  extensions: []
75
79
  extra_rdoc_files: []
76
80
  files:
81
+ - LICENSE
77
82
  - README.md
78
83
  - Rakefile
79
84
  - bin/purplelight
@@ -83,15 +88,16 @@ files:
83
88
  - lib/purplelight/partitioner.rb
84
89
  - lib/purplelight/queue.rb
85
90
  - lib/purplelight/snapshot.rb
91
+ - lib/purplelight/telemetry.rb
86
92
  - lib/purplelight/version.rb
87
93
  - lib/purplelight/writer_csv.rb
88
94
  - lib/purplelight/writer_jsonl.rb
89
95
  - lib/purplelight/writer_parquet.rb
96
+ homepage: https://github.com/alexandernicholson/purplelight
90
97
  licenses:
91
98
  - MIT
92
99
  metadata:
93
100
  rubygems_mfa_required: 'true'
94
- homepage_uri: https://github.com/alexandernicholson/purplelight
95
101
  source_code_uri: https://github.com/alexandernicholson/purplelight
96
102
  changelog_uri: https://github.com/alexandernicholson/purplelight/releases
97
103
  rdoc_options: []