purplelight 0.1.12 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/bin/purplelight +5 -1
- data/lib/purplelight/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c500768ed34e247a92be3979f56c1036314b51003cb1ff7953562f1ca8278677
|
|
4
|
+
data.tar.gz: f12b305e35b201192e219aec23a0c66c13a7b9eb262c038dbac9d1e2a3c77bc5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0f6959b02fb695a4c28802ec555d7b6ce6e7020d348024d2df8381efbb022632916d2915c688131cf79aa042f05aa3e2ec8780ad866fdb774b45b51f53093d6b
|
|
7
|
+
data.tar.gz: aa2885e120f57bd492c1a98aee91f52b7c83bee15492ce34a0b6d3dae985e0355ea77489e69dfde2f27fb5b51974263b7f7358a51103697d8222a03cd98a4185
|
data/README.md
CHANGED
|
@@ -9,7 +9,7 @@ Purplelight is published on RubyGems: [purplelight on RubyGems](https://rubygems
|
|
|
9
9
|
Add to your Gemfile:
|
|
10
10
|
|
|
11
11
|
```ruby
|
|
12
|
-
gem 'purplelight', '~> 0.1.12'
|
|
12
|
+
gem 'purplelight', '~> 0.1.13'
|
|
13
13
|
```
|
|
14
14
|
|
|
15
15
|
Or install directly:
|
|
@@ -212,6 +212,7 @@ bundle exec bin/purplelight \
|
|
|
212
212
|
- `--read-concern LEVEL`: `majority|local|linearizable|available|snapshot`.
|
|
213
213
|
- `--no-cursor-timeout BOOL`: Toggle `noCursorTimeout` (default true).
|
|
214
214
|
- `--parquet-row-group N`: Parquet row group size (rows).
|
|
215
|
+
- `--parquet-max-rows N`: Parquet max rows per part (enables multi-part when not `--single-file`).
|
|
215
216
|
- `--write-chunk-mb MB`: JSONL encode/write chunk size before enqueueing.
|
|
216
217
|
- `--writer-threads N` (experimental): Number of writer threads (JSONL only).
|
|
217
218
|
- `--telemetry on|off`: Force enable/disable telemetry output.
|
|
@@ -222,7 +223,7 @@ bundle exec bin/purplelight \
|
|
|
222
223
|
Notes:
|
|
223
224
|
- Compression backend selection order is: requested format → `zstd-ruby` → `zstds` → `gzip`.
|
|
224
225
|
- `--single-file` and `--by-size` update only the sharding mode/params and preserve any provided `--prefix`.
|
|
225
|
-
|
|
226
|
+
– Parquet multi-part sizing can be controlled via `parquet_max_rows` programmatically, or via CLI `--parquet-max-rows`.
|
|
226
227
|
- To increase concurrent connections, set `maxPoolSize` on your Mongo URI (used by `--uri`), e.g., `mongodb://.../?maxPoolSize=32`. A good starting point is `maxPoolSize >= --partitions`.
|
|
227
228
|
|
|
228
229
|
### Architecture
|
data/bin/purplelight
CHANGED
|
@@ -25,6 +25,7 @@ options = {
|
|
|
25
25
|
writer_threads: nil,
|
|
26
26
|
write_chunk_bytes: nil,
|
|
27
27
|
parquet_row_group: nil,
|
|
28
|
+
parquet_max_rows: nil,
|
|
28
29
|
telemetry_flag: nil,
|
|
29
30
|
read_concern: nil,
|
|
30
31
|
no_cursor_timeout: nil,
|
|
@@ -61,6 +62,7 @@ parser = OptionParser.new do |opts|
|
|
|
61
62
|
opts.on('--writer-threads N', Integer, 'Number of writer threads (experimental, JSONL only)') { |v| options[:writer_threads] = v }
|
|
62
63
|
opts.on('--write-chunk-mb MB', Integer, 'JSONL encode/write chunk size in MB') { |v| options[:write_chunk_bytes] = v * 1024 * 1024 }
|
|
63
64
|
opts.on('--parquet-row-group N', Integer, 'Parquet row group size (rows)') { |v| options[:parquet_row_group] = v }
|
|
65
|
+
opts.on('--parquet-max-rows N', Integer, 'Parquet max rows per part (multi-part only)') { |v| options[:parquet_max_rows] = v }
|
|
64
66
|
opts.on('-q', '--query JSON', 'Filter query as JSON (Extended JSON supported)') do |v|
|
|
65
67
|
# Prefer BSON Extended JSON to support $date, $oid, etc.
|
|
66
68
|
options[:query] = BSON::ExtJSON.parse(v)
|
|
@@ -158,7 +160,8 @@ snapshot_args[:no_cursor_timeout] = options[:no_cursor_timeout] unless options[:
|
|
|
158
160
|
snapshot_args[:compression_level] = options[:compression_level] if options[:compression_level]
|
|
159
161
|
snapshot_args[:writer_threads] = options[:writer_threads] if options[:writer_threads]
|
|
160
162
|
snapshot_args[:write_chunk_bytes] = options[:write_chunk_bytes] if options[:write_chunk_bytes]
|
|
161
|
-
snapshot_args[:parquet_row_group] = options[:parquet_row_group] if options[:parquet_row_group]
|
|
163
|
+
snapshot_args[:parquet_row_group] = options[:parquet_row_group] if options[:parquet_row_group]
|
|
164
|
+
snapshot_args[:parquet_max_rows] = options[:parquet_max_rows] if options[:parquet_max_rows]
|
|
162
165
|
|
|
163
166
|
# telemetry env override
|
|
164
167
|
if options[:telemetry_flag]
|
|
@@ -169,6 +172,7 @@ end
|
|
|
169
172
|
ENV['PL_ZSTD_LEVEL'] = options[:compression_level].to_s if options[:compression_level]
|
|
170
173
|
ENV['PL_WRITE_CHUNK_BYTES'] = options[:write_chunk_bytes].to_s if options[:write_chunk_bytes]
|
|
171
174
|
ENV['PL_PARQUET_ROW_GROUP'] = options[:parquet_row_group].to_s if options[:parquet_row_group]
|
|
175
|
+
# No env default for parquet_max_rows; it is snapshot-argument only
|
|
172
176
|
ENV['PL_WRITER_THREADS'] = options[:writer_threads].to_s if options[:writer_threads]
|
|
173
177
|
|
|
174
178
|
ok = Purplelight.snapshot(**snapshot_args)
|
data/lib/purplelight/version.rb
CHANGED