purplelight 0.1.12 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 920b534dc9ac832d83600031277ddd35da2920cff494e0f96d0ca230652d4ba4
4
- data.tar.gz: 2fd4476e73efc67d1f4a722dae8d7759ac8b51b5c21546dfb4b06d9fc1cd3934
3
+ metadata.gz: c500768ed34e247a92be3979f56c1036314b51003cb1ff7953562f1ca8278677
4
+ data.tar.gz: f12b305e35b201192e219aec23a0c66c13a7b9eb262c038dbac9d1e2a3c77bc5
5
5
  SHA512:
6
- metadata.gz: 370660a815b47c4aa4a0725a6188d6e0455074232000a24c4f909a313c3b9c2d5d3219a17edb628df58f167115b317347c159cc93d5bd59e216d6de1ec7ecd77
7
- data.tar.gz: efec787b1e355af50ec07e45b8c125d0f1054640734afbe7790a3a35181f999900d3c843104911eabcea644d2dc7f4604050571e17469f08fe97d9fa5aec92e9
6
+ metadata.gz: 0f6959b02fb695a4c28802ec555d7b6ce6e7020d348024d2df8381efbb022632916d2915c688131cf79aa042f05aa3e2ec8780ad866fdb774b45b51f53093d6b
7
+ data.tar.gz: aa2885e120f57bd492c1a98aee91f52b7c83bee15492ce34a0b6d3dae985e0355ea77489e69dfde2f27fb5b51974263b7f7358a51103697d8222a03cd98a4185
data/README.md CHANGED
@@ -9,7 +9,7 @@ Purplelight is published on RubyGems: [purplelight on RubyGems](https://rubygems
9
9
  Add to your Gemfile:
10
10
 
11
11
  ```ruby
12
- gem 'purplelight', '~> 0.1.12'
12
+ gem 'purplelight', '~> 0.1.13'
13
13
  ```
14
14
 
15
15
  Or install directly:
@@ -212,6 +212,7 @@ bundle exec bin/purplelight \
212
212
  - `--read-concern LEVEL`: `majority|local|linearizable|available|snapshot`.
213
213
  - `--no-cursor-timeout BOOL`: Toggle `noCursorTimeout` (default true).
214
214
  - `--parquet-row-group N`: Parquet row group size (rows).
215
+ - `--parquet-max-rows N`: Parquet max rows per part (enables multi-part when not `--single-file`).
215
216
  - `--write-chunk-mb MB`: JSONL encode/write chunk size before enqueueing.
216
217
  - `--writer-threads N` (experimental): Number of writer threads (JSONL only).
217
218
  - `--telemetry on|off`: Force enable/disable telemetry output.
@@ -222,7 +223,7 @@ bundle exec bin/purplelight \
222
223
  Notes:
223
224
  - Compression backend selection order is: requested format → `zstd-ruby` → `zstds` → `gzip`.
224
225
  - `--single-file` and `--by-size` update only the sharding mode/params and preserve any provided `--prefix`.
225
- - Parquet multi-part sizing is programmatic via `parquet_max_rows`; there is no CLI flag for it.
226
+ - Parquet multi-part sizing can be controlled via `parquet_max_rows` programmatically, or via CLI `--parquet-max-rows`.
226
227
  - To increase concurrent connections, set `maxPoolSize` on your Mongo URI (used by `--uri`), e.g., `mongodb://.../?maxPoolSize=32`. A good starting point is `maxPoolSize >= --partitions`.
227
228
 
228
229
  ### Architecture
data/bin/purplelight CHANGED
@@ -25,6 +25,7 @@ options = {
25
25
  writer_threads: nil,
26
26
  write_chunk_bytes: nil,
27
27
  parquet_row_group: nil,
28
+ parquet_max_rows: nil,
28
29
  telemetry_flag: nil,
29
30
  read_concern: nil,
30
31
  no_cursor_timeout: nil,
@@ -61,6 +62,7 @@ parser = OptionParser.new do |opts|
61
62
  opts.on('--writer-threads N', Integer, 'Number of writer threads (experimental, JSONL only)') { |v| options[:writer_threads] = v }
62
63
  opts.on('--write-chunk-mb MB', Integer, 'JSONL encode/write chunk size in MB') { |v| options[:write_chunk_bytes] = v * 1024 * 1024 }
63
64
  opts.on('--parquet-row-group N', Integer, 'Parquet row group size (rows)') { |v| options[:parquet_row_group] = v }
65
+ opts.on('--parquet-max-rows N', Integer, 'Parquet max rows per part (multi-part only)') { |v| options[:parquet_max_rows] = v }
64
66
  opts.on('-q', '--query JSON', 'Filter query as JSON (Extended JSON supported)') do |v|
65
67
  # Prefer BSON Extended JSON to support $date, $oid, etc.
66
68
  options[:query] = BSON::ExtJSON.parse(v)
@@ -158,7 +160,8 @@ snapshot_args[:no_cursor_timeout] = options[:no_cursor_timeout] unless options[:
158
160
  snapshot_args[:compression_level] = options[:compression_level] if options[:compression_level]
159
161
  snapshot_args[:writer_threads] = options[:writer_threads] if options[:writer_threads]
160
162
  snapshot_args[:write_chunk_bytes] = options[:write_chunk_bytes] if options[:write_chunk_bytes]
161
- snapshot_args[:parquet_row_group] = options[:parquet_row_group] if options[:parquet_row_group]
163
+ snapshot_args[:parquet_row_group] = options[:parquet_row_group] if options[:parquet_row_group]
164
+ snapshot_args[:parquet_max_rows] = options[:parquet_max_rows] if options[:parquet_max_rows]
162
165
 
163
166
  # telemetry env override
164
167
  if options[:telemetry_flag]
@@ -169,6 +172,7 @@ end
169
172
  ENV['PL_ZSTD_LEVEL'] = options[:compression_level].to_s if options[:compression_level]
170
173
  ENV['PL_WRITE_CHUNK_BYTES'] = options[:write_chunk_bytes].to_s if options[:write_chunk_bytes]
171
174
  ENV['PL_PARQUET_ROW_GROUP'] = options[:parquet_row_group].to_s if options[:parquet_row_group]
175
+ # parquet_max_rows has no ENV override; it is passed only via the snapshot arguments
172
176
  ENV['PL_WRITER_THREADS'] = options[:writer_threads].to_s if options[:writer_threads]
173
177
 
174
178
  ok = Purplelight.snapshot(**snapshot_args)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Purplelight
4
- VERSION = '0.1.12'
4
+ VERSION = '0.1.13'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purplelight
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.12
4
+ version: 0.1.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Nicholson