purplelight 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +55 -1
- data/lib/purplelight/snapshot.rb +2 -2
- data/lib/purplelight/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '07534009e367f28d3374708991cb870f5fa168ee11a95142af8d357885af7abc'
|
4
|
+
data.tar.gz: e665d587dea94999326c0c42e88d2bcfd99bae01e305aee9e3051d3ddcd266e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4cabf4d438a8afa0d00902aa07b01320013f4e8588630fb2d5c4f9b2432e1910ac94c19ccce78bdb396a415ac3ea949527b83d22eadfbc036520656c4273869
|
7
|
+
data.tar.gz: b038e1fa40f36e985571019d7b4d7fe9c5013ea17314640f8d484ca9cbbb68292af08c5f91b1a006f34c473a24f3b83b03aece33d333685afb749437ac920ca4
|
data/README.md
CHANGED
@@ -9,7 +9,7 @@ Purplelight is published on RubyGems: [purplelight on RubyGems](https://rubygems
|
|
9
9
|
Add to your Gemfile:
|
10
10
|
|
11
11
|
```ruby
|
12
|
-
gem 'purplelight', '~> 0.1.1'
|
12
|
+
gem 'purplelight', '~> 0.1.2'
|
13
13
|
```
|
14
14
|
|
15
15
|
Or install directly:
|
@@ -41,6 +41,60 @@ Purplelight.snapshot(
|
|
41
41
|
)
|
42
42
|
```
|
43
43
|
|
44
|
+
### Filtering with `query`
|
45
|
+
|
46
|
+
`query` is passed directly to MongoDB as the filter for the collection read. Use standard MongoDB query operators.
|
47
|
+
|
48
|
+
Ruby examples:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
# Equality
|
52
|
+
query: { status: 'active' }
|
53
|
+
|
54
|
+
# Ranges (Time.parse needs `require 'time'`)
|
55
|
+
query: { created_at: { '$gte' => Time.parse('2025-01-01'), '$lt' => Time.parse('2025-02-01') } }
|
56
|
+
|
57
|
+
# $in / $nin
|
58
|
+
query: { type: { '$in' => %w[user admin] } }
|
59
|
+
|
60
|
+
# Nested fields (dot-notation also supported in Mongo)
|
61
|
+
query: { 'profile.country' => 'US' }
|
62
|
+
|
63
|
+
# By ObjectId boundary (works great with _id partitions)
|
64
|
+
query: { _id: { '$gt' => BSON::ObjectId.from_time(Time.utc(2024, 1, 1)) } }
|
65
|
+
```
|
66
|
+
|
67
|
+
CLI examples (JSON):
|
68
|
+
|
69
|
+
```bash
|
70
|
+
# Equality
|
71
|
+
--query '{"status":"active"}'
|
72
|
+
|
73
|
+
# Date/time range (ISO8601 strings your app can parse downstream)
|
74
|
+
--query '{"created_at":{"$gte":"2025-01-01T00:00:00Z","$lt":"2025-02-01T00:00:00Z"}}'
|
75
|
+
|
76
|
+
# Nested field
|
77
|
+
--query '{"profile.country":"US"}'
|
78
|
+
|
79
|
+
# IN list
|
80
|
+
--query '{"type":{"$in":["user","admin"]}}'
|
81
|
+
```
|
82
|
+
|
83
|
+
Notes:
|
84
|
+
- Ensure values are BSON-serializable; when using Ruby, you can pass native `Time`, `BSON::ObjectId`, etc.
|
85
|
+
- Consider adding an index that matches your `query`, and pass `hint:` to force an indexed scan when needed:
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
Purplelight.snapshot(
|
89
|
+
client: client,
|
90
|
+
collection: 'events',
|
91
|
+
output: '/data/exports',
|
92
|
+
format: :jsonl,
|
93
|
+
query: { created_at: { '$gte' => Time.parse('2025-01-01') } },
|
94
|
+
hint: { created_at: 1 }
|
95
|
+
)
|
96
|
+
```
|
97
|
+
|
44
98
|
Outputs files like:
|
45
99
|
|
46
100
|
```
|
data/lib/purplelight/snapshot.rb
CHANGED
@@ -81,7 +81,7 @@ module Purplelight
|
|
81
81
|
end
|
82
82
|
|
83
83
|
manifest.configure!(collection: @collection.name, format: @format, compression: @compression, query_digest: query_digest, options: {
|
84
|
-
partitions: @partitions, batch_size: @batch_size, rotate_bytes: @rotate_bytes
|
84
|
+
partitions: @partitions, batch_size: @batch_size, rotate_bytes: @rotate_bytes, hint: @hint
|
85
85
|
})
|
86
86
|
manifest.ensure_partitions!(@partitions)
|
87
87
|
|
@@ -159,7 +159,7 @@ module Purplelight
|
|
159
159
|
def read_partition(idx:, filter_spec:, queue:, batch_size:, manifest:)
|
160
160
|
filter = filter_spec[:filter]
|
161
161
|
sort = filter_spec[:sort] || { _id: 1 }
|
162
|
-
hint = filter_spec[:hint] || { _id: 1 }
|
162
|
+
hint = @hint || filter_spec[:hint] || { _id: 1 }
|
163
163
|
|
164
164
|
# Resume from checkpoint if present
|
165
165
|
checkpoint = manifest.partitions[idx] && manifest.partitions[idx]['last_id_exclusive']
|
data/lib/purplelight/version.rb
CHANGED