familia 2.8.0 → 2.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.rst +105 -0
- data/Gemfile.lock +2 -2
- data/docs/guides/datatype-collections.md +159 -0
- data/docs/migrating/v2.9.0.md +125 -0
- data/familia.gemspec +1 -1
- data/lib/familia/batch_result.rb +158 -0
- data/lib/familia/data_type/collection_base.rb +129 -0
- data/lib/familia/data_type/scalar_base.rb +33 -0
- data/lib/familia/data_type/types/hashkey.rb +37 -0
- data/lib/familia/data_type/types/json_stringkey.rb +3 -1
- data/lib/familia/data_type/types/listkey.rb +41 -18
- data/lib/familia/data_type/types/sorted_set.rb +113 -18
- data/lib/familia/data_type/types/stringkey.rb +3 -1
- data/lib/familia/data_type/types/unsorted_set.rb +37 -14
- data/lib/familia/data_type.rb +2 -1
- data/lib/familia/features/encrypted_fields/encrypted_field_type.rb +2 -20
- data/lib/familia/features/expiration.rb +2 -2
- data/lib/familia/field_type.rb +1 -19
- data/lib/familia/horreum/definition.rb +1 -19
- data/lib/familia/horreum/management.rb +1 -1
- data/lib/familia/multi_result.rb +111 -0
- data/lib/familia/utils.rb +48 -0
- data/lib/familia/version.rb +1 -1
- data/lib/familia.rb +2 -1
- data/try/edge_cases/fast_writer_pipeline_support_try.rb +80 -0
- data/try/edge_cases/fast_writer_transaction_guard_try.rb +40 -59
- data/try/edge_cases/iterator_connection_errors_try.rb +97 -0
- data/try/edge_cases/pipeline_handler_edge_cases_try.rb +1 -1
- data/try/edge_cases/ttl_side_effects_try.rb +1 -1
- data/try/features/atomic_write_coverage_try.rb +1 -1
- data/try/features/atomic_write_try.rb +3 -3
- data/try/features/atomicity_try.rb +2 -2
- data/try/features/dirty_tracking_try.rb +21 -21
- data/try/features/instance_registry_try.rb +2 -2
- data/try/integration/connection/operation_mode_guards_try.rb +3 -3
- data/try/integration/connection/pipeline_fallback_integration_try.rb +4 -4
- data/try/integration/connection/pipeline_handler_integration_try.rb +3 -3
- data/try/integration/connection/pipeline_horreum_routing_try.rb +4 -4
- data/try/integration/connection/pools_try.rb +1 -1
- data/try/integration/connection/transaction_fallback_integration_try.rb +4 -4
- data/try/integration/connection/transaction_mode_permissive_try.rb +8 -8
- data/try/integration/connection/transaction_mode_strict_try.rb +2 -2
- data/try/integration/connection/transaction_mode_warn_try.rb +5 -5
- data/try/integration/connection/transaction_modes_try.rb +14 -14
- data/try/integration/data_types/datatype_pipelines_try.rb +9 -9
- data/try/integration/data_types/datatype_transactions_try.rb +17 -17
- data/try/integration/database_consistency_try.rb +1 -1
- data/try/integration/models/familia_object_try.rb +1 -1
- data/try/integration/transaction_safety_core_try.rb +1 -1
- data/try/integration/transaction_safety_workflow_try.rb +2 -2
- data/try/support/prototypes/atomic_saves_v2_connection_switching.rb +1 -1
- data/try/support/prototypes/lib/atomic_saves_v2_connection_switching_helpers.rb +1 -1
- data/try/support/prototypes/pooling/lib/connection_pool_stress_test.rb +1 -1
- data/try/unit/batch_result_try.rb +348 -0
- data/try/unit/data_types/each_record_try.rb +375 -0
- data/try/unit/data_types/enumerable_consistency/concurrent_modification_try.rb +176 -0
- data/try/unit/data_types/enumerable_consistency/hashkey_consistency_try.rb +224 -0
- data/try/unit/data_types/enumerable_consistency/large_scale_consistency_try.rb +292 -0
- data/try/unit/data_types/enumerable_consistency/listkey_consistency_try.rb +230 -0
- data/try/unit/data_types/enumerable_consistency/sorted_set_consistency_try.rb +241 -0
- data/try/unit/data_types/enumerable_consistency/unsorted_set_consistency_try.rb +261 -0
- data/try/unit/data_types/enumerable_try.rb +228 -0
- data/try/unit/data_types/hashkey_each_try.rb +213 -0
- data/try/unit/data_types/listkey_each_try.rb +222 -0
- data/try/unit/data_types/sorted_set_each_try.rb +227 -0
- data/try/unit/data_types/sorted_set_try.rb +44 -0
- data/try/unit/data_types/unsorted_set_each_try.rb +185 -0
- data/try/unit/horreum/base_try.rb +1 -1
- data/try/unit/horreum/destroy_related_fields_cleanup_try.rb +4 -4
- data/try/unit/horreum/initialization_try.rb +1 -1
- data/try/unit/horreum/json_type_preservation_try.rb +3 -3
- data/try/unit/horreum/multi_field_update_try.rb +143 -0
- data/try/unit/horreum/serialization_try.rb +14 -14
- data/try/unit/utils/future_aware_helpers_try.rb +128 -0
- metadata +26 -5
- data/docs/archive/.gitignore +0 -2
- data/lib/multi_result.rb +0 -109
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d24c3b38092f1192f8e250553e8ef4d51b05c1bb00c48dfd7f6520d3a48a9a0e
|
|
4
|
+
data.tar.gz: 1dd0a2aa47682736209f116adf378eb4a0eccafffd7c151b02a8ef4573e52551
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 10d69edb30370c7dba83b387f53429878fded6bf736e04fbefec4c228baf375806eeefa6c1d44c45c296e5d79b82345d1bc9cfbb0992ee18fc4c204dac250e10
|
|
7
|
+
data.tar.gz: 9b52e601ae93d44a6db8b0f995828ae45d55872377a471658a8d63210607c56f70958e372654ac20a13b07344e9b9da0afcdaaa50203f0796326d5d08e93bf6b
|
data/CHANGELOG.rst
CHANGED
|
@@ -7,6 +7,111 @@ The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.1.0/>`
|
|
|
7
7
|
|
|
8
8
|
<!--scriv-insert-here-->
|
|
9
9
|
|
|
10
|
+
.. _changelog-2.9.1:
|
|
11
|
+
|
|
12
|
+
2.9.1 — 2026-05-18
|
|
13
|
+
==================
|
|
14
|
+
|
|
15
|
+
Added
|
|
16
|
+
-----
|
|
17
|
+
|
|
18
|
+
- ``SortedSet#update`` (aliased ``merge!``) for bulk member insertion. A sorted
|
|
19
|
+
set is ``member => score`` -- the same pair shape as ``HashKey``'s
|
|
20
|
+
``field => value`` -- so it follows the established ``HashKey#update``/``merge!``
|
|
21
|
+
convention (a single Hash argument) rather than the variadic splat used by the
|
|
22
|
+
value-only ``UnsortedSet``/``ListKey``. Pass ``{member => score}`` to issue one
|
|
23
|
+
``ZADD`` instead of one round-trip per member. Validates the argument is a Hash
|
|
24
|
+
and that every score is ``Numeric`` (a missing/``nil`` score raises a clear
|
|
25
|
+
``ArgumentError`` instead of a low-level client error -- unlike single-value
|
|
26
|
+
``#add``, the bulk path does not default a missing score to ``Familia.now``).
|
|
27
|
+
Cascades expiration, and is a no-op returning ``0`` for empty input. The
|
|
28
|
+
single-value ``SortedSet#add`` (and its array-as-single-member contract) is
|
|
29
|
+
unchanged. PR #269
|
|
30
|
+
|
|
31
|
+
Changed
|
|
32
|
+
-------
|
|
33
|
+
|
|
34
|
+
- Bulk-write optimization for multi-value collection mutations. ``UnsortedSet#add``,
|
|
35
|
+
``ListKey#push``, and ``ListKey#unshift`` previously issued one Redis command per
|
|
36
|
+
element (a loop of ``SADD``/``RPUSH``/``LPUSH`` calls), making large populations
|
|
37
|
+
slow even when pipelined. They now serialize all values and issue a single bulk
|
|
38
|
+
``SADD``/``RPUSH``/``LPUSH`` command. Element ordering, ``nil`` compaction, nested
|
|
39
|
+
array flattening, return values, dirty-write warnings, and expiration cascading
|
|
40
|
+
are unchanged; empty calls remain no-ops. PR #269
|
|
41
|
+
|
|
42
|
+
AI Assistance
|
|
43
|
+
-------------
|
|
44
|
+
|
|
45
|
+
- AI investigated all collection ``DataType`` classes for the same per-element
|
|
46
|
+
loop anti-pattern, identified the three affected methods, verified
|
|
47
|
+
behavior-preservation (ordering, edge cases, chainability) at the Redis wire
|
|
48
|
+
level, and confirmed zero regressions against the existing test suites. The
|
|
49
|
+
``SortedSet#update`` API shape was chosen by priority order: existing Familia
|
|
50
|
+
conventions first (the ``HashKey#update``/``merge!`` precedent for keyed
|
|
51
|
+
collections), then the upstream redis-rb bulk ``ZADD`` form, then Ruby
|
|
52
|
+
``Hash#merge!`` semantics as confirmation.
|
|
53
|
+
|
|
54
|
+
.. _changelog-2.9.0:
|
|
55
|
+
|
|
56
|
+
2.9.0 — 2026-05-17
|
|
57
|
+
==================
|
|
58
|
+
|
|
59
|
+
Added
|
|
60
|
+
-----
|
|
61
|
+
|
|
62
|
+
- Batch iteration primitives for DataTypes via ``Enumerable`` integration:
|
|
63
|
+
|
|
64
|
+
- All DataTypes (``SortedSet``, ``HashKey``, ``UnsortedSet``, ``ListKey``) now
|
|
65
|
+
``include Enumerable``, providing ``each_slice``, ``lazy``, ``map``, ``reduce``,
|
|
66
|
+
``find``, and other stdlib methods.
|
|
67
|
+
|
|
68
|
+
- **SortedSet#each(since:, until:)**: Cursor-based iteration with optional
|
|
69
|
+
timestamp bounds. Uses ZRANGEBYSCORE when bounds provided (inclusive),
|
|
70
|
+
ZSCAN otherwise. Accepts Time objects or numeric scores.
|
|
71
|
+
|
|
72
|
+
- **HashKey#each(matching:)**: Cursor-based iteration via HSCAN with optional
|
|
73
|
+
glob pattern filter on field names.
|
|
74
|
+
|
|
75
|
+
- **UnsortedSet#each(matching:)**: Cursor-based iteration via SSCAN with optional
|
|
76
|
+
glob pattern filter using Redis SSCAN MATCH on raw values.
|
|
77
|
+
|
|
78
|
+
- **ListKey#each(batch_size:)**: Memory-efficient LRANGE pagination for large lists.
|
|
79
|
+
|
|
80
|
+
- ``DataType#each_record(batch_size:, pipeline:, **filters)`` yields loaded
|
|
81
|
+
Horreum records (not raw IDs) via ``load_multi``. Ghost instances (expired keys
|
|
82
|
+
still in ``instances``) are automatically filtered. The ``pipeline:`` parameter
|
|
83
|
+
controls pipelining depth (``nil`` for serial execution).
|
|
84
|
+
|
|
85
|
+
- ``Familia::BatchResult`` value type for aggregating batch operation results:
|
|
86
|
+
|
|
87
|
+
- ``BatchResult.collect(enumerable, strict: false) { |record| ... }`` iterates
|
|
88
|
+
any Enumerable, tracking ``scanned``, ``modified`` (truthy returns), ``errors``
|
|
89
|
+
(array of ``{id:, error:}``), and ``duration_ms``.
|
|
90
|
+
|
|
91
|
+
- Per-record exception isolation: errors are captured and iteration continues.
|
|
92
|
+
|
|
93
|
+
- ``strict: true`` re-raises the first collected error after iteration completes.
|
|
94
|
+
|
|
95
|
+
Changed
|
|
96
|
+
-------
|
|
97
|
+
|
|
98
|
+
- Renamed batch field-update methods for clarity:
|
|
99
|
+
|
|
100
|
+
- ``batch_update`` is now ``multi_field_update``
|
|
101
|
+
- ``batch_fast_write`` is now ``multi_field_fast_write``
|
|
102
|
+
|
|
103
|
+
Old names removed without deprecation shim (breaking change).
|
|
104
|
+
|
|
105
|
+
- Moved ``MultiResult`` into Familia namespace as ``Familia::MultiResult``.
|
|
106
|
+
Old top-level constant removed without backwards-compat alias (breaking change).
|
|
107
|
+
|
|
108
|
+
AI Assistance
|
|
109
|
+
-------------
|
|
110
|
+
|
|
111
|
+
- Implementation and test coverage developed with parallel Claude Code agents:
|
|
112
|
+
one for production code (DataType iteration, BatchResult, renames), one for
|
|
113
|
+
Tryouts test suite (228 new tests across 8 files). PR #264.
|
|
114
|
+
|
|
10
115
|
.. _changelog-2.8.0:
|
|
11
116
|
|
|
12
117
|
2.8.0 — 2026-05-15
|
data/Gemfile.lock
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
familia (2.8.0)
|
|
4
|
+
familia (2.9.1)
|
|
5
5
|
concurrent-ruby (~> 1.3)
|
|
6
6
|
connection_pool (>= 2.4, < 4.0)
|
|
7
7
|
csv (~> 3.3)
|
|
8
8
|
json_schemer (~> 2.0)
|
|
9
9
|
logger (~> 1.7)
|
|
10
10
|
oj (~> 3.16)
|
|
11
|
-
redis (>=
|
|
11
|
+
redis (>= 5.0, < 6.0)
|
|
12
12
|
stringio (~> 3.1.1)
|
|
13
13
|
uri-valkey (~> 1.4)
|
|
14
14
|
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# docs/guides/datatype-collections.md
|
|
2
|
+
---
|
|
3
|
+
|
|
4
|
+
# DataType - Collection classes
|
|
5
|
+
|
|
6
|
+
The `UnsortedSet`, `SortedSet`, `ListKey`, and `HashKey` data types all include the `Collection` module. This guide covers two performance-sensitive concerns: writing many elements efficiently (a single bulk command instead of one round-trip per element), and iterating large collections efficiently via `each` and `each_record`.
|
|
7
|
+
|
|
8
|
+
## Bulk writes — single round-trip mutations
|
|
9
|
+
|
|
10
|
+
Collection mutations are **immediate** — every call hits Valkey/Redis right away, unlike scalar `field` setters which are deferred until `save`. Each call also runs `warn_if_dirty!` and cascades expiration. (See the write-model notes in `CLAUDE.md` for the deferred-vs-immediate split.)
|
|
11
|
+
|
|
12
|
+
Multi-element adds issue **one** command for the whole batch, not one per element. Populating a large collection is therefore a single round-trip even without an explicit pipeline.
|
|
13
|
+
|
|
14
|
+
The argument shape follows the collection's structure, and is consistent across the codebase:
|
|
15
|
+
|
|
16
|
+
- **Value-only** collections (`UnsortedSet`, `ListKey`) take a **variadic splat**; arguments are flattened and `nil`-compacted.
|
|
17
|
+
- **Keyed/pair** collections (`HashKey` is `field => value`, `SortedSet` is `member => score`) take a **single Hash** via `update` (aliased `merge!`), raising `ArgumentError` on a non-Hash.
|
|
18
|
+
|
|
19
|
+
| Type | Bulk method | Call shape | Redis command |
|
|
20
|
+
|---|---|---|---|
|
|
21
|
+
| `UnsortedSet` | `add(*values)` | `tags.add(:a, :b, :c)` | one `SADD` |
|
|
22
|
+
| `ListKey` | `push(*values)` / `unshift(*values)` | `log.push(1, 2, 3)` | one `RPUSH` / `LPUSH` |
|
|
23
|
+
| `HashKey` | `update(hash)` / `merge!` | `cfg.update(a: 1, b: 2)` | one `HMSET` |
|
|
24
|
+
| `SortedSet` | `update(hash)` / `merge!` | `board.update("alice" => 1000, "bob" => 850)` | one `ZADD` |
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
tags.add(:ruby, :redis, :valkey) # 1 SADD, returns self
|
|
28
|
+
log.push("a", "b", "c") # 1 RPUSH → [a, b, c]
|
|
29
|
+
board.update("alice" => 1000, "bob" => 850) # 1 ZADD, returns new-member count (2)
|
|
30
|
+
board.merge!("alice" => 1200) # 1 ZADD, score updated → returns 0
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Behavior notes:
|
|
34
|
+
|
|
35
|
+
- **Ordering**: `push` preserves argument order; `unshift` prepends each element in turn, so `unshift(a, b, c)` leaves the list head as `c, b, a` (Redis `LPUSH` semantics — unchanged from the prior per-element implementation). Sets are unordered; sorted sets order by score.
|
|
36
|
+
- **Empty input is a no-op**: `add()` / `push()` / `update({})` issue no command. Set/list adds return `self`; `SortedSet#update` returns `0`.
|
|
37
|
+
- **`SortedSet#add(val, score, …)` is unchanged and not bulk** — it takes a single member plus score and the conditional ZADD options (`nx:`, `xx:`, `gt:`, `lt:`, `ch:`). An Array passed as `val` is stored as one JSON-encoded member, not exploded into many. Use `update`/`merge!` for bulk insertion.
|
|
38
|
+
|
|
39
|
+
The iteration methods `each` and `each_record` efficiently handle large collections by paginating through Valkey/Redis data structures, but they serve different purposes and yield different results. Here's how the two iterate, using `ModelClass.instances` (a `SortedSet` with `reference: true`) as the running example.
|
|
40
|
+
|
|
41
|
+
## `each` — yields **members** (identifiers, raw strings)
|
|
42
|
+
|
|
43
|
+
`each` is implemented per type. For the `instances` SortedSet, it pages through the ZSET with either `ZRANGEBYSCORE` (when `since:`/`until:` are given) or `ZSCAN` (unbounded), yielding one deserialized member at a time.
|
|
44
|
+
|
|
45
|
+
```mermaid
|
|
46
|
+
flowchart TD
|
|
47
|
+
Caller["ModelClass.instances.each { |id| ... }"] --> EachImpl["SortedSet#each"]
|
|
48
|
+
EachImpl --> Decide{since/until?}
|
|
49
|
+
Decide -- yes --> ZRBS["ZRANGEBYSCORE key min max LIMIT 0 batch_size WITHSCORES"]
|
|
50
|
+
Decide -- no --> ZSCAN["ZSCAN key cursor COUNT batch_size"]
|
|
51
|
+
ZRBS --> Page["Page of raw members"]
|
|
52
|
+
ZSCAN --> Page
|
|
53
|
+
Page --> Yield["yield deserialize_value(member)"]
|
|
54
|
+
Yield --> More{more pages?}
|
|
55
|
+
More -- yes --> Decide
|
|
56
|
+
More -- no --> Done["return self"]
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Per-type variations:
|
|
60
|
+
- `ListKey#each` — paginates with `LRANGE start stop` (no SCAN equivalent)
|
|
61
|
+
- `UnsortedSet#each` / `HashKey#each` — `SSCAN` / `HSCAN`, optional `matching:` glob
|
|
62
|
+
- `SortedSet#each` — `ZRANGEBYSCORE` (bounded) or `ZSCAN` (unbounded)
|
|
63
|
+
|
|
64
|
+
You get **identifiers only**. No record loading. One Redis round-trip per page.
|
|
65
|
+
|
|
66
|
+
## `each_record` — yields **loaded Horreum records**
|
|
67
|
+
|
|
68
|
+
`each_record` is defined once in `CollectionBase` and delegates to `each` to collect identifiers, then batches them into `record_class.load_multi` (pipelined `HGETALL`s), filters ghosts, and yields the live records.
|
|
69
|
+
|
|
70
|
+
```mermaid
|
|
71
|
+
flowchart TD
|
|
72
|
+
Caller["ModelClass.instances.each_record { |rec| ... }"] --> ER["each_record(batch_size, pipeline, **filters)"]
|
|
73
|
+
ER --> Validate{"pipeline <= batch_size?"}
|
|
74
|
+
Validate -- no --> Raise["raise ArgumentError"]
|
|
75
|
+
Validate -- yes --> CallEach["each(**filters) do |member|"]
|
|
76
|
+
CallEach --> Extract["id = member.is_a?(Array) ? member.first : member"]
|
|
77
|
+
Extract --> Buffer["buffer << id"]
|
|
78
|
+
Buffer --> Full{"buffer.size >= batch_size?"}
|
|
79
|
+
Full -- no --> CallEach
|
|
80
|
+
Full -- yes --> Load["record_class.load_multi(ids) -- pipelined HGETALLs"]
|
|
81
|
+
Load --> Compact["live = records.compact -- drop ghosts"]
|
|
82
|
+
Compact --> Mode{pipeline?}
|
|
83
|
+
Mode -- nil --> Serial["live.each { |r| block.call(r) }"]
|
|
84
|
+
Mode -- positive --> Pipe["live.each_slice(pipeline) do |group|<br/>record_class.pipelined { group.each &block }<br/>end"]
|
|
85
|
+
Serial --> Clear["buffer.clear; resume each"]
|
|
86
|
+
Pipe --> Clear
|
|
87
|
+
Clear --> CallEach
|
|
88
|
+
CallEach -. each exhausted .-> Flush["process_batch(buffer) if any remain"]
|
|
89
|
+
Flush --> Return["return self"]
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Concrete timeline for `User.instances.each_record(batch_size: 100, pipeline: 25) { |u| u.touch! }`
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
SortedSet#each (ZSCAN page 1, 100 ids)
|
|
96
|
+
├─ buffer fills to 100
|
|
97
|
+
├─ load_multi(ids) → 1 pipeline of 100 HGETALLs
|
|
98
|
+
├─ compact ghosts → e.g. 97 live records
|
|
99
|
+
├─ slice(25):
|
|
100
|
+
│ pipelined { 25 × u.touch! } ← 1 Redis pipeline
|
|
101
|
+
│ pipelined { 25 × u.touch! } ← 1 Redis pipeline
|
|
102
|
+
│ pipelined { 25 × u.touch! } ← 1 Redis pipeline
|
|
103
|
+
│ pipelined { 22 × u.touch! } ← 1 Redis pipeline
|
|
104
|
+
└─ buffer.clear
|
|
105
|
+
SortedSet#each (ZSCAN page 2, 100 ids)
|
|
106
|
+
└─ … repeat …
|
|
107
|
+
SortedSet#each exhausted
|
|
108
|
+
└─ flush any remaining buffered ids the same way
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Key differences
|
|
112
|
+
|
|
113
|
+
| Aspect | `each` | `each_record` |
|
|
114
|
+
|---|---|---|
|
|
115
|
+
| Yields | raw identifier (or `[field, value]` for `HashKey`) | loaded Horreum instance |
|
|
116
|
+
| Redis ops per yield | 0 extra (already paged) | amortized `HGETALL` via `load_multi` batch |
|
|
117
|
+
| Requires `reference: true` + `:class` | no | yes (raises `Familia::Problem` otherwise) |
|
|
118
|
+
| Ghost handling | yields the dangling id | `compact` drops them silently |
|
|
119
|
+
| Write pipelining | not built-in | `pipeline:` groups block-body writes into `pipelined` blocks |
|
|
120
|
+
| Filters | type-specific (`since:`, `matching:`, …) | forwarded to underlying `each` |
|
|
121
|
+
|
|
122
|
+
So `each_record` is a thin orchestration layer: it leans on the type's own `each` for read pagination, then layers (1) batched record hydration and (2) optional write pipelining on top.
|
|
123
|
+
|
|
124
|
+
## Choosing a `pipeline` mode
|
|
125
|
+
|
|
126
|
+
`each_record` has two dispatch modes, controlled by `pipeline:`. The parameter answers a single question: **may the dispatch loop wrap your block in a `pipelined { }`?**
|
|
127
|
+
|
|
128
|
+
| Value | Dispatch | Use when the block… |
|
|
129
|
+
|---|---|---|
|
|
130
|
+
| `nil` (default) | Each record runs in its own connection context, no pipeline wrapper | …reads, OR calls `save` / `commit_fields` / `transaction` / anything with its own internal MULTI |
|
|
131
|
+
| positive integer | Groups of `pipeline` records run inside `record_class.pipelined { ... }` | …only issues fast writers (`record.field!`) that tolerate being queued |
|
|
132
|
+
|
|
133
|
+
Note: `pipeline: 0` raises `ArgumentError`. Use `pipeline: nil` to disable pipelining.
|
|
134
|
+
|
|
135
|
+
The read-only case and the serial-write case collapse into the same mode because both require **immediate** execution with real return values. Wrapping `save` in an outer `pipelined` would either return `Redis::Future` objects or raise `ConflictingContextError` when `save`'s internal transaction tries to open.
|
|
136
|
+
|
|
137
|
+
### The three idiomatic patterns
|
|
138
|
+
|
|
139
|
+
```ruby
|
|
140
|
+
# 1. Read-only iteration — the default (pipeline: nil) is correct
|
|
141
|
+
User.instances.each_record do |user|
|
|
142
|
+
puts "#{user.email} #{user.last_login}"
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# 2. Serial writes — the default (pipeline: nil) is required for save / commit_fields / transaction
|
|
146
|
+
User.instances.each_record do |user|
|
|
147
|
+
user.score = recompute(user)
|
|
148
|
+
user.save
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# 3. Pipelined fast writers — opt-in optimization
|
|
152
|
+
User.instances.each_record(pipeline: 50) do |user|
|
|
153
|
+
user.last_seen_at! Familia.now # single HSET, safe to queue in pipeline
|
|
154
|
+
end
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Pipelining footgun
|
|
158
|
+
|
|
159
|
+
If you enable pipelining and your block reads from a related collection (e.g. `user.sessions.size`), that read is queued into the pipeline and returns a `Redis::Future` rather than a value. Omit the `pipeline:` parameter (or explicitly pass `pipeline: nil`) whenever the block needs real return values from Redis.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# Migrating to Familia 2.9.0
|
|
2
|
+
|
|
3
|
+
This version introduces batch iteration primitives for DataTypes, enabling efficient enumeration over large Redis collections. It also includes breaking changes to method names for clarity.
|
|
4
|
+
|
|
5
|
+
## Breaking Changes
|
|
6
|
+
|
|
7
|
+
### Method Renames
|
|
8
|
+
|
|
9
|
+
The multi-field update methods have been renamed to better reflect their purpose:
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
# Before (2.8.x)
|
|
13
|
+
user.batch_update(name: "Alice", email: "alice@example.com")
|
|
14
|
+
user.batch_fast_write(name: "Alice", email: "alice@example.com")
|
|
15
|
+
|
|
16
|
+
# After (2.9.0)
|
|
17
|
+
user.multi_field_update(name: "Alice", email: "alice@example.com")
|
|
18
|
+
user.multi_field_fast_write(name: "Alice", email: "alice@example.com")
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**Migration**: Find and replace `batch_update` with `multi_field_update` and `batch_fast_write` with `multi_field_fast_write`.
|
|
22
|
+
|
|
23
|
+
### MultiResult Namespace
|
|
24
|
+
|
|
25
|
+
`MultiResult` has moved into the Familia namespace:
|
|
26
|
+
|
|
27
|
+
```ruby
|
|
28
|
+
# Before (2.8.x)
|
|
29
|
+
result.is_a?(MultiResult)
|
|
30
|
+
|
|
31
|
+
# After (2.9.0)
|
|
32
|
+
result.is_a?(Familia::MultiResult)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
**Migration**: Replace bare `MultiResult` references with `Familia::MultiResult`.
|
|
36
|
+
|
|
37
|
+
## New Features
|
|
38
|
+
|
|
39
|
+
### Enumerable Integration
|
|
40
|
+
|
|
41
|
+
All collection DataTypes now include Ruby's `Enumerable` module, providing `each_slice`, `lazy`, `map`, `reduce`, `find`, and other stdlib methods:
|
|
42
|
+
|
|
43
|
+
```ruby
|
|
44
|
+
# Lazy iteration with transformation
|
|
45
|
+
Org.instances.lazy.map { |id| id.upcase }.take(10).to_a
|
|
46
|
+
|
|
47
|
+
# Batch processing with each_slice
|
|
48
|
+
User.instances.each_slice(100) do |batch|
|
|
49
|
+
batch.each { |id| process(id) }
|
|
50
|
+
end
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Filtered Iteration
|
|
54
|
+
|
|
55
|
+
Each DataType now supports type-specific filters on `each`:
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
# SortedSet: filter by score (timestamp) bounds
|
|
59
|
+
Org.instances.each(since: 24.hours.ago, until: Time.now) do |id|
|
|
60
|
+
puts id
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# HashKey: filter by field name pattern
|
|
64
|
+
user.profile.each(matching: "pref_*") do |field, value|
|
|
65
|
+
puts "#{field}: #{value}"
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# UnsortedSet: filter by member pattern
|
|
69
|
+
user.tags.each(matching: "admin*") do |tag|
|
|
70
|
+
puts tag
|
|
71
|
+
end
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### each_record for Loading Horreum Instances
|
|
75
|
+
|
|
76
|
+
`each_record` yields fully-loaded Horreum records instead of raw IDs:
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
# Load records in batches of 100
|
|
80
|
+
Org.instances.each_record(batch_size: 100) do |org|
|
|
81
|
+
org.tidy! # org is a loaded Horreum instance
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Control pipelining depth separately from fetch batch size
|
|
85
|
+
Org.instances.each_record(batch_size: 500, pipeline: 50) do |org|
|
|
86
|
+
org.status!("active")
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Serial execution (no pipelining) — this is the default
|
|
90
|
+
Org.instances.each_record(batch_size: 100) do |org|
|
|
91
|
+
org.complex_operation
|
|
92
|
+
end
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Ghost instances (keys that expired but remain in the `instances` sorted set) are automatically filtered and never reach the block.
|
|
96
|
+
|
|
97
|
+
### BatchResult for Aggregated Operations
|
|
98
|
+
|
|
99
|
+
`Familia::BatchResult` aggregates results from batch operations with per-record error isolation:
|
|
100
|
+
|
|
101
|
+
```ruby
|
|
102
|
+
result = Familia::BatchResult.collect(
|
|
103
|
+
Org.instances.each_record(batch_size: 100, since: 24.hours.ago)
|
|
104
|
+
) do |org|
|
|
105
|
+
org.tidy!
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
result.scanned # Total records yielded to block
|
|
109
|
+
result.modified # Count of truthy block returns
|
|
110
|
+
result.errors # Array of {id:, error:} for failed records
|
|
111
|
+
result.duration_ms # Total execution time
|
|
112
|
+
|
|
113
|
+
# Re-raise the first captured error after iteration completes
|
|
114
|
+
result = Familia::BatchResult.collect(enum, strict: true) { |r| r.process! }
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Concurrent Mutation Behavior
|
|
118
|
+
|
|
119
|
+
When iterating with `each` or `each_record`, be aware of Redis cursor semantics:
|
|
120
|
+
|
|
121
|
+
- Items present from iteration start to end are guaranteed to be returned
|
|
122
|
+
- Items added or removed mid-iteration may or may not appear
|
|
123
|
+
- Blocks should be idempotent to handle potential duplicates
|
|
124
|
+
|
|
125
|
+
This is inherent to ZSCAN/HSCAN/SSCAN and is documented, not a bug.
|
data/familia.gemspec
CHANGED
|
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
|
|
|
25
25
|
spec.add_dependency 'json_schemer', '~> 2.0'
|
|
26
26
|
spec.add_dependency 'logger', '~> 1.7'
|
|
27
27
|
spec.add_dependency 'oj', '~> 3.16'
|
|
28
|
-
spec.add_dependency 'redis', '>=
|
|
28
|
+
spec.add_dependency 'redis', '>= 5.0', '< 6.0'
|
|
29
29
|
spec.add_dependency 'stringio', '~> 3.1.1'
|
|
30
30
|
spec.add_dependency 'uri-valkey', '~> 1.4'
|
|
31
31
|
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# lib/familia/batch_result.rb
|
|
2
|
+
#
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
module Familia
|
|
6
|
+
# Represents the result of a batch iteration operation.
|
|
7
|
+
#
|
|
8
|
+
# BatchResult tracks statistics and errors when processing multiple records
|
|
9
|
+
# via methods like `each_record`. It provides aggregated metrics for the
|
|
10
|
+
# entire batch run, distinct from MultiResult which wraps a single
|
|
11
|
+
# MULTI/EXEC or pipeline operation.
|
|
12
|
+
#
|
|
13
|
+
# @attr_reader scanned [Integer] Total number of items iterated
|
|
14
|
+
# @attr_reader modified [Integer] Count of items where block returned truthy
|
|
15
|
+
# @attr_reader errors [Array<Hash>] Per-item errors as [{id:, error:}, ...]
|
|
16
|
+
# @attr_reader duration_ms [Float] Total elapsed time in milliseconds
|
|
17
|
+
#
|
|
18
|
+
# @example Using BatchResult.collect
|
|
19
|
+
# result = BatchResult.collect(User.instances.each_record) do |user|
|
|
20
|
+
# user.deactivate!
|
|
21
|
+
# end
|
|
22
|
+
# puts "Processed #{result.scanned}, modified #{result.modified}"
|
|
23
|
+
# puts "Errors: #{result.errors.size}" if result.errors?
|
|
24
|
+
#
|
|
25
|
+
# @example With strict mode
|
|
26
|
+
# # Re-raises first error after completing iteration
|
|
27
|
+
# BatchResult.collect(items, strict: true) { |item| item.process! }
|
|
28
|
+
#
|
|
29
|
+
class BatchResult
|
|
30
|
+
attr_reader :scanned, :modified, :errors, :duration_ms
|
|
31
|
+
|
|
32
|
+
# Creates a new BatchResult instance.
|
|
33
|
+
#
|
|
34
|
+
# @param scanned [Integer] Total items processed
|
|
35
|
+
# @param modified [Integer] Items where block returned truthy
|
|
36
|
+
# @param errors [Array<Hash>] Array of error hashes with :id and :error keys
|
|
37
|
+
# @param duration_ms [Float] Elapsed time in milliseconds
|
|
38
|
+
def initialize(scanned:, modified:, errors:, duration_ms:)
|
|
39
|
+
@scanned = scanned
|
|
40
|
+
@modified = modified
|
|
41
|
+
@errors = errors
|
|
42
|
+
@duration_ms = duration_ms
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Iterates over an enumerable, collecting statistics and errors.
|
|
46
|
+
#
|
|
47
|
+
# This is the primary factory method for creating BatchResult instances.
|
|
48
|
+
# It tracks how many items were processed, how many returned truthy values,
|
|
49
|
+
# and captures any exceptions that occur during iteration.
|
|
50
|
+
#
|
|
51
|
+
# @param enumerable [Enumerable] The collection to iterate
|
|
52
|
+
# @param strict [Boolean] When true, re-raises the first captured error
|
|
53
|
+
# after iteration completes. Default: false.
|
|
54
|
+
# @yield [item] Each item from the enumerable
|
|
55
|
+
# @yieldreturn [Object] Truthy return values increment the modified count
|
|
56
|
+
# @return [BatchResult] Aggregated result of the batch operation
|
|
57
|
+
#
|
|
58
|
+
# @example Basic usage
|
|
59
|
+
# result = BatchResult.collect(records) { |r| r.update!(status: 'done') }
|
|
60
|
+
#
|
|
61
|
+
# @example Strict mode re-raises errors
|
|
62
|
+
# begin
|
|
63
|
+
# BatchResult.collect(records, strict: true) { |r| r.validate! }
|
|
64
|
+
# rescue => e
|
|
65
|
+
# puts "Batch failed: #{e.message}"
|
|
66
|
+
# end
|
|
67
|
+
#
|
|
68
|
+
def self.collect(enumerable, strict: false)
|
|
69
|
+
scanned = 0
|
|
70
|
+
modified = 0
|
|
71
|
+
errors = []
|
|
72
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
73
|
+
|
|
74
|
+
enumerable.each do |*args|
|
|
75
|
+
scanned += 1
|
|
76
|
+
begin
|
|
77
|
+
result = yield(*args)
|
|
78
|
+
modified += 1 if result
|
|
79
|
+
rescue StandardError => e
|
|
80
|
+
# Extract identifier if possible
|
|
81
|
+
identifier = extract_identifier(args.length == 1 ? args[0] : args)
|
|
82
|
+
errors << { id: identifier, error: e }
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
duration_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000
|
|
87
|
+
|
|
88
|
+
batch_result = new(
|
|
89
|
+
scanned: scanned,
|
|
90
|
+
modified: modified,
|
|
91
|
+
errors: errors,
|
|
92
|
+
duration_ms: duration_ms
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# In strict mode, re-raise the first error after completing iteration
|
|
96
|
+
raise errors.first[:error] if strict && errors.any?
|
|
97
|
+
|
|
98
|
+
batch_result
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Checks if any errors occurred during the batch.
|
|
102
|
+
#
|
|
103
|
+
# @return [Boolean] true if at least one error was captured
|
|
104
|
+
def errors?
|
|
105
|
+
!errors.empty?
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Checks if the batch completed without errors.
|
|
109
|
+
#
|
|
110
|
+
# @return [Boolean] true if no errors occurred
|
|
111
|
+
def successful?
|
|
112
|
+
errors.empty?
|
|
113
|
+
end
|
|
114
|
+
alias success? successful?
|
|
115
|
+
|
|
116
|
+
# Returns the count of items that were scanned but neither modified nor errored.
|
|
117
|
+
#
|
|
118
|
+
# @return [Integer] Number of items where block returned falsy
|
|
119
|
+
def skipped
|
|
120
|
+
scanned - modified - errors.size
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Returns a hash representation of the result.
|
|
124
|
+
#
|
|
125
|
+
# @return [Hash] Result data including all metrics
|
|
126
|
+
def to_h
|
|
127
|
+
{
|
|
128
|
+
scanned: scanned,
|
|
129
|
+
modified: modified,
|
|
130
|
+
skipped: skipped,
|
|
131
|
+
errors: errors.size,
|
|
132
|
+
duration_ms: duration_ms.round(2),
|
|
133
|
+
successful: successful?
|
|
134
|
+
}
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
# Returns a human-readable summary.
|
|
138
|
+
#
|
|
139
|
+
# @return [String] Summary of the batch operation
|
|
140
|
+
def to_s
|
|
141
|
+
"BatchResult: scanned=#{scanned} modified=#{modified} errors=#{errors.size} duration=#{duration_ms.round(2)}ms"
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# @private
|
|
145
|
+
def self.extract_identifier(item)
|
|
146
|
+
if item.respond_to?(:identifier)
|
|
147
|
+
item.identifier
|
|
148
|
+
elsif item.respond_to?(:id)
|
|
149
|
+
item.id
|
|
150
|
+
else
|
|
151
|
+
item.to_s[0, 50]
|
|
152
|
+
end
|
|
153
|
+
rescue StandardError
|
|
154
|
+
nil
|
|
155
|
+
end
|
|
156
|
+
private_class_method :extract_identifier
|
|
157
|
+
end
|
|
158
|
+
end
|