familia 2.8.0 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.rst +105 -0
  3. data/Gemfile.lock +2 -2
  4. data/docs/guides/datatype-collections.md +159 -0
  5. data/docs/migrating/v2.9.0.md +125 -0
  6. data/familia.gemspec +1 -1
  7. data/lib/familia/batch_result.rb +158 -0
  8. data/lib/familia/data_type/collection_base.rb +129 -0
  9. data/lib/familia/data_type/scalar_base.rb +33 -0
  10. data/lib/familia/data_type/types/hashkey.rb +37 -0
  11. data/lib/familia/data_type/types/json_stringkey.rb +3 -1
  12. data/lib/familia/data_type/types/listkey.rb +41 -18
  13. data/lib/familia/data_type/types/sorted_set.rb +113 -18
  14. data/lib/familia/data_type/types/stringkey.rb +3 -1
  15. data/lib/familia/data_type/types/unsorted_set.rb +37 -14
  16. data/lib/familia/data_type.rb +2 -1
  17. data/lib/familia/features/encrypted_fields/encrypted_field_type.rb +2 -20
  18. data/lib/familia/features/expiration.rb +2 -2
  19. data/lib/familia/field_type.rb +1 -19
  20. data/lib/familia/horreum/definition.rb +1 -19
  21. data/lib/familia/horreum/management.rb +1 -1
  22. data/lib/familia/multi_result.rb +111 -0
  23. data/lib/familia/utils.rb +48 -0
  24. data/lib/familia/version.rb +1 -1
  25. data/lib/familia.rb +2 -1
  26. data/try/edge_cases/fast_writer_pipeline_support_try.rb +80 -0
  27. data/try/edge_cases/fast_writer_transaction_guard_try.rb +40 -59
  28. data/try/edge_cases/iterator_connection_errors_try.rb +97 -0
  29. data/try/edge_cases/pipeline_handler_edge_cases_try.rb +1 -1
  30. data/try/edge_cases/ttl_side_effects_try.rb +1 -1
  31. data/try/features/atomic_write_coverage_try.rb +1 -1
  32. data/try/features/atomic_write_try.rb +3 -3
  33. data/try/features/atomicity_try.rb +2 -2
  34. data/try/features/dirty_tracking_try.rb +21 -21
  35. data/try/features/instance_registry_try.rb +2 -2
  36. data/try/integration/connection/operation_mode_guards_try.rb +3 -3
  37. data/try/integration/connection/pipeline_fallback_integration_try.rb +4 -4
  38. data/try/integration/connection/pipeline_handler_integration_try.rb +3 -3
  39. data/try/integration/connection/pipeline_horreum_routing_try.rb +4 -4
  40. data/try/integration/connection/pools_try.rb +1 -1
  41. data/try/integration/connection/transaction_fallback_integration_try.rb +4 -4
  42. data/try/integration/connection/transaction_mode_permissive_try.rb +8 -8
  43. data/try/integration/connection/transaction_mode_strict_try.rb +2 -2
  44. data/try/integration/connection/transaction_mode_warn_try.rb +5 -5
  45. data/try/integration/connection/transaction_modes_try.rb +14 -14
  46. data/try/integration/data_types/datatype_pipelines_try.rb +9 -9
  47. data/try/integration/data_types/datatype_transactions_try.rb +17 -17
  48. data/try/integration/database_consistency_try.rb +1 -1
  49. data/try/integration/models/familia_object_try.rb +1 -1
  50. data/try/integration/transaction_safety_core_try.rb +1 -1
  51. data/try/integration/transaction_safety_workflow_try.rb +2 -2
  52. data/try/support/prototypes/atomic_saves_v2_connection_switching.rb +1 -1
  53. data/try/support/prototypes/lib/atomic_saves_v2_connection_switching_helpers.rb +1 -1
  54. data/try/support/prototypes/pooling/lib/connection_pool_stress_test.rb +1 -1
  55. data/try/unit/batch_result_try.rb +348 -0
  56. data/try/unit/data_types/each_record_try.rb +375 -0
  57. data/try/unit/data_types/enumerable_consistency/concurrent_modification_try.rb +176 -0
  58. data/try/unit/data_types/enumerable_consistency/hashkey_consistency_try.rb +224 -0
  59. data/try/unit/data_types/enumerable_consistency/large_scale_consistency_try.rb +292 -0
  60. data/try/unit/data_types/enumerable_consistency/listkey_consistency_try.rb +230 -0
  61. data/try/unit/data_types/enumerable_consistency/sorted_set_consistency_try.rb +241 -0
  62. data/try/unit/data_types/enumerable_consistency/unsorted_set_consistency_try.rb +261 -0
  63. data/try/unit/data_types/enumerable_try.rb +228 -0
  64. data/try/unit/data_types/hashkey_each_try.rb +213 -0
  65. data/try/unit/data_types/listkey_each_try.rb +222 -0
  66. data/try/unit/data_types/sorted_set_each_try.rb +227 -0
  67. data/try/unit/data_types/sorted_set_try.rb +44 -0
  68. data/try/unit/data_types/unsorted_set_each_try.rb +185 -0
  69. data/try/unit/horreum/base_try.rb +1 -1
  70. data/try/unit/horreum/destroy_related_fields_cleanup_try.rb +4 -4
  71. data/try/unit/horreum/initialization_try.rb +1 -1
  72. data/try/unit/horreum/json_type_preservation_try.rb +3 -3
  73. data/try/unit/horreum/multi_field_update_try.rb +143 -0
  74. data/try/unit/horreum/serialization_try.rb +14 -14
  75. data/try/unit/utils/future_aware_helpers_try.rb +128 -0
  76. metadata +26 -5
  77. data/docs/archive/.gitignore +0 -2
  78. data/lib/multi_result.rb +0 -109
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c6cb1ccd59d4290c1d75b70e114945180fc5dbb03aaeb405d841c250cd504179
4
- data.tar.gz: c2bd7946e7e024c8322d0bf48bea39c22cd61c7690589007f9ab32e6f12e614d
3
+ metadata.gz: d24c3b38092f1192f8e250553e8ef4d51b05c1bb00c48dfd7f6520d3a48a9a0e
4
+ data.tar.gz: 1dd0a2aa47682736209f116adf378eb4a0eccafffd7c151b02a8ef4573e52551
5
5
  SHA512:
6
- metadata.gz: 54d3c3020c53fe01de9baf6af78fbcc1ffbea72a671b6e6269c08c9c5eaab786bb267514aa64212ff8ec854e0363cfb1c1de9b9c66674e57cb067ed419a8944b
7
- data.tar.gz: d8b8ecd85ed1273c64ae75cab7559dee0ddb92fe5d552b60690c3a66ee9df93b3829aa32deef5438677ad9fb8d9acebb0bdbf071f3bccdb63161c4a772b1c021
6
+ metadata.gz: 10d69edb30370c7dba83b387f53429878fded6bf736e04fbefec4c228baf375806eeefa6c1d44c45c296e5d79b82345d1bc9cfbb0992ee18fc4c204dac250e10
7
+ data.tar.gz: 9b52e601ae93d44a6db8b0f995828ae45d55872377a471658a8d63210607c56f70958e372654ac20a13b07344e9b9da0afcdaaa50203f0796326d5d08e93bf6b
data/CHANGELOG.rst CHANGED
@@ -7,6 +7,111 @@ The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.1.0/>`
7
7
 
8
8
  <!--scriv-insert-here-->
9
9
 
10
+ .. _changelog-2.9.1:
11
+
12
+ 2.9.1 — 2026-05-18
13
+ ==================
14
+
15
+ Added
16
+ -----
17
+
18
+ - ``SortedSet#update`` (aliased ``merge!``) for bulk member insertion. A sorted
19
+ set is ``member => score`` -- the same pair shape as ``HashKey``'s
20
+ ``field => value`` -- so it follows the established ``HashKey#update``/``merge!``
21
+ convention (a single Hash argument) rather than the variadic splat used by the
22
+ value-only ``UnsortedSet``/``ListKey``. Pass ``{member => score}`` to issue one
23
+ ``ZADD`` instead of one round-trip per member. Validates the argument is a Hash
24
+ and that every score is ``Numeric`` (a missing/``nil`` score raises a clear
25
+ ``ArgumentError`` instead of a low-level client error -- unlike single-value
26
+ ``#add``, the bulk path does not default a missing score to ``Familia.now``).
27
+ Cascades expiration, and is a no-op returning ``0`` for empty input. The
28
+ single-value ``SortedSet#add`` (and its array-as-single-member contract) is
29
+ unchanged. PR #269
30
+
31
+ Changed
32
+ -------
33
+
34
+ - Bulk-write optimization for multi-value collection mutations. ``UnsortedSet#add``,
35
+ ``ListKey#push``, and ``ListKey#unshift`` previously issued one Redis command per
36
+ element (a loop of ``SADD``/``RPUSH``/``LPUSH`` calls), making large populations
37
+ slow even when pipelined. They now serialize all values and issue a single bulk
38
+ ``SADD``/``RPUSH``/``LPUSH`` command. Element ordering, ``nil`` compaction, nested
39
+ array flattening, return values, dirty-write warnings, and expiration cascading
40
+ are unchanged; empty calls remain no-ops. PR #269
41
+
42
+ AI Assistance
43
+ -------------
44
+
45
+ - AI investigated all collection ``DataType`` classes for the same per-element
46
+ loop anti-pattern, identified the three affected methods, verified
47
+ behavior-preservation (ordering, edge cases, chainability) at the Redis wire
48
+ level, and confirmed zero regressions against the existing test suites. The
49
+ ``SortedSet#update`` API shape was chosen by priority order: existing Familia
50
+ conventions first (the ``HashKey#update``/``merge!`` precedent for keyed
51
+ collections), then the upstream redis-rb bulk ``ZADD`` form, then Ruby
52
+ ``Hash#merge!`` semantics as confirmation.
53
+
54
+ .. _changelog-2.9.0:
55
+
56
+ 2.9.0 — 2026-05-17
57
+ ==================
58
+
59
+ Added
60
+ -----
61
+
62
+ - Batch iteration primitives for DataTypes via ``Enumerable`` integration:
63
+
64
+ - All DataTypes (``SortedSet``, ``HashKey``, ``UnsortedSet``, ``ListKey``) now
65
+ ``include Enumerable``, providing ``each_slice``, ``lazy``, ``map``, ``reduce``,
66
+ ``find``, and other stdlib methods.
67
+
68
+ - **SortedSet#each(since:, until:)**: Cursor-based iteration with optional
69
+ timestamp bounds. Uses ZRANGEBYSCORE when bounds provided (inclusive),
70
+ ZSCAN otherwise. Accepts Time objects or numeric scores.
71
+
72
+ - **HashKey#each(matching:)**: Cursor-based iteration via HSCAN with optional
73
+ glob pattern filter on field names.
74
+
75
+ - **UnsortedSet#each(matching:)**: Cursor-based iteration via SSCAN with optional
76
+ glob pattern filter using Redis SSCAN MATCH on raw values.
77
+
78
+ - **ListKey#each(batch_size:)**: Memory-efficient LRANGE pagination for large lists.
79
+
80
+ - ``DataType#each_record(batch_size:, pipeline:, **filters)`` yields loaded
81
+ Horreum records (not raw IDs) via ``load_multi``. Ghost instances (expired keys
82
+ still in ``instances``) are automatically filtered. The ``pipeline:`` parameter
83
+ controls pipelining depth (``nil`` for serial execution).
84
+
85
+ - ``Familia::BatchResult`` value type for aggregating batch operation results:
86
+
87
+ - ``BatchResult.collect(enumerable, strict: false) { |record| ... }`` iterates
88
+ any Enumerable, tracking ``scanned``, ``modified`` (truthy returns), ``errors``
89
+ (array of ``{id:, error:}``), and ``duration_ms``.
90
+
91
+ - Per-record exception isolation: errors are captured and iteration continues.
92
+
93
+ - ``strict: true`` re-raises the first collected error after iteration completes.
94
+
95
+ Changed
96
+ -------
97
+
98
+ - Renamed batch field-update methods for clarity:
99
+
100
+ - ``batch_update`` is now ``multi_field_update``
101
+ - ``batch_fast_write`` is now ``multi_field_fast_write``
102
+
103
+ Old names removed without deprecation shim (breaking change).
104
+
105
+ - Moved ``MultiResult`` into Familia namespace as ``Familia::MultiResult``.
106
+ Old top-level constant removed without backwards-compat alias (breaking change).
107
+
108
+ AI Assistance
109
+ -------------
110
+
111
+ - Implementation and test coverage developed with parallel Claude Code agents:
112
+ one for production code (DataType iteration, BatchResult, renames), one for
113
+ Tryouts test suite (228 new tests across 8 files). PR #264.
114
+
10
115
  .. _changelog-2.8.0:
11
116
 
12
117
  2.8.0 — 2026-05-15
data/Gemfile.lock CHANGED
@@ -1,14 +1,14 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- familia (2.8.0)
4
+ familia (2.9.1)
5
5
  concurrent-ruby (~> 1.3)
6
6
  connection_pool (>= 2.4, < 4.0)
7
7
  csv (~> 3.3)
8
8
  json_schemer (~> 2.0)
9
9
  logger (~> 1.7)
10
10
  oj (~> 3.16)
11
- redis (>= 4.8.1, < 6.0)
11
+ redis (>= 5.0, < 6.0)
12
12
  stringio (~> 3.1.1)
13
13
  uri-valkey (~> 1.4)
14
14
 
@@ -0,0 +1,159 @@
1
+ # docs/guides/datatype-collections.md
2
+ ---
3
+
4
+ # DataType - Collection classes
5
+
6
+ The `UnsortedSet`, `SortedSet`, `ListKey`, and `HashKey` data types all include the `Collection` module. This guide covers two performance-sensitive concerns: writing many elements efficiently (a single bulk command instead of one round-trip per element), and iterating large collections efficiently via `each` and `each_record`.
7
+
8
+ ## Bulk writes — single round-trip mutations
9
+
10
+ Collection mutations are **immediate** — every call hits Valkey/Redis right away, unlike scalar `field` setters which are deferred until `save`. Each call also runs `warn_if_dirty!` and cascades expiration. (See the write-model notes in `CLAUDE.md` for the deferred-vs-immediate split.)
11
+
12
+ Multi-element adds issue **one** command for the whole batch, not one per element. Populating a large collection is therefore a single round-trip even without an explicit pipeline.
13
+
14
+ The argument shape follows the collection's structure, and is consistent across the codebase:
15
+
16
+ - **Value-only** collections (`UnsortedSet`, `ListKey`) take a **variadic splat**; arguments are flattened and `nil`-compacted.
17
+ - **Keyed/pair** collections (`HashKey` is `field => value`, `SortedSet` is `member => score`) take a **single Hash** via `update` (aliased `merge!`), raising `ArgumentError` on a non-Hash.
18
+
19
+ | Type | Bulk method | Call shape | Redis command |
20
+ |---|---|---|---|
21
+ | `UnsortedSet` | `add(*values)` | `tags.add(:a, :b, :c)` | one `SADD` |
22
+ | `ListKey` | `push(*values)` / `unshift(*values)` | `log.push(1, 2, 3)` | one `RPUSH` / `LPUSH` |
23
+ | `HashKey` | `update(hash)` / `merge!` | `cfg.update(a: 1, b: 2)` | one `HMSET` |
24
+ | `SortedSet` | `update(hash)` / `merge!` | `board.update("alice" => 1000, "bob" => 850)` | one `ZADD` |
25
+
26
+ ```ruby
27
+ tags.add(:ruby, :redis, :valkey) # 1 SADD, returns self
28
+ log.push("a", "b", "c") # 1 RPUSH → [a, b, c]
29
+ board.update("alice" => 1000, "bob" => 850) # 1 ZADD, returns new-member count (2)
30
+ board.merge!("alice" => 1200) # 1 ZADD, score updated → returns 0
31
+ ```
32
+
33
+ Behavior notes:
34
+
35
+ - **Ordering**: `push` preserves argument order; `unshift` prepends each element in turn, so `unshift(a, b, c)` leaves the list head as `c, b, a` (Redis `LPUSH` semantics — unchanged from the prior per-element implementation). Sets are unordered; sorted sets order by score.
36
+ - **Empty input is a no-op**: `add()` / `push()` / `update({})` issue no command. Set/list adds return `self`; `SortedSet#update` returns `0`.
37
+ - **`SortedSet#add(val, score, …)` is unchanged and not bulk** — it takes a single member plus score and the conditional ZADD options (`nx:`, `xx:`, `gt:`, `lt:`, `ch:`). An Array passed as `val` is stored as one JSON-encoded member, not exploded into many. Use `update`/`merge!` for bulk insertion.
38
+
39
+ The iteration methods `each` and `each_record` efficiently handle large collections by paginating through Valkey/Redis data structures, but they serve different purposes and yield different results. Here's how the two iterate, using `ModelClass.instances` (a `SortedSet` with `reference: true`) as the running example.
40
+
41
+ ## `each` — yields **members** (identifiers, raw strings)
42
+
43
+ `each` is implemented per type. For the `instances` SortedSet, it pages through the ZSET with either `ZRANGEBYSCORE` (when `since:`/`until:` are given) or `ZSCAN` (unbounded), yielding one deserialized member at a time.
44
+
45
+ ```mermaid
46
+ flowchart TD
47
+ Caller["ModelClass.instances.each { |id| ... }"] --> EachImpl["SortedSet#each"]
48
+ EachImpl --> Decide{since/until?}
49
+ Decide -- yes --> ZRBS["ZRANGEBYSCORE key min max LIMIT 0 batch_size WITHSCORES"]
50
+ Decide -- no --> ZSCAN["ZSCAN key cursor COUNT batch_size"]
51
+ ZRBS --> Page["Page of raw members"]
52
+ ZSCAN --> Page
53
+ Page --> Yield["yield deserialize_value(member)"]
54
+ Yield --> More{more pages?}
55
+ More -- yes --> Decide
56
+ More -- no --> Done["return self"]
57
+ ```
58
+
59
+ Per-type variations:
60
+ - `ListKey#each` — paginates with `LRANGE start stop` (no SCAN equivalent)
61
+ - `UnsortedSet#each` / `HashKey#each` — `SSCAN` / `HSCAN`, optional `matching:` glob
62
+ - `SortedSet#each` — `ZRANGEBYSCORE` (bounded) or `ZSCAN` (unbounded)
63
+
64
+ You get **identifiers only**. No record loading. One Redis round-trip per page.
65
+
66
+ ## `each_record` — yields **loaded Horreum records**
67
+
68
+ `each_record` is defined once in `CollectionBase` and delegates to `each` to collect identifiers, then batches them into `record_class.load_multi` (pipelined `HGETALL`s), filters ghosts, and yields the live records.
69
+
70
+ ```mermaid
71
+ flowchart TD
72
+ Caller["ModelClass.instances.each_record { |rec| ... }"] --> ER["each_record(batch_size, pipeline, **filters)"]
73
+ ER --> Validate{"pipeline <= batch_size?"}
74
+ Validate -- no --> Raise["raise ArgumentError"]
75
+ Validate -- yes --> CallEach["each(**filters) do |member|"]
76
+ CallEach --> Extract["id = member.is_a?(Array) ? member.first : member"]
77
+ Extract --> Buffer["buffer << id"]
78
+ Buffer --> Full{"buffer.size >= batch_size?"}
79
+ Full -- no --> CallEach
80
+ Full -- yes --> Load["record_class.load_multi(ids) -- pipelined HGETALLs"]
81
+ Load --> Compact["live = records.compact -- drop ghosts"]
82
+ Compact --> Mode{pipeline?}
83
+ Mode -- nil --> Serial["live.each { |r| block.call(r) }"]
84
+ Mode -- positive --> Pipe["live.each_slice(pipeline) do |group|<br/>record_class.pipelined { group.each &block }<br/>end"]
85
+ Serial --> Clear["buffer.clear; resume each"]
86
+ Pipe --> Clear
87
+ Clear --> CallEach
88
+ CallEach -. each exhausted .-> Flush["process_batch(buffer) if any remain"]
89
+ Flush --> Return["return self"]
90
+ ```
91
+
92
+ ### Concrete timeline for `User.instances.each_record(batch_size: 100, pipeline: 25) { |u| u.touch! }`
93
+
94
+ ```
95
+ SortedSet#each (ZSCAN page 1, 100 ids)
96
+ ├─ buffer fills to 100
97
+ ├─ load_multi(ids) → 1 pipeline of 100 HGETALLs
98
+ ├─ compact ghosts → e.g. 97 live records
99
+ ├─ slice(25):
100
+ │ pipelined { 25 × u.touch! } ← 1 Redis pipeline
101
+ │ pipelined { 25 × u.touch! } ← 1 Redis pipeline
102
+ │ pipelined { 25 × u.touch! } ← 1 Redis pipeline
103
+ │ pipelined { 22 × u.touch! } ← 1 Redis pipeline
104
+ └─ buffer.clear
105
+ SortedSet#each (ZSCAN page 2, 100 ids)
106
+ └─ … repeat …
107
+ SortedSet#each exhausted
108
+ └─ flush any remaining buffered ids the same way
109
+ ```
110
+
111
+ ## Key differences
112
+
113
+ | Aspect | `each` | `each_record` |
114
+ |---|---|---|
115
+ | Yields | raw identifier (or `[field, value]` for `HashKey`) | loaded Horreum instance |
116
+ | Redis ops per yield | 0 extra (already paged) | amortized `HGETALL` via `load_multi` batch |
117
+ | Requires `reference: true` + `:class` | no | yes (raises `Familia::Problem` otherwise) |
118
+ | Ghost handling | yields the dangling id | `compact` drops them silently |
119
+ | Write pipelining | not built-in | `pipeline:` groups block-body writes into `pipelined` blocks |
120
+ | Filters | type-specific (`since:`, `matching:`, …) | forwarded to underlying `each` |
121
+
122
+ So `each_record` is a thin orchestration layer: it leans on the type's own `each` for read pagination, then layers (1) batched record hydration and (2) optional write pipelining on top.
123
+
124
+ ## Choosing a `pipeline` mode
125
+
126
+ `each_record` has two dispatch modes, controlled by `pipeline:`. The parameter answers a single question: **may the dispatch loop wrap your block in a `pipelined { }`?**
127
+
128
+ | Value | Dispatch | Use when the block… |
129
+ |---|---|---|
130
+ | `nil` (default) | Each record runs in its own connection context, no pipeline wrapper | …reads, OR calls `save` / `commit_fields` / `transaction` / anything with its own internal MULTI |
131
+ | positive integer | Groups of `pipeline` records run inside `record_class.pipelined { ... }` | …only issues fast writers (`record.field!`) that tolerate being queued |
132
+
133
+ Note: `pipeline: 0` raises `ArgumentError`. Use `pipeline: nil` to disable pipelining.
134
+
135
+ The read-only case and the serial-write case collapse into the same mode because both require **immediate** execution with real return values. Wrapping `save` in an outer `pipelined` would either return `Redis::Future` objects or raise `ConflictingContextError` when `save`'s internal transaction tries to open.
136
+
137
+ ### The three idiomatic patterns
138
+
139
+ ```ruby
140
+ # 1. Read-only iteration — the default (pipeline: nil) is correct
141
+ User.instances.each_record do |user|
142
+ puts "#{user.email} #{user.last_login}"
143
+ end
144
+
145
+ # 2. Serial writes — the default (pipeline: nil) is required for save / commit_fields / transaction
146
+ User.instances.each_record do |user|
147
+ user.score = recompute(user)
148
+ user.save
149
+ end
150
+
151
+ # 3. Pipelined fast writers — opt-in optimization
152
+ User.instances.each_record(pipeline: 50) do |user|
153
+ user.last_seen_at! Familia.now # single HSET, safe to queue in pipeline
154
+ end
155
+ ```
156
+
157
+ ### Pipelining footgun
158
+
159
+ If you enable pipelining and your block reads from a related collection (e.g. `user.sessions.size`), that read is queued into the pipeline and returns a `Redis::Future` rather than a value. Omit the `pipeline:` parameter (or explicitly pass `pipeline: nil`) whenever the block needs real return values from Redis.
@@ -0,0 +1,125 @@
1
+ # Migrating to Familia 2.9.0
2
+
3
+ This version introduces batch iteration primitives for DataTypes, enabling efficient enumeration over large Redis collections. It also includes breaking changes to method names for clarity.
4
+
5
+ ## Breaking Changes
6
+
7
+ ### Method Renames
8
+
9
+ The multi-field update methods have been renamed to better reflect their purpose:
10
+
11
+ ```ruby
12
+ # Before (2.8.x)
13
+ user.batch_update(name: "Alice", email: "alice@example.com")
14
+ user.batch_fast_write(name: "Alice", email: "alice@example.com")
15
+
16
+ # After (2.9.0)
17
+ user.multi_field_update(name: "Alice", email: "alice@example.com")
18
+ user.multi_field_fast_write(name: "Alice", email: "alice@example.com")
19
+ ```
20
+
21
+ **Migration**: Find and replace `batch_update` with `multi_field_update` and `batch_fast_write` with `multi_field_fast_write`.
22
+
23
+ ### MultiResult Namespace
24
+
25
+ `MultiResult` has moved into the Familia namespace:
26
+
27
+ ```ruby
28
+ # Before (2.8.x)
29
+ result.is_a?(MultiResult)
30
+
31
+ # After (2.9.0)
32
+ result.is_a?(Familia::MultiResult)
33
+ ```
34
+
35
+ **Migration**: Replace bare `MultiResult` references with `Familia::MultiResult`.
36
+
37
+ ## New Features
38
+
39
+ ### Enumerable Integration
40
+
41
+ All collection DataTypes now include Ruby's `Enumerable` module, providing `each_slice`, `lazy`, `map`, `reduce`, `find`, and other stdlib methods:
42
+
43
+ ```ruby
44
+ # Lazy iteration with transformation
45
+ Org.instances.lazy.map { |id| id.upcase }.take(10).to_a
46
+
47
+ # Batch processing with each_slice
48
+ User.instances.each_slice(100) do |batch|
49
+ batch.each { |id| process(id) }
50
+ end
51
+ ```
52
+
53
+ ### Filtered Iteration
54
+
55
+ Each DataType now supports type-specific filters on `each`:
56
+
57
+ ```ruby
58
+ # SortedSet: filter by score (timestamp) bounds
59
+ Org.instances.each(since: 24.hours.ago, until: Time.now) do |id|
60
+ puts id
61
+ end
62
+
63
+ # HashKey: filter by field name pattern
64
+ user.profile.each(matching: "pref_*") do |field, value|
65
+ puts "#{field}: #{value}"
66
+ end
67
+
68
+ # UnsortedSet: filter by member pattern
69
+ user.tags.each(matching: "admin*") do |tag|
70
+ puts tag
71
+ end
72
+ ```
73
+
74
+ ### each_record for Loading Horreum Instances
75
+
76
+ `each_record` yields fully-loaded Horreum records instead of raw IDs:
77
+
78
+ ```ruby
79
+ # Load records in batches of 100
80
+ Org.instances.each_record(batch_size: 100) do |org|
81
+ org.tidy! # org is a loaded Horreum instance
82
+ end
83
+
84
+ # Control pipelining depth separately from fetch batch size
85
+ Org.instances.each_record(batch_size: 500, pipeline: 50) do |org|
86
+ org.status!("active")
87
+ end
88
+
89
+ # Serial execution (no pipelining) — this is the default
90
+ Org.instances.each_record(batch_size: 100) do |org|
91
+ org.complex_operation
92
+ end
93
+ ```
94
+
95
+ Ghost instances (keys that expired but remain in the `instances` sorted set) are automatically filtered and never reach the block.
96
+
97
+ ### BatchResult for Aggregated Operations
98
+
99
+ `Familia::BatchResult` aggregates results from batch operations with per-record error isolation:
100
+
101
+ ```ruby
102
+ result = Familia::BatchResult.collect(
103
+ Org.instances.each_record(batch_size: 100, since: 24.hours.ago)
104
+ ) do |org|
105
+ org.tidy!
106
+ end
107
+
108
+ result.scanned # Total records yielded to block
109
+ result.modified # Count of truthy block returns
110
+ result.errors # Array of {id:, error:} for failed records
111
+ result.duration_ms # Total execution time
112
+
113
+ # Re-raise the first captured error after iteration completes
114
+ result = Familia::BatchResult.collect(enum, strict: true) { |r| r.process! }
115
+ ```
116
+
117
+ ## Concurrent Mutation Behavior
118
+
119
+ When iterating with `each` or `each_record`, be aware of Redis cursor semantics:
120
+
121
+ - Items present from iteration start to end are guaranteed to be returned
122
+ - Items added or removed mid-iteration may or may not appear
123
+ - Blocks should be idempotent to handle potential duplicates
124
+
125
+ This is inherent to ZSCAN/HSCAN/SSCAN and is documented, not a bug.
data/familia.gemspec CHANGED
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
25
25
  spec.add_dependency 'json_schemer', '~> 2.0'
26
26
  spec.add_dependency 'logger', '~> 1.7'
27
27
  spec.add_dependency 'oj', '~> 3.16'
28
- spec.add_dependency 'redis', '>= 4.8.1', '< 6.0'
28
+ spec.add_dependency 'redis', '>= 5.0', '< 6.0'
29
29
  spec.add_dependency 'stringio', '~> 3.1.1'
30
30
  spec.add_dependency 'uri-valkey', '~> 1.4'
31
31
 
@@ -0,0 +1,158 @@
1
+ # lib/familia/batch_result.rb
2
+ #
3
+ # frozen_string_literal: true
4
+
5
+ module Familia
6
+ # Represents the result of a batch iteration operation.
7
+ #
8
+ # BatchResult tracks statistics and errors when processing multiple records
9
+ # via methods like `each_record`. It provides aggregated metrics for the
10
+ # entire batch run, distinct from MultiResult which wraps a single
11
+ # MULTI/EXEC or pipeline operation.
12
+ #
13
+ # @attr_reader scanned [Integer] Total number of items iterated
14
+ # @attr_reader modified [Integer] Count of items where block returned truthy
15
+ # @attr_reader errors [Array<Hash>] Per-item errors as [{id:, error:}, ...]
16
+ # @attr_reader duration_ms [Float] Total elapsed time in milliseconds
17
+ #
18
+ # @example Using BatchResult.collect
19
+ # result = BatchResult.collect(User.instances.each_record) do |user|
20
+ # user.deactivate!
21
+ # end
22
+ # puts "Processed #{result.scanned}, modified #{result.modified}"
23
+ # puts "Errors: #{result.errors.size}" if result.errors?
24
+ #
25
+ # @example With strict mode
26
+ # # Re-raises first error after completing iteration
27
+ # BatchResult.collect(items, strict: true) { |item| item.process! }
28
+ #
29
+ class BatchResult
30
+ attr_reader :scanned, :modified, :errors, :duration_ms
31
+
32
+ # Creates a new BatchResult instance.
33
+ #
34
+ # @param scanned [Integer] Total items processed
35
+ # @param modified [Integer] Items where block returned truthy
36
+ # @param errors [Array<Hash>] Array of error hashes with :id and :error keys
37
+ # @param duration_ms [Float] Elapsed time in milliseconds
38
+ def initialize(scanned:, modified:, errors:, duration_ms:)
39
+ @scanned = scanned
40
+ @modified = modified
41
+ @errors = errors
42
+ @duration_ms = duration_ms
43
+ end
44
+
45
+ # Iterates over an enumerable, collecting statistics and errors.
46
+ #
47
+ # This is the primary factory method for creating BatchResult instances.
48
+ # It tracks how many items were processed, how many returned truthy values,
49
+ # and captures any exceptions that occur during iteration.
50
+ #
51
+ # @param enumerable [Enumerable] The collection to iterate
52
+ # @param strict [Boolean] When true, re-raises the first captured error
53
+ # after iteration completes. Default: false.
54
+ # @yield [item] Each item from the enumerable
55
+ # @yieldreturn [Object] Truthy return values increment the modified count
56
+ # @return [BatchResult] Aggregated result of the batch operation
57
+ #
58
+ # @example Basic usage
59
+ # result = BatchResult.collect(records) { |r| r.update!(status: 'done') }
60
+ #
61
+ # @example Strict mode re-raises errors
62
+ # begin
63
+ # BatchResult.collect(records, strict: true) { |r| r.validate! }
64
+ # rescue => e
65
+ # puts "Batch failed: #{e.message}"
66
+ # end
67
+ #
68
+ def self.collect(enumerable, strict: false)
69
+ scanned = 0
70
+ modified = 0
71
+ errors = []
72
+ start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
73
+
74
+ enumerable.each do |*args|
75
+ scanned += 1
76
+ begin
77
+ result = yield(*args)
78
+ modified += 1 if result
79
+ rescue StandardError => e
80
+ # Extract identifier if possible
81
+ identifier = extract_identifier(args.length == 1 ? args[0] : args)
82
+ errors << { id: identifier, error: e }
83
+ end
84
+ end
85
+
86
+ duration_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000
87
+
88
+ batch_result = new(
89
+ scanned: scanned,
90
+ modified: modified,
91
+ errors: errors,
92
+ duration_ms: duration_ms
93
+ )
94
+
95
+ # In strict mode, re-raise the first error after completing iteration
96
+ raise errors.first[:error] if strict && errors.any?
97
+
98
+ batch_result
99
+ end
100
+
101
+ # Checks if any errors occurred during the batch.
102
+ #
103
+ # @return [Boolean] true if at least one error was captured
104
+ def errors?
105
+ !errors.empty?
106
+ end
107
+
108
+ # Checks if the batch completed without errors.
109
+ #
110
+ # @return [Boolean] true if no errors occurred
111
+ def successful?
112
+ errors.empty?
113
+ end
114
+ alias success? successful?
115
+
116
+ # Returns the count of items whose block ran without raising but returned a falsy value (scanned minus modified minus errored).
117
+ #
118
+ # @return [Integer] Number of items where block returned falsy
119
+ def skipped
120
+ scanned - modified - errors.size
121
+ end
122
+
123
+ # Returns a hash representation of the result.
124
+ #
125
+ # @return [Hash] Result data including all metrics
126
+ def to_h
127
+ {
128
+ scanned: scanned,
129
+ modified: modified,
130
+ skipped: skipped,
131
+ errors: errors.size,
132
+ duration_ms: duration_ms.round(2),
133
+ successful: successful?
134
+ }
135
+ end
136
+
137
+ # Returns a human-readable summary.
138
+ #
139
+ # @return [String] Summary of the batch operation
140
+ def to_s
141
+ "BatchResult: scanned=#{scanned} modified=#{modified} errors=#{errors.size} duration=#{duration_ms.round(2)}ms"
142
+ end
143
+
144
+ # @private
145
+ def self.extract_identifier(item)
146
+ if item.respond_to?(:identifier)
147
+ item.identifier
148
+ elsif item.respond_to?(:id)
149
+ item.id
150
+ else
151
+ item.to_s[0, 50]
152
+ end
153
+ rescue StandardError
154
+ nil
155
+ end
156
+ private_class_method :extract_identifier
157
+ end
158
+ end