familia 2.9.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/claude-code-review.yml +1 -1
  3. data/.gitignore +1 -1
  4. data/AGENTS.md +198 -0
  5. data/CHANGELOG.rst +193 -0
  6. data/Gemfile.lock +2 -2
  7. data/README.md +2 -2
  8. data/docs/guides/datatype-collections.md +159 -0
  9. data/docs/guides/getting-started.md +87 -0
  10. data/docs/guides/index.md +4 -0
  11. data/docs/migrating/v2.10.0.md +167 -0
  12. data/docs/migrating/v2.9.0.md +3 -3
  13. data/examples/encrypted_fields.rb +43 -29
  14. data/examples/relationships.rb +66 -36
  15. data/examples/safe_dump.rb +7 -5
  16. data/familia.gemspec +0 -1
  17. data/lib/familia/data_type/collection_base.rb +18 -19
  18. data/lib/familia/data_type/serialization.rb +15 -2
  19. data/lib/familia/data_type/types/json_stringkey.rb +1 -1
  20. data/lib/familia/data_type/types/listkey.rb +7 -5
  21. data/lib/familia/data_type/types/sorted_set.rb +45 -0
  22. data/lib/familia/data_type/types/stringkey.rb +1 -1
  23. data/lib/familia/data_type/types/unsorted_set.rb +2 -1
  24. data/lib/familia/data_type.rb +163 -7
  25. data/lib/familia/encryption/encrypted_data.rb +27 -2
  26. data/lib/familia/encryption/manager.rb +17 -2
  27. data/lib/familia/encryption/providers/xchacha20_poly1305_provider.rb +4 -1
  28. data/lib/familia/encryption/request_cache.rb +4 -28
  29. data/lib/familia/encryption.rb +1 -0
  30. data/lib/familia/features/encrypted_fields/concealed_string.rb +12 -0
  31. data/lib/familia/features/encrypted_fields/encrypted_field_type.rb +52 -87
  32. data/lib/familia/features/encrypted_fields.rb +14 -0
  33. data/lib/familia/features/expiration.rb +2 -2
  34. data/lib/familia/features/relationships/indexing/unique_index_generators.rb +18 -3
  35. data/lib/familia/features/relationships/indexing.rb +6 -2
  36. data/lib/familia/field_type.rb +1 -19
  37. data/lib/familia/horreum/atomic_write.rb +107 -22
  38. data/lib/familia/horreum/definition.rb +52 -19
  39. data/lib/familia/horreum/dirty_tracking.rb +28 -0
  40. data/lib/familia/horreum/management.rb +116 -4
  41. data/lib/familia/horreum/persistence.rb +17 -4
  42. data/lib/familia/horreum/related_fields.rb +2 -0
  43. data/lib/familia/horreum.rb +1 -0
  44. data/lib/familia/instrumentation.rb +22 -0
  45. data/lib/familia/logging.rb +24 -3
  46. data/lib/familia/settings.rb +79 -1
  47. data/lib/familia/utils.rb +48 -0
  48. data/lib/familia/version.rb +1 -1
  49. data/lib/middleware/database_logger.rb +208 -128
  50. data/try/audit/audit_cross_references_try.rb +6 -6
  51. data/try/audit/audit_unique_indexes_try.rb +3 -2
  52. data/try/audit/repair_all_integration_try.rb +2 -1
  53. data/try/audit/repair_indexes_try.rb +2 -1
  54. data/try/edge_cases/fast_writer_pipeline_support_try.rb +80 -0
  55. data/try/edge_cases/fast_writer_transaction_guard_try.rb +40 -59
  56. data/try/features/atomic_write_watch_try.rb +164 -0
  57. data/try/features/build_block_try.rb +191 -0
  58. data/try/features/create_block_try.rb +58 -0
  59. data/try/features/dirty_write_new_object_try.rb +181 -0
  60. data/try/features/dirty_write_warnings_try.rb +456 -0
  61. data/try/features/encrypted_fields/aad_transient_fix_try.rb +164 -0
  62. data/try/features/encrypted_fields/aad_transient_proof_try.rb +253 -0
  63. data/try/features/encrypted_fields/concealed_string_core_try.rb +6 -4
  64. data/try/features/encrypted_fields/encrypted_data_try.rb +151 -0
  65. data/try/features/encrypted_fields/encrypted_fields_integration_try.rb +3 -0
  66. data/try/features/encrypted_fields/envelope_version_branching_try.rb +106 -0
  67. data/try/features/encrypted_fields/envelope_version_try.rb +171 -0
  68. data/try/features/encrypted_fields/key_material_try.rb +205 -0
  69. data/try/features/encryption/request_cache_try.rb +88 -0
  70. data/try/features/relationships/participation_reverse_methods_try.rb +3 -2
  71. data/try/features/relationships/unique_index_each_record_try.rb +143 -0
  72. data/try/features/safe_dump/safe_dump_advanced_try.rb +1 -1
  73. data/try/integration/examples/encrypted_fields_example_try.rb +67 -0
  74. data/try/integration/examples/relationships_example_try.rb +69 -0
  75. data/try/integration/examples/safe_dump_example_try.rb +60 -0
  76. data/try/unit/core/trace_caching_try.rb +58 -0
  77. data/try/unit/data_types/each_record_try.rb +90 -13
  78. data/try/unit/data_types/sorted_set_try.rb +44 -0
  79. data/try/unit/data_types/stringkey_extended_try.rb +1 -1
  80. data/try/unit/horreum/destroy_related_fields_cleanup_try.rb +3 -3
  81. data/try/unit/horreum/relations_try.rb +5 -0
  82. data/try/unit/middleware/database_logger_capture_toggle_try.rb +278 -0
  83. data/try/unit/utils/future_aware_helpers_try.rb +128 -0
  84. metadata +25 -3
  85. data/CLAUDE.md +0 -322
  86. data/docs/archive/.gitignore +0 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 42bbbbcb737ab4222505955e5b1af2ab72ed962ba80a02cf324206b4f2379b94
4
- data.tar.gz: 331b1d0bb0808618a87d962e1b09af61a31d59b7b8d56b0f49513cb9f3fec63d
3
+ metadata.gz: ab706939f766966f0471ff1285db5def4d14bfc9b0d428c5b0caf481595d57c2
4
+ data.tar.gz: 6d08805e52f5acd4a90fc117bb8a6b2e352c036daa9916c0a19079af827ddfec
5
5
  SHA512:
6
- metadata.gz: 9ff40710ce23c2f3dadbfbf68142800b8f10590c898f30f424819a4f0efea9aa545c52307c487c6fd96bf0c0f95f7504e5614056a30039f75f9db84fe1fcd595
7
- data.tar.gz: be6c618b3fab3eb5ac8577c8a4bd7fbbbc53ce300da750baf3f64d70807a0aeb2a3a7a7f28da91d7637ebe86d9f2ccc86b677478ed90ab3ae7878d6d8816cd09
6
+ metadata.gz: b328108ed4793a4c94ddd1ef1447e759bd369647fc81aca87bafe92eb1f23a71a53ca1d763726da5a631af19e520966075bb65ab40ff29d4a4563ac1a4b1e34a
7
+ data.tar.gz: 652d0a36301b9321eba9225ec658f3d72b0aa449bc0133b038055900475fe546fca36e2df2133680f69a938409afd6b637d0541be44dd956f51cf2cb50b205b2
data/.github/workflows/claude-code-review.yml CHANGED
@@ -43,7 +43,7 @@ jobs:
43
43
  - Security concerns
44
44
  - Test coverage
45
45
 
46
- Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
46
+ Use the repository's AGENTS.md for guidance on style and conventions. Be constructive and helpful in your feedback.
47
47
 
48
48
  Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
49
49
 
data/.gitignore CHANGED
@@ -30,5 +30,5 @@ public/
30
30
  # Exclusions
31
31
  !README.md
32
32
  !CHANGELOG.md
33
- !CLAUDE.md
33
+ !AGENTS.md
34
34
  !LICENSE.txt
data/AGENTS.md ADDED
@@ -0,0 +1,198 @@
1
+ # AGENTS.md
2
+
3
+ Guidance for AI coding agents working in this repository.
4
+
5
+ ## Development Commands
6
+
7
+ - **Install**: `bundle install`
8
+ - **Docs**: `bundle exec yard`
9
+ - **Lint**: `bundle exec rubocop`
10
+ - **Test**: `bundle exec try` (auto-discovers `*_try.rb` / `*.try.rb`)
11
+
12
+ ### Testing (Tryouts v3)
13
+
14
+ Each file has optional setup, testcases, and optional teardown. A testcase is a
15
+ `##` description line, Ruby code, then one or more expectation comments
16
+ (`#=>`, `#==>`, `#=:>`, `#=!>`, ...). The last expression is the result.
17
+ Instance variables (`@var`) persist across sections; locals do not. Write plain
18
+ realistic code; avoid mocks and test DSL.
19
+
20
+ Run with `--agent` for token-efficient output (`--agent-focus summary|first-failure|critical`).
21
+ See `bundle exec try --help` for the full CLI, framework integration (`--rspec`,
22
+ `--minitest`), and debugging flags.
23
+
24
+ ### Changelog
25
+
26
+ Add a changelog fragment (RST) with each user-facing change. See @changelog.d/README.md
27
+
28
+ ### Known Issues & Quirks
29
+
30
+ - **Reserved field names**: `ttl`, `db`, `valkey`, `redis` cannot be field names — use prefixed alternatives.
31
+ - **Empty identifiers**: Cause a stack overflow in key generation — validate before operations.
32
+ - **Lazy initialization**: Connection chains and field collections initialize lazily without synchronization (generally safe under the GIL, not guaranteed).
33
+
34
+ ### Debugging
35
+
36
+ Ask the user for real-time database command monitoring (commands with timestamps
37
+ and database numbers, live) when debugging multi/exec, pipelining, or
38
+ `logical_database` issues.
39
+
40
+ ## Architecture
41
+
42
+ **Familia** is a Valkey-compatible ORM providing Ruby object storage with
43
+ expiration, safe dumping, and quantization.
44
+
45
+ ### Core Classes
46
+
47
+ - **`Familia::Horreum`** (`lib/familia/horreum.rb`) — base class for Valkey-backed objects (ActiveRecord-like). Field definitions, data type relationships, lifecycle.
48
+ - **`Familia::DataType`** (`lib/familia/data_type.rb`) — base for type wrappers (StringKey, JsonStringKey, ListKey, UnsortedSet, SortedSet, HashKey). Each type lives in `lib/familia/data_type/types/`.
49
+ - **`Familia::Base`** (`lib/familia/base.rb`) — shared module for both, hosts the feature system.
50
+
51
+ Features (Expiration, SafeDump, Relationships, ...) are modules mixed into
52
+ classes via `Familia::Base`. See `lib/familia/features/`.
53
+
54
+ ### Defining a Model
55
+
56
+ ```ruby
57
+ class User < Familia::Horreum
58
+ field :email # scalar field
59
+ list :sessions # Valkey/Redis list
60
+ set :tags # set
61
+ zset :metrics # sorted set
62
+ hashkey :settings # hash
63
+ end
64
+ ```
65
+
66
+ Identifier strategies:
67
+
68
+ ```ruby
69
+ identifier_field :email # symbol
70
+ identifier ->(user) { "user:#{user.email}" } # proc
71
+ identifier [:type, :email] # array
72
+ ```
73
+
74
+ Connection handling lives in `lib/familia/connection.rb` and `lib/familia/settings.rb`;
75
+ select databases with the `logical_database` class method (URI configuration supported).
76
+
77
+ ### Initialization: do not override `initialize` without `super`
78
+
79
+ Familia's `initialize` sets fields from kwargs, then sets up DataType objects,
80
+ then calls your `init` hook. Overriding `initialize` without `super` breaks
81
+ related-field setup.
82
+
83
+ Apply defaults in the `init` hook with `||=` (never `=`, which would overwrite
84
+ values Horreum already set from kwargs):
85
+
86
+ ```ruby
87
+ class User < Familia::Horreum
88
+ field :objid
89
+ field :email
90
+
91
+ def init
92
+ @objid ||= SecureRandom.uuid
93
+ end
94
+ end
95
+
96
+ User.new(email: 'test@example.com').objid # => generated UUID
97
+ ```
98
+
99
+ Only override `initialize` (with `super`) when you must transform arguments
100
+ before Horreum processes them.
101
+
102
+ ## Serialization
103
+
104
+ Horreum fields are JSON-encoded for storage and JSON-decoded on load, preserving
105
+ Ruby types (Integer, Boolean, String, Float, Hash, Array, nil). `false` and `0`
106
+ are preserved; only `nil` values are omitted from storage.
107
+
108
+ | Context | Serialize | Ruby `"UK"` stored as | Ruby `123` stored as |
109
+ |---|---|---|---|
110
+ | Horreum `field` | `serialize_value` (JSON) | `"\"UK\""` | `"123"` |
111
+ | `StringKey` | `.to_s` (raw) | `"UK"` | `"123"` |
112
+ | `JsonStringKey` | JSON dump | `"\"UK\""` | `"123"` |
113
+ | List/Set/SortedSet/HashKey values | `serialize_value` (JSON) | `"\"UK\""` | `"123"` |
114
+
115
+ `StringKey` uses raw `.to_s` (not JSON) to support `INCR`/`DECR`/`APPEND`; a
116
+ Horreum string field stores `"UK"` as `"\"UK\""` while a `StringKey` stores it as
117
+ `"UK"`. Use `instance.debug_fields` to compare Ruby values vs stored JSON.
118
+
119
+ Database keys are generated as `classname:identifier:fieldname` (aka dbkey).
120
+ DataType instances are frozen after instantiation.
121
+
122
+ ## Write Model: Deferred vs Immediate
123
+
124
+ **Scalar fields** (`field`) use deferred writes: normal setters
125
+ (`user.name = "Alice"`) only touch memory until `save`/`commit_fields`/`batch_update`.
126
+ Fast writers (`user.name! "Alice"`) do an immediate `HSET`.
127
+
128
+ **Collection fields** (`list`, `set`, `zset`, `hashkey`) use immediate writes:
129
+ every mutator (`add`, `push`, `remove`, `clear`, `[]=`) hits Redis right away.
130
+ Collections live on separate keys from the object hash.
131
+
132
+ **Safe pattern — scalars first, then collections:**
133
+
134
+ ```ruby
135
+ # Option A: explicit save, then mutate collections directly
136
+ plan.name = "Premium"
137
+ plan.save # HMSET for scalar fields
138
+ plan.features.clear
139
+ plan.features.add("sso")
140
+
141
+ # Option B: convenience wrapper (calls save internally, then yields the block)
142
+ plan.name = "Premium"
143
+ plan.save_with_collections do
144
+ plan.features.clear
145
+ plan.features.add("sso")
146
+ end
147
+ ```
148
+
149
+ Mutating collections before `save` is unsafe: if `save` raises, the collections
150
+ are already mutated.
151
+
152
+ **Atomic pattern — scalars and collections in one MULTI/EXEC:**
153
+
154
+ ```ruby
155
+ plan.atomic_write do
156
+ plan.name = "Premium" # deferred: queued as HMSET
157
+ plan.features.clear # immediate: queued as DEL in the open MULTI
158
+ plan.features.add("sso")
159
+ end
160
+ ```
161
+
162
+ `atomic_write` composes the `transaction` infrastructure so every command lands
163
+ in one MULTI/EXEC; collection mutations auto-route into the open transaction via
164
+ `Fiber[:familia_transaction]`. Constraints:
165
+
166
+ - All related DataTypes must share the parent's `logical_database`, else `Familia::CrossDatabaseError` (fall back to `save_with_collections`). MULTI/EXEC is single-database only.
167
+ - Cannot nest inside another `transaction`/`atomic_write` (`Familia::OperationModeError`).
168
+ - Collection return values inside the block are `Redis::Future` — do not inspect before EXEC.
169
+
170
+ **Factory — `build` for create-and-populate:**
171
+
172
+ ```ruby
173
+ user = User.build(email: "alice@example.com") do |u|
174
+ u.name = "Alice" # deferred scalar
175
+ u.tags.add("admin") # folded into the same MULTI
176
+ end
177
+ ```
178
+
179
+ `build` is class-level sugar over `new` + `atomic_write` with create-only
180
+ semantics: raises `RecordExistsError` if the identifier exists, same
181
+ single-database constraint. Without a block it degenerates to `new(...).save`.
182
+ For upsert, use `save`/`save_with_collections`.
183
+
184
+ ## Instances Timeline
185
+
186
+ Every Horreum subclass has a class-level `instances` sorted set — a timeline of
187
+ last-write timestamps (ZADD score), not a registry.
188
+
189
+ - **Touch** (`touch_instances!`): `save`/`save_if_not_exists!` (via `persist_to_storage`), `commit_fields`, `batch_update`, `save_fields`, fast writers.
190
+ - **Remove**: instance `destroy!` (`remove_from_instances!`), class `destroy!(id)`, lazy `cleanup_stale_instance_entry` in `find_by_dbkey`.
191
+ - **Ghosts**: a hash key expiring via TTL leaves a stale identifier in `instances`. `find_by_dbkey` prunes on access; raw enumeration (`instances.members`) still sees ghosts.
192
+ - **`in_instances?(id)`** — fast O(log N), may report ghosts or miss non-Familia objects. **`exists?(id)`** — authoritative hash-key check (round-trip). `load`/`find_by_id` read the hash key directly and bypass `instances`.
193
+
194
+ ## Thread Safety
195
+
196
+ DataType instances are frozen (immutable). Configure module-level settings once
197
+ at startup, before threads spawn. `Familia.start_monitoring!` tracks contention.
198
+ Tests and contention patterns live in `try/thread_safety/`.
data/CHANGELOG.rst CHANGED
@@ -7,6 +7,199 @@ The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.1.0/>`
7
7
 
8
8
  <!--scriv-insert-here-->
9
9
 
10
+ .. _changelog-2.10.0:
11
+
12
+ 2.10.0 — 2026-06-04
13
+ ====================
14
+
15
+ Added
16
+ -----
17
+
18
+ - ``Horreum.build``: A factory block that yields a new instance, then commits
19
+ all scalar and collection changes in a single ``MULTI/EXEC`` upon exit.
20
+ This avoids sequencing ``save`` before collection writes. Raises
21
+ ``Familia::RecordExistsError`` if the identifier exists (create-only).
22
+ Without a block, it behaves as ``new(...).save``. #279
23
+
24
+ - ``atomic_write`` now supports ``watch_keys:`` (keys to watch) and
25
+ ``pre_check:`` (a callable run between ``WATCH`` and ``MULTI``) to enable
26
+ optimistic locking. Retries with exponential backoff on abort. #288
27
+
28
+ - ``encrypted_field`` now accepts a ``key_material:`` proc. This mixes
29
+ additional entropy into key derivation (separate from AAD), requiring
30
+ the correct material at decryption to avoid producing garbage output. PR #280
31
+
32
+ - Encrypted-field envelopes now store their own ``envelope_version`` and
33
+ ``aad_fields`` list. Decryption rebuilds AAD from these stored fields
34
+ rather than the active class declaration, preventing breakage when model
35
+ definitions change. PR #280
36
+
37
+ - ``DatabaseLogger.capture_enabled`` (Boolean, default ``true``) controls
38
+ in-memory buffer capturing. Disabling it bypasses clock checks, message
39
+ allocations, and buffer appends, offering a zero-overhead production path. Issue #233
40
+
41
+ - ``Familia::Instrumentation.hooks?(type)`` reports whether hooks are
42
+ registered for a given event type (e.g. ``:command``, ``:pipeline``). Issue #233
43
+
44
+ - ``Familia.reset_trace!`` clears the cached trace environment lookup. Issue #233
45
+
46
+ - ``dirty_write_warnings`` class method configures write-order warnings per
47
+ class (inheritable). Accepts ``:strict``, ``:warn``, ``:once``, or ``:off``. Issue #277
48
+
49
+ - ``Familia.dirty_write_warnings``: a global setting that provides the default mode for
50
+ classes that do not set their own. Issue #277
51
+
52
+ - ``Familia.raise_on_unsaved_parent_write`` (default ``true``) controls whether a
53
+ collection write on a new, unsaved, dirty parent raises or warns. Issue #278
54
+
55
+ Changed
56
+ -------
57
+
58
+ - Mutating a collection on a *new, unsaved* parent Horreum now **raises**
59
+ ``Familia::Problem`` by default. The guard fires *before* the command runs,
60
+ preventing orphaned data. Save the parent first, or set
61
+ ``Familia.raise_on_unsaved_parent_write = false`` to restore warnings. Issue #278
62
+
63
+ - Dirty-write warnings are now **deduplicated per dirty window** (mode ``:once``).
64
+ Writing to a collection on a parent with unsaved scalar fields warns once per
65
+ distinct set of unsaved fields instead of on every write. Set
66
+ ``dirty_write_warnings :warn`` to restore the old behavior. Issue #277
67
+
68
+ - Dirty-write warnings and strict raises now append the hint:
69
+ ``(call #save first or wrap in atomic_write)``. Issue #277
70
+
71
+ - ``trace_enabled?`` now caches the ``FAMILIA_TRACE`` lookup. Use
72
+ ``Familia.reset_trace!`` to force a re-read of the environment. Issue #233
73
+
74
+ - ``unique_index`` hashkeys now store identifiers as raw strings rather than
75
+ JSON-encoded strings. Rebuild existing unique indexes to convert legacy entries,
76
+ e.g., via ``User.rebuild_email_lookup`` or ``company.rebuild_badge_index``. Issue #276
77
+
78
+ Fixed
79
+ -----
80
+
81
+ - ``Horreum.build`` with a block no longer has a TOCTOU race between the
82
+ ``exists?`` check and the ``atomic_write`` commit. The block path now uses
83
+ ``atomic_write(watch_keys:, pre_check:)`` so the existence check runs between
84
+ ``WATCH`` and ``MULTI``. #288
85
+
86
+ - ``aad_fields`` containing a ``transient_field`` now bind to the field's real
87
+ value. Previously ``build_aad`` called ``RedactedString#to_s``, which returns
88
+ ``"[REDACTED]"`` for every value -- so all passphrases produced identical AAD
89
+ and the binding was defeated. PR #280
90
+
91
+ - ``each_record`` now works on ``unique_index`` hashkeys. Previously it raised
92
+ ``Familia::Problem`` because ``unique_index`` created its backing hashkey
93
+ without the ``class:`` option. Issue #276
94
+
95
+ - ``each_record`` extracts the stored identifier (the hash *value*) from a
96
+ HashKey instead of the indexed field (the hash *key*). Issue #276
97
+
98
+ - The unguarded ``Familia.trace`` sites in ``Horreum#destroy!`` and
99
+ ``find_by_dbkey`` now carry an inline ``if Familia.debug?`` guard. Issue #233
100
+
101
+ - Two latent encryption bugs surfaced while repairing the examples (issue #250):
102
+
103
+ - ``Familia::Encryption.with_request_cache`` and ``clear_request_cache!``
104
+ were unreachable. The implementation lived in
105
+ ``lib/familia/encryption/request_cache.rb``, which was never ``require``\ d.
106
+ The file is now loaded with the rest of the encryption stack.
107
+
108
+ - The XChaCha20-Poly1305 provider derived keys with
109
+ ``context.force_encoding('BINARY')``, mutating the caller's string. A
110
+ frozen context raised ``FrozenError``. It now uses ``context.b``.
111
+
112
+ Security
113
+ --------
114
+
115
+ - The ``aad_fields`` transient-field fix changes AAD output for any field that
116
+ lists a ``transient_field``. Values encrypted by an earlier release using a
117
+ transient field in ``aad_fields`` were bound to ``"[REDACTED]"`` and will no
118
+ longer decrypt after upgrading. Re-encrypt affected values if any exist.
119
+ PR #280
120
+
121
+ Documentation
122
+ -------------
123
+
124
+ - Repaired every script in ``examples/`` so each runs top-to-bottom and is
125
+ re-runnable (issue #250). Added ``try/integration/examples/`` with one
126
+ subprocess-driven tryouts file per example script for automated regression
127
+ coverage.
128
+
129
+ - ``Horreum.create!``: added ``@yield``, ``@yieldparam``, and
130
+ ``@yieldreturn`` YARD tags documenting the post-success block semantics. #286
131
+
132
+ - ``Horreum#save``: added ``@example`` tags showing idiomatic Ruby patterns
133
+ for post-save callbacks (``if save`` and ``&&`` short-circuit). #286
134
+
135
+ - Renamed ``CLAUDE.md`` to ``AGENTS.md`` and pruned it to remove volatile
136
+ content better served by its source of truth. Kept the non-obvious behavioral
137
+ contracts like deferred-vs-immediate write model and the serialization table.
138
+
139
+ AI Assistance
140
+ -------------
141
+
142
+ - AI implemented ``build`` factory block (#279) and WATCH composition in
143
+ ``atomic_write`` (#288), including tryouts for both.
144
+
145
+ - AI refactored encryption envelope handling (#280): unified AAD construction
146
+ through ``EncryptedData``, added envelope versioning, and fixed the
147
+ transient-field AAD bypass.
148
+
149
+ - AI implemented ``DatabaseLogger.capture_enabled`` toggle and middleware
150
+ consolidation (#233), per-class ``dirty_write_warnings`` (#277), and
151
+ unsaved-parent guard (#278) with tryouts for each.
152
+
153
+ - AI diagnosed and fixed ``each_record`` on ``unique_index`` hashkeys (#276)
154
+ and repaired all example scripts with regression tryouts (#250).
155
+
156
+ - AI evaluated and rejected ``save_and_then`` (#286) after cross-ORM analysis;
157
+ added YARD docs and ``create_block_try.rb`` instead.
158
+
159
+ .. _changelog-2.9.1:
160
+
161
+ 2.9.1 — 2026-05-18
162
+ ==================
163
+
164
+ Added
165
+ -----
166
+
167
+ - ``SortedSet#update`` (aliased ``merge!``) for bulk member insertion. A sorted
168
+ set is ``member => score`` -- the same pair shape as ``HashKey``'s
169
+ ``field => value`` -- so it follows the established ``HashKey#update``/``merge!``
170
+ convention (a single Hash argument) rather than the variadic splat used by the
171
+ value-only ``UnsortedSet``/``ListKey``. Pass ``{member => score}`` to issue one
172
+ ``ZADD`` instead of one round-trip per member. Validates the argument is a Hash
173
+ and that every score is ``Numeric`` (a missing/``nil`` score raises a clear
174
+ ``ArgumentError`` instead of a low-level client error -- unlike single-value
175
+ ``#add``, the bulk path does not default a missing score to ``Familia.now``).
176
+ Cascades expiration, and is a no-op returning ``0`` for empty input. The
177
+ single-value ``SortedSet#add`` (and its array-as-single-member contract) is
178
+ unchanged. PR #269
179
+
180
+ Changed
181
+ -------
182
+
183
+ - Bulk-write optimization for multi-value collection mutations. ``UnsortedSet#add``,
184
+ ``ListKey#push``, and ``ListKey#unshift`` previously issued one Redis command per
185
+ element (a loop of ``SADD``/``RPUSH``/``LPUSH`` calls), making large populations
186
+ slow even when pipelined. They now serialize all values and issue a single bulk
187
+ ``SADD``/``RPUSH``/``LPUSH`` command. Element ordering, ``nil`` compaction, nested
188
+ array flattening, return values, dirty-write warnings, and expiration cascading
189
+ are unchanged; empty calls remain no-ops. PR #269
190
+
191
+ AI Assistance
192
+ -------------
193
+
194
+ - AI investigated all collection ``DataType`` classes for the same per-element
195
+ loop anti-pattern, identified the three affected methods, verified
196
+ behavior-preservation (ordering, edge cases, chainability) at the Redis wire
197
+ level, and confirmed zero regressions against the existing test suites. The
198
+ ``SortedSet#update`` API shape was chosen by priority order: existing Familia
199
+ conventions first (the ``HashKey#update``/``merge!`` precedent for keyed
200
+ collections), then the upstream redis-rb bulk ``ZADD`` form, then Ruby
201
+ ``Hash#merge!`` semantics as confirmation.
202
+
10
203
  .. _changelog-2.9.0:
11
204
 
12
205
  2.9.0 — 2026-05-17
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- familia (2.9.0)
4
+ familia (2.10.0)
5
5
  concurrent-ruby (~> 1.3)
6
6
  connection_pool (>= 2.4, < 4.0)
7
7
  csv (~> 3.3)
@@ -67,7 +67,7 @@ GEM
67
67
  pp (>= 0.6.0)
68
68
  rdoc (>= 4.0.0)
69
69
  reline (>= 0.4.2)
70
- json (2.15.1)
70
+ json (2.15.2.1)
71
71
  json-schema (6.2.0)
72
72
  addressable (~> 2.8)
73
73
  bigdecimal (>= 3.1, < 5)
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Familia - 2.5
1
+ # Familia - v2
2
2
 
3
3
  **Organize and store Ruby objects in Valkey/Redis using native database types (an ORM of sorts).**
4
4
 
@@ -56,7 +56,7 @@ The performance characteristics you rely on in Valkey/Redis remain unchanged. Se
56
56
 
57
57
  ```bash
58
58
  # Add to Gemfile
59
- gem 'familia', '~> 2.5'
59
+ gem 'familia', '~> 2.10'
60
60
 
61
61
  # Or install directly
62
62
  gem install familia
@@ -0,0 +1,159 @@
1
+ # docs/guides/datatype-collections.md
2
+ ---
3
+
4
+ # DataType - Collection classes
5
+
6
+ The `UnsortedSet`, `SortedSet`, `ListKey`, and `HashKey` data types all include the `Collection` module. This guide covers two performance-sensitive concerns: writing many elements efficiently (a single bulk command instead of one round-trip per element), and iterating large collections efficiently via `each` and `each_record`.
7
+
8
+ ## Bulk writes — single round-trip mutations
9
+
10
+ Collection mutations are **immediate** — every call hits Valkey/Redis right away, unlike scalar `field` setters, which are deferred until `save`. Each call also runs `warn_if_dirty!` and cascades expiration. (See the write-model notes in `AGENTS.md` for the deferred-vs-immediate split.)
11
+
12
+ Multi-element adds issue **one** command for the whole batch, not one per element. Populating a large collection is therefore a single round-trip even without an explicit pipeline.
13
+
14
+ The argument shape follows the collection's structure, and is consistent across the codebase:
15
+
16
+ - **Value-only** collections (`UnsortedSet`, `ListKey`) take a **variadic splat**; arguments are flattened and `nil`-compacted.
17
+ - **Keyed/pair** collections (`HashKey` is `field => value`, `SortedSet` is `member => score`) take a **single Hash** via `update` (aliased `merge!`), raising `ArgumentError` on a non-Hash.
18
+
19
+ | Type | Bulk method | Call shape | Redis command |
20
+ |---|---|---|---|
21
+ | `UnsortedSet` | `add(*values)` | `tags.add(:a, :b, :c)` | one `SADD` |
22
+ | `ListKey` | `push(*values)` / `unshift(*values)` | `log.push(1, 2, 3)` | one `RPUSH` / `LPUSH` |
23
+ | `HashKey` | `update(hash)` / `merge!` | `cfg.update(a: 1, b: 2)` | one `HMSET` |
24
+ | `SortedSet` | `update(hash)` / `merge!` | `board.update("alice" => 1000, "bob" => 850)` | one `ZADD` |
25
+
26
+ ```ruby
27
+ tags.add(:ruby, :redis, :valkey) # 1 SADD, returns self
28
+ log.push("a", "b", "c") # 1 RPUSH → [a, b, c]
29
+ board.update("alice" => 1000, "bob" => 850) # 1 ZADD, returns new-member count (2)
30
+ board.merge!("alice" => 1200) # 1 ZADD, score updated → returns 0
31
+ ```
32
+
33
+ Behavior notes:
34
+
35
+ - **Ordering**: `push` preserves argument order; `unshift` prepends each element in turn, so `unshift(a, b, c)` leaves the list head as `c, b, a` (Redis `LPUSH` semantics — unchanged from the prior per-element implementation). Sets are unordered; sorted sets order by score.
36
+ - **Empty input is a no-op**: `add()` / `push()` / `update({})` issue no command. Set/list adds return `self`; `SortedSet#update` returns `0`.
37
+ - **`SortedSet#add(val, score, …)` is unchanged and not bulk** — it takes a single member plus score and the conditional ZADD options (`nx:`, `xx:`, `gt:`, `lt:`, `ch:`). An Array passed as `val` is stored as one JSON-encoded member, not exploded into many. Use `update`/`merge!` for bulk insertion.
38
+
39
+ The iteration methods `each` and `each_record` efficiently handle large collections by paginating through Valkey/Redis data structures, but they serve different purposes and yield different results. Here's how the two iterate, using `ModelClass.instances` (a `SortedSet` with `reference: true`) as the running example.
40
+
41
+ ## `each` — yields **members** (identifiers, raw strings)
42
+
43
+ `each` is implemented per type. For the `instances` SortedSet, it pages through the ZSET with either `ZRANGEBYSCORE` (when `since:`/`until:` are given) or `ZSCAN` (unbounded), yielding one deserialized member at a time.
44
+
45
+ ```mermaid
46
+ flowchart TD
47
+ Caller["ModelClass.instances.each { |id| ... }"] --> EachImpl["SortedSet#each"]
48
+ EachImpl --> Decide{since/until?}
49
+ Decide -- yes --> ZRBS["ZRANGEBYSCORE key min max LIMIT 0 batch_size WITHSCORES"]
50
+ Decide -- no --> ZSCAN["ZSCAN key cursor COUNT batch_size"]
51
+ ZRBS --> Page["Page of raw members"]
52
+ ZSCAN --> Page
53
+ Page --> Yield["yield deserialize_value(member)"]
54
+ Yield --> More{more pages?}
55
+ More -- yes --> Decide
56
+ More -- no --> Done["return self"]
57
+ ```
58
+
59
+ Per-type variations:
60
+ - `ListKey#each` — paginates with `LRANGE start stop` (no SCAN equivalent)
61
+ - `UnsortedSet#each` / `HashKey#each` — `SSCAN` / `HSCAN`, optional `matching:` glob
62
+ - `SortedSet#each` — `ZRANGEBYSCORE` (bounded) or `ZSCAN` (unbounded)
63
+
64
+ You get **members only** — identifiers for a reference collection such as `instances`, or `[field, value]` pairs for `HashKey`. No record loading. One Redis round-trip per page.
65
+
66
+ ## `each_record` — yields **loaded Horreum records**
67
+
68
+ `each_record` is defined once in `CollectionBase` and delegates to `each` to collect identifiers, then batches them into `record_class.load_multi` (pipelined `HGETALL`s), filters ghosts, and yields the live records.
69
+
70
+ ```mermaid
71
+ flowchart TD
72
+ Caller["ModelClass.instances.each_record { |rec| ... }"] --> ER["each_record(batch_size, pipeline, **filters)"]
73
+ ER --> Validate{"pipeline <= batch_size?"}
74
+ Validate -- no --> Raise["raise ArgumentError"]
75
+ Validate -- yes --> CallEach["each(**filters) do |member|"]
76
+ CallEach --> Extract["id = member.is_a?(Array) ? member.first : member"]
77
+ Extract --> Buffer["buffer << id"]
78
+ Buffer --> Full{"buffer.size >= batch_size?"}
79
+ Full -- no --> CallEach
80
+ Full -- yes --> Load["record_class.load_multi(ids) -- pipelined HGETALLs"]
81
+ Load --> Compact["live = records.compact -- drop ghosts"]
82
+ Compact --> Mode{pipeline?}
83
+ Mode -- nil --> Serial["live.each { |r| block.call(r) }"]
84
+ Mode -- positive --> Pipe["live.each_slice(pipeline) do |group|<br/>record_class.pipelined { group.each &block }<br/>end"]
85
+ Serial --> Clear["buffer.clear; resume each"]
86
+ Pipe --> Clear
87
+ Clear --> CallEach
88
+ CallEach -. each exhausted .-> Flush["process_batch(buffer) if any remain"]
89
+ Flush --> Return["return self"]
90
+ ```
91
+
92
+ ### Concrete timeline for `User.instances.each_record(batch_size: 100, pipeline: 25) { |u| u.touch! }`
93
+
94
+ ```
95
+ SortedSet#each (ZSCAN page 1, 100 ids)
96
+ ├─ buffer fills to 100
97
+ ├─ load_multi(ids) → 1 pipeline of 100 HGETALLs
98
+ ├─ compact ghosts → e.g. 97 live records
99
+ ├─ slice(25):
100
+ │ pipelined { 25 × u.touch! } ← 1 Redis pipeline
101
+ │ pipelined { 25 × u.touch! } ← 1 Redis pipeline
102
+ │ pipelined { 25 × u.touch! } ← 1 Redis pipeline
103
+ │ pipelined { 22 × u.touch! } ← 1 Redis pipeline
104
+ └─ buffer.clear
105
+ SortedSet#each (ZSCAN page 2, 100 ids)
106
+ └─ … repeat …
107
+ SortedSet#each exhausted
108
+ └─ flush any remaining buffered ids the same way
109
+ ```
110
+
111
+ ## Key differences
112
+
113
+ | Aspect | `each` | `each_record` |
114
+ |---|---|---|
115
+ | Yields | raw identifier (or `[field, value]` for `HashKey`) | loaded Horreum instance |
116
+ | Redis ops per yield | 0 extra (already paged) | amortized `HGETALL` via `load_multi` batch |
117
+ | Requires `reference: true` + `:class` | no | yes (raises `Familia::Problem` otherwise) |
118
+ | Ghost handling | yields the dangling id | `compact` drops them silently |
119
+ | Write pipelining | not built-in | `pipeline:` groups block-body writes into `pipelined` blocks |
120
+ | Filters | type-specific (`since:`, `matching:`, …) | forwarded to underlying `each` |
121
+
122
+ So `each_record` is a thin orchestration layer: it leans on the type's own `each` for read pagination, then layers (1) batched record hydration and (2) optional write pipelining on top.
123
+
124
+ ## Choosing a `pipeline` mode
125
+
126
+ `each_record` has two dispatch modes, controlled by `pipeline:`. The parameter answers a single question: **may the dispatch loop wrap your block in a `pipelined { }`?**
127
+
128
+ | Value | Dispatch | Use when the block… |
129
+ |---|---|---|
130
+ | `nil` (default) | Each record runs in its own connection context, no pipeline wrapper | …reads, OR calls `save` / `commit_fields` / `transaction` / anything with its own internal MULTI |
131
+ | positive integer | Groups of `pipeline` records run inside `record_class.pipelined { ... }` | …only issues fast writers (`record.field!`) that tolerate being queued |
132
+
133
+ Note: `pipeline: 0` raises `ArgumentError`. Use `pipeline: nil` to disable pipelining.
134
+
135
+ The read-only case and the serial-write case collapse into the same mode because both require **immediate** execution with real return values. Wrapping `save` in an outer `pipelined` would either return `Redis::Future` objects or raise `ConflictingContextError` when `save`'s internal transaction tries to open.
136
+
137
+ ### The three idiomatic patterns
138
+
139
+ ```ruby
140
+ # 1. Read-only iteration — the default (pipeline: nil) is correct
141
+ User.instances.each_record do |user|
142
+ puts "#{user.email} #{user.last_login}"
143
+ end
144
+
145
+ # 2. Serial writes — the default (pipeline: nil) is required for save / commit_fields / transaction
146
+ User.instances.each_record do |user|
147
+ user.score = recompute(user)
148
+ user.save
149
+ end
150
+
151
+ # 3. Pipelined fast writers — opt-in optimization
152
+ User.instances.each_record(pipeline: 50) do |user|
153
+ user.last_seen_at! Familia.now # single HSET, safe to queue in pipeline
154
+ end
155
+ ```
156
+
157
+ ### Pipelining footgun
158
+
159
+ If you enable pipelining and your block reads from a related collection (e.g. `user.sessions.size`), that read is queued into the pipeline and returns a `Redis::Future` rather than a value. Omit the `pipeline:` parameter (or explicitly pass `pipeline: nil`) whenever the block needs real return values from Redis.