familia 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.rst +61 -0
  3. data/Gemfile.lock +2 -2
  4. data/docs/migrating/v2.9.0.md +125 -0
  5. data/familia.gemspec +1 -1
  6. data/lib/familia/batch_result.rb +158 -0
  7. data/lib/familia/data_type/collection_base.rb +132 -0
  8. data/lib/familia/data_type/scalar_base.rb +33 -0
  9. data/lib/familia/data_type/types/hashkey.rb +37 -0
  10. data/lib/familia/data_type/types/json_stringkey.rb +2 -0
  11. data/lib/familia/data_type/types/listkey.rb +34 -13
  12. data/lib/familia/data_type/types/sorted_set.rb +68 -18
  13. data/lib/familia/data_type/types/stringkey.rb +2 -0
  14. data/lib/familia/data_type/types/unsorted_set.rb +35 -13
  15. data/lib/familia/data_type.rb +2 -1
  16. data/lib/familia/multi_result.rb +111 -0
  17. data/lib/familia/version.rb +1 -1
  18. data/lib/familia.rb +2 -1
  19. data/try/edge_cases/fast_writer_transaction_guard_try.rb +4 -4
  20. data/try/edge_cases/iterator_connection_errors_try.rb +97 -0
  21. data/try/edge_cases/pipeline_handler_edge_cases_try.rb +1 -1
  22. data/try/edge_cases/ttl_side_effects_try.rb +1 -1
  23. data/try/features/atomic_write_coverage_try.rb +1 -1
  24. data/try/features/atomic_write_try.rb +3 -3
  25. data/try/features/atomicity_try.rb +2 -2
  26. data/try/features/dirty_tracking_try.rb +21 -21
  27. data/try/features/instance_registry_try.rb +2 -2
  28. data/try/integration/connection/operation_mode_guards_try.rb +3 -3
  29. data/try/integration/connection/pipeline_fallback_integration_try.rb +4 -4
  30. data/try/integration/connection/pipeline_handler_integration_try.rb +3 -3
  31. data/try/integration/connection/pipeline_horreum_routing_try.rb +4 -4
  32. data/try/integration/connection/pools_try.rb +1 -1
  33. data/try/integration/connection/transaction_fallback_integration_try.rb +4 -4
  34. data/try/integration/connection/transaction_mode_permissive_try.rb +8 -8
  35. data/try/integration/connection/transaction_mode_strict_try.rb +2 -2
  36. data/try/integration/connection/transaction_mode_warn_try.rb +5 -5
  37. data/try/integration/connection/transaction_modes_try.rb +14 -14
  38. data/try/integration/data_types/datatype_pipelines_try.rb +9 -9
  39. data/try/integration/data_types/datatype_transactions_try.rb +17 -17
  40. data/try/integration/database_consistency_try.rb +1 -1
  41. data/try/integration/models/familia_object_try.rb +1 -1
  42. data/try/integration/transaction_safety_core_try.rb +1 -1
  43. data/try/integration/transaction_safety_workflow_try.rb +2 -2
  44. data/try/support/prototypes/atomic_saves_v2_connection_switching.rb +1 -1
  45. data/try/support/prototypes/lib/atomic_saves_v2_connection_switching_helpers.rb +1 -1
  46. data/try/support/prototypes/pooling/lib/connection_pool_stress_test.rb +1 -1
  47. data/try/unit/batch_result_try.rb +348 -0
  48. data/try/unit/data_types/each_record_try.rb +298 -0
  49. data/try/unit/data_types/enumerable_consistency/concurrent_modification_try.rb +176 -0
  50. data/try/unit/data_types/enumerable_consistency/hashkey_consistency_try.rb +224 -0
  51. data/try/unit/data_types/enumerable_consistency/large_scale_consistency_try.rb +292 -0
  52. data/try/unit/data_types/enumerable_consistency/listkey_consistency_try.rb +230 -0
  53. data/try/unit/data_types/enumerable_consistency/sorted_set_consistency_try.rb +241 -0
  54. data/try/unit/data_types/enumerable_consistency/unsorted_set_consistency_try.rb +261 -0
  55. data/try/unit/data_types/enumerable_try.rb +228 -0
  56. data/try/unit/data_types/hashkey_each_try.rb +213 -0
  57. data/try/unit/data_types/listkey_each_try.rb +222 -0
  58. data/try/unit/data_types/sorted_set_each_try.rb +227 -0
  59. data/try/unit/data_types/unsorted_set_each_try.rb +185 -0
  60. data/try/unit/horreum/base_try.rb +1 -1
  61. data/try/unit/horreum/destroy_related_fields_cleanup_try.rb +1 -1
  62. data/try/unit/horreum/initialization_try.rb +1 -1
  63. data/try/unit/horreum/json_type_preservation_try.rb +3 -3
  64. data/try/unit/horreum/multi_field_update_try.rb +143 -0
  65. data/try/unit/horreum/serialization_try.rb +14 -14
  66. metadata +23 -4
  67. data/lib/multi_result.rb +0 -109
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c6cb1ccd59d4290c1d75b70e114945180fc5dbb03aaeb405d841c250cd504179
4
- data.tar.gz: c2bd7946e7e024c8322d0bf48bea39c22cd61c7690589007f9ab32e6f12e614d
3
+ metadata.gz: 42bbbbcb737ab4222505955e5b1af2ab72ed962ba80a02cf324206b4f2379b94
4
+ data.tar.gz: 331b1d0bb0808618a87d962e1b09af61a31d59b7b8d56b0f49513cb9f3fec63d
5
5
  SHA512:
6
- metadata.gz: 54d3c3020c53fe01de9baf6af78fbcc1ffbea72a671b6e6269c08c9c5eaab786bb267514aa64212ff8ec854e0363cfb1c1de9b9c66674e57cb067ed419a8944b
7
- data.tar.gz: d8b8ecd85ed1273c64ae75cab7559dee0ddb92fe5d552b60690c3a66ee9df93b3829aa32deef5438677ad9fb8d9acebb0bdbf071f3bccdb63161c4a772b1c021
6
+ metadata.gz: 9ff40710ce23c2f3dadbfbf68142800b8f10590c898f30f424819a4f0efea9aa545c52307c487c6fd96bf0c0f95f7504e5614056a30039f75f9db84fe1fcd595
7
+ data.tar.gz: be6c618b3fab3eb5ac8577c8a4bd7fbbbc53ce300da750baf3f64d70807a0aeb2a3a7a7f28da91d7637ebe86d9f2ccc86b677478ed90ab3ae7878d6d8816cd09
data/CHANGELOG.rst CHANGED
@@ -7,6 +7,67 @@ The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.1.0/>`
7
7
 
8
8
  <!--scriv-insert-here-->
9
9
 
10
+ .. _changelog-2.9.0:
11
+
12
+ 2.9.0 — 2026-05-17
13
+ ==================
14
+
15
+ Added
16
+ -----
17
+
18
+ - Batch iteration primitives for DataTypes via ``Enumerable`` integration:
19
+
20
+ - All collection DataTypes (``SortedSet``, ``HashKey``, ``UnsortedSet``, ``ListKey``) now
21
+ ``include Enumerable``, providing ``each_slice``, ``lazy``, ``map``, ``reduce``,
22
+ ``find``, and other stdlib methods.
23
+
24
+ - **SortedSet#each(since:, until:)**: Cursor-based iteration with optional
25
+ timestamp bounds. Uses ZRANGEBYSCORE when bounds provided (inclusive),
26
+ ZSCAN otherwise. Accepts Time objects or numeric scores.
27
+
28
+ - **HashKey#each(matching:)**: Cursor-based iteration via HSCAN with optional
29
+ glob pattern filter on field names.
30
+
31
+ - **UnsortedSet#each(matching:)**: Cursor-based iteration via SSCAN with optional
32
+ glob pattern filter using Redis SSCAN MATCH on raw values.
33
+
34
+ - **ListKey#each(batch_size:)**: Memory-efficient LRANGE pagination for large lists.
35
+
36
+ - ``DataType#each_record(batch_size:, write_size:, **filters)`` yields loaded
37
+ Horreum records (not raw IDs) via ``load_multi``. Ghost instances (expired keys
38
+ still in ``instances``) are automatically filtered. The ``write_size:`` parameter
39
+ controls pipelining depth (``nil`` for serial execution).
40
+
41
+ - ``Familia::BatchResult`` value type for aggregating batch operation results:
42
+
43
+ - ``BatchResult.collect(enumerable, strict: false) { |record| ... }`` iterates
44
+ any Enumerable, tracking ``scanned``, ``modified`` (truthy returns), ``errors``
45
+ (array of ``{id:, error:}``), and ``duration_ms``.
46
+
47
+ - Per-record exception isolation: errors are captured and iteration continues.
48
+
49
+ - ``strict: true`` re-raises the first collected error after iteration completes.
50
+
51
+ Changed
52
+ -------
53
+
54
+ - Renamed batch field-update methods for clarity:
55
+
56
+ - ``batch_update`` is now ``multi_field_update``
57
+ - ``batch_fast_write`` is now ``multi_field_fast_write``
58
+
59
+ Old names removed without deprecation shim (breaking change).
60
+
61
+ - Moved ``MultiResult`` into Familia namespace as ``Familia::MultiResult``.
62
+ Old top-level constant removed without backwards-compat alias (breaking change).
63
+
64
+ AI Assistance
65
+ -------------
66
+
67
+ - Implementation and test coverage developed with parallel Claude Code agents:
68
+ one for production code (DataType iteration, BatchResult, renames), one for
69
+ Tryouts test suite (228 new tests across 8 files). PR #264.
70
+
10
71
  .. _changelog-2.8.0:
11
72
 
12
73
  2.8.0 — 2026-05-15
data/Gemfile.lock CHANGED
@@ -1,14 +1,14 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- familia (2.8.0)
4
+ familia (2.9.0)
5
5
  concurrent-ruby (~> 1.3)
6
6
  connection_pool (>= 2.4, < 4.0)
7
7
  csv (~> 3.3)
8
8
  json_schemer (~> 2.0)
9
9
  logger (~> 1.7)
10
10
  oj (~> 3.16)
11
- redis (>= 4.8.1, < 6.0)
11
+ redis (>= 5.0, < 6.0)
12
12
  stringio (~> 3.1.1)
13
13
  uri-valkey (~> 1.4)
14
14
 
@@ -0,0 +1,125 @@
1
+ # Migrating to Familia 2.9.0
2
+
3
+ This version introduces batch iteration primitives for DataTypes, enabling efficient enumeration over large Redis collections. It also includes breaking changes to method names for clarity.
4
+
5
+ ## Breaking Changes
6
+
7
+ ### Method Renames
8
+
9
+ The multi-field update methods have been renamed to better reflect their purpose:
10
+
11
+ ```ruby
12
+ # Before (2.8.x)
13
+ user.batch_update(name: "Alice", email: "alice@example.com")
14
+ user.batch_fast_write(name: "Alice", email: "alice@example.com")
15
+
16
+ # After (2.9.0)
17
+ user.multi_field_update(name: "Alice", email: "alice@example.com")
18
+ user.multi_field_fast_write(name: "Alice", email: "alice@example.com")
19
+ ```
20
+
21
+ **Migration**: Find and replace `batch_update` with `multi_field_update` and `batch_fast_write` with `multi_field_fast_write`.
22
+
23
+ ### MultiResult Namespace
24
+
25
+ `MultiResult` has moved into the Familia namespace:
26
+
27
+ ```ruby
28
+ # Before (2.8.x)
29
+ result.is_a?(MultiResult)
30
+
31
+ # After (2.9.0)
32
+ result.is_a?(Familia::MultiResult)
33
+ ```
34
+
35
+ **Migration**: Replace bare `MultiResult` references with `Familia::MultiResult`.
36
+
37
+ ## New Features
38
+
39
+ ### Enumerable Integration
40
+
41
+ All collection DataTypes now include Ruby's `Enumerable` module, providing `each_slice`, `lazy`, `map`, `reduce`, `find`, and other stdlib methods:
42
+
43
+ ```ruby
44
+ # Lazy iteration with transformation
45
+ Org.instances.lazy.map { |id| id.upcase }.take(10).to_a
46
+
47
+ # Batch processing with each_slice
48
+ User.instances.each_slice(100) do |batch|
49
+ batch.each { |id| process(id) }
50
+ end
51
+ ```
52
+
53
+ ### Filtered Iteration
54
+
55
+ Each DataType now supports type-specific filters on `each`:
56
+
57
+ ```ruby
58
+ # SortedSet: filter by score (timestamp) bounds
59
+ Org.instances.each(since: 24.hours.ago, until: Time.now) do |id|
60
+ puts id
61
+ end
62
+
63
+ # HashKey: filter by field name pattern
64
+ user.profile.each(matching: "pref_*") do |field, value|
65
+ puts "#{field}: #{value}"
66
+ end
67
+
68
+ # UnsortedSet: filter by member pattern
69
+ user.tags.each(matching: "admin*") do |tag|
70
+ puts tag
71
+ end
72
+ ```
73
+
74
+ ### each_record for Loading Horreum Instances
75
+
76
+ `each_record` yields fully-loaded Horreum records instead of raw IDs:
77
+
78
+ ```ruby
79
+ # Load records in batches of 100
80
+ Org.instances.each_record(batch_size: 100) do |org|
81
+ org.tidy! # org is a loaded Horreum instance
82
+ end
83
+
84
+ # Control pipelining depth separately from fetch batch size
85
+ Org.instances.each_record(batch_size: 500, write_size: 50) do |org|
86
+ org.status!("active")
87
+ end
88
+
89
+ # Serial execution (no pipelining)
90
+ Org.instances.each_record(batch_size: 100, write_size: nil) do |org|
91
+ org.complex_operation
92
+ end
93
+ ```
94
+
95
+ Ghost instances (keys that expired but remain in the `instances` sorted set) are automatically filtered and never reach the block.
96
+
97
+ ### BatchResult for Aggregated Operations
98
+
99
+ `Familia::BatchResult` aggregates results from batch operations with per-record error isolation:
100
+
101
+ ```ruby
102
+ result = Familia::BatchResult.collect(
103
+ Org.instances.each_record(batch_size: 100, since: 24.hours.ago)
104
+ ) do |org|
105
+ org.tidy!
106
+ end
107
+
108
+ result.scanned # Total records yielded to block
109
+ result.modified # Count of truthy block returns
110
+ result.errors # Array of {id:, error:} for failed records
111
+ result.duration_ms # Total execution time
112
+
113
+ # Re-raise the first captured error after iteration completes
114
+ result = Familia::BatchResult.collect(enum, strict: true) { |r| r.process! }
115
+ ```
116
+
117
+ ## Concurrent Mutation Behavior
118
+
119
+ When iterating with `each` or `each_record`, be aware of Redis cursor semantics:
120
+
121
+ - Items present from iteration start to end are guaranteed to be returned
122
+ - Items added or removed mid-iteration may or may not appear
123
+ - Blocks should be idempotent to handle potential duplicates
124
+
125
+ This behavior is inherent to Redis cursor-based iteration (ZSCAN/HSCAN/SSCAN); it is expected, not a bug.
data/familia.gemspec CHANGED
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
25
25
  spec.add_dependency 'json_schemer', '~> 2.0'
26
26
  spec.add_dependency 'logger', '~> 1.7'
27
27
  spec.add_dependency 'oj', '~> 3.16'
28
- spec.add_dependency 'redis', '>= 4.8.1', '< 6.0'
28
+ spec.add_dependency 'redis', '>= 5.0', '< 6.0'
29
29
  spec.add_dependency 'stringio', '~> 3.1.1'
30
30
  spec.add_dependency 'uri-valkey', '~> 1.4'
31
31
 
@@ -0,0 +1,158 @@
1
+ # lib/familia/batch_result.rb
2
+ #
3
+ # frozen_string_literal: true
4
+
5
module Familia
  # Aggregated outcome of a batch iteration.
  #
  # BatchResult records how many items were visited, how many block calls
  # returned a truthy value, which items raised, and how long the whole run
  # took. It describes an entire batch run, as opposed to MultiResult which
  # wraps a single MULTI/EXEC or pipeline operation.
  #
  # @attr_reader scanned [Integer] Total number of items iterated
  # @attr_reader modified [Integer] Count of items where the block returned truthy
  # @attr_reader errors [Array<Hash>] Per-item errors as [{id:, error:}, ...]
  # @attr_reader duration_ms [Float] Total elapsed time in milliseconds
  #
  # @example Using BatchResult.collect
  #   result = BatchResult.collect(User.instances.each_record) do |user|
  #     user.deactivate!
  #   end
  #   puts "Processed #{result.scanned}, modified #{result.modified}"
  #   puts "Errors: #{result.errors.size}" if result.errors?
  #
  # @example With strict mode
  #   # Re-raises the first error after completing iteration
  #   BatchResult.collect(items, strict: true) { |item| item.process! }
  #
  class BatchResult
    attr_reader :scanned, :modified, :errors, :duration_ms

    # Creates a new BatchResult instance.
    #
    # @param scanned [Integer] Total items processed
    # @param modified [Integer] Items where the block returned truthy
    # @param errors [Array<Hash>] Array of error hashes with :id and :error keys
    # @param duration_ms [Float] Elapsed time in milliseconds
    def initialize(scanned:, modified:, errors:, duration_ms:)
      @scanned = scanned
      @modified = modified
      @errors = errors
      @duration_ms = duration_ms
    end

    # Iterates over an enumerable, collecting statistics and errors.
    #
    # This is the primary factory method for creating BatchResult instances.
    # Each item is yielded to the block; a truthy return increments
    # `modified`, and any StandardError raised by the block is captured
    # (together with a best-effort identifier for the item) so that
    # iteration continues with the next item.
    #
    # @param enumerable [#each] The collection to iterate
    # @param strict [Boolean] When true, re-raises the first captured error
    #   after iteration completes (the BatchResult itself is then not
    #   returned). Default: false.
    # @yield [item] Each item from the enumerable
    # @yieldreturn [Object] Truthy return values increment the modified count
    # @return [BatchResult] Aggregated result of the batch operation
    # @raise [ArgumentError] if no block is given
    # @raise [StandardError] the first captured error, when strict is true
    #
    # @example Basic usage
    #   result = BatchResult.collect(records) { |r| r.update!(status: 'done') }
    #
    # @example Strict mode re-raises errors
    #   begin
    #     BatchResult.collect(records, strict: true) { |r| r.validate! }
    #   rescue => e
    #     puts "Batch failed: #{e.message}"
    #   end
    #
    def self.collect(enumerable, strict: false)
      # Without this guard, `yield` raises LocalJumpError (a StandardError)
      # for every item, and the per-item rescue below would record each one
      # as a record failure instead of surfacing the caller's mistake.
      raise ArgumentError, 'BatchResult.collect requires a block' unless block_given?

      scanned = 0
      modified = 0
      errors = []
      # Monotonic clock: unaffected by wall-clock adjustments during the run.
      start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      # Splat so enumerables that yield several values per step are forwarded intact.
      enumerable.each do |*args|
        scanned += 1
        begin
          result = yield(*args)
          modified += 1 if result
        rescue StandardError => e
          # Single-value yields are identified by that value; multi-value
          # yields are identified by the whole argument list.
          identifier = extract_identifier(args.length == 1 ? args[0] : args)
          errors << { id: identifier, error: e }
        end
      end

      duration_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000

      batch_result = new(
        scanned: scanned,
        modified: modified,
        errors: errors,
        duration_ms: duration_ms
      )

      # In strict mode, re-raise the first error after completing iteration
      raise errors.first[:error] if strict && errors.any?

      batch_result
    end

    # Checks if any errors occurred during the batch.
    #
    # @return [Boolean] true if at least one error was captured
    def errors?
      !errors.empty?
    end

    # Checks if the batch completed without errors.
    #
    # @return [Boolean] true if no errors occurred
    def successful?
      errors.empty?
    end
    alias success? successful?

    # Returns the count of items that were scanned but neither counted as
    # modified nor recorded as errors.
    #
    # @return [Integer] Number of items where the block returned falsy
    #   without raising
    def skipped
      scanned - modified - errors.size
    end

    # Returns a hash representation of the result.
    #
    # Note that `:errors` is the error count, not the error list, and
    # `:duration_ms` is rounded to two decimal places.
    #
    # @return [Hash] Result data including all metrics
    def to_h
      {
        scanned: scanned,
        modified: modified,
        skipped: skipped,
        errors: errors.size,
        duration_ms: duration_ms.round(2),
        successful: successful?
      }
    end

    # Returns a human-readable summary.
    #
    # @return [String] Summary of the batch operation
    def to_s
      "BatchResult: scanned=#{scanned} modified=#{modified} errors=#{errors.size} duration=#{duration_ms.round(2)}ms"
    end

    # Derives a label for an item that failed: its `identifier`, else its
    # `id`, else the first 50 characters of `to_s`. Returns nil if deriving
    # the label itself raises, so error bookkeeping never aborts the batch.
    #
    # @private
    def self.extract_identifier(item)
      if item.respond_to?(:identifier)
        item.identifier
      elsif item.respond_to?(:id)
        item.id
      else
        item.to_s[0, 50]
      end
    rescue StandardError
      nil
    end
    private_class_method :extract_identifier
  end
end
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
module Familia
  class DataType
    # CollectionBase - shared behaviour for iterable DataType classes.
    #
    # Including this module mixes Enumerable into the host class, adds the
    # `collection_type?` predicate at class and instance level, and provides
    # `each_record` for collections whose members are record identifiers.
    #
    # The host class is expected to supply its own `each` that yields
    # members when a block is given and returns an Enumerator otherwise.
    #
    # @example Collection types
    #   ListKey      - Redis LIST
    #   UnsortedSet  - Redis SET
    #   SortedSet    - Redis ZSET
    #   HashKey      - Redis HASH
    #
    module CollectionBase
      # Hook run when the module is included: wires in Enumerable and the
      # class-level predicate.
      def self.included(host)
        host.include(Enumerable)
        host.extend(ClassMethods)
      end

      module ClassMethods
        # @return [Boolean] true when CollectionBase is among the receiver's
        #   included modules (directly or through inheritance)
        def collection_type?
          include?(Familia::DataType::CollectionBase)
        end
      end

      # @return [Boolean] delegates to the class-level predicate
      def collection_type?
        self.class.collection_type?
      end

      # Walks the collection's members, treats them as identifiers, and
      # yields the records loaded for them.
      #
      # Identifiers are gathered from `each(**filters)` into groups of
      # `batch_size` and handed to `@opts[:class].load_multi`. Any nil
      # entries in what `load_multi` returns are dropped before the block is
      # called, so the block only ever receives non-nil records. When a
      # member is an Array (e.g. a [field, value] pair), its first element
      # is used as the identifier.
      #
      # @param batch_size [Integer] Number of identifiers passed to each
      #   `load_multi` call
      # @param write_size [Integer, nil] When nil or 0 the block is called
      #   for each record directly. When positive, loaded records are split
      #   into groups of this size and each group's block calls run inside
      #   `record_class.pipelined`. Defaults to `batch_size`.
      # @param filters [Hash] Extra keyword arguments forwarded verbatim to
      #   this collection's `each`. Which keywords are accepted depends on
      #   that `each` implementation.
      #   NOTE(review): earlier docs listed `cursor_batch_size:` for every
      #   type, but `HashKey#each` and `ListKey#each` in this release declare
      #   `batch_size:` instead — confirm per type before relying on it.
      # @yield [record] Each loaded, non-nil record
      # @return [Enumerator, self] Enumerator if no block given, self otherwise
      # @raise [Familia::Problem] if `@opts[:class]` is missing or does not
      #   respond to `load_multi`
      # @raise [ArgumentError] if `write_size` is greater than `batch_size`
      #
      # @example Iterate over all records
      #   User.instances.each_record { |user| user.deactivate! }
      #
      # @example Pipeline writes in groups
      #   items.each_record(batch_size: 500, write_size: 50) { |r| r.foo! 'bar' }
      #
      # @example Serial writes (no pipelining)
      #   items.each_record(write_size: nil) { |r| r.save }
      #
      def each_record(batch_size: 100, write_size: batch_size, **filters, &block)
        unless block
          return to_enum(:each_record, batch_size: batch_size, write_size: write_size, **filters)
        end

        # The class used to materialise records comes from the DataType options.
        record_class = @opts[:class]
        unless record_class&.respond_to?(:load_multi)
          raise Familia::Problem, "each_record requires a reference DataType with a :class option that responds to load_multi"
        end

        if write_size && write_size > batch_size
          raise ArgumentError, "write_size (#{write_size}) cannot exceed batch_size (#{batch_size})"
        end

        # Loads one group of identifiers and feeds the surviving records to the block.
        flush = lambda do |ids|
          next if ids.empty?

          # compact drops the nil placeholders load_multi returned
          loaded = record_class.load_multi(ids).compact

          if write_size.nil? || write_size.zero?
            loaded.each { |record| block.call(record) }
          else
            loaded.each_slice(write_size) do |group|
              record_class.pipelined do
                group.each { |record| block.call(record) }
              end
            end
          end
        end

        pending = []

        each(**filters) do |member|
          # Pair-yielding collections: the first element is the identifier.
          pending << (member.is_a?(Array) ? member.first : member)
          next if pending.size < batch_size

          flush.call(pending)
          pending.clear
        end

        # Whatever is left over after the last full group.
        flush.call(pending) unless pending.empty?

        self
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Familia
  class DataType
    # ScalarBase - marker module for non-iterable DataType classes.
    #
    # Unlike the collection counterpart, including this module does not mix
    # in Enumerable; it only contributes the `scalar_type?` predicate at
    # class and instance level.
    #
    # @example Scalar types
    #   StringKey - Redis STRING
    #   Counter   - Redis STRING with INCR/DECR
    #   Lock      - Redis STRING with SETNX semantics
    #
    module ScalarBase
      # Hook run when the module is included: adds the class-level predicate.
      def self.included(host)
        host.extend(ClassMethods)
      end

      module ClassMethods
        # @return [Boolean] true when ScalarBase is among the receiver's
        #   included modules, including via a superclass
        def scalar_type?
          include?(Familia::DataType::ScalarBase)
        end
      end

      # @return [Boolean] delegates to the class-level predicate
      def scalar_type?
        self.class.scalar_type?
      end
    end
  end
end
@@ -4,6 +4,8 @@
4
4
 
5
5
  module Familia
6
6
  class HashKey < DataType
7
+ include DataType::CollectionBase
8
+
7
9
  # Returns the number of fields in the hash
8
10
  # @return [Integer] number of fields
9
11
  def field_count
@@ -141,6 +143,41 @@ module Familia
141
143
  deserialize_values(*elements)
142
144
  end
143
145
 
146
+ # Iterates over field-value pairs in the hash.
147
+ #
148
+ # Uses HSCAN for memory-efficient iteration. Optionally filters by field
149
+ # name pattern using Redis MATCH.
150
+ #
151
+ # @param matching [String, nil] Optional glob-style pattern to filter field
152
+ # names (e.g., "user:*", "*_count"). Pattern is passed to Redis HSCAN MATCH
153
+ # and matches against field names (plain strings, not JSON-encoded).
154
+ # @param batch_size [Integer] Number of elements to fetch per HSCAN iteration
155
+ # @yield [field, value] Each field-value pair (values are deserialized)
156
+ # @return [Enumerator, self] Returns Enumerator if no block given, self otherwise
157
+ #
158
+ # @example Iterate all pairs
159
+ # settings.each { |field, value| puts "#{field}: #{value}" }
160
+ #
161
+ # @example Filter by field name pattern
162
+ # settings.each(matching: "cache_*") { |f, v| puts "#{f}: #{v}" }
163
+ #
164
+ # @note Pattern matches field names only (plain strings). To filter on
165
+ # values, use Enumerable#select instead.
166
+ #
167
def each(matching: nil, batch_size: 100, &block)
  return to_enum(:each, matching: matching, batch_size: batch_size) unless block

  # Start from cursor 0 and keep calling scan until it hands back a zero
  # cursor; every page of pairs is passed straight to the block.
  position = 0
  loop do
    position, entries = scan(position, match: matching, count: batch_size)
    entries.each(&block)
    break if position.zero?
  end

  self
end
180
+
144
181
  # Incrementally iterates over fields in the hash using cursor-based iteration.
145
182
  # This is more memory-efficient than `hgetall` for large hashes.
146
183
  #
@@ -41,6 +41,8 @@ module Familia
41
41
  # puts val.to_f
42
42
  #
43
43
  class JsonStringKey < DataType
44
+ include DataType::ScalarBase
45
+
44
46
  # Initialization hook (required by DataType contract)
45
47
  def init; end
46
48
 
@@ -4,6 +4,8 @@
4
4
 
5
5
  module Familia
6
6
  class ListKey < DataType
7
+ include DataType::CollectionBase
8
+
7
9
  # Returns the number of elements in the list
8
10
  # @return [Integer] number of elements
9
11
  def element_count
@@ -132,12 +134,39 @@ module Familia
132
134
  rangeraw 0, count
133
135
  end
134
136
 
135
- def each(&)
136
- range.each(&)
137
- end
137
+ # Iterates over elements of the list.
138
+ #
139
+ # Uses LRANGE pagination for memory-efficient iteration over large lists.
140
+ # Unlike sets, Redis lists do not support SCAN, so we paginate through
141
+ # the list using index ranges.
142
+ #
143
+ # @param batch_size [Integer] Number of elements to fetch per LRANGE call
144
+ # @yield [element] Each deserialized element
145
+ # @return [Enumerator, self] Returns Enumerator if no block given, self otherwise
146
+ #
147
+ # @example Iterate all elements
148
+ # history.each { |event| process(event) }
149
+ #
150
+ # @example Use as Enumerator
151
+ # history.each.with_index { |event, idx| puts "#{idx}: #{event}" }
152
+ #
153
+ def each(batch_size: 100, &block)
154
+ return to_enum(:each, batch_size: batch_size) unless block
155
+
156
+ offset = 0
157
+ loop do
158
+ # LRANGE is inclusive on both ends, so end_idx = offset + batch_size - 1
159
+ elements = range(offset, offset + batch_size - 1)
160
+ break if elements.empty?
161
+
162
+ elements.each(&block)
163
+ offset += elements.size
164
+
165
+ # If we got fewer than batch_size, we've reached the end
166
+ break if elements.size < batch_size
167
+ end
138
168
 
139
- def each_with_index(&)
140
- range.each_with_index(&)
169
+ self
141
170
  end
142
171
 
143
172
  def eachraw(&)
@@ -148,14 +177,6 @@ module Familia
148
177
  rangeraw.each_with_index(&)
149
178
  end
150
179
 
151
- def collect(&)
152
- range.collect(&)
153
- end
154
-
155
- def select(&)
156
- range.select(&)
157
- end
158
-
159
180
  def collectraw(&)
160
181
  rangeraw.collect(&)
161
182
  end