RubyGems - familia - Versions diffs - 2.8.0 → 2.9.0 - Mend

familia 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

checksums.yaml +4 -4
data/CHANGELOG.rst +61 -0
data/Gemfile.lock +2 -2
data/docs/migrating/v2.9.0.md +125 -0
data/familia.gemspec +1 -1
data/lib/familia/batch_result.rb +158 -0
data/lib/familia/data_type/collection_base.rb +132 -0
data/lib/familia/data_type/scalar_base.rb +33 -0
data/lib/familia/data_type/types/hashkey.rb +37 -0
data/lib/familia/data_type/types/json_stringkey.rb +2 -0
data/lib/familia/data_type/types/listkey.rb +34 -13
data/lib/familia/data_type/types/sorted_set.rb +68 -18
data/lib/familia/data_type/types/stringkey.rb +2 -0
data/lib/familia/data_type/types/unsorted_set.rb +35 -13
data/lib/familia/data_type.rb +2 -1
data/lib/familia/multi_result.rb +111 -0
data/lib/familia/version.rb +1 -1
data/lib/familia.rb +2 -1
data/try/edge_cases/fast_writer_transaction_guard_try.rb +4 -4
data/try/edge_cases/iterator_connection_errors_try.rb +97 -0
data/try/edge_cases/pipeline_handler_edge_cases_try.rb +1 -1
data/try/edge_cases/ttl_side_effects_try.rb +1 -1
data/try/features/atomic_write_coverage_try.rb +1 -1
data/try/features/atomic_write_try.rb +3 -3
data/try/features/atomicity_try.rb +2 -2
data/try/features/dirty_tracking_try.rb +21 -21
data/try/features/instance_registry_try.rb +2 -2
data/try/integration/connection/operation_mode_guards_try.rb +3 -3
data/try/integration/connection/pipeline_fallback_integration_try.rb +4 -4
data/try/integration/connection/pipeline_handler_integration_try.rb +3 -3
data/try/integration/connection/pipeline_horreum_routing_try.rb +4 -4
data/try/integration/connection/pools_try.rb +1 -1
data/try/integration/connection/transaction_fallback_integration_try.rb +4 -4
data/try/integration/connection/transaction_mode_permissive_try.rb +8 -8
data/try/integration/connection/transaction_mode_strict_try.rb +2 -2
data/try/integration/connection/transaction_mode_warn_try.rb +5 -5
data/try/integration/connection/transaction_modes_try.rb +14 -14
data/try/integration/data_types/datatype_pipelines_try.rb +9 -9
data/try/integration/data_types/datatype_transactions_try.rb +17 -17
data/try/integration/database_consistency_try.rb +1 -1
data/try/integration/models/familia_object_try.rb +1 -1
data/try/integration/transaction_safety_core_try.rb +1 -1
data/try/integration/transaction_safety_workflow_try.rb +2 -2
data/try/support/prototypes/atomic_saves_v2_connection_switching.rb +1 -1
data/try/support/prototypes/lib/atomic_saves_v2_connection_switching_helpers.rb +1 -1
data/try/support/prototypes/pooling/lib/connection_pool_stress_test.rb +1 -1
data/try/unit/batch_result_try.rb +348 -0
data/try/unit/data_types/each_record_try.rb +298 -0
data/try/unit/data_types/enumerable_consistency/concurrent_modification_try.rb +176 -0
data/try/unit/data_types/enumerable_consistency/hashkey_consistency_try.rb +224 -0
data/try/unit/data_types/enumerable_consistency/large_scale_consistency_try.rb +292 -0
data/try/unit/data_types/enumerable_consistency/listkey_consistency_try.rb +230 -0
data/try/unit/data_types/enumerable_consistency/sorted_set_consistency_try.rb +241 -0
data/try/unit/data_types/enumerable_consistency/unsorted_set_consistency_try.rb +261 -0
data/try/unit/data_types/enumerable_try.rb +228 -0
data/try/unit/data_types/hashkey_each_try.rb +213 -0
data/try/unit/data_types/listkey_each_try.rb +222 -0
data/try/unit/data_types/sorted_set_each_try.rb +227 -0
data/try/unit/data_types/unsorted_set_each_try.rb +185 -0
data/try/unit/horreum/base_try.rb +1 -1
data/try/unit/horreum/destroy_related_fields_cleanup_try.rb +1 -1
data/try/unit/horreum/initialization_try.rb +1 -1
data/try/unit/horreum/json_type_preservation_try.rb +3 -3
data/try/unit/horreum/multi_field_update_try.rb +143 -0
data/try/unit/horreum/serialization_try.rb +14 -14
metadata +23 -4
data/lib/multi_result.rb +0 -109

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c6cb1ccd59d4290c1d75b70e114945180fc5dbb03aaeb405d841c250cd504179
-  data.tar.gz: c2bd7946e7e024c8322d0bf48bea39c22cd61c7690589007f9ab32e6f12e614d
+  metadata.gz: 42bbbbcb737ab4222505955e5b1af2ab72ed962ba80a02cf324206b4f2379b94
+  data.tar.gz: 331b1d0bb0808618a87d962e1b09af61a31d59b7b8d56b0f49513cb9f3fec63d
 SHA512:
-  metadata.gz: 54d3c3020c53fe01de9baf6af78fbcc1ffbea72a671b6e6269c08c9c5eaab786bb267514aa64212ff8ec854e0363cfb1c1de9b9c66674e57cb067ed419a8944b
-  data.tar.gz: d8b8ecd85ed1273c64ae75cab7559dee0ddb92fe5d552b60690c3a66ee9df93b3829aa32deef5438677ad9fb8d9acebb0bdbf071f3bccdb63161c4a772b1c021
+  metadata.gz: 9ff40710ce23c2f3dadbfbf68142800b8f10590c898f30f424819a4f0efea9aa545c52307c487c6fd96bf0c0f95f7504e5614056a30039f75f9db84fe1fcd595
+  data.tar.gz: be6c618b3fab3eb5ac8577c8a4bd7fbbbc53ce300da750baf3f64d70807a0aeb2a3a7a7f28da91d7637ebe86d9f2ccc86b677478ed90ab3ae7878d6d8816cd09

data/CHANGELOG.rst CHANGED Viewed

@@ -7,6 +7,67 @@ The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.1.0/>`
    <!--scriv-insert-here-->
+.. _changelog-2.9.0:
+2.9.0 — 2026-05-17
+==================
+Added
+-----
+- Batch iteration primitives for DataTypes via ``Enumerable`` integration:
+  - All collection DataTypes (``SortedSet``, ``HashKey``, ``UnsortedSet``, ``ListKey``) now
+    ``include Enumerable``, providing ``each_slice``, ``lazy``, ``map``, ``reduce``,
+    ``find``, and other stdlib methods.
+  - **SortedSet#each(since:, until:)**: Cursor-based iteration with optional
+    timestamp bounds. Uses ZRANGEBYSCORE when bounds provided (inclusive),
+    ZSCAN otherwise. Accepts Time objects or numeric scores.
+  - **HashKey#each(matching:)**: Cursor-based iteration via HSCAN with optional
+    glob pattern filter on field names.
+  - **UnsortedSet#each(matching:)**: Cursor-based iteration via SSCAN with optional
+    glob pattern filter using Redis SSCAN MATCH on raw values.
+  - **ListKey#each(batch_size:)**: Memory-efficient LRANGE pagination for large lists.
+- ``DataType#each_record(batch_size:, write_size:, **filters)`` yields loaded
+  Horreum records (not raw IDs) via ``load_multi``. Ghost instances (expired keys
+  still in ``instances``) are automatically filtered. The ``write_size:`` parameter
+  controls pipelining depth (``nil`` for serial execution).
+- ``Familia::BatchResult`` value type for aggregating batch operation results:
+  - ``BatchResult.collect(enumerable, strict: false) { |record| ... }`` iterates
+    any Enumerable, tracking ``scanned``, ``modified`` (truthy returns), ``errors``
+    (array of ``{id:, error:}``), and ``duration_ms``.
+  - Per-record exception isolation: errors are captured and iteration continues.
+  - ``strict: true`` re-raises the first collected error after iteration completes.
+Changed
+-------
+- Renamed batch field-update methods for clarity:
+  - ``batch_update`` is now ``multi_field_update``
+  - ``batch_fast_write`` is now ``multi_field_fast_write``
+  Old names removed without deprecation shim (breaking change).
+- Moved ``MultiResult`` into Familia namespace as ``Familia::MultiResult``.
+  Old top-level constant removed without backwards-compat alias (breaking change).
+AI Assistance
+-------------
+- Implementation and test coverage developed with parallel Claude Code agents:
+  one for production code (DataType iteration, BatchResult, renames), one for
+  Tryouts test suite (228 new tests across 8 files). PR #264.
 .. _changelog-2.8.0:
 2.8.0 — 2026-05-15

data/Gemfile.lock CHANGED Viewed

@@ -1,14 +1,14 @@
 PATH
   remote: .
   specs:
-    familia (2.8.0)
+    familia (2.9.0)
       concurrent-ruby (~> 1.3)
       connection_pool (>= 2.4, < 4.0)
       csv (~> 3.3)
       json_schemer (~> 2.0)
       logger (~> 1.7)
       oj (~> 3.16)
-      redis (>= 4.8.1, < 6.0)
+      redis (>= 5.0, < 6.0)
       stringio (~> 3.1.1)
       uri-valkey (~> 1.4)

data/docs/migrating/v2.9.0.md ADDED Viewed

@@ -0,0 +1,125 @@
+# Migrating to Familia 2.9.0
+This version introduces batch iteration primitives for DataTypes, enabling efficient enumeration over large Redis collections. It also includes breaking changes to method names for clarity.
+## Breaking Changes
+### Method Renames
+The multi-field update methods have been renamed to better reflect their purpose:
+```ruby
+# Before (2.8.x)
+user.batch_update(name: "Alice", email: "alice@example.com")
+user.batch_fast_write(name: "Alice", email: "alice@example.com")
+# After (2.9.0)
+user.multi_field_update(name: "Alice", email: "alice@example.com")
+user.multi_field_fast_write(name: "Alice", email: "alice@example.com")
+```
+**Migration**: Find and replace `batch_update` with `multi_field_update` and `batch_fast_write` with `multi_field_fast_write`.
+### MultiResult Namespace
+`MultiResult` has moved into the Familia namespace:
+```ruby
+# Before (2.8.x)
+result.is_a?(MultiResult)
+# After (2.9.0)
+result.is_a?(Familia::MultiResult)
+```
+**Migration**: Replace bare `MultiResult` references with `Familia::MultiResult`.
+## New Features
+### Enumerable Integration
+All collection DataTypes now include Ruby's `Enumerable` module, providing `each_slice`, `lazy`, `map`, `reduce`, `find`, and other stdlib methods:
+```ruby
+# Lazy iteration with transformation
+Org.instances.lazy.map { |id| id.upcase }.take(10).to_a
+# Batch processing with each_slice
+User.instances.each_slice(100) do |batch|
+  batch.each { |id| process(id) }
+end
+```
+### Filtered Iteration
+Each DataType now supports type-specific filters on `each`:
+```ruby
+# SortedSet: filter by score (timestamp) bounds
+Org.instances.each(since: 24.hours.ago, until: Time.now) do |id|
+  puts id
+end
+# HashKey: filter by field name pattern
+user.profile.each(matching: "pref_*") do |field, value|
+  puts "#{field}: #{value}"
+end
+# UnsortedSet: filter by member pattern
+user.tags.each(matching: "admin*") do |tag|
+  puts tag
+end
+```
+### each_record for Loading Horreum Instances
+`each_record` yields fully-loaded Horreum records instead of raw IDs:
+```ruby
+# Load records in batches of 100
+Org.instances.each_record(batch_size: 100) do |org|
+  org.tidy!  # org is a loaded Horreum instance
+end
+# Control pipelining depth separately from fetch batch size
+Org.instances.each_record(batch_size: 500, write_size: 50) do |org|
+  org.status!("active")
+end
+# Serial execution (no pipelining)
+Org.instances.each_record(batch_size: 100, write_size: nil) do |org|
+  org.complex_operation
+end
+```
+Ghost instances (keys that expired but remain in the `instances` sorted set) are automatically filtered and never reach the block.
+### BatchResult for Aggregated Operations
+`Familia::BatchResult` aggregates results from batch operations with per-record error isolation:
+```ruby
+result = Familia::BatchResult.collect(
+  Org.instances.each_record(batch_size: 100, since: 24.hours.ago)
+) do |org|
+  org.tidy!
+end
+result.scanned     # Total records yielded to block
+result.modified    # Count of truthy block returns
+result.errors      # Array of {id:, error:} for failed records
+result.duration_ms # Total execution time
+# Re-raise errors after completion
+result = Familia::BatchResult.collect(enum, strict: true) { |r| r.process! }
+```
+## Concurrent Mutation Behavior
+When iterating with `each` or `each_record`, be aware of Redis cursor semantics:
+- Items present from iteration start to end are guaranteed to be returned
+- Items added or removed mid-iteration may or may not appear
+- Blocks should be idempotent to handle potential duplicates
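+This is inherent to ZSCAN/HSCAN/SSCAN cursor iteration and is documented behavior, not a bug. `ListKey#each` pages through the list with LRANGE index ranges rather than a cursor, so pushes or pops during iteration shift indices and can cause elements to be repeated or skipped.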

data/familia.gemspec CHANGED Viewed

@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'json_schemer', '~> 2.0'
   spec.add_dependency 'logger', '~> 1.7'
   spec.add_dependency 'oj', '~> 3.16'
-  spec.add_dependency 'redis', '>= 4.8.1', '< 6.0'
+  spec.add_dependency 'redis', '>= 5.0', '< 6.0'
   spec.add_dependency 'stringio', '~> 3.1.1'
   spec.add_dependency 'uri-valkey', '~> 1.4'

data/lib/familia/batch_result.rb ADDED Viewed

@@ -0,0 +1,158 @@
+# lib/familia/batch_result.rb
+#
+# frozen_string_literal: true
+module Familia
+  # Represents the result of a batch iteration operation.
+  #
+  # BatchResult tracks statistics and errors when processing multiple records
+  # via methods like `each_record`. It provides aggregated metrics for the
+  # entire batch run, distinct from MultiResult which wraps a single
+  # MULTI/EXEC or pipeline operation.
+  #
+  # @attr_reader scanned [Integer] Total number of items iterated
+  # @attr_reader modified [Integer] Count of items where block returned truthy
+  # @attr_reader errors [Array<Hash>] Per-item errors as [{id:, error:}, ...]
+  # @attr_reader duration_ms [Float] Total elapsed time in milliseconds
+  #
+  # @example Using BatchResult.collect
+  #   result = BatchResult.collect(User.instances.each_record) do |user|
+  #     user.deactivate!
+  #   end
+  #   puts "Processed #{result.scanned}, modified #{result.modified}"
+  #   puts "Errors: #{result.errors.size}" if result.errors?
+  #
+  # @example With strict mode
+  #   # Re-raises first error after completing iteration
+  #   BatchResult.collect(items, strict: true) { |item| item.process! }
+  #
+  class BatchResult
+    attr_reader :scanned, :modified, :errors, :duration_ms
+    # Creates a new BatchResult instance.
+    #
+    # @param scanned [Integer] Total items processed
+    # @param modified [Integer] Items where block returned truthy
+    # @param errors [Array<Hash>] Array of error hashes with :id and :error keys
+    # @param duration_ms [Float] Elapsed time in milliseconds
+    def initialize(scanned:, modified:, errors:, duration_ms:)
+      @scanned = scanned
+      @modified = modified
+      @errors = errors
+      @duration_ms = duration_ms
+    end
+    # Iterates over an enumerable, collecting statistics and errors.
+    #
+    # This is the primary factory method for creating BatchResult instances.
+    # It tracks how many items were processed, how many returned truthy values,
+    # and captures any exceptions that occur during iteration.
+    #
+    # @param enumerable [Enumerable] The collection to iterate
+    # @param strict [Boolean] When true, re-raises the first captured error
+    #   after iteration completes. Default: false.
+    # @yield [item] Each item from the enumerable
+    # @yieldreturn [Object] Truthy return values increment the modified count
+    # @return [BatchResult] Aggregated result; not returned when strict mode re-raises
+    #
+    # @example Basic usage
+    #   result = BatchResult.collect(records) { |r| r.update!(status: 'done') }
+    #
+    # @example Strict mode re-raises errors
+    #   begin
+    #     BatchResult.collect(records, strict: true) { |r| r.validate! }
+    #   rescue => e
+    #     puts "Batch failed: #{e.message}"
+    #   end
+    #
+    def self.collect(enumerable, strict: false)
+      scanned = 0
+      modified = 0
+      errors = []
+      start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+      enumerable.each do |*args|
+        scanned += 1
+        begin
+          result = yield(*args)
+          modified += 1 if result
+        rescue StandardError => e
+          # Label the failure: a lone yielded value is used as-is, several are passed as an array
+          identifier = extract_identifier(args.length == 1 ? args[0] : args)
+          errors << { id: identifier, error: e }
+        end
+      end
+      duration_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000
+      batch_result = new(
+        scanned: scanned,
+        modified: modified,
+        errors: errors,
+        duration_ms: duration_ms
+      )
+      # In strict mode, re-raise the first error after completing iteration
+      raise errors.first[:error] if strict && errors.any?
+      batch_result
+    end
+    # Checks if any errors occurred during the batch.
+    #
+    # @return [Boolean] true if at least one error was captured
+    def errors?
+      !errors.empty?
+    end
+    # Checks if the batch completed without errors.
+    #
+    # @return [Boolean] true if no errors occurred
+    def successful?
+      errors.empty?
+    end
+    alias success? successful?
+    # Returns the count of items neither modified nor errored (scanned - modified - errors).
+    #
+    # @return [Integer] Number of items where block returned falsy
+    def skipped
+      scanned - modified - errors.size
+    end
+    # Returns a hash representation of the result.
+    #
+    # @return [Hash] Result data including all metrics
+    def to_h
+      {
+        scanned: scanned,
+        modified: modified,
+        skipped: skipped,
+        errors: errors.size,
+        duration_ms: duration_ms.round(2),
+        successful: successful?
+      }
+    end
+    # Returns a human-readable summary.
+    #
+    # @return [String] Summary of the batch operation
+    def to_s
+      "BatchResult: scanned=#{scanned} modified=#{modified} errors=#{errors.size} duration=#{duration_ms.round(2)}ms"
+    end
+    # @private Best-effort label: #identifier, else #id, else first 50 chars of #to_s; nil if that raises.
+    def self.extract_identifier(item)
+      if item.respond_to?(:identifier)
+        item.identifier
+      elsif item.respond_to?(:id)
+        item.id
+      else
+        item.to_s[0, 50]
+      end
+    rescue StandardError
+      nil
+    end
+    private_class_method :extract_identifier
+  end
+end

data/lib/familia/data_type/collection_base.rb ADDED Viewed

@@ -0,0 +1,132 @@
+# frozen_string_literal: true
+module Familia
+  class DataType
+    # CollectionBase - Base module for iterable DataType classes
+    #
+    # Collection types represent multi-value structures in Redis (LIST, SET,
+    # ZSET, HASH). They include Enumerable and provide batch iteration via
+    # each_record for reference collections.
+    #
+    # Each collection type must implement its own `each` method that:
+    # - Yields elements to the block when given
+    # - Returns an Enumerator when no block given
+    #
+    # @example Collection types
+    #   ListKey     - Redis LIST
+    #   UnsortedSet - Redis SET
+    #   SortedSet   - Redis ZSET
+    #   HashKey     - Redis HASH
+    #
+    module CollectionBase
+      def self.included(base)
+        base.include(Enumerable)
+        base.extend(ClassMethods)
+      end
+      module ClassMethods
+        def collection_type?
+          # Check ancestors to handle inheritance
+          ancestors.include?(Familia::DataType::CollectionBase)
+        end
+      end
+      def collection_type?
+        self.class.collection_type?
+      end
+      # Iterates over identifiers, loading each as a Horreum record.
+      #
+      # This method is designed for DataTypes that store object identifiers
+      # (typically with `reference: true`). It loads records in batches using
+      # the parent class's `load_multi` method and yields each loaded record.
+      #
+      # Ghost identifiers (where the underlying key has expired) are silently
+      # filtered out.
+      #
+      # @param batch_size [Integer] Number of identifiers to load per batch
+      # @param write_size [Integer, nil] Controls pipelining depth for writes
+      #   in the block. When nil or 0, writes are serial (no pipelining).
+      #   When positive, fast writers in the block will be pipelined in
+      #   groups of this size.
+      # @param filters [Hash] Additional filter parameters passed to `each`.
+      #   Available filters depend on the collection type:
+      #   - SortedSet: `since:`, `until:`; UnsortedSet/HashKey: `matching:`
+      #   - NOTE(review): `cursor_batch_size:` was listed for every type, but HashKey#each and
+      #     ListKey#each take `batch_size:`, which each_record consumes itself — confirm.
+      #   Passing unsupported filters raises ArgumentError.
+      # @yield [record] Each loaded Horreum record (non-nil)
+      # @return [Enumerator, self] Returns Enumerator if no block given, self otherwise
+      #
+      # @example Iterate over all records
+      #   User.instances.each_record { |user| user.deactivate! }
+      #
+      # @example With time filter (for SortedSet)
+      #   User.instances.each_record(since: 1.day.ago) { |u| notify(u) }
+      #
+      # @example Pipeline writes in groups
+      #   items.each_record(batch_size: 500, write_size: 50) { |r| r.foo! 'bar' }
+      #
+      # @example Serial writes (no pipelining)
+      #   items.each_record(write_size: nil) { |r| r.save }
+      #
+      def each_record(batch_size: 100, write_size: batch_size, **filters, &block)
+        return to_enum(:each_record, batch_size: batch_size, write_size: write_size, **filters) unless block
+        # Determine the class to load records from
+        # For reference DataTypes, @opts[:class] holds the Horreum class
+        record_class = @opts[:class]
+        unless record_class&.respond_to?(:load_multi)
+          raise Familia::Problem, "each_record requires a reference DataType with a :class option that responds to load_multi"
+        end
+        # write_size may not exceed batch_size (a nil write_size skips this check)
+        if write_size && write_size > batch_size
+          raise ArgumentError, "write_size (#{write_size}) cannot exceed batch_size (#{batch_size})"
+        end
+        # Collect identifiers in batches
+        buffer = []
+        process_batch = lambda do |ids|
+          return if ids.empty?
+          # Load records using the class's load_multi (pipelined HGETALLs)
+          records = record_class.load_multi(ids)
+          # Filter out ghosts (nil results from expired keys)
+          live_records = records.compact
+          if write_size.nil? || write_size.zero?
+            # Serial mode - no pipelining, execute block for each record directly
+            live_records.each { |record| block.call(record) }
+          else
+            # Pipelined mode - group records and wrap each group in a pipeline
+            live_records.each_slice(write_size) do |group|
+              record_class.pipelined do
+                group.each { |record| block.call(record) }
+              end
+            end
+          end
+        end
+        # Iterate using the type's each method with any filters
+        each(**filters) do |member|
+          # HashKey yields [field, value] pairs; extract field as identifier
+          identifier = member.is_a?(Array) ? member.first : member
+          buffer << identifier
+          if buffer.size >= batch_size
+            process_batch.call(buffer)
+            buffer.clear
+          end
+        end
+        # Process remaining items
+        process_batch.call(buffer) unless buffer.empty?
+        self
+      end
+    end
+  end
+end

data/lib/familia/data_type/scalar_base.rb ADDED Viewed

@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+module Familia
+  class DataType
+    # ScalarBase - Marker mixin for non-iterable DataType classes; adds `scalar_type?` to class and instances
+    #
+    # Scalar types represent single values in Redis (STRING, counters, locks).
+    # They do not include Enumerable because iteration over a single value
+    # is not semantically meaningful.
+    #
+    # @example Scalar types
+    #   StringKey  - Redis STRING
+    #   Counter    - Redis STRING with INCR/DECR
+    #   Lock       - Redis STRING with SETNX semantics
+    #
+    module ScalarBase
+      def self.included(base)
+        base.extend(ClassMethods)
+      end
+      module ClassMethods
+        def scalar_type?
+          # True when ScalarBase is among the ancestors, so subclasses inherit it (e.g. Counter < StringKey)
+          ancestors.include?(Familia::DataType::ScalarBase)
+        end
+      end
+      def scalar_type?
+        self.class.scalar_type?
+      end
+    end
+  end
+end

data/lib/familia/data_type/types/hashkey.rb CHANGED Viewed

@@ -4,6 +4,8 @@
 module Familia
   class HashKey < DataType
+    include DataType::CollectionBase
     # Returns the number of fields in the hash
     # @return [Integer] number of fields
     def field_count
@@ -141,6 +143,41 @@ module Familia
       deserialize_values(*elements)
     end
+    # Iterates over field-value pairs in the hash.
+    #
+    # Uses HSCAN for memory-efficient iteration. Optionally filters by field
+    # name pattern using Redis MATCH.
+    #
+    # @param matching [String, nil] Optional glob-style pattern to filter field
+    #   names (e.g., "user:*", "*_count"). Pattern is passed to Redis HSCAN MATCH
+    #   and matches against field names (plain strings, not JSON-encoded).
+    # @param batch_size [Integer] Forwarded as the `count:` of each scan call (HSCAN COUNT)
+    # @yield [field, value] Each field-value pair (values are deserialized)
+    # @return [Enumerator, self] Returns Enumerator if no block given, self otherwise
+    #
+    # @example Iterate all pairs
+    #   settings.each { |field, value| puts "#{field}: #{value}" }
+    #
+    # @example Filter by field name pattern
+    #   settings.each(matching: "cache_*") { |f, v| puts "#{f}: #{v}" }
+    #
+    # @note Pattern matches field names only; to filter on values use Enumerable#select.
+    #   NOTE(review): `cursor.zero?` presumes #scan returns an Integer cursor — confirm.
+    #
+    def each(matching: nil, batch_size: 100, &block)
+      return to_enum(:each, matching: matching, batch_size: batch_size) unless block
+      cursor = 0
+      loop do
+        new_cursor, pairs = scan(cursor, match: matching, count: batch_size)
+        pairs.each(&block)
+        cursor = new_cursor
+        break if cursor.zero?
+      end
+      self
+    end
     # Incrementally iterates over fields in the hash using cursor-based iteration.
     # This is more memory-efficient than `hgetall` for large hashes.
     #

data/lib/familia/data_type/types/json_stringkey.rb CHANGED Viewed

@@ -41,6 +41,8 @@ module Familia
   #   puts val.to_f
   #
   class JsonStringKey < DataType
+    include DataType::ScalarBase
     # Initialization hook (required by DataType contract)
     def init; end

data/lib/familia/data_type/types/listkey.rb CHANGED Viewed

@@ -4,6 +4,8 @@
 module Familia
   class ListKey < DataType
+    include DataType::CollectionBase
     # Returns the number of elements in the list
     # @return [Integer] number of elements
     def element_count
@@ -132,12 +134,39 @@ module Familia
       rangeraw 0, count
     end
-    def each(&)
-      range.each(&)
-    end
+    # Iterates over elements of the list.
+    #
+    # Uses LRANGE pagination for memory-efficient iteration over large lists.
+    # Unlike sets, Redis lists do not support SCAN, so we paginate through
+    # the list using index ranges.
+    #
+    # @param batch_size [Integer] Elements per LRANGE call (NOTE(review): not validated; presumably must be positive)
+    # @yield [element] Each deserialized element
+    # @return [Enumerator, self] Returns Enumerator if no block given, self otherwise
+    #
+    # @example Iterate all elements
+    #   history.each { |event| process(event) }
+    #
+    # @example Use as Enumerator
+    #   history.each.with_index { |event, idx| puts "#{idx}: #{event}" }
+    #
+    def each(batch_size: 100, &block)
+      return to_enum(:each, batch_size: batch_size) unless block
+      offset = 0
+      loop do
+        # LRANGE is inclusive on both ends, so end_idx = offset + batch_size - 1
+        elements = range(offset, offset + batch_size - 1)
+        break if elements.empty?
+        elements.each(&block)
+        offset += elements.size
+        # If we got fewer than batch_size, we've reached the end
+        break if elements.size < batch_size
+      end
-    def each_with_index(&)
-      range.each_with_index(&)
+      self
     end
     def eachraw(&)
@@ -148,14 +177,6 @@ module Familia
       rangeraw.each_with_index(&)
     end
-    def collect(&)
-      range.collect(&)
-    end
-    def select(&)
-      range.select(&)
-    end
     def collectraw(&)
       rangeraw.collect(&)
     end