familia 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.rst +45 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +11 -1
- data/docs/guides/writing-migrations.md +345 -0
- data/examples/migrations/v1_to_v2_serialization_migration.rb +374 -0
- data/examples/schemas/customer.json +33 -0
- data/examples/schemas/session.json +27 -0
- data/familia.gemspec +2 -0
- data/lib/familia/data_type/types/hashkey.rb +0 -238
- data/lib/familia/data_type/types/listkey.rb +4 -110
- data/lib/familia/data_type/types/sorted_set.rb +0 -365
- data/lib/familia/data_type/types/stringkey.rb +0 -139
- data/lib/familia/data_type/types/unsorted_set.rb +2 -122
- data/lib/familia/features/schema_validation.rb +139 -0
- data/lib/familia/migration/base.rb +447 -0
- data/lib/familia/migration/errors.rb +31 -0
- data/lib/familia/migration/model.rb +418 -0
- data/lib/familia/migration/pipeline.rb +226 -0
- data/lib/familia/migration/rake_tasks.rake +3 -0
- data/lib/familia/migration/rake_tasks.rb +160 -0
- data/lib/familia/migration/registry.rb +364 -0
- data/lib/familia/migration/runner.rb +311 -0
- data/lib/familia/migration/script.rb +234 -0
- data/lib/familia/migration.rb +43 -0
- data/lib/familia/schema_registry.rb +173 -0
- data/lib/familia/settings.rb +63 -1
- data/lib/familia/version.rb +1 -1
- data/lib/familia.rb +1 -0
- data/try/features/schema_registry_try.rb +193 -0
- data/try/features/schema_validation_feature_try.rb +218 -0
- data/try/migration/base_try.rb +226 -0
- data/try/migration/errors_try.rb +67 -0
- data/try/migration/integration_try.rb +451 -0
- data/try/migration/model_try.rb +431 -0
- data/try/migration/pipeline_try.rb +460 -0
- data/try/migration/rake_tasks_try.rb +61 -0
- data/try/migration/registry_try.rb +199 -0
- data/try/migration/runner_try.rb +311 -0
- data/try/migration/schema_validation_try.rb +201 -0
- data/try/migration/script_try.rb +192 -0
- data/try/migration/v1_to_v2_serialization_try.rb +513 -0
- data/try/performance/benchmarks_try.rb +11 -12
- metadata +44 -1
|
# lib/familia/migration/model.rb
#
# frozen_string_literal: true

require_relative 'base'

module Familia
  module Migration
    # Base class for individual record migrations on Familia::Horreum models
    #
    # Provides Redis SCAN-based iteration with progress tracking, error handling,
    # and dry-run/actual-run modes for processing records one at a time.
    #
    # ## When to Use Model vs Pipeline
    #
    # Use **Model** when:
    # - Complex logic is needed for each record
    # - Error handling per record is important
    # - Records need individual validation
    # - Updates vary significantly between records
    #
    # Use **Pipeline** when:
    # - Simple bulk updates across many records
    # - Performance is critical for large datasets
    # - All records get similar field updates
    # - Redis pipelining can be utilized effectively
    #
    # ## Subclassing Requirements
    #
    # Subclasses must implement:
    # - {#prepare} - Set @model_class and optionally @batch_size
    # - {#process_record} - Handle individual record processing
    #
    # Subclasses may override:
    # - {#migration_needed?} - Default returns true (always migrate)
    # - {#load_from_key} - Custom object loading from database keys
    #
    # ## Usage Example
    #
    #   class CustomerEmailMigration < Familia::Migration::Model
    #     def prepare
    #       @model_class = Customer
    #       @batch_size = 1000 # optional, defaults to config
    #     end
    #
    #     def process_record(obj, key)
    #       return unless obj.email.blank?
    #
    #       for_realsies_this_time? do
    #         obj.email = "#{obj.custid}@example.com"
    #         obj.save
    #       end
    #       track_stat(:emails_updated)
    #     end
    #   end
    #
    # ## Development Rule
    #
    # **IMPORTANT**: Deploy schema changes and logic changes separately.
    # This prevents new model logic from breaking migration logic and
    # reduces debugging complexity.
    #
    # @abstract Subclass and implement {#prepare} and {#process_record}
    # @see Pipeline For bulk processing with Redis pipelining
    class Model < Base
      # Model class being migrated
      # @return [Class] Familia::Horreum subclass
      attr_reader :model_class

      # Number of keys to scan per Redis SCAN operation
      # @return [Integer] batch size for scanning
      attr_reader :batch_size

      # Total number of indexed records in the model
      # @return [Integer] count from model_class.instances
      attr_reader :total_records

      # Number of keys found by Redis SCAN
      # @return [Integer] actual keys discovered
      attr_reader :total_scanned

      # Records that passed through process_record
      # @return [Integer] count of records needing updates
      attr_reader :records_needing_update

      # Records successfully updated
      # @return [Integer] count of records modified
      attr_reader :records_updated

      # Number of processing errors encountered
      # @return [Integer] error count
      attr_reader :error_count

      # Interactive debugging mode flag
      # @return [Boolean] whether to drop into pry on errors
      attr_reader :interactive

      # Redis SCAN pattern for finding records
      # @return [String] pattern like "customer:*:object"
      attr_reader :scan_pattern

      def initialize(options = {})
        super
        reset_counters
        set_defaults
      end

      # Main migration entry point
      #
      # Validates configuration, displays run mode information,
      # executes the SCAN-based record processing, and displays
      # a comprehensive summary.
      #
      # @return [Boolean] true if no errors occurred
      def migrate
        validate_model_class!

        # Set `@interactive = true` in the implementing migration class
        # for an interactive debug session on a per-record basis.
        require 'pry-byebug' if interactive

        print_database_details
        run_mode_banner

        info("[#{self.class.name.split('::').last}] Starting #{model_class.name} migration")
        info("Processing up to #{total_records} records")
        info('Will show progress every 100 records and log each update')

        scan_and_process_records
        print_database_details
        print_migration_summary

        @error_count.zero?
      end

      # Default migration check - always returns true
      #
      # Always return true to allow re-running for error recovery.
      # The migration should be idempotent - it won't overwrite existing values.
      # Override if you need conditional migration logic.
      #
      # @return [Boolean] true to proceed with migration
      def migration_needed?
        debug("[#{self.class.name.split('::').last}] Checking if migration is needed...")
        true
      end

      # Load Familia::Horreum object instance from database key
      #
      # Override this method to customize loading behavior. For example,
      # with a custom @scan_pattern, the migration might loop through
      # relation keys of a horreum model (e.g. customer:ID:custom_domain).
      #
      # Typically migrations iterate over objects themselves, but this
      # won't work if there are dangling "orphan" keys without corresponding
      # objects. Override this method to handle such cases.
      #
      # @param key [String] database key to load from
      # @return [Familia::Horreum, Familia::DataType] loaded object instance
      def load_from_key(key)
        model_class.find_by_key(key)
      end

      protected

      # Set @model_class and optionally @batch_size
      #
      # **Required for subclasses** - must set @model_class to a
      # Familia::Horreum subclass. Can optionally set @batch_size
      # to override the default.
      #
      # @abstract Subclasses must implement this method
      # @return [void]
      # @raise [NotImplementedError] if not implemented
      def prepare
        raise NotImplementedError, "#{self.class} must set @model_class in #prepare"
      end

      # Process a single record
      #
      # **Required for subclasses** - implement the core logic for
      # processing each record. Use {#track_stat} to count operations
      # and {#for_realsies_this_time?} to wrap actual changes.
      #
      # @abstract Subclasses must implement this method
      # @param obj [Familia::Horreum, Familia::DataType] The familia class instance to process
      # @param key [String] The dbkey of the record
      # @return [void]
      # @raise [NotImplementedError] if not implemented
      def process_record(obj, key)
        raise NotImplementedError, "#{self.class} must implement #process_record"
      end

      # Track statistics and auto-increment records_updated counter
      #
      # Automatically increments @records_updated when statname is :records_updated.
      # Use this to maintain consistent counting across migrations.
      #
      # @param statname [Symbol] The name of the statistic to track
      # @param increment [Integer] The amount to increment by
      # @return [void]
      def track_stat(statname, increment = 1)
        super
        @records_updated += increment if statname == :records_updated
      end

      # Track stat and log decision reason in one call
      #
      # Convenience method for logging migration decisions with consistent
      # formatting and automatic statistic tracking.
      #
      # @param obj [Familia::Horreum] object being processed
      # @param decision [String] decision made (e.g., 'skipped', 'updated')
      # @param field [String] field name involved in decision
      # @return [nil]
      def track_stat_and_log_reason(obj, decision, field)
        track_stat(:decision)
        track_stat("#{decision}_#{field}")
        info("#{decision} objid=#{obj.respond_to?(:objid) ? obj.objid : 'N/A'} #{field}=#{obj.send(field)}")
        nil
      end

      # === Schema Validation Hooks ===

      # Override in subclass to enable pre-transform validation
      #
      # When enabled, validates each record against its schema before
      # {#process_record} is called. Validation failures are tracked
      # via the :schema_errors_before stat.
      #
      # @return [Boolean] true to validate before transform
      def validate_before_transform?
        false
      end

      # Override in subclass to enable post-transform validation
      #
      # When enabled, validates each record against its schema after
      # {#process_record} completes. Validation failures are tracked
      # via the :schema_errors_after stat.
      #
      # @return [Boolean] true to validate after transform
      def validate_after_transform?
        false
      end

      # Wrapper that applies validation hooks around process_record
      #
      # Called internally by {#process_single_record} when validation
      # is enabled. Validates the object before and/or after the transform
      # based on {#validate_before_transform?} and {#validate_after_transform?}.
      #
      # @param obj [Familia::Horreum] the object to process
      # @param key [String] the database key of the record
      # @return [void]
      def process_record_with_validation(obj, key)
        if validate_before_transform?
          result = validate_schema(obj, context: 'before transform')
          track_stat(:schema_errors_before) unless result[:valid]
        end

        process_record(obj, key)

        if validate_after_transform?
          result = validate_schema(obj, context: 'after transform')
          track_stat(:schema_errors_after) unless result[:valid]
        end
      end

      private

      # Zero out all per-run counters.
      def reset_counters
        @total_scanned = 0
        @records_needing_update = 0
        @records_updated = 0
        @error_count = 0
      end

      # Establish defaults that #prepare and #validate_model_class! may override.
      def set_defaults
        @batch_size = Familia::Migration.config.batch_size
        @model_class = nil
        @scan_pattern = nil
        @interactive = false
        @total_records = 0
      end

      # Ensure #prepare configured a valid model class, then derive the
      # record count, database client, and default scan pattern from it.
      #
      # @return [nil]
      # @raise [Errors::PreconditionFailed] when @model_class is missing or invalid
      def validate_model_class!
        unless @model_class
          raise Errors::PreconditionFailed, 'Model class not set. Define @model_class in your #prepare method'
        end

        unless familia_horreum_class?
          raise Errors::PreconditionFailed, "Model class must be a Familia::Horreum subclass #{@model_class}"
        end

        @total_records = @model_class.respond_to?(:instances) ? @model_class.instances.size : 0
        @dbclient ||= @model_class.respond_to?(:dbclient) ? @model_class.dbclient : Familia.dbclient
        @scan_pattern ||= "#{@model_class.prefix}:*:object"
        nil
      end

      # Whether @model_class looks like a Familia model.
      #
      # The respond_to? guard runs first so non-class objects short-circuit
      # to false instead of relying on the rescue. Any comparison error
      # (e.g. a non-module passed to <) still resolves to false.
      #
      # @return [Boolean]
      def familia_horreum_class?
        return false unless @model_class.respond_to?(:ancestors)

        @model_class.ancestors.include?(Familia::Horreum) || @model_class < Familia::Base
      rescue StandardError
        false
      end

      # Iterate all matching keys via Redis SCAN, processing each record
      # individually with per-record error handling.
      def scan_and_process_records
        cursor = '0'

        loop do
          cursor, keys = dbclient.scan(cursor, match: @scan_pattern, count: @batch_size)
          @total_scanned += keys.size

          show_progress if should_show_progress?
          info("Processing batch of #{keys.size} keys...") unless keys.empty?

          keys.each { |key| process_single_record(key) }
          break if cursor == '0'
        end
      end

      # Report progress for every record up to 500, then every 100th.
      def should_show_progress?
        @total_scanned <= 500 || @total_scanned % 100 == 0
      end

      def show_progress
        progress(@total_scanned, @total_records, "Scanning #{model_class.name.split('::').last} records")
      end

      # Load one record and dispatch to process_record (with validation
      # hooks when enabled). Errors are counted and logged per record so
      # one failure never aborts the whole scan.
      def process_single_record(key)
        obj = load_from_key(key)

        # Every record that gets processed is considered as needing update. The
        # idempotent operations in process_record determine whether changes are
        # actually made.
        @records_needing_update += 1

        # Call the subclass implementation, with optional schema validation
        if validate_before_transform? || validate_after_transform?
          process_record_with_validation(obj, key)
        else
          process_record(obj, key)
        end
      rescue StandardError => ex
        handle_record_error(key, ex)
      end

      # Count and log a single-record failure; optionally drop into pry.
      def handle_record_error(key, ex)
        @error_count += 1
        error("Error processing #{key}: #{ex.message}")
        debug("Stack trace: #{ex.backtrace.first(10).join('; ')}")
        track_stat(:errors)

        binding.pry if interactive # rubocop:disable Lint/Debugger
      end

      def print_migration_summary
        print_summary do
          info("Redis SCAN found: #{@total_scanned} #{model_class} records")
          info("Passed migration filter: #{@records_needing_update} records")
          info("#{actual_run? ? 'Processed' : 'Would be processed'}: #{@records_updated} records")
          info("Errors: #{@error_count}")

          print_custom_stats
          print_error_guidance
          print_dry_run_guidance
        end
      end

      # Print any subclass-tracked stats, excluding the built-in counters
      # that are already reported in the summary.
      def print_custom_stats
        return unless @stats.any?

        info('')
        info('Additional statistics:')
        @stats.each do |key, value|
          next if %i[errors records_updated].include?(key)

          info("  #{key}: #{value}")
        end
      end

      def print_error_guidance
        return unless @error_count.positive?

        info('')
        info('Check logs for error details')
      end

      def print_dry_run_guidance
        return unless dry_run? && @records_needing_update.positive?

        info('')
        info('Run with --run to apply these updates')
      end

      def print_database_details
        print_summary('Redis Details') do
          info("Model class: #{@model_class.name}")
          info("Redis connection: #{dbclient.respond_to?(:id) ? dbclient.id : dbclient.class}")
          info("Scan pattern: #{@scan_pattern}")
          info("Indexed records: #{@total_records} (#{@model_class.name}.instances)")
          info("Batch size: #{@batch_size}")
          verify_database_connection
        end
      end

      # PING the database so connection problems surface before (and after)
      # the scan rather than mid-migration.
      def verify_database_connection
        dbclient.ping
        debug('Redis connection: verified')
      rescue StandardError => ex
        error("Cannot connect to the database: #{ex.message}")
        # Bare raise re-raises the current exception idiomatically.
        raise
      end
    end
  end
end
|
# lib/familia/migration/pipeline.rb
#
# frozen_string_literal: true

require_relative 'model'

module Familia
  module Migration
    # Pipeline-based migration for batch Redis operations with improved performance
    #
    # Inherits all Model functionality but processes records in batches
    # using Redis pipelining instead of individual operations. This provides
    # significant performance improvements for large datasets with simple updates.
    #
    # ## When to Use Pipeline vs Model
    #
    # Use **Pipeline** when:
    # - Processing thousands+ records with simple field updates
    # - All records get similar field modifications
    # - Performance is more important than per-record error handling
    # - Updates can be expressed as Hash field assignments
    #
    # Use **Model** when:
    # - Complex logic needed per record
    # - Individual error handling is important
    # - Records need different processing logic
    # - Updates involve method calls beyond simple field assignment
    #
    # ## Subclassing Requirements
    #
    # Subclasses must implement:
    # - {#prepare} - Set @model_class and @batch_size (inherited)
    # - {#should_process?} - Return true/false for each record
    # - {#build_update_fields} - Return Hash of field updates
    #
    # Subclasses may override:
    # - {#execute_update} - Customize the pipeline update operation
    #
    # ## Usage Example
    #
    #   class CustomerObjidMigration < Familia::Migration::Pipeline
    #     def prepare
    #       @model_class = Customer
    #       @batch_size = 100 # Smaller batches for pipelines
    #     end
    #
    #     def should_process?(obj)
    #       return track_stat(:skipped_empty_custid) && false if obj.custid.empty?
    #       true
    #     end
    #
    #     def build_update_fields(obj)
    #       {
    #         objid: obj.objid || SecureRandom.uuid_v7_from(obj.created),
    #         user_type: 'authenticated'
    #       }
    #     end
    #   end
    #
    # ## Performance Notes
    #
    # - Use smaller batch sizes (50-200) compared to Model
    # - Pipeline operations are atomic per batch, not per record
    # - Error handling is less granular than Model
    #
    # @abstract Subclass and implement {#should_process?} and {#build_update_fields}
    # @see Model For individual record processing
    class Pipeline < Model
      # Main batch processor - executes Redis operations in pipeline
      #
      # Processes an array of objects using Redis pipelining for improved
      # performance. Each object is checked via {#should_process?} and
      # updated via {#execute_update} if processing is needed.
      #
      # @param objects [Array<Array>] Array of tuples: [obj, original_dbkey]
      #   The original database key is preserved because records with missing/empty
      #   identifier fields cannot reconstitute their database key via obj.dbkey.
      #   Only the original key from SCAN guarantees we can operate on the record.
      # @return [void]
      def process_batch(objects)
        dbclient.pipelined do |pipe|
          objects.each do |obj, original_key|
            next unless should_process?(obj)

            fields = build_update_fields(obj)

            # Previously we skipped here when the migration returned no fields
            # to update. We're not always here to update though. Sometimes we
            # delete or update expirations or do other stuff. If we skip ahead
            # here, we never get to the execute_update method which migrations
            # can override to do whatever they want.
            #
            # Now, we simply return inside the default execute_update. The end
            # result is the same but it gives us the opportunity to perform
            # additional operations on the record.

            execute_update(pipe, obj, fields, original_key)

            track_stat(:records_updated)
          end
        end
      end

      # Override scanning to collect batches instead of individual processing
      private

      # SCAN all matching keys, accumulating [obj, key] tuples and flushing
      # them through {#process_batch_safely} whenever a full batch collects.
      # Reuses the inherited progress helpers so reporting stays consistent
      # with Model-based migrations.
      def scan_and_process_records
        cursor = '0'
        batch_objects = []

        loop do
          cursor, keys = dbclient.scan(cursor, match: @scan_pattern, count: @batch_size)
          @total_scanned += keys.size

          # Progress reporting (same cadence as Model#scan_and_process_records)
          show_progress if should_show_progress?

          # Collect objects for batch processing
          keys.each do |key|
            obj = load_from_key(key)
            @records_needing_update += 1
            batch_objects << [obj, key]

            # Process when batch is full
            if batch_objects.size >= @batch_size
              process_batch_safely(batch_objects)
              batch_objects.clear
            end
          end

          break if cursor == '0'
        end

        # Process remaining objects
        process_batch_safely(batch_objects) if batch_objects.any?
      end

      # Run one batch inside the dry-run guard. A batch failure counts every
      # member as errored since pipeline errors aren't attributable per record.
      def process_batch_safely(objects)
        return if objects.empty?

        info("Processing batch of #{objects.size} objects...")
        for_realsies_this_time? do
          process_batch(objects)
        end
      rescue StandardError => ex
        @error_count += objects.size
        error("Error processing batch of #{objects.size}: #{ex.message}")
        debug("Stack trace: #{ex.backtrace.first(10).join('; ')}")
        # Use track_stat's increment parameter instead of N separate calls.
        track_stat(:errors, objects.size)
      end

      protected

      # Determine if object should be processed in this batch
      #
      # **Required for subclasses** - implement filtering logic to determine
      # which records should be included in the pipeline update. Use
      # {#track_stat} to count skipped records.
      #
      # @abstract Subclasses must implement this method
      # @param obj [Familia::Horreum] The model instance to evaluate
      # @return [Boolean] true to process, false to skip
      # @raise [NotImplementedError] if not implemented
      def should_process?(obj)
        raise NotImplementedError, "#{self.class} must implement #should_process?"
      end

      # Build fields hash for Redis HMSET operation
      #
      # **Required for subclasses** - return a hash of field names to values
      # that will be applied via Redis HMSET in the pipeline. Return an empty
      # hash or nil to skip the default HMSET operation.
      #
      # @abstract Subclasses must implement this method
      # @param obj [Familia::Horreum] The model instance to update
      # @return [Hash] field_name => value pairs for Redis HMSET
      # @raise [NotImplementedError] if not implemented
      def build_update_fields(obj)
        raise NotImplementedError, "#{self.class} must implement #build_update_fields"
      end

      # Execute pipeline update operation
      #
      # Override this method to customize pipeline operations beyond simple
      # HMSET field updates. The default implementation handles HMSET with
      # dry-run support.
      #
      # **Important**: Use the provided `pipe` parameter, not the regular
      # Redis connection, to ensure operations are pipelined.
      #
      # NOTE: The `track_stat(:records_updated)` method should not be called here
      # (or anywhere else in a pipeline migration actually) as it is called by the
      # pipeline migration framework itself.
      #
      # @param pipe [Redis::Pipeline] Redis pipeline instance
      # @param obj [Familia::Horreum] object being updated
      # @param fields [Hash] field updates from {#build_update_fields}
      # @param original_key [String] original database key from SCAN
      # @return [void]
      def execute_update(pipe, obj, fields, original_key = nil)
        klass_name = obj.class.name.split('::').last

        unless fields&.any?
          return debug("Would skip #{klass_name} b/c empty fields (#{original_key})")
        end

        # Use original_key for records that can't generate valid keys
        dbkey = original_key || obj.dbkey

        # USE THE PIPELINE AND NOT THE regular redis connection.
        # Hash#flatten is single-level by default, so array values survive intact.
        pipe.hmset(dbkey, *fields.flatten)

        dry_run_only? do
          debug("Would update #{klass_name}: #{fields}")
        end
      end

      # Not used in Pipeline - batch processing instead
      def process_record(obj, key)
        # No-op: Pipeline uses batch processing
      end
    end
  end
end