RubyGems - catpm - Versions diffs - 0.9.5 → 0.9.6 - Mend

catpm 0.9.5 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

checksums.yaml +4 -4
data/README.md +1 -1
data/app/helpers/catpm/application_helper.rb +1 -1
data/app/models/catpm/sample.rb +0 -4
data/lib/catpm/adapter/base.rb +0 -1
data/lib/catpm/buffer.rb +6 -4
data/lib/catpm/collector.rb +70 -57
data/lib/catpm/configuration.rb +21 -16
data/lib/catpm/event.rb +2 -3
data/lib/catpm/fingerprint.rb +1 -1
data/lib/catpm/flusher.rb +77 -202
data/lib/catpm/lifecycle.rb +8 -9
data/lib/catpm/middleware.rb +1 -2
data/lib/catpm/request_segments.rb +2 -74
data/lib/catpm/trace.rb +1 -14
data/lib/catpm/version.rb +1 -1
data/lib/catpm.rb +1 -0
data/lib/generators/catpm/templates/initializer.rb.tt +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 18b84e4c767fa3872bbf196d21c3d197e6d4f6517269357e7b24bf0514da3708
-  data.tar.gz: 6cc282d9f10f13546939f8e9c39b12b784687ef4c0d25e42a2a7e34a39cd3124
+  metadata.gz: 3731353688fdebaef1f9cb164731daba00a0d69df65da03dc309f6f7901e2708
+  data.tar.gz: 4ef7e26d0b721c8fea556f797d74c58ef2b0df8f9257edead653393fcb991229
 SHA512:
-  metadata.gz: 1968d1e8c7ed1257d2f0bfc3e28f9e34b4e38057ec89e103c10f02bd42d99daa78cf7ba0a9fc31ff5c07a8dc9e0895d9f6c541c557345469a20e2c09da88dd9c
-  data.tar.gz: d9e5c1b664605c6e66c92e9e6520a1c606f979f10e5e0deb8883307b965f4b4c3d361ef2dcf7ddfe3fb840d99e107c4c666a1ef4487165543f5904285f2a5a07
+  metadata.gz: df633940cf6beba3252b6915c45d22688ff23ef3200edf17622e971ce65191ad85af476bfdd853a2bee449da6d28df81431c48e663001c80b14242ba407d3a7b
+  data.tar.gz: 473011238fdf84d011bf9d3c0ceed1d47b7f1a3be5e6e9d640f6f66b62273c2a396c425166b09ceb5b0b8bf63a993e51651d99d373c48b1058ead4ede2451266

data/README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 gem build catpm.gemspec
-gem push catpm-0.9.4.gem
+gem push catpm-0.9.5.gem
 # Catpm

data/app/helpers/catpm/application_helper.rb CHANGED Viewed

@@ -94,7 +94,7 @@ module Catpm
       events_max_samples_per_name:      { group: 'Events', label: 'Max Samples / Name', desc: 'Event samples retained per event name', fmt: :nullable_int },
       # ── Buffer & Flush ──
-      max_memory_per_thread:            { group: 'Buffer & Flush', label: 'Memory per Thread', desc: 'Memory budget per application thread (split between request segments and event buffer)', fmt: :bytes },
+      max_memory:                       { group: 'Buffer & Flush', label: 'Max Memory (MB)', desc: 'Global memory budget for catpm gem in megabytes', fmt: :int },
       flush_interval:                   { group: 'Buffer & Flush', label: 'Flush Interval', desc: 'How often the background thread drains the buffer to the database', fmt: :seconds },
       flush_jitter:                     { group: 'Buffer & Flush', label: 'Flush Jitter', desc: 'Random jitter added to flush interval to avoid thundering herd', fmt: :pm_seconds },
       persistence_batch_size:           { group: 'Buffer & Flush', label: 'Batch Size', desc: 'Number of events written per database transaction', fmt: :int },

data/app/models/catpm/sample.rb CHANGED Viewed

@@ -14,10 +14,6 @@ module Catpm
     scope :recent, ->(period = 1.hour) { where(recorded_at: period.ago..) }
     scope :for_error, ->(fingerprint) { where(error_fingerprint: fingerprint) }
-    def self.request_id_supported?
-      column_names.include?('request_id')
-    end
     def parsed_context
       case context
       when Hash then context

data/lib/catpm/adapter/base.rb CHANGED Viewed

@@ -38,7 +38,6 @@ module Catpm
                 context: sample_data[:context],
                 error_fingerprint: sample_data[:error_fingerprint]
               }
-              record[:request_id] = sample_data[:request_id] if Catpm::Sample.request_id_supported?
               record
             end

data/lib/catpm/buffer.rb CHANGED Viewed

@@ -22,17 +22,19 @@ module Catpm
     # Called from request threads. Returns :accepted or :dropped.
     # Never blocks — monitoring must not slow down the application.
     #
+    OVERFLOW_FACTOR = 1.5 # hard cap multiplier — drops events beyond this to prevent OOM
     # When buffer reaches max_bytes, signals the flusher for immediate drain
-    # and continues accepting events. Only drops as a last resort at 3x capacity
-    # (flusher stuck or DB down).
+    # and continues accepting events. Only drops as a last resort once the buffer
+    # would exceed max_bytes * OVERFLOW_FACTOR (flusher stuck or DB down).
     def push(event)
       signal_flush = false
       @monitor.synchronize do
         bytes = event.estimated_bytes
-        # Hard safety cap: 3x configured limit prevents OOM if flusher is stuck
-        if @current_bytes + bytes > @max_bytes * 3
+        # Hard safety cap: prevents OOM if flusher is stuck
+        if @current_bytes + bytes > @max_bytes * OVERFLOW_FACTOR
           @dropped_count += 1
           Catpm.stats[:dropped_events] += 1
           return :dropped

data/lib/catpm/collector.rb CHANGED Viewed

@@ -10,6 +10,7 @@ module Catpm
     # are slow. Without this cap, apps with 30% slow requests would see ~23%
     # instrumentation instead of the configured 1/random_sample_rate.
     MAX_FORCE_INSTRUMENT_COUNT = 3
+    FORCE_INSTRUMENT_MAX_ENDPOINTS = 100 # cap per-endpoint force-instrument hash
     class << self
       def process_action_controller(event)
@@ -63,16 +64,20 @@ module Catpm
         )
         # Force the NEXT HTTP request to be fully instrumented when this one
-        # wasn't. Covers slow/error spikes and filling phase (new endpoints that
-        # haven't collected enough instrumented samples yet).
+        # wasn't instrumented and was slow/error.
+        # Filling phase is handled by @http_filling_active flag in
+        # should_instrument_request? — no need for force_instrument here.
         if !instrumented
           if payload[:exception] || duration >= Catpm.config.slow_threshold_for(:http)
             trigger_force_instrument
-          else
+          elsif !@http_filling_active
+            # Detect new/underfilled endpoints that appeared after filling phase ended
             max = Catpm.config.max_random_samples_per_endpoint
             if max
               endpoint_key = ['http', target, operation]
-              trigger_force_instrument if instrumented_sample_counts[endpoint_key] < max
+              if instrumented_sample_counts[endpoint_key] < max
+                @http_filling_active = true
+              end
             end
           end
         end
@@ -372,8 +377,6 @@ module Catpm
           context = nil
         end
-        request_id = req_segments&.request_id
         ev = Event.new(
           kind: kind,
           target: target,
@@ -386,56 +389,7 @@ module Catpm
           metadata: metadata,
           error_class: error&.class&.name,
           error_message: error&.message,
-          backtrace: error&.backtrace,
-          request_id: request_id
-        )
-        Catpm.buffer&.push(ev)
-      end
-      def process_checkpoint(kind:, target:, operation:, context:, metadata:, checkpoint_data:, request_start:, request_id: nil)
-        return unless Catpm.enabled?
-        segments = checkpoint_data[:segments].dup
-        collapse_code_wrappers(segments)
-        duration_so_far = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - request_start) * 1000.0
-        # Inject root request segment
-        root_segment = {
-          type: 'request',
-          detail: "#{operation.presence || kind} #{target}",
-          duration: duration_so_far.round(2),
-          offset: 0.0
-        }
-        segments.each do |seg|
-          if seg.key?(:parent_index)
-            seg[:parent_index] += 1
-          else
-            seg[:parent_index] = 0
-          end
-        end
-        segments.unshift(root_segment)
-        checkpoint_context = (context || {}).dup
-        checkpoint_context[:segments] = segments
-        checkpoint_context[:segment_summary] = checkpoint_data[:summary]
-        checkpoint_context[:segments_capped] = checkpoint_data[:overflow]
-        checkpoint_context[:partial] = true
-        checkpoint_context[:checkpoint_number] = checkpoint_data[:checkpoint_number]
-        checkpoint_context = scrub(checkpoint_context)
-        ev = Event.new(
-          kind: kind,
-          target: target,
-          operation: operation.to_s,
-          duration: duration_so_far,
-          started_at: Time.current,
-          status: DEFAULT_SUCCESS_STATUS,
-          context: checkpoint_context,
-          sample_type: 'random',
-          metadata: (metadata || {}).dup.merge(checkpoint_data[:summary] || {}),
-          request_id: request_id
+          backtrace: error&.backtrace
         )
         Catpm.buffer&.push(ev)
@@ -463,6 +417,15 @@ module Catpm
       # --- Pre-sampling: decide BEFORE request whether to instrument ---
+      # Eagerly load sample counts at startup so old endpoints don't
+      # re-enter filling phase on every process restart.
+      # Called from Lifecycle.register_hooks after flusher init.
+      def load_sample_counts_eagerly!
+        @instrumented_sample_counts = load_sample_counts_from_db
+        @instrumented_sample_counts_loaded = true
+        recompute_http_filling_active
+      end
       # For HTTP middleware where endpoint is unknown at start.
       # Returns true if this request should get full instrumentation.
       def should_instrument_request?
@@ -472,6 +435,12 @@ module Catpm
           return true
         end
+        # During filling phase, instrument all requests so underfilled
+        # endpoints collect their quota (max_random_samples_per_endpoint).
+        # The flag is set by load_sample_counts_eagerly! and maintained
+        # by early_sample_type as endpoints fill up.
+        return true if @http_filling_active
         rand(Catpm.config.random_sample_rate) == 0
       end
@@ -505,7 +474,9 @@ module Catpm
       def trigger_force_instrument(kind: nil, target: nil, operation: nil)
         if kind && target
           endpoint_key = [kind.to_s, target.to_s, (operation || '').to_s]
-          force_instrument_endpoints[endpoint_key] = true
+          if force_instrument_endpoints.size < FORCE_INSTRUMENT_MAX_ENDPOINTS
+            force_instrument_endpoints[endpoint_key] = true
+          end
         else
           @force_instrument_count = [(@force_instrument_count || 0) + 1, MAX_FORCE_INSTRUMENT_COUNT].min
         end
@@ -516,10 +487,42 @@ module Catpm
         @instrumented_sample_counts_loaded = false
         @force_instrument_endpoints = nil
         @force_instrument_count = nil
+        @http_filling_active = false
       end
       private
+      # Recompute whether any HTTP endpoint is still below its sample quota.
+      # Called after loading counts from DB and when an endpoint exits filling.
+      def recompute_http_filling_active
+        max = Catpm.config.max_random_samples_per_endpoint
+        @http_filling_active = if max
+          # True if hash is empty (new app / new endpoints may appear) or any endpoint below quota
+          instrumented_sample_counts.empty? || instrumented_sample_counts.any? { |_, c| c < max }
+        else
+          false # unlimited quota → no filling phase for HTTP middleware
+        end
+      end
+      # Evict half the entries from instrumented_sample_counts.
+      # Prefers evicting filled entries (count >= max) to avoid
+      # re-triggering filling phase for those endpoints.
+      def evict_sample_counts(max_random)
+        evict_count = instrumented_sample_counts.size / 2
+        if max_random
+          filled_keys = []
+          unfilled_keys = []
+          instrumented_sample_counts.each do |k, c|
+            (c >= max_random ? filled_keys : unfilled_keys) << k
+          end
+          # Evict filled first (safe), then unfilled if needed
+          to_evict = (filled_keys + unfilled_keys).first(evict_count)
+          to_evict.each { |k| instrumented_sample_counts.delete(k) }
+        else
+          evict_count.times { instrumented_sample_counts.shift }
+        end
+      end
       def force_instrument_endpoints
         @force_instrument_endpoints ||= {}
       end
@@ -644,7 +647,17 @@ module Catpm
         count = instrumented_sample_counts[endpoint_key]
         max_random = Catpm.config.max_random_samples_per_endpoint
         if max_random.nil? || count < max_random
+          # Evict when hash exceeds derived limit — prefer filled entries
+          max_entries = Catpm.config.effective_sample_counts_max
+          if instrumented_sample_counts.size >= max_entries
+            evict_sample_counts(max_random)
+          end
           instrumented_sample_counts[endpoint_key] = count + 1
+          # Endpoint just reached quota — recheck if any filling endpoints remain
+          if max_random && count + 1 >= max_random
+            recompute_http_filling_active
+          end
         end
         return 'slow' if is_slow

data/lib/catpm/configuration.rb CHANGED Viewed

@@ -2,12 +2,15 @@
 module Catpm
   class Configuration
-    # Memory budget shares — how max_memory_per_thread is split
-    BUFFER_MEMORY_SHARE = 0.5   # half per-thread budget goes to buffer pool
-    REQUEST_MEMORY_SHARE = 0.5  # half per-thread budget for request segments
-    MIN_REQUEST_MEMORY = 1_024     # 1 KB — floor for per-request (checkpoint viability, ~5 minimal segments)
     MIN_BUFFER_MEMORY = 1_048_576  # 1 MB — floor for buffer (meaningful buffering)
-    DEFAULT_ASSUMED_THREADS = 5         # fallback when thread detection fails
+    DEFAULT_ASSUMED_THREADS = 5   # fallback when thread detection fails
+    # Global memory budget distribution shares
+    BUFFER_MEMORY_SHARE = 0.5       # 50% of max_memory for event buffer
+    CACHE_ENTRIES_PER_MB = 10_000   # ~100 bytes/entry in path_cache
+    PATH_CACHE_BUDGET_SHARE = 0.05  # 5% of max_memory for path_cache
+    SAMPLE_COUNTS_PER_MB = 12_500   # ~80 bytes/entry in sample counts hash
+    SAMPLE_COUNTS_BUDGET_SHARE = 0.02 # 2% of max_memory for sample counts
     # Boolean / non-numeric settings — plain attr_accessor
     attr_accessor :enabled,
@@ -37,7 +40,7 @@ module Catpm
       random_sample_rate cleanup_interval
       circuit_breaker_failure_threshold circuit_breaker_recovery_timeout
       sqlite_busy_timeout persistence_batch_size shutdown_timeout
-      stack_sample_interval min_segment_duration
+      stack_sample_interval min_segment_duration max_memory
     ].freeze
     # Numeric settings where nil means "no limit" / "disabled"
@@ -48,7 +51,6 @@ module Catpm
       events_max_samples_per_name max_stack_samples_per_request
       max_error_detail_length max_fingerprint_app_frames
       max_fingerprint_gem_frames cleanup_batch_size caller_scan_depth
-      max_memory_per_thread
     ].freeze
     (REQUIRED_NUMERIC + OPTIONAL_NUMERIC).each do |attr|
@@ -83,7 +85,7 @@ module Catpm
       @slow_threshold_per_kind = {}
       @ignored_targets = []
       @retention_period = nil # nil = keep forever (data is downsampled, not deleted)
-      @max_memory_per_thread = 2.megabytes
+      @max_memory = 20 # MB — global memory budget (2% of 1GB server)
       @flush_interval = 30 # seconds
       @flush_jitter = 5 # ±seconds
       @max_error_contexts = 5
@@ -124,17 +126,20 @@ module Catpm
       @show_untracked_segments = false
     end
-    def derived_request_memory_limit
-      return nil unless max_memory_per_thread
-      [max_memory_per_thread * REQUEST_MEMORY_SHARE, MIN_REQUEST_MEMORY].max
+    # Buffer gets BUFFER_MEMORY_SHARE of max_memory (MB, converted to bytes), floored at MIN_BUFFER_MEMORY
+    def effective_max_buffer_memory
+      bytes = (max_memory * 1_048_576 * BUFFER_MEMORY_SHARE).to_i
+      [bytes, MIN_BUFFER_MEMORY].max
     end
-    def derived_buffer_memory_limit(detected_threads = nil)
-      return MIN_BUFFER_MEMORY unless max_memory_per_thread
+    # Path cache limit derived from max_memory
+    def effective_path_cache_max
+      (max_memory * CACHE_ENTRIES_PER_MB * PATH_CACHE_BUDGET_SHARE).to_i
+    end
-      threads = detected_threads || DEFAULT_ASSUMED_THREADS
-      [max_memory_per_thread * BUFFER_MEMORY_SHARE * threads, MIN_BUFFER_MEMORY].max
+    # Sample counts hash limit derived from max_memory
+    def effective_sample_counts_max
+      (max_memory * SAMPLE_COUNTS_PER_MB * SAMPLE_COUNTS_BUDGET_SHARE).to_i
     end
     def slow_threshold_for(kind)

data/lib/catpm/event.rb CHANGED Viewed

@@ -9,14 +9,14 @@ module Catpm
     attr_accessor :kind, :target, :operation, :duration, :started_at,
                   :metadata, :error_class, :error_message, :backtrace,
-                  :sample_type, :context, :status, :request_id
+                  :sample_type, :context, :status
     EMPTY_HASH = {}.freeze
     private_constant :EMPTY_HASH
     def initialize(kind:, target:, operation: '', duration: 0.0, started_at: nil,
                    metadata: nil, error_class: nil, error_message: nil, backtrace: nil,
-                   sample_type: nil, context: nil, status: nil, request_id: nil)
+                   sample_type: nil, context: nil, status: nil)
       @kind = kind.to_s
       @target = target.to_s
       @operation = (operation || '').to_s
@@ -32,7 +32,6 @@ module Catpm
       @sample_type = sample_type
       @context = context
       @status = status
-      @request_id = request_id
     end
     def estimated_bytes

data/lib/catpm/fingerprint.rb CHANGED Viewed

@@ -40,7 +40,7 @@ module Catpm
       result = _app_frame?(line)
       @path_cache_mutex.synchronize do
-        @path_cache.clear if @path_cache.size > 4000
+        @path_cache.clear if @path_cache.size > Catpm.config.effective_path_cache_max
         @path_cache[line] = result
       end
       result

data/lib/catpm/flusher.rb CHANGED Viewed

@@ -3,7 +3,6 @@
 module Catpm
   class Flusher
     ERROR_LOG_BACKTRACE_LINES = 5
-    PARTIAL_STALE_TIMEOUT = 600 # seconds — orphaned partial samples cleaned after 10 minutes
     attr_reader :running
@@ -182,8 +181,7 @@ module Catpm
             sample_type: sample_type,
             recorded_at: event.started_at,
             duration: event.duration,
-            context: event.context || {},
-            request_id: event.request_id
+            context: event.context || {}
           }
           sample_hash[:error_fingerprint] = error_fp if error_fp
           samples << sample_hash
@@ -221,8 +219,6 @@ module Catpm
         b
       end
-      samples = merge_request_samples(samples)
       [ buckets, samples, error_groups.values ]
     end
@@ -350,7 +346,6 @@ module Catpm
       @last_cleanup_at = Time.now
       downsample_buckets
       cleanup_expired_data if Catpm.config.retention_period
-      cleanup_orphaned_partials
       Collector.reset_sample_counts!
     end
@@ -398,83 +393,93 @@ module Catpm
       cutoff = age_threshold.ago
       target_seconds = target_interval.to_i
-      # Process in batches to avoid loading all old buckets into memory
-      Catpm::Bucket.where(bucket_start: ...cutoff)
-        .select(:id, :kind, :target, :operation, :bucket_start)
-        .group_by { |b| [b.kind, b.target, b.operation] }
-        .each do |(_kind, _target, _operation), endpoint_buckets|
-          groups = endpoint_buckets.group_by do |bucket|
-            epoch = bucket.bucket_start.to_i
-            aligned_epoch = epoch - (epoch % target_seconds)
-            Time.at(aligned_epoch).utc
-          end
+      # Get unique endpoint keys first (small set), then process per-endpoint
+      # to avoid loading all old buckets into memory at once
+      endpoint_keys = Catpm::Bucket.where(bucket_start: ...cutoff)
+        .distinct.pluck(:kind, :target, :operation)
-          groups.each do |aligned_start, stub_buckets|
-            next if stub_buckets.size == 1 && stub_buckets.first.bucket_start.to_i % target_seconds == 0
-            # Load full records only for groups that need merging
-            bucket_ids = stub_buckets.map(&:id)
-            buckets = Catpm::Bucket.where(id: bucket_ids).to_a
-            merged = {
-              kind: buckets.first.kind,
-              target: buckets.first.target,
-              operation: buckets.first.operation,
-              bucket_start: aligned_start,
-              count: buckets.sum(&:count),
-              success_count: buckets.sum(&:success_count),
-              failure_count: buckets.sum(&:failure_count),
-              duration_sum: buckets.sum(&:duration_sum),
-              duration_max: buckets.map(&:duration_max).max,
-              duration_min: buckets.map(&:duration_min).min,
-              metadata_sum: merge_bucket_metadata(buckets, adapter),
-              p95_digest: merge_bucket_digests(buckets)
-            }
-            survivor = buckets.first
-            # Reassign all samples to the survivor bucket
-            Catpm::Sample.where(bucket_id: bucket_ids).update_all(bucket_id: survivor.id)
-            # Delete non-survivor source buckets (now sample-free)
-            Catpm::Bucket.where(id: bucket_ids - [survivor.id]).delete_all
-            # Overwrite survivor with merged data
-            survivor.update!(
-              bucket_start: aligned_start,
-              count: merged[:count],
-              success_count: merged[:success_count],
-              failure_count: merged[:failure_count],
-              duration_sum: merged[:duration_sum],
-              duration_max: merged[:duration_max],
-              duration_min: merged[:duration_min],
-              metadata_sum: merged[:metadata_sum],
-              p95_digest: merged[:p95_digest]
-            )
-          end
+      endpoint_keys.each do |kind, target, operation|
+        endpoint_buckets = Catpm::Bucket
+          .where(kind: kind, target: target, operation: operation, bucket_start: ...cutoff)
+          .select(:id, :bucket_start).to_a
+        groups = endpoint_buckets.group_by do |bucket|
+          epoch = bucket.bucket_start.to_i
+          aligned_epoch = epoch - (epoch % target_seconds)
+          Time.at(aligned_epoch).utc
+        end
+        groups.each do |aligned_start, stub_buckets|
+          next if stub_buckets.size == 1 && stub_buckets.first.bucket_start.to_i % target_seconds == 0
+          # Load full records only for groups that need merging
+          bucket_ids = stub_buckets.map(&:id)
+          buckets = Catpm::Bucket.where(id: bucket_ids).to_a
+          merged = {
+            kind: buckets.first.kind,
+            target: buckets.first.target,
+            operation: buckets.first.operation,
+            bucket_start: aligned_start,
+            count: buckets.sum(&:count),
+            success_count: buckets.sum(&:success_count),
+            failure_count: buckets.sum(&:failure_count),
+            duration_sum: buckets.sum(&:duration_sum),
+            duration_max: buckets.map(&:duration_max).max,
+            duration_min: buckets.map(&:duration_min).min,
+            metadata_sum: merge_bucket_metadata(buckets, adapter),
+            p95_digest: merge_bucket_digests(buckets)
+          }
+          survivor = buckets.first
+          # Reassign all samples to the survivor bucket
+          Catpm::Sample.where(bucket_id: bucket_ids).update_all(bucket_id: survivor.id)
+          # Delete non-survivor source buckets (now sample-free)
+          Catpm::Bucket.where(id: bucket_ids - [survivor.id]).delete_all
+          # Overwrite survivor with merged data
+          survivor.update!(
+            bucket_start: aligned_start,
+            count: merged[:count],
+            success_count: merged[:success_count],
+            failure_count: merged[:failure_count],
+            duration_sum: merged[:duration_sum],
+            duration_max: merged[:duration_max],
+            duration_min: merged[:duration_min],
+            metadata_sum: merged[:metadata_sum],
+            p95_digest: merged[:p95_digest]
+          )
         end
+      end
     end
     def downsample_event_tier(target_interval:, age_threshold:, adapter:)
       cutoff = age_threshold.ago
       target_seconds = target_interval.to_i
-      source_buckets = Catpm::EventBucket.where(bucket_start: ...cutoff).to_a
-      return if source_buckets.empty?
+      # Process per-name to avoid loading all event buckets into memory
+      names = Catpm::EventBucket.where(bucket_start: ...cutoff).distinct.pluck(:name)
+      return if names.empty?
-      groups = source_buckets.group_by do |bucket|
-        epoch = bucket.bucket_start.to_i
-        aligned_epoch = epoch - (epoch % target_seconds)
-        aligned_start = Time.at(aligned_epoch).utc
-        [bucket.name, aligned_start]
-      end
+      names.each do |name|
+        buckets = Catpm::EventBucket.where(name: name, bucket_start: ...cutoff).to_a
+        next if buckets.empty?
-      groups.each do |(name, aligned_start), buckets|
-        next if buckets.size == 1 && buckets.first.bucket_start.to_i % target_seconds == 0
+        groups = buckets.group_by do |bucket|
+          epoch = bucket.bucket_start.to_i
+          aligned_epoch = epoch - (epoch % target_seconds)
+          Time.at(aligned_epoch).utc
+        end
+        groups.each do |aligned_start, group_buckets|
+          next if group_buckets.size == 1 && group_buckets.first.bucket_start.to_i % target_seconds == 0
-        merged = { name: name, bucket_start: aligned_start, count: buckets.sum(&:count) }
-        Catpm::EventBucket.where(id: buckets.map(&:id)).delete_all
-        adapter.persist_event_buckets([merged])
+          merged = { name: name, bucket_start: aligned_start, count: group_buckets.sum(&:count) }
+          Catpm::EventBucket.where(id: group_buckets.map(&:id)).delete_all
+          adapter.persist_event_buckets([merged])
+        end
       end
     end
@@ -494,136 +499,6 @@ module Catpm
       combined.empty? ? nil : combined.serialize
     end
-    def merge_request_samples(samples)
-      return samples unless Catpm::Sample.request_id_supported?
-      by_request = {}     # request_id => { partials: [], final: nil }
-      regular = []
-      samples.each do |s|
-        rid = s[:request_id]
-        if rid
-          entry = (by_request[rid] ||= { partials: [], final: nil })
-          if s[:context].is_a?(Hash) && s[:context][:partial]
-            entry[:partials] << s
-          else
-            entry[:final] = s
-          end
-        else
-          regular << s
-        end
-      end
-      merged = []
-      by_request.each do |rid, entry|
-        if entry[:final]
-          # Merge in-batch partials
-          if entry[:partials].any?
-            merge_checkpoint_contexts(
-              entry[:final][:context],
-              entry[:partials].map { |p| p[:context] }
-            )
-          end
-          # Merge cross-batch partials from DB
-          db_partials = Catpm::Sample.where(request_id: rid)
-          if db_partials.exists?
-            merge_checkpoint_contexts(
-              entry[:final][:context],
-              db_partials.map(&:parsed_context)
-            )
-            db_partials.delete_all
-          end
-          # Clear request_id so persisted final sample won't be treated as orphan
-          entry[:final][:request_id] = nil
-          merged << entry[:final]
-        else
-          # Only partials, no final yet — persist as-is
-          merged.concat(entry[:partials])
-        end
-      end
-      regular + merged
-    end
-    def merge_checkpoint_contexts(final_ctx, checkpoint_ctxs)
-      final_segments = final_ctx[:segments] || final_ctx['segments']
-      return unless final_segments
-      final_ctrl_idx = final_segments.index { |s|
-        (s[:type] || s['type']) == 'controller'
-      }
-      sorted = checkpoint_ctxs.sort_by { |c|
-        c[:checkpoint_number] || c['checkpoint_number'] || 0
-      }
-      sorted.each do |cp_ctx|
-        cp_segments = cp_ctx[:segments] || cp_ctx['segments'] || []
-        old_to_new = {}
-        kept = []
-        cp_segments.each_with_index do |seg, i|
-          seg_type = seg[:type] || seg['type']
-          next if seg_type == 'request'
-          next if seg_type == 'controller'
-          old_to_new[i] = final_segments.size + kept.size
-          kept << seg.dup
-        end
-        kept.each do |seg|
-          pi_key = seg.key?(:parent_index) ? :parent_index : 'parent_index'
-          pi = seg[pi_key]
-          next unless pi
-          if old_to_new.key?(pi)
-            seg[pi_key] = old_to_new[pi]
-          else
-            seg[pi_key] = final_ctrl_idx || 0
-          end
-        end
-        final_segments.concat(kept)
-        # Merge summary
-        cp_summary = cp_ctx[:segment_summary] || cp_ctx['segment_summary']
-        if cp_summary
-          use_symbols = final_ctx.key?(:segment_summary)
-          summary_key = use_symbols ? :segment_summary : 'segment_summary'
-          final_summary = final_ctx[summary_key] ||= {}
-          cp_summary.each do |k, v|
-            nk = use_symbols ? k.to_sym : k.to_s
-            final_summary[nk] = (final_summary[nk] || 0) + v.to_f
-          end
-        end
-        # Merge capped flag
-        capped_key = final_ctx.key?(:segments_capped) ? :segments_capped : 'segments_capped'
-        cp_capped = cp_ctx[:segments_capped] || cp_ctx['segments_capped']
-        final_ctx[capped_key] = true if cp_capped
-      end
-      # Clean up checkpoint markers
-      final_ctx.delete(:partial)
-      final_ctx.delete('partial')
-      final_ctx.delete(:request_id)
-      final_ctx.delete('request_id')
-      final_ctx.delete(:checkpoint_number)
-      final_ctx.delete('checkpoint_number')
-    end
-    def cleanup_orphaned_partials
-      return unless Catpm::Sample.request_id_supported?
-      Catpm::Sample.where.not(request_id: nil)
-        .where(recorded_at: ..PARTIAL_STALE_TIMEOUT.seconds.ago)
-        .delete_all
-    rescue => e
-      Catpm.config.error_handler&.call(e)
-    end
     def cleanup_expired_data
       cutoff = Catpm.config.retention_period.ago
       batch_size = Catpm.config.cleanup_batch_size

data/lib/catpm/lifecycle.rb CHANGED Viewed

@@ -8,6 +8,7 @@ module Catpm
         initialize_buffer
         initialize_flusher
+        load_sample_counts
         apply_patches
         # Start the flusher in the current process.
@@ -24,6 +25,12 @@ module Catpm
       private
+      def load_sample_counts
+        Collector.load_sample_counts_eagerly!
+      rescue => e
+        Catpm.config.error_handler&.call(e)
+      end
       def apply_patches
         if Catpm.config.instrument_net_http
           if defined?(::Net::HTTP)
@@ -39,15 +46,7 @@ module Catpm
       end
       def initialize_buffer
-        max_bytes = Catpm.config.derived_buffer_memory_limit(detect_threads)
-        Catpm.buffer ||= Buffer.new(max_bytes: max_bytes)
-      end
-      def detect_threads
-        return Puma.cli_config.options[:max_threads] if defined?(Puma::Server) && Puma.respond_to?(:cli_config)
-        return ENV['RAILS_MAX_THREADS'].to_i if ENV['RAILS_MAX_THREADS'].present?
-        return Sidekiq[:concurrency] if defined?(Sidekiq) && Sidekiq.respond_to?(:[])
-        nil
+        Catpm.buffer ||= Buffer.new(max_bytes: Catpm.config.effective_max_buffer_memory)
       end
       def initialize_flusher

data/lib/catpm/middleware.rb CHANGED Viewed

@@ -20,8 +20,7 @@ module Catpm
           max_segments: Catpm.config.max_segments_per_request,
           request_start: env['catpm.request_start'],
           stack_sample: use_sampler,
-          call_tree: Catpm.config.instrument_call_tree,
-          memory_limit: Catpm.config.derived_request_memory_limit
+          call_tree: Catpm.config.instrument_call_tree
         )
         env['catpm.segments'] = req_segments
         Thread.current[:catpm_request_segments] = req_segments

data/lib/catpm/request_segments.rb CHANGED Viewed

@@ -9,9 +9,9 @@ module Catpm
     SEGMENT_BASE_BYTES = Event::OBJECT_OVERHEAD + (6 * Event::HASH_ENTRY_SIZE)
     SEGMENT_STRING_OVERHEAD = Event::OBJECT_OVERHEAD # per-string overhead in segment values
-    attr_reader :segments, :summary, :request_start, :estimated_bytes, :checkpoint_count, :request_id, :segments_filtered
+    attr_reader :segments, :summary, :request_start, :estimated_bytes, :segments_filtered
-    def initialize(max_segments:, request_start: nil, stack_sample: false, call_tree: false, memory_limit: nil)
+    def initialize(max_segments:, request_start: nil, stack_sample: false, call_tree: false)
       @max_segments = max_segments
       @request_start = request_start || Process.clock_gettime(Process::CLOCK_MONOTONIC)
       @segments = []
@@ -20,11 +20,7 @@ module Catpm
       @span_stack = []
       @tracked_ranges = []
       @call_tree = call_tree
-      @memory_limit = memory_limit
       @estimated_bytes = 0
-      @checkpoint_callback = nil
-      @checkpoint_count = 0
-      @request_id = memory_limit ? SecureRandom.hex(8) : nil
       @segments_filtered = 0
       if stack_sample
@@ -33,10 +29,6 @@ module Catpm
       end
     end
-    def on_checkpoint(&block)
-      @checkpoint_callback = block
-    end
     def add(type:, duration:, detail:, source: nil, started_at: nil)
       type_key = type.to_sym
       count_key, dur_key = SUMMARY_KEYS[type_key]
@@ -73,7 +65,6 @@ module Catpm
       end
       @estimated_bytes += estimate_segment_bytes(segment)
-      maybe_checkpoint
     end
     def push_span(type:, detail:, started_at: nil)
@@ -166,68 +157,5 @@ module Catpm
       bytes
     end
-    def maybe_checkpoint
-      return unless @memory_limit && @estimated_bytes > @memory_limit && @checkpoint_callback
-      checkpoint_data = {
-        segments: @segments,
-        summary: @summary,
-        overflow: @overflow,
-        sampler_segments: @sampler ? sampler_segments_for_checkpoint : [],
-        checkpoint_number: @checkpoint_count
-      }
-      @checkpoint_count += 1
-      rebuild_after_checkpoint
-      @checkpoint_callback.call(checkpoint_data)
-    end
-    def sampler_segments_for_checkpoint
-      if @call_tree
-        result = @sampler&.to_call_tree(tracked_ranges: @tracked_ranges) || []
-      else
-        result = @sampler&.to_segments(tracked_ranges: @tracked_ranges) || []
-      end
-      @sampler&.clear_samples!
-      result
-    end
-    # After checkpoint: keep only active spans from @span_stack, reset everything else.
-    def rebuild_after_checkpoint
-      if @span_stack.any?
-        # Clone active spans with corrected indices
-        new_segments = []
-        old_to_new = {}
-        @span_stack.each do |old_idx|
-          seg = @segments[old_idx]
-          next unless seg
-          new_idx = new_segments.size
-          old_to_new[old_idx] = new_idx
-          new_segments << seg.dup
-        end
-        # Fix parent_index references in cloned spans
-        new_segments.each do |seg|
-          if seg.key?(:parent_index) && old_to_new.key?(seg[:parent_index])
-            seg[:parent_index] = old_to_new[seg[:parent_index]]
-          else
-            seg.delete(:parent_index)
-          end
-        end
-        @span_stack = @span_stack.filter_map { |old_idx| old_to_new[old_idx] }
-        @segments = new_segments
-      else
-        @segments = []
-      end
-      @summary = Hash.new(0)
-      @tracked_ranges = []
-      @overflow = false
-      @estimated_bytes = 0
-      @segments_filtered = 0
-    end
   end
 end

data/lib/catpm/trace.rb CHANGED Viewed

@@ -89,23 +89,10 @@ module Catpm
           max_segments: config.max_segments_per_request,
           request_start: start_time,
           stack_sample: use_sampler,
-          call_tree: config.instrument_call_tree,
-          memory_limit: config.derived_request_memory_limit
+          call_tree: config.instrument_call_tree
         )
         Thread.current[:catpm_request_segments] = req_segments
         owns_segments = true
-        if config.derived_request_memory_limit
-          req_segments.on_checkpoint do |checkpoint_data|
-            Collector.process_checkpoint(
-              kind: kind, target: target, operation: operation,
-              context: context, metadata: metadata,
-              checkpoint_data: checkpoint_data,
-              request_start: start_time,
-              request_id: req_segments.request_id
-            )
-          end
-        end
       end
     end

data/lib/catpm/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Catpm
-  VERSION = '0.9.5'
+  VERSION = '0.9.6'
 end

data/lib/catpm.rb CHANGED Viewed

@@ -39,6 +39,7 @@ module Catpm
       @buffer = nil
       @flusher = nil
       Fingerprint.reset_caches!
+      Collector.reset_sample_counts!
     end
     def enabled?

data/lib/generators/catpm/templates/initializer.rb.tt CHANGED Viewed

@@ -50,7 +50,7 @@ Catpm.configure do |config|
   # config.events_max_samples_per_name = 20            # nil = unlimited
   # === Memory ===
-  # config.max_memory_per_thread = 2.megabytes  # memory budget per thread (buffer + request segments)
+  # config.max_memory = 20  # MB — global memory budget (2% of 1GB server)
   # === Buffering & Flushing ===
   # config.flush_interval = 30                         # seconds

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: catpm
 version: !ruby/object:Gem::Version
-  version: 0.9.5
+  version: 0.9.6
 platform: ruby
 authors:
 - ''