smith-agents 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +139 -0
  3. data/CODE_OF_CONDUCT.md +128 -0
  4. data/LICENSE +21 -0
  5. data/README.md +226 -0
  6. data/Rakefile +14 -0
  7. data/UPSTREAM_PROPOSAL.md +141 -0
  8. data/docs/CONFIGURATION.md +123 -0
  9. data/docs/PATTERNS.md +492 -0
  10. data/docs/PERSISTENCE.md +169 -0
  11. data/docs/TOOLS_AND_GUARDRAILS.md +140 -0
  12. data/docs/workflow_claim.md +58 -0
  13. data/exe/smith +7 -0
  14. data/lib/generators/smith/install/install_generator.rb +22 -0
  15. data/lib/generators/smith/install/templates/smith.rb.tt +44 -0
  16. data/lib/smith/agent/lifecycle.rb +264 -0
  17. data/lib/smith/agent/registry.rb +128 -0
  18. data/lib/smith/agent.rb +259 -0
  19. data/lib/smith/artifacts/file.rb +59 -0
  20. data/lib/smith/artifacts/memory.rb +75 -0
  21. data/lib/smith/artifacts/scoped_store.rb +29 -0
  22. data/lib/smith/artifacts.rb +5 -0
  23. data/lib/smith/budget/ledger.rb +42 -0
  24. data/lib/smith/budget.rb +5 -0
  25. data/lib/smith/cli.rb +82 -0
  26. data/lib/smith/context/observation_masking.rb +19 -0
  27. data/lib/smith/context/session.rb +42 -0
  28. data/lib/smith/context/state_injection.rb +24 -0
  29. data/lib/smith/context.rb +61 -0
  30. data/lib/smith/doctor/check.rb +12 -0
  31. data/lib/smith/doctor/checks/baseline.rb +84 -0
  32. data/lib/smith/doctor/checks/configuration.rb +56 -0
  33. data/lib/smith/doctor/checks/durability.rb +103 -0
  34. data/lib/smith/doctor/checks/live.rb +55 -0
  35. data/lib/smith/doctor/checks/models_registry.rb +66 -0
  36. data/lib/smith/doctor/checks/openai_api_mode.rb +51 -0
  37. data/lib/smith/doctor/checks/persistence.rb +99 -0
  38. data/lib/smith/doctor/checks/persistence_capabilities.rb +60 -0
  39. data/lib/smith/doctor/checks/persistence_registry.rb +82 -0
  40. data/lib/smith/doctor/checks/rails.rb +39 -0
  41. data/lib/smith/doctor/checks/serialization.rb +78 -0
  42. data/lib/smith/doctor/installer.rb +103 -0
  43. data/lib/smith/doctor/printer.rb +62 -0
  44. data/lib/smith/doctor/report.rb +39 -0
  45. data/lib/smith/doctor.rb +53 -0
  46. data/lib/smith/errors.rb +191 -0
  47. data/lib/smith/event.rb +11 -0
  48. data/lib/smith/events/.keep +0 -0
  49. data/lib/smith/events/bus.rb +60 -0
  50. data/lib/smith/events/step_completed.rb +11 -0
  51. data/lib/smith/events/subscription.rb +24 -0
  52. data/lib/smith/events.rb +5 -0
  53. data/lib/smith/guardrails/runner.rb +44 -0
  54. data/lib/smith/guardrails/url_verifier.rb +7 -0
  55. data/lib/smith/guardrails.rb +35 -0
  56. data/lib/smith/models/inference.rb +199 -0
  57. data/lib/smith/models/normalizer.rb +186 -0
  58. data/lib/smith/models/profile.rb +39 -0
  59. data/lib/smith/models.rb +132 -0
  60. data/lib/smith/persistence_adapters/active_record_store.rb +99 -0
  61. data/lib/smith/persistence_adapters/cache_store.rb +79 -0
  62. data/lib/smith/persistence_adapters/memory.rb +105 -0
  63. data/lib/smith/persistence_adapters/rails_cache.rb +20 -0
  64. data/lib/smith/persistence_adapters/redis_store.rb +136 -0
  65. data/lib/smith/persistence_adapters/retry.rb +42 -0
  66. data/lib/smith/persistence_adapters.rb +112 -0
  67. data/lib/smith/pricing.rb +65 -0
  68. data/lib/smith/providers/openai/responses.rb +315 -0
  69. data/lib/smith/providers/openai/routing.rb +67 -0
  70. data/lib/smith/providers/openai/tools_extensions.rb +106 -0
  71. data/lib/smith/railtie.rb +9 -0
  72. data/lib/smith/tasks/doctor.rake +38 -0
  73. data/lib/smith/tool/budget_enforcement.rb +33 -0
  74. data/lib/smith/tool/capability_builder.rb +18 -0
  75. data/lib/smith/tool/capture.rb +22 -0
  76. data/lib/smith/tool/compatibility.rb +72 -0
  77. data/lib/smith/tool/policy.rb +40 -0
  78. data/lib/smith/tool.rb +171 -0
  79. data/lib/smith/tools/think.rb +25 -0
  80. data/lib/smith/tools/url_fetcher.rb +16 -0
  81. data/lib/smith/tools/web_search.rb +17 -0
  82. data/lib/smith/tools.rb +5 -0
  83. data/lib/smith/trace/logger.rb +46 -0
  84. data/lib/smith/trace/memory.rb +53 -0
  85. data/lib/smith/trace/open_telemetry.rb +57 -0
  86. data/lib/smith/trace.rb +89 -0
  87. data/lib/smith/types.rb +16 -0
  88. data/lib/smith/version.rb +5 -0
  89. data/lib/smith/workflow/artifact_integration.rb +41 -0
  90. data/lib/smith/workflow/budget_integration.rb +105 -0
  91. data/lib/smith/workflow/claim.rb +118 -0
  92. data/lib/smith/workflow/data_volume_policy.rb +36 -0
  93. data/lib/smith/workflow/deadline_enforcement.rb +100 -0
  94. data/lib/smith/workflow/deterministic_execution.rb +53 -0
  95. data/lib/smith/workflow/deterministic_step.rb +57 -0
  96. data/lib/smith/workflow/dsl.rb +223 -0
  97. data/lib/smith/workflow/durability.rb +369 -0
  98. data/lib/smith/workflow/evaluator_optimizer.rb +220 -0
  99. data/lib/smith/workflow/event_integration.rb +24 -0
  100. data/lib/smith/workflow/execution.rb +127 -0
  101. data/lib/smith/workflow/execution_frame.rb +166 -0
  102. data/lib/smith/workflow/guardrail_integration.rb +40 -0
  103. data/lib/smith/workflow/nested_execution.rb +69 -0
  104. data/lib/smith/workflow/orchestrator_worker.rb +145 -0
  105. data/lib/smith/workflow/parallel.rb +50 -0
  106. data/lib/smith/workflow/parallel_execution.rb +75 -0
  107. data/lib/smith/workflow/persistence.rb +358 -0
  108. data/lib/smith/workflow/pipeline.rb +117 -0
  109. data/lib/smith/workflow/router.rb +53 -0
  110. data/lib/smith/workflow/transition.rb +208 -0
  111. data/lib/smith/workflow.rb +555 -0
  112. data/lib/smith.rb +254 -0
  113. data/script/profile_tool_results.rb +94 -0
  114. data/sig/smith.rbs +4 -0
  115. metadata +258 -0
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dry-container"
4
+ require "monitor"
5
+
6
+ module Smith
7
# Registry of model capability Profiles, keyed by the stringified model id.
# It is kept separate from Smith.config.pricing (per-installation billing)
# and only describes payload-shape capabilities: thinking encoding,
# temperature acceptance, and the endpoint preference for tools+thinking.
#
# No concrete model ids are declared here. Pattern-based provider rules live
# in Smith::Models::Inference; applications call Smith::Models.register only
# for a custom model that diverges from its provider's default behavior.
#
# find_or_infer(model_id) resolves in this order:
#   1. an application-registered explicit Profile (override wins)
#   2. a Smith::Models::Inference rule match
#   3. a safe default (no thinking, accepts temperature, no routing)
module Models
  extend Dry::Container::Mixin

  class CollisionError < Smith::Error; end

  # Model-id prefixes used to guess a provider when the caller gives none.
  PROVIDER_PATTERNS = {
    anthropic: /\Aclaude/i,
    openai: /\A(gpt|o\d)/i,
    gemini: /\Agemini/i
  }.freeze
  private_constant :PROVIDER_PATTERNS

  # Built while the module body is evaluated rather than lazily with `||=`,
  # so concurrent first callers can never end up holding different Monitor
  # instances (which would partially defeat synchronization).
  @_registry_monitor = Monitor.new

  # @return [Monitor] the lock guarding every registry read and write
  def self.registry_monitor
    @_registry_monitor
  end

  # Container keys are always Strings.
  def self.normalize_key(model_id)
    model_id.to_s
  end

  # @return the Profile registered for model_id, or nil when none is registered
  def self.find(model_id)
    registry_monitor.synchronize do
      lookup_key = normalize_key(model_id)
      resolve(lookup_key) if key?(lookup_key)
    end
  end

  # Application overrides first, then Inference rules, then the safe default.
  def self.find_or_infer(model_id, provider: nil)
    registered = find(model_id)
    registered || infer(model_id, provider: provider)
  end

  # Builds a Profile without consulting the registry: an Inference rule match
  # when Inference is loaded and recognizes the id, otherwise the safe default.
  def self.infer(model_id, provider: nil)
    if defined?(Inference)
      matched = Inference.profile_for(model_id)
      return matched if matched
    end

    Profile.new(
      model_id: normalize_key(model_id),
      provider: provider || guess_provider(model_id),
      thinking_shape: nil,
      accepts_temperature: true,
      tools_with_thinking_native: false,
      tools_with_thinking_route: nil
    )
  end

  # Registers a Profile under its normalized model id and returns it.
  #
  # * Re-registering a value-equal Profile is a no-op.
  # * A value-unequal Profile with the same model_id replaces the old entry
  #   silently. This covers a Rails reload swapping the Profile object; the
  #   accepted trade-off is that a host which deliberately re-registers with
  #   different capabilities is replaced silently as well. The original notes
  #   say Smith::Agent::Registry makes the same choice (not shown here).
  # * CollisionError is raised only when an entry exists under the key and
  #   stale_reload_binding? rejects the replacement.
  def self.register(profile)
    registry_monitor.synchronize do
      key = normalize_key(profile.model_id)
      existing = resolve(key) if key?(key)

      next profile if existing == profile

      if existing
        unless stale_reload_binding?(existing, profile)
          raise CollisionError,
                "model #{key.inspect} already registered with a different profile"
        end

        # Drop the stale binding first so the container accepts the key again.
        _container.delete(key)
      end

      super(key, profile)
      profile
    end
  end

  # @return [Array] every registered Profile, ordered by registry key
  def self.all
    registry_monitor.synchronize do
      keys.sort.map { |registered_key| resolve(registered_key) }
    end
  end

  # Empties the registry (a no-op when the container was never created).
  def self.clear!
    registry_monitor.synchronize { @_container&.clear }
  end

  # @return [Symbol] the first provider whose pattern matches the id, else :unknown
  def self.guess_provider(model_id)
    key = normalize_key(model_id)
    provider_name, = PROVIDER_PATTERNS.find { |_name, pattern| key.match?(pattern) }
    provider_name || :unknown
  end

  # True when both Profiles carry the same model_id. Callers only reach this
  # after value equality has already failed, so a match means "same model,
  # different Profile object/values" and the entry is replaced, not rejected.
  def self.stale_reload_binding?(existing, profile)
    existing.model_id == profile.model_id
  end
  private_class_method :stale_reload_binding?
end
132
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Smith
4
+ module PersistenceAdapters
5
+ class ActiveRecordStore
6
+ # AR transient errors resolved via class-name guard so Smith
7
+ # doesn't require activerecord at load time. Hosts that use this
8
+ # adapter already have activerecord in their dep tree.
9
+ TRANSIENT_ERROR_NAMES = %w[
10
+ ActiveRecord::ConnectionNotEstablished
11
+ ActiveRecord::StatementInvalid
12
+ ActiveRecord::TransactionIsolationConflict
13
+ ].freeze
14
+
15
+ def self.transient_errors
16
+ TRANSIENT_ERROR_NAMES.filter_map do |name|
17
+ Object.const_get(name)
18
+ rescue NameError
19
+ nil
20
+ end
21
+ end
22
+
23
+ def initialize(model:, key_column: :key, payload_column: :payload, version_column: :lock_version)
24
+ @model_source = model
25
+ @key_column = key_column
26
+ @payload_column = payload_column
27
+ @version_column = version_column
28
+ end
29
+
30
+ def store(key, payload, ttl: nil) # rubocop:disable Lint/UnusedMethodArgument
31
+ # TTL is deferred for ActiveRecordStore — would require an
32
+ # `expires_at` column + a periodic sweeper job. Ignored here;
33
+ # documented as a known limitation.
34
+ Retry.with_retries(operation: :store, transient: self.class.transient_errors) do
35
+ record = model_class.find_or_initialize_by(@key_column => key)
36
+ record.public_send(:"#{@payload_column}=", payload)
37
+ record.save!
38
+ end
39
+ end
40
+
41
+ def fetch(key)
42
+ Retry.with_retries(operation: :fetch, transient: self.class.transient_errors) do
43
+ model_class.find_by(@key_column => key)&.public_send(@payload_column)
44
+ end
45
+ end
46
+
47
+ def delete(key)
48
+ Retry.with_retries(operation: :delete, transient: self.class.transient_errors) do
49
+ model_class.where(@key_column => key).delete_all
50
+ end
51
+ end
52
+
53
+ # Optimistic locking via Rails' built-in optimistic locking on the
54
+ # `lock_version` column. Requires the AR model to have a
55
+ # `lock_version` (or configured) integer column with default 0.
56
+ # If absent, raises ArgumentError directing the host to migrate.
57
+ def store_versioned(key, payload, expected_version:, ttl: nil) # rubocop:disable Lint/UnusedMethodArgument
58
+ unless model_class.column_names.include?(@version_column.to_s)
59
+ raise ArgumentError,
60
+ "ActiveRecordStore#store_versioned requires a #{@version_column} column on " \
61
+ "#{model_class.name}. Add via: " \
62
+ "add_column :#{model_class.table_name}, :#{@version_column}, :integer, default: 0"
63
+ end
64
+
65
+ Retry.with_retries(operation: :store_versioned, transient: self.class.transient_errors) do
66
+ record = model_class.find_or_initialize_by(@key_column => key)
67
+ if record.persisted? && record.public_send(@version_column) != expected_version
68
+ raise Smith::PersistenceVersionConflict.new(
69
+ key: key, expected: expected_version, actual: record.public_send(@version_column)
70
+ )
71
+ end
72
+ record.public_send(:"#{@payload_column}=", payload)
73
+ record.save!
74
+ rescue defined?(::ActiveRecord::StaleObjectError) ? ::ActiveRecord::StaleObjectError : StandardError => e
75
+ raise unless defined?(::ActiveRecord::StaleObjectError) && e.is_a?(::ActiveRecord::StaleObjectError)
76
+
77
+ raise Smith::PersistenceVersionConflict.new(
78
+ key: key, expected: expected_version, actual: :concurrent
79
+ )
80
+ end
81
+ end
82
+
83
+ private
84
+
85
+ def model_class
86
+ @model_class ||= begin
87
+ case @model_source
88
+ when String
89
+ Object.const_get(@model_source)
90
+ else
91
+ @model_source
92
+ end
93
+ rescue NameError => e
94
+ raise ArgumentError, "ActiveRecord model #{@model_source.inspect} could not be resolved: #{e.message}"
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Smith
4
+ module PersistenceAdapters
5
+ class CacheStore
6
+ # Cache backends vary widely; the transient list is intentionally
7
+ # broad. Hosts using a specific backend can subclass and tighten.
8
+ # NOTE: NO store_versioned implementation — cache backends don't
9
+ # have uniform CAS semantics. Workflow#persist! checks via
10
+ # respond_to? and falls back to non-versioned store + warning.
11
+ TRANSIENT_ERRORS = [
12
+ Errno::ECONNREFUSED,
13
+ Errno::ETIMEDOUT,
14
+ Errno::EPIPE,
15
+ IOError
16
+ ].freeze
17
+
18
+ def initialize(store:, namespace: "smith")
19
+ @store_source = store
20
+ @namespace = namespace
21
+ end
22
+
23
+ def store(key, payload, ttl: Smith.config.persistence_ttl)
24
+ Retry.with_retries(operation: :store, transient: TRANSIENT_ERRORS) do
25
+ if ttl
26
+ backend.write(namespaced(key), payload, expires_in: ttl)
27
+ else
28
+ backend.write(namespaced(key), payload)
29
+ end
30
+ end
31
+ end
32
+
33
+ def fetch(key)
34
+ Retry.with_retries(operation: :fetch, transient: TRANSIENT_ERRORS) do
35
+ backend.read(namespaced(key))
36
+ end
37
+ end
38
+
39
+ def delete(key)
40
+ Retry.with_retries(operation: :delete, transient: TRANSIENT_ERRORS) do
41
+ backend.delete(namespaced(key))
42
+ end
43
+ end
44
+
45
+ def backend_name
46
+ backend.class.name || backend.class.to_s
47
+ end
48
+
49
+ def durability_warning
50
+ process_local_backend_warning
51
+ end
52
+
53
+ private
54
+
55
+ def backend
56
+ @backend ||= begin
57
+ store = @store_source.respond_to?(:call) ? @store_source.call : @store_source
58
+ raise ArgumentError, "cache store is required" unless store
59
+
60
+ store
61
+ end
62
+ end
63
+
64
+ def namespaced(key)
65
+ [@namespace, key].compact.join(":")
66
+ end
67
+
68
+ def process_local_backend_warning
69
+ return nil unless process_local_memory_backend?
70
+
71
+ "#{backend_name} is process-local memory and will not survive restarts"
72
+ end
73
+
74
+ def process_local_memory_backend?
75
+ backend_name.end_with?("MemoryStore")
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "monitor"
4
+
5
+ module Smith
6
+ module PersistenceAdapters
7
# In-process, Hash-backed persistence adapter. Every operation runs inside a
# Monitor, and nothing here performs I/O, so there are no transient errors to
# retry. Intended for tests and quick smoke runs.
#
# TTLs are emulated by stamping each entry with an expiry time; an entry past
# its expiry is treated as absent by every read AND by the version check in
# #store_versioned, matching a backend that physically expires keys.
#
# Auto-selected by Smith.persistence_adapter when both
# Smith.config.persistence_adapter is nil AND Smith.config.test_mode is true
# (typically set in spec_helper.rb).
class Memory
  def initialize
    @store = {}
    @heartbeats = {}
    @monitor = Monitor.new
  end

  # Stores payload under key, replacing any previous entry.
  #
  # @param ttl [Numeric, nil] seconds until the entry expires; nil keeps it forever
  def store(key, payload, ttl: Smith.config.persistence_ttl)
    @monitor.synchronize { @store[key] = build_entry(payload, ttl) }
  end

  # @return the stored payload, or nil when the key is missing or expired
  def fetch(key)
    @monitor.synchronize { live_entry(@store, key)&.fetch(:payload) }
  end

  # Removes both the payload and the heartbeat recorded for key.
  def delete(key)
    @monitor.synchronize do
      @store.delete(key)
      @heartbeats.delete(key)
    end
  end

  # Records "now" (UTC) as the latest heartbeat for key.
  #
  # @param ttl [Numeric, nil] seconds until the heartbeat expires; nil keeps it forever
  def record_heartbeat(key, ttl: Smith.config.persistence_ttl)
    @monitor.synchronize do
      now = Time.now.utc
      @heartbeats[key] = { at: now, expires_at: ttl ? now + ttl : nil }
    end
  end

  # @return [Time, nil] the last recorded heartbeat, or nil when missing or expired
  def last_heartbeat(key)
    @monitor.synchronize { live_entry(@heartbeats, key)&.fetch(:at) }
  end

  # Optimistic-locking write, made atomic by the monitor.
  #
  # The current version is read from the stored payload's JSON
  # `persistence_version` field (the same convention RedisStore uses). A key
  # that is missing or already expired has nothing to conflict with, so the
  # write always succeeds.
  #
  # @raise [Smith::PersistenceVersionConflict] when a live entry's version
  #   differs from expected_version
  def store_versioned(key, payload, expected_version:, ttl: Smith.config.persistence_ttl)
    @monitor.synchronize do
      existing = live_entry(@store, key)
      if existing
        current_version = parse_version(existing[:payload])
        if current_version != expected_version
          raise Smith::PersistenceVersionConflict.new(
            key: key, expected: expected_version, actual: current_version
          )
        end
      end
      @store[key] = build_entry(payload, ttl)
    end
  end

  # Drops every payload and every heartbeat, so no state leaks between runs.
  def clear!
    @monitor.synchronize do
      @heartbeats.clear
      @store.clear
    end
  end

  private

  # Entry shape shared by #store and #store_versioned.
  def build_entry(payload, ttl)
    { payload: payload, expires_at: ttl ? Time.now.utc + ttl : nil }
  end

  # Returns table[key] unless it is missing or expired; expired entries are
  # evicted on the way out. Must be called with the monitor held.
  def live_entry(table, key)
    entry = table[key]
    return nil if entry.nil?
    return entry unless expired?(entry)

    table.delete(key)
    nil
  end

  def expired?(entry)
    deadline = entry[:expires_at]
    !deadline.nil? && deadline < Time.now.utc
  end

  # Version embedded in a JSON payload. Anything that is not a JSON object
  # (unparseable text, a JSON array/scalar, a non-String payload) counts as
  # version 0 instead of raising.
  def parse_version(payload)
    parsed = JSON.parse(payload)
    parsed.is_a?(Hash) ? parsed.fetch("persistence_version", 0) : 0
  rescue JSON::ParserError, TypeError
    0
  end
end
+ end
105
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Smith
4
+ module PersistenceAdapters
5
# CacheStore preconfigured for Rails.cache. An explicit `store:` is used as
# given; otherwise Rails.cache is looked up lazily, the first time the parent
# class resolves its backend.
class RailsCache < CacheStore
  def initialize(store: nil, namespace: "smith")
    store_source = store || method(:default_store)
    super(store: store_source, namespace: namespace)
  end

  private

  # @return Rails.cache
  # @raise [ArgumentError] when Rails is not loaded, has no `cache`, or the cache is nil
  def default_store
    rails_cache = ::Rails.cache if defined?(::Rails) && ::Rails.respond_to?(:cache)
    return rails_cache if rails_cache

    raise ArgumentError, "Rails.cache is not available"
  end
end
19
+ end
20
+ end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "time"
4
+
5
+ module Smith
6
+ module PersistenceAdapters
7
class RedisStore
  # Redis error classes treated as transient. They are listed by name and
  # resolved on demand so this file never references a Redis constant while
  # being loaded. Errors outside this list propagate without a retry.
  TRANSIENT_ERROR_NAMES = %w[
    Redis::BaseConnectionError
    Redis::TimeoutError
    Redis::CannotConnectError
    Redis::ConnectionError
  ].freeze

  # Socket-level errors that are always treated as transient.
  SOCKET_ERRORS = [Errno::ECONNREFUSED, Errno::ETIMEDOUT, Errno::EPIPE].freeze
  private_constant :SOCKET_ERRORS

  # @return [Array<Class>] the TRANSIENT_ERROR_NAMES entries that currently
  #   resolve to a constant, followed by the socket-level errors
  def self.transient_errors
    resolved = TRANSIENT_ERROR_NAMES.filter_map do |name|
      Object.const_get(name)
    rescue NameError
      nil
    end
    resolved + SOCKET_ERRORS
  end

  # @param redis a Redis client, or a callable returning one (resolved on first use)
  # @param namespace [String, nil] prefix joined to every key with ":"
  def initialize(redis:, namespace: "smith")
    @redis_source = redis
    @namespace = namespace
  end

  # SETs payload under the namespaced key; `ex:` is only sent when a ttl is given.
  def store(key, payload, ttl: Smith.config.persistence_ttl)
    with_retries(:store) { write(client, namespaced(key), payload, ttl) }
  end

  # @return [String, nil] the stored payload, or nil when the key is absent
  def fetch(key)
    with_retries(:fetch) { client.get(namespaced(key)) }
  end

  # Deletes the payload key and its heartbeat key in one DEL.
  def delete(key)
    with_retries(:delete) { client.del(namespaced(key), namespaced_heartbeat(key)) }
  end

  # Stores the current UTC time (ISO 8601) under the heartbeat key.
  def record_heartbeat(key, ttl: Smith.config.persistence_ttl)
    with_retries(:record_heartbeat) do
      write(client, namespaced_heartbeat(key), Time.now.utc.iso8601, ttl)
    end
  end

  # @return [Time, nil] the last heartbeat in UTC; nil when the key is absent
  #   or its value cannot be parsed as a time
  def last_heartbeat(key)
    with_retries(:last_heartbeat) do
      raw = client.get(namespaced_heartbeat(key))
      next nil if raw.nil?

      Time.parse(raw).utc
    rescue ArgumentError
      nil
    end
  end

  # Optimistic-locking write via WATCH/MULTI/EXEC.
  #
  # The stored payload's JSON `persistence_version` is compared with
  # expected_version while the key is WATCHed; an absent key has nothing to
  # conflict with. A nil result from the watch block is reported as a
  # concurrent write.
  #
  # @raise [Smith::PersistenceVersionConflict] on a version mismatch
  #   (`actual:` is the stored version) or on a nil watch result
  #   (`actual: :concurrent`)
  def store_versioned(key, payload, expected_version:, ttl: Smith.config.persistence_ttl)
    with_retries(:store_versioned) do
      namespaced_key = namespaced(key)
      result = client.watch(namespaced_key) do
        current = client.get(namespaced_key)
        if current && (current_version = parse_version(current)) != expected_version
          client.unwatch
          raise Smith::PersistenceVersionConflict.new(
            key: key, expected: expected_version, actual: current_version
          )
        end

        client.multi { |tx| write(tx, namespaced_key, payload, ttl) }
      end

      if result.nil?
        raise Smith::PersistenceVersionConflict.new(
          key: key, expected: expected_version, actual: :concurrent
        )
      end

      result
    end
  end

  private

  # Runs the block through Retry with this adapter's transient error list.
  def with_retries(operation, &block)
    Retry.with_retries(operation: operation, transient: self.class.transient_errors, &block)
  end

  # SET with an `ex:` expiry only when ttl is present. `target` is either the
  # client or the transaction object yielded by `multi`.
  def write(target, redis_key, value, ttl)
    ttl ? target.set(redis_key, value, ex: ttl) : target.set(redis_key, value)
  end

  # Memoized client; a callable source is invoked on first use.
  def client
    @client ||= begin
      resolved = @redis_source.respond_to?(:call) ? @redis_source.call : @redis_source
      raise ArgumentError, "Redis client is required" unless resolved

      resolved
    end
  end

  # "<namespace>:<key>"; nil parts are dropped before joining.
  def namespaced(key)
    [@namespace, key].compact.join(":")
  end

  # "<namespace>:heartbeat:<key>"; nil parts are dropped before joining.
  def namespaced_heartbeat(key)
    [@namespace, "heartbeat", key].compact.join(":")
  end

  # Version embedded in a JSON payload. Unparseable text and valid JSON that
  # is not an object (array, number, null, ...) both count as version 0, so a
  # foreign value under the key produces an ordinary version comparison
  # instead of a NoMethodError.
  def parse_version(payload)
    parsed = JSON.parse(payload)
    parsed.is_a?(Hash) ? parsed.fetch("persistence_version", 0) : 0
  rescue JSON::ParserError
    0
  end
end
+ end
136
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Smith
4
+ module PersistenceAdapters
5
# Retry-with-exponential-backoff wrapper for adapter operations, so they can
# survive transient I/O errors. Once the attempts are used up it raises
# Smith::PersistenceIOError wrapping the last transient error.
#
# Adapter-agnostic: each caller passes its own `transient:` list of error
# classes, because what counts as transient differs per backend. Errors not
# in that list propagate immediately; an empty list disables retrying.
module Retry
  module_function

  # Yields until the block returns without raising a transient error.
  #
  # @param operation [Symbol] label used in log lines and in the raised error
  # @param transient [Array<Class>] error classes worth retrying
  # @param policy [Hash, nil] optional :attempts (default 3), :base_delay
  #   (default 0.1s) and :max_delay (default 1.0s); nil means all defaults
  # @param logger [#warn, nil] receives one warning per failed, retried attempt
  # @return whatever the block returns
  # @raise [Smith::PersistenceIOError] when every attempt hit a transient error
  def with_retries(operation:, transient:, policy: Smith.config.persistence_retry_policy,
                   logger: Smith.config.logger)
    policy ||= {}
    # Always run the operation at least once: with zero (or negative) attempts
    # the block would never execute, yet an I/O failure with no cause would
    # be reported.
    attempts = [policy.fetch(:attempts, 3).to_i, 1].max
    base = policy.fetch(:base_delay, 0.1)
    max_delay = policy.fetch(:max_delay, 1.0)
    last_error = nil

    attempts.times do |i|
      return yield
    rescue *transient => e
      last_error = e
      break if i == attempts - 1

      # Delay doubles per attempt (base, 2*base, 4*base, ...) up to max_delay.
      delay = [base * (2**i), max_delay].min
      logger&.warn(
        "Smith::PersistenceAdapters::Retry #{operation} attempt #{i + 1}/#{attempts} failed: " \
        "#{e.class}: #{e.message}; sleeping #{delay}s"
      )
      sleep(delay)
    end

    raise Smith::PersistenceIOError.new(operation: operation, cause: last_error)
  end
end
41
+ end
42
+ end