deja 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
module Deja
  # Collects every host-specific setting so the gem itself stays ignorant of the
  # application it runs in. You register at least one provider; cache_root has a
  # default, and the judge settings only matter when the `meet_requirements`
  # matcher is used.
  class Configuration
    # Recorded-cache location used when no explicit cache_root has been assigned;
    # it is resolved relative to project_root.
    DEFAULT_CACHE_SUBPATH = "spec/support/deja_cache"

    # project_root: paths shown in error messages are computed relative to it.
    # adapters: registered provider adapters, keyed by registration name.
    attr_reader :project_root, :adapters

    # Attributes that override the `meet_requirements` judge's defaults
    # (model, temperature, ...). Deja does not enumerate them because different
    # judge LLMs accept different arguments; the defaults themselves live with
    # the judge code. `messages` and `output_config` are reserved by the matcher
    # and cannot be overridden.
    attr_writer :judge_attrs

    def initialize
      @adapters = {}
      @judge_attrs = {}
      @judge_client = nil
      @cache_root = nil
      @project_root = Pathname.new(Dir.pwd)
    end

    # The judge overrides, or an empty Hash when the writer was given nil/false.
    def judge_attrs
      return @judge_attrs if @judge_attrs

      {}
    end

    # Where recorded cache files live: the explicitly assigned cache_root, or
    # project_root/spec/support/deja_cache when none was assigned.
    def cache_root
      return @cache_root if @cache_root

      project_root.join(DEFAULT_CACHE_SUBPATH)
    end

    # Accepts a String or Pathname (e.g. Rails.root.join(...)). A nil/false value
    # is stored as-is, which makes #cache_root fall back to the default.
    def cache_root=(value)
      @cache_root = value ? Pathname.new(value.to_s) : value
    end

    # Accepts anything whose #to_s is a path; stored as a Pathname.
    def project_root=(value)
      @project_root = Pathname.new(value.to_s)
    end

    # Register a provider adapter.
    #
    # provider    - a built-in adapter name (today: `:anthropic`).
    # install     - callable that swaps your app's client for Deja's stub; it is
    #               run in the example's context, so RSpec's `allow` is available.
    # real_client - optional callable building a live client; defaults per provider.
    # as          - registration name, for when you want two of the same provider.
    #
    #   c.register :anthropic,
    #     install: ->(client) { allow(AnthropicClient).to receive(:client).and_return(client) },
    #     real_client: -> { Anthropic::Client.new(api_key: my_key) }
    #
    # Returns the adapter built by Deja::Adapters.build.
    def register(provider, install:, real_client: nil, as: provider)
      adapter = Deja::Adapters.build(provider, key: as, install: install, real_client: real_client)
      @adapters[as] = adapter
    end

    # How to build the client used by the `meet_requirements` judge. Required if
    # you use that matcher: there is no default, so the judge's auth/model is an
    # explicit choice. The block returns a client.
    #
    #   c.judge_client { Anthropic::Client.new }
    #
    # With a block, stores it and returns it. Without a block, returns the stored
    # proc and raises Deja::Error when none has been configured.
    def judge_client(&block)
      return @judge_client = block if block
      return @judge_client if @judge_client

      raise Deja::Error, <<~MSG
        Deja.configuration.judge_client is not set. The `meet_requirements`
        matcher needs a client to judge values against requirements. Set one in
        your Deja.configure block:

        Deja.configure do |c|
        c.judge_client { Anthropic::Client.new }
        end
      MSG
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "deja/judges/base"
4
+
5
module Deja
  module Judges
    # Judge adapter for clients from the Anthropic Ruby SDK. Inside this class,
    # write `::Anthropic` for the SDK constant: a bare `Anthropic` would resolve
    # to this class itself.
    class Anthropic < Base
      # Request attributes used unless the user's judge_attrs override them.
      DEFAULTS = {
        model: "claude-sonnet-4-5",
        max_tokens: 512,
        system: "You evaluate whether a candidate value meets a set of requirements. " \
                "Use the structured output schema to return your verdict.",
      }.freeze

      class << self
        # Truthy only when the SDK is loaded and `client` is one of its clients.
        # Returns nil (not false) when ::Anthropic::Client is not defined.
        def handles?(client)
          return nil unless defined?(::Anthropic::Client)

          client.is_a?(::Anthropic::Client)
        end

        # Name shown in the "Deja can judge with: ..." error text.
        def client_description
          "Anthropic::Client"
        end
      end

      # Default request attrs for this judge.
      def defaults
        DEFAULTS
      end
    end

    # Make the adapter discoverable through Deja::Judges.for_client.
    register(Anthropic)
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Deja
  # Judge adapters teach the `meet_requirements` matcher how to judge with a given
  # LLM client. An adapter is selected by the *type* of the object your
  # `judge_client` returns, so the right defaults follow from the provider you
  # chose rather than being assumed globally.
  #
  # Today an adapter supplies the default request attrs (model, etc.). The matcher
  # still builds the request and parses the response (both Anthropic-shaped); as
  # more judge providers are added, that construction/parsing is meant to move
  # onto the adapter too, which is why dispatch already happens here.
  module Judges
    @registered = []

    class << self
      # Register a judge adapter class. The list is kept newest-first, so a more
      # specific adapter registered later can shadow a more general one.
      #
      # Registration is idempotent: registering a class that is already present
      # moves it to the front instead of adding a second entry, so `registered`
      # and the adapter list in error messages never contain duplicates.
      #
      # Returns the registry array.
      def register(klass)
        @registered.delete(klass)
        @registered.unshift(klass)
      end

      # The registered adapter classes, newest first.
      def registered
        @registered
      end

      # Build the adapter for the client your `judge_client` returned.
      #
      # Returns an instance of the first registered adapter whose `.handles?`
      # accepts the client. Raises Deja::Error, listing the supported clients,
      # when no registered adapter handles it.
      def for_client(client)
        klass = @registered.find { |candidate| candidate.handles?(client) }
        unless klass
          raise Deja::Error, <<~MSG
            No Deja judge adapter handles #{client.class} (the object your
            judge_client returned). Deja can judge with: #{descriptions}.
            Point judge_client at one of those, or add a Deja::Judges::Base
            subclass that handles your client.
          MSG
        end

        klass.new(client)
      end

      # Comma-separated client descriptions of every registered adapter.
      def descriptions
        @registered.map(&:client_description).join(", ")
      end
    end

    # Abstract base for judge adapters. Subclasses implement `.handles?` and
    # `#defaults`, and may override `.client_description`.
    class Base
      attr_reader :client

      def initialize(client)
        @client = client
      end

      # Does this adapter handle the given judge-client instance?
      # Raises NotImplementedError unless a subclass overrides it.
      def self.handles?(_client)
        raise NotImplementedError, "#{name} must implement .handles?"
      end

      # Human-readable client name, used in error messages. Defaults to the
      # adapter's class name.
      def self.client_description
        name
      end

      # Default request attrs for this judge (model, etc.). The matcher merges the
      # user's judge_attrs over these, then its own reserved keys over both.
      # Raises NotImplementedError unless a subclass overrides it.
      def defaults
        raise NotImplementedError, "#{self.class} must implement #defaults"
      end
    end
  end
end
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "deja/cache"
4
+
5
module Deja
  # Cache layer behind the `meet_requirements` matcher (defined in deja/rspec.rb).
  # Stores confirmed requirement/value pairs keyed by a hash of the requirements
  # text. One file per test: `<cache_root>/meets_requirements/<suite>/<id>.yaml`.
  #
  #   test_suite: <derived from spec file path>
  #   test_name: <full RSpec description>
  #   summary: <human-readable counts: assertions / total confirmed values>
  #   assertions:
  #     - hash: <12-char fingerprint of the requirements text, used for lookup>
  #       requirements: <the requirements text, auditable from the file alone>
  #       confirmed_values:
  #         - <values previously approved by the LLM judge>
  #
  # Pruning mirrors Deja::Cache: at the end of a passing example (when
  # ALLOW_LLM_CALL=1), assertions whose hash wasn't touched are dropped, so
  # changing the requirements text blows away the now-stale confirmed values.
  #
  # A cache file that exists but holds no YAML document (e.g. a zero-byte file
  # left by an interrupted write) is treated as "nothing recorded" rather than
  # crashing with NoMethodError on nil.
  module RequirementsCache
    module_function

    # Directory holding every meet_requirements cache file.
    def cache_dir
      Deja.configuration.cache_root.join("meets_requirements")
    end

    # Confirmed values recorded for `requirements` in the current example's
    # cache file ([] when there are none). Marks the requirements as touched so
    # pruning keeps them.
    def values_for(requirements)
      record_touched(requirements)
      assertion = load_assertion(requirements)
      assertion ? assertion.fetch("confirmed_values") : []
    end

    # Record `value` as confirmed for `requirements`, creating the cache file
    # (and its directory) when needed, and refresh the summary line.
    def append!(requirements, value)
      record_touched(requirements)
      data = load_or_init
      upsert_assertion(data, requirements, value)
      data["summary"] = build_summary(data["assertions"])
      cache_file.write(YAML.dump(Deja::Cache.stringify(data)))
    end

    # Drop assertions whose hash was not looked up during the current example.
    # Deletes the file when nothing remains; leaves it untouched when every
    # assertion was touched or when the file holds no data.
    def prune_untouched_in_current_example!
      data = read_cache
      return unless data

      touched = touched_hashes
      fresh_assertions = data["assertions"].select { |a| touched.include?(a["hash"]) }
      return if fresh_assertions.size == data["assertions"].size

      if fresh_assertions.empty?
        cache_file.delete
      else
        data["assertions"] = fresh_assertions
        data["summary"] = build_summary(fresh_assertions)
        cache_file.write(YAML.dump(Deja::Cache.stringify(data)))
      end
    end

    # Cache file for the current example: <cache_dir>/<suite>/<id>.yaml.
    def cache_file
      cache_dir.join(Deja::Cache.test_suite, "#{Deja::Cache.current_id!}.yaml")
    end

    # 12-character SHA-256 fingerprint of the whitespace-stripped requirements.
    def requirements_hash(requirements)
      Digest::SHA256.hexdigest(requirements.strip)[0, 12]
    end

    # Parsed contents of the current cache file, or nil when the file is missing
    # or contains no YAML document (safe_load returns a falsy value for that).
    def read_cache
      return nil unless cache_file.exist?

      YAML.safe_load(cache_file.read) || nil
    end

    # The stored assertion entry matching `requirements`, or nil.
    def load_assertion(requirements)
      data = read_cache
      return nil unless data

      hash = requirements_hash(requirements)
      data.fetch("assertions").find { |a| a["hash"] == hash }
    end

    # Existing cache data, or a fresh skeleton for the current test. Ensures the
    # cache file's directory exists before a skeleton is returned.
    def load_or_init
      existing = read_cache
      return existing if existing

      FileUtils.mkdir_p(cache_file.dirname)
      {
        "test_suite" => Deja::Cache.test_suite,
        "test_name" => Deja::Cache.current_test_name,
        "summary" => "",
        "assertions" => [],
      }
    end

    # Add `value` to the assertion for `requirements` inside `data`, creating
    # the assertion entry when it does not exist yet. Mutates `data`.
    def upsert_assertion(data, requirements, value)
      hash = requirements_hash(requirements)
      existing = data["assertions"].find { |a| a["hash"] == hash }
      if existing
        existing["confirmed_values"] = existing.fetch("confirmed_values") + [ value ]
      else
        data["assertions"] << {
          "hash" => hash,
          "requirements" => requirements.strip,
          "confirmed_values" => [ value ],
        }
      end
    end

    # Human-readable counts, e.g. "2 assertions, 3 confirmed values total."
    def build_summary(assertions)
      total_values = assertions.sum { |a| a["confirmed_values"].size }
      "#{assertions.size} #{assertions.size == 1 ? 'assertion' : 'assertions'}, " \
        "#{total_values} confirmed #{total_values == 1 ? 'value' : 'values'} total."
    end

    # Remember that `requirements` was looked up in the current example.
    def record_touched(requirements)
      touched_hashes << requirements_hash(requirements)
    end

    # Set of requirement hashes touched so far, stored in the example's metadata.
    def touched_hashes
      Deja::Cache.current_example!.metadata[:touched_meet_requirements_hashes] ||= Set.new
    end
  end
end
data/lib/deja/rspec.rb ADDED
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "deja"
4
+ require "json"
5
+
6
+ # Require the RSpec libraries we actually use, not the "rspec" meta-gem — hosts
7
+ # on rspec-rails have rspec-core/expectations/mocks but not the meta-gem.
8
+ require "rspec/core"
9
+ require "rspec/expectations"
10
+ require "rspec/mocks"
11
+
12
module Deja
  # The test-facing DSL, mixed into every example by the RSpec.configure block
  # in this file. Require "deja/rspec" from your spec setup to install it.
  module Helpers
    # Call from the top of an `it` block (per-test: the id should be distinct
    # for each test) to install the caching client and set the cache id used for
    # this example. The id is stored in the example's metadata as :llm_cache_id.
    def use_llm_cache(id)
      RSpec.current_example.metadata[:llm_cache_id] = id
      Deja::Session.enable
    end

    # Assert the code path under test never reaches the LLM. Call from a `before`
    # block or the top of an example.
    def forbid_llm_calls
      Deja::Session.forbid
    end

    # Assert exactly one LLM call happened (across all providers) and return its
    # kwargs.
    def expect_llm_called
      Deja::Session.expect_called
    end

    # Read a value from a recorded cache YAML file by walking `path`. Each segment
    # is a string key (for hashes) or an integer index (for arrays). Raises with
    # the path traversed so far if any segment is missing, so a renamed key or
    # shifted index fails loud rather than returning nil.
    #
    # Negative indices count from the end of the array (-1 is the last element)
    # and are range-checked like positive ones: an index below -size raises
    # instead of silently yielding nil.
    #
    #   cached_llm_value("2026-04-30_17-03",
    #     "calls", 0, "response", "tool_uses", 0, "input", "session_instructions")
    #
    # id   - cache id of the recording (the file is <cache_dir>/<suite>/<id>.yaml).
    # path - segments to walk, in order.
    #
    # Returns the value found at the end of the path (the whole document when
    # `path` is empty). Raises RuntimeError when the file is missing or a
    # segment cannot be resolved.
    def cached_llm_value(id, *path)
      file = Deja::Cache.cache_dir.join(Deja::Cache.test_suite, "#{id}.yaml")
      rel = Deja::Cache.display_path(file)
      raise "No cached LLM file at #{rel}" unless file.exist?

      current = YAML.safe_load(file.read)
      path.each_with_index do |segment, i|
        # Breadcrumb of the segments already traversed, for error messages.
        crumb = i.zero? ? "<root>" : path[0...i].map(&:inspect).join("/")
        current =
          case current
          when Hash
            unless current.key?(segment)
              raise "No key #{segment.inspect} at #{crumb} in #{rel}; available: #{current.keys.inspect}"
            end

            current[segment]
          when Array
            unless segment.is_a?(Integer)
              raise "Expected integer index at #{crumb} in #{rel}, got #{segment.inspect}"
            end
            # Array#[] returns nil for any out-of-range index, including
            # negatives below -size, so both bounds must be checked.
            unless segment.between?(-current.size, current.size - 1)
              raise "Index #{segment} out of range at #{crumb} (size #{current.size}) in #{rel}"
            end

            current[segment]
          else
            raise "Cannot traverse into #{current.class} at #{crumb} in #{rel}"
          end
      end
      current
    end
  end
end
73
+
74
# `meet_requirements(requirements_text)` asserts that an LLM-generated value
# satisfies a free-text description without pinning to a specific stringification.
#
# 1. Looks up the requirements in the cache. If `actual` is already a confirmed
#    value, passes with no LLM call.
# 2. Otherwise, with ALLOW_LLM_CALL set, asks the judge model whether `actual`
#    meets the requirements (structured output). On "yes", caches and passes.
# 3. Otherwise, fails telling you to re-record under ALLOW_LLM_CALL=1.
RSpec::Matchers.define :meet_requirements do |requirements|
  match do |actual|
    @requirements = requirements

    known_values = Deja::RequirementsCache.values_for(requirements)
    if known_values.include?(actual)
      true
    elsif !ENV["ALLOW_LLM_CALL"]
      cache_path = Deja::Cache.display_path(Deja::RequirementsCache.cache_file)
      @reason = "value is not in #{cache_path} for the current requirements. " \
                "Set ALLOW_LLM_CALL=1 to verify it against the requirements via LLM and add it to the cache."
      false
    else
      # The judge uses its own dedicated client, independent of whatever provider
      # the spec is recording and outside the Deja::Cache layer; the
      # meet_requirements cache is the only cache that should track these calls.
      settings = Deja.configuration
      llm = settings.judge_client.call

      # The adapter is chosen by the client's type and supplies provider defaults;
      # judge_attrs override those.
      adapter = Deja::Judges.for_client(llm)
      overridable = adapter.defaults.merge(settings.judge_attrs)

      prompt = "Requirements:\n#{requirements}\n\nCandidate value:\n#{actual}\n\n" \
               "Does the candidate value meet the requirements?"
      verdict_schema = {
        "type" => "object",
        "properties" => {
          "meets_requirements" => {"type" => "boolean"},
          "reason" => {"type" => "string"},
        },
        "required" => [ "meets_requirements", "reason" ],
        "additionalProperties" => false,
      }

      # messages and output_config are reserved: they carry the requirements and
      # the structured-output contract parsed below, so they are merged last and
      # win over both the defaults and judge_attrs.
      request = overridable.merge(
        messages: [ {role: "user", content: prompt} ],
        output_config: {format: {type: :json_schema, schema: verdict_schema}},
      )
      reply = llm.messages.create(**request)

      verdict = JSON.parse(reply.content.first.text)
      if verdict["meets_requirements"]
        Deja::RequirementsCache.append!(requirements, actual)
        true
      else
        @reason = "LLM judge rejected the value: #{verdict['reason']}"
        false
      end
    end
  end

  failure_message do |actual|
    "expected value to meet requirements\n#{@reason}\nGot: #{actual.inspect}"
  end
end
146
+
147
RSpec.configure do |config|
  config.include Deja::Helpers

  # After each example, prune stale cache entries (calls/assertions whose hash
  # wasn't looked up during the example). This only happens when the example
  # passed, it opted into the cache via use_llm_cache, and ALLOW_LLM_CALL is set
  # (the re-record path). Cache-only runs leave both files alone so a
  # temporarily-disabled call/assertion doesn't lose its cached entry.
  config.after(:each) do |example|
    skip_pruning = example.exception ||
                   !example.metadata[:llm_cache_id] ||
                   !ENV["ALLOW_LLM_CALL"]

    unless skip_pruning
      Deja::Cache.prune_untouched_in_current_example!
      Deja::RequirementsCache.prune_untouched_in_current_example!
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module Deja
  # The per-example runtime. Installs every registered adapter's caching stub (so
  # a suite can mix providers, and each test exercises whichever it actually
  # calls) and aggregates the captured calls across adapters.
  module Session
    module_function

    # Reset the captured call log, then install the mock client of every
    # registered adapter. Raises Deja::Error when no provider is registered.
    def enable
      Deja.reset_calls!
      registered = Deja.adapters
      if registered.empty?
        raise Deja::Error, "No providers registered. Call `c.register :anthropic, ...` inside Deja.configure."
      end

      registered.each do |adapter|
        install(adapter, adapter.build_mock_client)
      end
    end

    # Install a poison client for every adapter so any LLM access raises.
    def forbid
      Deja.adapters.each do |adapter|
        install(adapter, poison_client)
      end
    end

    # Run an adapter's install block in the current example's context (so RSpec's
    # `allow` is available), passing it the client the app should receive.
    def install(adapter, client)
      context = example_instance!
      context.instance_exec(client, &adapter.install_block)
    end

    # Assert exactly one call was captured across all adapters; return its kwargs.
    def expect_called
      example_instance!.instance_exec do
        expect(Deja.calls.size).to eq(1)
      end
      Deja.calls.first[:kwargs]
    end

    # The running example's group instance. Raises Deja::Error outside an example.
    def example_instance!
      instance = RSpec.current_example&.example_group_instance
      return instance if instance

      raise Deja::Error, "Deja must be used inside an RSpec example"
    end

    # An object that claims to respond to every message and raises on any call
    # it does not already define.
    def poison_client
      poison = Object.new

      def poison.method_missing(*)
        raise("LLM should not be called (deja forbid_llm_calls)")
      end

      def poison.respond_to_missing?(*)
        true
      end

      poison
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Deja
  # Released gem version.
  VERSION = "0.1.0"
end
data/lib/deja.rb ADDED
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "deja/version"
4
+
5
# Deja records a non-deterministic call (today: an Anthropic LLM call) the first
# time it happens and replays the recorded response on every run after that, so
# tests that exercise real model behavior stay fast, offline, and deterministic.
#
# Providers are pluggable via adapters (see Deja::Adapters): a suite can mix
# them, and each test exercises whichever it actually calls.
#
# It also ships `meet_requirements`, an RSpec matcher that asserts an LLM-produced
# value satisfies a free-text description (judged once, then cached).
#
# See README.md for the full record/replay workflow and configuration.
module Deja
  # Base class for every error Deja raises itself.
  class Error < StandardError
  end

  # Raised on a cache miss when ALLOW_LLM_CALL is not set, i.e. replay mode hit
  # a request it has never recorded.
  class MissingCacheError < Error
  end

  # Raised when an LLM call is made before `use_llm_cache(id)` set a cache id.
  class MissingIdError < Error
  end

  # Configure the gem. Yields the Configuration; returns it.
  #
  #   Deja.configure do |c|
  #     c.cache_root = Rails.root.join("spec/support/cache")
  #     c.register :anthropic,
  #       install: ->(client) { allow(AnthropicClient).to receive(:client).and_return(client) }
  #   end
  def self.configure
    yield(configuration)
    configuration
  end

  # The current Configuration, created on first use.
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Replaces the configuration with a fresh one and empties the captured call
  # log. Used between examples and by the gem's own suite.
  def self.reset_configuration!
    @configuration = Configuration.new
    reset_calls!
  end

  # Register a provider adapter (delegates to Configuration#register).
  def self.register(provider, **opts)
    configuration.register(provider, **opts)
  end

  # The registered adapters, in registration order.
  def self.adapters
    configuration.adapters.values
  end

  # --- captured calls (reset per example by Session.enable) ---

  # The calls captured so far, as an Array of Hashes.
  def self.calls
    @calls ||= []
  end

  # Append one captured call ({provider:, method:, kwargs:}) to the log.
  def self.record_call(provider, method, kwargs)
    calls << {provider: provider, method: method, kwargs: kwargs}
  end

  # Empty the captured call log.
  def self.reset_calls!
    @calls = []
  end
end
76
+
77
+ require "deja/configuration"
78
+ require "deja/cache"
79
+ require "deja/requirements_cache"
80
+ require "deja/adapters/base"
81
+ require "deja/adapters/anthropic"
82
+ require "deja/judges/base"
83
+ require "deja/judges/anthropic"
84
+ require "deja/session"