rspec-llm 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba3f15006a8aa1820dd8741a4788ad7cfe94f4b213bcabb1cc1ee75458a1b8d0
4
- data.tar.gz: b27e3aeefc80e89d3e8209672a66ae8e021d95af73867a4260936af916153459
3
+ metadata.gz: 91156e04ac3b433d59baecabcc24e857e6ede80327e9d08f453a34f0c9d511bd
4
+ data.tar.gz: c0fdf5f3a4e7af09e69062a9989dac19ef1e34db5e382fa2420cf51945c852d9
5
5
  SHA512:
6
- metadata.gz: 2ae5b22b4148e8b04c4607d49d0b3a088b77405f8cac92a6db1b223ecc8aefd3b84471e5d8d42db86a1527f06f8d9330f4a3ec29f8abe29732d3e2a2c60c95a5
7
- data.tar.gz: 352bc6061a08c5e8561286782b51c4690d79ef9939ed93498e657c37aa78623c4c35a927ad6a7abdacef82e3b6b925bd81881b4c30971b700d0ea3872791602d
6
+ metadata.gz: e1ca1d12ed29de3c2560c82675a953fa169958f96fa9fe5b7e215017c4d9607ce4be370b4898d232e24118350b483543bb4b7950fd48aebe1c825d8f5229a3f3
7
+ data.tar.gz: 80858d6da5a91633e237552510e6d804a2e15cf4f8693f1d567afacd52335660e23aeb60ac53092b8979b6199fa690a9be105a47e63079896438903ebe9c1509
data/.idea/rspec-llm.iml CHANGED
@@ -20,6 +20,7 @@
20
20
  <orderEntry type="library" scope="PROVIDED" name="crack (v1.0.1, rbenv: 3.4.9) [gem]" level="application" />
21
21
  <orderEntry type="library" scope="PROVIDED" name="csv (v3.3.5, rbenv: 3.4.9) [gem]" level="application" />
22
22
  <orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.6.2, rbenv: 3.4.9) [gem]" level="application" />
23
+ <orderEntry type="library" scope="PROVIDED" name="docile (v1.4.1, rbenv: 3.4.9) [gem]" level="application" />
23
24
  <orderEntry type="library" scope="PROVIDED" name="event_stream_parser (v1.0.0, rbenv: 3.4.9) [gem]" level="application" />
24
25
  <orderEntry type="library" scope="PROVIDED" name="faraday (v2.14.2, rbenv: 3.4.9) [gem]" level="application" />
25
26
  <orderEntry type="library" scope="PROVIDED" name="faraday-multipart (v1.2.0, rbenv: 3.4.9) [gem]" level="application" />
@@ -56,6 +57,9 @@
56
57
  <orderEntry type="library" scope="PROVIDED" name="ruby-progressbar (v1.13.0, rbenv: 3.4.9) [gem]" level="application" />
57
58
  <orderEntry type="library" scope="TEST" name="ruby_llm (v1.15.0, rbenv: 3.4.9) [gem]" level="application" />
58
59
  <orderEntry type="library" scope="PROVIDED" name="ruby_llm-schema (v0.4.0, rbenv: 3.4.9) [gem]" level="application" />
60
+ <orderEntry type="library" scope="TEST" name="simplecov (v0.22.0, rbenv: 3.4.9) [gem]" level="application" />
61
+ <orderEntry type="library" scope="PROVIDED" name="simplecov-html (v0.13.2, rbenv: 3.4.9) [gem]" level="application" />
62
+ <orderEntry type="library" scope="PROVIDED" name="simplecov_json_formatter (v0.1.4, rbenv: 3.4.9) [gem]" level="application" />
59
63
  <orderEntry type="library" scope="PROVIDED" name="unicode-display_width (v3.2.0, rbenv: 3.4.9) [gem]" level="application" />
60
64
  <orderEntry type="library" scope="PROVIDED" name="unicode-emoji (v4.2.0, rbenv: 3.4.9) [gem]" level="application" />
61
65
  <orderEntry type="library" scope="PROVIDED" name="uri (v1.1.1, rbenv: 3.4.9) [gem]" level="application" />
data/.rubocop.yml CHANGED
@@ -1,5 +1,5 @@
1
1
  AllCops:
2
- TargetRubyVersion: 3.0
2
+ TargetRubyVersion: 3.3
3
3
  NewCops: disable
4
4
  SuggestExtensions: false
5
5
 
data/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.2.0] - 2026-06-05
4
+
5
+ ### Added
6
+
7
+ - **`pass_llm_judge` — structured JSON evaluation** (#1).
8
+ When the `ruby_llm` gem is loaded, the judge prompt is now sent with a
9
+ `RubyLLM::Schema` contract that constrains the model to return a
10
+ machine-readable `{ passed: boolean, reason: string }` payload. This
11
+ eliminates brittle first-token YES/NO parsing and surfaces a full explanation
12
+ in every failure message. Adapters or configurations that do not support
13
+ structured output fall back automatically to the original text-parsing
14
+ strategy, so no existing code or specs need to change.
15
+ - **`match_json_schema` — class-based schema introspection** (#1).
16
+ The matcher now accepts any Ruby class as its argument in addition to raw
17
+ JSON Schema hashes:
18
+ - `Data.define` / `Struct` → attributes are read via `.members`.
19
+ - PORO / ActiveModel → attributes are discovered from public writer methods
20
+ (`#name=`).
21
+
22
+ The derived schema marks every attribute as `required` and types it as `string`.
23
+ Raw Hash schemas continue to work without any changes.
24
+ - `Adapters::Base#chat_structured(messages, schema: nil)` — new adapter
25
+ surface for structured-output requests. The default implementation falls
26
+ back to `#chat`, preserving full backward compatibility for custom adapters.
27
+ - `Adapters::RubyLLM#chat_structured` — calls `client.with_schema(schema)`
28
+ before `client.ask` so the underlying model returns a parsed Hash.
29
+ - `Adapters::Fake#chat_structured` — automatically JSON-parses stub strings
30
+ into Hashes, making it trivial to write hermetic structured-output tests
31
+ without needing the `ruby_llm-schema` gem.
32
+
3
33
  ## [0.1.0] - 2026-06-04
4
34
 
5
35
  ### Added
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # rspec-llm
2
2
 
3
- [![CI](https://github.com/salscotto/rspec-llm/actions/workflows/ci.yml/badge.svg)](https://github.com/salscotto/rspec-llm/actions/workflows/ci.yml)
3
+ [![CI](https://github.com/washu/rspec-llm/actions/workflows/ci.yml/badge.svg)](https://github.com/washu/rspec-llm/actions/workflows/ci.yml)
4
+ [![Coverage](https://codecov.io/gh/washu/rspec-llm/branch/main/graph/badge.svg)](https://codecov.io/gh/washu/rspec-llm)
5
+ [![Gem Version](https://img.shields.io/gem/v/rspec-llm.svg)](https://rubygems.org/gems/rspec-llm)
6
+ [![Gem Downloads](https://img.shields.io/gem/dt/rspec-llm.svg)](https://rubygems.org/gems/rspec-llm)
4
7
 
5
8
  RSpec matchers, helpers, and a thin DSL for testing LLM-backed code in Ruby.
6
9
 
@@ -74,7 +77,7 @@ end
74
77
 
75
78
  ### `pass_llm_judge`
76
79
 
77
- Sends the response and a criterion to the configured judge model and parses a YES/NO verdict from the first token of the reply. The judge's reasoning is surfaced in the failure message.
80
+ Sends the response and a criterion to the configured judge model. When the `ruby_llm` gem is loaded, the judge uses a `RubyLLM::Schema`-backed structured contract that forces the model to return `{ passed: boolean, reason: string }` — eliminating brittle first-token YES/NO parsing and always surfacing a rich explanation in the failure message. Whenever the adapter returns plain text instead of a Hash, the matcher transparently falls back to parsing the first YES/NO token.
78
81
 
79
82
  ```ruby
80
83
  expect(reply).to pass_llm_judge("is polite and apologetic")
@@ -92,7 +95,9 @@ expect(reply).to match_llm_intent("a refund confirmation for order #12345")
92
95
 
93
96
  ### `match_json_schema`
94
97
 
95
- Parses the actual value as JSON (or accepts a Hash/Array directly) and validates against the given JSON Schema via the `json-schema` gem.
98
+ Parses the actual value as JSON (or accepts a Hash/Array directly) and validates against a schema via the `json-schema` gem. The schema argument can be:
99
+
100
+ **A raw JSON Schema hash** (original behaviour — fully backward-compatible):
96
101
 
97
102
  ```ruby
98
103
  schema = {
@@ -103,6 +108,24 @@ schema = {
103
108
  expect(response).to match_json_schema(schema)
104
109
  ```
105
110
 
111
+ **A Ruby class** — `Data.define`, `Struct`, or any PORO with `attr_accessor`. The matcher introspects the class and derives a schema automatically, marking every attribute as required and typing it as `string`:
112
+
113
+ ```ruby
114
+ # Data.define (Ruby >= 3.2)
115
+ UserProfile = Data.define(:full_name, :verified_email)
116
+ expect(response).to match_json_schema(UserProfile)
117
+
118
+ # Struct
119
+ Point = Struct.new(:x, :y)
120
+ expect(response).to match_json_schema(Point)
121
+
122
+ # PORO
123
+ class OrderSummary
124
+ attr_accessor :order_id, :total, :status
125
+ end
126
+ expect(response).to match_json_schema(OrderSummary)
127
+ ```
128
+
106
129
  ### `be_semantically_similar_to`
107
130
 
108
131
  Embeds both sides via the configured `embedder`, computes cosine similarity, and compares to the threshold. Override the threshold per-matcher with `.within(0.9)`.
@@ -129,9 +152,15 @@ RSpec.describe "Greeter" do
129
152
  end
130
153
  ```
131
154
 
132
- Use `stub_llm_judge` to stub the judge model separately — handy when testing your own code that wraps `pass_llm_judge`:
155
+ Use `stub_llm_judge` to stub the judge model separately — handy when testing your own code that wraps `pass_llm_judge`. Stub with a JSON string to exercise the structured-output path, or with a `YES`/`NO` string to exercise the text-parsing fallback:
133
156
 
134
157
  ```ruby
158
+ # Structured output (recommended — matches real ruby_llm behaviour)
159
+ stub_llm_judge do |fake|
160
+ fake.default('{"passed":true,"reason":"Looks good to me."}')
161
+ end
162
+
163
+ # Legacy text format (still works for backward compatibility)
135
164
  stub_llm_judge do |fake|
136
165
  fake.default("YES\nLooks good to me.")
137
166
  end
@@ -187,7 +216,7 @@ bin/console # interactive prompt
187
216
 
188
217
  ## Contributing
189
218
 
190
- Bug reports and pull requests welcome on GitHub at https://github.com/salscotto/rspec-llm.
219
+ Bug reports and pull requests welcome on GitHub at https://github.com/washu/rspec-llm.
191
220
 
192
221
  ## License
193
222
 
@@ -40,6 +40,15 @@ module RSpec
40
40
  raise NotImplementedError
41
41
  end
42
42
 
43
+ # Send a chat with an optional structured output schema. When a
44
+ # RubyLLM::Schema class is provided via +schema:+, adapters that support
45
+ # structured output will return a Hash instead of a String. Adapters that
46
+ # do not support structured output fall back to plain #chat and return a
47
+ # String; callers are responsible for parsing that fallback.
48
+ def chat_structured(messages, schema: nil) # rubocop:disable Lint/UnusedMethodArgument
49
+ chat(messages)
50
+ end
51
+
43
52
  # Embed text. Returns Array<Float>. Optional — adapters may raise
44
53
  # NotImplementedError if the underlying client doesn't support it.
45
54
  def embed(_text)
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "json"
4
+
3
5
  module RSpec
4
6
  module LLM
5
7
  module Adapters
@@ -67,6 +69,26 @@ module RSpec
67
69
  response.is_a?(Proc) ? response.call(prompt) : response
68
70
  end
69
71
 
72
+ # Extends #chat with structured-output support for tests. When +schema+
73
+ # is provided the raw stub value is JSON-parsed; if it produces a Hash it
74
+ # is returned directly so callers receive the same shape that a real
75
+ # structured adapter would deliver. Stubs that are already a Hash are
76
+ # returned unchanged. Non-JSON strings fall back to the plain string so
77
+ # existing YES/NO-style tests continue to work.
78
+ #
79
+ # When no schema is given the adapter still attempts JSON parsing — this
80
+ # lets tests stub structured JSON responses without needing the
81
+ # RubyLLM::Schema gem loaded.
82
+ def chat_structured(messages, schema: nil) # rubocop:disable Lint/UnusedMethodArgument
83
+ response = chat(messages)
84
+ return response if response.is_a?(Hash)
85
+
86
+ parsed = JSON.parse(response.to_s, symbolize_names: true)
87
+ parsed.is_a?(Hash) ? parsed : response
88
+ rescue JSON::ParserError
89
+ response
90
+ end
91
+
70
92
  def embed(text)
71
93
  raise NotImplementedError, "configure with #embed_with { |text| vector }" unless @embedder
72
94
 
@@ -20,6 +20,26 @@ module RSpec
20
20
  extract_content(response)
21
21
  end
22
22
 
23
+ # Uses +with_schema+ on the underlying RubyLLM::Chat client when a schema
24
+ # is provided, yielding a Hash response instead of raw text. Falls back to
25
+ # plain #chat when the client doesn't support +with_schema+.
26
+ def chat_structured(messages, schema: nil)
27
+ return chat(messages) unless schema && client.respond_to?(:with_schema)
28
+
29
+ normalized = normalize_messages(messages)
30
+ last = normalized.last
31
+ system_msgs = normalized[0..-2].select { |m| m[:role] == "system" }
32
+ if system_msgs.any? && client.respond_to?(:with_instructions)
33
+ system_msgs.each do |m|
34
+ client.with_instructions(m[:content])
35
+ end
36
+ end
37
+
38
+ client.with_schema(schema)
39
+ response = client.ask(last[:content])
40
+ extract_content(response)
41
+ end
42
+
23
43
  def embed(text)
24
44
  return @embedder.call(text) if @embedder
25
45
 
@@ -7,10 +7,23 @@ module RSpec
7
7
  module LLM
8
8
  module Matchers
9
9
  # Asserts the actual value parses as JSON and conforms to the provided
10
- # JSON Schema (a Hash, JSON string, or schema file path).
10
+ # schema. The schema argument may be:
11
+ #
12
+ # * A Hash — raw JSON Schema (original behaviour, fully backward-compatible).
13
+ # * A Class — any Ruby class whose attributes can be introspected:
14
+ # * +Data.define+ / +Struct+: attributes are read via +.members+.
15
+ # * PORO / ActiveModel: attributes are discovered from public writer
16
+ # methods (+#name=+).
17
+ # In both cases a JSON Schema is derived automatically; every attribute
18
+ # is typed as +string+ and marked +required+.
11
19
  class MatchJsonSchema
12
20
  def initialize(schema)
13
- @schema = schema
21
+ if schema.is_a?(Class)
22
+ @class_name = schema.name || schema.inspect
23
+ @schema = schema_from_class(schema)
24
+ else
25
+ @schema = schema
26
+ end
14
27
  end
15
28
 
16
29
  def matches?(actual)
@@ -23,7 +36,7 @@ module RSpec
23
36
  end
24
37
 
25
38
  def description
26
- "match JSON schema"
39
+ @class_name ? "match JSON schema for #{@class_name}" : "match JSON schema"
27
40
  end
28
41
 
29
42
  def failure_message
@@ -40,6 +53,27 @@ module RSpec
40
53
 
41
54
  private
42
55
 
56
+ # Converts a Ruby class to a JSON Schema hash by duck-typing its shape.
57
+ # Data.define and Struct expose +.members+; POROs and ActiveModel objects
58
+ # expose writer methods whose names end in +=+.
59
+ def schema_from_class(klass)
60
+ members = if klass.respond_to?(:members)
61
+ klass.members.map(&:to_s)
62
+ else
63
+ klass.public_instance_methods(false)
64
+ .map(&:to_s)
65
+ .select { |m| m.end_with?("=") }
66
+ .map { |m| m.chomp("=") }
67
+ .sort
68
+ end
69
+
70
+ {
71
+ "type" => "object",
72
+ "required" => members,
73
+ "properties" => members.each_with_object({}) { |m, h| h[m] = { "type" => "string" } }
74
+ }
75
+ end
76
+
43
77
  def parse(actual)
44
78
  return actual if actual.is_a?(Hash) || actual.is_a?(Array)
45
79
 
@@ -4,8 +4,18 @@ module RSpec
4
4
  module LLM
5
5
  module Matchers
6
6
  # LLM-as-judge matcher. Asks the configured judge model whether the
7
- # actual response satisfies the given criterion. Parses YES/NO from the
8
- # first non-whitespace token of the judge's reply.
7
+ # actual response satisfies the given criterion.
8
+ #
9
+ # When the +ruby_llm+ gem is loaded, the judge prompt is sent with a
10
+ # structured +RubyLLM::Schema+ contract that forces the model to return
11
+ # a machine-readable JSON payload:
12
+ #
13
+ # { passed: true/false, reason: "..." }
14
+ #
15
+ # This eliminates brittle first-token YES/NO parsing and surfaces a rich
16
+ # reason string for failure messages. Adapters (or gem configurations)
17
+ # that do not support structured output fall back automatically to the
18
+ # original YES/NO text-parsing strategy.
9
19
  class PassLlmJudge
10
20
  def initialize(criterion)
11
21
  @criterion = criterion
@@ -20,8 +30,16 @@ module RSpec
20
30
 
21
31
  def matches?(actual)
22
32
  @actual = actual.to_s
23
- @verdict_text = judge_adapter.chat(prompt_for(@actual, @criterion))
24
- @verdict, @reason = parse_verdict(@verdict_text)
33
+ result = judge_adapter.chat_structured(prompt_for(@actual, @criterion), schema: judge_schema)
34
+
35
+ if result.is_a?(Hash)
36
+ @verdict = result[:passed] || result["passed"]
37
+ @reason = (result[:reason] || result["reason"] || "").to_s
38
+ else
39
+ @verdict_text = result.to_s
40
+ @verdict, @reason = parse_verdict(@verdict_text)
41
+ end
42
+
25
43
  @verdict == true
26
44
  end
27
45
 
@@ -41,6 +59,18 @@ module RSpec
41
59
 
42
60
  private
43
61
 
62
+ # Returns a RubyLLM::Schema class that constrains the judge's reply to a
63
+ # structured { passed:, reason: } payload, or +nil+ when the gem is not
64
+ # loaded (triggering the text-parsing fallback path).
65
+ def judge_schema
66
+ return nil unless defined?(::RubyLLM::Schema)
67
+
68
+ @judge_schema ||= ::RubyLLM::Schema.create do
69
+ boolean :passed, description: "True if the text meets the criteria, false otherwise"
70
+ string :reason, description: "Detailed explanation of why the criteria was or was not met"
71
+ end
72
+ end
73
+
44
74
  def judge_adapter
45
75
  @judge || RSpec::LLM.judge or raise(
46
76
  RSpec::LLM::Error,
@@ -63,7 +93,7 @@ module RSpec
63
93
  end
64
94
 
65
95
  def format_reason
66
- @reason.empty? ? @verdict_text.to_s.strip : @reason
96
+ @reason.empty? ? (@verdict_text || "").strip : @reason
67
97
  end
68
98
  end
69
99
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module RSpec
4
4
  module LLM
5
- VERSION = "0.1.0"
5
+ VERSION = "0.2.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rspec-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sal Scotto
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
108
  requirements:
109
109
  - - ">="
110
110
  - !ruby/object:Gem::Version
111
- version: 3.0.0
111
+ version: 3.3.0
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  requirements:
114
114
  - - ">="