RubyGems - rspec-llm - Versions diffs - 0.1.0 → 0.2.0 - Mend

rspec-llm 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/.idea/rspec-llm.iml +4 -0
data/.rubocop.yml +1 -1
data/CHANGELOG.md +30 -0
data/README.md +34 -5
data/lib/rspec/llm/adapters/base.rb +9 -0
data/lib/rspec/llm/adapters/fake.rb +22 -0
data/lib/rspec/llm/adapters/ruby_llm.rb +20 -0
data/lib/rspec/llm/matchers/match_json_schema.rb +37 -3
data/lib/rspec/llm/matchers/pass_llm_judge.rb +35 -5
data/lib/rspec/llm/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ba3f15006a8aa1820dd8741a4788ad7cfe94f4b213bcabb1cc1ee75458a1b8d0
-  data.tar.gz: b27e3aeefc80e89d3e8209672a66ae8e021d95af73867a4260936af916153459
+  metadata.gz: 91156e04ac3b433d59baecabcc24e857e6ede80327e9d08f453a34f0c9d511bd
+  data.tar.gz: c0fdf5f3a4e7af09e69062a9989dac19ef1e34db5e382fa2420cf51945c852d9
 SHA512:
-  metadata.gz: 2ae5b22b4148e8b04c4607d49d0b3a088b77405f8cac92a6db1b223ecc8aefd3b84471e5d8d42db86a1527f06f8d9330f4a3ec29f8abe29732d3e2a2c60c95a5
-  data.tar.gz: 352bc6061a08c5e8561286782b51c4690d79ef9939ed93498e657c37aa78623c4c35a927ad6a7abdacef82e3b6b925bd81881b4c30971b700d0ea3872791602d
+  metadata.gz: e1ca1d12ed29de3c2560c82675a953fa169958f96fa9fe5b7e215017c4d9607ce4be370b4898d232e24118350b483543bb4b7950fd48aebe1c825d8f5229a3f3
+  data.tar.gz: 80858d6da5a91633e237552510e6d804a2e15cf4f8693f1d567afacd52335660e23aeb60ac53092b8979b6199fa690a9be105a47e63079896438903ebe9c1509

data/.idea/rspec-llm.iml CHANGED Viewed

@@ -20,6 +20,7 @@
     <orderEntry type="library" scope="PROVIDED" name="crack (v1.0.1, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="csv (v3.3.5, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.6.2, rbenv: 3.4.9) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="docile (v1.4.1, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="event_stream_parser (v1.0.0, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="faraday (v2.14.2, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="faraday-multipart (v1.2.0, rbenv: 3.4.9) [gem]" level="application" />
@@ -56,6 +57,9 @@
     <orderEntry type="library" scope="PROVIDED" name="ruby-progressbar (v1.13.0, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="TEST" name="ruby_llm (v1.15.0, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="ruby_llm-schema (v0.4.0, rbenv: 3.4.9) [gem]" level="application" />
+    <orderEntry type="library" scope="TEST" name="simplecov (v0.22.0, rbenv: 3.4.9) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="simplecov-html (v0.13.2, rbenv: 3.4.9) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="simplecov_json_formatter (v0.1.4, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="unicode-display_width (v3.2.0, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="unicode-emoji (v4.2.0, rbenv: 3.4.9) [gem]" level="application" />
     <orderEntry type="library" scope="PROVIDED" name="uri (v1.1.1, rbenv: 3.4.9) [gem]" level="application" />

data/.rubocop.yml CHANGED Viewed

@@ -1,5 +1,5 @@
 AllCops:
-  TargetRubyVersion: 3.0
+  TargetRubyVersion: 3.3
   NewCops: disable
   SuggestExtensions: false

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,35 @@
 ## [Unreleased]
+## [0.2.0] - 2026-06-05
+### Added
+- **`pass_llm_judge` — structured JSON evaluation** (#1).
+  When the `ruby_llm` gem is loaded, the judge prompt is now sent with a
+  `RubyLLM::Schema` contract that constrains the model to return a
+  machine-readable `{ passed: boolean, reason: string }` payload.  This
+  eliminates brittle first-token YES/NO parsing and surfaces a full explanation
+  in every failure message.  Adapters or configurations that do not support
+  structured output fall back automatically to the original text-parsing
+  strategy, so no existing code or specs need to change.
+- **`match_json_schema` — class-based schema introspection** (#1).
+  The matcher now accepts any Ruby class as its argument in addition to raw
+  JSON Schema hashes:
+  - `Data.define` / `Struct` → attributes are read via `.members`.
+  - PORO / ActiveModel → attributes are discovered from public writer methods
+    (`#name=`).
+  The derived schema marks every attribute as `required` (typed `string`).
+  Raw Hash schemas continue to work without any changes.
+- `Adapters::Base#chat_structured(messages, schema: nil)` — new adapter
+  surface for structured-output requests.  The default implementation falls
+  back to `#chat`, preserving full backward compatibility for custom adapters.
+- `Adapters::RubyLLM#chat_structured` — calls `client.with_schema(schema)`
+  before `client.ask` so the underlying model returns a parsed Hash.
+- `Adapters::Fake#chat_structured` — automatically JSON-parses stub strings
+  into Hashes, making it trivial to write hermetic structured-output tests
+  without needing the `ruby_llm-schema` gem.
 ## [0.1.0] - 2026-06-04
 ### Added

data/README.md CHANGED Viewed

@@ -1,6 +1,9 @@
 # rspec-llm
-[![CI](https://github.com/salscotto/rspec-llm/actions/workflows/ci.yml/badge.svg)](https://github.com/salscotto/rspec-llm/actions/workflows/ci.yml)
+[![CI](https://github.com/washu/rspec-llm/actions/workflows/ci.yml/badge.svg)](https://github.com/washu/rspec-llm/actions/workflows/ci.yml)
+[![Coverage](https://codecov.io/gh/washu/rspec-llm/branch/main/graph/badge.svg)](https://codecov.io/gh/washu/rspec-llm)
+[![Gem Version](https://img.shields.io/gem/v/rspec-llm.svg)](https://rubygems.org/gems/rspec-llm)
+[![Gem Downloads](https://img.shields.io/gem/dt/rspec-llm.svg)](https://rubygems.org/gems/rspec-llm)
 RSpec matchers, helpers, and a thin DSL for testing LLM-backed code in Ruby.
@@ -74,7 +77,7 @@ end
 ### `pass_llm_judge`
-Sends the response and a criterion to the configured judge model and parses a YES/NO verdict from the first token of the reply. The judge's reasoning is surfaced in the failure message.
+Sends the response and a criterion to the configured judge model. When the `ruby_llm` gem is loaded, the judge uses a `RubyLLM::Schema`-backed structured contract that forces the model to return `{ passed: boolean, reason: string }` — eliminating brittle first-token YES/NO parsing and always surfacing a rich explanation in the failure message. For all other adapters the matcher falls back to parsing the first YES/NO token transparently.
 ```ruby
 expect(reply).to pass_llm_judge("is polite and apologetic")
@@ -92,7 +95,9 @@ expect(reply).to match_llm_intent("a refund confirmation for order #12345")
 ### `match_json_schema`
-Parses the actual value as JSON (or accepts a Hash/Array directly) and validates against the given JSON Schema via the `json-schema` gem.
+Parses the actual value as JSON (or accepts a Hash/Array directly) and validates against a schema via the `json-schema` gem. The schema argument can be:
+**A raw JSON Schema hash** (original behaviour — fully backward-compatible):
 ```ruby
 schema = {
@@ -103,6 +108,24 @@ schema = {
 expect(response).to match_json_schema(schema)
 ```
+**A Ruby class** — `Data.define`, `Struct`, or any PORO with `attr_accessor`. The matcher introspects the class and derives the required fields automatically:
+```ruby
+# Data.define (Ruby >= 3.2)
+UserProfile = Data.define(:full_name, :verified_email)
+expect(response).to match_json_schema(UserProfile)
+# Struct
+Point = Struct.new(:x, :y)
+expect(response).to match_json_schema(Point)
+# PORO
+class OrderSummary
+  attr_accessor :order_id, :total, :status
+end
+expect(response).to match_json_schema(OrderSummary)
+```
 ### `be_semantically_similar_to`
 Embeds both sides via the configured `embedder`, computes cosine similarity, and compares to the threshold. Override the threshold per-matcher with `.within(0.9)`.
@@ -129,9 +152,15 @@ RSpec.describe "Greeter" do
 end
 ```
-Use `stub_llm_judge` to stub the judge model separately — handy when testing your own code that wraps `pass_llm_judge`:
+Use `stub_llm_judge` to stub the judge model separately — handy when testing your own code that wraps `pass_llm_judge`. Stub with a JSON string to exercise the structured-output path, or with a `YES`/`NO` string to exercise the text-parsing fallback:
 ```ruby
+# Structured output (recommended — matches real ruby_llm behaviour)
+stub_llm_judge do |fake|
+  fake.default('{"passed":true,"reason":"Looks good to me."}')
+end
+# Legacy text format (still works for backward compatibility)
 stub_llm_judge do |fake|
   fake.default("YES\nLooks good to me.")
 end
@@ -187,7 +216,7 @@ bin/console          # interactive prompt
 ## Contributing
-Bug reports and pull requests welcome on GitHub at https://github.com/salscotto/rspec-llm.
+Bug reports and pull requests welcome on GitHub at https://github.com/washu/rspec-llm.
 ## License

data/lib/rspec/llm/adapters/base.rb CHANGED Viewed

@@ -40,6 +40,15 @@ module RSpec
           raise NotImplementedError
         end
+        # Send a chat with an optional structured output schema. When a
+        # RubyLLM::Schema class is provided via +schema:+, adapters that support
+        # structured output will return a Hash instead of a String. Adapters that
+        # do not support structured output fall back to plain #chat and return a
+        # String; callers are responsible for parsing that fallback.
+        def chat_structured(messages, schema: nil) # rubocop:disable Lint/UnusedMethodArgument
+          chat(messages)
+        end
         # Embed text. Returns Array<Float>. Optional — adapters may raise
         # NotImplementedError if the underlying client doesn't support it.
         def embed(_text)

data/lib/rspec/llm/adapters/fake.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require "json"
 module RSpec
   module LLM
     module Adapters
@@ -67,6 +69,26 @@ module RSpec
           response.is_a?(Proc) ? response.call(prompt) : response
         end
+        # Extends #chat with structured-output support for tests. The raw stub
+        # value is JSON-parsed (with symbolized keys); if that produces a Hash it
+        # is returned directly so callers receive the same shape that a real
+        # structured adapter would deliver. Stubs that are already a Hash are
+        # returned unchanged. Non-JSON strings, and JSON that parses to something
+        # other than a Hash, fall back to the original stub value so existing
+        # YES/NO-style tests continue to work.
+        #
+        # When no schema is given the adapter still attempts JSON parsing — this
+        # lets tests stub structured JSON responses without needing the
+        # RubyLLM::Schema gem loaded.
+        def chat_structured(messages, schema: nil) # rubocop:disable Lint/UnusedMethodArgument
+          response = chat(messages)
+          return response if response.is_a?(Hash)
+          parsed = JSON.parse(response.to_s, symbolize_names: true)
+          parsed.is_a?(Hash) ? parsed : response
+        rescue JSON::ParserError
+          response
+        end
         def embed(text)
           raise NotImplementedError, "configure with #embed_with { |text| vector }" unless @embedder

data/lib/rspec/llm/adapters/ruby_llm.rb CHANGED Viewed

@@ -20,6 +20,26 @@ module RSpec
           extract_content(response)
         end
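+        # Uses +with_schema+ on the underlying RubyLLM::Chat client when a schema
+        # is provided, yielding a Hash response instead of raw text. Falls back to
+        # plain #chat when +schema+ is nil or the client doesn't support
+        # +with_schema+. On the structured path only system messages (via
+        # +with_instructions+, when the client supports it) and the content of
+        # the final message are forwarded; earlier non-system messages are not
+        # sent.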
+        def chat_structured(messages, schema: nil)
+          return chat(messages) unless schema && client.respond_to?(:with_schema)
+          normalized = normalize_messages(messages)
+          last = normalized.last
+          system_msgs = normalized[0..-2].select { |m| m[:role] == "system" }
+          if system_msgs.any? && client.respond_to?(:with_instructions)
+            system_msgs.each do |m|
+              client.with_instructions(m[:content])
+            end
+          end
+          client.with_schema(schema)
+          response = client.ask(last[:content])
+          extract_content(response)
+        end
         def embed(text)
           return @embedder.call(text) if @embedder

data/lib/rspec/llm/matchers/match_json_schema.rb CHANGED Viewed

@@ -7,10 +7,23 @@ module RSpec
   module LLM
     module Matchers
       # Asserts the actual value parses as JSON and conforms to the provided
-      # JSON Schema (a Hash, JSON string, or schema file path).
+      # schema.  The schema argument may be:
+      #
+      # * A Hash — raw JSON Schema (original behaviour, fully backward-compatible).
+      # * A Class — any Ruby class whose attributes can be introspected:
+      #     * +Data.define+ / +Struct+: attributes are read via +.members+.
+      #     * PORO / ActiveModel: attributes are discovered from public writer
+      #       methods (+#name=+).
+      #   In both cases a JSON Schema is derived automatically; every attribute
+      #   is typed as +string+ and marked +required+.
       class MatchJsonSchema
         def initialize(schema)
-          @schema = schema
+          if schema.is_a?(Class)
+            @class_name = schema.name || schema.inspect
+            @schema     = schema_from_class(schema)
+          else
+            @schema = schema
+          end
         end
         def matches?(actual)
@@ -23,7 +36,7 @@ module RSpec
         end
         def description
-          "match JSON schema"
+          @class_name ? "match JSON schema for #{@class_name}" : "match JSON schema"
         end
         def failure_message
@@ -40,6 +53,27 @@ module RSpec
         private
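+        # Converts a Ruby class to a JSON Schema hash by duck-typing its shape.
+        # Data.define and Struct expose +.members+; for any other class the
+        # attribute names are taken, sorted, from the public instance methods
+        # defined directly on that class whose names end in +=+ (inherited
+        # writers are not seen).
+        # NOTE(review): operator methods such as +==+ or +[]=+ also end in +=+
+        # and would be picked up as attributes if the class defines them.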
+        def schema_from_class(klass)
+          members = if klass.respond_to?(:members)
+                      klass.members.map(&:to_s)
+                    else
+                      klass.public_instance_methods(false)
+                           .map(&:to_s)
+                           .select { |m| m.end_with?("=") }
+                           .map { |m| m.chomp("=") }
+                           .sort
+                    end
+          {
+            "type" => "object",
+            "required" => members,
+            "properties" => members.each_with_object({}) { |m, h| h[m] = { "type" => "string" } }
+          }
+        end
         def parse(actual)
           return actual if actual.is_a?(Hash) || actual.is_a?(Array)

data/lib/rspec/llm/matchers/pass_llm_judge.rb CHANGED Viewed

@@ -4,8 +4,18 @@ module RSpec
   module LLM
     module Matchers
       # LLM-as-judge matcher. Asks the configured judge model whether the
-      # actual response satisfies the given criterion. Parses YES/NO from the
-      # first non-whitespace token of the judge's reply.
+      # actual response satisfies the given criterion.
+      #
+      # When +RubyLLM::Schema+ is defined (i.e. the gem that provides it is
+      # loaded), the judge prompt is sent with a structured schema contract
+      # that forces the model to return a machine-readable JSON payload:
+      #
+      #   { passed: true/false, reason: "..." }
+      #
+      # This eliminates brittle first-token YES/NO parsing and surfaces a rich
+      # reason string for failure messages.  Adapters (or gem configurations)
+      # that do not support structured output fall back automatically to the
+      # original YES/NO text-parsing strategy.
       class PassLlmJudge
         def initialize(criterion)
           @criterion = criterion
@@ -20,8 +30,16 @@ module RSpec
         def matches?(actual)
           @actual = actual.to_s
-          @verdict_text = judge_adapter.chat(prompt_for(@actual, @criterion))
-          @verdict, @reason = parse_verdict(@verdict_text)
+          result = judge_adapter.chat_structured(prompt_for(@actual, @criterion), schema: judge_schema)
+          if result.is_a?(Hash)
+            @verdict = result[:passed] || result["passed"]
+            @reason  = (result[:reason] || result["reason"] || "").to_s
+          else
+            @verdict_text = result.to_s
+            @verdict, @reason = parse_verdict(@verdict_text)
+          end
           @verdict == true
         end
@@ -41,6 +59,18 @@ module RSpec
         private
+        # Returns a RubyLLM::Schema class that constrains the judge's reply to a
+        # structured { passed:, reason: } payload, or +nil+ when the gem is not
+        # loaded (triggering the text-parsing fallback path).
+        def judge_schema
+          return nil unless defined?(::RubyLLM::Schema)
+          @judge_schema ||= ::RubyLLM::Schema.create do
+            boolean :passed, description: "True if the text meets the criteria, false otherwise"
+            string  :reason, description: "Detailed explanation of why the criteria was or was not met"
+          end
+        end
         def judge_adapter
           @judge || RSpec::LLM.judge or raise(
             RSpec::LLM::Error,
@@ -63,7 +93,7 @@ module RSpec
         end
         def format_reason
-          @reason.empty? ? @verdict_text.to_s.strip : @reason
+          @reason.empty? ? (@verdict_text || "").strip : @reason
         end
       end
     end

data/lib/rspec/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module RSpec
   module LLM
-    VERSION = "0.1.0"
+    VERSION = "0.2.0"
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rspec-llm
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Sal Scotto
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 3.0.0
+      version: 3.3.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="