qualspec 0.1.0 → 0.1.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -81,9 +81,7 @@ module Qualspec
81
81
  end
82
82
 
83
83
  # Phase 2: Judge all responses together
84
- if responses.any?
85
- judge_responses(responses, scenario, variant, temperature, progress: progress)
86
- end
84
+ judge_responses(responses, scenario, variant, temperature, progress: progress) if responses.any?
87
85
 
88
86
  # Record errors
89
87
  record_errors(errors, scenario, variant, temperature)
@@ -231,8 +229,7 @@ module Qualspec
231
229
  @finished_at = nil
232
230
  end
233
231
 
234
- def record_response(candidate:, scenario:, variant: 'default', temperature: nil,
235
- response:, duration_ms: nil, cost: nil, variant_data: nil)
232
+ def record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil)
236
233
  # Store in nested structure
237
234
  @responses[candidate] ||= {}
238
235
  @responses[candidate][scenario] ||= {}
@@ -253,8 +250,7 @@ module Qualspec
253
250
  @costs[candidate] += cost
254
251
  end
255
252
 
256
- def record_evaluation(candidate:, scenario:, variant: 'default', temperature: nil,
257
- criteria:, evaluation:, winner: nil)
253
+ def record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil)
258
254
  @evaluations << {
259
255
  candidate: candidate,
260
256
  scenario: scenario,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Qualspec
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.1'
5
5
  end
@@ -0,0 +1,80 @@
1
+ # Qualspec - Key Structure
2
+
3
+ ## Repository: estiens/qualspec
4
+
5
+ ### Description
6
+ LLM-judged qualitative testing for Ruby. Evaluate AI agents, compare models, and test subjective qualities that traditional assertions can't capture.
7
+
8
+ ### Core Library Files (lib/qualspec/)
9
+ - **builtin_rubrics.rb** - Built-in evaluation criteria
10
+ - **client.rb** - API client for LLM interactions
11
+ - **configuration.rb** - Configuration management
12
+ - **evaluation.rb** - Core evaluation logic
13
+ - **judge.rb** - LLM judge implementation
14
+ - **recorder.rb** - VCR integration for recording
15
+ - **rspec.rb** - RSpec integration entry point
16
+ - **rubric.rb** - Custom rubric definitions
17
+ - **version.rb** - Version info
18
+
19
+ ### Subdirectories
20
+ - **rspec/** - RSpec helpers and matchers
21
+ - **suite/** - Evaluation suite components
22
+
23
+ ### Configuration Environment Variables
24
+ | Variable | Description | Default |
25
+ |----------|-------------|---------|
26
+ | QUALSPEC_API_KEY | API key (required) | - |
27
+ | QUALSPEC_API_URL | API endpoint | https://openrouter.ai/api/v1 |
28
+ | QUALSPEC_MODEL | Default model for candidates | google/gemini-3-flash-preview |
29
+ | QUALSPEC_JUDGE_MODEL | Model used as judge | Same as QUALSPEC_MODEL |
30
+
31
+ ### Key Features
32
+ 1. **Model Comparison CLI** - Compare multiple models on the same prompts
33
+ 2. **LLM Judge** - Use an LLM to evaluate responses qualitatively
34
+ 3. **RSpec Integration** - Test your agents with qualitative assertions
35
+ 4. **Built-in Rubrics** - Pre-defined evaluation criteria
36
+ 5. **Custom Rubrics** - Define your own evaluation criteria
37
+ 6. **VCR Recording** - Record and replay API calls for testing
38
+ 7. **HTML Reports** - Generate visual comparison reports
39
+
40
+ ### Example: Model Comparison
41
+ ```ruby
42
+ # eval/comparison.rb
43
+ Qualspec.evaluation "Model Comparison" do
44
+ candidates do
45
+ candidate "gpt4", model: "openai/gpt-4"
46
+ candidate "claude", model: "anthropic/claude-3-sonnet"
47
+ end
48
+
49
+ scenario "helpfulness" do
50
+ prompt "How do I center a div in CSS?"
51
+ eval "provides a working solution"
52
+ eval "explains the approach"
53
+ end
54
+ end
55
+ ```
56
+
57
+ ### Example: RSpec Integration
58
+ ```ruby
59
+ require "qualspec/rspec"
60
+
61
+ RSpec.describe MyAgent do
62
+ include Qualspec::RSpec::Helpers
63
+
64
+ it "responds helpfully" do
65
+ response = MyAgent.call("Hello")
66
+
67
+ result = qualspec_evaluate(response, "responds in a friendly manner")
68
+ expect(result).to be_passing
69
+ end
70
+ end
71
+ ```
72
+
73
+ ### CLI Usage
74
+ ```shell
75
+ # Run comparison
76
+ qualspec eval/comparison.rb
77
+
78
+ # Generate HTML report
79
+ qualspec --html report.html eval/comparison.rb
80
+ ```
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qualspec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Stiens
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-12-26 00:00:00.000000000 Z
11
+ date: 2026-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -62,6 +62,7 @@ executables:
62
62
  extensions: []
63
63
  extra_rdoc_files: []
64
64
  files:
65
+ - ".DS_Store"
65
66
  - ".qualspec_cassettes/comparison_test.yml"
66
67
  - ".qualspec_cassettes/quick_test.yml"
67
68
  - ".rspec"
@@ -70,12 +71,16 @@ files:
70
71
  - CHANGELOG.md
71
72
  - README.md
72
73
  - Rakefile
74
+ - docs/.DS_Store
73
75
  - docs/configuration.md
74
76
  - docs/evaluation-suites.md
75
77
  - docs/getting-started.md
76
78
  - docs/recording.md
77
79
  - docs/rspec-integration.md
78
80
  - docs/rubrics.md
81
+ - docs/to_implement/factory_bot_integration_design.md
82
+ - docs/to_implement/variants_first_pass.md
83
+ - examples/README.md
79
84
  - examples/cassettes/qualspec_rspec_integration_basic_evaluation_evaluates_responses_with_inline_criteria.yml
80
85
  - examples/cassettes/qualspec_rspec_integration_basic_evaluation_provides_detailed_feedback_on_failure.yml
81
86
  - examples/cassettes/qualspec_rspec_integration_comparative_evaluation_compares_multiple_responses.yml
@@ -86,9 +91,13 @@ files:
86
91
  - examples/comparison.rb
87
92
  - examples/model_comparison.rb
88
93
  - examples/persona_test.rb
94
+ - examples/prompt_variants_factory.rb
89
95
  - examples/quick_test.rb
90
96
  - examples/report.html
97
+ - examples/results/simple_variant_comparison.json
91
98
  - examples/rspec_example_spec.rb
99
+ - examples/simple_variant_comparison.rb
100
+ - examples/variant_comparison.rb
92
101
  - exe/qualspec
93
102
  - lib/qualspec.rb
94
103
  - lib/qualspec/builtin_rubrics.rb
@@ -96,6 +105,7 @@ files:
96
105
  - lib/qualspec/configuration.rb
97
106
  - lib/qualspec/evaluation.rb
98
107
  - lib/qualspec/judge.rb
108
+ - lib/qualspec/prompt_variant.rb
99
109
  - lib/qualspec/recorder.rb
100
110
  - lib/qualspec/rspec.rb
101
111
  - lib/qualspec/rspec/configuration.rb
@@ -112,6 +122,7 @@ files:
112
122
  - lib/qualspec/suite/runner.rb
113
123
  - lib/qualspec/suite/scenario.rb
114
124
  - lib/qualspec/version.rb
125
+ - qualspec_structure.md
115
126
  - sig/qualspec.rbs
116
127
  homepage: https://github.com/estiens/qualspec
117
128
  licenses: