qualspec 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +14 -0
  3. data/.rubocop_todo.yml +1 -1
  4. data/CHANGELOG.md +31 -0
  5. data/README.md +27 -5
  6. data/config/models.yml +23 -0
  7. data/docs/alpha_readiness.md +94 -0
  8. data/docs/configuration.md +53 -4
  9. data/docs/evaluation-suites.md +45 -2
  10. data/docs/getting-started.md +5 -2
  11. data/docs/recording.md +22 -0
  12. data/examples/EXAMPLES.md +73 -0
  13. data/examples/README.md +5 -0
  14. data/examples/best_value.rb +67 -0
  15. data/examples/cassettes/best_value.yml +649 -0
  16. data/examples/cassettes/character_consistency.yml +680 -0
  17. data/examples/cassettes/customer_service_comparison.yml +593 -0
  18. data/examples/cassettes/date_awareness_gate.yml +420 -0
  19. data/examples/cassettes/qualspec_rspec_integration_comparative_evaluation_compares_multiple_responses.yml +4 -4
  20. data/examples/character_consistency.rb +83 -0
  21. data/examples/comparison.rb +0 -0
  22. data/examples/customer_service_comparison.rb +59 -0
  23. data/examples/date_awareness_gate.rb +57 -0
  24. data/examples/model_comparison.rb +0 -0
  25. data/examples/persona_test.rb +0 -0
  26. data/examples/prompt_variants_factory.rb +0 -0
  27. data/examples/quick_test.rb +0 -0
  28. data/examples/rspec_example_spec.rb +0 -0
  29. data/examples/simple_variant_comparison.rb +0 -0
  30. data/examples/variant_comparison.rb +0 -0
  31. data/exe/qualspec +4 -4
  32. data/lib/qualspec/client.rb +14 -7
  33. data/lib/qualspec/configuration.rb +18 -5
  34. data/lib/qualspec/model_registry.rb +62 -0
  35. data/lib/qualspec/recorder.rb +41 -3
  36. data/lib/qualspec/suite/candidate.rb +7 -4
  37. data/lib/qualspec/suite/dsl.rb +16 -1
  38. data/lib/qualspec/suite/runner.rb +49 -1
  39. data/lib/qualspec/version.rb +1 -1
  40. data/lib/qualspec.rb +17 -0
  41. data/qualspec_structure.md +9 -3
  42. metadata +16 -7
File without changes
File without changes
File without changes
File without changes
data/exe/qualspec CHANGED
@@ -66,10 +66,10 @@ parser = OptionParser.new do |opts|
66
66
  puts opts
67
67
  puts
68
68
  puts 'Environment variables:'
69
- puts ' QUALSPEC_API_URL API endpoint (default: http://localhost:11434/v1)'
70
- puts ' QUALSPEC_API_KEY API key for authentication'
71
- puts ' QUALSPEC_MODEL Default model for candidates'
72
- puts ' QUALSPEC_JUDGE_MODEL Model to use as judge'
69
+ puts ' QUALSPEC_API_URL API endpoint (default: https://openrouter.ai/api/v1)'
70
+ puts ' QUALSPEC_API_KEY API key for authentication (falls back to OPEN_ROUTER_API_KEY)'
71
+ puts ' QUALSPEC_MODEL Default model for candidates (default: openrouter/auto)'
72
+ puts ' QUALSPEC_JUDGE_MODEL Model to use as judge (default: same as QUALSPEC_MODEL)'
73
73
  puts
74
74
  puts 'Example:'
75
75
  puts ' qualspec eval/model_comparison.rb'
data/lib/qualspec/client.rb CHANGED
@@ -53,8 +53,8 @@ module Qualspec
53
53
  return if @config.api_key_configured?
54
54
 
55
55
  raise Qualspec::Error, <<~MSG.strip
56
- QUALSPEC_API_KEY is required but not set.
57
- Set it via environment variable or Qualspec.configure { |c| c.api_key = '...' }
56
+ No API key set. Set QUALSPEC_API_KEY (or OPEN_ROUTER_API_KEY) as an
57
+ environment variable, or use Qualspec.configure { |c| c.api_key = '...' }
58
58
  MSG
59
59
  end
60
60
 
@@ -70,6 +70,10 @@ module Qualspec
70
70
  # Set temperature if provided
71
71
  payload[:temperature] = temperature if temperature
72
72
 
73
+ # Ask OpenRouter to include usage accounting (cost + token details).
74
+ # Only when metadata is requested, so cost-less calls stay lean.
75
+ payload[:usage] = { include: true } if with_metadata
76
+
73
77
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
74
78
 
75
79
  response = @conn.post('chat/completions', payload)
@@ -108,12 +112,15 @@ module Qualspec
108
112
  end
109
113
 
110
114
  def extract_cost(response, data)
111
- # OpenRouter includes cost in response or headers
112
- header_cost = response.headers['x-openrouter-cost']
113
- return header_cost.to_f if header_cost
115
+ # OpenRouter returns cost under usage.cost when usage accounting is
116
+ # requested (usage: { include: true }). Fall back to other shapes for
117
+ # other OpenAI-compatible providers.
118
+ usage = data['usage'] || {}
119
+ cost = usage['cost'] || usage['total_cost'] || data['cost']
120
+ return cost.to_f if cost
114
121
 
115
- # Check response body (some providers include it)
116
- data.dig('usage', 'total_cost') || data['cost']
122
+ header_cost = response.headers['x-openrouter-cost']
123
+ header_cost&.to_f
117
124
  end
118
125
 
119
126
  def extract_tokens(data)
data/lib/qualspec/configuration.rb CHANGED
@@ -2,15 +2,20 @@
2
2
 
3
3
  module Qualspec
4
4
  class Configuration
5
- attr_accessor :api_url, :api_key, :default_model, :judge_model, :cache_enabled, :cache_dir, :judge_system_prompt,
5
+ attr_accessor :api_url, :default_model, :judge_model, :cache_enabled, :cache_dir, :judge_system_prompt,
6
6
  :request_timeout
7
+ attr_writer :api_key
7
8
 
8
9
  DEFAULT_API_URL = 'https://openrouter.ai/api/v1'
9
- DEFAULT_MODEL = 'google/gemini-3-flash-preview'
10
+ # Universal fallback. `openrouter/auto` routes to a sensible model for any
11
+ # request, so qualspec works even with no model configured anywhere.
12
+ DEFAULT_MODEL = 'openrouter/auto'
10
13
 
11
14
  def initialize
12
15
  @api_url = ENV.fetch('QUALSPEC_API_URL', DEFAULT_API_URL)
13
- @api_key = ENV['QUALSPEC_API_KEY']
16
+ # Default nil: set explicitly via Qualspec.configure { |c| c.api_key = ... }.
17
+ # When unset, #api_key falls back to env vars (see reader below).
18
+ @api_key = nil
14
19
  @default_model = ENV.fetch('QUALSPEC_MODEL', DEFAULT_MODEL)
15
20
  @judge_model = ENV.fetch('QUALSPEC_JUDGE_MODEL') { @default_model }
16
21
  @cache_enabled = false
@@ -19,14 +24,22 @@ module Qualspec
19
24
  @request_timeout = 120
20
25
  end
21
26
 
27
+ # Explicitly configured key wins; otherwise fall back to env vars.
28
+ # Prefer QUALSPEC_API_KEY, then OPEN_ROUTER_API_KEY (default backend is
29
+ # OpenRouter). The env vars are a convenience fallback, not a requirement —
30
+ # pass api_key in Qualspec.configure to avoid relying on them.
31
+ def api_key
32
+ @api_key || ENV['QUALSPEC_API_KEY'] || ENV['OPEN_ROUTER_API_KEY']
33
+ end
34
+
22
35
  def api_headers
23
36
  headers = { 'Content-Type' => 'application/json' }
24
- headers['Authorization'] = "Bearer #{@api_key}" unless @api_key.to_s.empty?
37
+ headers['Authorization'] = "Bearer #{api_key}" unless api_key.to_s.empty?
25
38
  headers
26
39
  end
27
40
 
28
41
  def api_key_configured?
29
- !@api_key.to_s.empty?
42
+ !api_key.to_s.empty?
30
43
  end
31
44
  end
32
45
  end
data/lib/qualspec/model_registry.rb ADDED
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+
5
+ module Qualspec
6
+ # Loads a curated list of named models from a YAML config file and resolves
7
+ # names to their full provider slugs. Unknown/blank names fall back to the
8
+ # configured default (ultimately Configuration::DEFAULT_MODEL, openrouter/auto),
9
+ # so model lookups always return something usable.
10
+ #
11
+ # @example config/models.yml
12
+ # default: openrouter/auto
13
+ # models:
14
+ # glm: z-ai/glm-5.2
15
+ #
16
+ # @example
17
+ # Qualspec.model(:glm) # => "z-ai/glm-5.2"
18
+ # Qualspec.model(:nope) # => "openrouter/auto"
19
+ # Qualspec.model # => "openrouter/auto"
20
+ class ModelRegistry
21
+ DEFAULT_CONFIG_PATH = 'config/models.yml'
22
+
23
+ def initialize(path: nil, default: nil)
24
+ @models = {}
25
+ @default = default
26
+ load_file(path || ENV['QUALSPEC_MODELS_FILE'] || DEFAULT_CONFIG_PATH)
27
+ end
28
+
29
+ # Resolve a model name to its slug, falling back to the default.
30
+ #
31
+ # @param name [Symbol, String, nil] the configured name (or nil for default)
32
+ # @return [String] a model slug
33
+ def resolve(name = nil)
34
+ return default if name.nil? || name.to_s.empty?
35
+
36
+ @models.fetch(name.to_s, default)
37
+ end
38
+
39
+ # @return [Hash{String=>String}] all configured name => slug pairs
40
+ def all
41
+ @models.dup
42
+ end
43
+
44
+ # @return [String] the universal fallback model
45
+ def default
46
+ @default || Configuration::DEFAULT_MODEL
47
+ end
48
+
49
+ private
50
+
51
+ def load_file(path)
52
+ return unless path && File.exist?(path)
53
+
54
+ data = YAML.safe_load_file(path) || {}
55
+ @default ||= data['default']
56
+ (data['models'] || {}).each { |name, slug| @models[name.to_s] = slug }
57
+ rescue StandardError
58
+ # A malformed config file should never break a run; defaults still apply.
59
+ nil
60
+ end
61
+ end
62
+ end
data/lib/qualspec/recorder.rb CHANGED
@@ -13,16 +13,32 @@ module Qualspec
13
13
  def setup(cassette_dir: '.qualspec_cassettes')
14
14
  require_vcr!
15
15
 
16
+ recorder = self
16
17
  VCR.configure do |config|
17
18
  config.cassette_library_dir = cassette_dir
18
19
  config.hook_into :faraday
19
20
  config.default_cassette_options = {
20
21
  record: :new_episodes,
21
- match_requests_on: %i[method uri body]
22
+ match_requests_on: %i[method uri body_without_model]
22
23
  }
23
- # Filter out API keys
24
- config.filter_sensitive_data('<API_KEY>') { Qualspec.configuration.api_key }
24
+ # Filter out API keys — guard against adding duplicate filters
25
+ unless @api_key_filter_registered
26
+ config.filter_sensitive_data('<API_KEY>') { Qualspec.configuration.api_key }
27
+ @api_key_filter_registered = true
28
+ end
25
29
  end
30
+
31
+ # Register custom matcher once — ignores the `model` field so cassettes
32
+ # recorded with one model work in CI where a different model is configured.
33
+ unless @matcher_registered
34
+ VCR.configure do |config|
35
+ config.register_request_matcher(:body_without_model) do |r1, r2|
36
+ recorder.send(:normalize_body_for_match, r1.body) == recorder.send(:normalize_body_for_match, r2.body)
37
+ end
38
+ end
39
+ @matcher_registered = true
40
+ end
41
+
26
42
  @configured = true
27
43
  end
28
44
 
@@ -40,6 +56,20 @@ module Qualspec
40
56
  VCR.use_cassette(name, record: :none, &block)
41
57
  end
42
58
 
59
+ # Replay a cassette if it already exists (no API key required), otherwise
60
+ # record a fresh one. Ideal for examples that ship a committed cassette so
61
+ # they run for free, but still record on first run.
62
+ def use_cassette(name, &block)
63
+ setup unless configured?
64
+ mode = cassette_exists?(name) ? :none : :new_episodes
65
+ VCR.use_cassette(name, record: mode, &block)
66
+ end
67
+
68
+ def cassette_exists?(name)
69
+ require_vcr!
70
+ File.exist?(File.join(VCR.configuration.cassette_library_dir, "#{name}.yml"))
71
+ end
72
+
43
73
  private
44
74
 
45
75
  def require_vcr!
@@ -50,6 +80,14 @@ module Qualspec
50
80
  Add to your Gemfile: gem 'vcr'
51
81
  MSG
52
82
  end
83
+
84
+ def normalize_body_for_match(body)
85
+ parsed = JSON.parse(body)
86
+ parsed.delete('model')
87
+ JSON.generate(parsed)
88
+ rescue JSON::ParserError
89
+ body
90
+ end
53
91
  end
54
92
  end
55
93
  end
data/lib/qualspec/suite/candidate.rb CHANGED
@@ -5,14 +5,16 @@ module Qualspec
5
5
  class Candidate
6
6
  attr_reader :name, :model, :system_prompt, :options
7
7
 
8
- def initialize(name, model:, system_prompt: nil, **options)
8
+ def initialize(name, model: nil, system_prompt: nil, **options)
9
9
  @name = name.to_s
10
- @model = model
10
+ # Fall back to the configured default model (ultimately openrouter/auto)
11
+ # so a candidate works even when no model is specified.
12
+ @model = model || Qualspec.configuration.default_model
11
13
  @system_prompt = system_prompt
12
14
  @options = options
13
15
  end
14
16
 
15
- def generate_response(prompt:, system_prompt: nil, temperature: nil)
17
+ def generate_response(prompt:, system_prompt: nil, temperature: nil, with_metadata: false)
16
18
  messages = []
17
19
 
18
20
  sys = system_prompt || @system_prompt
@@ -23,7 +25,8 @@ module Qualspec
23
25
  model: @model,
24
26
  messages: messages,
25
27
  json_mode: false, # We want natural responses, not JSON
26
- temperature: normalize_temperature(temperature)
28
+ temperature: normalize_temperature(temperature),
29
+ with_metadata: with_metadata
27
30
  )
28
31
  end
29
32
 
data/lib/qualspec/suite/dsl.rb CHANGED
@@ -11,16 +11,31 @@ module Qualspec
11
11
  @scenarios_list = []
12
12
  @variants_config = nil
13
13
  @temperature_list = [nil] # nil means use model default
14
+ @track_cost = false
14
15
 
15
16
  instance_eval(&block) if block_given? # rubocop:disable Style/EvalWithLocation -- DSL pattern requires eval
16
17
  end
17
18
 
19
+ # DSL: capture per-call cost + token metadata so cost/value analysis works.
20
+ # Off by default — evaluations that don't look at cost skip the overhead.
21
+ #
22
+ # @example
23
+ # track_cost
24
+ def track_cost(value = true) # rubocop:disable Style/OptionalBooleanParameter -- reads as a DSL toggle
25
+ @track_cost = value
26
+ end
27
+ alias capture_metadata track_cost
28
+
29
+ def track_cost?
30
+ @track_cost
31
+ end
32
+
18
33
  # DSL: define candidates
19
34
  def candidates(&block)
20
35
  instance_eval(&block) # rubocop:disable Style/EvalWithLocation -- DSL pattern requires eval
21
36
  end
22
37
 
23
- def candidate(name, model:, system_prompt: nil, **options)
38
+ def candidate(name, model: nil, system_prompt: nil, **options)
24
39
  @candidates_list << Candidate.new(name, model: model, system_prompt: system_prompt, **options)
25
40
  end
26
41
 
data/lib/qualspec/suite/runner.rb CHANGED
@@ -15,6 +15,8 @@ module Qualspec
15
15
  @definition.candidates_list.each do |c|
16
16
  @results.candidate_models[c.name] = c.model
17
17
  end
18
+
19
+ @results.metadata_captured = @definition.track_cost?
18
20
  end
19
21
 
20
22
  def run(progress: true)
@@ -106,7 +108,8 @@ module Qualspec
106
108
  response = candidate.generate_response(
107
109
  prompt: final_prompt,
108
110
  system_prompt: final_system_prompt,
109
- temperature: effective_temperature
111
+ temperature: effective_temperature,
112
+ with_metadata: @definition.track_cost?
110
113
  )
111
114
 
112
115
  duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
@@ -225,6 +228,7 @@ module Qualspec
225
228
  class Results
226
229
  attr_reader :suite_name, :evaluations, :responses, :started_at, :finished_at, :timing, :costs,
227
230
  :candidate_models, :prompts
231
+ attr_accessor :metadata_captured
228
232
 
229
233
  def initialize(suite_name)
230
234
  @suite_name = suite_name
@@ -236,6 +240,32 @@ module Qualspec
236
240
  @prompts = {} # {scenario_name => prompt_string}
237
241
  @started_at = Time.now
238
242
  @finished_at = nil
243
+ @metadata_captured = false # set true when the suite enables track_cost
244
+ end
245
+
246
+ # Whether per-call cost/token metadata was captured this run.
247
+ def costs_tracked?
248
+ @metadata_captured
249
+ end
250
+
251
+ # Total cost per candidate. Raises if cost tracking wasn't enabled.
252
+ def cost_by_candidate
253
+ ensure_cost_tracking!
254
+ @costs.dup
255
+ end
256
+
257
+ # Rank candidates by quality-per-dollar (avg score / total cost), best
258
+ # first. Candidates with zero recorded cost sort last. Raises a helpful
259
+ # error if cost tracking wasn't enabled for the run.
260
+ def value_ranking
261
+ ensure_cost_tracking!
262
+
263
+ ranked = scores_by_candidate.map do |candidate, stats|
264
+ cost = @costs[candidate].to_f
265
+ score_per_dollar = cost.positive? ? (stats[:avg_score] / cost).round : nil
266
+ [candidate, { avg_score: stats[:avg_score], cost: cost, score_per_dollar: score_per_dollar }]
267
+ end
268
+ ranked.sort_by { |_, v| -(v[:score_per_dollar] || 0) }.to_h
239
269
  end
240
270
 
241
271
  def record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil)
@@ -385,6 +415,24 @@ module Qualspec
385
415
  responses: @responses
386
416
  }
387
417
  end
418
+
419
+ private
420
+
421
+ def ensure_cost_tracking!
422
+ return if @metadata_captured
423
+
424
+ raise Qualspec::Error, <<~MSG.strip
425
+ Cost data was not captured for this run, so cost/value analysis is unavailable.
426
+ Enable it with `track_cost` in the suite definition:
427
+
428
+ Qualspec.evaluation 'My Suite' do
429
+ track_cost
430
+ ...
431
+ end
432
+
433
+ (track_cost adds usage accounting to each request via with_metadata.)
434
+ MSG
435
+ end
388
436
  end
389
437
  end
390
438
  end
data/lib/qualspec/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Qualspec
4
- VERSION = '0.1.2'
4
+ VERSION = '0.2.0'
5
5
  end
data/lib/qualspec.rb CHANGED
@@ -7,6 +7,7 @@ module Qualspec
7
7
  end
8
8
 
9
9
  require_relative 'qualspec/configuration'
10
+ require_relative 'qualspec/model_registry'
10
11
  require_relative 'qualspec/client'
11
12
  require_relative 'qualspec/evaluation'
12
13
  require_relative 'qualspec/prompt_variant'
@@ -37,6 +38,7 @@ module Qualspec
37
38
  @configuration = nil
38
39
  @client = nil
39
40
  @judge = nil
41
+ @models = nil
40
42
  Rubric.clear!
41
43
  Suite.clear!
42
44
  Suite::Behavior.clear!
@@ -50,6 +52,21 @@ module Qualspec
50
52
  @judge ||= Judge.new
51
53
  end
52
54
 
55
+ # Registry of named models loaded from config/models.yml (or
56
+ # QUALSPEC_MODELS_FILE). See ModelRegistry.
57
+ def models
58
+ @models ||= ModelRegistry.new
59
+ end
60
+
61
+ # Resolve a named model to its slug, falling back to the default
62
+ # (openrouter/auto). Returns the default when name is nil/unknown.
63
+ #
64
+ # Qualspec.model(:glm) # => "z-ai/glm-5.2"
65
+ # Qualspec.model # => "openrouter/auto"
66
+ def model(name = nil)
67
+ models.resolve(name)
68
+ end
69
+
53
70
  # Convenience method for defining rubrics
54
71
  def define_rubric(name, &block)
55
72
  Rubric.define(name, &block)
data/qualspec_structure.md CHANGED
@@ -7,10 +7,12 @@ LLM-judged qualitative testing for Ruby. Evaluate AI agents, compare models, and
7
7
 
8
8
  ### Core Library Files (lib/qualspec/)
9
9
  - **builtin_rubrics.rb** - Built-in evaluation criteria
10
- - **client.rb** - API client for LLM interactions
10
+ - **client.rb** - API client for LLM interactions (cost/token metadata optional)
11
11
  - **configuration.rb** - Configuration management
12
12
  - **evaluation.rb** - Core evaluation logic
13
13
  - **judge.rb** - LLM judge implementation
14
+ - **model_registry.rb** - Named models from `config/models.yml` (`Qualspec.model`)
15
+ - **prompt_variant.rb** - Variant value object (FactoryBot target)
14
16
  - **recorder.rb** - VCR integration for recording
15
17
  - **rspec.rb** - RSpec integration entry point
16
18
  - **rubric.rb** - Custom rubric definitions
@@ -23,10 +25,11 @@ LLM-judged qualitative testing for Ruby. Evaluate AI agents, compare models, and
23
25
  ### Configuration Environment Variables
24
26
  | Variable | Description | Default |
25
27
  |----------|-------------|---------|
26
- | QUALSPEC_API_KEY | API key (required) | - |
28
+ | QUALSPEC_API_KEY | API key (falls back to OPEN_ROUTER_API_KEY) | - |
27
29
  | QUALSPEC_API_URL | API endpoint | https://openrouter.ai/api/v1 |
28
- | QUALSPEC_MODEL | Default model for candidates | google/gemini-3-flash-preview |
30
+ | QUALSPEC_MODEL | Default model for candidates | openrouter/auto |
29
31
  | QUALSPEC_JUDGE_MODEL | Model used as judge | Same as QUALSPEC_MODEL |
32
+ | QUALSPEC_MODELS_FILE | Named-models YAML | config/models.yml |
30
33
 
31
34
  ### Key Features
32
35
  1. **Model Comparison CLI** - Compare multiple models on the same prompts
@@ -36,6 +39,9 @@ LLM-judged qualitative testing for Ruby. Evaluate AI agents, compare models, and
36
39
  5. **Custom Rubrics** - Define your own evaluation criteria
37
40
  6. **VCR Recording** - Record and replay API calls for testing
38
41
  7. **HTML Reports** - Generate visual comparison reports
42
+ 8. **Named Model Registry** - Reference curated models by name (`Qualspec.model`)
43
+ 9. **Cost Tracking** - Opt-in per-call cost + quality-per-dollar `value_ranking`
44
+ 10. **Variant & Temperature Matrix** - Combinatorial prompt testing via FactoryBot
39
45
 
40
46
  ### Example: Model Comparison
41
47
  ```ruby
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qualspec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Stiens
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2026-04-16 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: faraday
@@ -70,7 +69,9 @@ files:
70
69
  - CHANGELOG.md
71
70
  - README.md
72
71
  - Rakefile
72
+ - config/models.yml
73
73
  - docs/.DS_Store
74
+ - docs/alpha_readiness.md
74
75
  - docs/configuration.md
75
76
  - docs/evaluation-suites.md
76
77
  - docs/getting-started.md
@@ -79,7 +80,13 @@ files:
79
80
  - docs/rubrics.md
80
81
  - docs/to_implement/factory_bot_integration_design.md
81
82
  - docs/to_implement/variants_first_pass.md
83
+ - examples/EXAMPLES.md
82
84
  - examples/README.md
85
+ - examples/best_value.rb
86
+ - examples/cassettes/best_value.yml
87
+ - examples/cassettes/character_consistency.yml
88
+ - examples/cassettes/customer_service_comparison.yml
89
+ - examples/cassettes/date_awareness_gate.yml
83
90
  - examples/cassettes/qualspec_rspec_integration_basic_evaluation_evaluates_responses_with_inline_criteria.yml
84
91
  - examples/cassettes/qualspec_rspec_integration_basic_evaluation_provides_detailed_feedback_on_failure.yml
85
92
  - examples/cassettes/qualspec_rspec_integration_comparative_evaluation_compares_multiple_responses.yml
@@ -87,7 +94,10 @@ files:
87
94
  - examples/cassettes/qualspec_rspec_integration_vcr_integration_records_and_plays_back_api_calls_automatically.yml
88
95
  - examples/cassettes/qualspec_rspec_integration_with_context_uses_context_in_evaluation.yml
89
96
  - examples/cassettes/qualspec_rspec_integration_with_rubrics_evaluates_using_builtin_rubrics.yml
97
+ - examples/character_consistency.rb
90
98
  - examples/comparison.rb
99
+ - examples/customer_service_comparison.rb
100
+ - examples/date_awareness_gate.rb
91
101
  - examples/model_comparison.rb
92
102
  - examples/persona_test.rb
93
103
  - examples/prompt_variants_factory.rb
@@ -104,6 +114,7 @@ files:
104
114
  - lib/qualspec/configuration.rb
105
115
  - lib/qualspec/evaluation.rb
106
116
  - lib/qualspec/judge.rb
117
+ - lib/qualspec/model_registry.rb
107
118
  - lib/qualspec/prompt_variant.rb
108
119
  - lib/qualspec/recorder.rb
109
120
  - lib/qualspec/rspec.rb
@@ -130,7 +141,6 @@ metadata:
130
141
  homepage_uri: https://github.com/estiens/qualspec
131
142
  source_code_uri: https://github.com/estiens/qualspec
132
143
  changelog_uri: https://github.com/estiens/qualspec/blob/main/CHANGELOG.md
133
- post_install_message:
134
144
  rdoc_options: []
135
145
  require_paths:
136
146
  - lib
@@ -138,15 +148,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
138
148
  requirements:
139
149
  - - ">="
140
150
  - !ruby/object:Gem::Version
141
- version: 3.1.0
151
+ version: 3.3.0
142
152
  required_rubygems_version: !ruby/object:Gem::Requirement
143
153
  requirements:
144
154
  - - ">="
145
155
  - !ruby/object:Gem::Version
146
156
  version: '0'
147
157
  requirements: []
148
- rubygems_version: 3.5.22
149
- signing_key:
158
+ rubygems_version: 3.6.9
150
159
  specification_version: 4
151
160
  summary: RSpec DSL for qualitative LLM-judged testing
152
161
  test_files: []