qualspec 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '069d64a5f846962da842ffe5336739761862d0b1a3ef4cf522bfc83932af504e'
4
- data.tar.gz: 693d58eb17d4e495cc0e7f1ff0ed57fc925d81ee6b8060aef41f350bf1fdb2c9
3
+ metadata.gz: 004a0ca49d3bcb6890cadde4f994ddf6ce9585f06f4c81798644d4e02c74de5a
4
+ data.tar.gz: c3994806a042fc9693cdb7e6e9d0579a644edf811b4f529080b216506056aee0
5
5
  SHA512:
6
- metadata.gz: b46c32e6c43fb4f52234db0f4d6038191e85f8c05f99d8082c0024ef39fe0d7d1fa93a213b9163e8eef1c06a88add8a66f8906fa219ce8c471117a9812778766
7
- data.tar.gz: de83bb65616448ccca96a405a021b19f80cbe9673df8cf65c63af24d2bdaa96efece1ea485b2786cfa7b3507f7f0ea301bd30d193c3f4218d18b67f3d2cfd695
6
+ metadata.gz: dfad9e00f04bc7552f8641c1d12ce956d2e58f66d120a495274e4ca5e6af85a701866f0f8e3951c0a031e72e56f486ebcc041b52be61faadcd8f4822cfebee0d
7
+ data.tar.gz: d4192bc2492c27472169c30b024061ce969aa65d3855dc4c1b0b26d25db2f2fac46da1070c99418d61586cf475d8c759ee8db1a37673f17270ebcbc222f407ea
@@ -141,7 +141,7 @@ module Qualspec
141
141
  parts << '## Responses:'
142
142
 
143
143
  responses.each do |candidate, response|
144
- parts << "\n### #{candidate}:\n#{response}"
144
+ parts << "\n### #{candidate}:\n```\n#{response}\n```"
145
145
  end
146
146
 
147
147
  parts << "\nScore each candidate (#{candidate_names}) from 0-10."
@@ -481,10 +481,12 @@ module Qualspec
481
481
 
482
482
  scenario_blocks = scenarios.map do |scenario|
483
483
  response_cards = responses.map do |candidate, candidate_responses|
484
- response = candidate_responses[scenario]
485
- next unless response
484
+ variant_map = candidate_responses[scenario]
485
+ next unless variant_map
486
486
 
487
- response_text = response.to_s.strip
487
+ contents = variant_map.flat_map { |_v, tm| tm.values.map { |d| d[:content] } }.compact
488
+ response_text = contents.join("\n\n---\n\n").strip
489
+ next if response_text.empty?
488
490
 
489
491
  <<~CARD
490
492
  <div class="response-card">
@@ -660,13 +662,11 @@ module Qualspec
660
662
  end
661
663
 
662
664
  def get_candidate_model(candidate)
663
- # Try to find the model from the suite
664
- @results.evaluations.find { |e| e[:candidate] == candidate }&.dig(:model) || 'unknown'
665
+ @results.candidate_models[candidate] || 'unknown'
665
666
  end
666
667
 
667
- def get_scenario_prompt(_scenario)
668
- # This would need to be stored in results - for now return nil
669
- nil
668
+ def get_scenario_prompt(scenario)
669
+ @results.prompts[scenario]
670
670
  end
671
671
  end
672
672
  end
@@ -11,6 +11,10 @@ module Qualspec
11
11
  @definition = definition.is_a?(String) ? Suite.find(definition) : definition
12
12
  @results = Results.new(@definition.name)
13
13
  @judge = Qualspec.judge
14
+
15
+ @definition.candidates_list.each do |c|
16
+ @results.candidate_models[c.name] = c.model
17
+ end
14
18
  end
15
19
 
16
20
  def run(progress: true)
@@ -52,6 +56,8 @@ module Qualspec
52
56
  responses = {}
53
57
  errors = {}
54
58
 
59
+ @results.prompts[scenario.name] ||= scenario.compose_prompt(variant)
60
+
55
61
  # Phase 1: Collect all candidate responses
56
62
  @definition.candidates_list.each do |candidate|
57
63
  log_candidate_progress(candidate, scenario, 'generating') if progress
@@ -217,7 +223,8 @@ module Qualspec
217
223
 
218
224
  # Results container with multi-dimensional support
219
225
  class Results
220
- attr_reader :suite_name, :evaluations, :responses, :started_at, :finished_at, :timing, :costs
226
+ attr_reader :suite_name, :evaluations, :responses, :started_at, :finished_at, :timing, :costs,
227
+ :candidate_models, :prompts
221
228
 
222
229
  def initialize(suite_name)
223
230
  @suite_name = suite_name
@@ -225,6 +232,8 @@ module Qualspec
225
232
  @responses = {} # Nested: {candidate => {scenario => {variant => {temp => response}}}}
226
233
  @timing = {}
227
234
  @costs = {}
235
+ @candidate_models = {} # {candidate_name => model_string}
236
+ @prompts = {} # {scenario_name => prompt_string}
228
237
  @started_at = Time.now
229
238
  @finished_at = nil
230
239
  end
@@ -329,13 +338,15 @@ module Qualspec
329
338
  def scores_by_scenario
330
339
  @evaluations.group_by { |e| e[:scenario] }.transform_values do |evals|
331
340
  evals.group_by { |e| e[:candidate] }.transform_values do |candidate_evals|
332
- eval_data = candidate_evals.first
341
+ total = candidate_evals.size
342
+ avg_score = (candidate_evals.sum { |e| e[:score] }.to_f / total).round(2)
343
+ first = candidate_evals.first
333
344
  {
334
- score: eval_data[:score],
335
- pass: eval_data[:pass],
336
- reasoning: eval_data[:reasoning],
337
- variant: eval_data[:variant],
338
- temperature: eval_data[:temperature]
345
+ score: avg_score,
346
+ pass: candidate_evals.all? { |e| e[:pass] },
347
+ reasoning: first[:reasoning],
348
+ variant: first[:variant],
349
+ temperature: first[:temperature]
339
350
  }
340
351
  end
341
352
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Qualspec
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qualspec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Stiens
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-01-05 00:00:00.000000000 Z
11
+ date: 2026-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -62,7 +62,6 @@ executables:
62
62
  extensions: []
63
63
  extra_rdoc_files: []
64
64
  files:
65
- - ".DS_Store"
66
65
  - ".qualspec_cassettes/comparison_test.yml"
67
66
  - ".qualspec_cassettes/quick_test.yml"
68
67
  - ".rspec"
data/.DS_Store DELETED
Binary file