RubyGems - qualspec - Versions diffs - 0.1.1 → 0.1.2 - Mend

qualspec 0.1.1 → 0.1.2

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/lib/qualspec/judge.rb +1 -1
data/lib/qualspec/suite/html_reporter.rb +8 -8
data/lib/qualspec/suite/runner.rb +18 -7
data/lib/qualspec/version.rb +1 -1
metadata +2 -3
data/.DS_Store +0 -0

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: '069d64a5f846962da842ffe5336739761862d0b1a3ef4cf522bfc83932af504e'
-  data.tar.gz: 693d58eb17d4e495cc0e7f1ff0ed57fc925d81ee6b8060aef41f350bf1fdb2c9
+  metadata.gz: 004a0ca49d3bcb6890cadde4f994ddf6ce9585f06f4c81798644d4e02c74de5a
+  data.tar.gz: c3994806a042fc9693cdb7e6e9d0579a644edf811b4f529080b216506056aee0
 SHA512:
-  metadata.gz: b46c32e6c43fb4f52234db0f4d6038191e85f8c05f99d8082c0024ef39fe0d7d1fa93a213b9163e8eef1c06a88add8a66f8906fa219ce8c471117a9812778766
-  data.tar.gz: de83bb65616448ccca96a405a021b19f80cbe9673df8cf65c63af24d2bdaa96efece1ea485b2786cfa7b3507f7f0ea301bd30d193c3f4218d18b67f3d2cfd695
+  metadata.gz: dfad9e00f04bc7552f8641c1d12ce956d2e58f66d120a495274e4ca5e6af85a701866f0f8e3951c0a031e72e56f486ebcc041b52be61faadcd8f4822cfebee0d
+  data.tar.gz: d4192bc2492c27472169c30b024061ce969aa65d3855dc4c1b0b26d25db2f2fac46da1070c99418d61586cf475d8c759ee8db1a37673f17270ebcbc222f407ea

data/lib/qualspec/judge.rb CHANGED Viewed

@@ -141,7 +141,7 @@ module Qualspec
       parts << '## Responses:'
       responses.each do |candidate, response|
-        parts << "\n### #{candidate}:\n#{response}"
+        parts << "\n### #{candidate}:\n```\n#{response}\n```"
       end
       parts << "\nScore each candidate (#{candidate_names}) from 0-10."

data/lib/qualspec/suite/html_reporter.rb CHANGED Viewed

@@ -481,10 +481,12 @@ module Qualspec
         scenario_blocks = scenarios.map do |scenario|
           response_cards = responses.map do |candidate, candidate_responses|
-            response = candidate_responses[scenario]
-            next unless response
+            variant_map = candidate_responses[scenario]
+            next unless variant_map
-            response_text = response.to_s.strip
+            contents = variant_map.flat_map { |_v, tm| tm.values.map { |d| d[:content] } }.compact
+            response_text = contents.join("\n\n---\n\n").strip
+            next if response_text.empty?
             <<~CARD
               <div class="response-card">
@@ -660,13 +662,11 @@ module Qualspec
       end
       def get_candidate_model(candidate)
-        # Try to find the model from the suite
-        @results.evaluations.find { |e| e[:candidate] == candidate }&.dig(:model) || 'unknown'
+        @results.candidate_models[candidate] || 'unknown'
       end
-      def get_scenario_prompt(_scenario)
-        # This would need to be stored in results - for now return nil
-        nil
+      def get_scenario_prompt(scenario)
+        @results.prompts[scenario]
       end
     end
   end

data/lib/qualspec/suite/runner.rb CHANGED Viewed

@@ -11,6 +11,10 @@ module Qualspec
         @definition = definition.is_a?(String) ? Suite.find(definition) : definition
         @results = Results.new(@definition.name)
         @judge = Qualspec.judge
+        @definition.candidates_list.each do |c|
+          @results.candidate_models[c.name] = c.model
+        end
       end
       def run(progress: true)
@@ -52,6 +56,8 @@ module Qualspec
         responses = {}
         errors = {}
+        @results.prompts[scenario.name] ||= scenario.compose_prompt(variant)
         # Phase 1: Collect all candidate responses
         @definition.candidates_list.each do |candidate|
           log_candidate_progress(candidate, scenario, 'generating') if progress
@@ -217,7 +223,8 @@ module Qualspec
     # Results container with multi-dimensional support
     class Results
-      attr_reader :suite_name, :evaluations, :responses, :started_at, :finished_at, :timing, :costs
+      attr_reader :suite_name, :evaluations, :responses, :started_at, :finished_at, :timing, :costs,
+                  :candidate_models, :prompts
       def initialize(suite_name)
         @suite_name = suite_name
@@ -225,6 +232,8 @@ module Qualspec
         @responses = {} # Nested: {candidate => {scenario => {variant => {temp => response}}}}
         @timing = {}
         @costs = {}
+        @candidate_models = {} # {candidate_name => model_string}
+        @prompts = {}          # {scenario_name => prompt_string}
         @started_at = Time.now
         @finished_at = nil
       end
@@ -329,13 +338,15 @@ module Qualspec
       def scores_by_scenario
         @evaluations.group_by { |e| e[:scenario] }.transform_values do |evals|
           evals.group_by { |e| e[:candidate] }.transform_values do |candidate_evals|
-            eval_data = candidate_evals.first
+            total = candidate_evals.size
+            avg_score = (candidate_evals.sum { |e| e[:score] }.to_f / total).round(2)
+            first = candidate_evals.first
             {
-              score: eval_data[:score],
-              pass: eval_data[:pass],
-              reasoning: eval_data[:reasoning],
-              variant: eval_data[:variant],
-              temperature: eval_data[:temperature]
+              score: avg_score,
+              pass: candidate_evals.all? { |e| e[:pass] },
+              reasoning: first[:reasoning],
+              variant: first[:variant],
+              temperature: first[:temperature]
             }
           end
         end

data/lib/qualspec/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Qualspec
-  VERSION = '0.1.1'
+  VERSION = '0.1.2'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: qualspec
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Eric Stiens
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2026-01-05 00:00:00.000000000 Z
+date: 2026-04-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: faraday
@@ -62,7 +62,6 @@ executables:
 extensions: []
 extra_rdoc_files: []
 files:
-- ".DS_Store"
 - ".qualspec_cassettes/comparison_test.yml"
 - ".qualspec_cassettes/quick_test.yml"
 - ".rspec"

data/.DS_Store DELETED Viewed

Binary file