qualspec 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.qualspec_cassettes/comparison_test.yml +439 -0
- data/.qualspec_cassettes/quick_test.yml +232 -0
- data/.rspec +3 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +70 -0
- data/CHANGELOG.md +16 -0
- data/README.md +84 -0
- data/Rakefile +8 -0
- data/docs/configuration.md +132 -0
- data/docs/evaluation-suites.md +180 -0
- data/docs/getting-started.md +102 -0
- data/docs/recording.md +196 -0
- data/docs/rspec-integration.md +233 -0
- data/docs/rubrics.md +174 -0
- data/examples/cassettes/qualspec_rspec_integration_basic_evaluation_evaluates_responses_with_inline_criteria.yml +65 -0
- data/examples/cassettes/qualspec_rspec_integration_basic_evaluation_provides_detailed_feedback_on_failure.yml +64 -0
- data/examples/cassettes/qualspec_rspec_integration_comparative_evaluation_compares_multiple_responses.yml +74 -0
- data/examples/cassettes/qualspec_rspec_integration_score_matchers_supports_score_comparisons.yml +65 -0
- data/examples/cassettes/qualspec_rspec_integration_vcr_integration_records_and_plays_back_api_calls_automatically.yml +65 -0
- data/examples/cassettes/qualspec_rspec_integration_with_context_uses_context_in_evaluation.yml +67 -0
- data/examples/cassettes/qualspec_rspec_integration_with_rubrics_evaluates_using_builtin_rubrics.yml +67 -0
- data/examples/comparison.rb +22 -0
- data/examples/model_comparison.rb +38 -0
- data/examples/persona_test.rb +49 -0
- data/examples/quick_test.rb +28 -0
- data/examples/report.html +399 -0
- data/examples/rspec_example_spec.rb +153 -0
- data/exe/qualspec +142 -0
- data/lib/qualspec/builtin_rubrics.rb +83 -0
- data/lib/qualspec/client.rb +127 -0
- data/lib/qualspec/configuration.rb +32 -0
- data/lib/qualspec/evaluation.rb +52 -0
- data/lib/qualspec/judge.rb +217 -0
- data/lib/qualspec/recorder.rb +55 -0
- data/lib/qualspec/rspec/configuration.rb +49 -0
- data/lib/qualspec/rspec/evaluation_result.rb +142 -0
- data/lib/qualspec/rspec/helpers.rb +155 -0
- data/lib/qualspec/rspec/matchers.rb +163 -0
- data/lib/qualspec/rspec.rb +66 -0
- data/lib/qualspec/rubric.rb +43 -0
- data/lib/qualspec/suite/behavior.rb +43 -0
- data/lib/qualspec/suite/builtin_behaviors.rb +84 -0
- data/lib/qualspec/suite/candidate.rb +30 -0
- data/lib/qualspec/suite/dsl.rb +64 -0
- data/lib/qualspec/suite/html_reporter.rb +673 -0
- data/lib/qualspec/suite/reporter.rb +274 -0
- data/lib/qualspec/suite/runner.rb +261 -0
- data/lib/qualspec/suite/scenario.rb +57 -0
- data/lib/qualspec/version.rb +5 -0
- data/lib/qualspec.rb +103 -0
- data/sig/qualspec.rbs +4 -0
- metadata +142 -0
metadata
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: qualspec
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Eric Stiens
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2025-12-25 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: faraday
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '2.0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '2.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: vcr
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '6.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '6.0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: webmock
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '3.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '3.0'
|
|
55
|
+
description: Define qualitative evaluation criteria and let an LLM judge if responses
|
|
56
|
+
pass. Perfect for testing AI agents, comparing models, and evaluating subjective
|
|
57
|
+
qualities.
|
|
58
|
+
email:
|
|
59
|
+
- estiens@users.noreply.github.com
|
|
60
|
+
executables:
|
|
61
|
+
- qualspec
|
|
62
|
+
extensions: []
|
|
63
|
+
extra_rdoc_files: []
|
|
64
|
+
files:
|
|
65
|
+
- ".qualspec_cassettes/comparison_test.yml"
|
|
66
|
+
- ".qualspec_cassettes/quick_test.yml"
|
|
67
|
+
- ".rspec"
|
|
68
|
+
- ".rubocop.yml"
|
|
69
|
+
- ".rubocop_todo.yml"
|
|
70
|
+
- CHANGELOG.md
|
|
71
|
+
- README.md
|
|
72
|
+
- Rakefile
|
|
73
|
+
- docs/configuration.md
|
|
74
|
+
- docs/evaluation-suites.md
|
|
75
|
+
- docs/getting-started.md
|
|
76
|
+
- docs/recording.md
|
|
77
|
+
- docs/rspec-integration.md
|
|
78
|
+
- docs/rubrics.md
|
|
79
|
+
- examples/cassettes/qualspec_rspec_integration_basic_evaluation_evaluates_responses_with_inline_criteria.yml
|
|
80
|
+
- examples/cassettes/qualspec_rspec_integration_basic_evaluation_provides_detailed_feedback_on_failure.yml
|
|
81
|
+
- examples/cassettes/qualspec_rspec_integration_comparative_evaluation_compares_multiple_responses.yml
|
|
82
|
+
- examples/cassettes/qualspec_rspec_integration_score_matchers_supports_score_comparisons.yml
|
|
83
|
+
- examples/cassettes/qualspec_rspec_integration_vcr_integration_records_and_plays_back_api_calls_automatically.yml
|
|
84
|
+
- examples/cassettes/qualspec_rspec_integration_with_context_uses_context_in_evaluation.yml
|
|
85
|
+
- examples/cassettes/qualspec_rspec_integration_with_rubrics_evaluates_using_builtin_rubrics.yml
|
|
86
|
+
- examples/comparison.rb
|
|
87
|
+
- examples/model_comparison.rb
|
|
88
|
+
- examples/persona_test.rb
|
|
89
|
+
- examples/quick_test.rb
|
|
90
|
+
- examples/report.html
|
|
91
|
+
- examples/rspec_example_spec.rb
|
|
92
|
+
- exe/qualspec
|
|
93
|
+
- lib/qualspec.rb
|
|
94
|
+
- lib/qualspec/builtin_rubrics.rb
|
|
95
|
+
- lib/qualspec/client.rb
|
|
96
|
+
- lib/qualspec/configuration.rb
|
|
97
|
+
- lib/qualspec/evaluation.rb
|
|
98
|
+
- lib/qualspec/judge.rb
|
|
99
|
+
- lib/qualspec/recorder.rb
|
|
100
|
+
- lib/qualspec/rspec.rb
|
|
101
|
+
- lib/qualspec/rspec/configuration.rb
|
|
102
|
+
- lib/qualspec/rspec/evaluation_result.rb
|
|
103
|
+
- lib/qualspec/rspec/helpers.rb
|
|
104
|
+
- lib/qualspec/rspec/matchers.rb
|
|
105
|
+
- lib/qualspec/rubric.rb
|
|
106
|
+
- lib/qualspec/suite/behavior.rb
|
|
107
|
+
- lib/qualspec/suite/builtin_behaviors.rb
|
|
108
|
+
- lib/qualspec/suite/candidate.rb
|
|
109
|
+
- lib/qualspec/suite/dsl.rb
|
|
110
|
+
- lib/qualspec/suite/html_reporter.rb
|
|
111
|
+
- lib/qualspec/suite/reporter.rb
|
|
112
|
+
- lib/qualspec/suite/runner.rb
|
|
113
|
+
- lib/qualspec/suite/scenario.rb
|
|
114
|
+
- lib/qualspec/version.rb
|
|
115
|
+
- sig/qualspec.rbs
|
|
116
|
+
homepage: https://github.com/estiens/qualspec
|
|
117
|
+
licenses:
|
|
118
|
+
- MIT
|
|
119
|
+
metadata:
|
|
120
|
+
homepage_uri: https://github.com/estiens/qualspec
|
|
121
|
+
source_code_uri: https://github.com/estiens/qualspec
|
|
122
|
+
changelog_uri: https://github.com/estiens/qualspec/blob/main/CHANGELOG.md
|
|
123
|
+
post_install_message:
|
|
124
|
+
rdoc_options: []
|
|
125
|
+
require_paths:
|
|
126
|
+
- lib
|
|
127
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - ">="
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: 3.1.0
|
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
|
+
requirements:
|
|
134
|
+
- - ">="
|
|
135
|
+
- !ruby/object:Gem::Version
|
|
136
|
+
version: '0'
|
|
137
|
+
requirements: []
|
|
138
|
+
rubygems_version: 3.5.22
|
|
139
|
+
signing_key:
|
|
140
|
+
specification_version: 4
|
|
141
|
+
summary: RSpec DSL for qualitative LLM-judged testing
|
|
142
|
+
test_files: []
|