minitest-promptfoo 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -9
- data/CLAUDE.md +15 -0
- data/README.md +18 -0
- data/lib/minitest/promptfoo/assertion_builder.rb +26 -4
- data/lib/minitest/promptfoo/test_methods.rb +20 -6
- data/lib/minitest/promptfoo/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: adc15656589f6ac5ef5c617ff36f9461de0d8cea9aaee9c5dcb1f93a3846657e
|
|
4
|
+
data.tar.gz: a64e466f50c5388e03419b3dec3a38ada32d87507b60c7377579c0c1135a96ee
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a11d7f0afa408a7e551068976bbfb9c29ffce7f0935402cd3830660a4d0a75da8a344619ed47c51672f2a90cccb044baf1939e044289f548854af0e0d2b3ba97
|
|
7
|
+
data.tar.gz: 678f4a3dd170d6660f373ac45fe268f37bf7bf8c2a82352722137f6df04037325644d93fc4b36d4388593e61c505cea742ab5120f1343a5905b0f76042dddb0b
|
data/CHANGELOG.md
CHANGED
|
@@ -2,13 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
5
|
+
## [0.1.0] - Blinded by the Light
|
|
7
6
|
|
|
8
|
-
|
|
7
|
+
Initial release of minitest-promptfoo:
|
|
9
8
|
|
|
10
|
-
### Added
|
|
11
|
-
- Initial release of minitest-promptfoo
|
|
12
9
|
- Core `Minitest::Promptfoo::Test` class for prompt testing
|
|
13
10
|
- Configuration system for promptfoo executable path
|
|
14
11
|
- Support for multiple providers
|
|
@@ -21,10 +18,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
21
18
|
- Comprehensive README with examples
|
|
22
19
|
- Basic test coverage
|
|
23
20
|
|
|
24
|
-
## [0.1.0] - Blinded by the Light
|
|
25
|
-
|
|
26
|
-
- Initial release
|
|
27
|
-
|
|
28
21
|
## [0.1.1] - Spirit in the Night
|
|
29
22
|
|
|
30
23
|
- Fixed bug causing unbound method call in Rails apps
|
|
@@ -32,3 +25,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
32
25
|
## [0.1.2] - Growin' Up
|
|
33
26
|
|
|
34
27
|
- Fixed more bugs relating to Rails implementation
|
|
28
|
+
|
|
29
|
+
## [0.1.3] - Does This Bus Stop at 82nd Street?
|
|
30
|
+
|
|
31
|
+
- Renamed `Minitest::Promptfoo::RailsTest` to `Minitest::Promptfoo::PromptTest` for clearer semantics
|
|
32
|
+
|
|
33
|
+
## [0.1.4] - For You
|
|
34
|
+
|
|
35
|
+
- `force_json!` method to handle JSON responses wrapped in markdown code fences (e.g., ` ```json `)
|
|
36
|
+
- Automatically strips backticks before parsing JSON
|
|
37
|
+
- Skips `is-json` validation when enabled (since raw output isn't valid JSON)
|
|
38
|
+
- Particularly useful for Anthropic and other providers that ignore `response_format` settings
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
This is a Ruby gem that provides a thin Ruby wrapper around the Promptfoo prompt testing framework.
|
|
2
|
+
|
|
3
|
+
The aim of this project is to provide a Minitest aesthetic to testing prompts that live in a repo.
|
|
4
|
+
|
|
5
|
+
Our aim is not to recreate Promptfoo in Ruby, but rather to harness the power of Promptfoo while keeping the joy of writing Ruby.
|
|
6
|
+
|
|
7
|
+
Your tasks will probably involve providing a Ruby interface to Promptfoo features. This generally involves
|
|
8
|
+
coercing code within a Minitest-style assertion into some YAML that we then provide when shelling out to the
|
|
9
|
+
Promptfoo command line tool.
|
|
10
|
+
|
|
11
|
+
Always bias towards simple and beautiful Ruby code.
|
|
12
|
+
|
|
13
|
+
Resources:
|
|
14
|
+
- Promptfoo repo: https://github.com/promptfoo/promptfoo
|
|
15
|
+
- Promptfoo docs: https://www.promptfoo.dev/docs/intro/
|
data/README.md
CHANGED
|
@@ -143,6 +143,24 @@ assert_prompt(vars: { query: "status" }) do |response|
|
|
|
143
143
|
end
|
|
144
144
|
```
|
|
145
145
|
|
|
146
|
+
#### Handling Markdown-Wrapped JSON
|
|
147
|
+
|
|
148
|
+
Some providers (looking at you, Anthropic) wrap JSON responses in markdown code fences like ` ```json `. Use `force_json!` to strip these before parsing:
|
|
149
|
+
|
|
150
|
+
```ruby
|
|
151
|
+
assert_prompt(vars: { query: "status" }) do |response|
|
|
152
|
+
response.force_json! # Strips ```json and ``` wrappers
|
|
153
|
+
|
|
154
|
+
response.json_includes(key: "status", value: "success")
|
|
155
|
+
response.json_includes(key: "exclusive_to_app", value: true)
|
|
156
|
+
end
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
When `force_json!` is called:
|
|
160
|
+
- Markdown code fences are automatically stripped before JSON parsing
|
|
161
|
+
- The `is-json` validation is skipped (since the raw output isn't valid JSON)
|
|
162
|
+
- All subsequent `json_includes` assertions handle the stripping automatically
|
|
163
|
+
|
|
146
164
|
### Custom JavaScript
|
|
147
165
|
|
|
148
166
|
```ruby
|
|
@@ -15,6 +15,7 @@ module Minitest
|
|
|
15
15
|
class AssertionBuilder
|
|
16
16
|
def initialize
|
|
17
17
|
@assertions = []
|
|
18
|
+
@force_json = false
|
|
18
19
|
end
|
|
19
20
|
|
|
20
21
|
# String inclusion check
|
|
@@ -43,13 +44,24 @@ module Minitest
|
|
|
43
44
|
|
|
44
45
|
# JSON structure validation using JavaScript
|
|
45
46
|
def json_includes(key:, value:)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
# Only validate is-json if we're not forcing JSON (since force_json means output may have markdown fences)
|
|
48
|
+
unless @force_json
|
|
49
|
+
@assertions << {
|
|
50
|
+
"type" => "is-json"
|
|
51
|
+
}
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Build the parsing logic - strip markdown fences if force_json is enabled
|
|
55
|
+
parse_logic = if @force_json
|
|
56
|
+
"JSON.parse(output.replace(/^```(?:json)?\\n?|\\n?```$/g, '').trim())"
|
|
57
|
+
else
|
|
58
|
+
"JSON.parse(output)"
|
|
59
|
+
end
|
|
60
|
+
|
|
49
61
|
# Handle both string output (needs parsing) and object output (already parsed)
|
|
50
62
|
@assertions << {
|
|
51
63
|
"type" => "javascript",
|
|
52
|
-
"value" => "(typeof output === 'string' ?
|
|
64
|
+
"value" => "(typeof output === 'string' ? #{parse_logic} : output)[#{key.inspect}] === #{value.to_json}"
|
|
53
65
|
}
|
|
54
66
|
end
|
|
55
67
|
|
|
@@ -70,6 +82,16 @@ module Minitest
|
|
|
70
82
|
}
|
|
71
83
|
end
|
|
72
84
|
|
|
85
|
+
# Force JSON parsing by stripping markdown code fences
|
|
86
|
+
def force_json!
|
|
87
|
+
@force_json = true
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Check if force_json was called
|
|
91
|
+
def force_json?
|
|
92
|
+
@force_json
|
|
93
|
+
end
|
|
94
|
+
|
|
73
95
|
# Convert to promptfoo assertion format
|
|
74
96
|
def to_promptfoo_assertions
|
|
75
97
|
@assertions
|
|
@@ -53,7 +53,8 @@ module Minitest
|
|
|
53
53
|
providers: providers,
|
|
54
54
|
assertions: builder.to_promptfoo_assertions,
|
|
55
55
|
verbose: verbose,
|
|
56
|
-
pre_render: pre_render
|
|
56
|
+
pre_render: pre_render,
|
|
57
|
+
force_json: builder.force_json?
|
|
57
58
|
)
|
|
58
59
|
|
|
59
60
|
# Real assertion: verify promptfoo produced results
|
|
@@ -62,7 +63,7 @@ module Minitest
|
|
|
62
63
|
output
|
|
63
64
|
end
|
|
64
65
|
|
|
65
|
-
def evaluate_prompt(prompt_text:, vars:, providers: nil, assertions: [], pre_render: false, verbose: false, show_output: false)
|
|
66
|
+
def evaluate_prompt(prompt_text:, vars:, providers: nil, assertions: [], pre_render: false, verbose: false, show_output: false, force_json: false)
|
|
66
67
|
Dir.mktmpdir do |tmpdir|
|
|
67
68
|
config_path = File.join(tmpdir, "promptfooconfig.yaml")
|
|
68
69
|
output_path = File.join(tmpdir, "output.json")
|
|
@@ -87,7 +88,8 @@ module Minitest
|
|
|
87
88
|
vars: config_vars,
|
|
88
89
|
providers: providers_array,
|
|
89
90
|
assertions: assertions,
|
|
90
|
-
output_path: output_path
|
|
91
|
+
output_path: output_path,
|
|
92
|
+
force_json: force_json
|
|
91
93
|
)
|
|
92
94
|
|
|
93
95
|
config_yaml = YAML.dump(config)
|
|
@@ -144,14 +146,21 @@ module Minitest
|
|
|
144
146
|
end
|
|
145
147
|
end
|
|
146
148
|
|
|
147
|
-
def build_promptfoo_config(prompt:, vars:, providers:, assertions:, output_path:)
|
|
149
|
+
def build_promptfoo_config(prompt:, vars:, providers:, assertions:, output_path:, force_json: false)
|
|
148
150
|
normalized_providers = providers.map do |provider|
|
|
149
|
-
case provider
|
|
151
|
+
provider_config = case provider
|
|
150
152
|
when String
|
|
151
|
-
provider
|
|
153
|
+
{ "id" => provider }
|
|
152
154
|
when Hash
|
|
153
155
|
deep_stringify_keys(provider)
|
|
154
156
|
end
|
|
157
|
+
|
|
158
|
+
if force_json
|
|
159
|
+
provider_config["config"] ||= {}
|
|
160
|
+
provider_config["config"]["transformResponse"] = strip_markdown_fences_js
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
provider_config
|
|
155
164
|
end
|
|
156
165
|
|
|
157
166
|
{
|
|
@@ -199,6 +208,11 @@ module Minitest
|
|
|
199
208
|
else value
|
|
200
209
|
end
|
|
201
210
|
end
|
|
211
|
+
|
|
212
|
+
# JavaScript function to strip markdown code fences from JSON responses
|
|
213
|
+
def strip_markdown_fences_js
|
|
214
|
+
"json.output.replace(/^```(?:json)?\\n?|\\n?```$/g, '').trim()"
|
|
215
|
+
end
|
|
202
216
|
end
|
|
203
217
|
end
|
|
204
218
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: minitest-promptfoo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.3
|
|
4
|
+
version: 0.1.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Chris Waters
|
|
@@ -62,6 +62,7 @@ extra_rdoc_files: []
|
|
|
62
62
|
files:
|
|
63
63
|
- ".ruby-version"
|
|
64
64
|
- CHANGELOG.md
|
|
65
|
+
- CLAUDE.md
|
|
65
66
|
- LICENSE.txt
|
|
66
67
|
- README.md
|
|
67
68
|
- Rakefile
|