minitest-promptfoo 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c46190c19ff929aa46aa5b39b6857af623f0ca6ede8187a58220c5e8e5ea0fe2
4
- data.tar.gz: fc4d8fec213be486de2398a9fc050d3a683d3b610012bf4107f2bd09e34bea7d
3
+ metadata.gz: 804a53caa0d489f1e5a15dd6342d1821cdaaaaa496748e4f499f38570b7f7689
4
+ data.tar.gz: 8a7347a75901d62524da4ed4a961f4ca586e29602fa278215e2b08b4f122a73c
5
5
  SHA512:
6
- metadata.gz: 8e59555be654b7ab5aa40e1f08747a4d71989a25c162cf57a6ba8ee4cbf4c4365c7f3d9844c61937fb1d82b445bdedc9982e61ae515eb8ac87f5b212771c5c18
7
- data.tar.gz: a8002f9d6f25419abcbb0f89b687246d15c2b89ada13219fed14b11fcf13cb271ce3e694f173a6472d5d0ae38100dc73a3c1fd11e56092c62862cbb43cf67fbe
6
+ metadata.gz: 0e79f96b170749cabe799b901cc9e29685b9e0b75f525bafa1bc15bd7906986498bafca3d264edaa58e67f10b568381a54195f135e7af0c7f4a48f535d62cf61
7
+ data.tar.gz: 40e7006633bd305692ac9426c17944359fe538760888fc4279acd2032f4b56bf266c5fb221ab47fc1e2dc7c70231f60baf1d75ca951461b6a2a7e230b86f6940
data/CHANGELOG.md CHANGED
@@ -28,3 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
28
28
  ## [0.1.1] - Spirit in the Night
29
29
 
30
30
  - Fixed bug causing unbound method call in Rails apps
31
+
32
+ ## [0.1.2] - Growin' Up
33
+
34
+ - Fixed more bugs relating to Rails implementation
@@ -39,7 +39,7 @@ module Minitest
39
39
  return candidate if File.exist?(candidate)
40
40
  end
41
41
 
42
- raise PromptNotFoundError, "Could not find prompt file for #{test_file_path}"
42
+ raise Minitest::Promptfoo::TestMethods::PromptNotFoundError, "Could not find prompt file for #{test_file_path}"
43
43
  end
44
44
  end
45
45
  end
@@ -47,33 +47,13 @@ module Minitest
47
47
 
48
48
  # Convenience class that combines Test + Rails integration
49
49
  # Inherits from ActiveSupport::TestCase if available, otherwise Minitest::Test
50
- if defined?(ActiveSupport::TestCase)
51
- class RailsTest < ActiveSupport::TestCase
52
- include Minitest::Promptfoo::Rails
53
-
54
- # Copy instance methods from Test
55
- Minitest::Promptfoo::Test.instance_methods(false).each do |method_name|
56
- define_method(method_name) do |*args, **kwargs, &block|
57
- # Delegate to Test's implementation
58
- Minitest::Promptfoo::Test.instance_method(method_name).bind_call(self, *args, **kwargs, &block)
59
- end
60
- end
61
-
62
- # Include class methods
63
- class << self
64
- def debug?
65
- ENV["DEBUG_PROMPT_TEST"] == "1"
66
- end
67
-
68
- def providers
69
- @providers || "echo"
70
- end
71
-
72
- attr_writer :providers
73
-
74
- def inherited(subclass)
75
- super
76
- subclass.providers = providers if defined?(@providers)
50
+ if defined?(ActiveSupport)
51
+ # Defer class definition until Rails test framework is fully loaded
52
+ ActiveSupport.on_load(:active_support_test_case) do
53
+ unless Minitest::Promptfoo.const_defined?(:RailsTest)
54
+ class RailsTest < ActiveSupport::TestCase
55
+ include Minitest::Promptfoo::TestMethods
56
+ include Minitest::Promptfoo::Rails
77
57
  end
78
58
  end
79
59
  end
@@ -1,11 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "yaml"
4
- require "tmpdir"
5
3
  require "minitest/test"
6
4
  require_relative "assertion_builder"
7
5
  require_relative "failure_formatter"
8
6
  require_relative "promptfoo_runner"
7
+ require_relative "test_methods"
9
8
 
10
9
  module Minitest
11
10
  module Promptfoo
@@ -36,203 +35,7 @@ module Minitest
36
35
  # end
37
36
  # end
38
37
  class Test < Minitest::Test
39
- class PromptNotFoundError < StandardError; end
40
- class EvaluationError < StandardError; end
41
-
42
- # Class-level configuration
43
- class << self
44
- def debug?
45
- ENV["DEBUG_PROMPT_TEST"] == "1"
46
- end
47
-
48
- def providers
49
- @providers || "echo"
50
- end
51
-
52
- attr_writer :providers
53
-
54
- def inherited(subclass)
55
- super
56
- subclass.providers = providers if defined?(@providers)
57
- end
58
- end
59
-
60
- def prompt_path
61
- raise NotImplementedError, "#{self.class}#prompt_path must be implemented"
62
- end
63
-
64
- def prompt_content
65
- @prompt_content ||= begin
66
- path = prompt_path
67
- raise PromptNotFoundError, "Prompt file not found: #{path}" unless File.exist?(path)
68
- File.read(path, encoding: "UTF-8")
69
- end
70
- end
71
-
72
- # Minitest-like DSL for prompt testing
73
- #
74
- # Example:
75
- # assert_prompt(vars: { input: "test" }) do |response|
76
- # response.includes("expected text")
77
- # response.matches(/\d{3}-\d{4}/)
78
- # response.rubric("Response is professional and courteous")
79
- # end
80
- def assert_prompt(vars:, providers: nil, verbose: false, pre_render: false, &block)
81
- builder = AssertionBuilder.new
82
- yield(builder)
83
-
84
- output = evaluate_prompt(
85
- prompt_text: prompt_content,
86
- vars: vars,
87
- providers: providers,
88
- assertions: builder.to_promptfoo_assertions,
89
- verbose: verbose,
90
- pre_render: pre_render
91
- )
92
-
93
- # Real assertion: verify promptfoo produced results
94
- assert(output.any?, "Promptfoo evaluation produced no output")
95
-
96
- output
97
- end
98
-
99
- def evaluate_prompt(prompt_text:, vars:, providers: nil, assertions: [], pre_render: false, verbose: false, show_output: false)
100
- Dir.mktmpdir do |tmpdir|
101
- config_path = File.join(tmpdir, "promptfooconfig.yaml")
102
- output_path = File.join(tmpdir, "output.json")
103
-
104
- # Convert single-brace {var} syntax to double-brace {{var}} for promptfoo
105
- promptfoo_text = prompt_text.gsub(/(?<!\{)\{(\w+)\}(?!\})/, '{{\1}}')
106
-
107
- if pre_render
108
- vars.each do |key, value|
109
- promptfoo_text = promptfoo_text.gsub("{{#{key}}}", value.to_s)
110
- end
111
- config_vars = {}
112
- else
113
- config_vars = vars
114
- end
115
-
116
- # Use provided provider(s) or fall back to class-level default
117
- providers_array = wrap_array(providers || self.class.providers)
118
-
119
- config = build_promptfoo_config(
120
- prompt: promptfoo_text,
121
- vars: config_vars,
122
- providers: providers_array,
123
- assertions: assertions,
124
- output_path: output_path
125
- )
126
-
127
- config_yaml = YAML.dump(config)
128
- File.write(config_path, config_yaml)
129
-
130
- debug("Promptfoo Config", config_yaml)
131
-
132
- runner = PromptfooRunner.new(Minitest::Promptfoo.configuration)
133
- result = runner.execute(config_path, tmpdir, show_output: show_output, pre_render: pre_render)
134
-
135
- debug("Promptfoo Result", result.inspect)
136
-
137
- output = runner.parse_output(output_path)
138
-
139
- unless result[:success] || output.any?
140
- raise EvaluationError, <<~ERROR
141
- promptfoo evaluation failed
142
- STDOUT: #{result[:stdout]}
143
- STDERR: #{result[:stderr]}
144
- ERROR
145
- end
146
-
147
- check_provider_failures(output, providers_array, verbose: verbose) if assertions.any?
148
-
149
- output
150
- end
151
- end
152
-
153
- private
154
-
155
- def check_provider_failures(output, providers, verbose: false)
156
- results = output.dig("results", "results") || []
157
- passing_providers = []
158
- failing_providers = []
159
-
160
- results.each do |provider_result|
161
- provider_id = provider_result.dig("provider", "id")
162
- success = provider_result.dig("success")
163
-
164
- if success
165
- passing_providers << provider_id
166
- else
167
- failing_providers << {
168
- id: provider_id,
169
- result: provider_result
170
- }
171
- end
172
- end
173
-
174
- if failing_providers.any?
175
- formatter = FailureFormatter.new(verbose: verbose)
176
- error_msg = formatter.format_results(passing_providers, failing_providers)
177
- flunk(error_msg)
178
- end
179
- end
180
-
181
- def build_promptfoo_config(prompt:, vars:, providers:, assertions:, output_path:)
182
- normalized_providers = providers.map do |provider|
183
- case provider
184
- when String
185
- provider
186
- when Hash
187
- deep_stringify_keys(provider)
188
- end
189
- end
190
-
191
- {
192
- "prompts" => [prompt],
193
- "providers" => normalized_providers,
194
- "tests" => [
195
- {
196
- "vars" => vars.transform_keys(&:to_s),
197
- "assert" => assertions
198
- }
199
- ],
200
- "outputPath" => output_path
201
- }
202
- end
203
-
204
- def debug(title, content)
205
- return unless self.class.debug?
206
-
207
- warn "\n=== #{title} ==="
208
- warn content
209
- warn "=" * (title.length + 8)
210
- warn ""
211
- end
212
-
213
- # Simple array wrapper (replaces ActiveSupport's Array.wrap)
214
- def wrap_array(object)
215
- case object
216
- when nil then []
217
- when Array then object
218
- else [object]
219
- end
220
- end
221
-
222
- # Simple deep stringify keys (replaces ActiveSupport method)
223
- def deep_stringify_keys(hash)
224
- hash.each_with_object({}) do |(key, value), result|
225
- result[key.to_s] = stringify_value(value)
226
- end
227
- end
228
-
229
- def stringify_value(value)
230
- case value
231
- when Hash then deep_stringify_keys(value)
232
- when Array then value.map { |v| stringify_value(v) }
233
- else value
234
- end
235
- end
38
+ include TestMethods
236
39
  end
237
40
  end
238
41
  end
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+ require "tmpdir"
5
+
6
+ module Minitest
7
+ module Promptfoo
8
+ # Shared behavior for prompt testing that can be included in any test class
9
+ module TestMethods
10
+ class PromptNotFoundError < StandardError; end
11
+ class EvaluationError < StandardError; end
12
+
13
+ def self.included(base)
14
+ base.extend(ClassMethods)
15
+ end
16
+
17
+ module ClassMethods
18
+ def debug?
19
+ ENV["DEBUG_PROMPT_TEST"] == "1"
20
+ end
21
+
22
+ def providers
23
+ @providers || "echo"
24
+ end
25
+
26
+ attr_writer :providers
27
+
28
+ def inherited(subclass)
29
+ super
30
+ subclass.providers = providers if defined?(@providers)
31
+ end
32
+ end
33
+
34
+ def prompt_path
35
+ raise NotImplementedError, "#{self.class}#prompt_path must be implemented"
36
+ end
37
+
38
+ def prompt_content
39
+ @prompt_content ||= begin
40
+ path = prompt_path
41
+ raise PromptNotFoundError, "Prompt file not found: #{path}" unless File.exist?(path)
42
+ File.read(path, encoding: "UTF-8")
43
+ end
44
+ end
45
+
46
+ def assert_prompt(vars:, providers: nil, verbose: false, pre_render: false, &block)
47
+ builder = AssertionBuilder.new
48
+ yield(builder)
49
+
50
+ output = evaluate_prompt(
51
+ prompt_text: prompt_content,
52
+ vars: vars,
53
+ providers: providers,
54
+ assertions: builder.to_promptfoo_assertions,
55
+ verbose: verbose,
56
+ pre_render: pre_render
57
+ )
58
+
59
+ # Real assertion: verify promptfoo produced results
60
+ assert(output.any?, "Promptfoo evaluation produced no output")
61
+
62
+ output
63
+ end
64
+
65
+ def evaluate_prompt(prompt_text:, vars:, providers: nil, assertions: [], pre_render: false, verbose: false, show_output: false)
66
+ Dir.mktmpdir do |tmpdir|
67
+ config_path = File.join(tmpdir, "promptfooconfig.yaml")
68
+ output_path = File.join(tmpdir, "output.json")
69
+
70
+ # Convert single-brace {var} syntax to double-brace {{var}} for promptfoo
71
+ promptfoo_text = prompt_text.gsub(/(?<!\{)\{(\w+)\}(?!\})/, '{{\1}}')
72
+
73
+ if pre_render
74
+ vars.each do |key, value|
75
+ promptfoo_text = promptfoo_text.gsub("{{#{key}}}", value.to_s)
76
+ end
77
+ config_vars = {}
78
+ else
79
+ config_vars = vars
80
+ end
81
+
82
+ # Use provided provider(s) or fall back to class-level default
83
+ providers_array = wrap_array(providers || self.class.providers)
84
+
85
+ config = build_promptfoo_config(
86
+ prompt: promptfoo_text,
87
+ vars: config_vars,
88
+ providers: providers_array,
89
+ assertions: assertions,
90
+ output_path: output_path
91
+ )
92
+
93
+ config_yaml = YAML.dump(config)
94
+ File.write(config_path, config_yaml)
95
+
96
+ debug("Promptfoo Config", config_yaml)
97
+
98
+ runner = PromptfooRunner.new(Minitest::Promptfoo.configuration)
99
+ result = runner.execute(config_path, tmpdir, show_output: show_output, pre_render: pre_render)
100
+
101
+ debug("Promptfoo Result", result.inspect)
102
+
103
+ output = runner.parse_output(output_path)
104
+
105
+ unless result[:success] || output.any?
106
+ raise EvaluationError, <<~ERROR
107
+ promptfoo evaluation failed
108
+ STDOUT: #{result[:stdout]}
109
+ STDERR: #{result[:stderr]}
110
+ ERROR
111
+ end
112
+
113
+ check_provider_failures(output, providers_array, verbose: verbose) if assertions.any?
114
+
115
+ output
116
+ end
117
+ end
118
+
119
+ private
120
+
121
+ def check_provider_failures(output, providers, verbose: false)
122
+ results = output.dig("results", "results") || []
123
+ passing_providers = []
124
+ failing_providers = []
125
+
126
+ results.each do |provider_result|
127
+ provider_id = provider_result.dig("provider", "id")
128
+ success = provider_result.dig("success")
129
+
130
+ if success
131
+ passing_providers << provider_id
132
+ else
133
+ failing_providers << {
134
+ id: provider_id,
135
+ result: provider_result
136
+ }
137
+ end
138
+ end
139
+
140
+ if failing_providers.any?
141
+ formatter = FailureFormatter.new(verbose: verbose)
142
+ error_msg = formatter.format_results(passing_providers, failing_providers)
143
+ flunk(error_msg)
144
+ end
145
+ end
146
+
147
+ def build_promptfoo_config(prompt:, vars:, providers:, assertions:, output_path:)
148
+ normalized_providers = providers.map do |provider|
149
+ case provider
150
+ when String
151
+ provider
152
+ when Hash
153
+ deep_stringify_keys(provider)
154
+ end
155
+ end
156
+
157
+ {
158
+ "prompts" => [prompt],
159
+ "providers" => normalized_providers,
160
+ "tests" => [
161
+ {
162
+ "vars" => vars.transform_keys(&:to_s),
163
+ "assert" => assertions
164
+ }
165
+ ],
166
+ "outputPath" => output_path
167
+ }
168
+ end
169
+
170
+ def debug(title, content)
171
+ return unless self.class.debug?
172
+
173
+ warn "\n=== #{title} ==="
174
+ warn content
175
+ warn "=" * (title.length + 8)
176
+ warn ""
177
+ end
178
+
179
+ # Simple array wrapper (replaces ActiveSupport's Array.wrap)
180
+ def wrap_array(object)
181
+ case object
182
+ when nil then []
183
+ when Array then object
184
+ else [object]
185
+ end
186
+ end
187
+
188
+ # Simple deep stringify keys (replaces ActiveSupport method)
189
+ def deep_stringify_keys(hash)
190
+ hash.each_with_object({}) do |(key, value), result|
191
+ result[key.to_s] = stringify_value(value)
192
+ end
193
+ end
194
+
195
+ def stringify_value(value)
196
+ case value
197
+ when Hash then deep_stringify_keys(value)
198
+ when Array then value.map { |v| stringify_value(v) }
199
+ else value
200
+ end
201
+ end
202
+ end
203
+ end
204
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Minitest
4
4
  module Promptfoo
5
- VERSION = "0.1.1"
5
+ VERSION = "0.1.2"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: minitest-promptfoo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Waters
@@ -74,6 +74,7 @@ files:
74
74
  - lib/minitest/promptfoo/promptfoo_runner.rb
75
75
  - lib/minitest/promptfoo/rails.rb
76
76
  - lib/minitest/promptfoo/test.rb
77
+ - lib/minitest/promptfoo/test_methods.rb
77
78
  - lib/minitest/promptfoo/version.rb
78
79
  - sig/minitest/promptfoo.rbs
79
80
  homepage: https://github.com/christhesoul/minitest-promptfoo