completion-kit 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/completion_kit/api/v1/imports_controller.rb +29 -0
- data/app/controllers/completion_kit/imports_controller.rb +31 -0
- data/app/models/completion_kit/response.rb +8 -2
- data/app/models/completion_kit/review.rb +8 -2
- data/app/services/completion_kit/mcp_dispatcher.rb +3 -1
- data/app/services/completion_kit/mcp_tools/imports.rb +33 -0
- data/app/services/completion_kit/promptfoo_importer.rb +174 -0
- data/app/views/completion_kit/imports/create.html.erb +89 -0
- data/app/views/completion_kit/imports/new.html.erb +31 -0
- data/app/views/completion_kit/metrics/index.html.erb +1 -0
- data/config/routes.rb +4 -0
- data/lib/completion_kit/version.rb +1 -1
- metadata +7 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 44b946858390ad2aa5d3a3bcb2a68ba666a6725ba5e77725887f99087603e322
|
|
4
|
+
data.tar.gz: 52499c569d0a73b0b714547e9aeed1535abe452cfd093288356d1580ee83d7ce
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 406ae145ee5963e7a1a1a32b7669591f8c04ae49cb1166d6e054d657a77aa0fca218c78a4d2f7b31dc5518ee9aa75a7f5bae28889f81753ccdcdf86822b4dfb7
|
|
7
|
+
data.tar.gz: 2c9f542f56209a7fde4d8d6cc822e7096b43b178d7fef9ef5c65cc95e971c512ad91b14c1713fefd7d076164bff99e74ec587600c9900ce55994e67ff4a273b2
|
data/app/controllers/completion_kit/api/v1/imports_controller.rb
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
module Api
|
|
3
|
+
module V1
|
|
4
|
+
class ImportsController < BaseController
|
|
5
|
+
def promptfoo
|
|
6
|
+
content = params[:config].presence || request.raw_post
|
|
7
|
+
result = PromptfooImporter.call(content)
|
|
8
|
+
|
|
9
|
+
if result.ok
|
|
10
|
+
render json: import_summary(result), status: :created
|
|
11
|
+
else
|
|
12
|
+
render_error(result.error, status: :unprocessable_entity)
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
private
|
|
17
|
+
|
|
18
|
+
def import_summary(result)
|
|
19
|
+
{
|
|
20
|
+
prompts: result.prompts,
|
|
21
|
+
dataset: result.dataset,
|
|
22
|
+
metrics: result.metrics,
|
|
23
|
+
providers: result.providers
|
|
24
|
+
}
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
data/app/controllers/completion_kit/imports_controller.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
class ImportsController < ApplicationController
|
|
3
|
+
def new
|
|
4
|
+
end
|
|
5
|
+
|
|
6
|
+
def create
|
|
7
|
+
content = uploaded_content
|
|
8
|
+
|
|
9
|
+
if content.blank?
|
|
10
|
+
flash.now[:alert] = "Paste or upload a promptfooconfig.yaml to import."
|
|
11
|
+
return render :new, status: :unprocessable_entity
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
@result = PromptfooImporter.call(content)
|
|
15
|
+
|
|
16
|
+
if @result.ok
|
|
17
|
+
render :create
|
|
18
|
+
else
|
|
19
|
+
flash.now[:alert] = @result.error
|
|
20
|
+
render :new, status: :unprocessable_entity
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
private
|
|
25
|
+
|
|
26
|
+
def uploaded_content
|
|
27
|
+
file = params[:file]
|
|
28
|
+
file.respond_to?(:read) ? file.read : params[:config]
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
data/app/models/completion_kit/response.rb
CHANGED
|
@@ -63,15 +63,21 @@ module CompletionKit
|
|
|
63
63
|
end
|
|
64
64
|
|
|
65
65
|
def broadcast_row_update
|
|
66
|
-
run.broadcast_response_update(self)
|
|
66
|
+
safely_broadcast { run.broadcast_response_update(self) }
|
|
67
67
|
end
|
|
68
68
|
|
|
69
69
|
def broadcast_run_progress
|
|
70
|
-
run.broadcast_progress
|
|
70
|
+
safely_broadcast { run.broadcast_progress }
|
|
71
71
|
end
|
|
72
72
|
|
|
73
73
|
def should_broadcast_progress?
|
|
74
74
|
saved_change_to_status? && terminal?
|
|
75
75
|
end
|
|
76
|
+
|
|
77
|
+
def safely_broadcast
|
|
78
|
+
yield
|
|
79
|
+
rescue StandardError => e
|
|
80
|
+
Rails.logger.error("[CompletionKit] response ##{id} broadcast failed: #{e.class}: #{e.message}")
|
|
81
|
+
end
|
|
76
82
|
end
|
|
77
83
|
end
|
data/app/models/completion_kit/review.rb
CHANGED
|
@@ -40,15 +40,21 @@ module CompletionKit
|
|
|
40
40
|
private
|
|
41
41
|
|
|
42
42
|
def broadcast_parent_row_update
|
|
43
|
-
response.run.broadcast_response_update(response)
|
|
43
|
+
safely_broadcast { response.run.broadcast_response_update(response) }
|
|
44
44
|
end
|
|
45
45
|
|
|
46
46
|
def broadcast_run_progress
|
|
47
|
-
response.run.broadcast_progress
|
|
47
|
+
safely_broadcast { response.run.broadcast_progress }
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
def should_broadcast_progress?
|
|
51
51
|
saved_change_to_status? && terminal?
|
|
52
52
|
end
|
|
53
|
+
|
|
54
|
+
def safely_broadcast
|
|
55
|
+
yield
|
|
56
|
+
rescue StandardError => e
|
|
57
|
+
Rails.logger.error("[CompletionKit] review ##{id} broadcast failed: #{e.class}: #{e.message}")
|
|
58
|
+
end
|
|
53
59
|
end
|
|
54
60
|
end
|
data/app/services/completion_kit/mcp_dispatcher.rb
CHANGED
|
@@ -36,7 +36,8 @@ module CompletionKit
|
|
|
36
36
|
McpTools::ProviderCredentials.definitions +
|
|
37
37
|
McpTools::Tags.definitions +
|
|
38
38
|
McpTools::Agreements.definitions +
|
|
39
|
-
McpTools::Judges.definitions
|
|
39
|
+
McpTools::Judges.definitions +
|
|
40
|
+
McpTools::Imports.definitions
|
|
40
41
|
end
|
|
41
42
|
|
|
42
43
|
def self.call_tool(name, arguments)
|
|
@@ -52,6 +53,7 @@ module CompletionKit
|
|
|
52
53
|
when /\Atags_/ then McpTools::Tags.call(name, arguments)
|
|
53
54
|
when /\Aagreements_/ then McpTools::Agreements.call(name, arguments)
|
|
54
55
|
when /\Ajudges_/ then McpTools::Judges.call(name, arguments)
|
|
56
|
+
when /\Apromptfoo_/ then McpTools::Imports.call(name, arguments)
|
|
55
57
|
else raise MethodNotFound, "Unknown tool: #{name}"
|
|
56
58
|
end
|
|
57
59
|
end
|
data/app/services/completion_kit/mcp_tools/imports.rb
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
module McpTools
|
|
3
|
+
module Imports
|
|
4
|
+
extend Base
|
|
5
|
+
|
|
6
|
+
TOOLS = {
|
|
7
|
+
"promptfoo_import" => {
|
|
8
|
+
description: "Import a promptfooconfig.yaml. Creates a prompt, a dataset from the test vars, and metrics from the assert blocks (llm-rubric/g-eval become judge metrics; contains/equals/regex/is-json become deterministic check metrics). Returns a summary of what mapped and what was skipped and why; nothing is dropped silently.",
|
|
9
|
+
inputSchema: {
|
|
10
|
+
type: "object",
|
|
11
|
+
properties: {
|
|
12
|
+
config: {type: "string", description: "The full promptfooconfig.yaml contents."}
|
|
13
|
+
},
|
|
14
|
+
required: ["config"]
|
|
15
|
+
},
|
|
16
|
+
handler: :promptfoo_import
|
|
17
|
+
}
|
|
18
|
+
}.freeze
|
|
19
|
+
|
|
20
|
+
def self.promptfoo_import(args)
|
|
21
|
+
result = PromptfooImporter.call(args["config"])
|
|
22
|
+
return error_result(result.error) unless result.ok
|
|
23
|
+
|
|
24
|
+
text_result(
|
|
25
|
+
prompts: result.prompts,
|
|
26
|
+
dataset: result.dataset,
|
|
27
|
+
metrics: result.metrics,
|
|
28
|
+
providers: result.providers
|
|
29
|
+
)
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
data/app/services/completion_kit/promptfoo_importer.rb
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
require "yaml"
|
|
2
|
+
require "csv"
|
|
3
|
+
|
|
4
|
+
module CompletionKit
|
|
5
|
+
class PromptfooImporter
|
|
6
|
+
Result = Data.define(:ok, :error, :prompts, :dataset, :metrics, :providers)
|
|
7
|
+
|
|
8
|
+
JUDGE_ASSERTS = %w[llm-rubric g-eval model-graded-closedqa factuality].freeze
|
|
9
|
+
|
|
10
|
+
def self.call(content)
|
|
11
|
+
new(content).call
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def initialize(content)
|
|
15
|
+
@content = content.to_s
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def call
|
|
19
|
+
config = parse
|
|
20
|
+
return failure("Could not parse YAML: #{@parse_error}") if config.nil?
|
|
21
|
+
return failure("Top-level YAML must be a mapping of promptfoo config keys.") unless config.is_a?(Hash)
|
|
22
|
+
|
|
23
|
+
ApplicationRecord.transaction do
|
|
24
|
+
providers = import_providers(config)
|
|
25
|
+
prompts = import_prompts(config, default_model(config))
|
|
26
|
+
dataset = import_dataset(config)
|
|
27
|
+
metrics = import_metrics(config)
|
|
28
|
+
Result.new(ok: true, error: nil, prompts: prompts, dataset: dataset, metrics: metrics, providers: providers)
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
private
|
|
33
|
+
|
|
34
|
+
def parse
|
|
35
|
+
YAML.safe_load(@content, aliases: true)
|
|
36
|
+
rescue Psych::Exception => e
|
|
37
|
+
@parse_error = e.message
|
|
38
|
+
nil
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def failure(message)
|
|
42
|
+
Result.new(ok: false, error: message, prompts: nil, dataset: nil, metrics: nil, providers: nil)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def import_providers(config)
|
|
46
|
+
entries = Array(config["providers"]).map { |p| provider_name(p) }.compact
|
|
47
|
+
configured = ProviderCredential.pluck(:provider).to_set
|
|
48
|
+
matched = []
|
|
49
|
+
unmatched = []
|
|
50
|
+
entries.each do |entry|
|
|
51
|
+
provider = entry.split(":").first
|
|
52
|
+
(configured.include?(provider) ? matched : unmatched) << entry
|
|
53
|
+
end
|
|
54
|
+
{ matched: matched.uniq, unmatched: unmatched.uniq }
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def provider_name(provider)
|
|
58
|
+
provider.is_a?(Hash) ? provider["id"] : provider
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def default_model(config)
|
|
62
|
+
first = Array(config["providers"]).map { |p| provider_name(p) }.compact.first
|
|
63
|
+
return "gpt-4o" if first.nil?
|
|
64
|
+
|
|
65
|
+
parts = first.split(":")
|
|
66
|
+
parts.length > 1 ? parts.last : first
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def import_prompts(config, model)
|
|
70
|
+
created = []
|
|
71
|
+
skipped = []
|
|
72
|
+
Array(config["prompts"]).each_with_index do |raw, index|
|
|
73
|
+
if raw.is_a?(String) && raw.start_with?("file://")
|
|
74
|
+
skipped << { value: raw, reason: "prompt file reference; paste the file's contents to import it" }
|
|
75
|
+
next
|
|
76
|
+
end
|
|
77
|
+
template = prompt_template(raw)
|
|
78
|
+
if template.nil?
|
|
79
|
+
skipped << { value: raw.inspect, reason: "unsupported prompt shape (only inline string templates import)" }
|
|
80
|
+
next
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
prompt = Prompt.create!(name: "Imported prompt #{index + 1}", template: template, llm_model: model)
|
|
84
|
+
created << prompt.name
|
|
85
|
+
end
|
|
86
|
+
{ created: created, skipped: skipped }
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def prompt_template(raw)
|
|
90
|
+
return raw if raw.is_a?(String)
|
|
91
|
+
raw["raw"] || raw["template"] if raw.is_a?(Hash)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def import_dataset(config)
|
|
95
|
+
tests = Array(config["tests"])
|
|
96
|
+
vars_rows = tests.map { |t| (t.is_a?(Hash) ? t["vars"] : nil) || {} }.select { |v| v.is_a?(Hash) }
|
|
97
|
+
return { skipped: "no tests with vars to import" } if vars_rows.empty?
|
|
98
|
+
|
|
99
|
+
columns = vars_rows.flat_map(&:keys).uniq
|
|
100
|
+
csv = CSV.generate do |out|
|
|
101
|
+
out << columns
|
|
102
|
+
vars_rows.each { |vars| out << columns.map { |c| vars[c] } }
|
|
103
|
+
end
|
|
104
|
+
dataset = Dataset.create!(name: "Imported dataset", csv_data: csv)
|
|
105
|
+
{ created: dataset.name, rows: vars_rows.length, columns: columns }
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def import_metrics(config)
|
|
109
|
+
created = []
|
|
110
|
+
skipped = []
|
|
111
|
+
asserts(config).each do |assert|
|
|
112
|
+
attrs = metric_attributes(assert)
|
|
113
|
+
if attrs.nil?
|
|
114
|
+
skipped << { type: assert["type"], reason: "no CompletionKit metric maps to this assert type" }
|
|
115
|
+
next
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
metric = Metric.create!(attrs)
|
|
119
|
+
created << { name: metric.name, type: metric.metric_type }
|
|
120
|
+
rescue ActiveRecord::RecordInvalid => e
|
|
121
|
+
skipped << { type: assert["type"], reason: e.record.errors.full_messages.join(", ") }
|
|
122
|
+
end
|
|
123
|
+
{ created: created, skipped: skipped }
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
def asserts(config)
|
|
127
|
+
default = config.dig("defaultTest", "assert")
|
|
128
|
+
per_test = Array(config["tests"]).flat_map { |t| t.is_a?(Hash) ? Array(t["assert"]) : [] }
|
|
129
|
+
(Array(default) + per_test).select { |a| a.is_a?(Hash) && a["type"] }.uniq
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def metric_attributes(assert)
|
|
133
|
+
type = assert["type"].to_s
|
|
134
|
+
value = assert["value"]
|
|
135
|
+
return { name: unique_name("Rubric"), metric_type: "llm_judge", instruction: value.to_s } if JUDGE_ASSERTS.include?(type)
|
|
136
|
+
|
|
137
|
+
config, label = check_mapping(type, value.to_s)
|
|
138
|
+
return nil if config.nil?
|
|
139
|
+
|
|
140
|
+
{ name: unique_name(label), metric_type: "check", check_config: config }
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def check_mapping(type, text)
|
|
144
|
+
case type
|
|
145
|
+
when "contains"
|
|
146
|
+
[{ "check_kind" => "contains", "target" => "response_text", "value" => text }, "Contains #{text.truncate(30).inspect}"]
|
|
147
|
+
when "icontains"
|
|
148
|
+
[{ "check_kind" => "contains", "target" => "response_text", "value" => text, "case_sensitive" => false }, "Contains #{text.truncate(30).inspect}"]
|
|
149
|
+
when "not-contains"
|
|
150
|
+
[{ "check_kind" => "not_contains", "target" => "response_text", "value" => text }, "Does not contain #{text.truncate(30).inspect}"]
|
|
151
|
+
when "not-icontains"
|
|
152
|
+
[{ "check_kind" => "not_contains", "target" => "response_text", "value" => text, "case_sensitive" => false }, "Does not contain #{text.truncate(30).inspect}"]
|
|
153
|
+
when "equals"
|
|
154
|
+
[{ "check_kind" => "equals", "target" => "response_text", "value" => text }, "Equals #{text.truncate(30).inspect}"]
|
|
155
|
+
when "regex"
|
|
156
|
+
[{ "check_kind" => "regex", "target" => "response_text", "pattern" => text }, "Matches /#{text.truncate(30)}/"]
|
|
157
|
+
when "is-json"
|
|
158
|
+
[{ "check_kind" => "valid_json", "target" => "response_text" }, "Valid JSON"]
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def unique_name(base)
|
|
163
|
+
@used_names ||= Metric.pluck(:name).to_set
|
|
164
|
+
candidate = base
|
|
165
|
+
counter = 2
|
|
166
|
+
while @used_names.include?(candidate)
|
|
167
|
+
candidate = "#{base} (#{counter})"
|
|
168
|
+
counter += 1
|
|
169
|
+
end
|
|
170
|
+
@used_names << candidate
|
|
171
|
+
candidate
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
end
|
data/app/views/completion_kit/imports/create.html.erb
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
+
<li><%= link_to "Import from Promptfoo", import_path %></li>
|
|
4
|
+
<li>Summary</li>
|
|
5
|
+
</ol>
|
|
6
|
+
|
|
7
|
+
<section class="ck-page-header">
|
|
8
|
+
<div>
|
|
9
|
+
<h1 class="ck-title">Import summary</h1>
|
|
10
|
+
<p class="ck-lead">Here is what mapped into CompletionKit and what could not be translated.</p>
|
|
11
|
+
</div>
|
|
12
|
+
<div class="ck-actions">
|
|
13
|
+
<%= link_to "Import another", import_path, class: ck_button_classes(:light, variant: :outline) %>
|
|
14
|
+
</div>
|
|
15
|
+
</section>
|
|
16
|
+
|
|
17
|
+
<section class="ck-card ck-card--spaced">
|
|
18
|
+
<p class="ck-kicker">Prompts</p>
|
|
19
|
+
<% if @result.prompts[:created].any? %>
|
|
20
|
+
<ul class="ck-list ck-list--compact">
|
|
21
|
+
<% @result.prompts[:created].each do |name| %>
|
|
22
|
+
<li><%= link_to name, prompts_path, class: "ck-link" %></li>
|
|
23
|
+
<% end %>
|
|
24
|
+
</ul>
|
|
25
|
+
<% else %>
|
|
26
|
+
<p class="ck-meta-copy">No prompts imported.</p>
|
|
27
|
+
<% end %>
|
|
28
|
+
<% if @result.prompts[:skipped].any? %>
|
|
29
|
+
<p class="ck-kicker">Skipped prompts</p>
|
|
30
|
+
<ul class="ck-list ck-list--compact">
|
|
31
|
+
<% @result.prompts[:skipped].each do |item| %>
|
|
32
|
+
<li><code><%= item[:value] %></code>: <span class="ck-meta-copy"><%= item[:reason] %></span></li>
|
|
33
|
+
<% end %>
|
|
34
|
+
</ul>
|
|
35
|
+
<% end %>
|
|
36
|
+
</section>
|
|
37
|
+
|
|
38
|
+
<section class="ck-card ck-card--spaced">
|
|
39
|
+
<p class="ck-kicker">Dataset</p>
|
|
40
|
+
<% if @result.dataset[:created] %>
|
|
41
|
+
<p><%= link_to @result.dataset[:created], datasets_path, class: "ck-link" %>, <%= pluralize(@result.dataset[:rows], "row") %>. Columns: <%= @result.dataset[:columns].join(", ") %>.</p>
|
|
42
|
+
<% else %>
|
|
43
|
+
<p class="ck-meta-copy">Skipped: <%= @result.dataset[:skipped] %>.</p>
|
|
44
|
+
<% end %>
|
|
45
|
+
</section>
|
|
46
|
+
|
|
47
|
+
<section class="ck-card ck-card--spaced">
|
|
48
|
+
<p class="ck-kicker">Metrics</p>
|
|
49
|
+
<% if @result.metrics[:created].any? %>
|
|
50
|
+
<ul class="ck-list ck-list--compact">
|
|
51
|
+
<% @result.metrics[:created].each do |metric| %>
|
|
52
|
+
<li>
|
|
53
|
+
<%= link_to metric[:name], metrics_path, class: "ck-link" %>
|
|
54
|
+
<span class="ck-chip ck-chip--soft"><%= metric[:type] == "check" ? "Check" : "Judge" %></span>
|
|
55
|
+
</li>
|
|
56
|
+
<% end %>
|
|
57
|
+
</ul>
|
|
58
|
+
<% else %>
|
|
59
|
+
<p class="ck-meta-copy">No metrics imported.</p>
|
|
60
|
+
<% end %>
|
|
61
|
+
<% if @result.metrics[:skipped].any? %>
|
|
62
|
+
<p class="ck-kicker">Skipped asserts</p>
|
|
63
|
+
<ul class="ck-list ck-list--compact">
|
|
64
|
+
<% @result.metrics[:skipped].each do |item| %>
|
|
65
|
+
<li><code><%= item[:type] %></code>: <span class="ck-meta-copy"><%= item[:reason] %></span></li>
|
|
66
|
+
<% end %>
|
|
67
|
+
</ul>
|
|
68
|
+
<% end %>
|
|
69
|
+
</section>
|
|
70
|
+
|
|
71
|
+
<section class="ck-card ck-card--spaced">
|
|
72
|
+
<p class="ck-kicker">Providers</p>
|
|
73
|
+
<% if @result.providers[:matched].any? %>
|
|
74
|
+
<p class="ck-meta-copy">Matched to configured credentials:</p>
|
|
75
|
+
<div class="ck-chip-row">
|
|
76
|
+
<% @result.providers[:matched].each do |name| %>
|
|
77
|
+
<span class="ck-chip ck-chip--done"><%= name %></span>
|
|
78
|
+
<% end %>
|
|
79
|
+
</div>
|
|
80
|
+
<% end %>
|
|
81
|
+
<% if @result.providers[:unmatched].any? %>
|
|
82
|
+
<p class="ck-meta-copy">Referenced but not configured here. <%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %> to run against these.</p>
|
|
83
|
+
<div class="ck-chip-row">
|
|
84
|
+
<% @result.providers[:unmatched].each do |name| %>
|
|
85
|
+
<span class="ck-chip ck-chip--warning"><%= name %></span>
|
|
86
|
+
<% end %>
|
|
87
|
+
</div>
|
|
88
|
+
<% end %>
|
|
89
|
+
</section>
|
data/app/views/completion_kit/imports/new.html.erb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
+
<li>Import from Promptfoo</li>
|
|
4
|
+
</ol>
|
|
5
|
+
|
|
6
|
+
<section class="ck-page-header">
|
|
7
|
+
<div>
|
|
8
|
+
<h1 class="ck-title">Import from Promptfoo</h1>
|
|
9
|
+
<p class="ck-lead">Paste or upload a <code>promptfooconfig.yaml</code> to bring its prompts, test vars, and assertions into CompletionKit. This maps the eval and judge half of Promptfoo. Anything it cannot translate is reported back on the next page, never dropped silently.</p>
|
|
10
|
+
</div>
|
|
11
|
+
</section>
|
|
12
|
+
|
|
13
|
+
<%= form_with url: import_path, method: :post, multipart: true do |form| %>
|
|
14
|
+
<div class="ck-card ck-form-card">
|
|
15
|
+
<div class="ck-field">
|
|
16
|
+
<%= form.label :config, "Paste config", class: "ck-label" %>
|
|
17
|
+
<%= form.text_area :config, rows: 16, class: "ck-input ck-input--area ck-input--code", placeholder: "prompts:\n - \"Summarize {{article}}.\"\ntests:\n - vars: { article: \"...\" }\ndefaultTest:\n assert:\n - type: contains\n value: \"summary\"" %>
|
|
18
|
+
</div>
|
|
19
|
+
|
|
20
|
+
<div class="ck-field">
|
|
21
|
+
<%= form.label :file, "Or upload a file", class: "ck-label" %>
|
|
22
|
+
<%= form.file_field :file, accept: ".yaml,.yml", class: "ck-input" %>
|
|
23
|
+
<p class="ck-field-hint">A <code>.yaml</code> file takes precedence over the pasted text.</p>
|
|
24
|
+
</div>
|
|
25
|
+
|
|
26
|
+
<div class="ck-actions">
|
|
27
|
+
<%= link_to "Cancel", metrics_path, class: ck_button_classes(:light, variant: :outline), tabindex: "0" %>
|
|
28
|
+
<%= form.submit "Import", class: ck_button_classes(:dark) %>
|
|
29
|
+
</div>
|
|
30
|
+
</div>
|
|
31
|
+
<% end %>
|
data/app/views/completion_kit/metrics/index.html.erb
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
<p class="ck-lead">Scoring dimensions used to evaluate each response. Each defines one thing to assess, by an LLM judge on a 1-5 scale or a deterministic check.</p>
|
|
5
5
|
</div>
|
|
6
6
|
<div class="ck-actions">
|
|
7
|
+
<%= link_to "Import from Promptfoo", import_path, class: ck_button_classes(:light, variant: :outline) %>
|
|
7
8
|
<%= link_to "New metric", new_metric_path, class: ck_button_classes(:dark) %>
|
|
8
9
|
</div>
|
|
9
10
|
</section>
|
data/config/routes.rb
CHANGED
|
@@ -59,6 +59,9 @@ CompletionKit::Engine.routes.draw do
|
|
|
59
59
|
|
|
60
60
|
get "api_reference", to: "api_reference#index", as: :api_reference
|
|
61
61
|
|
|
62
|
+
get "import", to: "imports#new", as: :import
|
|
63
|
+
post "import", to: "imports#create"
|
|
64
|
+
|
|
62
65
|
namespace :api do
|
|
63
66
|
namespace :v1 do
|
|
64
67
|
resources :prompts do
|
|
@@ -95,6 +98,7 @@ CompletionKit::Engine.routes.draw do
|
|
|
95
98
|
resources :tags
|
|
96
99
|
resources :provider_credentials
|
|
97
100
|
resources :agreements, only: [:index, :destroy]
|
|
101
|
+
post "imports/promptfoo", to: "imports#promptfoo"
|
|
98
102
|
end
|
|
99
103
|
end
|
|
100
104
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: completion-kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.19.0
|
|
4
|
+
version: 0.20.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Damien Bastin
|
|
@@ -238,6 +238,7 @@ files:
|
|
|
238
238
|
- app/controllers/completion_kit/api/v1/agreements_controller.rb
|
|
239
239
|
- app/controllers/completion_kit/api/v1/base_controller.rb
|
|
240
240
|
- app/controllers/completion_kit/api/v1/datasets_controller.rb
|
|
241
|
+
- app/controllers/completion_kit/api/v1/imports_controller.rb
|
|
241
242
|
- app/controllers/completion_kit/api/v1/metric_groups_controller.rb
|
|
242
243
|
- app/controllers/completion_kit/api/v1/metric_versions_controller.rb
|
|
243
244
|
- app/controllers/completion_kit/api/v1/metrics_controller.rb
|
|
@@ -251,6 +252,7 @@ files:
|
|
|
251
252
|
- app/controllers/completion_kit/dashboard_controller.rb
|
|
252
253
|
- app/controllers/completion_kit/dashboard_dismissals_controller.rb
|
|
253
254
|
- app/controllers/completion_kit/datasets_controller.rb
|
|
255
|
+
- app/controllers/completion_kit/imports_controller.rb
|
|
254
256
|
- app/controllers/completion_kit/mcp_controller.rb
|
|
255
257
|
- app/controllers/completion_kit/metric_groups_controller.rb
|
|
256
258
|
- app/controllers/completion_kit/metrics_controller.rb
|
|
@@ -317,6 +319,7 @@ files:
|
|
|
317
319
|
- app/services/completion_kit/mcp_tools/agreements.rb
|
|
318
320
|
- app/services/completion_kit/mcp_tools/base.rb
|
|
319
321
|
- app/services/completion_kit/mcp_tools/datasets.rb
|
|
322
|
+
- app/services/completion_kit/mcp_tools/imports.rb
|
|
320
323
|
- app/services/completion_kit/mcp_tools/judges.rb
|
|
321
324
|
- app/services/completion_kit/mcp_tools/metric_groups.rb
|
|
322
325
|
- app/services/completion_kit/mcp_tools/metric_versions.rb
|
|
@@ -339,6 +342,7 @@ files:
|
|
|
339
342
|
- app/services/completion_kit/open_router_client.rb
|
|
340
343
|
- app/services/completion_kit/prompt_improvement_service.rb
|
|
341
344
|
- app/services/completion_kit/prompt_improvement_validator.rb
|
|
345
|
+
- app/services/completion_kit/promptfoo_importer.rb
|
|
342
346
|
- app/services/completion_kit/provider_endpoint.rb
|
|
343
347
|
- app/services/completion_kit/run_comparison.rb
|
|
344
348
|
- app/services/completion_kit/starter_metrics.rb
|
|
@@ -364,6 +368,8 @@ files:
|
|
|
364
368
|
- app/views/completion_kit/datasets/index.html.erb
|
|
365
369
|
- app/views/completion_kit/datasets/new.html.erb
|
|
366
370
|
- app/views/completion_kit/datasets/show.html.erb
|
|
371
|
+
- app/views/completion_kit/imports/create.html.erb
|
|
372
|
+
- app/views/completion_kit/imports/new.html.erb
|
|
367
373
|
- app/views/completion_kit/metric_groups/_form.html.erb
|
|
368
374
|
- app/views/completion_kit/metric_groups/edit.html.erb
|
|
369
375
|
- app/views/completion_kit/metric_groups/index.html.erb
|