completion-kit 0.5.14 → 0.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/images/completion_kit/favicon.ico +0 -0
- data/app/helpers/completion_kit/application_helper.rb +12 -0
- data/app/services/completion_kit/judge_service.rb +9 -14
- data/app/views/completion_kit/runs/_row.html.erb +3 -3
- data/lib/completion_kit/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c9c349e12b1fb11eccb4b691dfac14e86bceda2cac6718523e9d3a9e0516698d
|
|
4
|
+
data.tar.gz: 43374b6f2a44ad5cfc44ac307ad6ba933c61262f79cfc1dd8797cdabef52fd87
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 33ed9038a4416e3d3929b7a18757779e5c406f4b4b3cdb1d8bf0ce945f6a8ceac4606d467068b4710a53f86c544a6fbe5fd6b56473ff9d4c7da2ac0f11cb2a13
|
|
7
|
+
data.tar.gz: 0c907dae8acd807f60061c79213022cdb12bc33c627080d2e85be2954345fea1335eb38fed99ccfe95c9d3f470be1d90e9e3f5a636180ab43648328fd30749b1
|
|
Binary file
|
|
@@ -179,6 +179,18 @@ module CompletionKit
|
|
|
179
179
|
["tag", "tag-#{tag.color}", ("tag-outline" if outline)].compact.join(" ")
|
|
180
180
|
end
|
|
181
181
|
|
|
182
|
+
def ck_run_path(run)
|
|
183
|
+
CompletionKit::Engine.routes.url_helpers.run_path(run)
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def ck_prompt_path(prompt)
|
|
187
|
+
CompletionKit::Engine.routes.url_helpers.prompt_path(prompt)
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
def ck_dataset_path(dataset)
|
|
191
|
+
CompletionKit::Engine.routes.url_helpers.dataset_path(dataset)
|
|
192
|
+
end
|
|
193
|
+
|
|
182
194
|
def ck_format_maybe_json(text)
|
|
183
195
|
s = text.to_s
|
|
184
196
|
return s if s.strip.empty?
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
require "faraday"
|
|
2
2
|
|
|
3
3
|
module CompletionKit
|
|
4
|
+
class JudgeParseError < StandardError; end
|
|
5
|
+
|
|
4
6
|
class JudgeService
|
|
5
7
|
def initialize(config = {})
|
|
6
8
|
@config = config
|
|
@@ -9,7 +11,7 @@ module CompletionKit
|
|
|
9
11
|
end
|
|
10
12
|
|
|
11
13
|
def evaluate(output, expected_output = nil, prompt = nil, criteria: nil, rubric_text: nil, human_examples: nil, input_data: nil, **_extras)
|
|
12
|
-
|
|
14
|
+
raise CompletionKit::ConfigurationError, "Judge not configured" unless @judge_client.configured?
|
|
13
15
|
|
|
14
16
|
judge_prompt = build_judge_prompt(output, expected_output, prompt,
|
|
15
17
|
criteria: criteria,
|
|
@@ -19,10 +21,6 @@ module CompletionKit
|
|
|
19
21
|
response = @judge_client.generate_completion(judge_prompt, model: @judge_model)
|
|
20
22
|
raise StandardError, response if response.start_with?("Error:")
|
|
21
23
|
parse_judge_response(response)
|
|
22
|
-
rescue Faraday::Error
|
|
23
|
-
raise
|
|
24
|
-
rescue => e
|
|
25
|
-
{ score: 1, feedback: "Error during evaluation: #{e.message}" }
|
|
26
24
|
end
|
|
27
25
|
|
|
28
26
|
private
|
|
@@ -66,16 +64,13 @@ module CompletionKit
|
|
|
66
64
|
score_match = response.match(/\*{0,2}Score:?\*{0,2}\s*(\d+(?:\.\d+)?)/i)
|
|
67
65
|
feedback_match = response.match(/\*{0,2}Feedback:?\*{0,2}\s*(.+)/mi)
|
|
68
66
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
"No feedback provided"
|
|
74
|
-
else
|
|
75
|
-
"Could not parse judge response: #{response.truncate(500)}"
|
|
76
|
-
end
|
|
67
|
+
unless score_match
|
|
68
|
+
raise CompletionKit::JudgeParseError,
|
|
69
|
+
"Could not parse judge response: #{response.truncate(500)}"
|
|
70
|
+
end
|
|
77
71
|
|
|
78
|
-
score = [[
|
|
72
|
+
score = [[score_match[1].to_f, 1].max, 5].min
|
|
73
|
+
feedback = feedback_match ? feedback_match[1].strip : "No feedback provided"
|
|
79
74
|
|
|
80
75
|
{ score: score, feedback: feedback }
|
|
81
76
|
end
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
<tr onclick="window.location='<%=
|
|
1
|
+
<tr onclick="window.location='<%= ck_run_path(run) %>'" style="cursor: pointer;">
|
|
2
2
|
<td>
|
|
3
3
|
<div class="ck-runs-table__identity">
|
|
4
4
|
<span class="ck-run-name">
|
|
@@ -7,14 +7,14 @@
|
|
|
7
7
|
</span>
|
|
8
8
|
<div class="ck-runs-table__config">
|
|
9
9
|
<% if run.prompt %>
|
|
10
|
-
<%= link_to run.prompt.name,
|
|
10
|
+
<%= link_to run.prompt.name, ck_prompt_path(run.prompt), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
|
|
11
11
|
<span class="ck-runs-table__version">v<%= run.prompt.version_number %></span>
|
|
12
12
|
<% else %>
|
|
13
13
|
<span class="ck-runs-table__version">Judge-only</span>
|
|
14
14
|
<% end %>
|
|
15
15
|
<% if run.dataset %>
|
|
16
16
|
<span class="ck-runs-table__sep">·</span>
|
|
17
|
-
<%= link_to run.dataset.name,
|
|
17
|
+
<%= link_to run.dataset.name, ck_dataset_path(run.dataset), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
|
|
18
18
|
<% end %>
|
|
19
19
|
</div>
|
|
20
20
|
<% if run.tags.any? %>
|