completion-kit 0.5.13 → 0.5.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0ec878fca14222f69bc34d85967b065b298ea669be11f60c49b52af13aefdfe5
4
- data.tar.gz: 8211fc882175c1e69c52f2cba5a5e14668af4ddfc5784362252e8c051a59bee1
3
+ metadata.gz: c9c349e12b1fb11eccb4b691dfac14e86bceda2cac6718523e9d3a9e0516698d
4
+ data.tar.gz: 43374b6f2a44ad5cfc44ac307ad6ba933c61262f79cfc1dd8797cdabef52fd87
5
5
  SHA512:
6
- metadata.gz: 2afc081fde8f6722aadee50f973bc1512be15e5cbd19913881f0f38f06df37fee4f1cdbda54cfcb230892e73df7b27f6fdf537ea71df7da49ddfba39d2c9f644
7
- data.tar.gz: 02cfae669de0b9a1f7a10bbedd7eecda8d708802ff85c1d8c6a50368fb49385e43f19b388f2ea45313b90e14a3d53464688b897ad23378964cfb55103bd3a8c2
6
+ metadata.gz: 33ed9038a4416e3d3929b7a18757779e5c406f4b4b3cdb1d8bf0ce945f6a8ceac4606d467068b4710a53f86c544a6fbe5fd6b56473ff9d4c7da2ac0f11cb2a13
7
+ data.tar.gz: 0c907dae8acd807f60061c79213022cdb12bc33c627080d2e85be2954345fea1335eb38fed99ccfe95c9d3f470be1d90e9e3f5a636180ab43648328fd30749b1
@@ -179,6 +179,18 @@ module CompletionKit
179
179
  ["tag", "tag-#{tag.color}", ("tag-outline" if outline)].compact.join(" ")
180
180
  end
181
181
 
182
+ def ck_run_path(run)
183
+ CompletionKit::Engine.routes.url_helpers.run_path(run)
184
+ end
185
+
186
+ def ck_prompt_path(prompt)
187
+ CompletionKit::Engine.routes.url_helpers.prompt_path(prompt)
188
+ end
189
+
190
+ def ck_dataset_path(dataset)
191
+ CompletionKit::Engine.routes.url_helpers.dataset_path(dataset)
192
+ end
193
+
182
194
  def ck_format_maybe_json(text)
183
195
  s = text.to_s
184
196
  return s if s.strip.empty?
@@ -1,6 +1,8 @@
1
1
  require "faraday"
2
2
 
3
3
  module CompletionKit
4
+ class JudgeParseError < StandardError; end
5
+
4
6
  class JudgeService
5
7
  def initialize(config = {})
6
8
  @config = config
@@ -9,7 +11,7 @@ module CompletionKit
9
11
  end
10
12
 
11
13
  def evaluate(output, expected_output = nil, prompt = nil, criteria: nil, rubric_text: nil, human_examples: nil, input_data: nil, **_extras)
12
- return { score: 1, feedback: "Judge not configured" } unless @judge_client.configured?
14
+ raise CompletionKit::ConfigurationError, "Judge not configured" unless @judge_client.configured?
13
15
 
14
16
  judge_prompt = build_judge_prompt(output, expected_output, prompt,
15
17
  criteria: criteria,
@@ -19,10 +21,6 @@ module CompletionKit
19
21
  response = @judge_client.generate_completion(judge_prompt, model: @judge_model)
20
22
  raise StandardError, response if response.start_with?("Error:")
21
23
  parse_judge_response(response)
22
- rescue Faraday::Error
23
- raise
24
- rescue => e
25
- { score: 1, feedback: "Error during evaluation: #{e.message}" }
26
24
  end
27
25
 
28
26
  private
@@ -66,16 +64,13 @@ module CompletionKit
66
64
  score_match = response.match(/\*{0,2}Score:?\*{0,2}\s*(\d+(?:\.\d+)?)/i)
67
65
  feedback_match = response.match(/\*{0,2}Feedback:?\*{0,2}\s*(.+)/mi)
68
66
 
69
- score = score_match ? score_match[1].to_f : 1
70
- feedback = if feedback_match
71
- feedback_match[1].strip
72
- elsif score_match
73
- "No feedback provided"
74
- else
75
- "Could not parse judge response: #{response.truncate(500)}"
76
- end
67
+ unless score_match
68
+ raise CompletionKit::JudgeParseError,
69
+ "Could not parse judge response: #{response.truncate(500)}"
70
+ end
77
71
 
78
- score = [[score, 1].max, 5].min
72
+ score = [[score_match[1].to_f, 1].max, 5].min
73
+ feedback = feedback_match ? feedback_match[1].strip : "No feedback provided"
79
74
 
80
75
  { score: score, feedback: feedback }
81
76
  end
@@ -15,7 +15,7 @@ module CompletionKit
15
15
  return "Error: API key not configured" unless configured?
16
16
 
17
17
  model = options[:model] || "gpt-4.1-mini"
18
- max_tokens = options[:max_tokens] || 1000
18
+ max_tokens = options[:max_tokens] || 8192
19
19
  temperature = options[:temperature] || 0.7
20
20
 
21
21
  response = post_responses(model: model, prompt: prompt, max_tokens: max_tokens, temperature: temperature)
@@ -36,8 +36,14 @@ module CompletionKit
36
36
 
37
37
  if response.success?
38
38
  data = JSON.parse(response.body)
39
+ if data["status"] == "incomplete"
40
+ reason = data.dig("incomplete_details", "reason") || "unknown"
41
+ return "Error: response incomplete (#{reason}) — increase max_tokens=#{max_tokens} or pick a non-reasoning judge model"
42
+ end
39
43
  message = Array(data["output"]).find { |o| o["type"] == "message" }
40
- message&.dig("content", 0, "text").to_s.strip
44
+ content = message&.dig("content", 0, "text").to_s.strip
45
+ return "Error: model returned empty content" if content.empty?
46
+ content
41
47
  else
42
48
  "Error: #{response.status} - #{response.body}"
43
49
  end
@@ -13,7 +13,7 @@ module CompletionKit
13
13
  return "Error: API key not configured" unless configured?
14
14
 
15
15
  model = options[:model] || "openai/gpt-4o-mini"
16
- max_tokens = options[:max_tokens] || 1000
16
+ max_tokens = options[:max_tokens] || 8192
17
17
  temperature = options[:temperature] || 0.7
18
18
 
19
19
  response = post_chat(model: model, prompt: prompt, max_tokens: max_tokens, temperature: temperature)
@@ -34,7 +34,13 @@ module CompletionKit
34
34
 
35
35
  if response.success?
36
36
  data = JSON.parse(response.body)
37
- data.dig("choices", 0, "message", "content").to_s.strip
37
+ choice = data.dig("choices", 0) || {}
38
+ if choice["finish_reason"] == "length"
39
+ return "Error: response truncated by max_tokens=#{max_tokens} before visible content was emitted (reasoning model burned through the budget)"
40
+ end
41
+ content = choice.dig("message", "content").to_s.strip
42
+ return "Error: model returned empty content" if content.empty?
43
+ content
38
44
  else
39
45
  "Error: #{response.status} - #{response.body}"
40
46
  end
@@ -1,4 +1,4 @@
1
- <tr onclick="window.location='<%= run_path(run) %>'" style="cursor: pointer;">
1
+ <tr onclick="window.location='<%= ck_run_path(run) %>'" style="cursor: pointer;">
2
2
  <td>
3
3
  <div class="ck-runs-table__identity">
4
4
  <span class="ck-run-name">
@@ -7,14 +7,14 @@
7
7
  </span>
8
8
  <div class="ck-runs-table__config">
9
9
  <% if run.prompt %>
10
- <%= link_to run.prompt.name, prompt_path(run.prompt), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
10
+ <%= link_to run.prompt.name, ck_prompt_path(run.prompt), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
11
11
  <span class="ck-runs-table__version">v<%= run.prompt.version_number %></span>
12
12
  <% else %>
13
13
  <span class="ck-runs-table__version">Judge-only</span>
14
14
  <% end %>
15
15
  <% if run.dataset %>
16
16
  <span class="ck-runs-table__sep">·</span>
17
- <%= link_to run.dataset.name, dataset_path(run.dataset), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
17
+ <%= link_to run.dataset.name, ck_dataset_path(run.dataset), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
18
18
  <% end %>
19
19
  </div>
20
20
  <% if run.tags.any? %>
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.13"
2
+ VERSION = "0.5.15"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.13
4
+ version: 0.5.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin