completion-kit 0.4.1 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/stylesheets/completion_kit/application.css +1882 -785
  3. data/app/controllers/completion_kit/runs_controller.rb +34 -19
  4. data/app/controllers/completion_kit/suggestions_controller.rb +24 -0
  5. data/app/jobs/completion_kit/generate_row_job.rb +7 -0
  6. data/app/jobs/completion_kit/judge_review_job.rb +2 -0
  7. data/app/jobs/completion_kit/model_discovery_job.rb +9 -4
  8. data/app/models/completion_kit/dataset.rb +9 -0
  9. data/app/models/completion_kit/provider_credential.rb +12 -1
  10. data/app/models/completion_kit/response.rb +7 -0
  11. data/app/models/completion_kit/run.rb +47 -9
  12. data/app/services/completion_kit/anthropic_client.rb +33 -14
  13. data/app/services/completion_kit/model_discovery_service.rb +133 -30
  14. data/app/services/completion_kit/ollama_client.rb +31 -10
  15. data/app/services/completion_kit/open_ai_client.rb +35 -13
  16. data/app/services/completion_kit/open_router_client.rb +34 -13
  17. data/app/services/completion_kit/worker_health.rb +4 -1
  18. data/app/views/completion_kit/datasets/index.html.erb +1 -1
  19. data/app/views/completion_kit/datasets/show.html.erb +47 -9
  20. data/app/views/completion_kit/metrics/_form.html.erb +1 -1
  21. data/app/views/completion_kit/metrics/index.html.erb +15 -2
  22. data/app/views/completion_kit/metrics/show.html.erb +1 -1
  23. data/app/views/completion_kit/prompts/index.html.erb +27 -8
  24. data/app/views/completion_kit/prompts/show.html.erb +6 -36
  25. data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +6 -4
  26. data/app/views/completion_kit/provider_credentials/_form.html.erb +1 -32
  27. data/app/views/completion_kit/provider_credentials/_models_card.html.erb +70 -0
  28. data/app/views/completion_kit/provider_credentials/index.html.erb +1 -1
  29. data/app/views/completion_kit/responses/show.html.erb +27 -6
  30. data/app/views/completion_kit/runs/_actions.html.erb +3 -0
  31. data/app/views/completion_kit/runs/_form.html.erb +114 -20
  32. data/app/views/completion_kit/runs/_response_row.html.erb +52 -22
  33. data/app/views/completion_kit/runs/_row.html.erb +50 -0
  34. data/app/views/completion_kit/runs/_sort_toolbar.html.erb +5 -4
  35. data/app/views/completion_kit/runs/_status_header.html.erb +7 -31
  36. data/app/views/completion_kit/runs/_status_panel.html.erb +80 -0
  37. data/app/views/completion_kit/runs/index.html.erb +4 -16
  38. data/app/views/completion_kit/runs/show.html.erb +111 -17
  39. data/app/views/completion_kit/suggestions/show.html.erb +65 -0
  40. data/app/views/layouts/completion_kit/application.html.erb +71 -0
  41. data/config/routes.rb +8 -2
  42. data/db/migrate/20260507000001_add_discovery_error_to_provider_credentials.rb +5 -0
  43. data/db/migrate/20260507150000_add_temperature_ignored_to_runs.rb +5 -0
  44. data/lib/completion_kit/version.rb +1 -1
  45. metadata +9 -4
  46. data/app/views/completion_kit/runs/_progress.html.erb +0 -18
  47. data/app/views/completion_kit/runs/suggestion.html.erb +0 -47
@@ -1,6 +1,6 @@
1
1
  module CompletionKit
2
2
  class RunsController < ApplicationController
3
- before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :suggest, :suggestion, :apply_suggestion, :retry_failures]
3
+ before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :suggest, :retry_failures, :rerun, :refresh_status]
4
4
  before_action :load_form_collections, only: [:new, :edit, :create, :update]
5
5
 
6
6
  def index
@@ -45,7 +45,9 @@ module CompletionKit
45
45
 
46
46
  def update
47
47
  if @run.responses.any?
48
- new_run = Run.create!(run_params.except(:metric_ids).to_h.merge(status: "pending"))
48
+ attrs = run_params.except(:metric_ids).to_h
49
+ attrs.delete("name") if attrs["name"].to_s == @run.name.to_s
50
+ new_run = Run.create!(attrs.merge(status: "pending"))
49
51
  new_run.replace_metrics!(params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
50
52
  redirect_to run_path(new_run), notice: "Saved as a new run. The previous run and its results are preserved."
51
53
  elsif @run.update(run_params.except(:metric_ids))
@@ -70,21 +72,44 @@ module CompletionKit
70
72
  end
71
73
  end
72
74
 
75
+ def rerun
76
+ new_run = Run.create!(
77
+ prompt_id: @run.prompt_id,
78
+ dataset_id: @run.dataset_id,
79
+ judge_model: @run.judge_model,
80
+ temperature: @run.temperature,
81
+ status: "pending"
82
+ )
83
+ new_run.replace_metrics!(@run.metric_ids)
84
+ if new_run.start!
85
+ redirect_to run_path(new_run), notice: "Re-running with the same configuration."
86
+ else
87
+ redirect_to run_path(new_run), alert: new_run.failure_summary || "Could not start the new run."
88
+ end
89
+ end
90
+
91
+ def refresh_status
92
+ respond_to do |format|
93
+ format.turbo_stream do
94
+ render turbo_stream: turbo_stream.replace(
95
+ "run_status_header",
96
+ partial: "completion_kit/runs/status_header",
97
+ locals: { run: @run }
98
+ )
99
+ end
100
+ end
101
+ end
102
+
73
103
  def suggest
74
104
  service = PromptImprovementService.new(@run)
75
105
  result = service.suggest
76
- @run.suggestions.create!(
106
+ suggestion = @run.suggestions.create!(
77
107
  prompt: @run.prompt,
78
108
  reasoning: result["reasoning"],
79
109
  suggested_template: result["suggested_template"],
80
110
  original_template: result["original_template"]
81
111
  )
82
- redirect_to suggestion_run_path(@run)
83
- end
84
-
85
- def suggestion
86
- @suggestion = @run.suggestions.order(created_at: :desc).first
87
- return redirect_to run_path(@run), alert: "No suggestion available. Generate one first." unless @suggestion
112
+ redirect_to suggestion_path(suggestion, from: "run")
88
113
  end
89
114
 
90
115
  def retry_failures
@@ -113,16 +138,6 @@ module CompletionKit
113
138
  redirect_to run_path(@run)
114
139
  end
115
140
 
116
- def apply_suggestion
117
- suggestion = @run.suggestions.order(created_at: :desc).first
118
- return redirect_to run_path(@run), alert: "No suggestion to apply." unless suggestion
119
-
120
- new_prompt = @run.prompt.clone_as_new_version(template: suggestion.suggested_template)
121
- new_prompt.publish!
122
- suggestion.update!(applied_at: Time.current)
123
- redirect_to prompt_path(new_prompt), notice: "Suggestion applied."
124
- end
125
-
126
141
  private
127
142
 
128
143
  def set_run
@@ -0,0 +1,24 @@
1
+ module CompletionKit
2
+ class SuggestionsController < ApplicationController
3
+ before_action :set_suggestion
4
+
5
+ def show
6
+ @run = @suggestion.run
7
+ @from = params[:from] == "run" ? "run" : "prompt"
8
+ end
9
+
10
+ def apply
11
+ run = @suggestion.run
12
+ new_prompt = run.prompt.clone_as_new_version(template: @suggestion.suggested_template)
13
+ new_prompt.publish!
14
+ @suggestion.update!(applied_at: Time.current)
15
+ redirect_to prompt_path(new_prompt), notice: "Suggestion applied."
16
+ end
17
+
18
+ private
19
+
20
+ def set_suggestion
21
+ @suggestion = Suggestion.find(params[:id])
22
+ end
23
+ end
24
+ end
@@ -49,6 +49,11 @@ module CompletionKit
49
49
  raise ConfigurationError, client.configuration_errors.join(", ") unless client.configured?
50
50
 
51
51
  text = client.generate_completion(rendered, model: prompt.llm_model, temperature: run.temperature)
52
+ raise StandardError, text.to_s.sub(/\AError:\s*/, "") if text.to_s.start_with?("Error:")
53
+
54
+ if client.respond_to?(:temperature_dropped?) && client.temperature_dropped? && !run.temperature_ignored?
55
+ run.update_columns(temperature_ignored: true)
56
+ end
52
57
 
53
58
  response.update!(
54
59
  status: "succeeded",
@@ -56,6 +61,7 @@ module CompletionKit
56
61
  error_provider: nil, error_class: nil, error_status: nil, error_message: nil
57
62
  )
58
63
  run.send(:broadcast_response_update, response)
64
+ run.send(:broadcast_progress)
59
65
 
60
66
  if run.judge_configured?
61
67
  run.metrics.each do |metric|
@@ -88,6 +94,7 @@ module CompletionKit
88
94
  error_message: error.message.to_s.truncate(2000)
89
95
  )
90
96
  response.run&.send(:broadcast_response_update, response)
97
+ response.run&.send(:broadcast_progress)
91
98
  end
92
99
 
93
100
  def provider_for(response)
@@ -71,6 +71,7 @@ module CompletionKit
71
71
  review.save!
72
72
 
73
73
  run.send(:broadcast_response_update, response)
74
+ run.send(:broadcast_progress)
74
75
  enqueue_completion_check
75
76
  end
76
77
 
@@ -93,6 +94,7 @@ module CompletionKit
93
94
  )
94
95
  review.save!(validate: false)
95
96
  response.run&.send(:broadcast_response_update, response)
97
+ response.run&.send(:broadcast_progress)
96
98
  end
97
99
 
98
100
  def provider_for(response)
@@ -17,9 +17,9 @@ module CompletionKit
17
17
 
18
18
  discard_on ActiveJob::DeserializationError
19
19
 
20
- rescue_from(StandardError) do |_error|
20
+ rescue_from(StandardError) do |error|
21
21
  credential = ProviderCredential.find(arguments.first)
22
- credential.update_columns(discovery_status: "failed")
22
+ credential.update_columns(discovery_status: "failed", discovery_error: error.message.to_s.truncate(500))
23
23
  credential.reload
24
24
  credential.broadcast_discovery_progress
25
25
  end
@@ -28,7 +28,12 @@ module CompletionKit
28
28
  credential = ProviderCredential.find_by(id: provider_credential_id)
29
29
  return unless credential
30
30
 
31
- credential.update_columns(discovery_status: "discovering", discovery_current: 0, discovery_total: 0)
31
+ credential.update_columns(
32
+ discovery_status: "discovering",
33
+ discovery_current: 0,
34
+ discovery_total: 0,
35
+ discovery_error: nil
36
+ )
32
37
  credential.reload
33
38
  credential.broadcast_discovery_progress
34
39
 
@@ -39,7 +44,7 @@ module CompletionKit
39
44
  credential.broadcast_discovery_progress
40
45
  end
41
46
 
42
- credential.update_columns(discovery_status: "completed", updated_at: Time.current)
47
+ credential.update_columns(discovery_status: "completed", discovery_error: nil, updated_at: Time.current)
43
48
  credential.reload
44
49
  credential.broadcast_discovery_complete
45
50
  end
@@ -20,5 +20,14 @@ module CompletionKit
20
20
  rescue ::CSV::MalformedCSVError
21
21
  0
22
22
  end
23
+
24
+ def headers
25
+ return [] if csv_data.blank?
26
+
27
+ require "csv"
28
+ ::CSV.parse(csv_data.lines.first.to_s).first.to_a.map(&:to_s).map(&:strip)
29
+ rescue ::CSV::MalformedCSVError
30
+ []
31
+ end
23
32
  end
24
33
  end
@@ -56,7 +56,7 @@ module CompletionKit
56
56
  def judge_count
57
57
  model_ids = Model.where(provider: provider).pluck(:model_id)
58
58
  return 0 if model_ids.empty?
59
- Run.where(judge_model: model_ids).count
59
+ Run.where(judge_model: model_ids).distinct.count(:judge_model)
60
60
  end
61
61
 
62
62
  def last_used_at
@@ -75,6 +75,7 @@ module CompletionKit
75
75
  target: "discovery_status_#{id}",
76
76
  html: render_partial("completion_kit/provider_credentials/discovery_status", provider_credential: self)
77
77
  )
78
+ broadcast_provider_models
78
79
  end
79
80
 
80
81
  def broadcast_discovery_complete
@@ -82,6 +83,16 @@ module CompletionKit
82
83
  broadcast_model_dropdowns
83
84
  end
84
85
 
86
+ def broadcast_provider_models
87
+ Turbo::StreamsChannel.broadcast_action_to(
88
+ "completion_kit_provider_#{id}",
89
+ action: "replace",
90
+ target: "provider_models_#{id}",
91
+ method: "morph",
92
+ html: render_partial("completion_kit/provider_credentials/models_card", provider_credential: self)
93
+ )
94
+ end
95
+
85
96
  private
86
97
 
87
98
  def enqueue_discovery
@@ -43,6 +43,13 @@ module CompletionKit
43
43
  reviews.any? { |r| r.ai_score.present? }
44
44
  end
45
45
 
46
+ def fully_reviewed?
47
+ metric_ids = run.metric_ids
48
+ return true if metric_ids.empty?
49
+ reviewed_metric_ids = reviews.where(status: Review::TERMINAL_STATUSES).pluck(:metric_id).uniq
50
+ (metric_ids - reviewed_metric_ids).empty?
51
+ end
52
+
46
53
  def error_payload
47
54
  return nil if error_class.blank?
48
55
  { provider: error_provider, class: error_class, status: error_status, message: error_message }
@@ -13,10 +13,20 @@ module CompletionKit
13
13
 
14
14
  validates :name, presence: true
15
15
  validates :status, inclusion: { in: STATUSES }
16
+ validate :dataset_supplies_prompt_variables
16
17
 
17
18
  before_validation :set_default_status, on: :create
18
19
  before_validation :set_auto_name, on: :create
19
20
 
21
+ def missing_dataset_variables
22
+ return [] unless prompt
23
+ vars = prompt.variables
24
+ return [] if vars.empty?
25
+ return vars if dataset.nil?
26
+
27
+ vars - dataset.headers
28
+ end
29
+
20
30
  def mark_completed!
21
31
  update!(status: "completed")
22
32
  broadcast_ui
@@ -119,12 +129,28 @@ module CompletionKit
119
129
  generated_total = progress_total
120
130
 
121
131
  metric_count = metrics.count
122
- succeeded_count = generated_done
123
- judged_total = succeeded_count * metric_count
124
- judged_done = Review.joins(:response)
125
- .where(completion_kit_responses: { run_id: id }, status: "succeeded").count
126
- judged_failed = Review.joins(:response)
127
- .where(completion_kit_responses: { run_id: id }, status: "failed").count
132
+ judged_total = metric_count > 0 ? generated_done : 0
133
+ judged_done = 0
134
+ judged_failed = 0
135
+
136
+ if metric_count > 0 && judged_total > 0
137
+ succeeded_response_ids = responses.where(status: "succeeded").pluck(:id)
138
+ metric_ids = metrics.pluck(:id)
139
+ review_counts = Review
140
+ .where(response_id: succeeded_response_ids, metric_id: metric_ids)
141
+ .group(:response_id, :status)
142
+ .count
143
+ succeeded_response_ids.each do |rid|
144
+ ok = review_counts[[rid, "succeeded"]] || 0
145
+ bad = review_counts[[rid, "failed"]] || 0
146
+ next unless ok + bad == metric_count
147
+ if bad > 0
148
+ judged_failed += 1
149
+ else
150
+ judged_done += 1
151
+ end
152
+ end
153
+ end
128
154
 
129
155
  {
130
156
  generated_done: generated_done,
@@ -175,6 +201,7 @@ module CompletionKit
175
201
  end
176
202
 
177
203
  def render_engine_partial(partial, locals)
204
+ CompletionKit::Engine.routes.url_helpers
178
205
  CompletionKit::ApplicationController.render(
179
206
  partial: partial,
180
207
  locals: locals
@@ -185,8 +212,8 @@ module CompletionKit
185
212
  reload
186
213
  broadcast_replace_to(
187
214
  "completion_kit_run_#{id}",
188
- target: "run_progress",
189
- html: render_engine_partial("completion_kit/runs/progress", run: self)
215
+ target: "run_status_panel",
216
+ html: render_engine_partial("completion_kit/runs/status_panel", run: self)
190
217
  )
191
218
  broadcast_status_header
192
219
  end
@@ -219,7 +246,7 @@ module CompletionKit
219
246
  broadcast_replace_to(
220
247
  "completion_kit_run_#{id}",
221
248
  target: "run_responses",
222
- html: '<div id="run_responses"></div>'
249
+ html: '<tbody id="run_responses"></tbody>'
223
250
  )
224
251
  end
225
252
 
@@ -250,5 +277,16 @@ module CompletionKit
250
277
  count = Run.where(prompt_id: prompt_id).count + 1
251
278
  self.name = "#{prompt.name} — v#{prompt.version_number} ##{count}"
252
279
  end
280
+
281
+ def dataset_supplies_prompt_variables
282
+ missing = missing_dataset_variables
283
+ return if missing.empty?
284
+
285
+ if dataset.nil?
286
+ errors.add(:dataset_id, "is required: prompt uses #{missing.join(', ')}")
287
+ else
288
+ errors.add(:dataset_id, "is missing columns required by the prompt: #{missing.join(', ')}")
289
+ end
290
+ end
253
291
  end
254
292
  end
@@ -5,28 +5,25 @@ module CompletionKit
5
5
  { id: "claude-3-5-haiku-latest", name: "Claude 3.5 Haiku" }
6
6
  ].freeze
7
7
 
8
+ def temperature_dropped?
9
+ @temperature_dropped == true
10
+ end
11
+
8
12
  def generate_completion(prompt, options = {})
13
+ @temperature_dropped = false
9
14
  return "Error: API key not configured" unless configured?
10
15
 
11
16
  model = options[:model] || "claude-3-7-sonnet-latest"
12
17
  max_tokens = options[:max_tokens] || 1000
13
18
  temperature = options[:temperature] || 0.7
14
19
 
15
- response = build_connection("https://api.anthropic.com").post do |req|
16
- req.url "/v1/messages"
17
- req.headers["Content-Type"] = "application/json"
18
- req.headers["x-api-key"] = api_key
19
- req.headers["anthropic-version"] = "2023-06-01"
20
- req.body = {
21
- model: model,
22
- messages: [
23
- { role: "user", content: prompt }
24
- ],
25
- max_tokens: max_tokens,
26
- temperature: temperature
27
- }.to_json
20
+ response = post_messages(model: model, prompt: prompt, max_tokens: max_tokens, temperature: temperature)
21
+
22
+ if response.status == 400 && temperature_unsupported?(response.body)
23
+ @temperature_dropped = true
24
+ response = post_messages(model: model, prompt: prompt, max_tokens: max_tokens, temperature: nil)
28
25
  end
29
-
26
+
30
27
  if response.status == 429
31
28
  raise CompletionKit::RateLimitError.new(
32
29
  response.body.to_s.truncate(500),
@@ -82,5 +79,27 @@ module CompletionKit
82
79
  def api_key
83
80
  @config[:api_key] || ENV["ANTHROPIC_API_KEY"]
84
81
  end
82
+
83
+ def post_messages(model:, prompt:, max_tokens:, temperature:)
84
+ body = {
85
+ model: model,
86
+ messages: [{ role: "user", content: prompt }],
87
+ max_tokens: max_tokens
88
+ }
89
+ body[:temperature] = temperature unless temperature.nil?
90
+
91
+ build_connection("https://api.anthropic.com").post do |req|
92
+ req.url "/v1/messages"
93
+ req.headers["Content-Type"] = "application/json"
94
+ req.headers["x-api-key"] = api_key
95
+ req.headers["anthropic-version"] = "2023-06-01"
96
+ req.body = body.to_json
97
+ end
98
+ end
99
+
100
+ def temperature_unsupported?(body)
101
+ s = body.to_s
102
+ s.include?("temperature") && (s.include?("deprecated") || s.include?("not supported"))
103
+ end
85
104
  end
86
105
  end