completion-kit 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb708ad1732ab18fac5f415ad90f53374832bd8e629ab2f1530e860a2eca6c03
4
- data.tar.gz: 3e159d8c80601f791a8b5e9fcabea66c1755dfb1635d268efa104a0a17bd9eb1
3
+ metadata.gz: 89a2dd49e9edb75dada51386769cfd7e16f596c3b30c434588864487ab801d5b
4
+ data.tar.gz: df8d9acfd4ef8aeab73508e7cf9adb2c083c7b9ef3d1c4304e1a3e01723b17e1
5
5
  SHA512:
6
- metadata.gz: 2d6bb436f79222b66d6f12bb0cb82642eb9e50c48704bbea76fb6b47c3c3b65f665363a905b4d8e57062445ad31bee44ba65c901de2a07d716d231fdef59e7df
7
- data.tar.gz: ba8c62309efc2f19d343c6744b47e1db25c2a64e3597f19f957bdfd971dac6ee5118fd7f2017c573799f04a5423ac030627ff91ec9785d77822bc22a9ba7d27b
6
+ metadata.gz: 9b03af38565cb43b7a57747896136817d954b06a89ba4551afb314ca93cd56a70e9d3068271d4f522741efa7d830b106a3a39e05997ea38fc3fb65996741ad08
7
+ data.tar.gz: be0dad1d98c850854770351edbfdcd7867fbda7b6590634bae2997707a136a94cee066c79fd2ae8a259ea9c01db06a89b7088012a072655c7c627f7c475a801a
@@ -1148,6 +1148,22 @@ tr:hover .ck-chip--publish {
1148
1148
  font-size: 0.85rem;
1149
1149
  }
1150
1150
 
1151
+ .ck-model-table__unknown {
1152
+ display: inline-flex;
1153
+ align-items: center;
1154
+ justify-content: center;
1155
+ width: 1.25rem;
1156
+ height: 1.25rem;
1157
+ border-radius: 50%;
1158
+ background: rgba(148, 163, 184, 0.12);
1159
+ color: var(--ck-muted);
1160
+ font-family: var(--ck-mono);
1161
+ font-size: 0.78rem;
1162
+ font-weight: 700;
1163
+ line-height: 1;
1164
+ cursor: help;
1165
+ }
1166
+
1151
1167
  .ck-model-list__summary {
1152
1168
  display: flex;
1153
1169
  align-items: center;
@@ -4008,6 +4024,48 @@ a.tag-mark {
4008
4024
  }
4009
4025
  .ck-launch__step-cta:hover { filter: brightness(1.1); transform: translateY(-1px); }
4010
4026
 
4027
+ /* Opt-in starter data — quiet, secondary (bright cyan is reserved for the next-step CTA) */
4028
+ .ck-launch__sample {
4029
+ margin-top: 1.5rem;
4030
+ padding: 1rem 1.1rem;
4031
+ display: flex;
4032
+ flex-wrap: wrap;
4033
+ align-items: center;
4034
+ justify-content: space-between;
4035
+ gap: 0.85rem 1.25rem;
4036
+ border: 1px dashed var(--ck-line);
4037
+ border-radius: var(--ck-radius);
4038
+ background: var(--ck-bg-strong);
4039
+ }
4040
+ .ck-launch__sample-copy {
4041
+ margin: 0;
4042
+ flex: 1 1 18rem;
4043
+ font-size: 0.85rem;
4044
+ line-height: 1.55;
4045
+ color: var(--ck-muted);
4046
+ }
4047
+ .ck-launch__sample-copy code { font-family: var(--ck-mono); font-size: 0.85em; color: var(--ck-text); }
4048
+ .ck-launch__sample-cta {
4049
+ display: inline-flex;
4050
+ align-items: center;
4051
+ gap: 0.4rem;
4052
+ flex-shrink: 0;
4053
+ margin: 0;
4054
+ padding: 0.5rem 0.95rem;
4055
+ background: transparent;
4056
+ border: 1px solid var(--ck-line);
4057
+ color: var(--ck-text);
4058
+ border-radius: var(--ck-radius);
4059
+ font-family: var(--ck-mono);
4060
+ font-size: 0.72rem;
4061
+ letter-spacing: 0.06em;
4062
+ text-transform: uppercase;
4063
+ font-weight: 600;
4064
+ cursor: pointer;
4065
+ transition: border-color 0.15s, color 0.15s, background 0.15s;
4066
+ }
4067
+ .ck-launch__sample-cta:hover { border-color: var(--ck-accent); color: var(--ck-accent); background: var(--ck-accent-soft); }
4068
+
4011
4069
  .ck-launch__panel-footer {
4012
4070
  margin-top: 1.75rem;
4013
4071
  padding-top: 1.25rem;
@@ -14,5 +14,10 @@ module CompletionKit
14
14
  cookies[DISMISS_COOKIE] = { value: "1", expires: 1.year.from_now, httponly: true }
15
15
  redirect_to prompts_path, notice: "Setup skipped. Pick it back up from Settings → Getting started any time."
16
16
  end
17
+
18
+ def sample_data
19
+ Onboarding::SampleData.install!
20
+ redirect_to onboarding_path, notice: "Loaded a sample dataset and prompt — edit or delete them whenever."
21
+ end
17
22
  end
18
23
  end
@@ -72,6 +72,11 @@ module CompletionKit
72
72
  CompletionKit::ProviderCredential::PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
73
73
  end
74
74
 
75
+ def ck_model_option_label(model)
76
+ return "#{model[:name]} (?)" if model.key?(:judging_confirmed) && !model[:judging_confirmed]
77
+ model[:name]
78
+ end
79
+
75
80
  def ck_grouped_models(models, selected = nil)
76
81
  if selected.present? && models.none? { |m| m[:id] == selected }
77
82
  retired = CompletionKit::Model.find_by(model_id: selected)
@@ -90,7 +95,7 @@ module CompletionKit
90
95
  end
91
96
 
92
97
  ordered_keys = groups.keys.sort_by { |label| [label.start_with?("OpenRouter") ? 1 : 0, label] }
93
- grouped = ordered_keys.map { |label| [label, groups[label].map { |m| [m[:name], m[:id]] }] }
98
+ grouped = ordered_keys.map { |label| [label, groups[label].map { |m| [ck_model_option_label(m), m[:id]] }] }
94
99
  grouped_options_for_select(grouped, selected)
95
100
  end
96
101
 
@@ -71,6 +71,7 @@ module CompletionKit
71
71
  )
72
72
  review.save!
73
73
 
74
+ confirm_judging_capability(run.judge_model)
74
75
  run.send(:broadcast_response_update, response)
75
76
  run.send(:broadcast_progress)
76
77
  enqueue_completion_check
@@ -78,6 +79,15 @@ module CompletionKit
78
79
 
79
80
  private
80
81
 
82
+ # A model with supports_judging == nil ("untested") just produced a valid
83
+ # review — promote it to confirmed. No-op once confirmed (so repeated runs
84
+ # don't churn the row), and a model already flagged as a bad judge stays so.
85
+ def confirm_judging_capability(judge_model_id)
86
+ model = Model.find_by(provider: ApiConfig.provider_for_model(judge_model_id), model_id: judge_model_id)
87
+ return unless model && model.supports_judging.nil?
88
+ model.update_columns(supports_judging: true, judging_error: nil)
89
+ end
90
+
81
91
  def record_terminal_failure!(error)
82
92
  response_id = @response_id || arguments.first
83
93
  metric_id = @metric_id || arguments.last
@@ -8,6 +8,9 @@ module CompletionKit
8
8
 
9
9
  scope :active, -> { where(status: "active") }
10
10
  scope :for_generation, -> { active.where(supports_generation: true) }
11
- scope :for_judging, -> { active.where(supports_judging: true) }
11
+ # Includes models not yet confirmed as judges (supports_judging: nil) — worth
12
+ # a try, and a successful run flips them to confirmed. Only models known to be
13
+ # bad judges (false) are excluded.
14
+ scope :for_judging, -> { active.where(supports_judging: [true, nil]) }
12
15
  end
13
16
  end
@@ -61,7 +61,9 @@ module CompletionKit
61
61
  end
62
62
  query = query.where(provider: provider) if provider.present?
63
63
  models = query.order(:provider, :display_name).map do |m|
64
- { id: m.model_id, name: m.display_name || m.model_id, provider: m.provider }
64
+ entry = { id: m.model_id, name: m.display_name || m.model_id, provider: m.provider }
65
+ entry[:judging_confirmed] = !m.supports_judging.nil? if scope == :judging
66
+ entry
65
67
  end
66
68
 
67
69
  return models if models.any?
@@ -13,8 +13,13 @@ module CompletionKit
13
13
  end
14
14
 
15
15
  def refresh!(&on_progress)
16
- models_with_names = fetch_models
17
- reconcile(models_with_names)
16
+ discovered = fetch_models
17
+ reconcile(discovered)
18
+ # OpenRouter publishes capability metadata (output modalities, etc.), so we
19
+ # derive everything from the model list and skip live probing entirely.
20
+ # Judging stays unknown ("?") until a real run proves it.
21
+ return if @provider == "openrouter"
22
+
18
23
  probe_new_models(&on_progress)
19
24
  end
20
25
 
@@ -86,10 +91,19 @@ module CompletionKit
86
91
  next nil if entry["deprecated"] == true
87
92
  context_length = entry["context_length"].to_i
88
93
  next nil if context_length < 8192
89
- { id: entry["id"], display_name: entry["name"] }
94
+ { id: entry["id"], display_name: entry["name"], supports_generation: openrouter_text_output?(entry) }
90
95
  end
91
96
  end
92
97
 
98
+ # OpenRouter exposes architecture.output_modalities (e.g. ["text"], ["image"],
99
+ # ["text", "image"]). A model can be used for generation/judging only if it
100
+ # outputs text. When the field is missing we keep the historical default of
101
+ # treating the model as text-capable.
102
+ def openrouter_text_output?(entry)
103
+ modalities = Array(entry.dig("architecture", "output_modalities")).map(&:to_s)
104
+ modalities.empty? || modalities.include?("text")
105
+ end
106
+
93
107
  def fetch_ollama_models
94
108
  raise DiscoveryError, "Ollama endpoint URL is required" if @api_endpoint.blank?
95
109
  base_url = @api_endpoint.to_s.delete_suffix("/")
@@ -100,35 +114,67 @@ module CompletionKit
100
114
  JSON.parse(response.body).fetch("data", []).map { |e| { id: e["id"], display_name: e["id"] } }
101
115
  end
102
116
 
103
- def reconcile(models_with_names)
104
- api_model_ids = models_with_names.map { |m| m[:id] }
105
- names_by_id = models_with_names.each_with_object({}) { |m, h| h[m[:id]] = m[:display_name] }
117
+ def reconcile(discovered)
118
+ api_model_ids = discovered.map { |m| m[:id] }
119
+ meta_by_id = discovered.index_by { |m| m[:id] }
106
120
  existing = Model.where(provider: @provider).index_by(&:model_id)
107
121
 
108
122
  api_model_ids.each do |model_id|
109
- if existing[model_id]
110
- attrs = { status: "active", retired_at: nil }
111
- attrs[:display_name] = names_by_id[model_id] if names_by_id[model_id].present?
112
- existing[model_id].update!(attrs) if existing[model_id].status == "retired" || names_by_id[model_id].present?
123
+ meta = meta_by_id[model_id]
124
+ if (model = existing[model_id])
125
+ reconcile_existing_model(model, meta)
113
126
  else
114
- attrs = {
115
- provider: @provider,
116
- model_id: model_id,
117
- display_name: names_by_id[model_id],
118
- status: "active",
119
- discovered_at: Time.current
120
- }
121
- if %w[openrouter ollama].include?(@provider)
122
- attrs[:supports_generation] = true
123
- attrs[:probed_at] = nil
124
- end
125
- Model.create!(attrs)
127
+ Model.create!(new_model_attrs(model_id, meta))
126
128
  end
127
129
  end
128
130
 
129
- active_not_in_api = Model.where(provider: @provider, status: "active")
130
- .where.not(model_id: api_model_ids)
131
- active_not_in_api.update_all(status: "retired", retired_at: Time.current)
131
+ Model.where(provider: @provider, status: "active")
132
+ .where.not(model_id: api_model_ids)
133
+ .update_all(status: "retired", retired_at: Time.current)
134
+ end
135
+
136
+ def new_model_attrs(model_id, meta)
137
+ attrs = {
138
+ provider: @provider,
139
+ model_id: model_id,
140
+ display_name: meta[:display_name],
141
+ status: "active",
142
+ discovered_at: Time.current
143
+ }
144
+ if @provider == "openrouter"
145
+ supports_generation = meta[:supports_generation] != false
146
+ attrs.merge!(
147
+ supports_generation: supports_generation,
148
+ supports_judging: nil,
149
+ probed_at: Time.current,
150
+ status: supports_generation ? "active" : "failed"
151
+ )
152
+ elsif @provider == "ollama"
153
+ attrs[:supports_generation] = true
154
+ attrs[:probed_at] = nil
155
+ end
156
+ attrs
157
+ end
158
+
159
+ def reconcile_existing_model(model, meta)
160
+ if @provider == "openrouter"
161
+ # Re-derive generation capability from the published metadata every refresh
162
+ # (fixes models discovered before capability metadata was used). Leave
163
+ # supports_judging alone — it's "learned" from successful runs.
164
+ supports_generation = meta[:supports_generation] != false
165
+ model.update!(
166
+ display_name: meta[:display_name].presence || model.display_name,
167
+ supports_generation: supports_generation,
168
+ generation_error: nil,
169
+ probed_at: Time.current,
170
+ status: supports_generation ? "active" : "failed",
171
+ retired_at: nil
172
+ )
173
+ else
174
+ attrs = { status: "active", retired_at: nil }
175
+ attrs[:display_name] = meta[:display_name] if meta[:display_name].present?
176
+ model.update!(attrs) if model.status == "retired" || meta[:display_name].present?
177
+ end
132
178
  end
133
179
 
134
180
  def probe_new_models(&on_progress)
@@ -223,7 +269,6 @@ module CompletionKit
223
269
  case @provider
224
270
  when "openai" then openai_probe(model_id, input, max_tokens)
225
271
  when "anthropic" then anthropic_probe(model_id, input, max_tokens)
226
- when "openrouter" then openrouter_probe(model_id, input, max_tokens)
227
272
  when "ollama" then ollama_probe(model_id, input, max_tokens)
228
273
  else raise ArgumentError, "Unsupported probe provider: #{@provider}"
229
274
  end
@@ -290,23 +335,6 @@ module CompletionKit
290
335
  end
291
336
  end
292
337
 
293
- def openrouter_probe(model_id, input, max_tokens)
294
- conn = Faraday.new(url: "https://openrouter.ai") do |f|
295
- f.options.timeout = 30
296
- f.options.open_timeout = 5
297
- f.request :retry, max: 1, interval: 0.5
298
- f.adapter Faraday.default_adapter
299
- end
300
- conn.post do |req|
301
- req.url "/api/v1/chat/completions"
302
- req.headers["Content-Type"] = "application/json"
303
- req.headers["Authorization"] = "Bearer #{@api_key}"
304
- req.headers["HTTP-Referer"] = "https://completionkit.com"
305
- req.headers["X-Title"] = "CompletionKit"
306
- req.body = { model: model_id, messages: [{ role: "user", content: input }], max_tokens: max_tokens }.to_json
307
- end
308
- end
309
-
310
338
  def ollama_probe(model_id, input, max_tokens)
311
339
  base_url = @api_endpoint.to_s.delete_suffix("/")
312
340
  conn = Faraday.new(url: base_url) do |f|
@@ -50,6 +50,13 @@ module CompletionKit
50
50
  steps.all?(&:done?)
51
51
  end
52
52
 
53
+ # Whether the "Load sample data" button should show — only while neither
54
+ # the dataset nor the prompt step is done (SampleData.install! no-ops
55
+ # otherwise, so the button would do nothing).
56
+ def sample_loadable?
57
+ steps.none? { |s| %i[dataset prompt].include?(s.key) && s.done? }
58
+ end
59
+
53
60
  def progress
54
61
  done = steps.count(&:done?)
55
62
  { done: done, total: steps.size, percent: ((done.to_f / steps.size) * 100).round }
@@ -0,0 +1,37 @@
1
+ module CompletionKit
2
+ module Onboarding
3
+ # Opt-in starter data for the onboarding page: one dataset + one prompt so a
4
+ # brand-new install has something to poke at. Idempotent — a no-op once the
5
+ # workspace already has any prompt or dataset. Deliberately does NOT create a
6
+ # provider credential (needs a real API key) or a run (user-initiated).
7
+ module SampleData
8
+ SAMPLE_CSV = <<~CSV.freeze
9
+ ticket
10
+ "My order #4827 arrived with a dented panel. I emailed photos 11 days ago and heard nothing. Today I was told the return window 'closed'. I paid $749. I want a refund or replacement, not store credit."
11
+ "Tracking says delivered to my porch Tuesday 3:47pm. I was home all day, nothing arrived, neighbours' cameras show no van. Order #5102 — a $315 mixer, wedding gift, wedding is Saturday. Can someone look today?"
12
+ "WELCOME20 says 'invalid' at checkout but the promo email says it's good through May 31. Same email I'm signed in with. Tried Chrome and Safari. Cart is $186 waiting on you."
13
+ CSV
14
+
15
+ SAMPLE_PROMPT = {
16
+ name: "Sample: Support reply",
17
+ description: "A starter prompt — drafts a warm, professional reply to a customer support ticket. Edit it or delete it; it's just here to get you going.",
18
+ template: "You are a senior customer-support specialist. Write a warm, professional reply to this ticket. Acknowledge the customer's situation, be specific about next steps, and don't be defensive.\n\nTicket:\n{{ticket}}",
19
+ llm_model: "gpt-4o-mini"
20
+ }.freeze
21
+
22
+ module_function
23
+
24
+ def install!
25
+ return if CompletionKit::Prompt.exists? || CompletionKit::Dataset.exists?
26
+
27
+ CompletionKit::Dataset.create!(name: "Sample: Customer tickets", csv_data: SAMPLE_CSV)
28
+ CompletionKit::Prompt.create!(
29
+ name: SAMPLE_PROMPT[:name],
30
+ description: SAMPLE_PROMPT[:description],
31
+ template: SAMPLE_PROMPT[:template],
32
+ llm_model: SAMPLE_PROMPT[:llm_model]
33
+ )
34
+ end
35
+ end
36
+ end
37
+ end
@@ -69,6 +69,15 @@
69
69
  <% end %>
70
70
  </ol>
71
71
 
72
+ <% if @checklist.sample_loadable? %>
73
+ <div class="ck-launch__sample">
74
+ <p class="ck-launch__sample-copy">Just exploring? Drop in a sample dataset and prompt to poke around — they're labelled <code>Sample:</code>, and you can edit or delete them whenever.</p>
75
+ <%= button_to onboarding_sample_data_path, method: :post, class: "ck-launch__sample-cta", form: { style: "display:inline" } do %>
76
+ Load sample data <span aria-hidden="true">&rarr;</span>
77
+ <% end %>
78
+ </div>
79
+ <% end %>
80
+
72
81
  <div class="ck-launch__panel-footer">
73
82
  <%= button_to dismiss_onboarding_path, method: :post, class: "ck-launch__dismiss", form: { style: "display:inline" } do %>
74
83
  Skip setup &mdash; go to the app <span aria-hidden="true">&rarr;</span>
@@ -36,7 +36,7 @@
36
36
  Gen<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Generation models produce the responses your prompts ask for. Pick one when creating a prompt.</span>
37
37
  </th>
38
38
  <th scope="col" class="ck-model-table__cap">
39
- Judge<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Judge models score generated responses against your metrics. Pick one when configuring a run.</span>
39
+ Judge<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Judge models score generated responses against your metrics. Pick one when configuring a run. A <strong>?</strong> means we haven't confirmed this model works as a judge — it's still selectable, and a successful run promotes it to ✓.</span>
40
40
  </th>
41
41
  </tr>
42
42
  </thead>
@@ -54,8 +54,10 @@
54
54
  <td class="ck-model-table__cap">
55
55
  <% if m.supports_judging %>
56
56
  <span class="ck-model-table__tick" aria-label="Supports judging">✓</span>
57
+ <% elsif m.supports_judging.nil? %>
58
+ <span class="ck-model-table__unknown" aria-label="Untested as judge" title="Untested as a judge — selectable; a successful run confirms it">?</span>
57
59
  <% else %>
58
- <span class="ck-model-table__dash" aria-label="No judging support">—</span>
60
+ <span class="ck-model-table__dash" aria-label="Not usable as judge">—</span>
59
61
  <% end %>
60
62
  </td>
61
63
  </tr>
data/config/routes.rb CHANGED
@@ -3,6 +3,7 @@ CompletionKit::Engine.routes.draw do
3
3
 
4
4
  get "onboarding", to: "onboarding#show", as: :onboarding
5
5
  post "onboarding/dismiss", to: "onboarding#dismiss", as: :dismiss_onboarding
6
+ post "onboarding/sample-data", to: "onboarding#sample_data", as: :onboarding_sample_data
6
7
 
7
8
  resources :prompts do
8
9
  member do
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.4"
2
+ VERSION = "0.5.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
@@ -294,6 +294,7 @@ files:
294
294
  - app/services/completion_kit/model_discovery_service.rb
295
295
  - app/services/completion_kit/ollama_client.rb
296
296
  - app/services/completion_kit/onboarding/checklist.rb
297
+ - app/services/completion_kit/onboarding/sample_data.rb
297
298
  - app/services/completion_kit/open_ai_client.rb
298
299
  - app/services/completion_kit/open_router_client.rb
299
300
  - app/services/completion_kit/prompt_improvement_service.rb