completion-kit 0.16.3 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -4
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +1 -1
- data/app/controllers/completion_kit/api_reference_controller.rb +1 -1
- data/app/controllers/completion_kit/datasets_controller.rb +1 -1
- data/app/controllers/completion_kit/prompts_controller.rb +1 -0
- data/app/controllers/completion_kit/runs_controller.rb +3 -4
- data/app/helpers/completion_kit/application_helper.rb +6 -0
- data/app/models/completion_kit/provider_credential.rb +2 -1
- data/app/models/completion_kit/run.rb +9 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +1 -1
- data/app/services/completion_kit/metric_agreement_examples.rb +1 -1
- data/app/services/completion_kit/metric_agreement_stats.rb +1 -1
- data/app/views/completion_kit/agreements/_trust_panel.html.erb +2 -2
- data/app/views/completion_kit/datasets/index.html.erb +1 -1
- data/app/views/completion_kit/datasets/show.html.erb +2 -0
- data/app/views/completion_kit/prompts/index.html.erb +2 -2
- data/app/views/completion_kit/prompts/show.html.erb +3 -1
- data/app/views/completion_kit/runs/index.html.erb +1 -3
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 971ebc337d2fa495fc31e82e98428aaee6f007ab3353f61d560df95f0cd15f78
|
|
4
|
+
data.tar.gz: 6aaa974cef5da25388cc19c4f3ce34b0c0b9fe447523c4430a7ac313351d3035
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6db71b9dfddb72596ec78ec9dd1c012b541e384680d71763cc2e753f11df2c734e9d74f2b30613f00f3aaf5697fd5d4b28125fe728294aa1220986363545a495
|
|
7
|
+
data.tar.gz: 3dafbf56819d71a56d6b346002feba0bfc5b84eb019e618d3607dfb857996d0e0132c24086198c95b73dfcffa6002a3bb82366ea14be5bb3bb136284c6292b85
|
data/README.md
CHANGED
|
@@ -270,16 +270,20 @@ end
|
|
|
270
270
|
|
|
271
271
|
`tenant_scope` runs as each engine model's `default_scope` (use `unscoped` to bypass). `tenant_scope_columns` is appended to every engine uniqueness validation. Adding the tenant columns and composite unique indexes lives in your host migrations. Both defaults (`nil`, `[]`) are no-ops.
|
|
272
272
|
|
|
273
|
-
|
|
273
|
+
One hook lets a host apply run-history retention everywhere the engine lists, counts, or traverses runs, without overriding controllers or views:
|
|
274
274
|
|
|
275
275
|
```ruby
|
|
276
276
|
CompletionKit.configure do |config|
|
|
277
|
-
config.
|
|
278
|
-
config.
|
|
277
|
+
config.runs_display_scope = -> { where(created_at: 90.days.ago..) }
|
|
278
|
+
config.runs_display_footer_partial = "runs/retention_notice"
|
|
279
279
|
end
|
|
280
280
|
```
|
|
281
281
|
|
|
282
|
-
`
|
|
282
|
+
`runs_display_scope` is a callable evaluated against a `Run` relation, in the same bare-`where` style as `tenant_scope` (it runs via `instance_exec`, so write it zero-arg with the relation as `self`, like a Rails `scope` lambda). It must return a relation: a callable that returns `nil` or anything non-chainable raises when a list renders. The engine applies it through `Run.display_scoped` at every run list and count it owns (the runs index, prompt and dataset show pages, the compare picker, new-run tag defaults, the v1 API index and its `X-Total-Count`, the MCP `runs_list` tool, the API reference recent-runs panel, and provider-credential usage stats) and through `Run.visible_run_ids` for child records that traverse runs (the metric trust-panel sample and the agreement examples shown on a metric page). Use it for list-only retention rather than a global `default_scope`, which would null `Run` associations everywhere they are traversed.
|
|
283
|
+
|
|
284
|
+
Deliberately exempt, because they must still see every run: id-addressed single-run lookups (`runs#show`, the MCP `runs_get` tool, the v1 API show), delete-confirmation cascade counts, the auto-generated run-name counter, and the judge few-shot seeding that learns from corrected examples even on hidden runs.
|
|
285
|
+
|
|
286
|
+
`runs_display_footer_partial` names a partial rendered below the runs list on the index and the prompt and dataset show pages; it receives the shown runs as a `runs` local. Use it for a notice like "older runs are hidden, upgrade to see them" — your host owns the retention rule in `runs_display_scope`, so it computes the hidden count itself. Both default to `nil` (no-ops), leaving standalone behaviour unchanged.
|
|
283
287
|
|
|
284
288
|
## Contributing
|
|
285
289
|
|
|
@@ -5,7 +5,7 @@ module CompletionKit
|
|
|
5
5
|
before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures, :rerun, :regrade, :compare]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
scope = Run.includes(:tags)
|
|
8
|
+
scope = Run.includes(:tags).display_scoped
|
|
9
9
|
scope = scope.where(status: params[:status]) if params[:status].present?
|
|
10
10
|
scope = scope.where(prompt_id: params[:prompt_id]) if params[:prompt_id].present?
|
|
11
11
|
scope = scope.where(dataset_id: params[:dataset_id]) if params[:dataset_id].present?
|
|
@@ -2,7 +2,7 @@ module CompletionKit
|
|
|
2
2
|
class ApiReferenceController < ApplicationController
|
|
3
3
|
def index
|
|
4
4
|
@published_prompts = Prompt.current_versions.order(name: :asc)
|
|
5
|
-
@recent_runs = Run.includes(:prompt).order(created_at: :desc).limit(10)
|
|
5
|
+
@recent_runs = Run.includes(:prompt).display_scoped.order(created_at: :desc).limit(10)
|
|
6
6
|
@datasets = Dataset.order(name: :asc)
|
|
7
7
|
@metrics = Metric.order(name: :asc)
|
|
8
8
|
@metric_groups = MetricGroup.includes(:metrics).order(name: :asc)
|
|
@@ -8,7 +8,7 @@ module CompletionKit
|
|
|
8
8
|
end
|
|
9
9
|
|
|
10
10
|
def show
|
|
11
|
-
@runs = @dataset.runs.includes(:prompt, :responses).order(created_at: :desc)
|
|
11
|
+
@runs = @dataset.runs.includes(:prompt, :responses).order(created_at: :desc).display_scoped
|
|
12
12
|
respond_to do |format|
|
|
13
13
|
format.html
|
|
14
14
|
format.csv do
|
|
@@ -5,9 +5,7 @@ module CompletionKit
|
|
|
5
5
|
before_action :load_form_collections, only: [:new, :edit, :create, :update]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
scope = Run.includes(:prompt, :dataset, :tags, responses: :reviews).order(created_at: :desc)
|
|
9
|
-
index_scope = CompletionKit.config.runs_index_scope
|
|
10
|
-
scope = scope.instance_exec(&index_scope) if index_scope
|
|
8
|
+
scope = Run.includes(:prompt, :dataset, :tags, responses: :reviews).order(created_at: :desc).display_scoped
|
|
11
9
|
@runs = apply_tag_filter(scope)
|
|
12
10
|
end
|
|
13
11
|
|
|
@@ -33,7 +31,7 @@ module CompletionKit
|
|
|
33
31
|
@run = Run.new(prompt_id: params[:prompt_id])
|
|
34
32
|
prompt = Prompt.find_by(id: @run.prompt_id)
|
|
35
33
|
if prompt
|
|
36
|
-
last_run = Run.where(prompt_id: prompt.family_versions.ids).order(created_at: :desc).first
|
|
34
|
+
last_run = Run.where(prompt_id: prompt.family_versions.ids).display_scoped.order(created_at: :desc).first
|
|
37
35
|
@run.tag_names = last_run.tag_names if last_run
|
|
38
36
|
end
|
|
39
37
|
end
|
|
@@ -86,6 +84,7 @@ module CompletionKit
|
|
|
86
84
|
if other_id.blank?
|
|
87
85
|
@other_runs = Run.where(dataset_id: @run.dataset_id, prompt_id: @run.prompt_id)
|
|
88
86
|
.where.not(id: @run.id)
|
|
87
|
+
.display_scoped
|
|
89
88
|
.order(created_at: :desc)
|
|
90
89
|
.limit(50)
|
|
91
90
|
return render(:compare_picker)
|
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module ApplicationHelper
|
|
3
|
+
def ck_runs_display_footer(runs)
|
|
4
|
+
partial = CompletionKit.config.runs_display_footer_partial
|
|
5
|
+
return unless partial
|
|
6
|
+
render partial, runs: runs
|
|
7
|
+
end
|
|
8
|
+
|
|
3
9
|
def ck_button_classes(tone = :dark, variant: :solid)
|
|
4
10
|
base = "ck-button"
|
|
5
11
|
|
|
@@ -65,7 +65,7 @@ module CompletionKit
|
|
|
65
65
|
def judge_count
|
|
66
66
|
model_ids = Model.where(provider: provider).pluck(:model_id)
|
|
67
67
|
return 0 if model_ids.empty?
|
|
68
|
-
Run.where(judge_model: model_ids).distinct.count(:judge_model)
|
|
68
|
+
Run.where(judge_model: model_ids).display_scoped.distinct.count(:judge_model)
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
def last_used_at
|
|
@@ -74,6 +74,7 @@ module CompletionKit
|
|
|
74
74
|
prompt_scope = Prompt.where(llm_model: model_ids).select(:id)
|
|
75
75
|
Run.where("prompt_id IN (:prompts) OR judge_model IN (:models)",
|
|
76
76
|
prompts: prompt_scope, models: model_ids)
|
|
77
|
+
.display_scoped
|
|
77
78
|
.where.not(status: "pending")
|
|
78
79
|
.maximum(:created_at)
|
|
79
80
|
end
|
|
@@ -21,6 +21,15 @@ module CompletionKit
|
|
|
21
21
|
before_validation :set_default_status, on: :create
|
|
22
22
|
before_validation :set_auto_name, on: :create
|
|
23
23
|
|
|
24
|
+
def self.display_scoped
|
|
25
|
+
filter = CompletionKit.config.runs_display_scope
|
|
26
|
+
filter ? all.instance_exec(&filter) : all
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def self.visible_run_ids
|
|
30
|
+
display_scoped.select(:id)
|
|
31
|
+
end
|
|
32
|
+
|
|
24
33
|
# A judge-only run grades a pre-existing column on the dataset instead of
|
|
25
34
|
# generating new outputs. No prompt is attached; the response text is read
|
|
26
35
|
# from row[output_column]; no LLM generation happens.
|
|
@@ -29,7 +29,7 @@ module CompletionKit
|
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
def agreements_for(metric, verdict:, limit:)
|
|
32
|
-
base = Agreement.where(metric_id: metric.id, verdict: verdict)
|
|
32
|
+
base = Agreement.where(metric_id: metric.id, verdict: verdict, run_id: Run.visible_run_ids)
|
|
33
33
|
current_version = MetricVersion.current.find_by(metric_id: metric.id)
|
|
34
34
|
scoped = current_version ? base.where(metric_version_id: current_version.id) : base
|
|
35
35
|
effective = scoped.exists? ? scoped : base
|
|
@@ -49,7 +49,7 @@ module CompletionKit
|
|
|
49
49
|
end
|
|
50
50
|
|
|
51
51
|
def call
|
|
52
|
-
scope = Agreement.where(metric_id: @metric.id)
|
|
52
|
+
scope = Agreement.where(metric_id: @metric.id, run_id: Run.visible_run_ids)
|
|
53
53
|
if @metric_version
|
|
54
54
|
scope = scope.where(metric_version_id: @metric_version.id)
|
|
55
55
|
elsif !@all_versions
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
<% current_metric_version = metric && CompletionKit::MetricVersion.current.find_by(metric_id: metric.id) %>
|
|
5
5
|
<% target_response = if (stats.sample_size.zero? || stats.counter_only?) && metric && current_metric_version
|
|
6
6
|
created_by = CompletionKit.config.username.presence || "operator"
|
|
7
|
-
verdicted_ids = CompletionKit::Agreement.where(metric_id: metric.id, created_by: created_by, metric_version_id: current_metric_version.id).pluck(:response_id)
|
|
8
|
-
CompletionKit::Response.joins(:reviews)
|
|
7
|
+
verdicted_ids = CompletionKit::Agreement.where(metric_id: metric.id, created_by: created_by, metric_version_id: current_metric_version.id, run_id: CompletionKit::Run.visible_run_ids).pluck(:response_id)
|
|
8
|
+
CompletionKit::Response.where(run_id: CompletionKit::Run.visible_run_ids).joins(:reviews)
|
|
9
9
|
.where(reviews: { metric_id: metric.id, metric_version_id: current_metric_version.id })
|
|
10
10
|
.where.not(reviews: { ai_score: nil })
|
|
11
11
|
.where.not(id: verdicted_ids)
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
<% end %>
|
|
37
37
|
</td>
|
|
38
38
|
<td data-label="Rows"><%= dataset.row_count %></td>
|
|
39
|
-
<td data-label="Used in"><%= dataset.runs.count %></td>
|
|
39
|
+
<td data-label="Used in"><%= dataset.runs.display_scoped.count %></td>
|
|
40
40
|
<td data-label="Created" class="ck-meta-copy"><time datetime="<%= dataset.created_at.iso8601 %>"><%= dataset.created_at.strftime("%b %-d, %Y") %></time></td>
|
|
41
41
|
<td class="ck-results-table__arrow">→</td>
|
|
42
42
|
</tr>
|
|
@@ -51,8 +51,8 @@
|
|
|
51
51
|
<% end %>
|
|
52
52
|
</td>
|
|
53
53
|
<td><span class="ck-chip"><%= prompt.llm_model %></span></td>
|
|
54
|
-
<% family_runs = CompletionKit::Run.where(prompt_id: prompt.family_versions.select(:id)) %>
|
|
55
|
-
<% current_version_runs = prompt.runs.includes(responses: :reviews) %>
|
|
54
|
+
<% family_runs = CompletionKit::Run.where(prompt_id: prompt.family_versions.select(:id)).display_scoped %>
|
|
55
|
+
<% current_version_runs = prompt.runs.display_scoped.includes(responses: :reviews) %>
|
|
56
56
|
<% best_score = current_version_runs.map(&:avg_score).compact.max %>
|
|
57
57
|
<td>
|
|
58
58
|
<% if best_score %>
|
|
@@ -64,7 +64,7 @@
|
|
|
64
64
|
</thead>
|
|
65
65
|
<tbody>
|
|
66
66
|
<% versions.each do |v| %>
|
|
67
|
-
<% best_score = v.runs.map(&:avg_score).compact.max %>
|
|
67
|
+
<% best_score = v.runs.display_scoped.map(&:avg_score).compact.max %>
|
|
68
68
|
<% pred = predecessor_of[v] %>
|
|
69
69
|
<tr class="<%= "ck-results-table__row--active" if v.id == @prompt.id %>" onclick="window.location='<%= prompt_path(v) %>'" style="cursor: pointer;">
|
|
70
70
|
<td>
|
|
@@ -143,6 +143,8 @@
|
|
|
143
143
|
</section>
|
|
144
144
|
<% end %>
|
|
145
145
|
|
|
146
|
+
<%= ck_runs_display_footer(@runs) %>
|
|
147
|
+
|
|
146
148
|
<% suggestions = CompletionKit::Suggestion.where(prompt_id: @prompt.family_versions.select(:id)).order(created_at: :desc) %>
|
|
147
149
|
<% if suggestions.any? %>
|
|
148
150
|
<section class="ck-card--spaced">
|
|
@@ -23,6 +23,4 @@
|
|
|
23
23
|
<div class="ck-empty">No runs yet. <%= link_to "Create your first run →", new_run_path, class: "ck-link" %></div>
|
|
24
24
|
<% end %>
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
<%= render CompletionKit.config.runs_index_footer_partial, runs: @runs %>
|
|
28
|
-
<% end %>
|
|
26
|
+
<%= ck_runs_display_footer(@runs) %>
|
data/lib/completion_kit.rb
CHANGED
|
@@ -10,7 +10,7 @@ module CompletionKit
|
|
|
10
10
|
attr_accessor :username, :password, :auth_strategy, :api_token
|
|
11
11
|
attr_accessor :tenant_scope, :tenant_scope_columns
|
|
12
12
|
attr_accessor :api_reference_authentication_partial
|
|
13
|
-
attr_accessor :runs_index_scope, :runs_index_footer_partial
|
|
13
|
+
attr_accessor :runs_display_scope, :runs_display_footer_partial
|
|
14
14
|
attr_accessor :api_rate_limit, :web_rate_limit
|
|
15
15
|
attr_accessor :allow_loopback_endpoints
|
|
16
16
|
attr_accessor :judge_agreement_enabled
|