completion-kit 0.5.43 → 0.5.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +25 -0
- data/app/controllers/completion_kit/api/v1/calibrations_controller.rb +1 -1
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +4 -0
- data/app/controllers/completion_kit/calibrations_controller.rb +1 -1
- data/app/controllers/completion_kit/metrics_controller.rb +63 -17
- data/app/controllers/completion_kit/runs_controller.rb +6 -0
- data/app/jobs/completion_kit/judge_review_job.rb +3 -0
- data/app/models/completion_kit/calibration.rb +6 -2
- data/app/models/completion_kit/metric.rb +0 -17
- data/app/models/completion_kit/{judge_version.rb → metric_version.rb} +4 -2
- data/app/models/completion_kit/review.rb +9 -0
- data/app/models/completion_kit/run.rb +28 -0
- data/app/services/completion_kit/mcp_tools/calibrations.rb +1 -1
- data/app/services/completion_kit/mcp_tools/judges.rb +15 -13
- data/app/services/completion_kit/metric_calibration_stats.rb +9 -9
- data/app/services/completion_kit/{judge_variant_generator.rb → metric_variant_generator.rb} +8 -8
- data/app/views/completion_kit/calibrations/_trust_panel.html.erb +6 -1
- data/app/views/completion_kit/metrics/_form.html.erb +3 -3
- data/app/views/completion_kit/metrics/edit.html.erb +1 -1
- data/app/views/completion_kit/metrics/show.html.erb +12 -14
- data/app/views/completion_kit/responses/show.html.erb +9 -1
- data/app/views/completion_kit/runs/show.html.erb +23 -0
- data/db/migrate/20260528000001_rename_judge_version_to_metric_version.rb +22 -0
- data/db/migrate/20260528000002_add_metric_version_to_reviews.rb +21 -0
- data/lib/completion_kit/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d81df0996441d12c0fb540b9f29bb514813adcdbea3ceefb515d318f28947731
|
|
4
|
+
data.tar.gz: 606764f41e74cec3284f1155d7ef86e77a61af708af2320d5b02640827741f7a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9e468cd12eb143f4b5eb64333339199420db4c9d0c78ec548965972eee5e326d574a80c6c3092d63f4d99d88901ce3470ac688468d2813f5370e589568fba669
|
|
7
|
+
data.tar.gz: 7377f00a31d539297f9e79059083aa7bfef782d18d1ecfcb9f7da1ff648ce1eaf6f8a94bc55d56fcca22a47e09c7fcb1bc89981563aa351e4293c47f8d886570
|
|
@@ -2816,6 +2816,31 @@ select.ck-input {
|
|
|
2816
2816
|
line-height: 1.55;
|
|
2817
2817
|
}
|
|
2818
2818
|
|
|
2819
|
+
.ck-review-card--stale {
|
|
2820
|
+
border-left: 2px solid rgba(224, 164, 88, 0.45);
|
|
2821
|
+
}
|
|
2822
|
+
|
|
2823
|
+
.ck-stale-versions-banner {
|
|
2824
|
+
margin: 0 0 1rem;
|
|
2825
|
+
padding: 0.9rem 1rem;
|
|
2826
|
+
border: 1px solid rgba(224, 164, 88, 0.4);
|
|
2827
|
+
background: rgba(224, 164, 88, 0.06);
|
|
2828
|
+
border-radius: var(--ck-radius);
|
|
2829
|
+
display: flex;
|
|
2830
|
+
align-items: center;
|
|
2831
|
+
justify-content: space-between;
|
|
2832
|
+
gap: 1rem;
|
|
2833
|
+
flex-wrap: wrap;
|
|
2834
|
+
}
|
|
2835
|
+
.ck-stale-versions-banner__body { min-width: 0; flex: 1 1 320px; }
|
|
2836
|
+
.ck-stale-versions-banner .ck-kicker { color: var(--ck-warning); }
|
|
2837
|
+
.ck-review-card__stale-note {
|
|
2838
|
+
margin: 0.4rem 0 0;
|
|
2839
|
+
font-family: var(--ck-mono);
|
|
2840
|
+
font-size: 0.78rem;
|
|
2841
|
+
color: var(--ck-warning);
|
|
2842
|
+
}
|
|
2843
|
+
|
|
2819
2844
|
@media (max-width: 900px) {
|
|
2820
2845
|
.ck-grid--sidebar,
|
|
2821
2846
|
.ck-grid--cards,
|
|
@@ -45,6 +45,10 @@ module CompletionKit
|
|
|
45
45
|
end
|
|
46
46
|
|
|
47
47
|
def retry_failures
|
|
48
|
+
if @run.stale_review_summary.any?
|
|
49
|
+
return render(json: { error: "Judge has changed since this run executed. Retry would mix versions in the same run; use POST /api/v1/runs/:id/rerun instead." }, status: :conflict)
|
|
50
|
+
end
|
|
51
|
+
|
|
48
52
|
scope = @run.responses.where(status: "failed")
|
|
49
53
|
scope = scope.where(id: params[:only]) if params[:only].present?
|
|
50
54
|
|
|
@@ -18,7 +18,7 @@ module CompletionKit
|
|
|
18
18
|
run: @run, response: @response, metric: @metric, created_by: created_by
|
|
19
19
|
)
|
|
20
20
|
calibration.assign_attributes(
|
|
21
|
-
|
|
21
|
+
metric_version: MetricVersion.ensure_current_for(@metric),
|
|
22
22
|
verdict: params[:verdict],
|
|
23
23
|
corrected_score: params[:corrected_score].presence,
|
|
24
24
|
note: params[:note].presence
|
|
@@ -35,16 +35,16 @@ module CompletionKit
|
|
|
35
35
|
end
|
|
36
36
|
|
|
37
37
|
def show
|
|
38
|
-
@
|
|
38
|
+
@published_metric_version = MetricVersion.ensure_current_for(@metric)
|
|
39
39
|
@disagreements = Calibration.where(metric_id: @metric.id, verdict: "disagree")
|
|
40
|
-
.includes(:
|
|
40
|
+
.includes(:metric_version, response: [:reviews, :run])
|
|
41
41
|
.order(created_at: :desc)
|
|
42
42
|
.limit(50)
|
|
43
|
-
@edit_draft =
|
|
44
|
-
@suggestion_draft =
|
|
43
|
+
@edit_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "edit").order(created_at: :desc).first
|
|
44
|
+
@suggestion_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").order(created_at: :desc).first
|
|
45
45
|
@improve_disagreement_count = Calibration.where(metric_id: @metric.id, verdict: "disagree",
|
|
46
|
-
|
|
47
|
-
@versions =
|
|
46
|
+
metric_version_id: @published_metric_version.id).count
|
|
47
|
+
@versions = MetricVersion.where(metric_id: @metric.id).order(version_number: :desc).to_a
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
def new
|
|
@@ -52,9 +52,14 @@ module CompletionKit
|
|
|
52
52
|
end
|
|
53
53
|
|
|
54
54
|
def edit
|
|
55
|
-
@suggestion_draft =
|
|
56
|
-
@edit_draft =
|
|
57
|
-
@
|
|
55
|
+
@suggestion_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").order(created_at: :desc).first
|
|
56
|
+
@edit_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "edit").order(created_at: :desc).first
|
|
57
|
+
@published_metric_version = MetricVersion.published.where(metric_id: @metric.id, current: true).first
|
|
58
|
+
|
|
59
|
+
if @edit_draft
|
|
60
|
+
@metric.instruction = @edit_draft.instruction
|
|
61
|
+
@metric.rubric_bands = @edit_draft.rubric_bands
|
|
62
|
+
end
|
|
58
63
|
end
|
|
59
64
|
|
|
60
65
|
def create
|
|
@@ -68,10 +73,42 @@ module CompletionKit
|
|
|
68
73
|
end
|
|
69
74
|
|
|
70
75
|
def update
|
|
71
|
-
|
|
72
|
-
|
|
76
|
+
judge_keys = %i[instruction rubric_bands]
|
|
77
|
+
meta_attrs = metric_params.except(*judge_keys)
|
|
78
|
+
proposed_instruction = metric_params[:instruction]
|
|
79
|
+
proposed_rubric = metric_params[:rubric_bands]
|
|
80
|
+
|
|
81
|
+
unless @metric.update(meta_attrs)
|
|
82
|
+
return render(:edit, status: :unprocessable_entity)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
current_instruction = @metric.instruction.to_s
|
|
86
|
+
current_rubric = @metric.rubric_bands || []
|
|
87
|
+
normalized_proposed_rubric = normalize_rubric_bands_for_update(proposed_rubric)
|
|
88
|
+
|
|
89
|
+
instruction_changed = !proposed_instruction.nil? && proposed_instruction.to_s != current_instruction
|
|
90
|
+
rubric_changed = !normalized_proposed_rubric.nil? && normalized_proposed_rubric != current_rubric
|
|
91
|
+
|
|
92
|
+
unless instruction_changed || rubric_changed
|
|
93
|
+
return redirect_to(metric_path(@metric), notice: "Metric was successfully updated.")
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
new_instruction = instruction_changed ? proposed_instruction.to_s : current_instruction
|
|
97
|
+
new_rubric = rubric_changed ? normalized_proposed_rubric : current_rubric
|
|
98
|
+
|
|
99
|
+
if @metric.reviews.exists?
|
|
100
|
+
MetricVersion.drafts.where(metric_id: @metric.id, source: "edit").destroy_all
|
|
101
|
+
draft = MetricVersion.create!(
|
|
102
|
+
metric: @metric, instruction: new_instruction, rubric_bands: new_rubric,
|
|
103
|
+
state: "draft", source: "edit", current: false
|
|
104
|
+
)
|
|
105
|
+
redirect_to edit_metric_path(@metric),
|
|
106
|
+
notice: "Saved as draft #{draft.version_label}. Publish to push these changes to the live judge."
|
|
73
107
|
else
|
|
74
|
-
|
|
108
|
+
@metric.update!(instruction: new_instruction, rubric_bands: new_rubric)
|
|
109
|
+
current_pub = MetricVersion.published.where(metric_id: @metric.id, current: true).first
|
|
110
|
+
current_pub&.update!(instruction: @metric.instruction, rubric_bands: @metric.rubric_bands)
|
|
111
|
+
redirect_to metric_path(@metric), notice: "Metric was successfully updated."
|
|
75
112
|
end
|
|
76
113
|
end
|
|
77
114
|
|
|
@@ -88,9 +125,9 @@ module CompletionKit
|
|
|
88
125
|
return
|
|
89
126
|
end
|
|
90
127
|
|
|
91
|
-
|
|
128
|
+
MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").destroy_all
|
|
92
129
|
|
|
93
|
-
generator =
|
|
130
|
+
generator = MetricVariantGenerator.new(@metric, count: 1)
|
|
94
131
|
variants = generator.call
|
|
95
132
|
if variants.empty?
|
|
96
133
|
redirect_to target, alert: "The model returned no usable variants. Try again with a different model."
|
|
@@ -101,18 +138,18 @@ module CompletionKit
|
|
|
101
138
|
end
|
|
102
139
|
|
|
103
140
|
def dismiss_suggestion
|
|
104
|
-
draft =
|
|
141
|
+
draft = MetricVersion.drafts.where(metric_id: @metric.id).find_by(id: params[:draft_id])
|
|
105
142
|
draft&.destroy
|
|
106
143
|
target = params[:back_to] == "edit" ? edit_metric_path(@metric) : metric_path(@metric)
|
|
107
144
|
redirect_to target, notice: "Dismissed."
|
|
108
145
|
end
|
|
109
146
|
|
|
110
147
|
def publish_draft
|
|
111
|
-
scope =
|
|
148
|
+
scope = MetricVersion.where(metric_id: @metric.id)
|
|
112
149
|
version = if params[:draft_id].present?
|
|
113
150
|
scope.find_by(id: params[:draft_id])
|
|
114
151
|
else
|
|
115
|
-
|
|
152
|
+
MetricVersion.drafts.where(metric_id: @metric.id).order(created_at: :desc).first
|
|
116
153
|
end
|
|
117
154
|
|
|
118
155
|
if version.nil?
|
|
@@ -160,5 +197,14 @@ module CompletionKit
|
|
|
160
197
|
params.require(:metric).permit(:name, :instruction,
|
|
161
198
|
rubric_bands: [:stars, :description], tag_names: [])
|
|
162
199
|
end
|
|
200
|
+
|
|
201
|
+
def normalize_rubric_bands_for_update(bands)
|
|
202
|
+
return nil if bands.nil?
|
|
203
|
+
array = bands.is_a?(ActionController::Parameters) ? bands.to_unsafe_h.values : bands
|
|
204
|
+
Array(array).map do |b|
|
|
205
|
+
h = b.respond_to?(:to_unsafe_h) ? b.to_unsafe_h : b
|
|
206
|
+
{ "stars" => h["stars"].to_i, "description" => h["description"].to_s }
|
|
207
|
+
end.sort_by { |b| -b["stars"] }
|
|
208
|
+
end
|
|
163
209
|
end
|
|
164
210
|
end
|
|
@@ -126,6 +126,12 @@ module CompletionKit
|
|
|
126
126
|
end
|
|
127
127
|
|
|
128
128
|
def retry_failures
|
|
129
|
+
if @run.stale_review_summary.any?
|
|
130
|
+
redirect_to run_path(@run),
|
|
131
|
+
alert: "The judge has changed since this run executed. Retrying failed cases would mix scores from two metric versions in the same run. Use 'Re-run with current judge' to refresh everything against the live judge."
|
|
132
|
+
return
|
|
133
|
+
end
|
|
134
|
+
|
|
129
135
|
scope = @run.responses.where(status: "failed")
|
|
130
136
|
scope = scope.where(id: params[:only]) if params[:only].present?
|
|
131
137
|
|
|
@@ -62,9 +62,11 @@ module CompletionKit
|
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
review = response.reviews.find_or_initialize_by(metric_id: metric.id)
|
|
65
|
+
current_metric_version = MetricVersion.ensure_current_for(metric)
|
|
65
66
|
review.assign_attributes(
|
|
66
67
|
metric_name: metric.name,
|
|
67
68
|
instruction: metric.instruction.to_s,
|
|
69
|
+
metric_version_id: current_metric_version.id,
|
|
68
70
|
status: "succeeded",
|
|
69
71
|
ai_score: evaluation[:score],
|
|
70
72
|
ai_feedback: evaluation[:feedback],
|
|
@@ -122,6 +124,7 @@ module CompletionKit
|
|
|
122
124
|
end
|
|
123
125
|
|
|
124
126
|
def few_shot_payload(metric)
|
|
127
|
+
return nil unless CompletionKit.config.judge_calibration_enabled
|
|
125
128
|
Array(metric.few_shot_examples).map do |fs|
|
|
126
129
|
{
|
|
127
130
|
human_score: fs["human_score"],
|
|
@@ -5,7 +5,11 @@ module CompletionKit
|
|
|
5
5
|
belongs_to :run
|
|
6
6
|
belongs_to :response
|
|
7
7
|
belongs_to :metric
|
|
8
|
-
belongs_to :
|
|
8
|
+
belongs_to :metric_version
|
|
9
|
+
|
|
10
|
+
alias_attribute :judge_version_id, :metric_version_id
|
|
11
|
+
alias_method :judge_version, :metric_version
|
|
12
|
+
alias_method :judge_version=, :metric_version=
|
|
9
13
|
|
|
10
14
|
validates :verdict, presence: true, inclusion: { in: VERDICTS }
|
|
11
15
|
validates :response_id,
|
|
@@ -22,7 +26,7 @@ module CompletionKit
|
|
|
22
26
|
run_id: run_id,
|
|
23
27
|
response_id: response_id,
|
|
24
28
|
metric_id: metric_id,
|
|
25
|
-
|
|
29
|
+
metric_version_id: metric_version_id,
|
|
26
30
|
verdict: verdict,
|
|
27
31
|
corrected_score: corrected_score,
|
|
28
32
|
note: note,
|
|
@@ -24,7 +24,6 @@ module CompletionKit
|
|
|
24
24
|
before_validation :generate_key
|
|
25
25
|
before_validation :normalize_rubric_bands
|
|
26
26
|
before_validation :set_defaults
|
|
27
|
-
after_update :fork_draft_judge_version, if: :judge_relevant_changes?
|
|
28
27
|
|
|
29
28
|
def self.default_rubric_bands
|
|
30
29
|
DEFAULT_RUBRIC_BANDS.map(&:dup)
|
|
@@ -98,21 +97,5 @@ module CompletionKit
|
|
|
98
97
|
self.rubric_bands = self.class.normalize_rubric_bands(rubric_bands) if rubric_bands.present?
|
|
99
98
|
end
|
|
100
99
|
|
|
101
|
-
def judge_relevant_changes?
|
|
102
|
-
saved_change_to_instruction? || saved_change_to_rubric_bands?
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
def fork_draft_judge_version
|
|
106
|
-
JudgeVersion.ensure_current_for(self)
|
|
107
|
-
JudgeVersion.where(metric_id: id, state: "draft").update_all(current: false)
|
|
108
|
-
JudgeVersion.create!(
|
|
109
|
-
metric: self,
|
|
110
|
-
instruction: instruction,
|
|
111
|
-
rubric_bands: rubric_bands,
|
|
112
|
-
current: false,
|
|
113
|
-
state: "draft",
|
|
114
|
-
source: "edit"
|
|
115
|
-
)
|
|
116
|
-
end
|
|
117
100
|
end
|
|
118
101
|
end
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
|
-
class
|
|
2
|
+
class MetricVersion < ApplicationRecord
|
|
3
3
|
STATES = %w[draft published].freeze
|
|
4
4
|
|
|
5
5
|
belongs_to :metric
|
|
@@ -41,7 +41,7 @@ module CompletionKit
|
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
def publish!
|
|
44
|
-
|
|
44
|
+
MetricVersion.transaction do
|
|
45
45
|
self.class.where(metric_id: metric_id).where.not(id: id).update_all(current: false)
|
|
46
46
|
reload
|
|
47
47
|
update!(state: "published", current: true, published_at: published_at || Time.current)
|
|
@@ -76,4 +76,6 @@ module CompletionKit
|
|
|
76
76
|
self.version_number = max + 1
|
|
77
77
|
end
|
|
78
78
|
end
|
|
79
|
+
|
|
80
|
+
JudgeVersion = MetricVersion
|
|
79
81
|
end
|
|
@@ -5,8 +5,16 @@ module CompletionKit
|
|
|
5
5
|
|
|
6
6
|
belongs_to :response
|
|
7
7
|
belongs_to :metric, optional: true
|
|
8
|
+
belongs_to :metric_version, optional: true
|
|
8
9
|
has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
|
|
9
10
|
|
|
11
|
+
def stale_against_current_judge?
|
|
12
|
+
return false unless metric_id && metric_version_id
|
|
13
|
+
current_id = MetricVersion.current.where(metric_id: metric_id).limit(1).pick(:id)
|
|
14
|
+
return false if current_id.nil?
|
|
15
|
+
metric_version_id != current_id
|
|
16
|
+
end
|
|
17
|
+
|
|
10
18
|
validates :metric_name, presence: true
|
|
11
19
|
validates :status, inclusion: { in: STATUSES }
|
|
12
20
|
validates :ai_score, numericality: { greater_than_or_equal_to: 1, less_than_or_equal_to: 5 }, allow_nil: true
|
|
@@ -29,6 +37,7 @@ module CompletionKit
|
|
|
29
37
|
def as_json(options = {})
|
|
30
38
|
{
|
|
31
39
|
id: id, response_id: response_id, metric_id: metric_id,
|
|
40
|
+
metric_version_id: metric_version_id,
|
|
32
41
|
metric_name: metric_name, ai_score: ai_score,
|
|
33
42
|
ai_feedback: ai_feedback, status: status, attempts: attempts,
|
|
34
43
|
error: error_payload
|
|
@@ -89,6 +89,34 @@ module CompletionKit
|
|
|
89
89
|
end
|
|
90
90
|
end
|
|
91
91
|
|
|
92
|
+
def stale_review_summary
|
|
93
|
+
review_pairs = Review.where(response_id: response_ids)
|
|
94
|
+
.where.not(metric_id: nil)
|
|
95
|
+
.where.not(metric_version_id: nil)
|
|
96
|
+
.pluck(:metric_id, :metric_version_id, :metric_name)
|
|
97
|
+
return {} if review_pairs.empty?
|
|
98
|
+
|
|
99
|
+
metric_ids = review_pairs.map(&:first).uniq
|
|
100
|
+
version_ids = review_pairs.map { |_, vid, _| vid }.uniq
|
|
101
|
+
current_by_metric = MetricVersion.current.where(metric_id: metric_ids).pluck(:metric_id, :id, :version_number).each_with_object({}) do |(mid, vid, vnum), h|
|
|
102
|
+
h[mid] = { id: vid, label: "v#{vnum}" }
|
|
103
|
+
end
|
|
104
|
+
label_by_version = MetricVersion.where(id: version_ids).pluck(:id, :version_number).each_with_object({}) { |(vid, vnum), h| h[vid] = "v#{vnum}" }
|
|
105
|
+
|
|
106
|
+
summary = {}
|
|
107
|
+
review_pairs.each do |metric_id, version_id, metric_name|
|
|
108
|
+
current = current_by_metric[metric_id]
|
|
109
|
+
next if current.nil?
|
|
110
|
+
next if version_id == current[:id]
|
|
111
|
+
label = label_by_version[version_id]
|
|
112
|
+
next if label.nil?
|
|
113
|
+
summary[metric_id] ||= { metric_name: metric_name, current_label: current[:label], stale_count: 0, scored_labels: [] }
|
|
114
|
+
summary[metric_id][:stale_count] += 1
|
|
115
|
+
summary[metric_id][:scored_labels] |= [label]
|
|
116
|
+
end
|
|
117
|
+
summary
|
|
118
|
+
end
|
|
119
|
+
|
|
92
120
|
def start!
|
|
93
121
|
rows = if dataset
|
|
94
122
|
CsvProcessor.process_self(self)
|
|
@@ -56,7 +56,7 @@ module CompletionKit
|
|
|
56
56
|
run_id: run.id, response_id: response.id, metric_id: metric.id, created_by: created_by
|
|
57
57
|
)
|
|
58
58
|
calibration.assign_attributes(
|
|
59
|
-
|
|
59
|
+
metric_version: CompletionKit::MetricVersion.ensure_current_for(metric),
|
|
60
60
|
verdict: args["verdict"],
|
|
61
61
|
corrected_score: args["corrected_score"],
|
|
62
62
|
note: args["note"]
|
|
@@ -5,7 +5,7 @@ module CompletionKit
|
|
|
5
5
|
|
|
6
6
|
TOOLS = {
|
|
7
7
|
"judges_suggest" => {
|
|
8
|
-
description: "Ask the model to rewrite the metric's judge instruction in N variants targeted at the recent disagreements. Each variant is saved as a draft
|
|
8
|
+
description: "Ask the model to rewrite the metric's judge instruction in N variants targeted at the recent disagreements. Each variant is saved as a draft MetricVersion with source=\"suggestion\". Returns the persisted drafts. Stripe-metering hooks fire via ActiveSupport::Notifications under completion_kit.judge_suggestion.generated.",
|
|
9
9
|
inputSchema: {
|
|
10
10
|
type: "object",
|
|
11
11
|
properties: {
|
|
@@ -33,15 +33,15 @@ module CompletionKit
|
|
|
33
33
|
handler: :replay
|
|
34
34
|
},
|
|
35
35
|
"judges_compare" => {
|
|
36
|
-
description: "Compare two
|
|
36
|
+
description: "Compare two metric versions' calibration stats side by side. Pass either two metric_version_ids or one metric_id with metric_version_a_id / metric_version_b_id.",
|
|
37
37
|
inputSchema: {
|
|
38
38
|
type: "object",
|
|
39
39
|
properties: {
|
|
40
40
|
metric_id: { type: "integer" },
|
|
41
|
-
|
|
42
|
-
|
|
41
|
+
metric_version_a_id: { type: "integer" },
|
|
42
|
+
metric_version_b_id: { type: "integer" }
|
|
43
43
|
},
|
|
44
|
-
required: ["metric_id", "
|
|
44
|
+
required: ["metric_id", "metric_version_a_id", "metric_version_b_id"]
|
|
45
45
|
},
|
|
46
46
|
handler: :compare
|
|
47
47
|
}
|
|
@@ -49,7 +49,7 @@ module CompletionKit
|
|
|
49
49
|
|
|
50
50
|
def self.suggest(args)
|
|
51
51
|
metric = CompletionKit::Metric.find(args["metric_id"])
|
|
52
|
-
generator = CompletionKit::
|
|
52
|
+
generator = CompletionKit::MetricVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
|
|
53
53
|
variants = generator.call
|
|
54
54
|
return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?
|
|
55
55
|
versions = generator.persist!(variants)
|
|
@@ -75,20 +75,22 @@ module CompletionKit
|
|
|
75
75
|
|
|
76
76
|
def self.compare(args)
|
|
77
77
|
metric = CompletionKit::Metric.find(args["metric_id"])
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
78
|
+
a_id = args["metric_version_a_id"] || args["judge_version_a_id"]
|
|
79
|
+
b_id = args["metric_version_b_id"] || args["judge_version_b_id"]
|
|
80
|
+
a = CompletionKit::MetricVersion.where(metric_id: metric.id).find(a_id)
|
|
81
|
+
b = CompletionKit::MetricVersion.where(metric_id: metric.id).find(b_id)
|
|
82
|
+
stats_a = CompletionKit::MetricCalibrationStats.for(metric, metric_version: a)
|
|
83
|
+
stats_b = CompletionKit::MetricCalibrationStats.for(metric, metric_version: b)
|
|
82
84
|
text_result({
|
|
83
85
|
metric_id: metric.id,
|
|
84
|
-
a:
|
|
85
|
-
b:
|
|
86
|
+
a: metric_version_payload(a, stats_a),
|
|
87
|
+
b: metric_version_payload(b, stats_b),
|
|
86
88
|
delta: delta_payload(stats_a, stats_b),
|
|
87
89
|
recommendation: recommendation_for(stats_a, stats_b)
|
|
88
90
|
})
|
|
89
91
|
end
|
|
90
92
|
|
|
91
|
-
def self.
|
|
93
|
+
def self.metric_version_payload(version, stats)
|
|
92
94
|
{
|
|
93
95
|
id: version.id, state: version.state, current: version.current,
|
|
94
96
|
source: version.source, created_at: version.created_at,
|
|
@@ -33,25 +33,25 @@ module CompletionKit
|
|
|
33
33
|
|
|
34
34
|
CURRENT = :current
|
|
35
35
|
|
|
36
|
-
def self.for(metric,
|
|
37
|
-
resolved = case
|
|
38
|
-
when CURRENT then
|
|
36
|
+
def self.for(metric, metric_version: CURRENT)
|
|
37
|
+
resolved = case metric_version
|
|
38
|
+
when CURRENT then MetricVersion.current.find_by(metric_id: metric.id)
|
|
39
39
|
when nil then nil
|
|
40
|
-
else
|
|
40
|
+
else metric_version
|
|
41
41
|
end
|
|
42
|
-
new(metric: metric,
|
|
42
|
+
new(metric: metric, metric_version: resolved, all_versions: metric_version.nil?).call
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
-
def initialize(metric:,
|
|
45
|
+
def initialize(metric:, metric_version: nil, all_versions: false)
|
|
46
46
|
@metric = metric
|
|
47
|
-
@
|
|
47
|
+
@metric_version = metric_version
|
|
48
48
|
@all_versions = all_versions
|
|
49
49
|
end
|
|
50
50
|
|
|
51
51
|
def call
|
|
52
52
|
scope = Calibration.where(metric_id: @metric.id)
|
|
53
|
-
if @
|
|
54
|
-
scope = scope.where(
|
|
53
|
+
if @metric_version
|
|
54
|
+
scope = scope.where(metric_version_id: @metric_version.id)
|
|
55
55
|
elsif !@all_versions
|
|
56
56
|
scope = scope.none
|
|
57
57
|
end
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
|
-
class
|
|
2
|
+
class MetricVariantGenerator
|
|
3
3
|
DEFAULT_VARIANT_COUNT = 1
|
|
4
4
|
MAX_VARIANT_COUNT = 3
|
|
5
5
|
DEFAULT_TEMPERATURE = 0.4
|
|
@@ -20,9 +20,9 @@ module CompletionKit
|
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
def persist!(variants)
|
|
23
|
-
|
|
23
|
+
MetricVersion.where(metric_id: @metric.id, state: "draft", source: "suggestion").update_all(current: false)
|
|
24
24
|
versions = variants.map do |variant|
|
|
25
|
-
|
|
25
|
+
MetricVersion.create!(
|
|
26
26
|
metric: @metric,
|
|
27
27
|
instruction: variant.instruction,
|
|
28
28
|
rubric_bands: variant.rubric_bands.presence || @metric.rubric_bands,
|
|
@@ -41,8 +41,8 @@ module CompletionKit
|
|
|
41
41
|
private
|
|
42
42
|
|
|
43
43
|
def build_meta_prompt
|
|
44
|
-
disagreements =
|
|
45
|
-
borderlines =
|
|
44
|
+
disagreements = MetricCalibrationExamples.disagreements_for(@metric)
|
|
45
|
+
borderlines = MetricCalibrationExamples.borderlines_for(@metric)
|
|
46
46
|
sections = []
|
|
47
47
|
sections << "You are an expert evaluator. The judge below is misaligned with humans. Propose #{@count == 1 ? "a single" : "#{@count}"} concrete rewrite that closes the gap."
|
|
48
48
|
sections << ""
|
|
@@ -117,7 +117,7 @@ module CompletionKit
|
|
|
117
117
|
end
|
|
118
118
|
end
|
|
119
119
|
|
|
120
|
-
module
|
|
120
|
+
module MetricCalibrationExamples
|
|
121
121
|
module_function
|
|
122
122
|
|
|
123
123
|
def for(metric, limit: 8)
|
|
@@ -134,8 +134,8 @@ module CompletionKit
|
|
|
134
134
|
|
|
135
135
|
def calibrations_for(metric, verdict:, limit:)
|
|
136
136
|
scope = Calibration.where(metric_id: metric.id, verdict: verdict)
|
|
137
|
-
current_version =
|
|
138
|
-
scope = scope.where(
|
|
137
|
+
current_version = MetricVersion.current.find_by(metric_id: metric.id)
|
|
138
|
+
scope = scope.where(metric_version_id: current_version.id) if current_version
|
|
139
139
|
scope.includes(response: :reviews)
|
|
140
140
|
.order(created_at: :desc)
|
|
141
141
|
.limit(limit)
|
|
@@ -3,7 +3,12 @@
|
|
|
3
3
|
<% anchor = metric&.name&.parameterize %>
|
|
4
4
|
<% target_response = if (stats.sample_size.zero? || stats.counter_only?) && metric
|
|
5
5
|
created_by = CompletionKit.config.username.presence || "operator"
|
|
6
|
-
|
|
6
|
+
current_metric_version = CompletionKit::MetricVersion.current.find_by(metric_id: metric.id)
|
|
7
|
+
verdicted_ids = if current_metric_version
|
|
8
|
+
CompletionKit::Calibration.where(metric_id: metric.id, created_by: created_by, metric_version_id: current_metric_version.id).pluck(:response_id)
|
|
9
|
+
else
|
|
10
|
+
[]
|
|
11
|
+
end
|
|
7
12
|
CompletionKit::Response.joins(:reviews)
|
|
8
13
|
.where(reviews: { metric_id: metric.id })
|
|
9
14
|
.where.not(reviews: { ai_score: nil })
|
|
@@ -16,14 +16,14 @@
|
|
|
16
16
|
</div>
|
|
17
17
|
<% end %>
|
|
18
18
|
|
|
19
|
-
<% if edit_draft
|
|
20
|
-
<% pub = local_assigns[:
|
|
19
|
+
<% if edit_draft %>
|
|
20
|
+
<% pub = local_assigns[:published_metric_version] %>
|
|
21
21
|
<% draft_instr_changed = pub && pub.instruction.to_s != edit_draft.instruction.to_s %>
|
|
22
22
|
<% draft_rubric_changed = pub && pub.rubric_bands != edit_draft.rubric_bands %>
|
|
23
23
|
<div class="ck-suggestion-banner" role="status">
|
|
24
24
|
<div class="ck-suggestion-banner__body">
|
|
25
25
|
<p class="ck-kicker">Draft pending</p>
|
|
26
|
-
<p class="ck-meta-copy">
|
|
26
|
+
<p class="ck-meta-copy">The form below shows your unpublished draft. Publish to replace the live<%= " instruction" if draft_instr_changed %><%= " and" if draft_instr_changed && draft_rubric_changed %><%= " rubric" if draft_rubric_changed %> for future runs, or keep editing.</p>
|
|
27
27
|
</div>
|
|
28
28
|
<div class="ck-suggestion-banner__actions">
|
|
29
29
|
<%= button_to "Discard draft", dismiss_suggestion_metric_path(metric, draft_id: edit_draft.id, back_to: "edit"),
|
|
@@ -19,20 +19,17 @@
|
|
|
19
19
|
</div>
|
|
20
20
|
<div class="ck-actions">
|
|
21
21
|
<% if CompletionKit.config.judge_calibration_enabled %>
|
|
22
|
-
<% if @suggestion_draft %>
|
|
23
|
-
|
|
22
|
+
<% if @suggestion_draft || @edit_draft %>
|
|
23
|
+
<% review_title = @suggestion_draft ? "The model proposed improvements based on your disagreements. Review and apply what you want." : "An unpublished draft of this metric is saved. Review and publish, or keep editing." %>
|
|
24
|
+
<%= link_to "Review changes →", edit_metric_path(@metric),
|
|
24
25
|
class: ck_button_classes(:dark),
|
|
25
|
-
title:
|
|
26
|
-
<% elsif @edit_draft %>
|
|
27
|
-
<%= link_to "Review draft →", edit_metric_path(@metric),
|
|
28
|
-
class: ck_button_classes(:dark),
|
|
29
|
-
title: "An unpublished draft of this metric is saved. Review and publish, or keep editing." %>
|
|
26
|
+
title: review_title %>
|
|
30
27
|
<% elsif @improve_disagreement_count.positive? %>
|
|
31
28
|
<%= button_to "Improve the metric", suggest_variants_metric_path(@metric),
|
|
32
29
|
method: :post, form_class: "inline-block",
|
|
33
30
|
class: ck_button_classes(:light, variant: :outline),
|
|
34
|
-
title: "
|
|
35
|
-
data: { turbo_confirm: "
|
|
31
|
+
title: "Ask the model to suggest improvements to this metric's instruction and rubric based on the disagreements collected so far.",
|
|
32
|
+
data: { turbo_confirm: "Ask the model for suggested improvements based on the disagreements collected so far?" } %>
|
|
36
33
|
<% else %>
|
|
37
34
|
<button type="button" class="<%= ck_button_classes(:light, variant: :outline) %>" disabled
|
|
38
35
|
title="Mark at least one case as Disagree before the model can suggest a change.">Improve the metric</button>
|
|
@@ -168,19 +165,20 @@
|
|
|
168
165
|
<p class="ck-kicker">Cases to learn from</p>
|
|
169
166
|
<span class="ck-chip"><%= pluralize(@disagreements.size, "case") %></span>
|
|
170
167
|
</div>
|
|
171
|
-
|
|
168
|
+
<% mixed_versions = @disagreements.map(&:metric_version_id).uniq.size > 1 %>
|
|
169
|
+
<p class="ck-meta-copy">Cases where a reviewer's score didn't match the judge's. Pin useful ones with <strong>Remember this</strong> so the judge sees them next time it grades<%= " (pins flow into the current judge regardless of which version produced the verdict)" if mixed_versions %>.</p>
|
|
172
170
|
<% existing_ids = Array(@metric.few_shot_examples).map { |fs| fs["calibration_id"] } %>
|
|
173
171
|
<ul class="ck-disagreement-list">
|
|
174
172
|
<% @disagreements.each do |cal| %>
|
|
175
173
|
<% review = cal.response.reviews.find { |r| r.metric_id == @metric.id } %>
|
|
176
174
|
<% already = existing_ids.include?(cal.id) %>
|
|
177
|
-
<%
|
|
178
|
-
<% on_current =
|
|
175
|
+
<% cal_metric_version = cal.metric_version %>
|
|
176
|
+
<% on_current = cal_metric_version&.id == @published_metric_version.id %>
|
|
179
177
|
<li class="ck-disagreement<%= " ck-disagreement--remembered" if already %><%= " ck-disagreement--stale" unless on_current %>">
|
|
180
178
|
<div class="ck-disagreement__head">
|
|
181
179
|
<div class="ck-disagreement__scores">
|
|
182
|
-
<% if
|
|
183
|
-
<span class="ck-source-chip <%= on_current ? "ck-source-chip--current" : "ck-source-chip--past" %>" title="<%= on_current ? "Verdict on the live judge version." : "Verdict on a superseded judge version." %>"><%=
|
|
180
|
+
<% if cal_metric_version && mixed_versions %>
|
|
181
|
+
<span class="ck-source-chip <%= on_current ? "ck-source-chip--current" : "ck-source-chip--past" %>" title="<%= on_current ? "Verdict on the live judge version." : "Verdict on a superseded judge version." %>"><%= cal_metric_version.version_label %></span>
|
|
184
182
|
<% end %>
|
|
185
183
|
<span class="ck-disagreement__scores-label">Judge</span>
|
|
186
184
|
<% if review&.ai_score %>
|
|
@@ -98,10 +98,15 @@
|
|
|
98
98
|
|
|
99
99
|
<div class="ck-review-list">
|
|
100
100
|
<% @reviews.each do |review| %>
|
|
101
|
-
|
|
101
|
+
<% review_version = review.metric_version %>
|
|
102
|
+
<% stale = review.stale_against_current_judge? %>
|
|
103
|
+
<div class="ck-review-card<%= " ck-review-card--stale" if stale %>" id="<%= review.metric&.name&.parameterize || "review-#{review.id}" %>">
|
|
102
104
|
<div class="ck-review-card__header">
|
|
103
105
|
<span class="ck-review-card__metric"><% if review.metric %><%= link_to review.metric_name, metric_path(review.metric), class: "ck-link" %><% else %><%= review.metric_name %><% end %></span>
|
|
104
106
|
<div class="ck-inline">
|
|
107
|
+
<% if review_version %>
|
|
108
|
+
<span class="ck-source-chip <%= stale ? "ck-source-chip--past" : "ck-source-chip--current" %>" title="<%= stale ? "Score produced by #{review_version.version_label} of this metric. The live judge has changed since." : "Score produced by the live judge (#{review_version.version_label})." %>"><%= review_version.version_label %></span>
|
|
109
|
+
<% end %>
|
|
105
110
|
<% if review.ai_score %>
|
|
106
111
|
<% 5.times do |i| %>
|
|
107
112
|
<svg viewBox="0 0 24 24" width="16" height="16" stroke-width="1.75" class="ck-star <%= i < review.ai_score.to_i ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
|
|
@@ -111,6 +116,9 @@
|
|
|
111
116
|
<% end %>
|
|
112
117
|
</div>
|
|
113
118
|
</div>
|
|
119
|
+
<% if stale %>
|
|
120
|
+
<p class="ck-review-card__stale-note">Scored against a superseded version of this metric. The live judge may score this differently.</p>
|
|
121
|
+
<% end %>
|
|
114
122
|
<% if review.ai_feedback.present? %>
|
|
115
123
|
<p class="ck-review-card__feedback"><%= review.ai_feedback %></p>
|
|
116
124
|
<% end %>
|
|
@@ -18,6 +18,29 @@
|
|
|
18
18
|
<% dataset_preview_lines = dataset_lines.first(50) %>
|
|
19
19
|
<% end %>
|
|
20
20
|
|
|
21
|
+
<% if CompletionKit.config.judge_calibration_enabled %>
|
|
22
|
+
<% stale_summary = @run.stale_review_summary %>
|
|
23
|
+
<% if stale_summary.any? %>
|
|
24
|
+
<div class="ck-stale-versions-banner" role="status">
|
|
25
|
+
<div class="ck-stale-versions-banner__body">
|
|
26
|
+
<p class="ck-kicker">Stale judge versions</p>
|
|
27
|
+
<p class="ck-meta-copy">
|
|
28
|
+
This run was scored against metric versions that are no longer live.
|
|
29
|
+
<% stale_summary.values.each_with_index do |s, i| %>
|
|
30
|
+
<%= ", " if i > 0 %><strong><%= s[:metric_name] %></strong> (scored by <%= s[:scored_labels].join(", ") %>; live is <%= s[:current_label] %>)<% end %>.
|
|
31
|
+
Re-run to refresh the scores with the current judge.
|
|
32
|
+
</p>
|
|
33
|
+
</div>
|
|
34
|
+
<% if @run.status == "completed" %>
|
|
35
|
+
<%= button_to "Re-run with current judge",
|
|
36
|
+
rerun_run_path(@run), method: :post,
|
|
37
|
+
class: ck_button_classes(:dark), form_class: "inline-block",
|
|
38
|
+
data: { turbo_confirm: "Create a new run with the current metric versions? The original run stays as a record." } %>
|
|
39
|
+
<% end %>
|
|
40
|
+
</div>
|
|
41
|
+
<% end %>
|
|
42
|
+
<% end %>
|
|
43
|
+
|
|
21
44
|
<div class="ck-run-config">
|
|
22
45
|
<div class="ck-run-config__row">
|
|
23
46
|
<span class="ck-run-config__key">Created</span>
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
class RenameJudgeVersionToMetricVersion < ActiveRecord::Migration[8.1]
|
|
2
|
+
def change
|
|
3
|
+
rename_table :completion_kit_judge_versions, :completion_kit_metric_versions
|
|
4
|
+
rename_column :completion_kit_calibrations, :judge_version_id, :metric_version_id
|
|
5
|
+
|
|
6
|
+
rename_index :completion_kit_metric_versions,
|
|
7
|
+
"index_ck_judge_versions_on_metric_id",
|
|
8
|
+
"index_ck_metric_versions_on_metric_id"
|
|
9
|
+
rename_index :completion_kit_metric_versions,
|
|
10
|
+
"index_ck_judge_versions_on_metric_current",
|
|
11
|
+
"index_ck_metric_versions_on_metric_current"
|
|
12
|
+
rename_index :completion_kit_metric_versions,
|
|
13
|
+
"index_ck_judge_versions_on_metric_state",
|
|
14
|
+
"index_ck_metric_versions_on_metric_state"
|
|
15
|
+
rename_index :completion_kit_metric_versions,
|
|
16
|
+
"index_ck_judge_versions_on_metric_version",
|
|
17
|
+
"index_ck_metric_versions_on_metric_vnum"
|
|
18
|
+
rename_index :completion_kit_calibrations,
|
|
19
|
+
"index_ck_calibrations_on_judge_version_id",
|
|
20
|
+
"index_ck_calibrations_on_metric_version_id"
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
class AddMetricVersionToReviews < ActiveRecord::Migration[8.1]
|
|
2
|
+
def change
|
|
3
|
+
add_column :completion_kit_reviews, :metric_version_id, :bigint
|
|
4
|
+
add_index :completion_kit_reviews, :metric_version_id, name: "index_ck_reviews_on_metric_version_id"
|
|
5
|
+
|
|
6
|
+
reversible do |dir|
|
|
7
|
+
dir.up do
|
|
8
|
+
execute <<~SQL
|
|
9
|
+
UPDATE completion_kit_reviews
|
|
10
|
+
SET metric_version_id = (
|
|
11
|
+
SELECT id FROM completion_kit_metric_versions mv
|
|
12
|
+
WHERE mv.metric_id = completion_kit_reviews.metric_id
|
|
13
|
+
AND mv.current = #{ActiveRecord::Base.connection.quote(true)}
|
|
14
|
+
LIMIT 1
|
|
15
|
+
)
|
|
16
|
+
WHERE metric_id IS NOT NULL
|
|
17
|
+
SQL
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: completion-kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.43
|
|
4
|
+
version: 0.5.44
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Damien Bastin
|
|
@@ -272,11 +272,11 @@ files:
|
|
|
272
272
|
- app/models/completion_kit/calibration.rb
|
|
273
273
|
- app/models/completion_kit/dashboard_dismissal.rb
|
|
274
274
|
- app/models/completion_kit/dataset.rb
|
|
275
|
-
- app/models/completion_kit/judge_version.rb
|
|
276
275
|
- app/models/completion_kit/mcp_session.rb
|
|
277
276
|
- app/models/completion_kit/metric.rb
|
|
278
277
|
- app/models/completion_kit/metric_group.rb
|
|
279
278
|
- app/models/completion_kit/metric_group_membership.rb
|
|
279
|
+
- app/models/completion_kit/metric_version.rb
|
|
280
280
|
- app/models/completion_kit/model.rb
|
|
281
281
|
- app/models/completion_kit/prompt.rb
|
|
282
282
|
- app/models/completion_kit/provider_credential.rb
|
|
@@ -295,7 +295,6 @@ files:
|
|
|
295
295
|
- app/services/completion_kit/csv_processor.rb
|
|
296
296
|
- app/services/completion_kit/dashboard_stats.rb
|
|
297
297
|
- app/services/completion_kit/judge_service.rb
|
|
298
|
-
- app/services/completion_kit/judge_variant_generator.rb
|
|
299
298
|
- app/services/completion_kit/llm_client.rb
|
|
300
299
|
- app/services/completion_kit/mcp_dispatcher.rb
|
|
301
300
|
- app/services/completion_kit/mcp_tools/base.rb
|
|
@@ -310,6 +309,7 @@ files:
|
|
|
310
309
|
- app/services/completion_kit/mcp_tools/runs.rb
|
|
311
310
|
- app/services/completion_kit/mcp_tools/tags.rb
|
|
312
311
|
- app/services/completion_kit/metric_calibration_stats.rb
|
|
312
|
+
- app/services/completion_kit/metric_variant_generator.rb
|
|
313
313
|
- app/services/completion_kit/model_discovery_service.rb
|
|
314
314
|
- app/services/completion_kit/ollama_client.rb
|
|
315
315
|
- app/services/completion_kit/onboarding/checklist.rb
|
|
@@ -422,6 +422,8 @@ files:
|
|
|
422
422
|
- db/migrate/20260523000002_add_state_to_completion_kit_judge_versions.rb
|
|
423
423
|
- db/migrate/20260524000001_create_completion_kit_starter_metric_dismissals.rb
|
|
424
424
|
- db/migrate/20260525000001_add_version_number_and_published_at_to_judge_versions.rb
|
|
425
|
+
- db/migrate/20260528000001_rename_judge_version_to_metric_version.rb
|
|
426
|
+
- db/migrate/20260528000002_add_metric_version_to_reviews.rb
|
|
425
427
|
- lib/completion-kit.rb
|
|
426
428
|
- lib/completion_kit.rb
|
|
427
429
|
- lib/completion_kit/concurrency_check.rb
|