completion-kit 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/app/assets/stylesheets/completion_kit/application.css +26 -4
  3. data/app/controllers/completion_kit/api/v1/base_controller.rb +36 -4
  4. data/app/controllers/completion_kit/api/v1/calibrations_controller.rb +24 -5
  5. data/app/controllers/completion_kit/api/v1/datasets_controller.rb +5 -3
  6. data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +5 -3
  7. data/app/controllers/completion_kit/api/v1/metric_versions_controller.rb +51 -0
  8. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -4
  9. data/app/controllers/completion_kit/api/v1/prompts_controller.rb +5 -3
  10. data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +3 -3
  11. data/app/controllers/completion_kit/api/v1/responses_controller.rb +3 -1
  12. data/app/controllers/completion_kit/api/v1/runs_controller.rb +79 -6
  13. data/app/controllers/completion_kit/api/v1/tags_controller.rb +3 -3
  14. data/app/controllers/completion_kit/metrics_controller.rb +3 -3
  15. data/app/controllers/completion_kit/runs_controller.rb +1 -1
  16. data/app/helpers/completion_kit/application_helper.rb +0 -14
  17. data/app/jobs/completion_kit/generate_row_job.rb +5 -12
  18. data/app/jobs/completion_kit/judge_review_job.rb +10 -16
  19. data/app/models/completion_kit/metric.rb +1 -0
  20. data/app/models/completion_kit/metric_version.rb +16 -0
  21. data/app/models/completion_kit/response.rb +13 -17
  22. data/app/models/completion_kit/review.rb +18 -22
  23. data/app/models/completion_kit/run.rb +27 -24
  24. data/app/models/concerns/completion_kit/has_job_status.rb +31 -0
  25. data/app/services/completion_kit/mcp_dispatcher.rb +3 -1
  26. data/app/services/completion_kit/mcp_tools/metric_versions.rb +67 -0
  27. data/app/services/completion_kit/starter_metrics.rb +5 -5
  28. data/app/views/completion_kit/api_reference/_body.html.erb +91 -6
  29. data/app/views/completion_kit/api_reference/index.html.erb +12 -0
  30. data/app/views/completion_kit/metrics/index.html.erb +3 -3
  31. data/app/views/completion_kit/metrics/show.html.erb +1 -0
  32. data/config/routes.rb +16 -1
  33. data/lib/completion_kit/version.rb +1 -1
  34. metadata +4 -1
@@ -31,8 +31,7 @@ module CompletionKit
31
31
  before_perform do |job|
32
32
  response = Response.find_by(id: job.arguments.last)
33
33
  next unless response
34
- response.update_columns(status: "retrying", attempts: response.attempts + 1)
35
- response.run.send(:broadcast_response_update, response) if response.run
34
+ response.update!(status: "retrying", attempts: response.attempts + 1)
36
35
  end
37
36
 
38
37
  def perform(run_id, response_id)
@@ -61,12 +60,10 @@ module CompletionKit
61
60
  response_text: text,
62
61
  error_provider: nil, error_class: nil, error_status: nil, error_message: nil
63
62
  )
64
- run.send(:broadcast_response_update, response)
65
- run.send(:broadcast_progress)
66
63
 
67
64
  if run.judge_configured?
68
65
  run.metrics.each do |metric|
69
- JudgeReviewJob.perform_later(response.id, metric.id)
66
+ JudgeReviewJob.perform_later(response.id, metric.id, run.id)
70
67
  end
71
68
  end
72
69
 
@@ -83,19 +80,16 @@ module CompletionKit
83
80
  end
84
81
 
85
82
  def record_terminal_failure!(error)
86
- response_id = @response_id || arguments.last
87
- response = Response.find_by(id: response_id)
83
+ response = Response.find_by(id: @response_id)
88
84
  return unless response
89
85
 
90
- response.update_columns(
86
+ response.update!(
91
87
  status: "failed",
92
88
  error_provider: provider_for(response),
93
89
  error_class: error.class.name,
94
90
  error_status: error.respond_to?(:status) ? error.status : nil,
95
91
  error_message: error.message.to_s.truncate(2000)
96
92
  )
97
- response.run&.send(:broadcast_response_update, response)
98
- response.run&.send(:broadcast_progress)
99
93
  end
100
94
 
101
95
  def provider_for(response)
@@ -103,8 +97,7 @@ module CompletionKit
103
97
  end
104
98
 
105
99
  def enqueue_completion_check
106
- run_id = @run_id || arguments.first
107
- RunCompletionCheckJob.perform_later(run_id)
100
+ RunCompletionCheckJob.perform_later(@run_id)
108
101
  end
109
102
  end
110
103
  end
@@ -5,7 +5,9 @@ module CompletionKit
5
5
  queue_as :llm
6
6
 
7
7
  limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
8
- key: ->(response_id, _) { "run:#{Response.find_by(id: response_id)&.run_id}" },
8
+ key: ->(response_id, _metric_id, run_id = nil) {
9
+ "run:#{run_id || Response.where(id: response_id).pick(:run_id)}"
10
+ },
9
11
  duration: 10.minutes
10
12
 
11
13
  def self.rate_limit_wait(executions)
@@ -29,7 +31,7 @@ module CompletionKit
29
31
  end
30
32
 
31
33
  before_perform do |job|
32
- response_id, metric_id = job.arguments
34
+ response_id, metric_id, _run_id = job.arguments
33
35
  response = Response.find_by(id: response_id)
34
36
  next unless response
35
37
  review = response.reviews.find_or_initialize_by(metric_id: metric_id)
@@ -37,10 +39,9 @@ module CompletionKit
37
39
  review.attempts = (review.attempts || 0) + 1
38
40
  review.status = "retrying"
39
41
  review.save!(validate: false)
40
- response.run.send(:broadcast_response_update, response) if response.run
41
42
  end
42
43
 
43
- def perform(response_id, metric_id)
44
+ def perform(response_id, metric_id, _run_id = nil)
44
45
  @response_id = response_id
45
46
  @metric_id = metric_id
46
47
 
@@ -75,8 +76,6 @@ module CompletionKit
75
76
  review.save!
76
77
 
77
78
  confirm_judging_capability(run.judge_model)
78
- run.send(:broadcast_response_update, response)
79
- run.send(:broadcast_progress)
80
79
  enqueue_completion_check
81
80
  end
82
81
 
@@ -92,14 +91,12 @@ module CompletionKit
92
91
  end
93
92
 
94
93
  def record_terminal_failure!(error)
95
- response_id = @response_id || arguments.first
96
- metric_id = @metric_id || arguments.last
97
- response = Response.find_by(id: response_id)
94
+ response = Response.find_by(id: @response_id)
98
95
  return unless response
99
96
 
100
- review = response.reviews.find_or_initialize_by(metric_id: metric_id)
97
+ review = response.reviews.find_or_initialize_by(metric_id: @metric_id)
101
98
  review.assign_attributes(
102
- metric_name: review.metric_name || Metric.find_by(id: metric_id)&.name || "(deleted metric)",
99
+ metric_name: review.metric_name || Metric.find_by(id: @metric_id)&.name || "(deleted metric)",
103
100
  status: "failed",
104
101
  error_provider: provider_for(response),
105
102
  error_class: error.class.name,
@@ -107,19 +104,16 @@ module CompletionKit
107
104
  error_message: error.message.to_s.truncate(2000)
108
105
  )
109
106
  review.save!(validate: false)
110
- response.run&.send(:broadcast_response_update, response)
111
- response.run&.send(:broadcast_progress)
112
107
  end
113
108
 
114
109
  def provider_for(response)
115
110
  run = response.run
116
- return nil unless run&.judge_model
111
+ return nil unless run.judge_model
117
112
  ApiConfig.provider_for_model(run.judge_model)
118
113
  end
119
114
 
120
115
  def enqueue_completion_check
121
- response_id = @response_id || arguments.first
122
- response = Response.find_by(id: response_id)
116
+ response = Response.find_by(id: @response_id)
123
117
  RunCompletionCheckJob.perform_later(response.run_id) if response
124
118
  end
125
119
 
@@ -12,6 +12,7 @@ module CompletionKit
12
12
 
13
13
  has_many :metric_group_memberships, dependent: :destroy
14
14
  has_many :metric_groups, through: :metric_group_memberships, source: :metric_group
15
+ has_many :metric_versions, dependent: :destroy
15
16
  has_many :reviews, dependent: :nullify
16
17
  has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
17
18
 
@@ -53,6 +53,22 @@ module CompletionKit
53
53
  self
54
54
  end
55
55
 
56
+ def revert!
57
+ raise ArgumentError, "only a published version can be reverted to" unless published?
58
+ audit = nil
59
+ MetricVersion.transaction do
60
+ audit = self.class.create!(
61
+ metric: metric,
62
+ instruction: instruction,
63
+ rubric_bands: rubric_bands,
64
+ state: "draft",
65
+ source: "revert"
66
+ )
67
+ audit.publish!
68
+ end
69
+ audit
70
+ end
71
+
56
72
  def as_json(options = {})
57
73
  {
58
74
  id: id,
@@ -1,7 +1,6 @@
1
1
  module CompletionKit
2
2
  class Response < ApplicationRecord
3
- STATUSES = %w[pending retrying succeeded failed].freeze
4
- TERMINAL_STATUSES = %w[succeeded failed].freeze
3
+ include HasJobStatus
5
4
 
6
5
  belongs_to :run
7
6
  has_many :reviews, dependent: :destroy
@@ -10,17 +9,11 @@ module CompletionKit
10
9
  delegate :prompt, to: :run
11
10
 
12
11
  validates :response_text, presence: true, if: :succeeded?
13
- validates :status, inclusion: { in: STATUSES }
14
12
 
15
13
  before_validation :set_default_status, on: :create
16
14
 
17
- def terminal?
18
- TERMINAL_STATUSES.include?(status)
19
- end
20
-
21
- def succeeded?
22
- status == "succeeded"
23
- end
15
+ after_save_commit :broadcast_row_update, unless: :destroyed?
16
+ after_save_commit :broadcast_run_progress, if: :should_broadcast_progress?
24
17
 
25
18
  def as_json(options = {})
26
19
  {
@@ -47,19 +40,22 @@ module CompletionKit
47
40
  def fully_reviewed?
48
41
  metric_ids = run.metric_ids
49
42
  return true if metric_ids.empty?
50
- reviewed_metric_ids = reviews.where(status: Review::TERMINAL_STATUSES).pluck(:metric_id).uniq
43
+ reviewed_metric_ids = reviews.where(status: HasJobStatus::TERMINAL_STATUSES).pluck(:metric_id).uniq
51
44
  (metric_ids - reviewed_metric_ids).empty?
52
45
  end
53
46
 
54
- def error_payload
55
- return nil if error_class.blank?
56
- { provider: error_provider, class: error_class, status: error_status, message: error_message }
47
+ private
48
+
49
+ def broadcast_row_update
50
+ run.broadcast_response_update(self)
57
51
  end
58
52
 
59
- private
53
+ def broadcast_run_progress
54
+ run.broadcast_progress
55
+ end
60
56
 
61
- def set_default_status
62
- self.status ||= "pending"
57
+ def should_broadcast_progress?
58
+ saved_change_to_status? && terminal?
63
59
  end
64
60
  end
65
61
  end
@@ -1,37 +1,25 @@
1
1
  module CompletionKit
2
2
  class Review < ApplicationRecord
3
- STATUSES = %w[pending retrying succeeded failed].freeze
4
- TERMINAL_STATUSES = %w[succeeded failed].freeze
3
+ include HasJobStatus
5
4
 
6
5
  belongs_to :response
7
6
  belongs_to :metric, optional: true
8
7
  belongs_to :metric_version, optional: true
9
8
  has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
10
9
 
11
- def stale_against_current_judge?
12
- return false unless metric_id && metric_version_id
13
- current_id = MetricVersion.current.where(metric_id: metric_id).limit(1).pick(:id)
14
- return false if current_id.nil?
15
- metric_version_id != current_id
16
- end
17
-
18
10
  validates :metric_name, presence: true
19
- validates :status, inclusion: { in: STATUSES }
20
11
  validates :ai_score, numericality: { greater_than_or_equal_to: 1, less_than_or_equal_to: 5 }, allow_nil: true
21
12
 
22
13
  before_validation :set_default_status
23
14
 
24
- def terminal?
25
- TERMINAL_STATUSES.include?(status)
26
- end
27
-
28
- def succeeded?
29
- status == "succeeded"
30
- end
15
+ after_save_commit :broadcast_parent_row_update, unless: :destroyed?
16
+ after_save_commit :broadcast_run_progress, if: :should_broadcast_progress?
31
17
 
32
- def error_payload
33
- return nil if error_class.blank?
34
- { provider: error_provider, class: error_class, status: error_status, message: error_message }
18
+ def stale_against_current_judge?
19
+ return false unless metric_id && metric_version_id
20
+ current_id = MetricVersion.current.where(metric_id: metric_id).limit(1).pick(:id)
21
+ return false if current_id.nil?
22
+ metric_version_id != current_id
35
23
  end
36
24
 
37
25
  def as_json(options = {})
@@ -46,8 +34,16 @@ module CompletionKit
46
34
 
47
35
  private
48
36
 
49
- def set_default_status
50
- self.status ||= "pending"
37
+ def broadcast_parent_row_update
38
+ response.run.broadcast_response_update(response)
39
+ end
40
+
41
+ def broadcast_run_progress
42
+ response.run.broadcast_progress
43
+ end
44
+
45
+ def should_broadcast_progress?
46
+ saved_change_to_status? && terminal?
51
47
  end
52
48
  end
53
49
  end
@@ -43,7 +43,7 @@ module CompletionKit
43
43
  end
44
44
 
45
45
  def outstanding_work_zero?
46
- return false if responses.where.not(status: Response::TERMINAL_STATUSES).exists?
46
+ return false if responses.where.not(status: HasJobStatus::TERMINAL_STATUSES).exists?
47
47
 
48
48
  metric_ids = metrics.pluck(:id)
49
49
  return true if metric_ids.empty?
@@ -55,7 +55,7 @@ module CompletionKit
55
55
  terminal_review_count = Review.where(
56
56
  response_id: succeeded_response_ids,
57
57
  metric_id: metric_ids,
58
- status: Review::TERMINAL_STATUSES
58
+ status: HasJobStatus::TERMINAL_STATUSES
59
59
  ).count
60
60
 
61
61
  terminal_review_count >= expected_reviews
@@ -118,6 +118,10 @@ module CompletionKit
118
118
  end
119
119
 
120
120
  def start!
121
+ unless %w[pending failed].include?(status)
122
+ return fail_with_summary!("Cannot start a run in state \"#{status}\". Use rerun to create a fresh copy, or retry_failures / regrade to work with the existing responses.")
123
+ end
124
+
121
125
  rows = if dataset
122
126
  CsvProcessor.process_self(self)
123
127
  else
@@ -161,7 +165,7 @@ module CompletionKit
161
165
  response = responses.create!(attrs)
162
166
 
163
167
  if judge_only?
164
- metrics.each { |m| JudgeReviewJob.perform_later(response.id, m.id) } if judge_configured?
168
+ metrics.each { |m| JudgeReviewJob.perform_later(response.id, m.id, id) } if judge_configured?
165
169
  else
166
170
  GenerateRowJob.perform_later(id, response.id)
167
171
  end
@@ -202,7 +206,7 @@ module CompletionKit
202
206
  update!(status: "running", failure_summary: nil, error_message: nil)
203
207
 
204
208
  response_ids.each do |rid|
205
- grading_metrics.each { |m| JudgeReviewJob.perform_later(rid, m.id) }
209
+ grading_metrics.each { |m| JudgeReviewJob.perform_later(rid, m.id, id) }
206
210
  end
207
211
  RunCompletionCheckJob.perform_later(id)
208
212
  end
@@ -272,17 +276,6 @@ module CompletionKit
272
276
  }
273
277
  end
274
278
 
275
- private
276
-
277
- def fail_with_summary!(message)
278
- errors.add(:base, message)
279
- if persisted?
280
- update_columns(status: "failed", failure_summary: message, error_message: message)
281
- broadcast_ui
282
- end
283
- false
284
- end
285
-
286
279
  def broadcast_ui
287
280
  broadcast_progress
288
281
  broadcast_status_header
@@ -290,14 +283,6 @@ module CompletionKit
290
283
  broadcast_sort_toolbar
291
284
  end
292
285
 
293
- def render_engine_partial(partial, locals)
294
- CompletionKit::Engine.warm_routes!
295
- CompletionKit::ApplicationController.render(
296
- partial: partial,
297
- locals: locals
298
- )
299
- end
300
-
301
286
  def broadcast_progress
302
287
  reload
303
288
  broadcast_replace_to(
@@ -305,7 +290,6 @@ module CompletionKit
305
290
  target: "run_status_panel",
306
291
  html: render_engine_partial("completion_kit/runs/status_panel", run: self)
307
292
  )
308
- broadcast_status_header
309
293
  end
310
294
 
311
295
  def broadcast_status_header
@@ -356,6 +340,25 @@ module CompletionKit
356
340
  )
357
341
  end
358
342
 
343
+ private
344
+
345
+ def fail_with_summary!(message)
346
+ errors.add(:base, message)
347
+ if persisted?
348
+ update_columns(status: "failed", failure_summary: message, error_message: message)
349
+ broadcast_ui
350
+ end
351
+ false
352
+ end
353
+
354
+ def render_engine_partial(partial, locals)
355
+ CompletionKit::Engine.warm_routes!
356
+ CompletionKit::ApplicationController.render(
357
+ partial: partial,
358
+ locals: locals
359
+ )
360
+ end
361
+
359
362
  def set_default_status
360
363
  self.status ||= "pending"
361
364
  end
@@ -0,0 +1,31 @@
1
+ module CompletionKit
2
+ module HasJobStatus
3
+ extend ActiveSupport::Concern
4
+
5
+ STATUSES = %w[pending retrying succeeded failed].freeze
6
+ TERMINAL_STATUSES = %w[succeeded failed].freeze
7
+
8
+ included do
9
+ validates :status, inclusion: { in: STATUSES }
10
+ end
11
+
12
+ def terminal?
13
+ TERMINAL_STATUSES.include?(status)
14
+ end
15
+
16
+ def succeeded?
17
+ status == "succeeded"
18
+ end
19
+
20
+ def error_payload
21
+ return nil if error_class.blank?
22
+ { provider: error_provider, class: error_class, status: error_status, message: error_message }
23
+ end
24
+
25
+ private
26
+
27
+ def set_default_status
28
+ self.status ||= "pending"
29
+ end
30
+ end
31
+ end
@@ -32,6 +32,7 @@ module CompletionKit
32
32
  McpTools::Datasets.definitions +
33
33
  McpTools::Metrics.definitions +
34
34
  McpTools::MetricGroups.definitions +
35
+ McpTools::MetricVersions.definitions +
35
36
  McpTools::ProviderCredentials.definitions +
36
37
  McpTools::Tags.definitions +
37
38
  McpTools::Calibrations.definitions +
@@ -44,8 +45,9 @@ module CompletionKit
44
45
  when /\Aruns_/ then McpTools::Runs.call(name, arguments)
45
46
  when /\Aresponses_/ then McpTools::Responses.call(name, arguments)
46
47
  when /\Adatasets_/ then McpTools::Datasets.call(name, arguments)
47
- when /\Ametrics_/ then McpTools::Metrics.call(name, arguments)
48
+ when /\Ametric_versions_/ then McpTools::MetricVersions.call(name, arguments)
48
49
  when /\Ametric_groups_/ then McpTools::MetricGroups.call(name, arguments)
50
+ when /\Ametrics_/ then McpTools::Metrics.call(name, arguments)
49
51
  when /\Aprovider_credentials_/ then McpTools::ProviderCredentials.call(name, arguments)
50
52
  when /\Atags_/ then McpTools::Tags.call(name, arguments)
51
53
  when /\Acalibrations_/ then McpTools::Calibrations.call(name, arguments)
@@ -0,0 +1,67 @@
1
+ module CompletionKit
2
+ module McpTools
3
+ module MetricVersions
4
+ extend Base
5
+
6
+ TOOLS = {
7
+ "metric_versions_list" => {
8
+ description: "List every MetricVersion (drafts + published) for a metric, newest first. Each row carries version_number, state, source, current flag, and timestamps.",
9
+ inputSchema: {
10
+ type: "object",
11
+ properties: {
12
+ metric_id: { type: "integer" }
13
+ },
14
+ required: ["metric_id"]
15
+ },
16
+ handler: :list
17
+ },
18
+ "metric_versions_publish" => {
19
+ description: "Publish a MetricVersion as the live judge for its metric. Works for both 'draft → published' and 'revert to an older published version → current'. Transactionally flips current, demotes peers, and writes the version's instruction + rubric_bands back onto the metric so the judge actually uses them.",
20
+ inputSchema: {
21
+ type: "object",
22
+ properties: {
23
+ metric_version_id: { type: "integer" }
24
+ },
25
+ required: ["metric_version_id"]
26
+ },
27
+ handler: :publish
28
+ },
29
+ "metric_versions_dismiss" => {
30
+ description: "Destroy a draft MetricVersion (use for either source: 'edit' or source: 'suggestion'). Published versions are refused — to demote a published version, publish a different one as current instead.",
31
+ inputSchema: {
32
+ type: "object",
33
+ properties: {
34
+ metric_version_id: { type: "integer" }
35
+ },
36
+ required: ["metric_version_id"]
37
+ },
38
+ handler: :dismiss
39
+ }
40
+ }.freeze
41
+
42
+ def self.list(args)
43
+ metric = CompletionKit::Metric.find(args["metric_id"])
44
+ versions = CompletionKit::MetricVersion.where(metric_id: metric.id).order(version_number: :desc)
45
+ text_result(versions.map(&:as_json))
46
+ end
47
+
48
+ def self.publish(args)
49
+ version = CompletionKit::MetricVersion.find(args["metric_version_id"])
50
+ if version.published? && !version.current?
51
+ audit = version.revert!
52
+ text_result(audit.as_json)
53
+ else
54
+ version.publish!
55
+ text_result(version.reload.as_json)
56
+ end
57
+ end
58
+
59
+ def self.dismiss(args)
60
+ version = CompletionKit::MetricVersion.find(args["metric_version_id"])
61
+ return error_result("Cannot dismiss a published version. Publish a different version as current instead.") if version.published?
62
+ version.destroy!
63
+ text_result({id: version.id, destroyed: true})
64
+ end
65
+ end
66
+ end
67
+ end
@@ -21,8 +21,8 @@ module CompletionKit
21
21
  key: "instruction_following",
22
22
  name: "Instruction following",
23
23
  description: "Did the model do everything that was asked?",
24
- catches: "The response is factually right but ignores \"answer in two sentences\", \"use bullet points\", \"do not include X\". Distinct from Correctness a response can be right and still fail this.",
25
- instruction: "Did the model do every concrete thing the prompt asked for? Score against the explicit requirements in the prompt (format constraints, count limits, exclusions, audience cues). Factual accuracy is a different dimension score that elsewhere.",
24
+ catches: "The response is factually right but ignores \"answer in two sentences\", \"use bullet points\", \"do not include X\". Distinct from Correctness; a response can be right and still fail this.",
25
+ instruction: "Did the model do every concrete thing the prompt asked for? Score against the explicit requirements in the prompt (format constraints, count limits, exclusions, audience cues). Factual accuracy is a different dimension; score that elsewhere.",
26
26
  rubric_bands: [
27
27
  { "stars" => 5, "description" => "Followed every requirement in the prompt exactly." },
28
28
  { "stars" => 4, "description" => "Followed every requirement with a small slip." },
@@ -36,7 +36,7 @@ module CompletionKit
36
36
  name: "Format compliance",
37
37
  description: "Does the output follow the required structure?",
38
38
  catches: "Invalid JSON, missing schema fields, extra prose around a structured response, wrong casing on keys. Critical for any LLM wired into an API.",
39
- instruction: "Does the output match the format the prompt asked for JSON shape, schema, keys, casing, no stray prose? Score on whether a downstream parser would accept it without massaging.",
39
+ instruction: "Does the output match the format the prompt asked for: JSON shape, schema, keys, casing, no stray prose? Score on whether a downstream parser would accept it without massaging.",
40
40
  rubric_bands: [
41
41
  { "stars" => 5, "description" => "Exact spec, ready to consume programmatically." },
42
42
  { "stars" => 4, "description" => "Spec-compliant with one cosmetic issue." },
@@ -62,9 +62,9 @@ module CompletionKit
62
62
  Starter.new(
63
63
  key: "conciseness",
64
64
  name: "Conciseness",
65
- description: "Is it the right length no padding, no missing detail?",
65
+ description: "Is it the right length, no padding, no missing detail?",
66
66
  catches: "Rambling responses, repetitive caveats, over-hedging. LLMs default to verbose. Conciseness is the dimension where users most often see scores move after tuning.",
67
- instruction: "Is the output the right length for the task no padding, no missing detail, no hedging filler? Penalise rambling, repetition, over-caveating, and unnecessary preamble. Penalise too-short outputs that drop information.",
67
+ instruction: "Is the output the right length for the task: no padding, no missing detail, no hedging filler? Penalise rambling, repetition, over-caveating, and unnecessary preamble. Penalise too-short outputs that drop information.",
68
68
  rubric_bands: [
69
69
  { "stars" => 5, "description" => "Exactly as long as the task needs, no more, no less." },
70
70
  { "stars" => 4, "description" => "Right length with a small redundancy." },