RubyGems - completion-kit - Versions diffs - 0.17.1 → 0.18.0 - Mend

completion-kit 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of completion-kit might be problematic. Click here for more details.

Files changed (48)

data/app/models/completion_kit/run.rb CHANGED

@@ -51,10 +51,16 @@ module CompletionKit
       broadcast_ui
     end
+    def gradable_metric_ids
+      ids = check_metrics.pluck(:id)
+      ids += llm_metrics.pluck(:id) if llm_judge_configured?
+      ids
+    end
     def outstanding_work_zero?
       return false if responses.where.not(status: HasJobStatus::TERMINAL_STATUSES).exists?
-      metric_ids = metrics.pluck(:id)
+      metric_ids = gradable_metric_ids
       return true if metric_ids.empty?
       succeeded_response_ids = responses.where(status: "succeeded").pluck(:id)
@@ -74,6 +80,31 @@ module CompletionKit
       judge_model.present? && metrics.any? && ApiConfig.valid_for_model?(judge_model)
     end
+    def llm_metrics
+      metrics.where(metric_type: "llm_judge")
+    end
+    def check_metrics
+      metrics.where(metric_type: "check")
+    end
+    def llm_judge_configured?
+      judge_model.present? && llm_metrics.any? && ApiConfig.valid_for_model?(judge_model)
+    end
+    def gradable?
+      llm_judge_configured? || check_metrics.any?
+    end
+    def judge_only_input_data_checks?
+      return false unless judge_only?
+      attached = run_metrics.filter_map(&:metric)
+      return false if attached.empty?
+      attached.all?(&:check?) && attached.all? { |m| m.check_config.to_h["target"] == "input_data" }
+    end
     def replace_metrics!(metric_ids)
       return unless metric_ids
       run_metrics.delete_all
@@ -91,13 +122,29 @@ module CompletionKit
     end
     def metric_averages
-      all_reviews = responses.flat_map(&:reviews).select { |r| r.ai_score.present? }
-      all_reviews.group_by(&:metric_name).map do |name, reviews|
-        scores = reviews.map { |r| r.ai_score.to_f }
-        { name: name, avg: (scores.sum / scores.length).round(1) }
+      responses.flat_map(&:reviews).group_by(&:metric_name).filter_map do |name, reviews|
+        scored = reviews.select { |r| r.ai_score.present? }
+        if scored.any?
+          scores = scored.map { |r| r.ai_score.to_f }
+          { name: name, avg: (scores.sum / scores.length).round(1) }
+        else
+          resolved = reviews.reject { |r| r.passed.nil? }
+          next if resolved.empty?
+          passed = resolved.count { |r| r.passed == true }
+          { name: name, kind: "check", pass_rate: (passed.to_f / resolved.length).round(2) }
+        end
       end
     end
+    def check_pass_rate
+      resolved = responses.flat_map(&:reviews).reject { |r| r.passed.nil? }
+      return nil if resolved.empty?
+      passed = resolved.count { |r| r.passed == true }
+      (passed.to_f / resolved.length).round(2)
+    end
     def stale_review_summary
       review_pairs = Review.where(response_id: response_ids)
                           .where.not(metric_id: nil)
@@ -141,7 +188,9 @@ module CompletionKit
       if judge_only?
         column = output_column.presence || "actual_output"
-        return fail_with_summary!("Dataset has no \"#{column}\" column") unless dataset && dataset.headers.include?(column)
+        unless judge_only_input_data_checks? || (dataset && dataset.headers.include?(column))
+          return fail_with_summary!("Dataset has no \"#{column}\" column")
+        end
       else
         client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))
         unless client.configured?
@@ -168,13 +217,15 @@ module CompletionKit
           }
           if judge_only?
             attrs[:status] = "succeeded"
-            attrs[:response_text] = row[output_column.presence || "actual_output"].to_s
+            column = output_column.presence || "actual_output"
+            attrs[:response_text] = row[column].to_s if dataset && dataset.headers.include?(column)
           end
           response = responses.create!(attrs)
           if judge_only?
-            metrics.each { |m| JudgeReviewJob.perform_later(response.id, m.id, id) } if judge_configured?
+            llm_metrics.each { |m| JudgeReviewJob.perform_later(response.id, m.id, id) } if llm_judge_configured?
+            check_metrics.each { |m| CheckReviewJob.perform_later(response.id, m.id, id) }
           else
             GenerateRowJob.perform_later(id, response.id)
           end
@@ -195,10 +246,10 @@ module CompletionKit
     end
     def regrade!
-      grading_metrics = metrics
-      return false if grading_metrics.empty? || !judge_configured?
+      return false if metrics.empty? || !gradable?
-      eligible_responses = responses.where(status: "succeeded").where.not(response_text: nil)
+      eligible_responses = responses.where(status: "succeeded")
+      eligible_responses = eligible_responses.where.not(response_text: nil) unless judge_only_input_data_checks?
       response_ids = eligible_responses.pluck(:id)
       return false if response_ids.empty?
@@ -208,6 +259,7 @@ module CompletionKit
           attempts: 0,
           metric_version_id: nil,
           ai_score: nil,
+          passed: nil,
           ai_feedback: nil,
           error_provider: nil,
           error_class: nil,
@@ -217,7 +269,8 @@ module CompletionKit
         update!(status: "running", failure_summary: nil, error_message: nil)
         response_ids.each do |rid|
-          grading_metrics.each { |m| JudgeReviewJob.perform_later(rid, m.id, id) }
+          llm_metrics.each { |m| JudgeReviewJob.perform_later(rid, m.id, id) } if llm_judge_configured?
+          check_metrics.each { |m| CheckReviewJob.perform_later(rid, m.id, id) }
         end
         RunCompletionCheckJob.perform_later(id)
       end
@@ -231,14 +284,14 @@ module CompletionKit
       generated_failed = responses.where(status: "failed").count
       generated_total = progress_total
-      metric_count = metrics.count
+      metric_ids = gradable_metric_ids
+      metric_count = metric_ids.size
       judged_total = metric_count > 0 ? generated_done : 0
       judged_done = 0
       judged_failed = 0
       if metric_count > 0 && judged_total > 0
         succeeded_response_ids = responses.where(status: "succeeded").pluck(:id)
-        metric_ids = metrics.pluck(:id)
         review_counts = Review
           .where(response_id: succeeded_response_ids, metric_id: metric_ids)
           .group(:response_id, :status)
@@ -273,6 +326,7 @@ module CompletionKit
         output_column: output_column,
         created_at: created_at, updated_at: updated_at,
         responses_count: responses.count, avg_score: avg_score,
+        check_pass_rate: check_pass_rate,
         progress_current: snap[:generated_done],
         progress_total: snap[:generated_total],
         progress: {
@@ -411,6 +465,8 @@ module CompletionKit
         return
       end
+      return if judge_only_input_data_checks?
       column = output_column.presence || "actual_output"
       unless dataset.headers.include?(column)
         errors.add(:output_column, "\"#{column}\" is not a column on dataset \"#{dataset.name}\"")

data/app/services/completion_kit/checks/contains.rb ADDED

@@ -0,0 +1,21 @@
+module CompletionKit
+  module Checks
+    class Contains
+      def call(target, config)
+        value = config["value"].to_s
+        haystack = target.to_s
+        present = if config["case_sensitive"] == true
+          haystack.include?(value)
+        else
+          haystack.downcase.include?(value.downcase)
+        end
+        if present
+          Result.new(passed: true, detail: "contains #{value.inspect}")
+        else
+          Result.new(passed: false, detail: "does not contain #{value.inspect}")
+        end
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/equals.rb ADDED

@@ -0,0 +1,26 @@
+module CompletionKit
+  module Checks
+    class Equals
+      def call(target, config)
+        actual = target.to_s
+        expected = config["value"].to_s
+        if config["trim"] == true
+          actual = actual.strip
+          expected = expected.strip
+        end
+        match = if config["case_sensitive"] == true
+          actual == expected
+        else
+          actual.casecmp?(expected)
+        end
+        if match
+          Result.new(passed: true, detail: "equals #{expected.inspect}")
+        else
+          Result.new(passed: false, detail: "#{actual.inspect} != #{expected.inspect}")
+        end
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/json_path_equals.rb ADDED

@@ -0,0 +1,32 @@
+module CompletionKit
+  module Checks
+    class JsonPathEquals
+      MISSING = Object.new
+      def call(target, config)
+        parsed = JSON.parse(target.to_s)
+        value = dig(parsed, config["json_path"].to_s)
+        if value.equal?(MISSING)
+          Result.new(passed: false, detail: "path #{config["json_path"]} not found")
+        elsif value == config["expected"]
+          Result.new(passed: true, detail: "#{config["json_path"]} == #{config["expected"].inspect}")
+        else
+          Result.new(passed: false, detail: "#{config["json_path"]} was #{value.inspect}, expected #{config["expected"].inspect}")
+        end
+      rescue JSON::ParserError
+        Result.new(passed: false, detail: "not valid JSON")
+      end
+      private
+      def dig(data, path)
+        path.split(".").reduce(data) do |node, key|
+          return MISSING unless node.is_a?(Hash) && node.key?(key)
+          node[key]
+        end
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/length_bounds.rb ADDED

@@ -0,0 +1,19 @@
+module CompletionKit
+  module Checks
+    class LengthBounds
+      def call(target, config)
+        length = target.to_s.length
+        min = config["min"] && config["min"].to_i
+        max = config["max"] && config["max"].to_i
+        if min && length < min
+          Result.new(passed: false, detail: "length #{length} below min #{min}")
+        elsif max && length > max
+          Result.new(passed: false, detail: "length #{length} above max #{max}")
+        else
+          Result.new(passed: true, detail: "length #{length} within bounds")
+        end
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/no_refusal.rb ADDED

@@ -0,0 +1,23 @@
+module CompletionKit
+  module Checks
+    class NoRefusal
+      PATTERNS = [
+        /\bi'?m sorry\b/i,
+        /\bi can'?t (?:help|assist|comply|do that|provide)/i,
+        /\bi (?:cannot|can'?t) (?:help|assist|fulfill|comply|provide)/i,
+        /\bi'?m (?:unable|not able) to\b/i,
+        /\bi (?:won'?t|will not) (?:be able|help|assist)\b/i,
+        /\bas an ai\b/i
+      ].freeze
+      def call(target, _config)
+        text = target.to_s
+        if PATTERNS.any? { |pattern| pattern.match?(text) }
+          Result.new(passed: false, detail: "refusal detected")
+        else
+          Result.new(passed: true, detail: "no refusal detected")
+        end
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/not_contains.rb ADDED

@@ -0,0 +1,21 @@
+module CompletionKit
+  module Checks
+    class NotContains
+      def call(target, config)
+        value = config["value"].to_s
+        haystack = target.to_s
+        present = if config["case_sensitive"] == true
+          haystack.include?(value)
+        else
+          haystack.downcase.include?(value.downcase)
+        end
+        if present
+          Result.new(passed: false, detail: "contains #{value.inspect}")
+        else
+          Result.new(passed: true, detail: "does not contain #{value.inspect}")
+        end
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/regex.rb ADDED

@@ -0,0 +1,20 @@
+module CompletionKit
+  module Checks
+    class Regex
+      def call(target, config)
+        options = 0
+        options |= Regexp::IGNORECASE if config["case_sensitive"] == false
+        options |= Regexp::MULTILINE if config["multiline"] == true
+        pattern = Regexp.new(config["pattern"].to_s, options)
+        if pattern.match?(target.to_s)
+          Result.new(passed: true, detail: "matched /#{config["pattern"]}/")
+        else
+          Result.new(passed: false, detail: "no match for /#{config["pattern"]}/")
+        end
+      rescue RegexpError => e
+        Result.new(passed: false, detail: "invalid pattern: #{e.message}")
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/registry.rb ADDED

@@ -0,0 +1,41 @@
+module CompletionKit
+  module Checks
+    module Registry
+      CHECKS = {
+        "contains" => Contains,
+        "not_contains" => NotContains,
+        "equals" => Equals,
+        "regex" => Regex,
+        "valid_json" => ValidJson,
+        "json_path_equals" => JsonPathEquals,
+        "length_bounds" => LengthBounds,
+        "no_refusal" => NoRefusal
+      }.freeze
+      REQUIRED_KEYS = {
+        "contains" => %w[value],
+        "not_contains" => %w[value],
+        "equals" => %w[value],
+        "regex" => %w[pattern],
+        "valid_json" => [],
+        "json_path_equals" => %w[json_path expected],
+        "length_bounds" => [],
+        "no_refusal" => []
+      }.freeze
+      KINDS = CHECKS.keys.freeze
+      def self.kinds
+        KINDS
+      end
+      def self.required_keys
+        REQUIRED_KEYS
+      end
+      def self.fetch(kind)
+        CHECKS.fetch(kind).new
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/result.rb ADDED

@@ -0,0 +1,5 @@
+module CompletionKit
+  module Checks
+    Result = Data.define(:passed, :detail)
+  end
+end

data/app/services/completion_kit/checks/target_resolver.rb ADDED

@@ -0,0 +1,31 @@
+module CompletionKit
+  module Checks
+    module TargetResolver
+      TARGETS = %w[response_text input_data json_path].freeze
+      UNRESOLVED = Object.new.freeze
+      def self.call(response, config)
+        case config["target"]
+        when "input_data"
+          response.input_data
+        when "json_path"
+          resolve_json_path(response.response_text, config["target_path"].to_s)
+        else
+          response.response_text
+        end
+      end
+      def self.resolve_json_path(text, path)
+        parsed = JSON.parse(text.to_s)
+        value = path.split(".").reduce(parsed) do |node, key|
+          return UNRESOLVED unless node.is_a?(Hash) && node.key?(key)
+          node[key]
+        end
+        value.to_s
+      rescue JSON::ParserError
+        UNRESOLVED
+      end
+    end
+  end
+end

data/app/services/completion_kit/checks/valid_json.rb ADDED

@@ -0,0 +1,12 @@
+module CompletionKit
+  module Checks
+    class ValidJson
+      def call(target, _config)
+        JSON.parse(target.to_s)
+        Result.new(passed: true, detail: "valid JSON")
+      rescue JSON::ParserError
+        Result.new(passed: false, detail: "not valid JSON")
+      end
+    end
+  end
+end

data/app/services/completion_kit/mcp_tools/agreements.rb CHANGED

@@ -50,6 +50,8 @@ module CompletionKit
         run = CompletionKit::Run.find(args["run_id"])
         response = run.responses.find(args["response_id"])
         metric = CompletionKit::Metric.find(args["metric_id"])
+        return error_result("Checks have nothing to calibrate; agreements are only for llm_judge metrics.") if metric.check?
         created_by = args["created_by"].presence || "mcp"
         agreement = CompletionKit::Agreement.find_or_initialize_by(

data/app/services/completion_kit/mcp_tools/judges.rb CHANGED

@@ -53,6 +53,8 @@ module CompletionKit
       def self.compare(args)
         metric = CompletionKit::Metric.find(args["metric_id"])
+        return error_result("judges_compare is unavailable for check metrics.") if metric.check?
         a = CompletionKit::MetricVersion.where(metric_id: metric.id).find(args["metric_version_a_id"])
         b = CompletionKit::MetricVersion.where(metric_id: metric.id).find(args["metric_version_b_id"])
         stats_a = CompletionKit::MetricAgreementStats.for(metric, metric_version: a)

data/app/services/completion_kit/mcp_tools/metrics.rb CHANGED

@@ -3,6 +3,28 @@ module CompletionKit
     module Metrics
       extend Base
+      CHECK_CONFIG_SCHEMA = {
+        type: "object",
+        properties: {
+          check_kind: {type: "string", enum: CompletionKit::Checks::Registry.kinds},
+          target: {type: "string", enum: CompletionKit::Checks::TargetResolver::TARGETS},
+          target_path: {type: "string"},
+          value: {type: "string"},
+          pattern: {type: "string"},
+          json_path: {type: "string"},
+          expected: {},
+          min: {type: "integer"},
+          max: {type: "integer"},
+          case_sensitive: {type: "boolean"},
+          multiline: {type: "boolean"},
+          trim: {type: "boolean"}
+        }
+      }.freeze
+      CHECK_CONFIG_HINT = "For a deterministic check set metric_type:\"check\" and check_config. Per-kind required keys: " \
+        "value (contains/not_contains/equals), pattern (regex), json_path+expected (json_path_equals), " \
+        "min and/or max (length_bounds); valid_json and no_refusal take no extra keys. target_path is required when target is json_path."
       TOOLS = {
         "metrics_list" => {
           description: "List all metrics",
@@ -15,12 +37,14 @@ module CompletionKit
           handler: :get
         },
         "metrics_create" => {
-          description: "Create a metric with evaluation criteria",
+          description: "Create a metric with evaluation criteria. #{CHECK_CONFIG_HINT}",
           inputSchema: {
             type: "object",
             properties: {
               name: {type: "string"}, instruction: {type: "string"},
+              metric_type: {type: "string", enum: CompletionKit::Metric::METRIC_TYPES},
               rubric_bands: {type: "array", items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}},
+              check_config: CHECK_CONFIG_SCHEMA,
               tag_names: {type: "array", items: {type: "string"}}
             },
             required: ["name"]
@@ -28,12 +52,14 @@ module CompletionKit
           handler: :create
         },
         "metrics_update" => {
-          description: "Update a metric",
+          description: "Update a metric. #{CHECK_CONFIG_HINT}",
           inputSchema: {
             type: "object",
             properties: {
               id: {type: "integer"}, name: {type: "string"}, instruction: {type: "string"},
+              metric_type: {type: "string", enum: CompletionKit::Metric::METRIC_TYPES},
               rubric_bands: {type: "array", items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}},
+              check_config: CHECK_CONFIG_SCHEMA,
               tag_names: {type: "array", items: {type: "string"}}
             },
             required: ["id"]
@@ -69,7 +95,7 @@ module CompletionKit
       end
       def self.create(args)
-        metric = Metric.new(args.slice("name", "instruction", "rubric_bands"))
+        metric = Metric.new(args.slice("name", "instruction", "rubric_bands", "metric_type", "check_config"))
         metric.tag_names = args["tag_names"] if args.key?("tag_names")
         if metric.save
           text_result(metric.reload.as_json)
@@ -80,7 +106,7 @@ module CompletionKit
       def self.update(args)
         metric = Metric.find(args["id"])
-        if metric.update(args.except("id").slice("name", "instruction", "rubric_bands"))
+        if metric.update(args.except("id").slice("name", "instruction", "rubric_bands", "metric_type", "check_config"))
           metric.update!(tag_names: args["tag_names"]) if args.key?("tag_names")
           text_result(metric.reload.as_json)
         else
@@ -95,6 +121,8 @@ module CompletionKit
       def self.suggest_variants(args)
         metric = Metric.find(args["metric_id"])
+        return error_result("Metric ##{metric.id} is a check; checks are exact and have no variants to suggest.") if metric.check?
         generator = MetricVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
         variants = generator.call
         return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?

data/app/services/completion_kit/metric_agreement_examples.rb CHANGED

@@ -17,6 +17,8 @@ module CompletionKit
     end
     def judge_examples_for(metric, exclude_response_id: nil, limit: DEFAULT_JUDGE_EXAMPLE_LIMIT)
+      return [] if metric.check?
       current_version = MetricVersion.current.find_by(metric_id: metric.id)
       return [] unless current_version

data/app/services/completion_kit/metric_improvement_validator.rb CHANGED

@@ -9,6 +9,8 @@ module CompletionKit
     end
     def call
+      return summarize([], 0, false) if @metric.check?
       key = answer_key
       rows = []
       key.each do |entry|

data/app/services/completion_kit/metric_variant_generator.rb CHANGED

@@ -14,6 +14,7 @@ module CompletionKit
     end
     def call
+      return [] if @metric.check?
       raise CompletionKit::ConfigurationError, "No judging model available; set CompletionKit.config.judge_model or add a provider with a judging model" if @model.blank?
       client = LlmClient.for_model(@model, ApiConfig.for_model(@model))

data/app/services/completion_kit/onboarding/concepts.rb CHANGED

@@ -24,7 +24,7 @@ module CompletionKit
         },
         metric: {
           name: "Metric",
-          definition: "An evaluation dimension with its own 1-5 rubric. The LLM judge scores every response against it."
+          definition: "An evaluation dimension. An LLM judge scores each response on a 1-5 rubric, or a deterministic check passes or fails it with no model call."
         }
       }.freeze
     end

data/app/services/completion_kit/prompt_improvement_service.rb CHANGED

@@ -37,7 +37,11 @@ module CompletionKit
           sections << "Expected: #{resp.expected_output.truncate(200)}"
         end
         resp.reviews.each do |review|
-          sections << "  #{review.metric_name}: #{review.ai_score}/5 — #{review.ai_feedback}"
+          if review.check?
+            sections << "  #{review.metric_name}: #{review.passed ? "PASS" : "FAIL"}"
+          else
+            sections << "  #{review.metric_name}: #{review.ai_score}/5 — #{review.ai_feedback}"
+          end
         end
         sections << ""
       end
@@ -45,10 +49,10 @@ module CompletionKit
       avg = @run.avg_score
       sections << "## Overall Score: #{avg}/5" if avg
-      metric_avgs = @run.metric_averages
-      if metric_avgs.any?
+      rubric_avgs = @run.metric_averages.select { |m| m.key?(:avg) }
+      if rubric_avgs.any?
         sections << "## Metric Averages"
-        metric_avgs.each { |m| sections << "  #{m[:name]}: #{m[:avg]}/5" }
+        rubric_avgs.each { |m| sections << "  #{m[:name]}: #{m[:avg]}/5" }
         sections << ""
       end

data/app/services/completion_kit/prompt_improvement_validator.rb CHANGED

@@ -86,7 +86,7 @@ module CompletionKit
     def judge_score(response, new_text)
       config = ApiConfig.for_model(@run.judge_model).merge(judge_model: @run.judge_model)
       judge = JudgeService.new(config)
-      scores = @run.metrics.filter_map do |metric|
+      scores = @run.metrics.select(&:llm_judge?).filter_map do |metric|
         judge.evaluate(
           new_text, response.expected_output, @candidate,
           criteria: metric.instruction.to_s,

data/app/services/completion_kit/starter_metrics.rb CHANGED

@@ -1,6 +1,6 @@
 module CompletionKit
   module StarterMetrics
-    Starter = Struct.new(:key, :name, :description, :catches, :instruction, :rubric_bands, keyword_init: true)
+    Starter = Struct.new(:key, :name, :description, :catches, :instruction, :rubric_bands, :metric_type, :check_config, keyword_init: true)
     ALL = [
       Starter.new(
@@ -72,6 +72,30 @@ module CompletionKit
           { "stars" => 2, "description" => "Noticeable filler or visible gaps." },
           { "stars" => 1, "description" => "Padded, repetitive, or so short it loses information." }
         ]
+      ),
+      Starter.new(
+        key: "valid_json",
+        name: "Valid JSON",
+        description: "Does the output parse as JSON?",
+        catches: "Broken or partial JSON, prose wrapped around a structured response, trailing commas. A deterministic pass/fail with no LLM judgement.",
+        metric_type: "check",
+        check_config: { "check_kind" => "valid_json", "target" => "response_text" }
+      ),
+      Starter.new(
+        key: "no_refusal",
+        name: "No refusal",
+        description: "Did the model answer instead of refusing?",
+        catches: "\"I'm sorry, I can't help with that\" and other refusal boilerplate when a real answer was expected. Deterministic, no judge call.",
+        metric_type: "check",
+        check_config: { "check_kind" => "no_refusal", "target" => "response_text" }
+      ),
+      Starter.new(
+        key: "contains_token",
+        name: "Contains required token",
+        description: "Does the output contain a required substring?",
+        catches: "A required marker, citation, or keyword the output must always include. Set the value to the token you require.",
+        metric_type: "check",
+        check_config: { "check_kind" => "contains", "target" => "response_text", "value" => "REQUIRED" }
       )
     ].freeze