completion-kit 0.1.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +192 -0
  4. data/Rakefile +12 -0
  5. data/app/assets/config/completion_kit_manifest.js +1 -0
  6. data/app/assets/config/manifest.js +3 -0
  7. data/app/assets/images/completion_kit/logo.svg +6 -0
  8. data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +25 -0
  9. data/app/assets/stylesheets/completion_kit/application.css +2214 -0
  10. data/app/controllers/completion_kit/api/v1/base_controller.rb +29 -0
  11. data/app/controllers/completion_kit/api/v1/criteria_controller.rb +62 -0
  12. data/app/controllers/completion_kit/api/v1/datasets_controller.rb +51 -0
  13. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -0
  14. data/app/controllers/completion_kit/api/v1/prompts_controller.rb +64 -0
  15. data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +51 -0
  16. data/app/controllers/completion_kit/api/v1/responses_controller.rb +32 -0
  17. data/app/controllers/completion_kit/api/v1/runs_controller.rb +71 -0
  18. data/app/controllers/completion_kit/api_reference_controller.rb +9 -0
  19. data/app/controllers/completion_kit/application_controller.rb +31 -0
  20. data/app/controllers/completion_kit/criteria_controller.rb +67 -0
  21. data/app/controllers/completion_kit/datasets_controller.rb +53 -0
  22. data/app/controllers/completion_kit/mcp_controller.rb +57 -0
  23. data/app/controllers/completion_kit/metrics_controller.rb +52 -0
  24. data/app/controllers/completion_kit/prompts_controller.rb +69 -0
  25. data/app/controllers/completion_kit/provider_credentials_controller.rb +63 -0
  26. data/app/controllers/completion_kit/responses_controller.rb +44 -0
  27. data/app/controllers/completion_kit/runs_controller.rb +131 -0
  28. data/app/helpers/completion_kit/application_helper.rb +193 -0
  29. data/app/jobs/completion_kit/application_job.rb +4 -0
  30. data/app/jobs/completion_kit/generate_job.rb +12 -0
  31. data/app/jobs/completion_kit/judge_job.rb +12 -0
  32. data/app/jobs/completion_kit/model_discovery_job.rb +29 -0
  33. data/app/mailers/completion_kit/application_mailer.rb +6 -0
  34. data/app/models/completion_kit/application_record.rb +5 -0
  35. data/app/models/completion_kit/criteria.rb +22 -0
  36. data/app/models/completion_kit/criteria_membership.rb +20 -0
  37. data/app/models/completion_kit/dataset.rb +24 -0
  38. data/app/models/completion_kit/metric.rb +97 -0
  39. data/app/models/completion_kit/model.rb +13 -0
  40. data/app/models/completion_kit/prompt.rb +99 -0
  41. data/app/models/completion_kit/provider_credential.rb +114 -0
  42. data/app/models/completion_kit/response.rb +30 -0
  43. data/app/models/completion_kit/review.rb +28 -0
  44. data/app/models/completion_kit/run.rb +253 -0
  45. data/app/models/completion_kit/run_metric.rb +6 -0
  46. data/app/models/completion_kit/suggestion.rb +8 -0
  47. data/app/services/completion_kit/anthropic_client.rb +86 -0
  48. data/app/services/completion_kit/api_config.rb +80 -0
  49. data/app/services/completion_kit/csv_processor.rb +65 -0
  50. data/app/services/completion_kit/judge_service.rb +87 -0
  51. data/app/services/completion_kit/llm_client.rb +45 -0
  52. data/app/services/completion_kit/mcp_dispatcher.rb +53 -0
  53. data/app/services/completion_kit/mcp_tools/criteria.rb +106 -0
  54. data/app/services/completion_kit/mcp_tools/datasets.rb +90 -0
  55. data/app/services/completion_kit/mcp_tools/metrics.rb +98 -0
  56. data/app/services/completion_kit/mcp_tools/prompts.rb +112 -0
  57. data/app/services/completion_kit/mcp_tools/provider_credentials.rb +97 -0
  58. data/app/services/completion_kit/mcp_tools/responses.rb +45 -0
  59. data/app/services/completion_kit/mcp_tools/runs.rb +130 -0
  60. data/app/services/completion_kit/model_discovery_service.rb +223 -0
  61. data/app/services/completion_kit/ollama_client.rb +80 -0
  62. data/app/services/completion_kit/open_ai_client.rb +71 -0
  63. data/app/services/completion_kit/open_router_client.rb +69 -0
  64. data/app/services/completion_kit/prompt_improvement_service.rb +81 -0
  65. data/app/views/completion_kit/api_reference/_example.html.erb +6 -0
  66. data/app/views/completion_kit/api_reference/index.html.erb +308 -0
  67. data/app/views/completion_kit/criteria/_form.html.erb +46 -0
  68. data/app/views/completion_kit/criteria/edit.html.erb +14 -0
  69. data/app/views/completion_kit/criteria/index.html.erb +37 -0
  70. data/app/views/completion_kit/criteria/new.html.erb +13 -0
  71. data/app/views/completion_kit/criteria/show.html.erb +37 -0
  72. data/app/views/completion_kit/datasets/_form.html.erb +29 -0
  73. data/app/views/completion_kit/datasets/edit.html.erb +13 -0
  74. data/app/views/completion_kit/datasets/index.html.erb +38 -0
  75. data/app/views/completion_kit/datasets/new.html.erb +12 -0
  76. data/app/views/completion_kit/datasets/show.html.erb +45 -0
  77. data/app/views/completion_kit/metrics/_form.html.erb +72 -0
  78. data/app/views/completion_kit/metrics/edit.html.erb +13 -0
  79. data/app/views/completion_kit/metrics/index.html.erb +34 -0
  80. data/app/views/completion_kit/metrics/new.html.erb +12 -0
  81. data/app/views/completion_kit/metrics/show.html.erb +49 -0
  82. data/app/views/completion_kit/prompts/_form.html.erb +52 -0
  83. data/app/views/completion_kit/prompts/edit.html.erb +13 -0
  84. data/app/views/completion_kit/prompts/index.html.erb +46 -0
  85. data/app/views/completion_kit/prompts/new.html.erb +12 -0
  86. data/app/views/completion_kit/prompts/show.html.erb +156 -0
  87. data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +30 -0
  88. data/app/views/completion_kit/provider_credentials/_form.html.erb +71 -0
  89. data/app/views/completion_kit/provider_credentials/edit.html.erb +12 -0
  90. data/app/views/completion_kit/provider_credentials/index.html.erb +41 -0
  91. data/app/views/completion_kit/provider_credentials/new.html.erb +12 -0
  92. data/app/views/completion_kit/responses/show.html.erb +87 -0
  93. data/app/views/completion_kit/runs/_actions.html.erb +14 -0
  94. data/app/views/completion_kit/runs/_form.html.erb +159 -0
  95. data/app/views/completion_kit/runs/_progress.html.erb +18 -0
  96. data/app/views/completion_kit/runs/_response_row.html.erb +13 -0
  97. data/app/views/completion_kit/runs/_sort_toolbar.html.erb +8 -0
  98. data/app/views/completion_kit/runs/_status_header.html.erb +15 -0
  99. data/app/views/completion_kit/runs/edit.html.erb +14 -0
  100. data/app/views/completion_kit/runs/index.html.erb +43 -0
  101. data/app/views/completion_kit/runs/new.html.erb +12 -0
  102. data/app/views/completion_kit/runs/show.html.erb +79 -0
  103. data/app/views/completion_kit/runs/suggestion.html.erb +47 -0
  104. data/app/views/layouts/completion_kit/application.html.erb +77 -0
  105. data/config/routes.rb +55 -0
  106. data/db/migrate/20260311000001_create_completion_kit_tables.rb +87 -0
  107. data/db/migrate/20260326000001_rename_criteria_to_instruction_on_metrics_and_reviews.rb +6 -0
  108. data/db/migrate/20260327000001_add_progress_to_runs.rb +6 -0
  109. data/db/migrate/20260327100001_replace_criteria_with_direct_metrics_on_runs.rb +12 -0
  110. data/db/migrate/20260328000001_add_error_message_to_runs.rb +5 -0
  111. data/db/migrate/20260329000001_create_completion_kit_models.rb +20 -0
  112. data/db/migrate/20260401170001_add_discovery_columns_to_completion_kit_provider_credentials.rb +7 -0
  113. data/db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb +5 -0
  114. data/db/migrate/20260403000002_create_completion_kit_suggestions.rb +13 -0
  115. data/db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb +5 -0
  116. data/lib/completion-kit.rb +1 -0
  117. data/lib/completion_kit/engine.rb +35 -0
  118. data/lib/completion_kit/version.rb +3 -0
  119. data/lib/completion_kit.rb +55 -0
  120. data/lib/generators/completion_kit/install_generator.rb +21 -0
  121. data/lib/generators/completion_kit/templates/README +20 -0
  122. data/lib/generators/completion_kit/templates/initializer.rb +43 -0
  123. metadata +361 -0
@@ -0,0 +1,308 @@
1
+ <% token_display = if @token.blank? # no token configured: show a placeholder in the examples
2
+ "YOUR_TOKEN"
3
+ elsif @token.length < 12 # too short to expose any characters: mask it entirely
4
+ "••••••••"
5
+ else # keep the first and last four characters, mask everything in between
6
+ "#{@token.first(4)}#{'•' * (@token.length - 8)}#{@token.last(4)}"
7
+ end %>
8
+
9
+ <section class="ck-page-header">
10
+ <div>
11
+ <h1 class="ck-title">API</h1>
12
+ <p class="ck-lead">REST JSON API for programmatic access. All requests require a bearer token via the Authorization header.</p>
13
+ </div>
14
+ </section>
15
+
16
+ <div class="ck-stack">
17
+ <div class="ck-card">
18
+ <div class="ck-split">
19
+ <div>
20
+ <p class="ck-kicker">Authentication</p>
21
+ <% if @token.present? %>
22
+ <div style="display: flex; align-items: center; gap: 0.4rem; margin: 0.5rem 0;">
23
+ <button type="button" class="ck-chip ck-token-toggle" onclick="ckToggleToken(this)" data-masked="<%= token_display %>" data-real="<%= @token %>" aria-label="Reveal API token" aria-pressed="false" style="cursor: pointer;"><%= token_display %></button>
24
+ <%# The token travels in a data attribute instead of being spliced into the handler's JS string, so quotes or backslashes in it cannot alter the script. %><button type="button" class="ck-icon-btn" title="Copy token" aria-label="Copy API token" data-token="<%= @token %>" onclick="var b=this;navigator.clipboard.writeText(b.dataset.token).then(function(){b.classList.add('ck-api-copy--done');setTimeout(function(){b.classList.remove('ck-api-copy--done')},1500)})">
25
+ <%= heroicon_tag "clipboard-document", variant: :outline, size: 14, "aria-hidden": "true" %>
26
+ </button>
27
+ </div>
28
+ <% else %>
29
+ <p class="ck-meta-copy">No API token configured. Set <code>COMPLETION_KIT_API_TOKEN</code> to enable API access.</p>
30
+ <% end %>
31
+ </div>
32
+ </div>
33
+ </div>
34
+ </div>
35
+
36
+ <% if @published_prompts.any? %>
37
+ <div class="ck-api-prompts-section">
38
+ <p class="ck-kicker">Your prompts</p>
39
+ <% @published_prompts.each do |p| %>
40
+ <div class="ck-api-prompt-card">
41
+ <div class="ck-api-prompt-card__top">
42
+ <div>
43
+ <strong class="ck-api-prompt-card__name"><%= p.name %></strong>
44
+ <% if p.description.present? %>
45
+ <p class="ck-api-prompt-card__desc"><%= p.description %></p>
46
+ <% end %>
47
+ </div>
48
+ <span class="ck-chip" style="text-transform: none; flex-shrink: 0;"><%= p.llm_model %></span>
49
+ </div>
50
+ <div class="ck-api-prompt-card__url">
51
+ <code class="ck-endpoint__url" id="prompt_ep_<%= p.id %>"><%= @base_url %>/api/v1/prompts/<%= p.slug %></code>
52
+ <button type="button" class="ck-icon-btn" title="Copy endpoint" aria-label="Copy endpoint URL" onclick="navigator.clipboard.writeText(document.getElementById('prompt_ep_<%= p.id %>').textContent)"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="14" height="14" aria-hidden="true"><path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"/><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"/></svg></button>
53
+ </div>
54
+ </div>
55
+ <% end %>
56
+ </div>
57
+ <% end %>
58
+
59
+ <div class="ck-api-tabs">
60
+ <input type="radio" name="ck-api-tab" id="ck-tab-mcp" class="ck-api-tabs__radio" checked>
61
+ <input type="radio" name="ck-api-tab" id="ck-tab-prompts" class="ck-api-tabs__radio">
62
+ <input type="radio" name="ck-api-tab" id="ck-tab-runs" class="ck-api-tabs__radio">
63
+ <input type="radio" name="ck-api-tab" id="ck-tab-responses" class="ck-api-tabs__radio">
64
+ <input type="radio" name="ck-api-tab" id="ck-tab-datasets" class="ck-api-tabs__radio">
65
+ <input type="radio" name="ck-api-tab" id="ck-tab-metrics" class="ck-api-tabs__radio">
66
+ <input type="radio" name="ck-api-tab" id="ck-tab-criteria" class="ck-api-tabs__radio">
67
+ <input type="radio" name="ck-api-tab" id="ck-tab-providers" class="ck-api-tabs__radio">
68
+
69
+ <nav class="ck-api-tabs__nav">
70
+ <%# Count comes from the same tool list this page renders in the MCP panel, so the badge cannot drift from it. %><label for="ck-tab-mcp" class="ck-api-tabs__label">MCP <span class="ck-api-tabs__count"><%= CompletionKit::McpDispatcher.tool_definitions.size %></span></label>
71
+ <label for="ck-tab-prompts" class="ck-api-tabs__label">Prompts <span class="ck-api-tabs__count">6</span></label>
72
+ <label for="ck-tab-runs" class="ck-api-tabs__label">Runs <span class="ck-api-tabs__count">7</span></label>
73
+ <label for="ck-tab-responses" class="ck-api-tabs__label">Responses <span class="ck-api-tabs__count">2</span></label>
74
+ <label for="ck-tab-datasets" class="ck-api-tabs__label">Datasets <span class="ck-api-tabs__count">5</span></label>
75
+ <label for="ck-tab-metrics" class="ck-api-tabs__label">Metrics <span class="ck-api-tabs__count">5</span></label>
76
+ <label for="ck-tab-criteria" class="ck-api-tabs__label">Criteria <span class="ck-api-tabs__count">5</span></label>
77
+ <label for="ck-tab-providers" class="ck-api-tabs__label">Providers <span class="ck-api-tabs__count">5</span></label>
78
+ </nav>
79
+
80
+ <div class="ck-api-tabs__panels">
81
+
82
+ <div class="ck-api-tabs__panel">
83
+ <h2 class="ck-section-title">MCP Server</h2>
84
+ <%# Tool count is read from the dispatcher's tool list rather than hard-coded. %><p class="ck-copy">Connect Claude Code, Cursor, or any <a href="https://modelcontextprotocol.io" class="ck-link">MCP</a> client to manage prompts, runs, datasets, and metrics conversationally. <%= CompletionKit::McpDispatcher.tool_definitions.size %> tools over streamable HTTP.</p>
85
+
86
+ <div class="ck-mcp-install-grid">
87
+ <div class="ck-mcp-install-card">
88
+ <div class="ck-mcp-install-card__header">
89
+ <span class="ck-mcp-install-card__icon">&#9654;</span>
90
+ Claude Code
91
+ </div>
92
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "claude mcp add completion-kit \\\n --transport http \\\n --url #{@base_url}/mcp \\\n --header \"Authorization: Bearer #{token_display}\"" %>
93
+ </div>
94
+ <div class="ck-mcp-install-card">
95
+ <div class="ck-mcp-install-card__header">
96
+ <span class="ck-mcp-install-card__icon">{}</span>
97
+ Cursor / Generic
98
+ </div>
99
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "{\n \"mcpServers\": {\n \"completion-kit\": {\n \"url\": \"#{@base_url}/mcp\",\n \"headers\": {\n \"Authorization\": \"Bearer #{token_display}\"\n }\n }\n }\n}" %>
100
+ </div>
101
+ </div>
102
+
103
+ <div class="ck-api-endpoint" style="padding-top: 1.5rem;">
104
+ <p class="ck-kicker" style="margin-bottom: 0.75rem;">Available tools</p>
105
+ <% grouped = CompletionKit::McpDispatcher.tool_definitions.group_by { |t| t[:name].split("_").first } %>
106
+ <% grouped.each do |group, tools| %>
107
+ <div class="ck-mcp-tool-group">
108
+ <p class="ck-mcp-tool-group__label"><%= group %> <span class="ck-api-tabs__count"><%= tools.size %></span></p>
109
+ <div class="ck-mcp-tools">
110
+ <% tools.each do |tool| %>
111
+ <div class="ck-mcp-tool">
112
+ <code class="ck-mcp-tool__name"><%= tool[:name] %></code>
113
+ <span class="ck-mcp-tool__desc"><%= tool[:description] %></span>
114
+ </div>
115
+ <% end %>
116
+ </div>
117
+ </div>
118
+ <% end %>
119
+ </div>
120
+ </div>
121
+
122
+ <div class="ck-api-tabs__panel">
123
+ <h2 class="ck-section-title">Prompts</h2>
124
+ <p class="ck-copy">Create, version, and manage LLM prompt templates.</p>
125
+ <div class="ck-api-endpoint">
126
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/prompts</p>
127
+ <p class="ck-meta-copy">List all prompts, ordered by most recent.</p>
128
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl #{@base_url}/api/v1/prompts \\\n -H \"Authorization: Bearer #{token_display}\"" %>
129
+ </div>
130
+ <div class="ck-api-endpoint">
131
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/prompts</p>
132
+ <p class="ck-meta-copy">Create a new prompt.</p>
133
+ <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>, <code>template</code>, <code>llm_model</code>&emsp;<strong>Optional:</strong>&ensp;<code>description</code></p>
134
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/prompts \\\n -H \"Authorization: Bearer #{token_display}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"name\": \"summarizer\", \"template\": \"Summarize: {{text}}\", \"llm_model\": \"gpt-4.1\"}'" %>
135
+ </div>
136
+ <div class="ck-api-endpoint">
137
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/prompts/:id</p>
138
+ <p class="ck-meta-copy">Get a single prompt by ID.</p>
139
+ </div>
140
+ <div class="ck-api-endpoint">
141
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">PATCH</span> /api/v1/prompts/:id</p>
142
+ <p class="ck-meta-copy">Update a prompt. Accepts same params as create.</p>
143
+ </div>
144
+ <div class="ck-api-endpoint">
145
+ <p class="ck-api-method"><span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/prompts/:id</p>
146
+ <p class="ck-meta-copy">Delete a prompt. Returns 204 No Content.</p>
147
+ </div>
148
+ <div class="ck-api-endpoint">
149
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/prompts/:id/publish</p>
150
+ <p class="ck-meta-copy">Publish a prompt version, making it the current version in its family.</p>
151
+ </div>
152
+ </div>
153
+
154
+ <div class="ck-api-tabs__panel">
155
+ <h2 class="ck-section-title">Runs</h2>
156
+ <p class="ck-copy">Create runs, generate LLM responses, and judge them with metrics.</p>
157
+ <div class="ck-api-endpoint">
158
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/runs</p>
159
+ <p class="ck-meta-copy">List all runs with response counts and average scores.</p>
160
+ </div>
161
+ <div class="ck-api-endpoint">
162
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs</p>
163
+ <p class="ck-meta-copy">Create a new run.</p>
164
+ <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>prompt_id</code>&emsp;<strong>Optional:</strong>&ensp;<code>name</code>, <code>dataset_id</code>, <code>metric_ids</code>, <code>judge_model</code></p>
165
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs \\\n -H \"Authorization: Bearer #{token_display}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"prompt_id\": 1, \"dataset_id\": 1, \"metric_ids\": [1, 2]}'" %>
166
+ </div>
167
+ <div class="ck-api-endpoint">
168
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/runs/:id</p>
169
+ <p class="ck-meta-copy">Get a run with status, progress, response count, and average score.</p>
170
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl #{@base_url}/api/v1/runs/1 \\\n -H \"Authorization: Bearer #{token_display}\"" %>
171
+ </div>
172
+ <div class="ck-api-endpoint">
173
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs/:id/generate</p>
174
+ <p class="ck-meta-copy">Start generating responses. Returns 202 Accepted. Poll the run to check progress.</p>
175
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs/1/generate \\\n -H \"Authorization: Bearer #{token_display}\"" %>
176
+ </div>
177
+ <div class="ck-api-endpoint">
178
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs/:id/judge</p>
179
+ <p class="ck-meta-copy">Start judging responses with the configured judge model and metrics. Returns 202 Accepted.</p>
180
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs/1/judge \\\n -H \"Authorization: Bearer #{token_display}\"" %>
181
+ </div>
182
+ <div class="ck-api-endpoint">
183
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">PATCH</span> /api/v1/runs/:id</p>
184
+ <p class="ck-meta-copy">Update a run. Accepts same params as create.</p>
185
+ </div>
186
+ <div class="ck-api-endpoint">
187
+ <p class="ck-api-method"><span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/runs/:id</p>
188
+ <p class="ck-meta-copy">Delete a run and all its responses. Returns 204 No Content.</p>
189
+ </div>
190
+ </div>
191
+
192
+ <div class="ck-api-tabs__panel">
193
+ <h2 class="ck-section-title">Responses</h2>
194
+ <p class="ck-copy">Read-only access to generated responses and their review scores. Nested under runs.</p>
195
+ <div class="ck-api-endpoint">
196
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/runs/:run_id/responses</p>
197
+ <p class="ck-meta-copy">List all responses for a run, including nested review scores.</p>
198
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl #{@base_url}/api/v1/runs/1/responses \\\n -H \"Authorization: Bearer #{token_display}\"" %>
199
+ </div>
200
+ <div class="ck-api-endpoint">
201
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/runs/:run_id/responses/:id</p>
202
+ <p class="ck-meta-copy">Get a single response with its review scores and feedback.</p>
203
+ </div>
204
+ </div>
205
+
206
+ <div class="ck-api-tabs__panel">
207
+ <h2 class="ck-section-title">Datasets</h2>
208
+ <p class="ck-copy">Data used as input for runs.</p>
209
+ <div class="ck-api-endpoint">
210
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/datasets</p>
211
+ <p class="ck-meta-copy">List all datasets.</p>
212
+ </div>
213
+ <div class="ck-api-endpoint">
214
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/datasets</p>
215
+ <p class="ck-meta-copy">Create a dataset.</p>
216
+ <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>, <code>csv_data</code></p>
217
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/datasets \\\n -H \"Authorization: Bearer #{token_display}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"name\": \"tickets\", \"csv_data\": \"text,expected_output\\\\nHello,Hi\"}'" %>
218
+ </div>
219
+ <div class="ck-api-endpoint">
220
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span>&ensp;<span class="ck-chip ck-chip--soft">PATCH</span>&ensp;<span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/datasets/:id</p>
221
+ <p class="ck-meta-copy">Get, update, or delete a dataset.</p>
222
+ </div>
223
+ </div>
224
+
225
+ <div class="ck-api-tabs__panel">
226
+ <h2 class="ck-section-title">Metrics</h2>
227
+ <p class="ck-copy">Scoring dimensions used by the judge model.</p>
228
+ <div class="ck-api-endpoint">
229
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/metrics</p>
230
+ <p class="ck-meta-copy">List all metrics.</p>
231
+ </div>
232
+ <div class="ck-api-endpoint">
233
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metrics</p>
234
+ <p class="ck-meta-copy">Create a metric.</p>
235
+ <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>&emsp;<strong>Optional:</strong>&ensp;<code>instruction</code>, <code>evaluation_steps</code> (array), <code>rubric_bands</code> (array of {stars, description})</p>
236
+ <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/metrics \\\n -H \"Authorization: Bearer #{token_display}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"name\": \"relevance\", \"instruction\": \"Is the response relevant?\"}'" %>
237
+ </div>
238
+ <div class="ck-api-endpoint">
239
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span>&ensp;<span class="ck-chip ck-chip--soft">PATCH</span>&ensp;<span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/metrics/:id</p>
240
+ <p class="ck-meta-copy">Get, update, or delete a metric.</p>
241
+ </div>
242
+ </div>
243
+
244
+ <div class="ck-api-tabs__panel">
245
+ <h2 class="ck-section-title">Criteria</h2>
246
+ <p class="ck-copy">Named groups of metrics applied to runs as a set.</p>
247
+ <div class="ck-api-endpoint">
248
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/criteria</p>
249
+ <p class="ck-meta-copy">List all criteria with their metric IDs.</p>
250
+ </div>
251
+ <div class="ck-api-endpoint">
252
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/criteria</p>
253
+ <p class="ck-meta-copy">Create a criteria group.</p>
254
+ <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>&emsp;<strong>Optional:</strong>&ensp;<code>description</code>, <code>metric_ids</code> (array)</p>
255
+ </div>
256
+ <div class="ck-api-endpoint">
257
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span>&ensp;<span class="ck-chip ck-chip--soft">PATCH</span>&ensp;<span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/criteria/:id</p>
258
+ <p class="ck-meta-copy">Get, update, or delete a criteria group. PATCH with <code>metric_ids</code> replaces all metric associations.</p>
259
+ </div>
260
+ </div>
261
+
262
+ <div class="ck-api-tabs__panel">
263
+ <h2 class="ck-section-title">Provider Credentials</h2>
264
+ <p class="ck-copy">LLM provider API keys. The <code>api_key</code> field is write-only and never returned in responses.</p>
265
+ <div class="ck-api-endpoint">
266
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/provider_credentials</p>
267
+ <p class="ck-meta-copy">List all provider credentials (api_key excluded).</p>
268
+ </div>
269
+ <div class="ck-api-endpoint">
270
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/provider_credentials</p>
271
+ <p class="ck-meta-copy">Create a provider credential.</p>
272
+ <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>provider</code> (openai, anthropic, ollama, openrouter), <code>api_key</code>&emsp;<strong>Optional:</strong>&ensp;<code>api_endpoint</code></p>
273
+ </div>
274
+ <div class="ck-api-endpoint">
275
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span>&ensp;<span class="ck-chip ck-chip--soft">PATCH</span>&ensp;<span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/provider_credentials/:id</p>
276
+ <p class="ck-meta-copy">Get, update, or delete a provider credential.</p>
277
+ </div>
278
+ </div>
279
+
280
+ </div>
281
+ </div>
282
+
283
+ <script>
284
+ function ckToggleToken(el) {
285
+ var showing = el.textContent === el.dataset.masked;
286
+ el.textContent = showing ? el.dataset.real : el.dataset.masked;
287
+ el.setAttribute('aria-pressed', showing ? 'true' : 'false');
288
+ el.setAttribute('aria-label', showing ? 'Hide API token' : 'Reveal API token');
289
+ navigator.clipboard.writeText(el.dataset.real).then(function() {
290
+ el.classList.add('ck-api-copy--done');
291
+ setTimeout(function() { el.classList.remove('ck-api-copy--done'); }, 1500);
292
+ });
293
+ }
294
+
295
+ function ckCopyExample(btn) {
296
+ var pre = btn.closest('.ck-api-example').querySelector('pre');
297
+ var text = pre.textContent;
298
+ var realToken = btn.dataset.realToken;
299
+ var displayToken = btn.dataset.displayToken;
300
+ if (realToken && displayToken) {
301
+ text = text.split(displayToken).join(realToken);
302
+ }
303
+ navigator.clipboard.writeText(text).then(function() {
304
+ btn.classList.add('ck-api-copy--done');
305
+ setTimeout(function() { btn.classList.remove('ck-api-copy--done'); }, 1500);
306
+ });
307
+ }
308
+ </script>
@@ -0,0 +1,46 @@
1
+ <%= form_with(model: criteria, url: criteria.persisted? ? criterion_path(criteria) : criteria_path, local: true) do |form| %>
2
+ <% if criteria.errors.any? %>
3
+ <div class="ck-flash ck-flash--alert">
4
+ <p class="ck-flash__title"><%= pluralize(criteria.errors.count, "problem") %> prevented this criteria from being saved.</p>
5
+ <ul class="ck-error-list">
6
+ <% criteria.errors.full_messages.each do |message| %>
7
+ <li><%= message %></li>
8
+ <% end %>
9
+ </ul>
10
+ </div>
11
+ <% end %>
12
+
13
+ <div class="ck-card ck-form-card">
14
+ <div class="ck-field">
15
+ <%= form.label :name, "Criteria name", class: "ck-label" %>
16
+ <%= form.text_field :name, class: "ck-input", placeholder: "Support quality" %>
17
+ </div>
18
+
19
+ <div class="ck-field">
20
+ <%= form.label :description, class: "ck-label" %>
21
+ <%= form.text_area :description, rows: 3, class: "ck-input ck-input--area", placeholder: "When this criteria should be used." %>
22
+ </div>
23
+
24
+ <div class="ck-field">
25
+ <p class="ck-label">Metrics in this criteria</p>
26
+ <p class="ck-hint">A criteria groups metrics together for judging.</p>
27
+ <div class="ck-list ck-list--compact">
28
+ <% @metrics.each do |metric| %>
29
+ <label class="ck-item">
30
+ <span>
31
+ <strong><%= metric.name %></strong>
32
+ <span class="ck-meta-copy"><%= metric.instruction.presence || "No instruction set." %></span>
33
+ </span>
34
+ <%# metric_ids is read once and reflects ids assigned in memory, so ticks survive a failed save and no per-metric EXISTS query is issued. %><%= check_box_tag "criteria[metric_ids][]", metric.id, criteria.metric_ids.include?(metric.id), class: "ck-checkbox" %>
35
+ </label>
36
+ <% end %>
37
+ </div>
38
+ <%= hidden_field_tag "criteria[metric_ids][]", "" %>
39
+ </div>
40
+
41
+ <div class="ck-actions">
42
+ <%= link_to "Cancel", criteria_path, class: ck_button_classes(:light, variant: :outline) %>
43
+ <%= form.submit(criteria.persisted? ? "Save criteria" : "Create criteria", class: ck_button_classes(:dark)) %>
44
+ </div>
45
+ </div>
46
+ <% end %>
@@ -0,0 +1,14 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Metrics", metrics_path %></li>
3
+ <li><%= link_to "Criteria", criteria_path %></li>
4
+ <li><%= link_to @criteria.name, criterion_path(@criteria) %></li>
5
+ <li>Edit</li>
6
+ </ol>
7
+
8
+ <section class="ck-page-header">
9
+ <div>
10
+ <h1 class="ck-title">Edit criteria</h1>
11
+ </div>
12
+ </section>
13
+
14
+ <%= render "form", criteria: @criteria %>
@@ -0,0 +1,37 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Metrics", metrics_path %></li>
3
+ <li>Criteria</li>
4
+ </ol>
5
+
6
+ <section class="ck-page-header">
7
+ <div>
8
+ <h1 class="ck-title">Criteria</h1>
9
+ <p class="ck-lead">Named groups of metrics that can be applied to a run as a set. Use criteria to bundle related metrics together for reuse across runs.</p>
10
+ </div>
11
+ <div class="ck-actions">
12
+ <%= link_to "New criteria", new_criterion_path, class: ck_button_classes(:dark) %>
13
+ </div>
14
+ </section>
15
+
16
+ <% if @criterias.any? %>
17
+ <table class="ck-results-table">
18
+ <thead>
19
+ <tr>
20
+ <th>Name</th>
21
+ <th>Metrics</th>
22
+ <th></th>
23
+ </tr>
24
+ </thead>
25
+ <tbody>
26
+ <% @criterias.each do |criteria| %>
27
+ <tr onclick="window.location='<%= criterion_path(criteria) %>'" style="cursor: pointer;">
28
+ <td><strong><%= criteria.name %></strong></td>
29
+ <td><%= criteria.metrics.size %></td>
30
+ <td class="ck-results-table__arrow">&rarr;</td>
31
+ </tr>
32
+ <% end %>
33
+ </tbody>
34
+ </table>
35
+ <% else %>
36
+ <div class="ck-empty">No criteria yet.</div>
37
+ <% end %>
@@ -0,0 +1,13 @@
1
+ <ol class="ck-breadcrumb"><%# Trail: Metrics / Criteria / New %>
2
+ <li><%= link_to("Metrics", metrics_path) %></li>
3
+ <li><%= link_to("Criteria", criteria_path) %></li>
4
+ <li>New</li>
5
+ </ol>
6
+
7
+ <section class="ck-page-header">
8
+ <div>
9
+ <h1 class="ck-title">New criteria</h1>
10
+ </div>
11
+ </section>
12
+
13
+ <%= render partial: "form", locals: { criteria: @criteria } %><%# the partial receives the record as the `criteria` local %>
@@ -0,0 +1,37 @@
1
+ <ol class="ck-breadcrumb"><%# Trail: Metrics / Criteria / this record (current page) %>
2
+ <li><%= link_to("Metrics", metrics_path) %></li>
3
+ <li><%= link_to("Criteria", criteria_path) %></li>
4
+ <li><%= @criteria.name %></li>
5
+ </ol>
6
+
7
+ <section class="ck-page-header">
8
+ <div>
9
+ <h1 class="ck-title"><%= @criteria.name %></h1>
10
+ <% unless @criteria.description.blank? %><%# the lead paragraph is omitted when there is no description %>
11
+ <p class="ck-lead"><%= @criteria.description %></p>
12
+ <% end %>
13
+ </div>
14
+ <div class="ck-actions">
15
+ <%= link_to("Edit", edit_criterion_path(@criteria), class: ck_button_classes(:light, variant: :outline)) %>
16
+ </div>
17
+ </section>
18
+
19
+ <section class="ck-card">
20
+ <p class="ck-kicker">Metrics</p>
21
+ <% if @criteria.metrics.empty? %><%# placeholder copy first; otherwise one item per metric %>
22
+ <p class="ck-copy">No metrics in this criteria yet.</p>
23
+ <% else %>
24
+ <div class="ck-list ck-list--compact">
25
+ <% @criteria.metrics.each do |m| %>
26
+ <div class="ck-item">
27
+ <div>
28
+ <p class="ck-item-title"><%= link_to(m.name, metric_path(m), class: "ck-link") %></p>
29
+ <% unless m.instruction.blank? %>
30
+ <p class="ck-copy"><%= m.instruction %></p>
31
+ <% end %>
32
+ </div>
33
+ </div>
34
+ <% end %>
35
+ </div>
36
+ <% end %>
37
+ </section>
@@ -0,0 +1,29 @@
1
+ <%= form_with(model: dataset, local: true) do |f| %><%# form for the `dataset` local; submit label depends on persisted? %>
2
+ <% unless dataset.errors.empty? %><%# validation summary, shown only when the record has errors %>
3
+ <div class="ck-flash ck-flash--alert">
4
+ <p class="ck-flash__title"><%= pluralize(dataset.errors.count, "problem") %> prevented this dataset from being saved.</p>
5
+ <ul class="ck-error-list">
6
+ <% dataset.errors.full_messages.each do |msg| %>
7
+ <li><%= msg %></li>
8
+ <% end %>
9
+ </ul>
10
+ </div>
11
+ <% end %>
12
+
13
+ <div class="ck-card ck-form-card">
14
+ <div class="ck-field">
15
+ <%= f.label(:name, "Name", class: "ck-label") %>
16
+ <%= f.text_field(:name, class: "ck-input", placeholder: "Customer support tickets") %>
17
+ </div>
18
+
19
+ <div class="ck-field">
20
+ <%= f.label(:csv_data, "CSV data", class: "ck-label") %>
21
+ <%= f.text_area(:csv_data, rows: 12, class: "ck-input ck-input--area ck-input--code", placeholder: "content,audience\nFirst ticket text,internal\nSecond ticket text,customer") %>
22
+ </div>
23
+
24
+ <div class="ck-actions">
25
+ <%= link_to("Cancel", datasets_path, class: ck_button_classes(:light, variant: :outline)) %>
26
+ <%= f.submit(dataset.persisted? ? "Save dataset" : "Create dataset", class: ck_button_classes(:dark)) %>
27
+ </div>
28
+ </div>
29
+ <% end %>
@@ -0,0 +1,13 @@
1
+ <ol class="ck-breadcrumb"><%# Trail: Datasets / this record / Edit %>
2
+ <li><%= link_to("Datasets", datasets_path) %></li>
3
+ <li><%= link_to(@dataset.name, dataset_path(@dataset)) %></li>
4
+ <li>Edit</li>
5
+ </ol>
6
+
7
+ <section class="ck-page-header">
8
+ <div>
9
+ <h1 class="ck-title">Edit dataset</h1>
10
+ </div>
11
+ </section>
12
+
13
+ <%= render partial: "form", locals: { dataset: @dataset } %><%# the partial receives the record as the `dataset` local %>
@@ -0,0 +1,38 @@
1
+ <section class="ck-page-header"><%# Datasets index: page header, then a table of @datasets or an empty state %>
2
+ <div>
3
+ <h1 class="ck-title">Datasets</h1>
4
+ <p class="ck-lead">Data used as input for runs. Each row becomes a separate prompt completion, with column values substituted into template variables.</p>
5
+ </div>
6
+ <div class="ck-actions">
7
+ <%= link_to "New dataset", new_dataset_path, class: ck_button_classes(:dark) %>
8
+ </div>
9
+ </section>
10
+
11
+ <% if @datasets.any? %>
12
+ <table class="ck-results-table">
13
+ <thead>
14
+ <tr>
15
+ <th>Name</th>
16
+ <th>Rows</th>
17
+ <th>Used in</th>
18
+ <th>Created</th>
19
+ <th></th>
20
+ </tr>
21
+ </thead>
22
+ <tbody>
23
+ <% @datasets.each do |dataset| %>
24
+ <tr onclick="window.location='<%= dataset_path(dataset) %>'" style="cursor: pointer;"><%# clicking anywhere on the row opens the dataset %>
25
+ <td><strong><%= dataset.name %></strong></td>
26
+ <td><%= dataset.row_count %></td>
27
+ <td><%= dataset.runs.size %></td><%# size reuses already-loaded runs and only falls back to a COUNT query; count would query on every row %>
28
+ <td><%= dataset.created_at.strftime("%Y-%m-%d") %></td>
29
+ <td class="ck-results-table__arrow">&rarr;</td>
30
+ </tr>
31
+ <% end %>
32
+ </tbody>
33
+ </table>
34
+ <% else %>
35
+ <div class="ck-empty">
36
+ No datasets yet. Create one to use with your prompts.
37
+ </div>
38
+ <% end %>
@@ -0,0 +1,12 @@
1
+ <ol class="ck-breadcrumb"><%# Trail: Datasets / New %>
2
+ <li><%= link_to("Datasets", datasets_path) %></li>
3
+ <li>New</li>
4
+ </ol>
5
+
6
+ <section class="ck-page-header">
7
+ <div>
8
+ <h1 class="ck-title">New dataset</h1>
9
+ </div>
10
+ </section>
11
+
12
+ <%= render partial: "form", locals: { dataset: @dataset } %><%# the partial receives the record as the `dataset` local %>
@@ -0,0 +1,45 @@
1
+ <ol class="ck-breadcrumb"><%# Trail: Datasets / this record (current page) %>
2
+ <li><%= link_to "Datasets", datasets_path %></li>
3
+ <li><%= @dataset.name %></li>
4
+ </ol>
5
+
6
+ <section class="ck-page-header">
7
+ <div>
8
+ <h1 class="ck-title"><%= @dataset.name %></h1>
9
+ </div>
10
+ <div class="ck-actions">
11
+ <%= link_to "Edit", edit_dataset_path(@dataset), class: ck_button_classes(:light, variant: :outline) %>
12
+ </div>
13
+ </section>
14
+
15
+ <section>
16
+ <p class="ck-kicker">CSV preview</p>
17
+ <pre class="ck-code ck-code--dark"><%= @dataset.csv_data %></pre>
18
+ </section>
19
+
20
+ <% if @runs.any? %><%# the Runs section is omitted entirely when @runs is empty %>
21
+ <section class="ck-card--spaced">
22
+ <p class="ck-kicker">Runs</p>
23
+
24
+ <table class="ck-results-table" style="margin-top: 0.5rem;">
25
+ <thead>
26
+ <tr>
27
+ <th>Run</th>
28
+ <th>Prompt</th>
29
+ <th>Responses</th>
30
+ <th></th>
31
+ </tr>
32
+ </thead>
33
+ <tbody>
34
+ <% @runs.each do |run| %>
35
+ <tr onclick="window.location='<%= run_path(run) %>'" style="cursor: pointer;"><%# clicking anywhere on the row opens the run %>
36
+ <td><strong><%= run.name %></strong></td>
37
+ <td><%= link_to run.prompt.name, prompt_path(run.prompt), class: "ck-link", onclick: "event.stopPropagation()" %></td><%# keep link clicks from bubbling to the row handler, which would also navigate this tab to the run %>
38
+ <td><%= run.responses.size %></td>
39
+ <td class="ck-results-table__arrow">&rarr;</td>
40
+ </tr>
41
+ <% end %>
42
+ </tbody>
43
+ </table>
44
+ </section>
45
+ <% end %>