completion-kit 0.1.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +192 -0
  4. data/Rakefile +12 -0
  5. data/app/assets/config/completion_kit_manifest.js +1 -0
  6. data/app/assets/config/manifest.js +3 -0
  7. data/app/assets/images/completion_kit/logo.svg +6 -0
  8. data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +25 -0
  9. data/app/assets/stylesheets/completion_kit/application.css +2214 -0
  10. data/app/controllers/completion_kit/api/v1/base_controller.rb +29 -0
  11. data/app/controllers/completion_kit/api/v1/criteria_controller.rb +62 -0
  12. data/app/controllers/completion_kit/api/v1/datasets_controller.rb +51 -0
  13. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -0
  14. data/app/controllers/completion_kit/api/v1/prompts_controller.rb +64 -0
  15. data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +51 -0
  16. data/app/controllers/completion_kit/api/v1/responses_controller.rb +32 -0
  17. data/app/controllers/completion_kit/api/v1/runs_controller.rb +71 -0
  18. data/app/controllers/completion_kit/api_reference_controller.rb +9 -0
  19. data/app/controllers/completion_kit/application_controller.rb +31 -0
  20. data/app/controllers/completion_kit/criteria_controller.rb +67 -0
  21. data/app/controllers/completion_kit/datasets_controller.rb +53 -0
  22. data/app/controllers/completion_kit/mcp_controller.rb +57 -0
  23. data/app/controllers/completion_kit/metrics_controller.rb +52 -0
  24. data/app/controllers/completion_kit/prompts_controller.rb +69 -0
  25. data/app/controllers/completion_kit/provider_credentials_controller.rb +63 -0
  26. data/app/controllers/completion_kit/responses_controller.rb +44 -0
  27. data/app/controllers/completion_kit/runs_controller.rb +131 -0
  28. data/app/helpers/completion_kit/application_helper.rb +193 -0
  29. data/app/jobs/completion_kit/application_job.rb +4 -0
  30. data/app/jobs/completion_kit/generate_job.rb +12 -0
  31. data/app/jobs/completion_kit/judge_job.rb +12 -0
  32. data/app/jobs/completion_kit/model_discovery_job.rb +29 -0
  33. data/app/mailers/completion_kit/application_mailer.rb +6 -0
  34. data/app/models/completion_kit/application_record.rb +5 -0
  35. data/app/models/completion_kit/criteria.rb +22 -0
  36. data/app/models/completion_kit/criteria_membership.rb +20 -0
  37. data/app/models/completion_kit/dataset.rb +24 -0
  38. data/app/models/completion_kit/metric.rb +97 -0
  39. data/app/models/completion_kit/model.rb +13 -0
  40. data/app/models/completion_kit/prompt.rb +99 -0
  41. data/app/models/completion_kit/provider_credential.rb +114 -0
  42. data/app/models/completion_kit/response.rb +30 -0
  43. data/app/models/completion_kit/review.rb +28 -0
  44. data/app/models/completion_kit/run.rb +253 -0
  45. data/app/models/completion_kit/run_metric.rb +6 -0
  46. data/app/models/completion_kit/suggestion.rb +8 -0
  47. data/app/services/completion_kit/anthropic_client.rb +86 -0
  48. data/app/services/completion_kit/api_config.rb +80 -0
  49. data/app/services/completion_kit/csv_processor.rb +65 -0
  50. data/app/services/completion_kit/judge_service.rb +87 -0
  51. data/app/services/completion_kit/llm_client.rb +45 -0
  52. data/app/services/completion_kit/mcp_dispatcher.rb +53 -0
  53. data/app/services/completion_kit/mcp_tools/criteria.rb +106 -0
  54. data/app/services/completion_kit/mcp_tools/datasets.rb +90 -0
  55. data/app/services/completion_kit/mcp_tools/metrics.rb +98 -0
  56. data/app/services/completion_kit/mcp_tools/prompts.rb +112 -0
  57. data/app/services/completion_kit/mcp_tools/provider_credentials.rb +97 -0
  58. data/app/services/completion_kit/mcp_tools/responses.rb +45 -0
  59. data/app/services/completion_kit/mcp_tools/runs.rb +130 -0
  60. data/app/services/completion_kit/model_discovery_service.rb +223 -0
  61. data/app/services/completion_kit/ollama_client.rb +80 -0
  62. data/app/services/completion_kit/open_ai_client.rb +71 -0
  63. data/app/services/completion_kit/open_router_client.rb +69 -0
  64. data/app/services/completion_kit/prompt_improvement_service.rb +81 -0
  65. data/app/views/completion_kit/api_reference/_example.html.erb +6 -0
  66. data/app/views/completion_kit/api_reference/index.html.erb +308 -0
  67. data/app/views/completion_kit/criteria/_form.html.erb +46 -0
  68. data/app/views/completion_kit/criteria/edit.html.erb +14 -0
  69. data/app/views/completion_kit/criteria/index.html.erb +37 -0
  70. data/app/views/completion_kit/criteria/new.html.erb +13 -0
  71. data/app/views/completion_kit/criteria/show.html.erb +37 -0
  72. data/app/views/completion_kit/datasets/_form.html.erb +29 -0
  73. data/app/views/completion_kit/datasets/edit.html.erb +13 -0
  74. data/app/views/completion_kit/datasets/index.html.erb +38 -0
  75. data/app/views/completion_kit/datasets/new.html.erb +12 -0
  76. data/app/views/completion_kit/datasets/show.html.erb +45 -0
  77. data/app/views/completion_kit/metrics/_form.html.erb +72 -0
  78. data/app/views/completion_kit/metrics/edit.html.erb +13 -0
  79. data/app/views/completion_kit/metrics/index.html.erb +34 -0
  80. data/app/views/completion_kit/metrics/new.html.erb +12 -0
  81. data/app/views/completion_kit/metrics/show.html.erb +49 -0
  82. data/app/views/completion_kit/prompts/_form.html.erb +52 -0
  83. data/app/views/completion_kit/prompts/edit.html.erb +13 -0
  84. data/app/views/completion_kit/prompts/index.html.erb +46 -0
  85. data/app/views/completion_kit/prompts/new.html.erb +12 -0
  86. data/app/views/completion_kit/prompts/show.html.erb +156 -0
  87. data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +30 -0
  88. data/app/views/completion_kit/provider_credentials/_form.html.erb +71 -0
  89. data/app/views/completion_kit/provider_credentials/edit.html.erb +12 -0
  90. data/app/views/completion_kit/provider_credentials/index.html.erb +41 -0
  91. data/app/views/completion_kit/provider_credentials/new.html.erb +12 -0
  92. data/app/views/completion_kit/responses/show.html.erb +87 -0
  93. data/app/views/completion_kit/runs/_actions.html.erb +14 -0
  94. data/app/views/completion_kit/runs/_form.html.erb +159 -0
  95. data/app/views/completion_kit/runs/_progress.html.erb +18 -0
  96. data/app/views/completion_kit/runs/_response_row.html.erb +13 -0
  97. data/app/views/completion_kit/runs/_sort_toolbar.html.erb +8 -0
  98. data/app/views/completion_kit/runs/_status_header.html.erb +15 -0
  99. data/app/views/completion_kit/runs/edit.html.erb +14 -0
  100. data/app/views/completion_kit/runs/index.html.erb +43 -0
  101. data/app/views/completion_kit/runs/new.html.erb +12 -0
  102. data/app/views/completion_kit/runs/show.html.erb +79 -0
  103. data/app/views/completion_kit/runs/suggestion.html.erb +47 -0
  104. data/app/views/layouts/completion_kit/application.html.erb +77 -0
  105. data/config/routes.rb +55 -0
  106. data/db/migrate/20260311000001_create_completion_kit_tables.rb +87 -0
  107. data/db/migrate/20260326000001_rename_criteria_to_instruction_on_metrics_and_reviews.rb +6 -0
  108. data/db/migrate/20260327000001_add_progress_to_runs.rb +6 -0
  109. data/db/migrate/20260327100001_replace_criteria_with_direct_metrics_on_runs.rb +12 -0
  110. data/db/migrate/20260328000001_add_error_message_to_runs.rb +5 -0
  111. data/db/migrate/20260329000001_create_completion_kit_models.rb +20 -0
  112. data/db/migrate/20260401170001_add_discovery_columns_to_completion_kit_provider_credentials.rb +7 -0
  113. data/db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb +5 -0
  114. data/db/migrate/20260403000002_create_completion_kit_suggestions.rb +13 -0
  115. data/db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb +5 -0
  116. data/lib/completion-kit.rb +1 -0
  117. data/lib/completion_kit/engine.rb +35 -0
  118. data/lib/completion_kit/version.rb +3 -0
  119. data/lib/completion_kit.rb +55 -0
  120. data/lib/generators/completion_kit/install_generator.rb +21 -0
  121. data/lib/generators/completion_kit/templates/README +20 -0
  122. data/lib/generators/completion_kit/templates/initializer.rb +43 -0
  123. metadata +361 -0
@@ -0,0 +1,87 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Prompts", prompts_path %></li>
3
+ <li><%= link_to @run.prompt.name, prompt_path(@run.prompt) %></li>
4
+ <li><%= link_to @run.name, run_path(@run) %></li>
5
+ <li>Response #<%= @response_number %></li>
6
+ </ol>
7
+
8
+ <section class="ck-page-header">
9
+ <div>
10
+ <div class="ck-inline">
11
+ <h1 class="ck-title">Response #<%= @response_number %></h1>
12
+ <% score = @response.score %>
13
+ <% if score %>
14
+ <span class="<%= ck_badge_classes(ck_score_kind(score)) %>"><%= score %></span>
15
+ <% end %>
16
+ </div>
17
+ <p class="ck-meta-copy">
18
+ <span class="ck-run-config__key">Prompt</span> <%= link_to @run.prompt.display_name, prompt_path(@run.prompt), class: "ck-link" %>
19
+ <% if @run.dataset %>
20
+ &ensp;&middot;&ensp;<span class="ck-run-config__key">Dataset</span> <%= link_to @run.dataset.name, dataset_path(@run.dataset), class: "ck-link" %>
21
+ <% end %>
22
+ </p>
23
+ </div>
24
+ <div class="ck-actions">
25
+ <% if @prev_response %>
26
+ <%= link_to "← Prev", run_response_path(@run, @prev_response, sort: @sort), class: ck_button_classes(:light, variant: :outline) %>
27
+ <% end %>
28
+ <% if @next_response %>
29
+ <%= link_to "Next →", run_response_path(@run, @next_response, sort: @sort), class: ck_button_classes(:light, variant: :outline) %>
30
+ <% end %>
31
+ </div>
32
+ </section>
33
+
34
+ <section>
35
+ <p class="ck-kicker">Input</p>
36
+ <pre class="ck-code ck-code--dark"><%= begin; JSON.pretty_generate(JSON.parse(@response.input_data)); rescue; @response.input_data; end %></pre>
37
+ </section>
38
+
39
+ <section class="ck-card--spaced">
40
+ <div class="ck-prompt-preview__header">
41
+ <p class="ck-kicker">Response</p>
42
+ <span class="ck-chip ck-chip--soft" style="text-transform: none;"><%= @run.prompt.llm_model %></span>
43
+ </div>
44
+ <pre class="ck-code"><%= @response.response_text %></pre>
45
+ </section>
46
+
47
+ <% if @response.expected_output.present? %>
48
+ <section class="ck-card--spaced">
49
+ <p class="ck-kicker">Expected</p>
50
+ <pre class="ck-code"><%= @response.expected_output %></pre>
51
+ </section>
52
+ <% end %>
53
+
54
+ <% if @reviews.any? %>
55
+ <section class="ck-card--spaced">
56
+ <div class="ck-prompt-preview__header">
57
+ <p class="ck-kicker">Review</p>
58
+ <% if @run.judge_model.present? %>
59
+ <span class="ck-chip ck-chip--soft" style="text-transform: none;"><%= @run.judge_model %></span>
60
+ <% end %>
61
+ </div>
62
+
63
+ <div class="ck-review-list">
64
+ <% @reviews.each do |review| %>
65
+ <div class="ck-review-card">
66
+ <div class="ck-review-card__header">
67
+ <span class="ck-review-card__metric"><% if review.metric %><%= link_to review.metric_name, metric_path(review.metric), class: "ck-link" %><% else %><%= review.metric_name %><% end %></span>
68
+ <div class="ck-inline">
69
+ <% if review.ai_score %>
70
+ <% 5.times do |i| %>
71
+ <svg viewBox="0 0 24 24" width="16" height="16" stroke-width="1.75" class="ck-star <%= i < review.ai_score.to_i ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
72
+ <% end %>
73
+ <% else %>
74
+ <span class="<%= ck_badge_classes(:pending) %>">Pending</span>
75
+ <% end %>
76
+ </div>
77
+ </div>
78
+ <% if review.ai_feedback.present? %>
79
+ <div class="ck-review-card__feedback">
80
+ <div class="ck-note-box"><%= review.ai_feedback %></div>
81
+ </div>
82
+ <% end %>
83
+ </div>
84
+ <% end %>
85
+ </div>
86
+ </section>
87
+ <% end %>
@@ -0,0 +1,14 @@
1
+ <div class="ck-actions" id="run_actions">
2
+ <% running = run.status == "generating" || run.status == "judging" %>
3
+ <%= button_to run_path(run), method: :delete, form_class: "inline-block", class: "ck-icon-btn", title: "Delete run", "aria-label": "Delete run", disabled: running, data: { turbo_confirm: "Delete this run and all its responses?" } do %><%= heroicon_tag "trash", variant: :outline, size: 16, "aria-hidden": "true" %><% end %>
4
+ <% if running %>
5
+ <%= link_to "Edit", edit_run_path(run), class: ck_button_classes(:light, variant: :outline) + " disabled", "aria-disabled": "true", tabindex: "-1" %>
6
+ <% else %>
7
+ <%= link_to "Edit", edit_run_path(run), class: ck_button_classes(:light, variant: :outline) %>
8
+ <% if run.status == "pending" %>
9
+ <%= button_to "Start", generate_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
10
+ <% elsif run.status == "failed" %>
11
+ <%= button_to "Retry", generate_run_path(run), method: :post, class: ck_button_classes(:light, variant: :outline), form_class: "inline-block" %>
12
+ <% end %>
13
+ <% end %>
14
+ </div>
@@ -0,0 +1,159 @@
1
+ <%= form_with(model: run, local: true) do |form| %>
2
+ <% if run.errors.any? %>
3
+ <div class="ck-flash ck-flash--alert">
4
+ <p class="ck-flash__title"><%= pluralize(run.errors.count, "problem") %> prevented this run from being saved.</p>
5
+ <ul class="ck-error-list">
6
+ <% run.errors.full_messages.each do |message| %>
7
+ <li><%= message %></li>
8
+ <% end %>
9
+ </ul>
10
+ </div>
11
+ <% end %>
12
+
13
+ <div class="ck-card ck-form-card">
14
+ <div class="ck-field">
15
+ <%= form.label :name, "Name (auto-generated if blank)", class: "ck-label" %>
16
+ <%= form.text_field :name, class: "ck-input", placeholder: run.name.presence || "Auto-generated from prompt + version + timestamp" %>
17
+ </div>
18
+
19
+ <div class="ck-field">
20
+ <%= form.label :prompt_id, "Prompt", class: "ck-label" %>
21
+ <%= form.select :prompt_id, @prompts.map { |p| [p.display_name, p.id, { "data-has-variables" => p.variables.any? ? "1" : "0" }] }, { include_blank: "Select a prompt" }, { class: "ck-input", id: "run_prompt_id" } %>
22
+ </div>
23
+
24
+ <div class="ck-field" id="dataset-field">
25
+ <%= form.label :dataset_id, "Dataset", class: "ck-label" %>
26
+ <% if @datasets.empty? %>
27
+ <p class="ck-meta-copy">No datasets yet. <%= link_to "Create a dataset", new_dataset_path, class: "ck-link" %> first.</p>
28
+ <% else %>
29
+ <%= form.select :dataset_id, @datasets.map { |d| [d.name, d.id] }, { include_blank: "Select a dataset" }, { class: "ck-input", id: "run_dataset_id" } %>
30
+ <% end %>
31
+ <p class="ck-field-hint" id="dataset-hint" style="display: none; color: var(--ck-warning);">This prompt uses variables. Select a dataset to provide values.</p>
32
+ </div>
33
+
34
+ <div class="ck-field">
35
+ <label class="ck-label" for="run_temperature" style="position: relative;">
36
+ Temperature
37
+ <span class="ck-info-toggle">?</span>
38
+ <div class="ck-info-popup">
39
+ Controls how random the model's output is. Lower values make the model more focused and deterministic — it'll pick the most likely words. Higher values introduce more variety and creativity, but also more risk of odd phrasing. Most LLMs default to 1.0. For evaluation, try different values to see how your prompt performs under varying conditions.
40
+ </div>
41
+ </label>
42
+ <div class="ck-slider-row">
43
+ <%# Seed the slider with the same fallback the read-out next to it uses; a range input with no value starts at its midpoint, which would disagree with the displayed number when temperature is unset. %><%= form.range_field :temperature, value: run.temperature || 1.0, min: 0, max: 1, step: 0.1, class: "ck-slider", id: "run_temperature", oninput: "document.getElementById('temp-value').textContent = this.value" %>
44
+ <span class="ck-slider-value" id="temp-value"><%= run.temperature || 1.0 %></span>
45
+ </div>
46
+ </div>
47
+
48
+ <div class="ck-field" id="judge-field">
49
+ <%= form.label :judge_model, "Judge model", class: "ck-label" %>
50
+ <% available = CompletionKit::ApiConfig.available_models(scope: :judging) %>
51
+ <% if available.any? %>
52
+ <div class="ck-select-with-action">
53
+ <%= form.select :judge_model, ck_grouped_models(available, run.judge_model), { include_blank: "None" }, { class: "ck-input", id: "run_judge_model" } %>
54
+ <button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
55
+ </div>
56
+ <p class="ck-field-hint" id="judge-hint"></p>
57
+ <% CompletionKit::ProviderCredential.find_each do |pc| %>
58
+ <%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
59
+ <% end %>
60
+ <p class="ck-field-hint" id="refresh-status" style="min-height: 1.2em; margin-top: -0.25rem; font-size: 0.75rem;">&nbsp;</p>
61
+ <% else %>
62
+ <p class="ck-field-hint" style="color: var(--ck-warning);">No judge models available.&ensp;<%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %></p>
63
+ <%= form.hidden_field :judge_model, value: nil, id: "run_judge_model" %>
64
+ <% end %>
65
+ </div>
66
+
67
+ <div class="ck-field" id="metrics-field">
68
+ <label class="ck-label">Metrics</label>
69
+ <p class="ck-field-hint" id="metrics-hint"></p>
70
+ <% if @all_metrics.empty? %>
71
+ <p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet.&ensp;<%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
72
+ <% else %>
73
+ <% if @criterias.any? %>
74
+ <p class="ck-meta-copy" style="margin-bottom: 0.5rem;">
75
+ Quick add:&ensp;
76
+ <% @criterias.each do |c| %>
77
+ <span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddCriteria(<%= c.metric_ids.to_json %>)"><%= c.name %></span>&ensp;
78
+ <% end %>
79
+ </p>
80
+ <% end %>
81
+ <div class="ck-metric-checkboxes">
82
+ <% @all_metrics.each do |metric| %>
83
+ <label class="ck-checkbox-label">
84
+ <%= check_box_tag "run[metric_ids][]", metric.id, run.metric_ids.include?(metric.id), class: "ck-checkbox", id: "run_metric_#{metric.id}" %>
85
+ <span><%= metric.name %></span>
86
+ </label>
87
+ <% end %>
88
+ </div>
89
+ <% end %>
90
+ </div>
91
+
92
+ <script>
93
/**
 * Re-derives the run form's hint texts, field highlighting, dataset warning
 * and submit-button state from the current prompt / judge / metric / dataset
 * selections. Every element is looked up by id and is optional: a missing
 * element is simply skipped.
 *
 * Submit is enabled only when a prompt is chosen and the judge model and the
 * metric selection are either both present or both absent.
 */
function updateRunForm() {
  function byId(id) {
    return document.getElementById(id);
  }
  function valueOf(el) {
    return el ? el.value : '';
  }

  var NEUTRAL = 'ck-field';
  var INFO = 'ck-field ck-field--info';

  var promptEl = byId('run_prompt_id');
  var hasPrompt = valueOf(promptEl) !== '';
  var hasJudge = Boolean(valueOf(byId('run_judge_model')));
  var hasMetrics =
    document.querySelectorAll('input[name="run[metric_ids][]"]:checked').length > 0;

  // Decide the final state first, then write it to the DOM in one pass.
  var judgeClass = NEUTRAL;
  var metricsClass = NEUTRAL;
  var judgeText = '';
  var metricsText = '';

  if (hasJudge && hasMetrics) {
    judgeText = 'Responses will be generated then judged automatically.';
  } else if (hasJudge) {
    // Judge picked but nothing to score against.
    judgeClass = INFO;
    metricsClass = INFO;
    metricsText = 'Select at least one metric or criteria to enable judging.';
  } else if (hasMetrics) {
    // Metrics picked but nobody to do the scoring.
    judgeClass = INFO;
    judgeText = 'Select a judge model to enable judging.';
  }

  var judgeField = byId('judge-field');
  var metricsField = byId('metrics-field');
  var judgeHint = byId('judge-hint');
  var metricsHint = byId('metrics-hint');
  if (judgeField) judgeField.className = judgeClass;
  if (metricsField) metricsField.className = metricsClass;
  if (judgeHint) judgeHint.textContent = judgeText;
  if (metricsHint) metricsHint.textContent = metricsText;

  // Warn (without blocking submit) when the chosen prompt declares variables
  // via its option's data-has-variables attribute but no dataset is selected.
  var chosenOption = promptEl ? promptEl.options[promptEl.selectedIndex] : null;
  var usesVariables = chosenOption && chosenOption.dataset.hasVariables === '1';
  var hasDataset = Boolean(valueOf(byId('run_dataset_id')));
  var datasetHint = byId('dataset-hint');
  if (datasetHint) {
    datasetHint.style.display = usesVariables && !hasDataset ? '' : 'none';
  }

  var submitBtn = byId('run-submit');
  if (submitBtn) {
    submitBtn.disabled = !(hasPrompt && hasJudge === hasMetrics);
  }
}
133
+
134
/**
 * Ticks the metric checkbox for every id in `metricIds`, then refreshes the
 * form state. Ids with no matching `run_metric_<id>` checkbox are ignored;
 * boxes that are already ticked stay ticked (this never unticks anything).
 *
 * @param {Array<number|string>} metricIds - metric ids belonging to a criteria.
 */
function ckQuickAddCriteria(metricIds) {
  metricIds
    .map(function (metricId) {
      return document.getElementById('run_metric_' + metricId);
    })
    .filter(Boolean)
    .forEach(function (box) {
      box.checked = true;
    });
  updateRunForm();
}
141
+
142
// Wire every input that feeds updateRunForm() to re-run it on change, then
// run it once so the hints and submit button match the initially rendered
// values. Any of the three selects may be absent from the page.
// NOTE(review): these stay as top-level `var` bindings; switching to
// let/const would throw if this inline script is ever evaluated twice in the
// same document — confirm before modernising.
var judgeEl = document.getElementById('run_judge_model');
var promptEl = document.getElementById('run_prompt_id');
var datasetEl = document.getElementById('run_dataset_id');

[judgeEl, promptEl, datasetEl].forEach(function (control) {
  if (control) control.addEventListener('change', updateRunForm);
});

document
  .querySelectorAll('input[name="run[metric_ids][]"]')
  .forEach(function (metricBox) {
    metricBox.addEventListener('change', updateRunForm);
  });

updateRunForm();
152
+ </script>
153
+
154
+ <div class="ck-actions">
155
+ <%= link_to "Cancel", run.persisted? ? run_path(run) : runs_path, class: ck_button_classes(:light, variant: :outline) %>
156
+ <%= form.submit(run.persisted? ? "Save run" : "Create run", class: ck_button_classes(:dark), id: "run-submit") %>
157
+ </div>
158
+ </div>
159
+ <% end %>
@@ -0,0 +1,18 @@
1
+ <div id="run_progress">
2
+ <% if run.status == "generating" || run.status == "judging" %>
3
+ <div class="ck-discovery-bar">
4
+ <div class="ck-discovery-bar__label">
5
+ <%= ck_run_status_label(run) %>
6
+ </div>
7
+ <% if run.progress_total.to_i > 0 %>
8
+ <div class="ck-discovery-bar__track">
9
+ <%# Clamp to 0..100 so a counter that overshoots its total can never draw the fill wider than the track. %><div class="ck-discovery-bar__fill" style="width: <%= (run.progress_current.to_f / run.progress_total * 100).round.clamp(0, 100) %>%"></div>
10
+ </div>
11
+ <% else %>
12
+ <div class="ck-discovery-bar__track">
13
+ <div class="ck-discovery-bar__fill ck-discovery-bar__fill--indeterminate"></div>
14
+ </div>
15
+ <% end %>
16
+ </div>
17
+ <% end %>
18
+ </div>
@@ -0,0 +1,13 @@
1
+ <%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row", id: "response_#{response.id}" do %>
2
+ <span class="ck-response-row__index">#<%= index %></span>
3
+ <span class="ck-response-row__text"><%= truncate(response.response_text.to_s, length: 160) %></span>
4
+ <span class="ck-response-row__score">
5
+ <% if response.reviewed? %>
6
+ <span class="ck-score"><span class="ck-score__star">★</span> <%= response.score %></span>
7
+ <% elsif run.status == "failed" %>
8
+ <span class="ck-chip">Failed</span>
9
+ <% elsif run.status == "judging" %>
10
+ <span class="ck-chip">Judging</span>
11
+ <% end %>
12
+ </span>
13
+ <% end %>
@@ -0,0 +1,8 @@
1
+ <div id="run_sort_toolbar">
2
+ <% if run.status == "completed" && run.judge_configured? && run.responses.joins(:reviews).exists? %>
3
+ <div class="ck-toolbar" style="margin-top: 1.5rem;">
4
+ <%= link_to "Best first", run_path(run, sort: "score_desc"), class: params[:sort].blank? || params[:sort] == "score_desc" ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) %>
5
+ <%= link_to "Worst first", run_path(run, sort: "score_asc"), class: params[:sort] == "score_asc" ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) %>
6
+ </div>
7
+ <% end %>
8
+ </div>
@@ -0,0 +1,15 @@
1
+ <div id="run_status_header">
2
+ <% if run.status == "failed" %>
3
+ <div class="ck-flash ck-flash--alert">
4
+ <%= run.error_message.presence || "Run failed." %>
5
+ </div>
6
+ <% end %>
7
+ <section class="ck-page-header">
8
+ <div>
9
+ <p class="ck-kicker"><span class="<%= ck_run_dot(run) %>"></span> <%= ck_run_status_label(run) %></p>
10
+ <h1 class="ck-title"><%= run.name %></h1>
11
+ <p class="ck-meta-copy"><%= link_to run.prompt.display_name, prompt_path(run.prompt), class: "ck-link" %>&ensp;<span class="ck-chip" style="text-transform: none;"><%= run.prompt.llm_model %></span></p>
12
+ </div>
13
+ <%= render "completion_kit/runs/actions", run: run %>
14
+ </section>
15
+ </div>
@@ -0,0 +1,14 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Prompts", prompts_path %></li>
3
+ <li><%= link_to @run.prompt.name, prompt_path(@run.prompt) %></li>
4
+ <li><%= link_to @run.name, run_path(@run) %></li>
5
+ <li>Edit</li>
6
+ </ol>
7
+
8
+ <section class="ck-page-header">
9
+ <div>
10
+ <h1 class="ck-title">Edit run</h1>
11
+ </div>
12
+ </section>
13
+
14
+ <%= render "form", run: @run %>
@@ -0,0 +1,43 @@
1
+ <section class="ck-page-header">
2
+ <div>
3
+ <h1 class="ck-title">Runs</h1>
4
+ <p class="ck-lead">Execute a prompt against a dataset and optionally score the responses with a judge model. Each run captures the full set of inputs, outputs, and evaluations.</p>
5
+ </div>
6
+ <div class="ck-actions">
7
+ <%= link_to "New run", new_run_path, class: ck_button_classes(:dark) %>
8
+ </div>
9
+ </section>
10
+
11
+ <% if @runs.any? %>
12
+ <table class="ck-results-table">
13
+ <thead>
14
+ <tr>
15
+ <th>Run</th>
16
+ <th>Prompt</th>
17
+ <th>Responses</th>
18
+ <th>Avg score</th>
19
+ <th></th>
20
+ </tr>
21
+ </thead>
22
+ <tbody>
23
+ <% @runs.each do |run| %>
24
+ <tr onclick="window.location='<%= run_path(run) %>'" style="cursor: pointer;">
25
+ <td><span class="ck-run-name"><span class="<%= ck_run_dot(run) %>"></span><strong><%= run.name %></strong></span></td>
26
+ <td><%= link_to run.prompt.name, prompt_path(run.prompt), class: "ck-link" %> &ensp;<span class="ck-chip ck-chip--soft">v<%= run.prompt.version_number %></span></td>
27
+ <td><%= run.responses.size %></td>
28
+ <td>
29
+ <% avg = run.avg_score %>
30
+ <% if avg %>
31
+ <span class="<%= ck_badge_classes(ck_score_kind(avg)) %>"><%= avg %></span>
32
+ <% else %>
33
+ &mdash;
34
+ <% end %>
35
+ </td>
36
+ <td class="ck-results-table__arrow">&rarr;</td>
37
+ </tr>
38
+ <% end %>
39
+ </tbody>
40
+ </table>
41
+ <% else %>
42
+ <div class="ck-empty">No runs yet.&ensp;<%= link_to "Create your first run →", new_run_path, class: "ck-link" %></div>
43
+ <% end %>
@@ -0,0 +1,12 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Runs", runs_path %></li>
3
+ <li>New run</li>
4
+ </ol>
5
+
6
+ <section class="ck-page-header">
7
+ <div>
8
+ <h1 class="ck-title">New run</h1>
9
+ </div>
10
+ </section>
11
+
12
+ <%= render "form", run: @run %>
@@ -0,0 +1,79 @@
1
+ <%= turbo_stream_from "completion_kit_run_#{@run.id}" %>
2
+
3
+ <ol class="ck-breadcrumb">
4
+ <li><%= link_to "Runs", runs_path %></li>
5
+ <li><%= @run.name %></li>
6
+ </ol>
7
+
8
+ <%= render "status_header", run: @run %>
9
+
10
+ <div class="ck-run-config">
11
+ <div class="ck-run-config__row">
12
+ <span class="ck-run-config__key">Created</span>
13
+ <time datetime="<%= @run.created_at.iso8601 %>" data-local-time><%= @run.created_at.utc.strftime("%Y-%m-%d %H:%M UTC") %></time>
14
+ </div>
15
+ <div class="ck-run-config__row">
16
+ <span class="ck-run-config__key">Dataset</span>
17
+ <% if @run.dataset %>
18
+ <%= link_to @run.dataset.name, dataset_path(@run.dataset), class: "ck-link" %>
19
+ <% else %>
20
+ <span class="ck-run-config__none">None</span>
21
+ <% end %>
22
+ </div>
23
+ <% if @run.judge_model.present? %>
24
+ <div class="ck-run-config__row">
25
+ <span class="ck-run-config__key">Judge</span>
26
+ <span style="text-transform: none;"><%= @run.judge_model %></span>
27
+ <% unless @run.judge_configured? %>
28
+ <span class="ck-run-config__warn">provider not configured</span>
29
+ <% end %>
30
+ </div>
31
+ <% end %>
32
+ <% if @run.metrics.any? %>
33
+ <div class="ck-run-config__row">
34
+ <span class="ck-run-config__key">Metrics</span>
35
+ <%# safe_join keeps each link_to result as-is and escapes anything not already marked safe, instead of blanket-marking the joined string html_safe. %><span><%= safe_join(@run.metrics.map { |m| link_to(m.name, metric_path(m), class: "ck-link") }, ", ") %></span>
36
+ </div>
37
+ <% end %>
38
+ <div class="ck-run-config__row">
39
+ <span class="ck-run-config__key">Temperature</span>
40
+ <span><%= @run.temperature %></span>
41
+ </div>
42
+ </div>
43
+
44
+ <div class="ck-prompt-preview">
45
+ <div class="ck-prompt-preview__header">
46
+ <p class="ck-kicker">Prompt</p>
47
+ <% if @run.suggestions.any? %>
48
+ <%= link_to "View suggestion", suggestion_run_path(@run), class: ck_button_classes(:light, variant: :outline) + " ck-button--sm" %>
49
+ <% elsif @run.status == "completed" && @run.responses.joins(:reviews).exists? %>
50
+ <%= button_to "Suggest improvements", suggest_run_path(@run), method: :post, class: ck_button_classes(:light, variant: :outline) + " ck-button--sm", form_class: "inline-block" %>
51
+ <% end %>
52
+ </div>
53
+ <p class="ck-prompt-preview__text" id="prompt_text"><%= @run.prompt.template %></p>
54
+ <% if @run.prompt.template.length > 200 %>
55
+ <button type="button" class="ck-link" id="prompt_toggle" aria-expanded="false" aria-controls="prompt_text" onclick="var t=document.getElementById('prompt_text');var l=this;var expanded=t.classList.toggle('ck-prompt-preview__text--expanded');l.textContent=expanded?'Show less':'Show more';l.setAttribute('aria-expanded',expanded?'true':'false')">Show more</button>
56
+ <% end %>
57
+ </div>
58
+
59
+ <% if @run.dataset %>
60
+ <details class="ck-details">
61
+ <summary class="ck-details__summary">Dataset preview</summary>
62
+ <pre class="ck-code ck-code--dark"><%= @run.dataset.csv_data.to_s.lines.first(10).join %><% if @run.dataset.csv_data.to_s.lines.count > 10 %>...<% end %></pre>
63
+ </details>
64
+ <% end %>
65
+
66
+ <%= render "progress", run: @run %>
67
+
68
+ <% valid_responses = @responses.reject { |r| r.response_text&.start_with?("Error:") } %>
69
+
70
+ <%= render "sort_toolbar", run: @run %>
71
+
72
+ <% if valid_responses.any? %>
73
+ <p class="ck-kicker" style="margin-top: 1.5rem;">Responses</p>
74
+ <% end %>
75
+ <div id="run_responses">
76
+ <% valid_responses.each_with_index do |response, idx| %>
77
+ <%= render "response_row", run: @run, response: response, index: idx + 1 %>
78
+ <% end %>
79
+ </div>
@@ -0,0 +1,47 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Runs", runs_path %></li>
3
+ <li><%= link_to @run.name, run_path(@run) %></li>
4
+ <li>Suggestion</li>
5
+ </ol>
6
+
7
+ <section class="ck-page-header">
8
+ <div>
9
+ <p class="ck-kicker"><span class="ck-dot ck-dot--running" style="animation: none; background: var(--ck-accent);"></span> Prompt improvement</p>
10
+ <h1 class="ck-title"><%= @run.prompt.name %></h1>
11
+ <p class="ck-meta-copy">Based on <%= @run.responses.count %> responses scored <%= @run.avg_score %>/5</p>
12
+ </div>
13
+ <div class="ck-actions">
14
+ <%= link_to "Back to run", run_path(@run), class: ck_button_classes(:light, variant: :outline) %>
15
+ <% unless @suggestion.applied_at? %>
16
+ <%= button_to "Apply suggestion", apply_suggestion_run_path(@run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
17
+ <% else %>
18
+ <span class="ck-chip" style="background: var(--ck-success-soft); color: var(--ck-success);">Applied</span>
19
+ <% end %>
20
+ </div>
21
+ </section>
22
+
23
+ <div class="ck-suggest-reasoning">
24
+ <p class="ck-kicker">Why these changes</p>
25
+ <div class="ck-suggest-reasoning__body"><%= simple_format(@suggestion.reasoning) %></div>
26
+ </div>
27
+
28
+ <div class="ck-suggest-diff">
29
+ <div class="ck-suggest-diff__pane">
30
+ <div class="ck-suggest-diff__header">
31
+ <span class="ck-suggest-diff__label ck-suggest-diff__label--before">Current prompt</span>
32
+ <span class="ck-suggest-diff__version"><%= @run.prompt.version_label %></span>
33
+ </div>
34
+ <pre class="ck-suggest-diff__code"><%= ck_word_diff_old(@suggestion.original_template, @suggestion.suggested_template) %></pre>
35
+ </div>
36
+ <div class="ck-suggest-diff__pane">
37
+ <div class="ck-suggest-diff__header">
38
+ <span class="ck-suggest-diff__label ck-suggest-diff__label--after">Suggested prompt</span>
39
+ </div>
40
+ <pre class="ck-suggest-diff__code"><%= ck_word_diff_new(@suggestion.original_template, @suggestion.suggested_template) %></pre>
41
+ </div>
42
+ </div>
43
+
44
+ <div class="ck-suggest-full">
45
+ <p class="ck-kicker">Full suggested prompt</p>
46
+ <pre class="ck-code ck-code--dark"><%= @suggestion.suggested_template %></pre>
47
+ </div>
@@ -0,0 +1,105 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+   <%# Application layout: top navigation, flash messages, the page body, and the shared inline scripts. %>
5
+   <title>CompletionKit</title>
6
+   <meta name="viewport" content="width=device-width, initial-scale=1">
7
+   <%= csrf_meta_tags %>
8
+   <%= csp_meta_tag %>
9
+
10
+   <%= favicon_link_tag "completion_kit/logo.svg", type: "image/svg+xml" %>
11
+   <%= stylesheet_link_tag "completion_kit/application", media: "all" %>
12
+   <%= javascript_include_tag "turbo", type: "module" %>
13
+   <%= action_cable_meta_tag %>
14
+ </head>
15
+ <body class="ck-app">
16
+   <header class="ck-topbar">
17
+     <div class="ck-wrap ck-topbar__inner">
18
+       <%# The brand links to the host app's root when it defines root_path, otherwise to the prompts index. %>
19
+       <%= link_to (main_app.respond_to?(:root_path) ? main_app.root_path : prompts_path), class: "ck-brand" do %><%= image_tag "completion_kit/logo.svg", alt: "CompletionKit", style: "height: 52px; width: auto; margin-bottom: 10px;" %> CompletionKit<% end %>
20
+
21
+       <nav class="ck-nav">
22
+         <%# Dark button classes when the current path starts with any of the given paths, outlined light ones otherwise. %>
23
+         <% active = ->(*paths) { request.path.start_with?(*paths) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) } %>
24
+         <%= link_to "Prompts", prompts_path, class: active.(prompts_path) %>
25
+         <%= link_to "Metrics", metrics_path, class: active.(metrics_path, criteria_path) %>
26
+         <%= link_to "Datasets", datasets_path, class: active.(datasets_path) %>
27
+         <%= link_to "Runs", runs_path, class: active.(runs_path) %>
28
+         <%= link_to "Settings", provider_credentials_path, class: active.(provider_credentials_path) %>
29
+         <%= link_to "API", api_reference_path, class: active.(api_reference_path) %>
30
+       </nav>
31
+     </div>
32
+   </header>
33
+
34
+   <main class="ck-main">
35
+     <div class="ck-wrap">
36
+       <%# Only "notice" gets the notice style and role="status"; every other flash type is rendered as an alert. %>
37
+       <% flash.each do |type, message| %>
38
+         <div class="ck-flash <%= type.to_s == "notice" ? "ck-flash--notice" : "ck-flash--alert" %>" role="<%= type.to_s == "notice" ? "status" : "alert" %>">
39
+           <%= message %>
40
+         </div>
41
+       <% end %>
42
+
43
+       <%= yield %>
44
+     </div>
45
+   </main>
46
+   <script>
47
+     // On every Turbo page load, rewrite each [data-local-time] element's text as its datetime attribute in the viewer's locale.
48
+     document.addEventListener("turbo:load", function() {
49
+       document.querySelectorAll("[data-local-time]").forEach(function(el) {
50
+         var raw = el.getAttribute("datetime");
51
+         var d = new Date(raw);
52
+         // Keep the existing text when the attribute is missing or unparseable instead of printing the epoch or "Invalid Date".
53
+         if (!raw || isNaN(d.getTime())) return;
54
+         el.textContent = d.toLocaleString(undefined, {year:"numeric",month:"short",day:"numeric",hour:"2-digit",minute:"2-digit"});
55
+       });
56
+     });
57
+
58
+     // True while a refresh request is outstanding; makes repeated calls to ckRefreshModels no-ops.
59
+     var ckRefreshing = false;
60
+
61
+     // Clears the in-flight flag, stops the refresh button spinner, and shows `message` in #refresh-status.
62
+     function ckFinishRefresh(message) {
63
+       ckRefreshing = false;
64
+       var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
65
+       if (btn) btn.classList.remove('ck-icon-btn--spinning');
66
+       var status = document.getElementById('refresh-status');
67
+       if (!status) return;
68
+       status.textContent = message;
69
+       // Blank the message after 3s, but only if nothing newer (e.g. a fresh "Refreshing models…") has replaced it.
70
+       setTimeout(function() { if (status.textContent === message) status.textContent = '\u00a0'; }, 3000);
71
+     }
72
+
73
+     // Starts a model refresh by POSTing to the refresh endpoint. On success nothing is done here: the spinner is
74
+     // cleared by the turbo:before-stream-render listener below (presumably triggered by a stream this request
75
+     // causes — confirm). Failures are reported here so the button does not stay locked and spinning.
76
+     function ckRefreshModels() {
77
+       if (ckRefreshing) return;
78
+       ckRefreshing = true;
79
+       var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
80
+       if (btn) btn.classList.add('ck-icon-btn--spinning');
81
+       var status = document.getElementById('refresh-status');
82
+       if (status) status.textContent = 'Refreshing models\u2026';
83
+       // csrf_meta_tags renders nothing when forgery protection is disabled, so the meta tag may be absent.
84
+       var csrfMeta = document.querySelector('meta[name="csrf-token"]');
85
+       var headers = csrfMeta ? { "X-CSRF-Token": csrfMeta.getAttribute("content") } : {};
86
+       // NOTE(review): the path is hard-coded; presumably the engine is mounted at /completion_kit — confirm.
87
+       fetch("/completion_kit/refresh_models", { method: "POST", headers: headers })
88
+         .then(function(response) {
89
+           if (!response.ok) ckFinishRefresh('Could not refresh models.');
90
+         })
91
+         .catch(function() {
92
+           ckFinishRefresh('Could not refresh models.');
93
+         });
94
+     }
95
+
96
+     // A Turbo Stream targeting prompt_llm_model or run_judge_model is about to render: treat the refresh as finished.
97
+     document.addEventListener("turbo:before-stream-render", function(event) {
98
+       var target = event.target.getAttribute("target");
99
+       if (target === "prompt_llm_model" || target === "run_judge_model") {
100
+         ckFinishRefresh('Models updated.');
101
+       }
102
+     });
103
+   </script>
104
+ </body>
105
+ </html>