completion-kit 0.1.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +192 -0
- data/Rakefile +12 -0
- data/app/assets/config/completion_kit_manifest.js +1 -0
- data/app/assets/config/manifest.js +3 -0
- data/app/assets/images/completion_kit/logo.svg +6 -0
- data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +25 -0
- data/app/assets/stylesheets/completion_kit/application.css +2214 -0
- data/app/controllers/completion_kit/api/v1/base_controller.rb +29 -0
- data/app/controllers/completion_kit/api/v1/criteria_controller.rb +62 -0
- data/app/controllers/completion_kit/api/v1/datasets_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/prompts_controller.rb +64 -0
- data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/responses_controller.rb +32 -0
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +71 -0
- data/app/controllers/completion_kit/api_reference_controller.rb +9 -0
- data/app/controllers/completion_kit/application_controller.rb +31 -0
- data/app/controllers/completion_kit/criteria_controller.rb +67 -0
- data/app/controllers/completion_kit/datasets_controller.rb +53 -0
- data/app/controllers/completion_kit/mcp_controller.rb +57 -0
- data/app/controllers/completion_kit/metrics_controller.rb +52 -0
- data/app/controllers/completion_kit/prompts_controller.rb +69 -0
- data/app/controllers/completion_kit/provider_credentials_controller.rb +63 -0
- data/app/controllers/completion_kit/responses_controller.rb +44 -0
- data/app/controllers/completion_kit/runs_controller.rb +131 -0
- data/app/helpers/completion_kit/application_helper.rb +193 -0
- data/app/jobs/completion_kit/application_job.rb +4 -0
- data/app/jobs/completion_kit/generate_job.rb +12 -0
- data/app/jobs/completion_kit/judge_job.rb +12 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +29 -0
- data/app/mailers/completion_kit/application_mailer.rb +6 -0
- data/app/models/completion_kit/application_record.rb +5 -0
- data/app/models/completion_kit/criteria.rb +22 -0
- data/app/models/completion_kit/criteria_membership.rb +20 -0
- data/app/models/completion_kit/dataset.rb +24 -0
- data/app/models/completion_kit/metric.rb +97 -0
- data/app/models/completion_kit/model.rb +13 -0
- data/app/models/completion_kit/prompt.rb +99 -0
- data/app/models/completion_kit/provider_credential.rb +114 -0
- data/app/models/completion_kit/response.rb +30 -0
- data/app/models/completion_kit/review.rb +28 -0
- data/app/models/completion_kit/run.rb +253 -0
- data/app/models/completion_kit/run_metric.rb +6 -0
- data/app/models/completion_kit/suggestion.rb +8 -0
- data/app/services/completion_kit/anthropic_client.rb +86 -0
- data/app/services/completion_kit/api_config.rb +80 -0
- data/app/services/completion_kit/csv_processor.rb +65 -0
- data/app/services/completion_kit/judge_service.rb +87 -0
- data/app/services/completion_kit/llm_client.rb +45 -0
- data/app/services/completion_kit/mcp_dispatcher.rb +53 -0
- data/app/services/completion_kit/mcp_tools/criteria.rb +106 -0
- data/app/services/completion_kit/mcp_tools/datasets.rb +90 -0
- data/app/services/completion_kit/mcp_tools/metrics.rb +98 -0
- data/app/services/completion_kit/mcp_tools/prompts.rb +112 -0
- data/app/services/completion_kit/mcp_tools/provider_credentials.rb +97 -0
- data/app/services/completion_kit/mcp_tools/responses.rb +45 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +130 -0
- data/app/services/completion_kit/model_discovery_service.rb +223 -0
- data/app/services/completion_kit/ollama_client.rb +80 -0
- data/app/services/completion_kit/open_ai_client.rb +71 -0
- data/app/services/completion_kit/open_router_client.rb +69 -0
- data/app/services/completion_kit/prompt_improvement_service.rb +81 -0
- data/app/views/completion_kit/api_reference/_example.html.erb +6 -0
- data/app/views/completion_kit/api_reference/index.html.erb +308 -0
- data/app/views/completion_kit/criteria/_form.html.erb +46 -0
- data/app/views/completion_kit/criteria/edit.html.erb +14 -0
- data/app/views/completion_kit/criteria/index.html.erb +37 -0
- data/app/views/completion_kit/criteria/new.html.erb +13 -0
- data/app/views/completion_kit/criteria/show.html.erb +37 -0
- data/app/views/completion_kit/datasets/_form.html.erb +29 -0
- data/app/views/completion_kit/datasets/edit.html.erb +13 -0
- data/app/views/completion_kit/datasets/index.html.erb +38 -0
- data/app/views/completion_kit/datasets/new.html.erb +12 -0
- data/app/views/completion_kit/datasets/show.html.erb +45 -0
- data/app/views/completion_kit/metrics/_form.html.erb +72 -0
- data/app/views/completion_kit/metrics/edit.html.erb +13 -0
- data/app/views/completion_kit/metrics/index.html.erb +34 -0
- data/app/views/completion_kit/metrics/new.html.erb +12 -0
- data/app/views/completion_kit/metrics/show.html.erb +49 -0
- data/app/views/completion_kit/prompts/_form.html.erb +52 -0
- data/app/views/completion_kit/prompts/edit.html.erb +13 -0
- data/app/views/completion_kit/prompts/index.html.erb +46 -0
- data/app/views/completion_kit/prompts/new.html.erb +12 -0
- data/app/views/completion_kit/prompts/show.html.erb +156 -0
- data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +30 -0
- data/app/views/completion_kit/provider_credentials/_form.html.erb +71 -0
- data/app/views/completion_kit/provider_credentials/edit.html.erb +12 -0
- data/app/views/completion_kit/provider_credentials/index.html.erb +41 -0
- data/app/views/completion_kit/provider_credentials/new.html.erb +12 -0
- data/app/views/completion_kit/responses/show.html.erb +87 -0
- data/app/views/completion_kit/runs/_actions.html.erb +14 -0
- data/app/views/completion_kit/runs/_form.html.erb +159 -0
- data/app/views/completion_kit/runs/_progress.html.erb +18 -0
- data/app/views/completion_kit/runs/_response_row.html.erb +13 -0
- data/app/views/completion_kit/runs/_sort_toolbar.html.erb +8 -0
- data/app/views/completion_kit/runs/_status_header.html.erb +15 -0
- data/app/views/completion_kit/runs/edit.html.erb +14 -0
- data/app/views/completion_kit/runs/index.html.erb +43 -0
- data/app/views/completion_kit/runs/new.html.erb +12 -0
- data/app/views/completion_kit/runs/show.html.erb +79 -0
- data/app/views/completion_kit/runs/suggestion.html.erb +47 -0
- data/app/views/layouts/completion_kit/application.html.erb +77 -0
- data/config/routes.rb +55 -0
- data/db/migrate/20260311000001_create_completion_kit_tables.rb +87 -0
- data/db/migrate/20260326000001_rename_criteria_to_instruction_on_metrics_and_reviews.rb +6 -0
- data/db/migrate/20260327000001_add_progress_to_runs.rb +6 -0
- data/db/migrate/20260327100001_replace_criteria_with_direct_metrics_on_runs.rb +12 -0
- data/db/migrate/20260328000001_add_error_message_to_runs.rb +5 -0
- data/db/migrate/20260329000001_create_completion_kit_models.rb +20 -0
- data/db/migrate/20260401170001_add_discovery_columns_to_completion_kit_provider_credentials.rb +7 -0
- data/db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb +5 -0
- data/db/migrate/20260403000002_create_completion_kit_suggestions.rb +13 -0
- data/db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb +5 -0
- data/lib/completion-kit.rb +1 -0
- data/lib/completion_kit/engine.rb +35 -0
- data/lib/completion_kit/version.rb +3 -0
- data/lib/completion_kit.rb +55 -0
- data/lib/generators/completion_kit/install_generator.rb +21 -0
- data/lib/generators/completion_kit/templates/README +20 -0
- data/lib/generators/completion_kit/templates/initializer.rb +43 -0
- metadata +361 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
module CompletionKit
  # HTML endpoints for prompts: listing, create/edit/delete, and publishing.
  class PromptsController < ApplicationController
    before_action :set_prompt, only: [:show, :edit, :update, :destroy, :publish]

    # Prompts from the +current_versions+ scope, newest first, runs preloaded.
    def index
      @prompts = Prompt.current_versions.includes(:runs).order(created_at: :desc)
    end

    # Runs whose prompt is any member of +@prompt.family_versions+, newest first.
    def show
      family_ids = @prompt.family_versions.select(:id)
      @runs = Run.where(prompt_id: family_ids)
                 .includes(:prompt, :dataset, responses: :reviews)
                 .order(created_at: :desc)
    end

    def new
      @prompt = Prompt.new
    end

    def edit
    end

    def create
      @prompt = Prompt.new(prompt_params)

      if @prompt.save
        redirect_to prompt_path(@prompt), notice: "Prompt version was successfully created."
      else
        render :new, status: :unprocessable_entity
      end
    end

    # A prompt that already has runs is not modified: the submitted attributes
    # are handed to +clone_as_new_version+ and the resulting prompt is
    # published. A prompt without runs is updated in place.
    def update
      if @prompt.runs.exists?
        new_prompt = @prompt.clone_as_new_version(prompt_params.to_h)
        new_prompt.publish!
        redirect_to prompt_path(new_prompt), notice: "Saved as #{new_prompt.version_label}."
      elsif @prompt.update(prompt_params)
        redirect_to prompt_path(@prompt), notice: "Prompt saved."
      else
        render :edit, status: :unprocessable_entity
      end
    end

    # Deletes the prompt. +destroy+ returns false when the record could not be
    # removed, so success is only reported when it actually happened.
    def destroy
      if @prompt.destroy
        redirect_to prompts_path, notice: "Prompt version was successfully destroyed."
      else
        reason = @prompt.errors.full_messages.to_sentence.presence
        redirect_to prompt_path(@prompt), alert: reason || "Prompt version could not be destroyed."
      end
    end

    def publish
      @prompt.publish!
      redirect_to prompt_path(@prompt), notice: "#{@prompt.display_name} is now the current version."
    end

    private

    def set_prompt
      @prompt = Prompt.find(params[:id])
    end

    # Attributes a client may set on a prompt.
    def prompt_params
      params.require(:prompt).permit(
        :name,
        :description,
        :template,
        :llm_model
      )
    end
  end
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
module CompletionKit
  # HTML endpoints for provider credentials and for triggering model discovery.
  class ProviderCredentialsController < ApplicationController
    before_action :set_provider_credential, only: [:edit, :update, :refresh]

    def index
      @provider_credentials = ProviderCredential.order(:provider)
    end

    # The provider can be preselected through the +provider+ query parameter.
    def new
      @provider_credential = ProviderCredential.new(provider: params[:provider])
    end

    def create
      @provider_credential = ProviderCredential.new(provider_credential_params)

      if @provider_credential.save
        redirect_to provider_credentials_path, notice: "Provider credential was successfully created."
      else
        render :new, status: :unprocessable_entity
      end
    end

    def edit
    end

    def update
      if @provider_credential.update(provider_credential_params)
        redirect_to provider_credentials_path, notice: "Provider credential was successfully updated."
      else
        render :edit, status: :unprocessable_entity
      end
    end

    # Starts model discovery for one credential; responds 200 with no body.
    def refresh
      start_discovery(@provider_credential)
      head :ok
    end

    # Starts model discovery for every credential; responds 200 with no body.
    def refresh_all
      ProviderCredential.find_each { |credential| start_discovery(credential) }

      head :ok
    end

    private

    def set_provider_credential
      @provider_credential = ProviderCredential.find(params[:id])
    end

    def provider_credential_params
      params.require(:provider_credential).permit(:provider, :api_key, :api_endpoint)
    end

    # Resets the discovery counters, broadcasts the reset state, then enqueues
    # the discovery job. +update_columns+ writes straight to the database
    # (no validations or callbacks), hence the reload before broadcasting.
    def start_discovery(credential)
      credential.update_columns(discovery_status: "discovering", discovery_current: 0, discovery_total: 0)
      credential.reload
      credential.broadcast_discovery_progress
      ModelDiscoveryJob.perform_later(credential.id)
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
module CompletionKit
  # Shows a single response of a run, with previous/next navigation that
  # follows the requested sort order.
  class ResponsesController < ApplicationController
    before_action :set_run
    before_action :set_response

    # Sets @response_number (1-based position in the ordered list), @reviews,
    # and @prev_response / @next_response, which hold response ids or nil.
    def show
      @sort = params[:sort]
      ordered_ids = ordered_response_ids
      current_index = ordered_ids.index(@response.id)
      # The id list comes from a second query; if the response is no longer in
      # it, answer "not found" rather than failing on nil arithmetic.
      raise ActiveRecord::RecordNotFound unless current_index

      @response_number = current_index + 1
      @reviews = @response.reviews.includes(:metric)
      @prev_response = current_index > 0 ? ordered_ids[current_index - 1] : nil
      @next_response = ordered_ids[current_index + 1]
    end

    private

    def set_run
      @run = Run.find(params[:run_id])
    end

    # Looked up through the run, so a response of another run is not found.
    def set_response
      @response = @run.responses.find(params[:id])
    end

    # Ids of the run's responses in display order.
    #
    # When the run has a judge configured and the sort is not "none", responses
    # are ordered by average review score: ascending for "score_asc",
    # descending otherwise, unscored responses last. The response id is a
    # tie-breaker so equal (or missing) averages always come back in the same
    # order. In every other case responses are ordered by id.
    def ordered_response_ids
      return @run.responses.order(:id).pluck(:id) unless @run.judge_configured? && @sort != "none"

      # Only ever one of two literals, never request input.
      direction = @sort == "score_asc" ? "ASC" : "DESC"
      @run.responses
          .left_joins(:reviews)
          .group("completion_kit_responses.id")
          .order(Arel.sql("AVG(completion_kit_reviews.ai_score) #{direction} NULLS LAST, completion_kit_responses.id ASC"))
          .pluck(:id)
    end
  end
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
module CompletionKit
  # HTML endpoints for runs: CRUD, starting generation and judging, and
  # creating/applying prompt suggestions.
  class RunsController < ApplicationController
    before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :judge, :suggest, :suggestion, :apply_suggestion]
    before_action :load_form_collections, only: [:new, :edit, :create, :update]

    def index
      @runs = Run.includes(:prompt, :dataset, responses: :reviews).order(created_at: :desc)
    end

    # Loads the run's responses. With a judge configured they are ordered by
    # average review score (ascending for sort=score_asc, descending
    # otherwise, unscored last) with the response id as tie-breaker so the
    # order is stable; without a judge they are ordered by id.
    #
    # NOTE(review): ResponsesController treats sort=none as "order by id" even
    # when a judge is configured; this action does not. Confirm which is intended.
    def show
      @responses =
        if @run.judge_configured?
          # Only ever one of two literals, never request input.
          direction = params[:sort] == "score_asc" ? "ASC" : "DESC"
          @run.responses
              .left_joins(:reviews)
              .includes(:reviews)
              .group("completion_kit_responses.id")
              .order(Arel.sql("AVG(completion_kit_reviews.ai_score) #{direction} NULLS LAST, completion_kit_responses.id ASC"))
        else
          @run.responses.includes(:reviews).order(:id)
        end
    end

    # The prompt can be preselected through the +prompt_id+ query parameter.
    def new
      @run = Run.new(prompt_id: params[:prompt_id])
    end

    def edit
    end

    def create
      @run = Run.new(run_params.except(:metric_ids))
      if @run.save
        replace_run_metrics(@run, params[:run][:metric_ids])
        redirect_to run_path(@run), notice: "Run was successfully created."
      else
        # Form collections were already loaded by the before_action.
        render :new, status: :unprocessable_entity
      end
    end

    # A run that already has responses is not modified: the submitted
    # attributes create a new pending run instead. A run without responses is
    # updated in place.
    def update
      if @run.responses.any?
        # NOTE(review): create! raises on invalid attributes instead of
        # re-rendering the form — confirm that is acceptable here.
        new_run = Run.create!(run_params.except(:metric_ids).to_h.merge(status: "pending"))
        replace_run_metrics(new_run, params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
        redirect_to run_path(new_run), notice: "Saved as a new run. The previous run and its results are preserved."
      elsif @run.update(run_params.except(:metric_ids))
        replace_run_metrics(@run, params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
        redirect_to run_path(@run), notice: "Run saved."
      else
        # Form collections were already loaded by the before_action.
        render :edit, status: :unprocessable_entity
      end
    end

    def destroy
      @run.destroy
      redirect_to runs_path, notice: "Run was successfully destroyed."
    end

    # Resets progress and error state, then enqueues response generation.
    def generate
      @run.update!(status: "generating", progress_current: 0, progress_total: 0, error_message: nil)
      GenerateJob.perform_later(@run.id)
      redirect_to run_path(@run)
    end

    # Optionally stores the submitted judge model, then enqueues judging.
    def judge
      @run.update(judge_model: params[:run][:judge_model]) if params[:run]
      JudgeJob.perform_later(@run.id)
      redirect_to run_path(@run)
    end

    # Asks PromptImprovementService for a suggestion (inside the request) and
    # stores the result on the run.
    def suggest
      result = PromptImprovementService.new(@run).suggest
      @run.suggestions.create!(
        prompt: @run.prompt,
        reasoning: result["reasoning"],
        suggested_template: result["suggested_template"],
        original_template: result["original_template"]
      )
      redirect_to suggestion_run_path(@run)
    end

    # Shows the most recently created suggestion, or redirects back if none exists.
    def suggestion
      @suggestion = @run.suggestions.order(created_at: :desc).first
      return redirect_to run_path(@run), alert: "No suggestion available. Generate one first." unless @suggestion
    end

    # Publishes a new prompt version carrying the latest suggestion's template
    # and stamps the suggestion as applied.
    def apply_suggestion
      suggestion = @run.suggestions.order(created_at: :desc).first
      return redirect_to run_path(@run), alert: "No suggestion to apply." unless suggestion

      new_prompt = @run.prompt.clone_as_new_version(template: suggestion.suggested_template)
      new_prompt.publish!
      suggestion.update!(applied_at: Time.current)
      redirect_to prompt_path(new_prompt), notice: "Suggestion applied."
    end

    private

    def set_run
      @run = Run.find(params[:id])
    end

    # Collections the run form needs.
    def load_form_collections
      @prompts = Prompt.order(:name)
      @datasets = Dataset.order(:name)
      @criterias = Criteria.includes(:metrics).order(:name)
      @all_metrics = Metric.order(:name)
    end

    def run_params
      params.require(:run).permit(:name, :prompt_id, :dataset_id, :judge_model, :temperature, metric_ids: [])
    end

    # Replaces the run's metric links with +metric_ids+, in the given order
    # (positions start at 1, blank ids are skipped). Does nothing when
    # +metric_ids+ is nil.
    #
    # Delete and re-create run in one transaction, so a failing create! rolls
    # the deletion back instead of leaving a partial list. The exception
    # still propagates to the caller.
    def replace_run_metrics(run, metric_ids)
      return unless metric_ids

      selected_ids = Array(metric_ids).reject(&:blank?)
      run.transaction do
        run.run_metrics.delete_all
        selected_ids.each_with_index do |metric_id, index|
          run.run_metrics.create!(metric_id: metric_id, position: index + 1)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
module CompletionKit
  # View helpers: CSS class pickers, run status labels, grouped model options
  # and a word-level diff renderer.
  module ApplicationHelper
    # CSS classes for a button. Unrecognised tone/variant pairs fall back to
    # the primary style.
    def ck_button_classes(tone = :dark, variant: :solid)
      modifier = case [tone, variant]
                 when [:light, :outline] then "secondary"
                 when [:green, :solid] then "success"
                 when [:red, :outline] then "danger"
                 when [:amber, :outline] then "warning"
                 when [:blue, :outline] then "info"
                 else "primary"
                 end

      "ck-button ck-button--#{modifier}"
    end

    # CSS classes for a badge. +kind+ may be a score kind or a run status;
    # anything unrecognised (including nil) gets the pending style.
    def ck_badge_classes(kind)
      modifier = case kind.to_s
                 when "high", "completed" then "high"
                 when "medium" then "medium"
                 when "low", "failed" then "low"
                 when "running", "generating", "judging" then "running"
                 else "pending"
                 end

      "ck-badge ck-badge--#{modifier}"
    end

    # CSS classes for the status dot of +run+.
    def ck_run_dot(run)
      case run.status
      when "generating", "judging" then "ck-dot ck-dot--running"
      when "failed" then "ck-dot ck-dot--failed"
      when "completed" then "ck-dot ck-dot--completed"
      else "ck-dot ck-dot--pending"
      end
    end

    # Human-readable status of +run+, with progress counters while it is
    # generating or judging and a total is known.
    def ck_run_status_label(run)
      case run.status
      when "pending" then "Ready to run"
      when "generating"
        if run.progress_total.to_i > 0
          "Generating responses (#{run.progress_current}/#{run.progress_total})"
        else
          "Generating responses…"
        end
      when "judging"
        if run.progress_total.to_i > 0
          "Judging (#{run.progress_current}/#{run.progress_total} evaluations)"
        else
          "Judging…"
        end
      when "completed" then "Completed"
      when "failed" then "Failed"
      # to_s: a nil status yields "" instead of raising, matching ck_run_dot,
      # which also tolerates nil.
      else run.status.to_s.capitalize
      end
    end

    PROVIDER_LABELS = {
      "openai" => "OpenAI",
      "anthropic" => "Anthropic",
      "ollama" => "Ollama / local endpoint",
      "openrouter" => "OpenRouter"
    }.freeze

    # Display name of a provider; unknown providers are titleized.
    def ck_provider_label(provider)
      PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
    end

    # Grouped <option> markup for +models+ (hashes with :id, :name, :provider).
    #
    # If +selected+ is not among +models+ but exists as a stored Model, it is
    # appended with a "(retired)" suffix so the current value stays selectable.
    # OpenRouter models are grouped by the part of their id before the first
    # "/" and those groups sort after all other providers.
    def ck_grouped_models(models, selected = nil)
      if selected.present? && models.none? { |m| m[:id] == selected }
        retired = CompletionKit::Model.find_by(model_id: selected)
        if retired
          models = models + [{ id: retired.model_id, name: "#{retired.display_name || retired.model_id} (retired)", provider: retired.provider }]
        end
      end

      groups = models.group_by do |m|
        if m[:provider] == "openrouter"
          upstream = m[:id].to_s.split("/", 2).first
          "OpenRouter — #{upstream}"
        else
          ck_provider_label(m[:provider])
        end
      end

      ordered_keys = groups.keys.sort_by { |label| [label.start_with?("OpenRouter") ? 1 : 0, label] }
      grouped = ordered_keys.map { |label| [label, groups[label].map { |m| [m[:name], m[:id]] }] }
      grouped_options_for_select(grouped, selected)
    end

    # Grouped <option> markup for the models available in +scope+, or "" when
    # there are none.
    def ck_model_options_html(scope)
      models = CompletionKit::ApiConfig.available_models(scope: scope)
      return "" if models.empty?

      ck_grouped_models(models)
    end

    # Buckets a score into :high / :medium / :low using the configured
    # thresholds; nil maps to :pending.
    def ck_score_kind(score)
      return :pending if score.nil?
      return :high if score >= CompletionKit.config.high_quality_threshold
      return :medium if score >= CompletionKit.config.medium_quality_threshold

      :low
    end

    # Markup for the "old" side of a word diff: unchanged tokens plus removed
    # tokens wrapped in <span class="ck-diff-del">.
    def ck_word_diff_old(old_text, new_text)
      diff_tokens(old_text, new_text, :old)
    end

    # Markup for the "new" side of a word diff: unchanged tokens plus added
    # tokens wrapped in <span class="ck-diff-ins">.
    def ck_word_diff_new(old_text, new_text)
      diff_tokens(old_text, new_text, :new)
    end

    private

    # Renders one side (:old or :new) of the diff. Every token is HTML-escaped;
    # changes that belong to the other side are omitted.
    def diff_tokens(old_text, new_text, side)
      changes = diff_changes(tokenize_for_diff(old_text), tokenize_for_diff(new_text))

      fragments = []
      changes.each do |type, token|
        escaped = ERB::Util.html_escape(token)
        if type == :equal
          fragments << escaped
        elsif type == :remove && side == :old
          fragments << content_tag(:span, escaped, class: "ck-diff-del")
        elsif type == :add && side == :new
          fragments << content_tag(:span, escaped, class: "ck-diff-ins")
        end
      end

      fragments.join.html_safe
    end

    # Walks the LCS table backwards and returns the edit script as
    # [type, token] pairs in reading order; type is :equal, :add or :remove.
    def diff_changes(old_words, new_words)
      lcs = lcs_table(old_words, new_words)
      i = old_words.length
      j = new_words.length

      reversed = []
      while i > 0 || j > 0
        if i > 0 && j > 0 && old_words[i - 1] == new_words[j - 1]
          reversed << [:equal, old_words[i - 1]]
          i -= 1
          j -= 1
        elsif j > 0 && (i == 0 || lcs[i][j - 1] >= lcs[i - 1][j])
          reversed << [:add, new_words[j - 1]]
          j -= 1
        else
          reversed << [:remove, old_words[i - 1]]
          i -= 1
        end
      end

      reversed.reverse
    end

    # Splits text into diff tokens: runs of non-whitespace, single newlines,
    # and runs of spaces/tabs. Tabs are kept so tab-separated words are not
    # glued together in the rendered diff. Other whitespace (e.g. "\r") is
    # not tokenized.
    def tokenize_for_diff(text)
      text.to_s.scan(/\S+|\n|[ \t]+/)
    end

    # Longest-common-subsequence length table: table[i][j] is the LCS length
    # of the first i elements of +a+ and the first j elements of +b+.
    def lcs_table(a, b)
      m = a.length
      n = b.length
      table = Array.new(m + 1) { Array.new(n + 1, 0) }
      (1..m).each do |i|
        (1..n).each do |j|
          table[i][j] = if a[i - 1] == b[j - 1]
            table[i - 1][j - 1] + 1
          else
            [table[i - 1][j], table[i][j - 1]].max
          end
        end
      end
      table
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
module CompletionKit
  # Background job that refreshes the model list for one provider credential
  # and broadcasts progress while doing so.
  class ModelDiscoveryJob < ApplicationJob
    queue_as :default

    # Runs discovery for the credential with the given id. Does nothing when
    # the credential does not exist.
    #
    # Any StandardError raised during discovery is logged, the credential is
    # marked "failed" and the failure is broadcast; the error is not re-raised.
    def perform(provider_credential_id)
      # Looked up outside the rescue: the failure handler needs a credential
      # to mark, so a failing lookup must simply propagate.
      credential = ProviderCredential.find_by(id: provider_credential_id)
      return unless credential

      begin
        discover_models(credential)
      rescue StandardError => e
        Rails.logger&.error(
          "[CompletionKit] Model discovery failed for provider credential #{credential.id}: #{e.class}: #{e.message}"
        )
        credential.update_columns(discovery_status: "failed")
        credential.reload
        credential.broadcast_discovery_progress
      end
    end

    private

    # Marks the credential as discovering, runs the discovery service while
    # persisting and broadcasting each progress update, then marks it completed.
    def discover_models(credential)
      credential.update_columns(discovery_status: "discovering", discovery_current: 0, discovery_total: 0)
      credential.reload
      credential.broadcast_discovery_progress

      service = ModelDiscoveryService.new(config: credential.config_hash)
      service.refresh! do |current, total|
        credential.update_columns(discovery_current: current, discovery_total: total)
        credential.reload
        credential.broadcast_discovery_progress
      end

      # update_columns does not touch timestamps, so updated_at is set explicitly.
      credential.update_columns(discovery_status: "completed", updated_at: Time.current)
      credential.reload
      credential.broadcast_discovery_complete
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module CompletionKit
  # A named collection of metrics, linked through memberships that are
  # ordered by position and then id.
  class Criteria < ApplicationRecord
    self.table_name = "completion_kit_criteria"

    has_many :criteria_memberships, -> { order(:position, :id) }, dependent: :destroy
    has_many :metrics, through: :criteria_memberships

    validates :name, presence: true

    # Metrics in membership order; memberships without a metric are skipped.
    def ordered_metrics
      memberships = criteria_memberships.includes(:metric)
      memberships.map { |membership| membership.metric }.compact
    end

    # Fixed JSON representation. +options+ is accepted but not used.
    def as_json(options = {})
      {
        id: id,
        name: name,
        description: description,
        created_at: created_at,
        updated_at: updated_at,
        metric_ids: metric_ids
      }
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
module CompletionKit
  # Join record placing a metric inside a criteria at a given position.
  class CriteriaMembership < ApplicationRecord
    self.table_name = "completion_kit_criteria_memberships"

    belongs_to :criteria, class_name: "CompletionKit::Criteria", foreign_key: "criteria_id"
    belongs_to :metric

    # A metric may appear at most once per criteria.
    validates :metric_id, uniqueness: { scope: :criteria_id }

    before_validation :set_default_position

    private

    # Assigns the next free position (current maximum + 1, or 1 when the
    # criteria has no positioned memberships). Leaves an existing position
    # alone and does nothing without a criteria.
    def set_default_position
      return if position.present?
      return if criteria.blank?

      highest = criteria.criteria_memberships.maximum(:position)
      self.position = highest.to_i + 1
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
module CompletionKit
  # A named CSV payload that runs are executed against.
  class Dataset < ApplicationRecord
    # Deleting a dataset that still has runs is refused with a validation error.
    has_many :runs, dependent: :restrict_with_error

    validates :name, presence: true
    validates :csv_data, presence: true

    # Fixed JSON representation. +options+ is accepted but not used.
    def as_json(options = {})
      {
        id: id,
        name: name,
        csv_data: csv_data,
        created_at: created_at,
        updated_at: updated_at
      }
    end

    # Number of data rows in +csv_data+, treating the first line as headers.
    # Returns 0 for blank or malformed CSV.
    def row_count
      return 0 if csv_data.blank?

      # Loaded on first use rather than at file load.
      require "csv"
      parsed = ::CSV.parse(csv_data, headers: true)
      parsed.length
    rescue ::CSV::MalformedCSVError
      0
    end
  end
end
|