completion-kit 0.1.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +192 -0
  4. data/Rakefile +12 -0
  5. data/app/assets/config/completion_kit_manifest.js +1 -0
  6. data/app/assets/config/manifest.js +3 -0
  7. data/app/assets/images/completion_kit/logo.svg +6 -0
  8. data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +25 -0
  9. data/app/assets/stylesheets/completion_kit/application.css +2214 -0
  10. data/app/controllers/completion_kit/api/v1/base_controller.rb +29 -0
  11. data/app/controllers/completion_kit/api/v1/criteria_controller.rb +62 -0
  12. data/app/controllers/completion_kit/api/v1/datasets_controller.rb +51 -0
  13. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -0
  14. data/app/controllers/completion_kit/api/v1/prompts_controller.rb +64 -0
  15. data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +51 -0
  16. data/app/controllers/completion_kit/api/v1/responses_controller.rb +32 -0
  17. data/app/controllers/completion_kit/api/v1/runs_controller.rb +71 -0
  18. data/app/controllers/completion_kit/api_reference_controller.rb +9 -0
  19. data/app/controllers/completion_kit/application_controller.rb +31 -0
  20. data/app/controllers/completion_kit/criteria_controller.rb +67 -0
  21. data/app/controllers/completion_kit/datasets_controller.rb +53 -0
  22. data/app/controllers/completion_kit/mcp_controller.rb +57 -0
  23. data/app/controllers/completion_kit/metrics_controller.rb +52 -0
  24. data/app/controllers/completion_kit/prompts_controller.rb +69 -0
  25. data/app/controllers/completion_kit/provider_credentials_controller.rb +63 -0
  26. data/app/controllers/completion_kit/responses_controller.rb +44 -0
  27. data/app/controllers/completion_kit/runs_controller.rb +131 -0
  28. data/app/helpers/completion_kit/application_helper.rb +193 -0
  29. data/app/jobs/completion_kit/application_job.rb +4 -0
  30. data/app/jobs/completion_kit/generate_job.rb +12 -0
  31. data/app/jobs/completion_kit/judge_job.rb +12 -0
  32. data/app/jobs/completion_kit/model_discovery_job.rb +29 -0
  33. data/app/mailers/completion_kit/application_mailer.rb +6 -0
  34. data/app/models/completion_kit/application_record.rb +5 -0
  35. data/app/models/completion_kit/criteria.rb +22 -0
  36. data/app/models/completion_kit/criteria_membership.rb +20 -0
  37. data/app/models/completion_kit/dataset.rb +24 -0
  38. data/app/models/completion_kit/metric.rb +97 -0
  39. data/app/models/completion_kit/model.rb +13 -0
  40. data/app/models/completion_kit/prompt.rb +99 -0
  41. data/app/models/completion_kit/provider_credential.rb +114 -0
  42. data/app/models/completion_kit/response.rb +30 -0
  43. data/app/models/completion_kit/review.rb +28 -0
  44. data/app/models/completion_kit/run.rb +253 -0
  45. data/app/models/completion_kit/run_metric.rb +6 -0
  46. data/app/models/completion_kit/suggestion.rb +8 -0
  47. data/app/services/completion_kit/anthropic_client.rb +86 -0
  48. data/app/services/completion_kit/api_config.rb +80 -0
  49. data/app/services/completion_kit/csv_processor.rb +65 -0
  50. data/app/services/completion_kit/judge_service.rb +87 -0
  51. data/app/services/completion_kit/llm_client.rb +45 -0
  52. data/app/services/completion_kit/mcp_dispatcher.rb +53 -0
  53. data/app/services/completion_kit/mcp_tools/criteria.rb +106 -0
  54. data/app/services/completion_kit/mcp_tools/datasets.rb +90 -0
  55. data/app/services/completion_kit/mcp_tools/metrics.rb +98 -0
  56. data/app/services/completion_kit/mcp_tools/prompts.rb +112 -0
  57. data/app/services/completion_kit/mcp_tools/provider_credentials.rb +97 -0
  58. data/app/services/completion_kit/mcp_tools/responses.rb +45 -0
  59. data/app/services/completion_kit/mcp_tools/runs.rb +130 -0
  60. data/app/services/completion_kit/model_discovery_service.rb +223 -0
  61. data/app/services/completion_kit/ollama_client.rb +80 -0
  62. data/app/services/completion_kit/open_ai_client.rb +71 -0
  63. data/app/services/completion_kit/open_router_client.rb +69 -0
  64. data/app/services/completion_kit/prompt_improvement_service.rb +81 -0
  65. data/app/views/completion_kit/api_reference/_example.html.erb +6 -0
  66. data/app/views/completion_kit/api_reference/index.html.erb +308 -0
  67. data/app/views/completion_kit/criteria/_form.html.erb +46 -0
  68. data/app/views/completion_kit/criteria/edit.html.erb +14 -0
  69. data/app/views/completion_kit/criteria/index.html.erb +37 -0
  70. data/app/views/completion_kit/criteria/new.html.erb +13 -0
  71. data/app/views/completion_kit/criteria/show.html.erb +37 -0
  72. data/app/views/completion_kit/datasets/_form.html.erb +29 -0
  73. data/app/views/completion_kit/datasets/edit.html.erb +13 -0
  74. data/app/views/completion_kit/datasets/index.html.erb +38 -0
  75. data/app/views/completion_kit/datasets/new.html.erb +12 -0
  76. data/app/views/completion_kit/datasets/show.html.erb +45 -0
  77. data/app/views/completion_kit/metrics/_form.html.erb +72 -0
  78. data/app/views/completion_kit/metrics/edit.html.erb +13 -0
  79. data/app/views/completion_kit/metrics/index.html.erb +34 -0
  80. data/app/views/completion_kit/metrics/new.html.erb +12 -0
  81. data/app/views/completion_kit/metrics/show.html.erb +49 -0
  82. data/app/views/completion_kit/prompts/_form.html.erb +52 -0
  83. data/app/views/completion_kit/prompts/edit.html.erb +13 -0
  84. data/app/views/completion_kit/prompts/index.html.erb +46 -0
  85. data/app/views/completion_kit/prompts/new.html.erb +12 -0
  86. data/app/views/completion_kit/prompts/show.html.erb +156 -0
  87. data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +30 -0
  88. data/app/views/completion_kit/provider_credentials/_form.html.erb +71 -0
  89. data/app/views/completion_kit/provider_credentials/edit.html.erb +12 -0
  90. data/app/views/completion_kit/provider_credentials/index.html.erb +41 -0
  91. data/app/views/completion_kit/provider_credentials/new.html.erb +12 -0
  92. data/app/views/completion_kit/responses/show.html.erb +87 -0
  93. data/app/views/completion_kit/runs/_actions.html.erb +14 -0
  94. data/app/views/completion_kit/runs/_form.html.erb +159 -0
  95. data/app/views/completion_kit/runs/_progress.html.erb +18 -0
  96. data/app/views/completion_kit/runs/_response_row.html.erb +13 -0
  97. data/app/views/completion_kit/runs/_sort_toolbar.html.erb +8 -0
  98. data/app/views/completion_kit/runs/_status_header.html.erb +15 -0
  99. data/app/views/completion_kit/runs/edit.html.erb +14 -0
  100. data/app/views/completion_kit/runs/index.html.erb +43 -0
  101. data/app/views/completion_kit/runs/new.html.erb +12 -0
  102. data/app/views/completion_kit/runs/show.html.erb +79 -0
  103. data/app/views/completion_kit/runs/suggestion.html.erb +47 -0
  104. data/app/views/layouts/completion_kit/application.html.erb +77 -0
  105. data/config/routes.rb +55 -0
  106. data/db/migrate/20260311000001_create_completion_kit_tables.rb +87 -0
  107. data/db/migrate/20260326000001_rename_criteria_to_instruction_on_metrics_and_reviews.rb +6 -0
  108. data/db/migrate/20260327000001_add_progress_to_runs.rb +6 -0
  109. data/db/migrate/20260327100001_replace_criteria_with_direct_metrics_on_runs.rb +12 -0
  110. data/db/migrate/20260328000001_add_error_message_to_runs.rb +5 -0
  111. data/db/migrate/20260329000001_create_completion_kit_models.rb +20 -0
  112. data/db/migrate/20260401170001_add_discovery_columns_to_completion_kit_provider_credentials.rb +7 -0
  113. data/db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb +5 -0
  114. data/db/migrate/20260403000002_create_completion_kit_suggestions.rb +13 -0
  115. data/db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb +5 -0
  116. data/lib/completion-kit.rb +1 -0
  117. data/lib/completion_kit/engine.rb +35 -0
  118. data/lib/completion_kit/version.rb +3 -0
  119. data/lib/completion_kit.rb +55 -0
  120. data/lib/generators/completion_kit/install_generator.rb +21 -0
  121. data/lib/generators/completion_kit/templates/README +20 -0
  122. data/lib/generators/completion_kit/templates/initializer.rb +43 -0
  123. metadata +361 -0
@@ -0,0 +1,98 @@
1
module CompletionKit
  module McpTools
    # MCP tools for managing metrics.
    #
    # Each TOOLS entry pairs a tool name with its description, its JSON input
    # schema and the symbol of the module method that handles the call.
    module Metrics
      # Attribute names copied from the tool arguments onto a Metric.
      WRITABLE_ATTRIBUTES = %w[name instruction evaluation_steps rubric_bands].freeze
      private_constant :WRITABLE_ATTRIBUTES

      # Schema builders are lambdas so every tool gets its own hash instances.
      id_only_schema = -> { {type: "object", properties: {id: {type: "integer"}}, required: ["id"]} }
      metric_properties = lambda do
        {
          name: {type: "string"},
          instruction: {type: "string"},
          evaluation_steps: {type: "array", items: {type: "string"}},
          rubric_bands: {
            type: "array",
            items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}
          }
        }
      end

      TOOLS = {
        "metrics_list" => {
          description: "List all metrics",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "metrics_get" => {
          description: "Get a metric by ID",
          inputSchema: id_only_schema.call,
          handler: :get
        },
        "metrics_create" => {
          description: "Create a metric with evaluation criteria",
          inputSchema: {type: "object", properties: metric_properties.call, required: ["name"]},
          handler: :create
        },
        "metrics_update" => {
          description: "Update a metric",
          inputSchema: {
            type: "object",
            # `id` is listed first, followed by the same fields accepted on create.
            properties: {id: {type: "integer"}}.merge(metric_properties.call),
            required: ["id"]
          },
          handler: :update
        },
        "metrics_delete" => {
          description: "Delete a metric",
          inputSchema: id_only_schema.call,
          handler: :delete
        }
      }.freeze

      # Returns one {name:, description:, inputSchema:} hash per tool, leaving
      # out the internal :handler entry.
      def self.definitions
        TOOLS.map do |tool_name, spec|
          {name: tool_name}.merge(spec.slice(:description, :inputSchema))
        end
      end

      # Runs the handler registered for +name+ with +arguments+.
      # Raises KeyError when +name+ is not a key of TOOLS.
      def self.call(name, arguments)
        handler = TOOLS.fetch(name)[:handler]
        send(handler, arguments)
      end

      # Returns every metric as JSON text, ordered by created_at descending.
      def self.list(_args)
        metrics = Metric.order(created_at: :desc)
        text_result(metrics.map { |metric| metric.as_json })
      end

      # Returns the metric identified by args["id"] as JSON text.
      def self.get(args)
        metric = Metric.find(args["id"])
        text_result(metric.as_json)
      end

      # Builds a metric from the writable attributes in +args+ and saves it.
      # Returns the metric as JSON text, or an error result listing the
      # model's error messages when the save fails.
      def self.create(args)
        metric = Metric.new(args.slice(*WRITABLE_ATTRIBUTES))
        return text_result(metric.as_json) if metric.save

        error_result(metric.errors.full_messages.join(", "))
      end

      # Applies the writable attributes in +args+ to the metric identified by
      # args["id"]. Returns the metric as JSON text, or an error result
      # listing the model's error messages when the update fails.
      def self.update(args)
        metric = Metric.find(args["id"])
        return text_result(metric.as_json) if metric.update(args.slice(*WRITABLE_ATTRIBUTES))

        error_result(metric.errors.full_messages.join(", "))
      end

      # Destroys the metric identified by args["id"] and returns a confirmation.
      def self.delete(args)
        metric = Metric.find(args["id"])
        metric.destroy!
        text_result("Metric #{args["id"]} deleted")
      end

      # Wraps +data+ in a single text content item; strings pass through
      # unchanged and everything else is serialized with to_json.
      def self.text_result(data)
        payload =
          if data.is_a?(String)
            data
          else
            data.to_json
          end
        {content: [{type: "text", text: payload}]}
      end

      # Wraps +message+ in a text content item flagged with isError: true.
      def self.error_result(message)
        {content: [{type: "text", text: message}], isError: true}
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
module CompletionKit
  module McpTools
    # MCP tools for managing prompts.
    #
    # TOOLS maps each tool name to its description, JSON input schema and the
    # symbol of the module method that serves it.
    module Prompts
      # Attribute names copied from the tool arguments onto a Prompt.
      WRITABLE_ATTRIBUTES = %w[name description template llm_model].freeze
      private_constant :WRITABLE_ATTRIBUTES

      TOOLS = {
        "prompts_list" => {
          description: "List all prompts",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "prompts_get" => {
          description: "Get a prompt by ID",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer", description: "Prompt ID"}},
            required: ["id"]
          },
          handler: :get
        },
        "prompts_create" => {
          description: "Create a prompt",
          inputSchema: {
            type: "object",
            properties: {
              name: {type: "string"},
              description: {type: "string"},
              template: {type: "string"},
              llm_model: {type: "string"}
            },
            required: ["name", "template", "llm_model"]
          },
          handler: :create
        },
        "prompts_update" => {
          description: "Update a prompt",
          inputSchema: {
            type: "object",
            properties: {
              id: {type: "integer"},
              name: {type: "string"},
              description: {type: "string"},
              template: {type: "string"},
              llm_model: {type: "string"}
            },
            required: ["id"]
          },
          handler: :update
        },
        "prompts_delete" => {
          description: "Delete a prompt",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :delete
        },
        "prompts_publish" => {
          description: "Publish a prompt version, making it the current version",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :publish
        }
      }.freeze

      class << self
        # Returns one {name:, description:, inputSchema:} hash per tool,
        # leaving out the internal :handler entry.
        def definitions
          TOOLS.map do |tool_name, spec|
            {name: tool_name, description: spec[:description], inputSchema: spec[:inputSchema]}
          end
        end

        # Runs the handler registered for +name+ with +arguments+.
        # Raises KeyError when +name+ is not a key of TOOLS.
        def call(name, arguments)
          handler = TOOLS.fetch(name)[:handler]
          send(handler, arguments)
        end

        # Returns every prompt as JSON text, ordered by created_at descending.
        def list(_args)
          prompts = Prompt.order(created_at: :desc)
          text_result(prompts.map { |prompt| prompt.as_json })
        end

        # Returns the prompt identified by args["id"] as JSON text.
        def get(args)
          prompt = Prompt.find(args["id"])
          text_result(prompt.as_json)
        end

        # Builds a prompt from the writable attributes in +args+ and saves it.
        # Returns the prompt as JSON text, or an error result listing the
        # model's error messages when the save fails.
        def create(args)
          prompt = Prompt.new(args.slice(*WRITABLE_ATTRIBUTES))
          return text_result(prompt.as_json) if prompt.save

          error_result(prompt.errors.full_messages.join(", "))
        end

        # Updates the prompt identified by args["id"].
        #
        # When the prompt already has runs, the changes are handed to
        # Prompt#clone_as_new_version, the returned prompt is published and
        # that prompt is what gets returned. Otherwise the prompt is updated in
        # place; a failed update yields an error result.
        def update(args)
          prompt = Prompt.find(args["id"])
          changes = args.slice(*WRITABLE_ATTRIBUTES)

          if prompt.runs.exists?
            version = prompt.clone_as_new_version(changes)
            version.publish!
            return text_result(version.as_json)
          end

          return text_result(prompt.as_json) if prompt.update(changes)

          error_result(prompt.errors.full_messages.join(", "))
        end

        # Destroys the prompt identified by args["id"] and returns a confirmation.
        def delete(args)
          prompt = Prompt.find(args["id"])
          prompt.destroy!
          text_result("Prompt #{args["id"]} deleted")
        end

        # Publishes the prompt identified by args["id"] and returns it,
        # reloaded, as JSON text.
        def publish(args)
          prompt = Prompt.find(args["id"])
          prompt.publish!
          prompt.reload
          text_result(prompt.as_json)
        end

        # Wraps +data+ in a single text content item; strings pass through
        # unchanged and everything else is serialized with to_json.
        def text_result(data)
          payload =
            if data.is_a?(String)
              data
            else
              data.to_json
            end
          {content: [{type: "text", text: payload}]}
        end

        # Wraps +message+ in a text content item flagged with isError: true.
        def error_result(message)
          {content: [{type: "text", text: message}], isError: true}
        end
      end
    end
  end
end
@@ -0,0 +1,97 @@
1
module CompletionKit
  module McpTools
    # MCP tools for managing provider credentials.
    #
    # TOOLS maps each tool name to its description, JSON input schema and the
    # symbol of the module method that serves it.
    #
    # NOTE(review): the list/get descriptions promise that API keys are not
    # exposed, yet every handler returns ProviderCredential#as_json as-is.
    # Presumably the model filters the key there -- confirm against the model.
    module ProviderCredentials
      # Attribute names copied from the tool arguments onto a credential.
      WRITABLE_ATTRIBUTES = %w[provider api_key api_endpoint].freeze
      private_constant :WRITABLE_ATTRIBUTES

      TOOLS = {
        "provider_credentials_list" => {
          description: "List all provider credentials (API keys are not exposed)",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "provider_credentials_get" => {
          description: "Get a provider credential by ID (API key is not exposed)",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :get
        },
        "provider_credentials_create" => {
          description: "Create a provider credential",
          inputSchema: {
            type: "object",
            properties: {
              provider: {type: "string", enum: ["openai", "anthropic", "ollama", "openrouter"]},
              api_key: {type: "string"},
              api_endpoint: {type: "string"}
            },
            required: ["provider", "api_key"]
          },
          handler: :create
        },
        "provider_credentials_update" => {
          description: "Update a provider credential",
          inputSchema: {
            type: "object",
            properties: {
              id: {type: "integer"},
              provider: {type: "string"},
              api_key: {type: "string"},
              api_endpoint: {type: "string"}
            },
            required: ["id"]
          },
          handler: :update
        },
        "provider_credentials_delete" => {
          description: "Delete a provider credential",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :delete
        }
      }.freeze

      class << self
        # Returns one {name:, description:, inputSchema:} hash per tool,
        # leaving out the internal :handler entry.
        def definitions
          TOOLS.map do |tool_name, spec|
            {name: tool_name, description: spec[:description], inputSchema: spec[:inputSchema]}
          end
        end

        # Runs the handler registered for +name+ with +arguments+.
        # Raises KeyError when +name+ is not a key of TOOLS.
        def call(name, arguments)
          handler = TOOLS.fetch(name)[:handler]
          send(handler, arguments)
        end

        # Returns every credential as JSON text, ordered by created_at descending.
        def list(_args)
          credentials = ProviderCredential.order(created_at: :desc)
          text_result(credentials.map { |credential| credential.as_json })
        end

        # Returns the credential identified by args["id"] as JSON text.
        def get(args)
          credential = ProviderCredential.find(args["id"])
          text_result(credential.as_json)
        end

        # Builds a credential from the writable attributes in +args+ and saves
        # it. Returns the credential as JSON text, or an error result listing
        # the model's error messages when the save fails.
        def create(args)
          credential = ProviderCredential.new(args.slice(*WRITABLE_ATTRIBUTES))
          return text_result(credential.as_json) if credential.save

          error_result(credential.errors.full_messages.join(", "))
        end

        # Applies the writable attributes in +args+ to the credential
        # identified by args["id"]. Returns the credential as JSON text, or an
        # error result listing the model's error messages on failure.
        def update(args)
          credential = ProviderCredential.find(args["id"])
          return text_result(credential.as_json) if credential.update(args.slice(*WRITABLE_ATTRIBUTES))

          error_result(credential.errors.full_messages.join(", "))
        end

        # Destroys the credential identified by args["id"] and returns a
        # confirmation message.
        def delete(args)
          credential = ProviderCredential.find(args["id"])
          credential.destroy!
          text_result("Provider credential #{args["id"]} deleted")
        end

        # Wraps +data+ in a single text content item; strings pass through
        # unchanged and everything else is serialized with to_json.
        def text_result(data)
          payload =
            if data.is_a?(String)
              data
            else
              data.to_json
            end
          {content: [{type: "text", text: payload}]}
        end

        # Wraps +message+ in a text content item flagged with isError: true.
        def error_result(message)
          {content: [{type: "text", text: message}], isError: true}
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module CompletionKit
  module McpTools
    # Read-only MCP tools for the responses that belong to a run.
    #
    # TOOLS maps each tool name to its description, JSON input schema and the
    # symbol of the module method that serves it.
    module Responses
      TOOLS = {
        "responses_list" => {
          description: "List responses for a run",
          inputSchema: {
            type: "object",
            properties: {run_id: {type: "integer"}},
            required: ["run_id"]
          },
          handler: :list
        },
        "responses_get" => {
          description: "Get a specific response",
          inputSchema: {
            type: "object",
            properties: {run_id: {type: "integer"}, id: {type: "integer"}},
            required: ["run_id", "id"]
          },
          handler: :get
        }
      }.freeze

      class << self
        # Returns one {name:, description:, inputSchema:} hash per tool,
        # leaving out the internal :handler entry.
        def definitions
          TOOLS.map do |tool_name, spec|
            {name: tool_name, description: spec[:description], inputSchema: spec[:inputSchema]}
          end
        end

        # Runs the handler registered for +name+ with +arguments+.
        # Raises KeyError when +name+ is not a key of TOOLS.
        def call(name, arguments)
          handler = TOOLS.fetch(name)[:handler]
          send(handler, arguments)
        end

        # Returns the responses of the run identified by args["run_id"] as
        # JSON text; the query is issued with includes(:reviews).
        def list(args)
          run = Run.find(args["run_id"])
          responses = run.responses.includes(:reviews)
          text_result(responses.map { |response| response.as_json })
        end

        # Returns the response args["id"] looked up through the responses of
        # run args["run_id"].
        def get(args)
          run = Run.find(args["run_id"])
          response = run.responses.find(args["id"])
          text_result(response.as_json)
        end

        # Wraps the to_json serialization of +data+ in a single text content
        # item. Strings are JSON-encoded too, so they come back quoted.
        def text_result(data)
          payload = data.to_json
          {content: [{type: "text", text: payload}]}
        end
      end
    end
  end
end
@@ -0,0 +1,130 @@
1
module CompletionKit
  module McpTools
    # MCP tools for managing runs and starting their generate/judge jobs.
    #
    # TOOLS maps each tool name to its description, JSON input schema and the
    # symbol of the module method that serves it.
    module Runs
      TOOLS = {
        "runs_list" => {
          description: "List all runs",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "runs_get" => {
          description: "Get a run by ID",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :get
        },
        "runs_create" => {
          description: "Create a run",
          inputSchema: {
            type: "object",
            properties: {
              name: {type: "string"}, prompt_id: {type: "integer"},
              dataset_id: {type: "integer"}, judge_model: {type: "string"},
              metric_ids: {type: "array", items: {type: "integer"}}
            },
            required: ["name", "prompt_id"]
          },
          handler: :create
        },
        "runs_update" => {
          description: "Update a run",
          inputSchema: {
            type: "object",
            properties: {
              id: {type: "integer"}, name: {type: "string"},
              dataset_id: {type: "integer"}, judge_model: {type: "string"},
              metric_ids: {type: "array", items: {type: "integer"}}
            },
            required: ["id"]
          },
          handler: :update
        },
        "runs_delete" => {
          description: "Delete a run",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :delete
        },
        "runs_generate" => {
          description: "Generate responses for a run using its prompt and dataset",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :generate
        },
        "runs_judge" => {
          description: "Judge responses for a run using configured metrics",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :judge
        }
      }.freeze

      # Returns one {name:, description:, inputSchema:} hash per tool, leaving
      # out the internal :handler entry.
      def self.definitions
        TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
      end

      # Runs the handler registered for +name+ with +arguments+.
      # Raises KeyError when +name+ is not a key of TOOLS.
      def self.call(name, arguments)
        tool = TOOLS.fetch(name)
        send(tool[:handler], arguments)
      end

      # Returns every run as JSON text, ordered by created_at descending.
      def self.list(_args)
        text_result(Run.order(created_at: :desc).map(&:as_json))
      end

      # Returns the run identified by args["id"] as JSON text.
      def self.get(args)
        text_result(Run.find(args["id"]).as_json)
      end

      # Creates a run and attaches the metrics listed in args["metric_ids"].
      #
      # Saving the run and attaching its metrics happen in one transaction, so
      # an exception raised while attaching a metric does not leave a run
      # behind with only some of its metrics. Such exceptions still propagate
      # to the caller; a failed save returns an error result instead.
      def self.create(args)
        run = Run.new(args.slice("name", "prompt_id", "dataset_id", "judge_model"))
        saved = Run.transaction do
          next false unless run.save

          replace_run_metrics(run, args["metric_ids"])
          true
        end
        return error_result(run.errors.full_messages.join(", ")) unless saved

        text_result(run.reload.as_json)
      end

      # Updates the run identified by args["id"]; its metrics are replaced only
      # when the "metric_ids" key is present in +args+.
      #
      # The attribute update and the metric replacement share one transaction,
      # so an exception while replacing metrics also undoes the attribute
      # change. Such exceptions still propagate; a failed update returns an
      # error result instead.
      def self.update(args)
        run = Run.find(args["id"])
        saved = Run.transaction do
          next false unless run.update(args.slice("name", "dataset_id", "judge_model"))

          replace_run_metrics(run, args["metric_ids"]) if args.key?("metric_ids")
          true
        end
        return error_result(run.errors.full_messages.join(", ")) unless saved

        text_result(run.reload.as_json)
      end

      # Destroys the run identified by args["id"] and returns a confirmation.
      def self.delete(args)
        Run.find(args["id"]).destroy!
        text_result("Run #{args["id"]} deleted")
      end

      # Enqueues GenerateJob for the run and returns the reloaded run as JSON text.
      def self.generate(args)
        run = Run.find(args["id"])
        GenerateJob.perform_later(run.id)
        text_result(run.reload.as_json)
      end

      # Enqueues JudgeJob for the run and returns the reloaded run as JSON text.
      def self.judge(args)
        run = Run.find(args["id"])
        JudgeJob.perform_later(run.id)
        text_result(run.reload.as_json)
      end

      # Wraps +data+ in a single text content item; strings pass through
      # unchanged and everything else is serialized with to_json.
      def self.text_result(data)
        text = data.is_a?(String) ? data : data.to_json
        {content: [{type: "text", text: text}]}
      end

      # Wraps +message+ in a text content item flagged with isError: true.
      def self.error_result(message)
        {content: [{type: "text", text: message}], isError: true}
      end

      # Replaces the run's metrics with +metric_ids+, assigning 1-based
      # positions in the given order and skipping blank entries. Does nothing
      # when +metric_ids+ is nil.
      #
      # Deleting the old rows and creating the new ones is wrapped in a
      # transaction so a failing create! cannot leave the run with its previous
      # metrics removed and only part of the new list written.
      def self.replace_run_metrics(run, metric_ids)
        return unless metric_ids

        Run.transaction do
          run.run_metrics.delete_all
          Array(metric_ids).reject(&:blank?).each.with_index(1) do |metric_id, position|
            run.run_metrics.create!(metric_id: metric_id, position: position)
          end
        end
      end
    end
  end
end