tuningengines-cli 0.3.5 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +103 -1
- package/dist/cli.js +3 -1
- package/dist/cli.js.map +1 -1
- package/dist/client.d.ts +78 -1
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +111 -2
- package/dist/client.js.map +1 -1
- package/dist/commands/catalog.d.ts +4 -0
- package/dist/commands/catalog.d.ts.map +1 -0
- package/dist/commands/catalog.js +164 -0
- package/dist/commands/catalog.js.map +1 -0
- package/dist/commands/jobs.d.ts.map +1 -1
- package/dist/commands/jobs.js +34 -1
- package/dist/commands/jobs.js.map +1 -1
- package/dist/commands/models.d.ts.map +1 -1
- package/dist/commands/models.js +2 -1
- package/dist/commands/models.js.map +1 -1
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +472 -32
- package/dist/mcp.js.map +1 -1
- package/package.json +2 -2
package/dist/mcp.js
CHANGED
|
@@ -7,13 +7,22 @@ const types_js_1 = require("@modelcontextprotocol/sdk/types.js");
|
|
|
7
7
|
const client_1 = require("./client");
|
|
8
8
|
const config_1 = require("./config");
|
|
9
9
|
async function startMcpServer() {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
const
|
|
10
|
+
// Lazy client initialization — deferred until a tool is called.
|
|
11
|
+
// This allows the server to start and list tools without a valid API key,
|
|
12
|
+
// a capability that Glama inspection and tool detection require.
|
|
13
|
+
let _client = null;
|
|
14
|
+
const getClient = () => {
|
|
15
|
+
if (!_client) {
|
|
16
|
+
_client = new client_1.TuningEnginesClient({
|
|
17
|
+
apiKey: (0, config_1.getApiKey)(),
|
|
18
|
+
apiUrl: (0, config_1.getApiUrl)(),
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
return _client;
|
|
22
|
+
};
|
|
23
|
+
const server = new index_js_1.Server({ name: "tuning-engines", version: "0.3.6" }, {
|
|
15
24
|
capabilities: { tools: {} },
|
|
16
|
-
instructions: "Tuning Engines — Domain-specific fine-tuning of open-source LLMs and SLMs. Own your sovereign model with zero infrastructure.\n\n" +
|
|
25
|
+
instructions: "Tuning Engines — Domain-specific fine-tuning of open-source LLMs and SLMs, plus a Marketplace of pre-built models and datasets. Own your sovereign model with zero infrastructure.\n\n" +
|
|
17
26
|
"USE THIS SERVER WHEN the user wants to:\n" +
|
|
18
27
|
"- Fine-tune, train, or customize any open-source LLM/SLM on their data\n" +
|
|
19
28
|
"- Build a sovereign AI model trained on their organization's code, documents, or domain data\n" +
|
|
@@ -21,11 +30,17 @@ async function startMcpServer() {
|
|
|
21
30
|
"- Train using LoRA, QLoRA, or full fine-tuning methods\n" +
|
|
22
31
|
"- Estimate the cost of fine-tuning a model\n" +
|
|
23
32
|
"- Check training job status, manage trained models, or check billing\n" +
|
|
24
|
-
"- Export or import models to/from S3\n
|
|
33
|
+
"- Export or import models to/from S3\n" +
|
|
34
|
+
"- Browse and export pre-built models and datasets from the Marketplace\n\n" +
|
|
35
|
+
"MARKETPLACE:\n" +
|
|
36
|
+
"Pre-built, ready-to-use fine-tuned models and datasets curated by the platform. " +
|
|
37
|
+
"Browse the catalog, view details, and export directly to your S3 bucket. " +
|
|
38
|
+
"Credits are charged per export based on the item's price.\n" +
|
|
39
|
+
"Workflow: list_catalog_models → get_catalog_model → export_catalog_model → catalog_export_status\n\n" +
|
|
25
40
|
"SPECIALIZED TUNING AGENTS (more coming):\n" +
|
|
26
41
|
"- Cody (code_repo): Code autocomplete and inline suggestions via QLoRA/Axolotl\n" +
|
|
27
42
|
"- SIERA (sera_code_repo): Bug-fix and error resolution via AllenAI Open Coding Agents\n\n" +
|
|
28
|
-
"TYPICAL WORKFLOW: estimate_job → create_job → job_status (poll until done) → list_models\n\n" +
|
|
43
|
+
"TYPICAL TRAINING WORKFLOW: estimate_job → create_job → job_status (poll until done) → list_models\n\n" +
|
|
29
44
|
"Supports 1B to 72B parameter models from Qwen, Llama, DeepSeek, Mistral, Gemma, Phi, StarCoder, and CodeLlama families.\n" +
|
|
30
45
|
"Zero infrastructure — GPU provisioning, training orchestration, and model delivery fully managed.",
|
|
31
46
|
});
|
|
@@ -75,9 +90,10 @@ async function startMcpServer() {
|
|
|
75
90
|
"Supports quality_tier='low' (faster) or quality_tier='high' (deeper analysis, more training data).\n\n" +
|
|
76
91
|
"SUPPORTED BASE MODELS (by size):\n" +
|
|
77
92
|
"- 3B: Qwen/Qwen2.5-Coder-3B-Instruct\n" +
|
|
78
|
-
"-
|
|
79
|
-
"- 13-15B: codellama/CodeLlama-13b-Instruct-hf, bigcode/starcoder2-15b, Qwen/Qwen2.5-Coder-14B-Instruct\n" +
|
|
80
|
-
"-
|
|
93
|
+
"- 7-8B: codellama/CodeLlama-7b-hf, deepseek-ai/deepseek-coder-7b-instruct-v1.5, Qwen/Qwen2.5-Coder-7B-Instruct, Qwen/Qwen3-8B\n" +
|
|
94
|
+
"- 13-15B: codellama/CodeLlama-13b-Instruct-hf, bigcode/starcoder2-15b, Qwen/Qwen2.5-Coder-14B-Instruct, Qwen/Qwen3-14B\n" +
|
|
95
|
+
"- 22-27B: mistralai/Codestral-22B-v0.1, google/gemma-2-27b\n" +
|
|
96
|
+
"- 30-34B: deepseek-ai/deepseek-coder-33b-instruct, codellama/CodeLlama-34b-Instruct-hf, Qwen/Qwen2.5-Coder-32B-Instruct, Qwen/Qwen3-Coder-30B-A3B, Qwen/Qwen3-32B\n" +
|
|
81
97
|
"- 70-72B: codellama/CodeLlama-70b-Instruct-hf, meta-llama/Llama-3.1-70B-Instruct, Qwen/Qwen2.5-72B-Instruct\n\n" +
|
|
82
98
|
"TYPICAL WORKFLOW: estimate_job first to check cost, then create_job, then job_status to monitor progress.",
|
|
83
99
|
inputSchema: {
|
|
@@ -167,7 +183,10 @@ async function startMcpServer() {
|
|
|
167
183
|
},
|
|
168
184
|
{
|
|
169
185
|
name: "retry_job",
|
|
170
|
-
description: "Retry a failed fine-tuning job from its last checkpoint. Creates a new job that resumes training where the failed one stopped, saving GPU time. Each retry is billed separately
|
|
186
|
+
description: "Retry a failed fine-tuning job from its last checkpoint. Creates a new job that resumes training where the failed one stopped, saving GPU time. Each retry is billed separately.\n\n" +
|
|
187
|
+
"IMPORTANT: This tool fetches a cost estimate and includes it in the response. " +
|
|
188
|
+
"You MUST show the estimate to the user and get their explicit approval before considering the retry confirmed. " +
|
|
189
|
+
"The retry is submitted automatically (the server validates balance), but always present the cost to the user.",
|
|
171
190
|
inputSchema: {
|
|
172
191
|
type: "object",
|
|
173
192
|
properties: {
|
|
@@ -203,6 +222,10 @@ async function startMcpServer() {
|
|
|
203
222
|
type: "number",
|
|
204
223
|
description: "Approximate repository size in MB (helps refine the estimate)",
|
|
205
224
|
},
|
|
225
|
+
use_case: {
|
|
226
|
+
type: "string",
|
|
227
|
+
description: "Agent to use for the estimate (e.g. 'code_repo' for Cody, 'sera_code_repo' for SIERA). Defaults to code_repo.",
|
|
228
|
+
},
|
|
206
229
|
},
|
|
207
230
|
},
|
|
208
231
|
},
|
|
@@ -268,10 +291,15 @@ async function startMcpServer() {
|
|
|
268
291
|
},
|
|
269
292
|
{
|
|
270
293
|
name: "list_supported_models",
|
|
271
|
-
description: "List the supported base HuggingFace models available for fine-tuning on Tuning Engines.",
|
|
294
|
+
description: "List the supported base HuggingFace models available for fine-tuning on Tuning Engines. Optionally filter by agent to see only compatible models.",
|
|
272
295
|
inputSchema: {
|
|
273
296
|
type: "object",
|
|
274
|
-
properties: {
|
|
297
|
+
properties: {
|
|
298
|
+
agent: {
|
|
299
|
+
type: "string",
|
|
300
|
+
description: "Filter models compatible with this agent (e.g. 'code_repo', 'sera_code_repo'). Omit to see all models.",
|
|
301
|
+
},
|
|
302
|
+
},
|
|
275
303
|
},
|
|
276
304
|
},
|
|
277
305
|
{
|
|
@@ -330,6 +358,285 @@ async function startMcpServer() {
|
|
|
330
358
|
required: ["model_id"],
|
|
331
359
|
},
|
|
332
360
|
},
|
|
361
|
+
{
|
|
362
|
+
name: "list_catalog_models",
|
|
363
|
+
description: "List available pre-built models and datasets from the Tuning Engines Marketplace. " +
|
|
364
|
+
"These are platform-owned, ready-to-use assets that can be exported to your S3 bucket. " +
|
|
365
|
+
"Returns name, description, base model, size, export price, and category.",
|
|
366
|
+
inputSchema: {
|
|
367
|
+
type: "object",
|
|
368
|
+
properties: {
|
|
369
|
+
category: {
|
|
370
|
+
type: "string",
|
|
371
|
+
description: "Filter by category (e.g. 'code', 'bug-fix', 'general'). Omit to see all.",
|
|
372
|
+
},
|
|
373
|
+
},
|
|
374
|
+
},
|
|
375
|
+
},
|
|
376
|
+
{
|
|
377
|
+
name: "get_catalog_model",
|
|
378
|
+
description: "Get detailed information about a specific pre-built model or dataset from the Marketplace including description, pricing, and export options.",
|
|
379
|
+
inputSchema: {
|
|
380
|
+
type: "object",
|
|
381
|
+
properties: {
|
|
382
|
+
model_id: { type: "string", description: "Catalog model ID (UUID)" },
|
|
383
|
+
},
|
|
384
|
+
required: ["model_id"],
|
|
385
|
+
},
|
|
386
|
+
},
|
|
387
|
+
{
|
|
388
|
+
name: "export_catalog_model",
|
|
389
|
+
description: "Export a pre-built model or dataset from the Marketplace to your S3 bucket. " +
|
|
390
|
+
"Credits will be charged based on the export price upon successful completion.",
|
|
391
|
+
inputSchema: {
|
|
392
|
+
type: "object",
|
|
393
|
+
properties: {
|
|
394
|
+
model_id: { type: "string", description: "Catalog model ID (UUID) to export" },
|
|
395
|
+
s3_bucket: { type: "string", description: "Destination S3 bucket name" },
|
|
396
|
+
s3_prefix: {
|
|
397
|
+
type: "string",
|
|
398
|
+
description: "Optional S3 key prefix for the exported model",
|
|
399
|
+
},
|
|
400
|
+
s3_access_key_id: { type: "string", description: "AWS access key ID" },
|
|
401
|
+
s3_secret_access_key: { type: "string", description: "AWS secret access key" },
|
|
402
|
+
s3_region: { type: "string", description: "AWS region (e.g. us-east-1)" },
|
|
403
|
+
},
|
|
404
|
+
required: ["model_id", "s3_bucket", "s3_access_key_id", "s3_secret_access_key", "s3_region"],
|
|
405
|
+
},
|
|
406
|
+
},
|
|
407
|
+
{
|
|
408
|
+
name: "catalog_export_status",
|
|
409
|
+
description: "Check the status of a Marketplace export operation. Returns status, charge info, and any error messages.",
|
|
410
|
+
inputSchema: {
|
|
411
|
+
type: "object",
|
|
412
|
+
properties: {
|
|
413
|
+
model_id: { type: "string", description: "Catalog model ID (UUID)" },
|
|
414
|
+
export_id: { type: "string", description: "Export operation ID (UUID)" },
|
|
415
|
+
},
|
|
416
|
+
required: ["model_id", "export_id"],
|
|
417
|
+
},
|
|
418
|
+
},
|
|
419
|
+
// --- Datasets ---
|
|
420
|
+
{
|
|
421
|
+
name: "list_datasets",
|
|
422
|
+
description: "List datasets available for training and evaluation. Datasets can be uploaded from S3 and used for fine-tuning or model evaluation.",
|
|
423
|
+
inputSchema: {
|
|
424
|
+
type: "object",
|
|
425
|
+
properties: {
|
|
426
|
+
limit: { type: "number", description: "Max results (default 20)" },
|
|
427
|
+
},
|
|
428
|
+
},
|
|
429
|
+
},
|
|
430
|
+
{
|
|
431
|
+
name: "show_dataset",
|
|
432
|
+
description: "Get details of a specific dataset including status, source, and metadata.",
|
|
433
|
+
inputSchema: {
|
|
434
|
+
type: "object",
|
|
435
|
+
properties: {
|
|
436
|
+
dataset_id: { type: "string", description: "Dataset ID (UUID)" },
|
|
437
|
+
},
|
|
438
|
+
required: ["dataset_id"],
|
|
439
|
+
},
|
|
440
|
+
},
|
|
441
|
+
{
|
|
442
|
+
name: "create_dataset",
|
|
443
|
+
description: "Create a new dataset by importing from S3. Datasets can be used for fine-tuning or model evaluation.",
|
|
444
|
+
inputSchema: {
|
|
445
|
+
type: "object",
|
|
446
|
+
properties: {
|
|
447
|
+
name: { type: "string", description: "Name for the dataset" },
|
|
448
|
+
description: { type: "string", description: "Description of the dataset contents" },
|
|
449
|
+
source_type: { type: "string", description: "Source type (e.g. 's3')" },
|
|
450
|
+
s3_url: { type: "string", description: "S3 URL of the dataset (e.g. s3://bucket/path/data.jsonl)" },
|
|
451
|
+
s3_access_key_id: { type: "string", description: "AWS access key ID" },
|
|
452
|
+
s3_secret_access_key: { type: "string", description: "AWS secret access key" },
|
|
453
|
+
s3_region: { type: "string", description: "AWS region (e.g. us-east-1)" },
|
|
454
|
+
for_evaluation: { type: "boolean", description: "Whether this dataset is for evaluation (default: false)" },
|
|
455
|
+
},
|
|
456
|
+
required: ["name", "source_type"],
|
|
457
|
+
},
|
|
458
|
+
},
|
|
459
|
+
{
|
|
460
|
+
name: "delete_dataset",
|
|
461
|
+
description: "Delete a dataset from the platform.",
|
|
462
|
+
inputSchema: {
|
|
463
|
+
type: "object",
|
|
464
|
+
properties: {
|
|
465
|
+
dataset_id: { type: "string", description: "Dataset ID to delete" },
|
|
466
|
+
},
|
|
467
|
+
required: ["dataset_id"],
|
|
468
|
+
},
|
|
469
|
+
},
|
|
470
|
+
{
|
|
471
|
+
name: "dataset_status",
|
|
472
|
+
description: "Check the status of a dataset import or processing operation.",
|
|
473
|
+
inputSchema: {
|
|
474
|
+
type: "object",
|
|
475
|
+
properties: {
|
|
476
|
+
dataset_id: { type: "string", description: "Dataset ID (UUID)" },
|
|
477
|
+
},
|
|
478
|
+
required: ["dataset_id"],
|
|
479
|
+
},
|
|
480
|
+
},
|
|
481
|
+
// --- Evaluations ---
|
|
482
|
+
{
|
|
483
|
+
name: "list_evaluations",
|
|
484
|
+
description: "List model evaluations. Evaluations run your trained models against benchmark datasets using various evaluators to measure quality.",
|
|
485
|
+
inputSchema: {
|
|
486
|
+
type: "object",
|
|
487
|
+
properties: {
|
|
488
|
+
status: {
|
|
489
|
+
type: "string",
|
|
490
|
+
description: "Filter by status: queued, running, succeeded, failed, canceled",
|
|
491
|
+
},
|
|
492
|
+
limit: { type: "number", description: "Max results (default 20)" },
|
|
493
|
+
},
|
|
494
|
+
},
|
|
495
|
+
},
|
|
496
|
+
{
|
|
497
|
+
name: "show_evaluation",
|
|
498
|
+
description: "Get full details of a specific evaluation including status, scores, metrics, and comparison data.",
|
|
499
|
+
inputSchema: {
|
|
500
|
+
type: "object",
|
|
501
|
+
properties: {
|
|
502
|
+
evaluation_id: { type: "string", description: "Evaluation ID (UUID)" },
|
|
503
|
+
},
|
|
504
|
+
required: ["evaluation_id"],
|
|
505
|
+
},
|
|
506
|
+
},
|
|
507
|
+
{
|
|
508
|
+
name: "create_evaluation",
|
|
509
|
+
description: "Create a new model evaluation. Run your trained model or a base model against a dataset using selected evaluators. " +
|
|
510
|
+
"Use list_evaluators to see available evaluators (e.g. code_execution, similarity, llm_judge).",
|
|
511
|
+
inputSchema: {
|
|
512
|
+
type: "object",
|
|
513
|
+
properties: {
|
|
514
|
+
name: { type: "string", description: "Name for this evaluation run" },
|
|
515
|
+
user_model_id: {
|
|
516
|
+
type: "string",
|
|
517
|
+
description: "ID of your trained model to evaluate. Either this or base_model is required.",
|
|
518
|
+
},
|
|
519
|
+
base_model: {
|
|
520
|
+
type: "string",
|
|
521
|
+
description: "HuggingFace model ID to evaluate (e.g. 'Qwen/Qwen2.5-Coder-7B-Instruct'). Either this or user_model_id is required.",
|
|
522
|
+
},
|
|
523
|
+
dataset_id: {
|
|
524
|
+
type: "string",
|
|
525
|
+
description: "ID of the evaluation dataset to use. Must be a dataset marked for_evaluation.",
|
|
526
|
+
},
|
|
527
|
+
evaluator_ids: {
|
|
528
|
+
type: "array",
|
|
529
|
+
items: { type: "string" },
|
|
530
|
+
description: "List of evaluator IDs to run (use list_evaluators to see options)",
|
|
531
|
+
},
|
|
532
|
+
max_samples: {
|
|
533
|
+
type: "number",
|
|
534
|
+
description: "Maximum samples to evaluate (default: all)",
|
|
535
|
+
},
|
|
536
|
+
},
|
|
537
|
+
required: ["dataset_id", "evaluator_ids"],
|
|
538
|
+
},
|
|
539
|
+
},
|
|
540
|
+
{
|
|
541
|
+
name: "cancel_evaluation",
|
|
542
|
+
description: "Cancel a running or queued evaluation.",
|
|
543
|
+
inputSchema: {
|
|
544
|
+
type: "object",
|
|
545
|
+
properties: {
|
|
546
|
+
evaluation_id: { type: "string", description: "Evaluation ID to cancel" },
|
|
547
|
+
},
|
|
548
|
+
required: ["evaluation_id"],
|
|
549
|
+
},
|
|
550
|
+
},
|
|
551
|
+
{
|
|
552
|
+
name: "evaluation_status",
|
|
553
|
+
description: "Get live status of an evaluation including progress and current metrics.",
|
|
554
|
+
inputSchema: {
|
|
555
|
+
type: "object",
|
|
556
|
+
properties: {
|
|
557
|
+
evaluation_id: { type: "string", description: "Evaluation ID (UUID)" },
|
|
558
|
+
},
|
|
559
|
+
required: ["evaluation_id"],
|
|
560
|
+
},
|
|
561
|
+
},
|
|
562
|
+
{
|
|
563
|
+
name: "list_evaluators",
|
|
564
|
+
description: "List available evaluators for model evaluation. Evaluators measure different aspects of model quality like code execution, similarity, or LLM-based judgment.",
|
|
565
|
+
inputSchema: {
|
|
566
|
+
type: "object",
|
|
567
|
+
properties: {},
|
|
568
|
+
},
|
|
569
|
+
},
|
|
570
|
+
{
|
|
571
|
+
name: "estimate_evaluation",
|
|
572
|
+
description: "Get a cost estimate for an evaluation before running it.",
|
|
573
|
+
inputSchema: {
|
|
574
|
+
type: "object",
|
|
575
|
+
properties: {
|
|
576
|
+
user_model_id: { type: "string", description: "ID of your trained model" },
|
|
577
|
+
base_model: { type: "string", description: "Or a HuggingFace model ID" },
|
|
578
|
+
dataset_id: { type: "string", description: "Evaluation dataset ID" },
|
|
579
|
+
evaluator_ids: {
|
|
580
|
+
type: "array",
|
|
581
|
+
items: { type: "string" },
|
|
582
|
+
description: "List of evaluator IDs",
|
|
583
|
+
},
|
|
584
|
+
max_samples: { type: "number", description: "Max samples to evaluate" },
|
|
585
|
+
},
|
|
586
|
+
required: ["dataset_id", "evaluator_ids"],
|
|
587
|
+
},
|
|
588
|
+
},
|
|
589
|
+
// --- Inference ---
|
|
590
|
+
{
|
|
591
|
+
name: "list_inference_models",
|
|
592
|
+
description: "List models available for inference through the Tuning Engines inference API. " +
|
|
593
|
+
"Includes both platform models and your deployed trained models.",
|
|
594
|
+
inputSchema: {
|
|
595
|
+
type: "object",
|
|
596
|
+
properties: {},
|
|
597
|
+
},
|
|
598
|
+
},
|
|
599
|
+
{
|
|
600
|
+
name: "inference_usage",
|
|
601
|
+
description: "Get inference API usage statistics including request counts, token usage, and costs.",
|
|
602
|
+
inputSchema: {
|
|
603
|
+
type: "object",
|
|
604
|
+
properties: {
|
|
605
|
+
start_date: { type: "string", description: "Start date (YYYY-MM-DD)" },
|
|
606
|
+
end_date: { type: "string", description: "End date (YYYY-MM-DD)" },
|
|
607
|
+
model: { type: "string", description: "Filter by model name" },
|
|
608
|
+
},
|
|
609
|
+
},
|
|
610
|
+
},
|
|
611
|
+
{
|
|
612
|
+
name: "get_inference_jwt",
|
|
613
|
+
description: "Get a JWT token for authenticating with the Tuning Engines inference API. " +
|
|
614
|
+
"Use this to make direct API calls to the inference endpoint.",
|
|
615
|
+
inputSchema: {
|
|
616
|
+
type: "object",
|
|
617
|
+
properties: {},
|
|
618
|
+
},
|
|
619
|
+
},
|
|
620
|
+
// --- Agents ---
|
|
621
|
+
{
|
|
622
|
+
name: "list_agents",
|
|
623
|
+
description: "List available agents configured for your organization. Agents are AI assistants with specific capabilities and tool access.",
|
|
624
|
+
inputSchema: {
|
|
625
|
+
type: "object",
|
|
626
|
+
properties: {},
|
|
627
|
+
},
|
|
628
|
+
},
|
|
629
|
+
{
|
|
630
|
+
name: "show_agent",
|
|
631
|
+
description: "Get details of a specific agent including capabilities, tools, and configuration.",
|
|
632
|
+
inputSchema: {
|
|
633
|
+
type: "object",
|
|
634
|
+
properties: {
|
|
635
|
+
agent_id: { type: "string", description: "Agent ID" },
|
|
636
|
+
},
|
|
637
|
+
required: ["agent_id"],
|
|
638
|
+
},
|
|
639
|
+
},
|
|
333
640
|
],
|
|
334
641
|
}));
|
|
335
642
|
// Handle tool calls
|
|
@@ -339,13 +646,13 @@ async function startMcpServer() {
|
|
|
339
646
|
let result;
|
|
340
647
|
switch (name) {
|
|
341
648
|
case "list_jobs":
|
|
342
|
-
result = await
|
|
649
|
+
result = await getClient().listJobs({
|
|
343
650
|
status: args?.status,
|
|
344
651
|
limit: args?.limit,
|
|
345
652
|
});
|
|
346
653
|
break;
|
|
347
654
|
case "show_job":
|
|
348
|
-
result = await
|
|
655
|
+
result = await getClient().getJob(args.job_id);
|
|
349
656
|
break;
|
|
350
657
|
case "create_job":
|
|
351
658
|
if (!args?.base_model && !args?.base_user_model_id) {
|
|
@@ -354,7 +661,7 @@ async function startMcpServer() {
|
|
|
354
661
|
isError: true,
|
|
355
662
|
};
|
|
356
663
|
}
|
|
357
|
-
result = await
|
|
664
|
+
result = await getClient().createJob({
|
|
358
665
|
base_model: args?.base_model,
|
|
359
666
|
base_user_model_id: args?.base_user_model_id,
|
|
360
667
|
output_name: args.output_name,
|
|
@@ -371,14 +678,34 @@ async function startMcpServer() {
|
|
|
371
678
|
});
|
|
372
679
|
break;
|
|
373
680
|
case "cancel_job":
|
|
374
|
-
result = await
|
|
681
|
+
result = await getClient().cancelJob(args.job_id);
|
|
375
682
|
break;
|
|
376
683
|
case "job_status":
|
|
377
|
-
result = await
|
|
684
|
+
result = await getClient().getJobStatus(args.job_id);
|
|
378
685
|
break;
|
|
379
|
-
case "retry_job":
|
|
380
|
-
|
|
686
|
+
case "retry_job": {
|
|
687
|
+
// Fetch job details and estimate before retrying so the AI can show cost
|
|
688
|
+
const retryJobId = args.job_id;
|
|
689
|
+
const jobDetails = await getClient().getJob(retryJobId);
|
|
690
|
+
let retryEstimate = null;
|
|
691
|
+
try {
|
|
692
|
+
retryEstimate = await getClient().estimateJob({
|
|
693
|
+
base_model: jobDetails.base_model,
|
|
694
|
+
num_epochs: jobDetails.num_epochs,
|
|
695
|
+
max_examples: jobDetails.max_examples,
|
|
696
|
+
use_case: jobDetails.agent,
|
|
697
|
+
});
|
|
698
|
+
}
|
|
699
|
+
catch (estErr) {
|
|
700
|
+
// Estimate failed — continue with retry (server validates balance)
|
|
701
|
+
}
|
|
702
|
+
const retryResult = await getClient().retryJob(retryJobId, args?.github_token);
|
|
703
|
+
result = {
|
|
704
|
+
...retryResult,
|
|
705
|
+
retry_estimate: retryEstimate,
|
|
706
|
+
};
|
|
381
707
|
break;
|
|
708
|
+
}
|
|
382
709
|
case "estimate_job":
|
|
383
710
|
if (!args?.base_model && !args?.base_user_model_id) {
|
|
384
711
|
return {
|
|
@@ -386,16 +713,17 @@ async function startMcpServer() {
|
|
|
386
713
|
isError: true,
|
|
387
714
|
};
|
|
388
715
|
}
|
|
389
|
-
result = await
|
|
716
|
+
result = await getClient().estimateJob({
|
|
390
717
|
base_model: args?.base_model,
|
|
391
718
|
base_user_model_id: args?.base_user_model_id,
|
|
392
719
|
num_epochs: args?.num_epochs,
|
|
393
720
|
max_examples: args?.max_examples,
|
|
394
721
|
repo_size_mb: args?.repo_size_mb,
|
|
722
|
+
use_case: args?.use_case,
|
|
395
723
|
});
|
|
396
724
|
break;
|
|
397
725
|
case "validate_s3":
|
|
398
|
-
result = await
|
|
726
|
+
result = await getClient().validateS3({
|
|
399
727
|
s3_bucket: args.s3_bucket,
|
|
400
728
|
s3_access_key_id: args.s3_access_key_id,
|
|
401
729
|
s3_secret_access_key: args.s3_secret_access_key,
|
|
@@ -403,25 +731,25 @@ async function startMcpServer() {
|
|
|
403
731
|
});
|
|
404
732
|
break;
|
|
405
733
|
case "list_models":
|
|
406
|
-
result = await
|
|
734
|
+
result = await getClient().listUserModels();
|
|
407
735
|
break;
|
|
408
736
|
case "show_model":
|
|
409
|
-
result = await
|
|
737
|
+
result = await getClient().getUserModel(args.model_id);
|
|
410
738
|
break;
|
|
411
739
|
case "delete_model":
|
|
412
|
-
result = await
|
|
740
|
+
result = await getClient().deleteUserModel(args.model_id);
|
|
413
741
|
break;
|
|
414
742
|
case "get_balance":
|
|
415
|
-
result = await
|
|
743
|
+
result = await getClient().getBilling();
|
|
416
744
|
break;
|
|
417
745
|
case "get_account":
|
|
418
|
-
result = await
|
|
746
|
+
result = await getClient().getAccount();
|
|
419
747
|
break;
|
|
420
748
|
case "list_supported_models":
|
|
421
|
-
result = await
|
|
749
|
+
result = await getClient().listModels({ agent: args?.agent });
|
|
422
750
|
break;
|
|
423
751
|
case "import_model":
|
|
424
|
-
result = await
|
|
752
|
+
result = await getClient().importModel({
|
|
425
753
|
name: args.name,
|
|
426
754
|
source_s3_url: args.source_s3_url,
|
|
427
755
|
base_model: args.base_model,
|
|
@@ -431,7 +759,7 @@ async function startMcpServer() {
|
|
|
431
759
|
});
|
|
432
760
|
break;
|
|
433
761
|
case "export_model":
|
|
434
|
-
result = await
|
|
762
|
+
result = await getClient().exportModel(args.model_id, {
|
|
435
763
|
s3_bucket: args.s3_bucket,
|
|
436
764
|
s3_prefix: args?.s3_prefix,
|
|
437
765
|
s3_access_key_id: args.s3_access_key_id,
|
|
@@ -441,7 +769,119 @@ async function startMcpServer() {
|
|
|
441
769
|
});
|
|
442
770
|
break;
|
|
443
771
|
case "model_status":
|
|
444
|
-
result = await
|
|
772
|
+
result = await getClient().getUserModelStatus(args.model_id);
|
|
773
|
+
break;
|
|
774
|
+
case "list_catalog_models":
|
|
775
|
+
result = await getClient().listCatalogModels({
|
|
776
|
+
category: args?.category,
|
|
777
|
+
});
|
|
778
|
+
break;
|
|
779
|
+
case "get_catalog_model":
|
|
780
|
+
result = await getClient().getCatalogModel(args.model_id);
|
|
781
|
+
break;
|
|
782
|
+
case "export_catalog_model":
|
|
783
|
+
result = await getClient().exportCatalogModel(args.model_id, {
|
|
784
|
+
s3_bucket: args.s3_bucket,
|
|
785
|
+
s3_prefix: args?.s3_prefix,
|
|
786
|
+
s3_access_key_id: args.s3_access_key_id,
|
|
787
|
+
s3_secret_access_key: args.s3_secret_access_key,
|
|
788
|
+
s3_region: args.s3_region,
|
|
789
|
+
});
|
|
790
|
+
break;
|
|
791
|
+
case "catalog_export_status":
|
|
792
|
+
result = await getClient().getCatalogExportStatus(args.model_id, args.export_id);
|
|
793
|
+
break;
|
|
794
|
+
// --- Datasets ---
|
|
795
|
+
case "list_datasets":
|
|
796
|
+
result = await getClient().listDatasets({
|
|
797
|
+
limit: args?.limit,
|
|
798
|
+
});
|
|
799
|
+
break;
|
|
800
|
+
case "show_dataset":
|
|
801
|
+
result = await getClient().getDataset(args.dataset_id);
|
|
802
|
+
break;
|
|
803
|
+
case "create_dataset":
|
|
804
|
+
result = await getClient().createDataset({
|
|
805
|
+
name: args.name,
|
|
806
|
+
description: args?.description,
|
|
807
|
+
source_type: args.source_type,
|
|
808
|
+
s3_url: args?.s3_url,
|
|
809
|
+
s3_access_key_id: args?.s3_access_key_id,
|
|
810
|
+
s3_secret_access_key: args?.s3_secret_access_key,
|
|
811
|
+
s3_region: args?.s3_region,
|
|
812
|
+
for_evaluation: args?.for_evaluation,
|
|
813
|
+
});
|
|
814
|
+
break;
|
|
815
|
+
case "delete_dataset":
|
|
816
|
+
result = await getClient().deleteDataset(args.dataset_id);
|
|
817
|
+
break;
|
|
818
|
+
case "dataset_status":
|
|
819
|
+
result = await getClient().getDatasetStatus(args.dataset_id);
|
|
820
|
+
break;
|
|
821
|
+
// --- Evaluations ---
|
|
822
|
+
case "list_evaluations":
|
|
823
|
+
result = await getClient().listEvaluations({
|
|
824
|
+
status: args?.status,
|
|
825
|
+
limit: args?.limit,
|
|
826
|
+
});
|
|
827
|
+
break;
|
|
828
|
+
case "show_evaluation":
|
|
829
|
+
result = await getClient().getEvaluation(args.evaluation_id);
|
|
830
|
+
break;
|
|
831
|
+
case "create_evaluation":
|
|
832
|
+
if (!args?.user_model_id && !args?.base_model) {
|
|
833
|
+
return {
|
|
834
|
+
content: [{ type: "text", text: "Error: either user_model_id or base_model is required" }],
|
|
835
|
+
isError: true,
|
|
836
|
+
};
|
|
837
|
+
}
|
|
838
|
+
result = await getClient().createEvaluation({
|
|
839
|
+
name: args?.name,
|
|
840
|
+
user_model_id: args?.user_model_id,
|
|
841
|
+
base_model: args?.base_model,
|
|
842
|
+
dataset_id: args.dataset_id,
|
|
843
|
+
evaluator_ids: args.evaluator_ids,
|
|
844
|
+
max_samples: args?.max_samples,
|
|
845
|
+
});
|
|
846
|
+
break;
|
|
847
|
+
case "cancel_evaluation":
|
|
848
|
+
result = await getClient().cancelEvaluation(args.evaluation_id);
|
|
849
|
+
break;
|
|
850
|
+
case "evaluation_status":
|
|
851
|
+
result = await getClient().getEvaluationStatus(args.evaluation_id);
|
|
852
|
+
break;
|
|
853
|
+
case "list_evaluators":
|
|
854
|
+
result = await getClient().listEvaluators();
|
|
855
|
+
break;
|
|
856
|
+
case "estimate_evaluation":
|
|
857
|
+
result = await getClient().estimateEvaluation({
|
|
858
|
+
user_model_id: args?.user_model_id,
|
|
859
|
+
base_model: args?.base_model,
|
|
860
|
+
dataset_id: args.dataset_id,
|
|
861
|
+
evaluator_ids: args.evaluator_ids,
|
|
862
|
+
max_samples: args?.max_samples,
|
|
863
|
+
});
|
|
864
|
+
break;
|
|
865
|
+
// --- Inference ---
|
|
866
|
+
case "list_inference_models":
|
|
867
|
+
result = await getClient().listInferenceModels();
|
|
868
|
+
break;
|
|
869
|
+
case "inference_usage":
|
|
870
|
+
result = await getClient().getInferenceUsage({
|
|
871
|
+
start_date: args?.start_date,
|
|
872
|
+
end_date: args?.end_date,
|
|
873
|
+
model: args?.model,
|
|
874
|
+
});
|
|
875
|
+
break;
|
|
876
|
+
case "get_inference_jwt":
|
|
877
|
+
result = await getClient().getInferenceJwt();
|
|
878
|
+
break;
|
|
879
|
+
// --- Agents ---
|
|
880
|
+
case "list_agents":
|
|
881
|
+
result = await getClient().listAgents();
|
|
882
|
+
break;
|
|
883
|
+
case "show_agent":
|
|
884
|
+
result = await getClient().getAgent(args.agent_id);
|
|
445
885
|
break;
|
|
446
886
|
default:
|
|
447
887
|
return {
|