@fastino-ai/pioneer-cli 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.claude/settings.local.json +7 -1
  2. package/.cursor/rules/api-documentation.mdc +14 -0
  3. package/.cursor/rules/backend-location-rule.mdc +5 -0
  4. package/Medical_NER_Dataset_1.jsonl +50 -0
  5. package/README.md +4 -1
  6. package/bun.lock +52 -0
  7. package/package.json +5 -2
  8. package/src/api.ts +551 -22
  9. package/src/chat/ChatApp.tsx +548 -263
  10. package/src/client/ToolExecutor.ts +175 -0
  11. package/src/client/WebSocketClient.ts +333 -0
  12. package/src/client/index.ts +2 -0
  13. package/src/config.ts +49 -139
  14. package/src/index.tsx +796 -106
  15. package/src/telemetry.ts +173 -0
  16. package/src/tests/config.test.ts +19 -0
  17. package/src/tools/bash.ts +1 -1
  18. package/src/tools/filesystem.ts +1 -1
  19. package/src/tools/index.ts +2 -9
  20. package/src/tools/sandbox.ts +1 -1
  21. package/src/tools/types.ts +25 -0
  22. package/src/utils/index.ts +6 -0
  23. package/fastino-ai-pioneer-cli-0.2.0.tgz +0 -0
  24. package/ner_dataset.json +0 -111
  25. package/src/agent/Agent.ts +0 -342
  26. package/src/agent/BudgetManager.ts +0 -167
  27. package/src/agent/LLMClient.ts +0 -435
  28. package/src/agent/ToolRegistry.ts +0 -97
  29. package/src/agent/index.ts +0 -15
  30. package/src/agent/types.ts +0 -84
  31. package/src/evolution/EvalRunner.ts +0 -301
  32. package/src/evolution/EvolutionEngine.ts +0 -319
  33. package/src/evolution/FeedbackCollector.ts +0 -197
  34. package/src/evolution/ModelTrainer.ts +0 -371
  35. package/src/evolution/index.ts +0 -18
  36. package/src/evolution/types.ts +0 -110
  37. package/src/tools/modal.ts +0 -269
  38. package/src/tools/training.ts +0 -443
  39. package/src/tools/wandb.ts +0 -348
  40. /package/src/{agent → utils}/FileResolver.ts +0 -0
package/src/index.tsx CHANGED
@@ -5,7 +5,7 @@
5
5
  */
6
6
 
7
7
  import React, { useState, useEffect } from "react";
8
- import { render, Box, Text, useApp, useInput } from "ink";
8
+ import { render, Box, Text, useApp, useInput, useStdin, Static } from "ink";
9
9
  import Spinner from "ink-spinner";
10
10
  import TextInput from "ink-text-input";
11
11
  import * as fs from "fs";
@@ -16,15 +16,16 @@ import {
16
16
  getBaseUrl,
17
17
  saveConfig,
18
18
  clearApiKey,
19
- getAgentConfig,
20
- getBudgetConfig,
21
- getSandboxConfig,
22
- getMLConfig,
23
- getSystemPrompt,
19
+ getHfToken,
24
20
  } from "./config.js";
25
21
  import * as api from "./api.js";
26
22
  import { ChatApp } from "./chat/ChatApp.js";
27
- import type { AgentConfig } from "./agent/types.js";
23
+ import {
24
+ isEnabled as isTelemetryEnabled,
25
+ hasChosenTelemetry,
26
+ setEnabled as setTelemetryEnabled,
27
+ trackCommand,
28
+ } from "./telemetry.js";
28
29
 
29
30
  // ─────────────────────────────────────────────────────────────────────────────
30
31
  // ASCII Banner
@@ -43,8 +44,51 @@ const BANNER = `
43
44
  // Helpers
44
45
  // ─────────────────────────────────────────────────────────────────────────────
45
46
 
47
+ // Fields to hide from JSON output (internal paths and user IDs)
48
+ const HIDDEN_FIELDS = new Set([
49
+ "user_id",
50
+ "root_dataset_id",
51
+ "dataset_path",
52
+ "trained_model_path",
53
+ "sagemaker_job_name",
54
+ ]);
55
+
56
+ function filterInternalFields(data: unknown): unknown {
57
+ if (data === null || data === undefined) {
58
+ return data;
59
+ }
60
+ if (Array.isArray(data)) {
61
+ return data.map(filterInternalFields);
62
+ }
63
+ if (typeof data === "object") {
64
+ const filtered: Record<string, unknown> = {};
65
+ for (const [key, value] of Object.entries(data as Record<string, unknown>)) {
66
+ if (!HIDDEN_FIELDS.has(key)) {
67
+ filtered[key] = filterInternalFields(value);
68
+ }
69
+ }
70
+ return filtered;
71
+ }
72
+ return data;
73
+ }
74
+
46
75
  function formatJson(data: unknown): string {
47
- return JSON.stringify(data, null, 2);
76
+ return JSON.stringify(filterInternalFields(data), null, 2);
77
+ }
78
+
79
+ /**
80
+ * Parse a dataset string in "name:version" format into a DatasetRef object.
81
+ * Returns null if the format is invalid.
82
+ */
83
+ function parseDatasetRef(datasetStr: string): api.DatasetRef | null {
84
+ const colonIndex = datasetStr.lastIndexOf(":");
85
+ if (colonIndex === -1) {
86
+ return null;
87
+ }
88
+ return {
89
+ name: datasetStr.slice(0, colonIndex),
90
+ version: datasetStr.slice(colonIndex + 1),
91
+ };
48
92
  }
49
93
 
50
94
  function parseArgs(argv: string[]): { command: string[]; flags: Record<string, string> } {
@@ -111,16 +155,131 @@ const JsonOutput: React.FC<{ data: unknown }> = ({ data }) => (
111
155
  </Box>
112
156
  );
113
157
 
158
+ // ─────────────────────────────────────────────────────────────────────────────
159
+ // Telemetry Consent Prompt
160
+ // ─────────────────────────────────────────────────────────────────────────────
161
+
162
+ interface TelemetryPromptProps {
163
+ onComplete: () => void;
164
+ }
165
+
166
+ const TelemetryPrompt: React.FC<TelemetryPromptProps> = ({ onComplete }) => {
167
+ const [selected, setSelected] = useState<"yes" | "no">("yes");
168
+
169
+ useInput((input, key) => {
170
+ if (key.leftArrow || key.rightArrow) {
171
+ setSelected((s) => (s === "yes" ? "no" : "yes"));
172
+ }
173
+ if (key.return) {
174
+ setTelemetryEnabled(selected === "yes");
175
+ onComplete();
176
+ }
177
+ if (input === "y" || input === "Y") {
178
+ setTelemetryEnabled(true);
179
+ onComplete();
180
+ }
181
+ if (input === "n" || input === "N") {
182
+ setTelemetryEnabled(false);
183
+ onComplete();
184
+ }
185
+ });
186
+
187
+ return (
188
+ <Box flexDirection="column" paddingX={1}>
189
+ <Text bold color="cyan">Pioneer CLI - Usage Analytics</Text>
190
+ <Text> </Text>
191
+ <Text>Help improve Pioneer by sharing usage data?</Text>
192
+ <Text> </Text>
193
+ <Text dimColor>What we collect:</Text>
194
+ <Text dimColor> - Commands and features used</Text>
195
+ <Text dimColor> - Session duration, tool usage</Text>
196
+ <Text dimColor> - OS and CLI version</Text>
197
+ <Text> </Text>
198
+ <Text dimColor>We never collect your prompts or file contents.</Text>
199
+ <Text> </Text>
200
+ <Text dimColor>Change anytime: pioneer telemetry [enable|disable]</Text>
201
+ <Text> </Text>
202
+ <Box>
203
+ <Text>Share usage data? </Text>
204
+ <Text color={selected === "yes" ? "green" : "gray"} bold={selected === "yes"}>
205
+ {selected === "yes" ? "[Yes]" : " Yes "}
206
+ </Text>
207
+ <Text> / </Text>
208
+ <Text color={selected === "no" ? "red" : "gray"} bold={selected === "no"}>
209
+ {selected === "no" ? "[No]" : " No "}
210
+ </Text>
211
+ <Text dimColor> (Y/N or arrow keys)</Text>
212
+ </Box>
213
+ </Box>
214
+ );
215
+ };
216
+
217
+ // ─────────────────────────────────────────────────────────────────────────────
218
+ // Telemetry Status Component
219
+ // ─────────────────────────────────────────────────────────────────────────────
220
+
221
+ interface TelemetryStatusProps {
222
+ action?: string;
223
+ }
224
+
225
+ const TelemetryStatus: React.FC<TelemetryStatusProps> = ({ action }) => {
226
+ const { exit } = useApp();
227
+ const [message, setMessage] = useState<string | null>(null);
228
+
229
+ useEffect(() => {
230
+ if (action === "enable") {
231
+ setTelemetryEnabled(true);
232
+ setMessage("Telemetry enabled. Thank you for helping improve Pioneer!");
233
+ } else if (action === "disable") {
234
+ setTelemetryEnabled(false);
235
+ setMessage("Telemetry disabled. No usage data will be collected.");
236
+ } else {
237
+ const enabled = isTelemetryEnabled();
238
+ setMessage(`Telemetry is currently ${enabled ? "enabled" : "disabled"}.`);
239
+ }
240
+ setTimeout(() => exit(), 500);
241
+ }, [action, exit]);
242
+
243
+ if (!message) return null;
244
+
245
+ return (
246
+ <Box flexDirection="column">
247
+ {action === "enable" && <Success message={message} />}
248
+ {action === "disable" && <Text color="yellow">{message}</Text>}
249
+ {!action && <Text>{message}</Text>}
250
+ {!action && (
251
+ <Text dimColor>
252
+ Use 'pioneer telemetry enable' or 'pioneer telemetry disable' to change.
253
+ </Text>
254
+ )}
255
+ </Box>
256
+ );
257
+ };
258
+
114
259
  // ─────────────────────────────────────────────────────────────────────────────
115
260
  // Auth Login Component
116
261
  // ─────────────────────────────────────────────────────────────────────────────
117
262
 
118
- const AuthLogin: React.FC = () => {
263
+ interface AuthLoginProps {
264
+ apiKeyFromFlag?: string;
265
+ }
266
+
267
+ const AuthLogin: React.FC<AuthLoginProps> = ({ apiKeyFromFlag }) => {
119
268
  const { exit } = useApp();
269
+ const { isRawModeSupported } = useStdin();
120
270
  const [apiKey, setApiKey] = useState("");
121
- const [state, setState] = useState<"input" | "validating" | "done" | "error">("input");
271
+ const [state, setState] = useState<"input" | "validating" | "done" | "error">(
272
+ apiKeyFromFlag ? "validating" : "input"
273
+ );
122
274
  const [error, setError] = useState("");
123
275
 
276
+ // Handle API key from flag (non-interactive)
277
+ useEffect(() => {
278
+ if (apiKeyFromFlag) {
279
+ handleSubmit(apiKeyFromFlag);
280
+ }
281
+ }, [apiKeyFromFlag]);
282
+
124
283
  const handleSubmit = async (value: string) => {
125
284
  if (!value.trim()) {
126
285
  setError("API key cannot be empty");
@@ -158,6 +317,16 @@ const AuthLogin: React.FC = () => {
158
317
  return <ErrorMessage error={error} />;
159
318
  }
160
319
 
320
+ // Check if raw mode is supported for interactive input
321
+ if (!isRawModeSupported) {
322
+ return (
323
+ <Box flexDirection="column">
324
+ <ErrorMessage error="Interactive input not supported in this terminal." />
325
+ <Text dimColor>Use: pioneer auth login --key YOUR_API_KEY</Text>
326
+ </Box>
327
+ );
328
+ }
329
+
161
330
  return (
162
331
  <Box flexDirection="column">
163
332
  <Text>Enter your Pioneer API key:</Text>
@@ -443,13 +612,17 @@ function DatasetListCommand() {
443
612
  <Box flexDirection="column">
444
613
  <Text bold color="cyan">Remote Datasets {remoteError ? "" : `(${remoteDatasets.length})`}</Text>
445
614
  {remoteError ? (
446
- <Text dimColor color="red"> Error: {remoteError.slice(0, 50)}</Text>
615
+ <Box flexDirection="column">
616
+ {remoteError.split("\n").map((line, idx) => (
617
+ <Text key={idx} color="red"> {line}</Text>
618
+ ))}
619
+ </Box>
447
620
  ) : remoteDatasets.length === 0 ? (
448
621
  <Text dimColor> No remote datasets</Text>
449
622
  ) : (
450
623
  remoteDatasets.map((ds) => (
451
624
  <Text key={ds.id}>
452
- {" "}<Text color="yellow">{ds.id.slice(0, 8)}</Text> {ds.dataset_name} <Text dimColor>({ds.dataset_type}, {ds.sample_size} examples)</Text>
625
+ {" "}<Text color="yellow">{ds.dataset_name}:{ds.version_number || "v1"}</Text> <Text dimColor>({ds.dataset_type}, {ds.sample_size} examples)</Text>
453
626
  </Text>
454
627
  ))
455
628
  )}
@@ -540,7 +713,7 @@ function ModelListCommand({ filter }: ModelListCommandProps) {
540
713
  // Help Components
541
714
  // ─────────────────────────────────────────────────────────────────────────────
542
715
 
543
- type HelpContext = "root" | "auth" | "dataset" | "job" | "model" | "chat";
716
+ type HelpContext = "root" | "auth" | "dataset" | "dataset-analyze" | "dataset-edit" | "job" | "model" | "chat" | "eval" | "benchmark";
544
717
 
545
718
  interface HelpProps {
546
719
  context?: HelpContext;
@@ -550,7 +723,7 @@ const Help: React.FC<HelpProps> = ({ context = "root" }) => {
550
723
  const { exit } = useApp();
551
724
 
552
725
  useEffect(() => {
553
- setTimeout(() => exit(), 100);
726
+ setTimeout(() => exit(), 500);
554
727
  }, [exit]);
555
728
 
556
729
  // Auth help
@@ -558,9 +731,10 @@ const Help: React.FC<HelpProps> = ({ context = "root" }) => {
558
731
  return (
559
732
  <Box flexDirection="column">
560
733
  <Text bold>Auth Commands:</Text>
561
- <Text> auth login Login with API key</Text>
562
- <Text> auth logout Clear stored API key</Text>
563
- <Text> auth status Show auth status</Text>
734
+ <Text> auth login Login with API key (interactive)</Text>
735
+ <Text> auth login --key {"<key>"} Login with API key (non-interactive)</Text>
736
+ <Text> auth logout Clear stored API key</Text>
737
+ <Text> auth status Show auth status</Text>
564
738
  </Box>
565
739
  );
566
740
  }
@@ -570,10 +744,11 @@ const Help: React.FC<HelpProps> = ({ context = "root" }) => {
570
744
  return (
571
745
  <Box flexDirection="column">
572
746
  <Text bold>Dataset Commands:</Text>
747
+ <Text dimColor> {"<id>"} format: name:version (e.g., my-dataset:v1)</Text>
748
+ <Text> </Text>
573
749
  <Text> dataset list List all datasets</Text>
574
750
  <Text> dataset get {"<id>"} Get dataset details</Text>
575
751
  <Text> dataset delete {"<id>"} Delete a dataset</Text>
576
- <Text> dataset download {"<id>"} Get download URL</Text>
577
752
  <Text> dataset analyze {"<id>"} Analyze a dataset</Text>
578
753
  <Text> </Text>
579
754
  <Text bold> Generate:</Text>
@@ -604,6 +779,75 @@ const Help: React.FC<HelpProps> = ({ context = "root" }) => {
604
779
  <Text> dataset infer classification Infer classification labels</Text>
605
780
  <Text> dataset infer fields Infer input/output fields</Text>
606
781
  <Text> --domain {"<desc>"} Domain description (required)</Text>
782
+ <Text> </Text>
783
+ <Text bold> Upload/Download:</Text>
784
+ <Text> dataset upload {"<file>"} Upload local file to Pioneer</Text>
785
+ <Text> --name {"<name>"} Dataset name (required)</Text>
786
+ <Text> --type {"<type>"} Type: ner, classification, custom</Text>
787
+ <Text> dataset upload {"<id>"} --to hf Upload Pioneer dataset to Hugging Face</Text>
788
+ <Text> --repo {"<repo>"} HF repo (required, e.g., username/dataset)</Text>
789
+ <Text> --hf-token {"<token>"} HF token (or use HF_TOKEN env var)</Text>
790
+ <Text> --private Make repo private</Text>
791
+ <Text> dataset download {"<id>"} Download from Pioneer to local file</Text>
792
+ <Text> --format {"<type>"} Format: jsonl, csv, parquet (default: jsonl)</Text>
793
+ <Text> --output {"<path>"} Output file path</Text>
794
+ <Text> dataset download --from hf Download from Hugging Face to Pioneer</Text>
795
+ <Text> --repo {"<repo>"} HF repo (required, e.g., username/dataset)</Text>
796
+ <Text> --hf-token {"<token>"} HF token (optional, for private repos)</Text>
797
+ <Text> --name {"<name>"} Local dataset name (optional)</Text>
798
+ <Text> --revision {"<rev>"} Git revision/branch (optional)</Text>
799
+ <Text> </Text>
800
+ <Text bold> Data Editing:</Text>
801
+ <Text> dataset edit --help Show data editing commands</Text>
802
+ <Text> dataset edit scan-pii {"<id>"} Scan for PII</Text>
803
+ <Text> dataset edit subsample {"<id>"} Create a subsample</Text>
804
+ </Box>
805
+ );
806
+ }
807
+
808
+ // Dataset edit help
809
+ if (context === "dataset-edit") {
810
+ return (
811
+ <Box flexDirection="column">
812
+ <Text bold>Dataset Edit Commands:</Text>
813
+ <Text dimColor> {"<dataset-id>"} format: name:version (e.g., my-dataset:v1)</Text>
814
+ <Text> </Text>
815
+ <Text> dataset edit scan-pii {"<dataset-id>"} Scan for PII</Text>
816
+ <Text> --columns {"<col1,col2>"} Columns to scan (optional, scans all if omitted)</Text>
817
+ <Text> --threshold {"<n>"} Detection threshold (default: 0.5)</Text>
818
+ <Text> dataset edit scan-phd {"<dataset-id>"} Scan for prompt injection</Text>
819
+ <Text> --columns {"<col1,col2>"} Columns to scan (optional, scans all if omitted)</Text>
820
+ <Text> --threshold {"<n>"} Detection threshold (default: 0.5)</Text>
821
+ <Text> dataset edit subsample {"<dataset-id>"} Create a subsample</Text>
822
+ <Text> --n {"<count>"} Target sample count (required)</Text>
823
+ <Text> --method {"<type>"} Method: random, balanced, stratified</Text>
824
+ <Text> --label-column {"<col>"} Column for balanced sampling</Text>
825
+ <Text> --seed {"<n>"} Random seed</Text>
826
+ <Text> dataset edit check-labels {"<dataset-id>"} Check label quality with AI</Text>
827
+ <Text> --text-column {"<col>"} Text column (required)</Text>
828
+ <Text> --label-column {"<col>"} Label column (required)</Text>
829
+ <Text> --sample-size {"<n>"} Samples to check (default: 10)</Text>
830
+ </Box>
831
+ );
832
+ }
833
+
834
+ // Dataset analyze help
835
+ if (context === "dataset-analyze") {
836
+ return (
837
+ <Box flexDirection="column">
838
+ <Text bold>Dataset Analyze Command:</Text>
839
+ <Text dimColor> {"<id>"} format: name:version (e.g., my-dataset:v1)</Text>
840
+ <Text> </Text>
841
+ <Text> dataset analyze {"<id>"} Analyze a dataset</Text>
842
+ <Text> </Text>
843
+ <Text bold> Required Options:</Text>
844
+ <Text> --task-type {"<type>"} Task type (required)</Text>
845
+ <Text> Options: ner, classification, generative</Text>
846
+ <Text> --analyses {"<a1,a2,...>"} Analyses to run (required, comma-separated)</Text>
847
+ <Text> Options: distribution, duplicates, outliers, splits, diversity</Text>
848
+ <Text> </Text>
849
+ <Text bold> Example:</Text>
850
+ <Text> dataset analyze abc123 --task-type ner --analyses distribution,duplicates</Text>
607
851
  </Box>
608
852
  );
609
853
  }
@@ -619,7 +863,7 @@ const Help: React.FC<HelpProps> = ({ context = "root" }) => {
619
863
  <Text> job delete {"<id>"} Delete a training job</Text>
620
864
  <Text> job create Create training job</Text>
621
865
  <Text> --model-name {"<name>"} Model name (required)</Text>
622
- <Text> --dataset-ids {"<ids>"} Comma-separated dataset IDs (required)</Text>
866
+ <Text> --datasets {"<ids>"} Comma-separated dataset IDs (required)</Text>
623
867
  <Text> --base-model {"<model>"} Base model (default: fastino/gliner2-base-v1)</Text>
624
868
  <Text> --epochs {"<n>"} Number of epochs (default: 5)</Text>
625
869
  </Box>
@@ -636,6 +880,12 @@ const Help: React.FC<HelpProps> = ({ context = "root" }) => {
636
880
  <Text> model list deployed List deployed models only</Text>
637
881
  <Text> model delete {"<id>"} Undeploy a model</Text>
638
882
  <Text> model download {"<id>"} Get model download URL</Text>
883
+ <Text> </Text>
884
+ <Text bold> Upload:</Text>
885
+ <Text> model upload {"<job-id>"} --to hf Upload trained model to Hugging Face</Text>
886
+ <Text> --repo {"<repo>"} HF repo (required, e.g., username/model)</Text>
887
+ <Text> --hf-token {"<token>"} HF token (or use HF_TOKEN env var)</Text>
888
+ <Text> --private Make repo private</Text>
639
889
  </Box>
640
890
  );
641
891
  }
@@ -646,14 +896,46 @@ const Help: React.FC<HelpProps> = ({ context = "root" }) => {
646
896
  <Box flexDirection="column">
647
897
  <Text bold>Chat Commands:</Text>
648
898
  <Text> chat Start interactive chat agent</Text>
649
- <Text> --provider {"<name>"} LLM provider (anthropic, openai)</Text>
650
- <Text> --model {"<model>"} Model to use</Text>
651
899
  <Text> --message {"<msg>"} Initial message to process</Text>
652
- <Text> --max-tokens {"<n>"} Max tokens (default: 500000, 0=unlimited)</Text>
653
- <Text> --max-cost {"<n>"} Max cost in USD (default: 5.0, 0=unlimited)</Text>
654
- <Text> --max-time {"<n>"} Max time in seconds (default: 7200, 0=unlimited)</Text>
655
- <Text> --max-tools {"<n>"} Max tool calls per turn (default: 50, 0=unlimited)</Text>
656
- <Text> --no-limit Remove all limits</Text>
900
+ <Text> </Text>
901
+ <Text dimColor> Note: Model selection available in chat via /model command</Text>
902
+ </Box>
903
+ );
904
+ }
905
+
906
+ // Eval help
907
+ if (context === "eval") {
908
+ return (
909
+ <Box flexDirection="column">
910
+ <Text bold>Evaluation Commands:</Text>
911
+ <Text dimColor> {"<dataset-id>"} format: name:version (e.g., my-dataset:v1)</Text>
912
+ <Text> </Text>
913
+ <Text> eval list {"<dataset-id>"} List evaluations for a dataset</Text>
914
+ <Text> eval get {"<id>"} Get evaluation details</Text>
915
+ <Text> eval create Create a new evaluation</Text>
916
+ <Text> --model-id {"<id>"} Model to evaluate (required)</Text>
917
+ <Text> --dataset {"<id>"} Dataset to evaluate on (required, name:version)</Text>
918
+ <Text> --task-type {"<type>"} Task type: ner, classification</Text>
919
+ <Text> --text-column {"<col>"} Text column name</Text>
920
+ <Text> --label-column {"<col>"} Label column name</Text>
921
+ </Box>
922
+ );
923
+ }
924
+
925
+ // Benchmark help
926
+ if (context === "benchmark") {
927
+ return (
928
+ <Box flexDirection="column">
929
+ <Text bold>Benchmark Commands:</Text>
930
+ <Text> benchmark list List available benchmarks</Text>
931
+ <Text> benchmark run Start a benchmark evaluation</Text>
932
+ <Text> --model-id {"<id>"} Model to evaluate (required)</Text>
933
+ <Text> --task {"<type>"} Task: ner, text_classification (required)</Text>
934
+ <Text> --benchmark {"<name>"} Benchmark name (required)</Text>
935
+ <Text> --max-samples {"<n>"} Max samples (default: 100)</Text>
936
+ <Text> --split {"<name>"} Dataset split (default: test)</Text>
937
+ <Text> benchmark get {"<id>"} Get evaluation status/results</Text>
938
+ <Text> benchmark cancel {"<id>"} Cancel running evaluation</Text>
657
939
  </Box>
658
940
  );
659
941
  }
@@ -669,17 +951,19 @@ const Help: React.FC<HelpProps> = ({ context = "root" }) => {
669
951
  <Text bold>Commands:</Text>
670
952
  <Text> chat Start interactive chat agent</Text>
671
953
  <Text> auth Authentication (login, logout, status)</Text>
672
- <Text> dataset Manage datasets (list, generate, infer)</Text>
954
+ <Text> dataset Manage datasets (list, generate, edit, analyze)</Text>
673
955
  <Text> job Manage training jobs</Text>
674
956
  <Text> model Manage models</Text>
957
+ <Text> eval Model evaluations on datasets</Text>
958
+ <Text> benchmark Run benchmark evaluations</Text>
959
+ <Text> telemetry Manage anonymous usage analytics</Text>
675
960
  <Text> </Text>
676
961
  <Text dimColor>Run 'pioneer {"<command>"} --help' for details on a specific command.</Text>
677
962
  <Text> </Text>
678
963
  <Text dimColor>Environment:</Text>
679
- <Text dimColor> PIONEER_API_URL API base URL (default: http://localhost:5001)</Text>
964
+ <Text dimColor> PIONEER_API_URL API base URL (default: https://api.fastino.ai)</Text>
680
965
  <Text dimColor> PIONEER_API_KEY API key (overrides saved key)</Text>
681
- <Text dimColor> ANTHROPIC_API_KEY Anthropic API key for chat agent</Text>
682
- <Text dimColor> OPENAI_API_KEY OpenAI API key for chat agent</Text>
966
+ <Text dimColor> HF_TOKEN Hugging Face token for push/pull operations</Text>
683
967
  </Box>
684
968
  );
685
969
  };
@@ -693,69 +977,8 @@ interface ChatWrapperProps {
693
977
  }
694
978
 
695
979
  const ChatWrapper: React.FC<ChatWrapperProps> = ({ flags }) => {
696
- const { exit } = useApp();
697
-
698
- // Build agent config from flags and stored config
699
- const storedConfig = getAgentConfig();
700
- const budgetConfig = getBudgetConfig();
701
- const systemPrompt = getSystemPrompt();
702
-
703
- // Apply budget overrides from flags
704
- const budget = { ...budgetConfig };
705
- let maxToolCalls = 50; // Default
706
-
707
- if (flags["no-limit"] === "true") {
708
- // Remove all limits
709
- budget.maxTokens = undefined;
710
- budget.maxCost = undefined;
711
- budget.maxTime = undefined;
712
- budget.maxIterations = undefined;
713
- maxToolCalls = 1000; // Effectively unlimited
714
- } else {
715
- // Apply individual overrides (0 = unlimited)
716
- if (flags["max-tokens"]) {
717
- const val = parseInt(flags["max-tokens"], 10);
718
- budget.maxTokens = val === 0 ? undefined : val;
719
- }
720
- if (flags["max-cost"]) {
721
- const val = parseFloat(flags["max-cost"]);
722
- budget.maxCost = val === 0 ? undefined : val;
723
- }
724
- if (flags["max-time"]) {
725
- const val = parseInt(flags["max-time"], 10);
726
- budget.maxTime = val === 0 ? undefined : val;
727
- }
728
- if (flags["max-tools"]) {
729
- const val = parseInt(flags["max-tools"], 10);
730
- maxToolCalls = val === 0 ? 1000 : val;
731
- }
732
- }
733
-
734
- const agentConfig: AgentConfig = {
735
- provider: (flags.provider as "anthropic" | "openai") || storedConfig.provider,
736
- model: flags.model || storedConfig.model,
737
- apiKey: storedConfig.apiKey,
738
- baseUrl: storedConfig.baseUrl,
739
- budget,
740
- systemPrompt,
741
- maxToolCalls,
742
- };
743
-
744
- // Validate we have an API key
745
- if (!agentConfig.apiKey) {
746
- return (
747
- <Box flexDirection="column">
748
- <ErrorMessage error="No API key configured. Set ANTHROPIC_API_KEY or OPENAI_API_KEY environment variable." />
749
- <Text dimColor>
750
- Or run: export ANTHROPIC_API_KEY="your-key"
751
- </Text>
752
- </Box>
753
- );
754
- }
755
-
756
980
  const initialMessage = flags.message;
757
-
758
- return <ChatApp config={agentConfig} initialMessage={initialMessage} />;
981
+ return <ChatApp initialMessage={initialMessage} />;
759
982
  };
760
983
 
761
984
  // ─────────────────────────────────────────────────────────────────────────────
@@ -768,8 +991,31 @@ interface AppProps {
768
991
  }
769
992
 
770
993
  const App: React.FC<AppProps> = ({ command, flags }) => {
994
+ // Check if raw mode is supported for interactive prompts
995
+ const { isRawModeSupported } = useStdin();
996
+ const [showTelemetryPrompt, setShowTelemetryPrompt] = useState(!hasChosenTelemetry());
771
997
  const [group, action, ...rest] = command;
772
998
 
999
+ // In non-interactive mode, skip telemetry prompt and default to disabled
1000
+ useEffect(() => {
1001
+ if (!isRawModeSupported && !hasChosenTelemetry()) {
1002
+ setTelemetryEnabled(false);
1003
+ setShowTelemetryPrompt(false);
1004
+ }
1005
+ }, [isRawModeSupported]);
1006
+
1007
+ // Track command usage (must be before any conditional returns)
1008
+ useEffect(() => {
1009
+ if (group && !showTelemetryPrompt) {
1010
+ trackCommand(group, action);
1011
+ }
1012
+ }, [group, action, showTelemetryPrompt]);
1013
+
1014
+ // Show telemetry consent prompt on first run (but not for --help or --version, and only if interactive)
1015
+ if (showTelemetryPrompt && isRawModeSupported && !flags.help && flags.version !== "true" && flags.v !== "true") {
1016
+ return <TelemetryPrompt onComplete={() => setShowTelemetryPrompt(false)} />;
1017
+ }
1018
+
773
1019
  // Root help (no command)
774
1020
  if (!group) {
775
1021
  return <Help />;
@@ -788,36 +1034,314 @@ const App: React.FC<AppProps> = ({ command, flags }) => {
788
1034
  if (flags.help === "true" || !action || action === "help") {
789
1035
  return <Help context="auth" />;
790
1036
  }
791
- if (action === "login") return <AuthLogin />;
1037
+ if (action === "login") return <AuthLogin apiKeyFromFlag={flags.key} />;
792
1038
  if (action === "logout") return <AuthLogout />;
793
1039
  if (action === "status") return <AuthStatus />;
794
1040
  return <Help context="auth" />;
795
1041
  }
796
1042
 
1043
+ // Telemetry commands
1044
+ if (group === "telemetry") {
1045
+ return <TelemetryStatus action={action} />;
1046
+ }
1047
+
797
1048
  // Dataset commands
798
1049
  if (group === "dataset") {
1050
+ // Handle edit subcommand first (before general help check)
1051
+ if (action === "edit") {
1052
+ const subAction = rest[0];
1053
+
1054
+ if (flags.help === "true" || !subAction || subAction === "help") {
1055
+ return <Help context="dataset-edit" />;
1056
+ }
1057
+
1058
+ if (subAction === "scan-pii" && rest[1]) {
1059
+ const dataset = parseDatasetRef(rest[1]);
1060
+ if (!dataset) {
1061
+ return <ErrorMessage error={`Invalid dataset format: ${rest[1]}. Use name:version format.`} />;
1062
+ }
1063
+ const columns = flags["columns"]?.split(",").filter(Boolean);
1064
+ const threshold = flags["threshold"] ? parseFloat(flags["threshold"]) : undefined;
1065
+
1066
+ return (
1067
+ <ApiCommand
1068
+ action={() =>
1069
+ api.scanForPII({
1070
+ dataset,
1071
+ columns: columns?.length ? columns : undefined,
1072
+ threshold,
1073
+ })
1074
+ }
1075
+ />
1076
+ );
1077
+ }
1078
+
1079
+ if (subAction === "scan-phd" && rest[1]) {
1080
+ const dataset = parseDatasetRef(rest[1]);
1081
+ if (!dataset) {
1082
+ return <ErrorMessage error={`Invalid dataset format: ${rest[1]}. Use name:version format.`} />;
1083
+ }
1084
+ const columns = flags["columns"]?.split(",").filter(Boolean);
1085
+ const threshold = flags["threshold"] ? parseFloat(flags["threshold"]) : undefined;
1086
+
1087
+ return (
1088
+ <ApiCommand
1089
+ action={() =>
1090
+ api.scanForPHD({
1091
+ dataset,
1092
+ columns: columns?.length ? columns : undefined,
1093
+ threshold,
1094
+ })
1095
+ }
1096
+ />
1097
+ );
1098
+ }
1099
+
1100
+ if (subAction === "subsample" && rest[1]) {
1101
+ const dataset = parseDatasetRef(rest[1]);
1102
+ if (!dataset) {
1103
+ return <ErrorMessage error={`Invalid dataset format: ${rest[1]}. Use name:version format.`} />;
1104
+ }
1105
+ const n = flags["n"] ? parseInt(flags["n"], 10) : undefined;
1106
+ const method = flags["method"] as "random" | "balanced" | "stratified" | undefined;
1107
+ const labelColumn = flags["label-column"];
1108
+ const seed = flags["seed"] ? parseInt(flags["seed"], 10) : undefined;
1109
+
1110
+ if (!n) {
1111
+ return <ErrorMessage error="--n is required (target sample count)" />;
1112
+ }
1113
+
1114
+ return (
1115
+ <ApiCommand
1116
+ action={() =>
1117
+ api.subsampleDataset({
1118
+ dataset,
1119
+ n,
1120
+ method,
1121
+ label_column: labelColumn,
1122
+ seed,
1123
+ })
1124
+ }
1125
+ successMessage="Subsample created"
1126
+ />
1127
+ );
1128
+ }
1129
+
1130
+ if (subAction === "check-labels" && rest[1]) {
1131
+ const dataset = parseDatasetRef(rest[1]);
1132
+ if (!dataset) {
1133
+ return <ErrorMessage error={`Invalid dataset format: ${rest[1]}. Use name:version format.`} />;
1134
+ }
1135
+ const textColumn = flags["text-column"];
1136
+ const labelColumn = flags["label-column"];
1137
+ const sampleSize = flags["sample-size"] ? parseInt(flags["sample-size"], 10) : undefined;
1138
+
1139
+ if (!textColumn || !labelColumn) {
1140
+ return <ErrorMessage error="--text-column and --label-column are required" />;
1141
+ }
1142
+
1143
+ return (
1144
+ <ApiCommand
1145
+ action={() =>
1146
+ api.checkLabels({
1147
+ dataset,
1148
+ text_column: textColumn,
1149
+ label_column: labelColumn,
1150
+ sample_size: sampleSize,
1151
+ })
1152
+ }
1153
+ />
1154
+ );
1155
+ }
1156
+
1157
+ return <Help context="dataset-edit" />;
1158
+ }
1159
+
799
1160
  if (flags.help === "true" || !action || action === "help") {
800
1161
  return <Help context="dataset" />;
801
1162
  }
1163
+
1164
+ // Dataset upload command
1165
+ if (action === "upload") {
1166
+ const destination = flags["to"];
1167
+
1168
+ // Show help if no arguments provided
1169
+ if (!rest[0] && !destination) {
1170
+ return (
1171
+ <Box flexDirection="column">
1172
+ <Text bold>Dataset Upload:</Text>
1173
+ <Text> </Text>
1174
+ <Text> Upload to Pioneer:</Text>
1175
+ <Text> dataset upload {"<file>"} --name {"<name>"} [--type {"<type>"}]</Text>
1176
+ <Text> </Text>
1177
+ <Text> Upload to Hugging Face:</Text>
1178
+ <Text> dataset upload {"<id>"} --to hf --repo {"<repo>"} [--hf-token {"<token>"}] [--private]</Text>
1179
+ <Text> </Text>
1180
+ <Text dimColor> {"<id>"} format: name:version (e.g., my-dataset:v1)</Text>
1181
+ </Box>
1182
+ );
1183
+ }
1184
+
1185
+ // Upload to Hugging Face
1186
+ if (destination === "hf") {
1187
+ const datasetStr = rest[0];
1188
+ if (!datasetStr) {
1189
+ return <ErrorMessage error="Dataset ID required: dataset upload <name:version> --to hf --repo <repo>" />;
1190
+ }
1191
+ const dataset = parseDatasetRef(datasetStr);
1192
+ if (!dataset) {
1193
+ return <ErrorMessage error={`Invalid dataset format: ${datasetStr}. Use name:version format.`} />;
1194
+ }
1195
+
1196
+ const repo = flags["repo"];
1197
+ const hfTokenFlag = flags["hf-token"];
1198
+ const isPrivate = flags["private"] === "true";
1199
+
1200
+ if (!repo) {
1201
+ return <ErrorMessage error="--repo is required (e.g., username/dataset-name)" />;
1202
+ }
1203
+
1204
+ const hfToken = getHfToken(hfTokenFlag);
1205
+ if (!hfToken) {
1206
+ return <ErrorMessage error="Hugging Face token required. Use --hf-token, HF_TOKEN env var, or set via config." />;
1207
+ }
1208
+
1209
+ return (
1210
+ <ApiCommand
1211
+ action={() =>
1212
+ api.pushDatasetToHub(dataset, {
1213
+ hf_token: hfToken,
1214
+ repo_id: repo,
1215
+ private: isPrivate,
1216
+ })
1217
+ }
1218
+ successMessage={`Dataset uploaded to Hugging Face: ${repo}`}
1219
+ />
1220
+ );
1221
+ }
1222
+
1223
+ // Default: Upload file to Pioneer
1224
+ const name = flags["name"];
1225
+ const datasetType = flags["type"] as "ner" | "classification" | "custom" | undefined;
1226
+
1227
+ if (!name) {
1228
+ return <ErrorMessage error="--name is required for dataset upload" />;
1229
+ }
1230
+
1231
+ return (
1232
+ <ApiCommand
1233
+ action={() =>
1234
+ api.uploadDataset(rest[0], {
1235
+ dataset_name: name,
1236
+ dataset_type: datasetType,
1237
+ })
1238
+ }
1239
+ successMessage="Dataset uploaded"
1240
+ />
1241
+ );
1242
+ }
1243
+
1244
+ // Dataset download command
1245
+ if (action === "download") {
1246
+ const source = flags["from"];
1247
+
1248
+ // Show help if no arguments provided
1249
+ if (!rest[0] && !source) {
1250
+ return (
1251
+ <Box flexDirection="column">
1252
+ <Text bold>Dataset Download:</Text>
1253
+ <Text> </Text>
1254
+ <Text> Download from Pioneer:</Text>
1255
+ <Text> dataset download {"<id>"} [--format jsonl|csv|parquet] [--output {"<path>"}]</Text>
1256
+ <Text> </Text>
1257
+ <Text> Download from Hugging Face:</Text>
1258
+ <Text> dataset download --from hf --repo {"<repo>"} [--hf-token {"<token>"}] [--name {"<name>"}]</Text>
1259
+ <Text> </Text>
1260
+ <Text dimColor> {"<id>"} format: name:version (e.g., my-dataset:v1)</Text>
1261
+ </Box>
1262
+ );
1263
+ }
1264
+
1265
+ // Download from Hugging Face
1266
+ if (source === "hf") {
1267
+ const repo = flags["repo"];
1268
+ const hfTokenFlag = flags["hf-token"];
1269
+ const datasetName = flags["name"];
1270
+ const revision = flags["revision"];
1271
+
1272
+ if (!repo) {
1273
+ return <ErrorMessage error="--repo is required (e.g., username/dataset-name)" />;
1274
+ }
1275
+
1276
+ const hfToken = getHfToken(hfTokenFlag);
1277
+
1278
+ return (
1279
+ <ApiCommand
1280
+ action={() =>
1281
+ api.pullDatasetFromHub({
1282
+ repo_id: repo,
1283
+ hf_token: hfToken,
1284
+ dataset_name: datasetName,
1285
+ revision,
1286
+ })
1287
+ }
1288
+ successMessage={`Dataset downloaded from Hugging Face: ${repo}`}
1289
+ />
1290
+ );
1291
+ }
1292
+
1293
+ // Default: Download from Pioneer
1294
+ const dataset = parseDatasetRef(rest[0]);
1295
+ if (!dataset) {
1296
+ return <ErrorMessage error={`Invalid dataset format: ${rest[0]}. Use name:version format.`} />;
1297
+ }
1298
+ const format = (flags["format"] || "jsonl") as "csv" | "jsonl" | "parquet";
1299
+ const output = flags["output"] || flags["o"];
1300
+ return <ApiCommand action={() => api.downloadDataset(dataset, format, output)} />;
1301
+ }
1302
+
802
1303
  if (action === "list") {
803
1304
  return <DatasetListCommand />;
804
1305
  }
805
1306
  if (action === "get" && rest[0]) {
806
- return <ApiCommand action={() => api.getDataset(rest[0])} />;
1307
+ const dataset = parseDatasetRef(rest[0]);
1308
+ if (!dataset) {
1309
+ return <ErrorMessage error={`Invalid dataset format: ${rest[0]}. Use name:version format.`} />;
1310
+ }
1311
+ return <ApiCommand action={() => api.getDataset(dataset)} />;
807
1312
  }
808
1313
  if (action === "delete" && rest[0]) {
1314
+ const dataset = parseDatasetRef(rest[0]);
1315
+ if (!dataset) {
1316
+ return <ErrorMessage error={`Invalid dataset format: ${rest[0]}. Use name:version format.`} />;
1317
+ }
809
1318
  return (
810
1319
  <ApiCommand
811
- action={() => api.deleteDataset(rest[0])}
1320
+ action={() => api.deleteDataset(dataset)}
812
1321
  successMessage={`Dataset ${rest[0]} deleted`}
813
1322
  />
814
1323
  );
815
1324
  }
816
- if (action === "download" && rest[0]) {
817
- return <ApiCommand action={() => api.downloadDataset(rest[0])} />;
818
- }
819
- if (action === "analyze" && rest[0]) {
820
- return <ApiCommand action={() => api.analyzeDataset(rest[0])} />;
1325
+ if (action === "analyze") {
1326
+ const datasetStr = rest[0];
1327
+ const taskType = flags["task-type"];
1328
+ const analyses = flags["analyses"]?.split(",");
1329
+
1330
+ // Show help if missing required params
1331
+ if (!datasetStr || !taskType || !analyses || analyses.length === 0) {
1332
+ return <Help context="dataset-analyze" />;
1333
+ }
1334
+
1335
+ const dataset = parseDatasetRef(datasetStr);
1336
+ if (!dataset) {
1337
+ return <ErrorMessage error={`Invalid dataset format: ${datasetStr}. Use name:version format.`} />;
1338
+ }
1339
+
1340
+ return (
1341
+ <ApiCommand
1342
+ action={() => api.analyzeDataset(dataset, taskType, analyses)}
1343
+ />
1344
+ );
821
1345
  }
822
1346
 
823
1347
  // Generate commands
@@ -972,12 +1496,25 @@ const App: React.FC<AppProps> = ({ command, flags }) => {
972
1496
  }
973
1497
  if (action === "create") {
974
1498
  const modelName = flags["model-name"];
975
- const datasetIds = flags["dataset-ids"]?.split(",") ?? [];
1499
+ const datasetStrings = flags["datasets"]?.split(",").filter(Boolean) ?? [];
976
1500
  const baseModel = flags["base-model"];
977
1501
  const epochs = flags["epochs"] ? parseInt(flags["epochs"], 10) : undefined;
978
1502
 
979
- if (!modelName || datasetIds.length === 0) {
980
- return <ErrorMessage error="--model-name and --dataset-ids are required" />;
1503
+ if (!modelName || datasetStrings.length === 0) {
1504
+ return <ErrorMessage error="--model-name and --datasets are required" />;
1505
+ }
1506
+
1507
+ // Parse dataset strings in name:version format
1508
+ const datasets: api.DatasetRef[] = [];
1509
+ for (const ds of datasetStrings) {
1510
+ const colonIndex = ds.lastIndexOf(":");
1511
+ if (colonIndex === -1) {
1512
+ return <ErrorMessage error={`Invalid dataset format: ${ds}. Use name:version format.`} />;
1513
+ }
1514
+ datasets.push({
1515
+ name: ds.slice(0, colonIndex),
1516
+ version: ds.slice(colonIndex + 1),
1517
+ });
981
1518
  }
982
1519
 
983
1520
  return (
@@ -985,7 +1522,7 @@ const App: React.FC<AppProps> = ({ command, flags }) => {
985
1522
  action={() =>
986
1523
  api.createJob({
987
1524
  model_name: modelName,
988
- dataset_ids: datasetIds,
1525
+ datasets,
989
1526
  base_model: baseModel,
990
1527
  nr_epochs: epochs,
991
1528
  })
@@ -1027,9 +1564,162 @@ const App: React.FC<AppProps> = ({ command, flags }) => {
1027
1564
  if (action === "download" && rest[0]) {
1028
1565
  return <ApiCommand action={() => api.downloadModel(rest[0])} />;
1029
1566
  }
1567
+ // Model upload command
1568
+ if (action === "upload") {
1569
+ const destination = flags["to"];
1570
+
1571
+ // Show help if no arguments provided
1572
+ if (!rest[0] && !destination) {
1573
+ return (
1574
+ <Box flexDirection="column">
1575
+ <Text bold>Model Upload:</Text>
1576
+ <Text> </Text>
1577
+ <Text> Upload to Hugging Face:</Text>
1578
+ <Text> model upload {"<job-id>"} --to hf --repo {"<repo>"} [--hf-token {"<token>"}] [--private]</Text>
1579
+ <Text> </Text>
1580
+ <Text dimColor> Supported destinations: hf (more coming soon)</Text>
1581
+ </Box>
1582
+ );
1583
+ }
1584
+
1585
+ // Upload to Hugging Face
1586
+ if (destination === "hf") {
1587
+ if (!rest[0]) {
1588
+ return <ErrorMessage error="Job ID required: model upload <job-id> --to hf --repo <repo>" />;
1589
+ }
1590
+ const jobId = rest[0];
1591
+ const repo = flags["repo"];
1592
+ const hfTokenFlag = flags["hf-token"];
1593
+ const isPrivate = flags["private"] === "true";
1594
+
1595
+ if (!repo) {
1596
+ return <ErrorMessage error="--repo is required (e.g., username/model-name)" />;
1597
+ }
1598
+
1599
+ const hfToken = getHfToken(hfTokenFlag);
1600
+ if (!hfToken) {
1601
+ return <ErrorMessage error="Hugging Face token required. Use --hf-token, HF_TOKEN env var, or set via config." />;
1602
+ }
1603
+
1604
+ return (
1605
+ <ApiCommand
1606
+ action={() =>
1607
+ api.pushModelToHub(jobId, {
1608
+ hf_token: hfToken,
1609
+ repo_id: repo,
1610
+ private: isPrivate,
1611
+ })
1612
+ }
1613
+ successMessage={`Model uploaded to Hugging Face: ${repo}`}
1614
+ />
1615
+ );
1616
+ }
1617
+
1618
+ return <ErrorMessage error="--to is required. Supported destinations: hf" />;
1619
+ }
1030
1620
  return <Help context="model" />;
1031
1621
  }
1032
1622
 
1623
+ // Eval commands
1624
+ if (group === "eval") {
1625
+ if (flags.help === "true" || !action || action === "help") {
1626
+ return <Help context="eval" />;
1627
+ }
1628
+ if (action === "list" && rest[0]) {
1629
+ const dataset = parseDatasetRef(rest[0]);
1630
+ if (!dataset) {
1631
+ return <ErrorMessage error={`Invalid dataset format: ${rest[0]}. Use name:version format.`} />;
1632
+ }
1633
+ return <ApiCommand action={() => api.getDatasetEvaluations(dataset)} />;
1634
+ }
1635
+ if (action === "list" && !rest[0]) {
1636
+ return <ErrorMessage error="Dataset ID is required: eval list <dataset-id>" />;
1637
+ }
1638
+ if (action === "get" && rest[0]) {
1639
+ return <ApiCommand action={() => api.getEvaluation(rest[0])} />;
1640
+ }
1641
+ if (action === "create") {
1642
+ const modelId = flags["model-id"];
1643
+ const datasetStr = flags["dataset"];
1644
+ const taskType = flags["task-type"];
1645
+ const textColumn = flags["text-column"];
1646
+ const labelColumn = flags["label-column"];
1647
+
1648
+ if (!modelId || !datasetStr) {
1649
+ return <ErrorMessage error="--model-id and --dataset are required" />;
1650
+ }
1651
+
1652
+ const dataset = parseDatasetRef(datasetStr);
1653
+ if (!dataset) {
1654
+ return <ErrorMessage error={`Invalid dataset format: ${datasetStr}. Use name:version format.`} />;
1655
+ }
1656
+
1657
+ return (
1658
+ <ApiCommand
1659
+ action={() =>
1660
+ api.createEvaluation({
1661
+ model_id: modelId,
1662
+ dataset,
1663
+ task_type: taskType,
1664
+ text_column: textColumn,
1665
+ label_column: labelColumn,
1666
+ })
1667
+ }
1668
+ successMessage="Evaluation created"
1669
+ />
1670
+ );
1671
+ }
1672
+ return <Help context="eval" />;
1673
+ }
1674
+
1675
+ // Benchmark commands
1676
+ if (group === "benchmark") {
1677
+ if (flags.help === "true" || !action || action === "help") {
1678
+ return <Help context="benchmark" />;
1679
+ }
1680
+ if (action === "list") {
1681
+ return <ApiCommand action={api.listBenchmarks} />;
1682
+ }
1683
+ if (action === "run") {
1684
+ const modelId = flags["model-id"];
1685
+ const task = flags["task"] as "ner" | "text_classification";
1686
+ const benchmark = flags["benchmark"];
1687
+ const maxSamples = flags["max-samples"] ? parseInt(flags["max-samples"], 10) : undefined;
1688
+ const split = flags["split"];
1689
+
1690
+ if (!modelId || !task || !benchmark) {
1691
+ return <ErrorMessage error="--model-id, --task, and --benchmark are required" />;
1692
+ }
1693
+
1694
+ return (
1695
+ <ApiCommand
1696
+ action={() =>
1697
+ api.startBenchmarkEvaluation({
1698
+ model_id: modelId,
1699
+ task,
1700
+ benchmark,
1701
+ max_samples: maxSamples,
1702
+ split,
1703
+ })
1704
+ }
1705
+ successMessage="Benchmark evaluation started"
1706
+ />
1707
+ );
1708
+ }
1709
+ if (action === "get" && rest[0]) {
1710
+ return <ApiCommand action={() => api.getBenchmarkEvaluation(rest[0])} />;
1711
+ }
1712
+ if (action === "cancel" && rest[0]) {
1713
+ return (
1714
+ <ApiCommand
1715
+ action={() => api.cancelBenchmarkEvaluation(rest[0])}
1716
+ successMessage="Benchmark evaluation cancelled"
1717
+ />
1718
+ );
1719
+ }
1720
+ return <Help context="benchmark" />;
1721
+ }
1722
+
1033
1723
  return <Help />;
1034
1724
  };
1035
1725
 
@@ -1039,7 +1729,7 @@ const App: React.FC<AppProps> = ({ command, flags }) => {
1039
1729
 
1040
1730
  import packageJson from "../package.json";
1041
1731
 
1042
- function main() {
1732
+ async function main() {
1043
1733
  const argv = process.argv.slice(2);
1044
1734
  const { command, flags } = parseArgs(argv);
1045
1735
 
@@ -1049,7 +1739,7 @@ function main() {
1049
1739
  process.exit(0);
1050
1740
  }
1051
1741
 
1052
- render(<App command={command} flags={flags} />);
1742
+ await render(<App command={command} flags={flags} />).waitUntilExit();
1053
1743
  }
1054
1744
 
1055
1745
  main();