@fastino-ai/pioneer-cli 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.cursor/rules/api-documentation.mdc +14 -0
- package/.cursor/rules/backend-location-rule.mdc +5 -0
- package/Medical_NER_Dataset_1.jsonl +50 -0
- package/README.md +4 -1
- package/bun.lock +52 -0
- package/package.json +5 -2
- package/src/api.ts +551 -22
- package/src/chat/ChatApp.tsx +548 -263
- package/src/client/ToolExecutor.ts +175 -0
- package/src/client/WebSocketClient.ts +333 -0
- package/src/client/index.ts +2 -0
- package/src/config.ts +49 -139
- package/src/index.tsx +796 -106
- package/src/telemetry.ts +173 -0
- package/src/tests/config.test.ts +19 -0
- package/src/tools/bash.ts +1 -1
- package/src/tools/filesystem.ts +1 -1
- package/src/tools/index.ts +2 -9
- package/src/tools/sandbox.ts +1 -1
- package/src/tools/types.ts +25 -0
- package/src/utils/index.ts +6 -0
- package/fastino-ai-pioneer-cli-0.2.0.tgz +0 -0
- package/ner_dataset.json +0 -111
- package/src/agent/Agent.ts +0 -342
- package/src/agent/BudgetManager.ts +0 -167
- package/src/agent/LLMClient.ts +0 -435
- package/src/agent/ToolRegistry.ts +0 -97
- package/src/agent/index.ts +0 -15
- package/src/agent/types.ts +0 -84
- package/src/evolution/EvalRunner.ts +0 -301
- package/src/evolution/EvolutionEngine.ts +0 -319
- package/src/evolution/FeedbackCollector.ts +0 -197
- package/src/evolution/ModelTrainer.ts +0 -371
- package/src/evolution/index.ts +0 -18
- package/src/evolution/types.ts +0 -110
- package/src/tools/modal.ts +0 -269
- package/src/tools/training.ts +0 -443
- package/src/tools/wandb.ts +0 -348
- /package/src/{agent → utils}/FileResolver.ts +0 -0
package/src/api.ts
CHANGED
|
@@ -12,6 +12,37 @@ export interface ApiResult<T = unknown> {
|
|
|
12
12
|
error?: string;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
/**
 * Format an authentication error with instructions for fixing it.
 *
 * The message is laid out as short paragraphs separated by blank lines. The
 * blank separators are kept deliberately: filtering falsy entries out of the
 * line list would also drop the `""` separators and collapse the paragraphs.
 *
 * @param originalError Raw error text to append; omitted when empty or undefined.
 * @returns The multi-line help message, without a trailing newline.
 */
function formatAuthError(originalError?: string): string {
  const lines = [
    "Felix API authentication failed.",
    "",
    "To fix this, set your API key:",
    " export PIONEER_API_KEY=your-key-here",
    "",
    "Or login interactively:",
    " pioneer auth login",
  ];

  if (originalError) {
    // Blank line first so the raw error reads as its own paragraph.
    lines.push("", `Original error: ${originalError}`);
  }

  return lines.join("\n");
}
|
|
32
|
+
|
|
33
|
+
/**
 * Check if a failed response is authentication-related.
 *
 * @param status HTTP status code of the response.
 * @param errorText Raw error text (response body or fallback message).
 * @returns `true` for status 401 or 403, or when the text contains
 *   "authentication", "api key" or "unauthorized" (case-insensitive).
 */
function isAuthError(status: number, errorText: string): boolean {
  if (status === 401 || status === 403) {
    return true;
  }

  // Lowercase once instead of once per marker.
  const haystack = errorText.toLowerCase();
  const markers = ["authentication", "api key", "unauthorized"];
  return markers.some((marker) => haystack.includes(marker));
}
|
|
45
|
+
|
|
15
46
|
async function request<T = unknown>(
|
|
16
47
|
method: string,
|
|
17
48
|
path: string,
|
|
@@ -21,6 +52,10 @@ async function request<T = unknown>(
|
|
|
21
52
|
const apiKey = getApiKey();
|
|
22
53
|
const url = `${baseUrl}${path}`;
|
|
23
54
|
|
|
55
|
+
// Debug logging
|
|
56
|
+
console.error(`[DEBUG] Request: ${method} ${url}`);
|
|
57
|
+
console.error(`[DEBUG] API Key present: ${!!apiKey}, starts with: ${apiKey?.slice(0, 10)}...`);
|
|
58
|
+
|
|
24
59
|
const headers: Record<string, string> = {
|
|
25
60
|
"Content-Type": "application/json",
|
|
26
61
|
"User-Agent": "pioneer-cli/0.1.0",
|
|
@@ -46,10 +81,22 @@ async function request<T = unknown>(
|
|
|
46
81
|
}
|
|
47
82
|
|
|
48
83
|
if (!res.ok) {
|
|
84
|
+
const rawError = data ? JSON.stringify(data) : text || `HTTP ${res.status}`;
|
|
85
|
+
|
|
86
|
+
// Format authentication errors with helpful instructions
|
|
87
|
+
if (isAuthError(res.status, rawError)) {
|
|
88
|
+
console.error(`[DEBUG] Auth error - status: ${res.status}, response: ${rawError}`);
|
|
89
|
+
return {
|
|
90
|
+
ok: false,
|
|
91
|
+
status: res.status,
|
|
92
|
+
error: formatAuthError(rawError),
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
|
|
49
96
|
return {
|
|
50
97
|
ok: false,
|
|
51
98
|
status: res.status,
|
|
52
|
-
error:
|
|
99
|
+
error: rawError,
|
|
53
100
|
};
|
|
54
101
|
}
|
|
55
102
|
|
|
@@ -63,6 +110,15 @@ async function request<T = unknown>(
|
|
|
63
110
|
}
|
|
64
111
|
}
|
|
65
112
|
|
|
113
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
114
|
+
// Common Types
|
|
115
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
116
|
+
|
|
117
|
+
/**
 * Reference to one dataset version, as a name plus a version string.
 */
export interface DatasetRef {
  /** Dataset name. */
  name: string;
  /** Dataset version. */
  version: string;
}
|
|
121
|
+
|
|
66
122
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
67
123
|
// Auth - validate API key by calling /felix/datasets
|
|
68
124
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -112,8 +168,12 @@ export interface Dataset {
|
|
|
112
168
|
dataset_type: string;
|
|
113
169
|
size?: number;
|
|
114
170
|
sample_size?: number;
|
|
171
|
+
version_number?: string;
|
|
172
|
+
root_dataset_id?: string;
|
|
173
|
+
schema?: Record<string, unknown>;
|
|
174
|
+
schema_warnings?: string[];
|
|
115
175
|
created_at: string;
|
|
116
|
-
updated_at
|
|
176
|
+
updated_at?: string;
|
|
117
177
|
}
|
|
118
178
|
|
|
119
179
|
export interface DatasetListResponse {
|
|
@@ -122,12 +182,20 @@ export interface DatasetListResponse {
|
|
|
122
182
|
count: number;
|
|
123
183
|
}
|
|
124
184
|
|
|
125
|
-
export async function listDatasets(
|
|
126
|
-
|
|
185
|
+
/**
 * List datasets via `GET /felix/datasets`.
 *
 * @param options When `includeAllVersions` is truthy, the query string
 *   `include_all_versions=true` is appended; otherwise no query is sent.
 * @returns The API result wrapping the dataset list response.
 */
export async function listDatasets(
  options: { includeAllVersions?: boolean } = {}
): Promise<ApiResult<DatasetListResponse>> {
  const search = new URLSearchParams();
  if (options.includeAllVersions) {
    search.set("include_all_versions", "true");
  }

  const qs = search.toString();
  if (!qs) {
    return request<DatasetListResponse>("GET", "/felix/datasets");
  }
  return request<DatasetListResponse>("GET", `/felix/datasets?${qs}`);
}
|
|
128
196
|
|
|
129
|
-
export async function getDataset(
|
|
130
|
-
return request<Dataset>("GET", `/felix/datasets/${
|
|
197
|
+
/**
 * Fetch a single dataset version via `GET /felix/datasets/{name}/{version}`.
 *
 * @param dataset Name and version of the dataset to fetch.
 * @returns The API result wrapping the dataset record.
 */
export async function getDataset(dataset: DatasetRef): Promise<ApiResult<Dataset>> {
  // Encode each segment so characters such as "/", "?" or "#" in a name or
  // version cannot change which path is requested.
  const name = encodeURIComponent(dataset.name);
  const version = encodeURIComponent(dataset.version);
  return request<Dataset>("GET", `/felix/datasets/${name}/${version}`);
}
|
|
132
200
|
|
|
133
201
|
export interface DatasetCreateRequest {
|
|
@@ -144,25 +212,160 @@ export async function createDataset(
|
|
|
144
212
|
return request<Dataset>("POST", "/felix/datasets", req);
|
|
145
213
|
}
|
|
146
214
|
|
|
147
|
-
export async function deleteDataset(
|
|
148
|
-
return request("DELETE", `/felix/datasets/${
|
|
215
|
+
/**
 * Delete a dataset version via `DELETE /felix/datasets/{name}/{version}`.
 *
 * @param dataset Name and version of the dataset to delete.
 * @returns The API result of the delete call.
 */
export async function deleteDataset(dataset: DatasetRef): Promise<ApiResult> {
  // Encode each segment: an unescaped "/" or "?" in a name could otherwise
  // make this destructive call hit a different path than intended.
  const name = encodeURIComponent(dataset.name);
  const version = encodeURIComponent(dataset.version);
  return request("DELETE", `/felix/datasets/${name}/${version}`);
}
|
|
218
|
+
|
|
219
|
+
/**
 * Options accepted by `uploadDataset`; each provided field is sent as a
 * multipart form field of the same name.
 */
export interface DatasetUploadRequest {
  /** Sent as the `dataset_name` form field. */
  dataset_name: string;
  /** Sent as the `dataset_type` form field when provided. */
  dataset_type?: "ner" | "classification" | "custom";
  /** Sent as the `format` form field when provided. */
  format?: "jsonl" | "csv" | "parquet";
  /** JSON-stringified into the `schema` form field when provided. */
  schema?: Record<string, string>;
}
|
|
225
|
+
|
|
226
|
+
/**
 * Upload a local file as a dataset using a multipart POST.
 *
 * This call builds its own `fetch` instead of going through `request`, so it
 * repeats the same auth-error formatting to keep failures consistent.
 *
 * @param filePath Path of the local file to upload.
 * @param options Dataset name plus optional type, format and schema.
 * @returns `ok: true` with the parsed response on success; `ok: false` with the
 *   HTTP status and error text on an HTTP failure; `ok: false` with status `0`
 *   when reading the file or performing the request throws.
 */
export async function uploadDataset(
  filePath: string,
  options: DatasetUploadRequest
): Promise<ApiResult<Dataset>> {
  const baseUrl = getBaseUrl().replace(/\/$/, "");
  const apiKey = getApiKey();
  // NOTE(review): unlike most endpoints in this module this path has no
  // "/felix" prefix — confirm against the backend routes.
  const url = `${baseUrl}/datasets/upload`;

  const headers: Record<string, string> = {
    "User-Agent": "pioneer-cli/0.1.0",
  };

  if (apiKey) {
    headers["X-API-Key"] = apiKey;
  }

  try {
    const fs = await import("fs");
    const path = await import("path");
    // Async read: avoids blocking the event loop while the file loads.
    const fileContent = await fs.promises.readFile(filePath);
    const filename = path.basename(filePath);

    const formData = new FormData();
    formData.append("file", new Blob([fileContent]), filename);
    formData.append("dataset_name", options.dataset_name);
    if (options.dataset_type) {
      formData.append("dataset_type", options.dataset_type);
    }
    if (options.format) {
      formData.append("format", options.format);
    }
    if (options.schema) {
      formData.append("schema", JSON.stringify(options.schema));
    }

    const res = await fetch(url, {
      method: "POST",
      headers,
      body: formData,
    });

    const text = await res.text();
    let data: Dataset | undefined;
    try {
      data = JSON.parse(text) as Dataset;
    } catch {
      // Not JSON; `data` stays undefined and the raw text is used for errors.
    }

    if (!res.ok) {
      const rawError = data ? JSON.stringify(data) : text || `HTTP ${res.status}`;
      return {
        ok: false,
        status: res.status,
        // Same guidance that `request` gives for authentication failures.
        error: isAuthError(res.status, rawError) ? formatAuthError(rawError) : rawError,
      };
    }

    return { ok: true, status: res.status, data };
  } catch (err) {
    return {
      ok: false,
      status: 0,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
|
150
292
|
|
|
151
293
|
/**
 * Result payload returned by `downloadDataset`.
 */
export interface DatasetDownloadResponse {
  /** URL the dataset was downloaded from. */
  download_url: string;
  /** Local path the file was written to. */
  file_path?: string;
  /** Number of bytes written. */
  size?: number;
  /** Name of the downloaded dataset. */
  dataset_name?: string;
  /** Version of the downloaded dataset. */
  version?: string;
}
|
|
154
300
|
|
|
155
301
|
/**
 * Download a dataset version and write it to a local file.
 *
 * @param dataset Name and version of the dataset to download.
 * @param format Export format, sent as the `format` query parameter.
 * @param outputPath Destination path. When empty or omitted, defaults to
 *   `<name>_<version>.<format>` in the current directory, with characters
 *   unsafe for a filename replaced by `_`.
 * @returns `ok: true` with the written path and byte size on success;
 *   `ok: false` with the HTTP status and error text on an HTTP failure;
 *   `ok: false` with status `0` when the request or the write throws.
 */
export async function downloadDataset(
  dataset: DatasetRef,
  format: "csv" | "jsonl" | "parquet" = "jsonl",
  outputPath?: string
): Promise<ApiResult<DatasetDownloadResponse>> {
  const baseUrl = getBaseUrl().replace(/\/$/, "");
  const apiKey = getApiKey();
  // Encode the path segments so "/", "?" or "#" in a name or version cannot
  // change which resource is requested.
  const downloadUrl =
    `${baseUrl}/datasets/${encodeURIComponent(dataset.name)}` +
    `/${encodeURIComponent(dataset.version)}/download?format=${format}`;

  // Default filename: sanitize the name AND the version, since an unsanitized
  // version containing a path separator would write outside the current directory.
  const safeDatasetName = dataset.name.replace(/[^a-zA-Z0-9-_]/g, "_");
  const safeVersion = dataset.version.replace(/[^a-zA-Z0-9._-]/g, "_");
  const targetPath = outputPath || `${safeDatasetName}_${safeVersion}.${format}`;

  const headers: Record<string, string> = {
    "User-Agent": "pioneer-cli/0.1.0",
  };

  if (apiKey) {
    headers["X-API-Key"] = apiKey;
  }

  try {
    const res = await fetch(downloadUrl, {
      method: "GET",
      headers,
    });

    if (!res.ok) {
      const text = await res.text();
      const rawError = text || `HTTP ${res.status}`;
      return {
        ok: false,
        status: res.status,
        // Same guidance that `request` gives for authentication failures.
        error: isAuthError(res.status, rawError) ? formatAuthError(rawError) : rawError,
      };
    }

    // Write the file without blocking the event loop.
    const fs = await import("fs");
    const buffer = await res.arrayBuffer();
    await fs.promises.writeFile(targetPath, Buffer.from(buffer));

    return {
      ok: true,
      // Report the real status instead of assuming 200.
      status: res.status,
      data: {
        download_url: downloadUrl,
        file_path: targetPath,
        size: buffer.byteLength,
        dataset_name: dataset.name,
        version: dataset.version,
      },
    };
  } catch (err) {
    return {
      ok: false,
      status: 0,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
|
163
364
|
|
|
164
365
|
/**
 * Describes a dataset analysis request.
 *
 * NOTE(review): `analyzeDataset` takes these values as positional arguments
 * rather than this object — confirm where this interface is consumed.
 */
export interface DatasetAnalysisRequest {
  /** Dataset to analyze. */
  dataset: DatasetRef;
  /** Task type for the analysis. */
  task_type: string;
  /** Names of the analyses to run. */
  analyses: string[];
}
|
|
167
370
|
|
|
168
371
|
export interface DatasetAnalysisResponse {
|
|
@@ -171,10 +374,15 @@ export interface DatasetAnalysisResponse {
|
|
|
171
374
|
}
|
|
172
375
|
|
|
173
376
|
/**
 * Request an analysis of a dataset via `POST /felix/dataset/analyze`.
 *
 * @param dataset Dataset to analyze; sent as `dataset_name` / `dataset_version`.
 * @param taskType Sent as `task_type`.
 * @param analyses Sent as `analyses`.
 * @returns The API result wrapping the analysis response.
 */
export async function analyzeDataset(
  dataset: DatasetRef,
  taskType: string,
  analyses: string[]
): Promise<ApiResult<DatasetAnalysisResponse>> {
  const payload = {
    dataset_name: dataset.name,
    dataset_version: dataset.version,
    task_type: taskType,
    analyses,
  };
  return request<DatasetAnalysisResponse>("POST", "/felix/dataset/analyze", payload);
}
|
|
180
388
|
|
|
@@ -437,7 +645,7 @@ export async function getJob(jobId: string): Promise<ApiResult<TrainingJob>> {
|
|
|
437
645
|
|
|
438
646
|
export interface TrainingJobCreateRequest {
|
|
439
647
|
model_name: string;
|
|
440
|
-
|
|
648
|
+
datasets: DatasetRef[];
|
|
441
649
|
base_model?: string;
|
|
442
650
|
validation_data_percentage?: number;
|
|
443
651
|
nr_epochs?: number;
|
|
@@ -515,7 +723,7 @@ export interface TrainedModel {
|
|
|
515
723
|
|
|
516
724
|
/**
 * Response shape for the trained-models listing.
 */
export interface TrainedModelsListResponse {
  success: boolean;
  /** Trained models, delivered under the `training_jobs` key. */
  training_jobs: TrainedModel[];
  count: number;
}
|
|
521
729
|
|
|
@@ -548,7 +756,7 @@ export async function listAllModels(): Promise<ApiResult<AllModelsResponse>> {
|
|
|
548
756
|
status: 200,
|
|
549
757
|
data: {
|
|
550
758
|
deployed: deployedResult.data?.models ?? [],
|
|
551
|
-
trained: trainedResult.data?.
|
|
759
|
+
trained: trainedResult.data?.training_jobs ?? [],
|
|
552
760
|
},
|
|
553
761
|
};
|
|
554
762
|
}
|
|
@@ -587,12 +795,333 @@ export async function getEvaluation(
|
|
|
587
795
|
}
|
|
588
796
|
|
|
589
797
|
/**
 * Input for `createEvaluation`.
 */
export interface EvaluationCreateRequest {
  /** Dataset to evaluate on; flattened to `dataset_name` / `dataset_version` when sent. */
  dataset: DatasetRef;
  /** Identifier of the model to evaluate. */
  model_id: string;
  task_type?: string;
  text_column?: string;
  label_column?: string;
}
|
|
593
804
|
|
|
594
805
|
/**
 * Create an evaluation via `POST /felix/evaluations`.
 *
 * The nested `dataset` reference is flattened into `dataset_name` and
 * `dataset_version`; every other request field is forwarded unchanged.
 *
 * @param req Evaluation parameters.
 * @returns The API result wrapping the created evaluation.
 */
export async function createEvaluation(
  req: EvaluationCreateRequest
): Promise<ApiResult<Evaluation>> {
  const { dataset: ref, ...fields } = req;
  const payload = {
    ...fields,
    dataset_name: ref.name,
    dataset_version: ref.version,
  };
  return request<Evaluation>("POST", "/felix/evaluations", payload);
}
|
|
815
|
+
|
|
816
|
+
/**
 * A model entry paired with an optional evaluation, as listed in
 * `DatasetEvaluationsResponse.models`.
 */
export interface ModelWithEvaluation {
  model_id: string;
  model_name: string;
  is_base_model: boolean;
  /** Evaluation for this model, when one is present. */
  evaluation?: Evaluation;
}
|
|
822
|
+
|
|
823
|
+
/**
 * Response shape returned by `getDatasetEvaluations`.
 */
export interface DatasetEvaluationsResponse {
  success: boolean;
  dataset_id: string;
  dataset_name: string;
  sample_count: number;
  /** Models with their optional evaluations. */
  models: ModelWithEvaluation[];
  count: number;
}
|
|
831
|
+
|
|
832
|
+
/**
 * Fetch the evaluations recorded for a dataset version via
 * `GET /felix/datasets/{name}/{version}/evaluations`.
 *
 * @param dataset Name and version of the dataset.
 * @returns The API result wrapping the per-model evaluation list.
 */
export async function getDatasetEvaluations(
  dataset: DatasetRef
): Promise<ApiResult<DatasetEvaluationsResponse>> {
  // Encode each segment so reserved characters cannot alter the path.
  const name = encodeURIComponent(dataset.name);
  const version = encodeURIComponent(dataset.version);
  return request<DatasetEvaluationsResponse>(
    "GET",
    `/felix/datasets/${name}/${version}/evaluations`
  );
}
|
|
840
|
+
|
|
841
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
842
|
+
// Benchmarks
|
|
843
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
844
|
+
|
|
845
|
+
/**
 * Description of a single benchmark, as listed in `ListBenchmarksResponse`.
 */
export interface BenchmarkInfo {
  name: string;
  description: string;
  task: string;
  dataset_source?: string;
  /** Metric names associated with the benchmark. */
  metrics: string[];
}
|
|
852
|
+
|
|
853
|
+
/**
 * Response shape returned by `listBenchmarks`.
 */
export interface ListBenchmarksResponse {
  success: boolean;
  /** Benchmarks grouped under string keys (presumably the task name — TODO confirm). */
  benchmarks: Record<string, BenchmarkInfo[]>;
  count: number;
}
|
|
858
|
+
|
|
859
|
+
/**
 * List the available benchmarks via `GET /felix/benchmarks`.
 *
 * @returns The API result wrapping the benchmark listing.
 */
export async function listBenchmarks(): Promise<ApiResult<ListBenchmarksResponse>> {
  const endpoint = "/felix/benchmarks";
  return request<ListBenchmarksResponse>("GET", endpoint);
}
|
|
862
|
+
|
|
863
|
+
/**
 * Request body accepted by `startBenchmarkEvaluation`.
 */
export interface BenchmarkEvaluationRequest {
  /** Identifier of the model to evaluate. */
  model_id: string;
  task: "ner" | "text_classification";
  /** Benchmark to run. */
  benchmark: string;
  max_samples?: number;
  split?: string;
  benchmark_config?: Record<string, unknown>;
}
|
|
871
|
+
|
|
872
|
+
/**
 * Benchmark evaluation record returned by `startBenchmarkEvaluation` and
 * `getBenchmarkEvaluation`.
 */
export interface BenchmarkEvaluationResponse {
  success: boolean;
  evaluation_id: string;
  /** Lifecycle state of the evaluation. */
  status: "pending" | "running" | "complete" | "errored";
  task: string;
  benchmark: string;
  model_id: string;
  metrics?: Record<string, unknown>;
  error_message?: string;
  created_at: string;
  completed_at?: string;
}
|
|
884
|
+
|
|
885
|
+
/**
 * Start a benchmark evaluation via `POST /felix/benchmarks/evaluate`.
 *
 * @param req Evaluation parameters, sent unchanged as the request body.
 * @returns The API result wrapping the created evaluation record.
 */
export async function startBenchmarkEvaluation(
  req: BenchmarkEvaluationRequest
): Promise<ApiResult<BenchmarkEvaluationResponse>> {
  const endpoint = "/felix/benchmarks/evaluate";
  return request<BenchmarkEvaluationResponse>("POST", endpoint, req);
}
|
|
890
|
+
|
|
891
|
+
/**
 * Fetch a benchmark evaluation via `GET /felix/benchmarks/evaluate/{id}`.
 *
 * @param evaluationId Identifier of the evaluation to fetch.
 * @returns The API result wrapping the evaluation record.
 */
export async function getBenchmarkEvaluation(
  evaluationId: string
): Promise<ApiResult<BenchmarkEvaluationResponse>> {
  // Encode the id so reserved characters cannot alter the path.
  const id = encodeURIComponent(evaluationId);
  return request<BenchmarkEvaluationResponse>("GET", `/felix/benchmarks/evaluate/${id}`);
}
|
|
899
|
+
|
|
900
|
+
/**
 * Cancel a benchmark evaluation via
 * `POST /felix/benchmarks/evaluate/{id}/cancel`.
 *
 * @param evaluationId Identifier of the evaluation to cancel.
 * @returns The API result wrapping a `{ success, message }` payload.
 */
export async function cancelBenchmarkEvaluation(
  evaluationId: string
): Promise<ApiResult<{ success: boolean; message: string }>> {
  // Encode the id so reserved characters cannot alter the path.
  const id = encodeURIComponent(evaluationId);
  return request<{ success: boolean; message: string }>(
    "POST",
    `/felix/benchmarks/evaluate/${id}/cancel`
  );
}
|
|
908
|
+
|
|
909
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
910
|
+
// Data Editing (operates on persisted datasets)
|
|
911
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
912
|
+
|
|
913
|
+
/**
 * A single finding from a data-editing scan, as listed in
 * `DataEditingScanResponse.findings` and passed back to `removePII`.
 */
export interface PIIFinding {
  row_index: number;
  column: string;
  entity_type: string;
  text: string;
  /** Span start (presumably a character offset into the cell text — TODO confirm). */
  start: number;
  /** Span end (presumably a character offset; inclusivity not shown here — TODO confirm). */
  end: number;
  score: number;
}
|
|
922
|
+
|
|
923
|
+
/**
 * Input for `scanForPII` and `scanForPHD`.
 */
export interface DataEditingScanRequest {
  /** Dataset to scan; flattened to `dataset_name` / `dataset_version` when sent. */
  dataset: DatasetRef;
  columns?: string[];
  threshold?: number;
}
|
|
928
|
+
|
|
929
|
+
/**
 * Response shape returned by `scanForPII` and `scanForPHD`.
 */
export interface DataEditingScanResponse {
  success: boolean;
  dataset_id: string;
  /** Which scan produced the findings. */
  scan_type: "pii" | "phd";
  findings_count: number;
  affected_rows: number;
  findings: PIIFinding[];
}
|
|
937
|
+
|
|
938
|
+
/**
 * Run a PII scan via `POST /felix/data-editing/scan-pii`.
 *
 * The nested `dataset` reference is flattened into `dataset_name` and
 * `dataset_version`; the remaining request fields are forwarded unchanged.
 *
 * @param req Scan parameters.
 * @returns The API result wrapping the scan findings.
 */
export async function scanForPII(
  req: DataEditingScanRequest
): Promise<ApiResult<DataEditingScanResponse>> {
  const { dataset: ref, ...fields } = req;
  const payload = {
    ...fields,
    dataset_name: ref.name,
    dataset_version: ref.version,
  };
  return request<DataEditingScanResponse>("POST", "/felix/data-editing/scan-pii", payload);
}
|
|
948
|
+
|
|
949
|
+
/**
 * Run a PHD scan via `POST /felix/data-editing/scan-phd`.
 *
 * The nested `dataset` reference is flattened into `dataset_name` and
 * `dataset_version`; the remaining request fields are forwarded unchanged.
 *
 * @param req Scan parameters.
 * @returns The API result wrapping the scan findings.
 */
export async function scanForPHD(
  req: DataEditingScanRequest
): Promise<ApiResult<DataEditingScanResponse>> {
  const { dataset: ref, ...fields } = req;
  const payload = {
    ...fields,
    dataset_name: ref.name,
    dataset_version: ref.version,
  };
  return request<DataEditingScanResponse>("POST", "/felix/data-editing/scan-phd", payload);
}
|
|
959
|
+
|
|
960
|
+
/**
 * Input for `removePII`.
 */
export interface DataEditingRemoveRequest {
  /** Dataset to edit; flattened to `dataset_name` / `dataset_version` when sent. */
  dataset: DatasetRef;
  /** Findings to act on. */
  findings: PIIFinding[];
  redaction_method?: "redact" | "remove_row" | "mask";
  save_as_new?: boolean;
}
|
|
966
|
+
|
|
967
|
+
/**
 * Response shape returned by `removePII`.
 */
export interface DataEditingRemoveResponse {
  success: boolean;
  dataset_id: string;
  new_dataset_id?: string;
  rows_affected: number;
  entities_removed: number;
  message: string;
}
|
|
975
|
+
|
|
976
|
+
/**
 * Apply removal or redaction for the given findings via
 * `POST /felix/data-editing/remove-pii`.
 *
 * The nested `dataset` reference is flattened into `dataset_name` and
 * `dataset_version`; the remaining request fields are forwarded unchanged.
 *
 * @param req Findings plus optional redaction settings.
 * @returns The API result wrapping the removal summary.
 */
export async function removePII(
  req: DataEditingRemoveRequest
): Promise<ApiResult<DataEditingRemoveResponse>> {
  const { dataset: ref, ...fields } = req;
  const payload = {
    ...fields,
    dataset_name: ref.name,
    dataset_version: ref.version,
  };
  return request<DataEditingRemoveResponse>("POST", "/felix/data-editing/remove-pii", payload);
}
|
|
986
|
+
|
|
987
|
+
/**
 * Input for `subsampleDataset`.
 */
export interface DataEditingSubsampleRequest {
  /** Dataset to subsample; flattened to `dataset_name` / `dataset_version` when sent. */
  dataset: DatasetRef;
  method?: "random" | "balanced" | "stratified";
  /** Requested sample count (presumably rows — TODO confirm). */
  n: number;
  label_column?: string;
  seed?: number;
  save_as_new?: boolean;
}
|
|
995
|
+
|
|
996
|
+
/**
 * Response shape returned by `subsampleDataset`.
 */
export interface DataEditingSubsampleResponse {
  success: boolean;
  dataset_id: string;
  new_dataset_id: string;
  original_rows: number;
  new_rows: number;
  method: string;
  message: string;
}
|
|
1005
|
+
|
|
1006
|
+
/**
 * Subsample a dataset via `POST /felix/data-editing/subsample`.
 *
 * The nested `dataset` reference is flattened into `dataset_name` and
 * `dataset_version`; the remaining request fields are forwarded unchanged.
 *
 * @param req Subsampling parameters.
 * @returns The API result wrapping the subsample summary.
 */
export async function subsampleDataset(
  req: DataEditingSubsampleRequest
): Promise<ApiResult<DataEditingSubsampleResponse>> {
  const { dataset: ref, ...fields } = req;
  const payload = {
    ...fields,
    dataset_name: ref.name,
    dataset_version: ref.version,
  };
  return request<DataEditingSubsampleResponse>("POST", "/felix/data-editing/subsample", payload);
}
|
|
1016
|
+
|
|
1017
|
+
/**
 * Input for `checkLabels`.
 */
export interface DataEditingCheckLabelsRequest {
  /** Dataset to check; flattened to `dataset_name` / `dataset_version` when sent. */
  dataset: DatasetRef;
  text_column: string;
  label_column: string;
  sample_size?: number;
}
|
|
1023
|
+
|
|
1024
|
+
/**
 * One entry of `DataEditingCheckLabelsResponse.results`.
 */
export interface LabelCheckResult {
  row_index: number;
  text: string;
  current_label: string;
  suggested_label: string;
  confidence: number;
  reasoning: string;
}
|
|
1032
|
+
|
|
1033
|
+
/**
 * Response shape returned by `checkLabels`.
 */
export interface DataEditingCheckLabelsResponse {
  success: boolean;
  dataset_id: string;
  checked_count: number;
  issues_found: number;
  results: LabelCheckResult[];
}
|
|
1040
|
+
|
|
1041
|
+
/**
 * Run a label check via `POST /felix/data-editing/check-labels`.
 *
 * The nested `dataset` reference is flattened into `dataset_name` and
 * `dataset_version`; the remaining request fields are forwarded unchanged.
 *
 * @param req Columns to inspect plus an optional sample size.
 * @returns The API result wrapping the per-row check results.
 */
export async function checkLabels(
  req: DataEditingCheckLabelsRequest
): Promise<ApiResult<DataEditingCheckLabelsResponse>> {
  const { dataset: ref, ...fields } = req;
  const payload = {
    ...fields,
    dataset_name: ref.name,
    dataset_version: ref.version,
  };
  return request<DataEditingCheckLabelsResponse>(
    "POST",
    "/felix/data-editing/check-labels",
    payload
  );
}
|
|
1051
|
+
|
|
1052
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1053
|
+
// Hugging Face Integration
|
|
1054
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1055
|
+
|
|
1056
|
+
/**
 * Request body accepted by `pushDatasetToHub`.
 */
export interface HuggingFacePushRequest {
  /** Hugging Face token; sent in the request body. */
  hf_token: string;
  /** Target repository id. */
  repo_id: string;
  private?: boolean;
}
|
|
1061
|
+
|
|
1062
|
+
/**
 * Response shape returned by `pushDatasetToHub`.
 */
export interface HuggingFacePushResponse {
  success: boolean;
  repo_url: string;
  repo_id: string;
  message?: string;
}
|
|
1068
|
+
|
|
1069
|
+
/**
 * Push a dataset version to a hub repository via
 * `POST /datasets/{name}/{version}/push-to-hub`.
 *
 * @param dataset Name and version of the dataset to push.
 * @param options Token and target repository, sent unchanged as the request body.
 * @returns The API result wrapping the push outcome.
 */
export async function pushDatasetToHub(
  dataset: DatasetRef,
  options: HuggingFacePushRequest
): Promise<ApiResult<HuggingFacePushResponse>> {
  // Encode each segment so reserved characters cannot alter the path.
  const name = encodeURIComponent(dataset.name);
  const version = encodeURIComponent(dataset.version);
  // NOTE(review): this path has no "/felix" prefix, unlike the other
  // dataset endpoints sent through `request` — confirm it is intentional.
  return request<HuggingFacePushResponse>(
    "POST",
    `/datasets/${name}/${version}/push-to-hub`,
    options
  );
}
|
|
1079
|
+
|
|
1080
|
+
/**
 * Request body accepted by `pushModelToHub`.
 */
export interface HuggingFacePushModelRequest {
  /** Hugging Face token; sent in the request body. */
  hf_token: string;
  /** Target repository id. */
  repo_id: string;
  private?: boolean;
}
|
|
1085
|
+
|
|
1086
|
+
/**
 * Response shape returned by `pushModelToHub`.
 */
export interface HuggingFacePushModelResponse {
  success: boolean;
  repo_url: string;
  repo_id: string;
  message?: string;
}
|
|
1092
|
+
|
|
1093
|
+
/**
 * Push the model of a training job to a hub repository via
 * `POST /felix/training-jobs/{jobId}/push-to-hub`.
 *
 * @param jobId Identifier of the training job.
 * @param options Token and target repository, sent unchanged as the request body.
 * @returns The API result wrapping the push outcome.
 */
export async function pushModelToHub(
  jobId: string,
  options: HuggingFacePushModelRequest
): Promise<ApiResult<HuggingFacePushModelResponse>> {
  // Encode the id so reserved characters cannot alter the path.
  const id = encodeURIComponent(jobId);
  return request<HuggingFacePushModelResponse>(
    "POST",
    `/felix/training-jobs/${id}/push-to-hub`,
    options
  );
}
|
|
1103
|
+
|
|
1104
|
+
/**
 * Request body accepted by `pullDatasetFromHub`.
 */
export interface HuggingFacePullRequest {
  /** Optional Hugging Face token; sent in the request body when provided. */
  hf_token?: string;
  /** Source repository id. */
  repo_id: string;
  dataset_name?: string;
  revision?: string;
}
|
|
1110
|
+
|
|
1111
|
+
/**
 * Response shape returned by `pullDatasetFromHub`.
 */
export interface HuggingFacePullResponse {
  success: boolean;
  dataset_id: string;
  dataset_name: string;
  sample_count: number;
  message?: string;
}
|
|
1118
|
+
|
|
1119
|
+
/**
 * Import a dataset from a hub repository via `POST /datasets/pull-from-hub`.
 *
 * @param options Source repository and optional token, name and revision; sent
 *   unchanged as the request body.
 * @returns The API result wrapping the imported dataset summary.
 */
export async function pullDatasetFromHub(
  options: HuggingFacePullRequest
): Promise<ApiResult<HuggingFacePullResponse>> {
  const endpoint = "/datasets/pull-from-hub";
  return request<HuggingFacePullResponse>("POST", endpoint, options);
}
|