@proofhound/web-ui 0.1.12 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/hooks/run-result.d.ts +4 -0
- package/dist/hooks/run-result.d.ts.map +1 -1
- package/dist/i18n/index.d.ts +28 -2
- package/dist/i18n/index.d.ts.map +1 -1
- package/dist/i18n/index.js +28 -2
- package/dist/i18n/index.js.map +1 -1
- package/dist/screens/datasets/dataset-import-runner.d.ts +19 -1
- package/dist/screens/datasets/dataset-import-runner.d.ts.map +1 -1
- package/dist/screens/datasets/dataset-import-runner.js +56 -0
- package/dist/screens/datasets/dataset-import-runner.js.map +1 -1
- package/dist/screens/datasets/dataset-upload-page.d.ts +17 -0
- package/dist/screens/datasets/dataset-upload-page.d.ts.map +1 -1
- package/dist/screens/datasets/dataset-upload-page.js +186 -36
- package/dist/screens/datasets/dataset-upload-page.js.map +1 -1
- package/dist/screens/datasets/dataset-upload-parser.d.ts +7 -2
- package/dist/screens/datasets/dataset-upload-parser.d.ts.map +1 -1
- package/dist/screens/datasets/dataset-upload-parser.js +163 -67
- package/dist/screens/datasets/dataset-upload-parser.js.map +1 -1
- package/package.json +12 -10
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
import { type DatasetImportClient } from '@proofhound/api-client';
|
|
2
|
-
import type { CompleteDatasetImportResponseDto, CreateDatasetImportDto } from '@proofhound/shared';
|
|
2
|
+
import type { CompleteDatasetImportResponseDto, CreateDatasetImportDto, CreateRawDatasetImportDto } from '@proofhound/shared';
|
|
3
3
|
export interface DatasetImportProgress {
|
|
4
4
|
phase: 'uploading' | 'completing';
|
|
5
5
|
receivedRows: number;
|
|
6
6
|
}
|
|
7
|
+
export declare const DEFAULT_IMPORT_BATCH_MAX_ROWS = 1000;
|
|
8
|
+
export declare const DEFAULT_IMPORT_BATCH_MAX_BYTES: number;
|
|
9
|
+
export declare function estimateDatasetImportBatchBytes(samples: Array<Record<string, unknown>>): number;
|
|
10
|
+
export declare function projectSampleRowsToBatches(rows: AsyncIterable<Record<string, unknown>>, columns: string[], options?: {
|
|
11
|
+
maxRows?: number;
|
|
12
|
+
maxBytes?: number;
|
|
13
|
+
signal?: AbortSignal;
|
|
14
|
+
}): AsyncGenerator<Array<Record<string, unknown>>>;
|
|
7
15
|
export interface RunDatasetImportOptions {
|
|
8
16
|
projectId: string;
|
|
9
17
|
createBody: CreateDatasetImportDto;
|
|
@@ -13,5 +21,15 @@ export interface RunDatasetImportOptions {
|
|
|
13
21
|
onProgress?: (progress: DatasetImportProgress) => void;
|
|
14
22
|
client?: DatasetImportClient;
|
|
15
23
|
}
|
|
24
|
+
export interface RunRawDatasetImportOptions {
|
|
25
|
+
projectId: string;
|
|
26
|
+
createBody: CreateRawDatasetImportDto;
|
|
27
|
+
file: Blob;
|
|
28
|
+
signal?: AbortSignal;
|
|
29
|
+
onCreated?: (importId: string) => void;
|
|
30
|
+
onUploaded?: () => void;
|
|
31
|
+
client?: DatasetImportClient;
|
|
32
|
+
}
|
|
16
33
|
export declare function runDatasetImport(options: RunDatasetImportOptions): Promise<CompleteDatasetImportResponseDto>;
|
|
34
|
+
export declare function runRawDatasetImport(options: RunRawDatasetImportOptions): Promise<CompleteDatasetImportResponseDto>;
|
|
17
35
|
//# sourceMappingURL=dataset-import-runner.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataset-import-runner.d.ts","sourceRoot":"","sources":["../../../src/screens/datasets/dataset-import-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,KAAK,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AACvF,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"dataset-import-runner.d.ts","sourceRoot":"","sources":["../../../src/screens/datasets/dataset-import-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,KAAK,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AACvF,OAAO,KAAK,EACV,gCAAgC,EAChC,sBAAsB,EACtB,yBAAyB,EAC1B,MAAM,oBAAoB,CAAC;AAG5B,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,WAAW,GAAG,YAAY,CAAC;IAClC,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,eAAO,MAAM,6BAA6B,OAAO,CAAC;AAElD,eAAO,MAAM,8BAA8B,QAAkB,CAAC;AAI9D,wBAAgB,+BAA+B,CAAC,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GAAG,MAAM,CAE/F;AAED,wBAAuB,0BAA0B,CAC/C,IAAI,EAAE,aAAa,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,EAC5C,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,GAAE;IACP,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,WAAW,CAAC;CACjB,GACL,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CA+BhD;AAED,MAAM,WAAW,uBAAuB;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,sBAAsB,CAAC;IAEnC,OAAO,EAAE,aAAa,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;IACvD,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,SAAS,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACvC,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,qBAAqB,KAAK,IAAI,CAAC;IAEvD,MAAM,CAAC,EAAE,mBAAmB,CAAC;CAC9B;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,yBAAyB,CAAC;IACtC,IAAI,EAAE,IAAI,CAAC;IACX,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,SAAS,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACvC,UAAU,CAAC,EAAE,MAAM,IAAI,CAAC;IACxB,MAAM,CAAC,EAAE,mBAAmB,CAAC;CAC9B;AAID,wBAAsB,gBAAgB,CAAC,OAAO,EAAE,uBAAuB,GAAG,OAAO,CAAC,gCAAgC,CAAC,CA0BlH;AAED,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,0BAA0B,GAClC,OAAO,CAAC,gCAAgC,CAAC,CAiB3C"}
|
|
@@ -1,4 +1,41 @@
|
|
|
1
1
|
import { datasetImportClient } from '@proofhound/api-client';
|
|
2
|
+
import { projectSamplesToColumns } from './dataset-upload-parser';
|
|
3
|
+
/** Default maximum number of samples placed in one yielded batch. */
export const DEFAULT_IMPORT_BATCH_MAX_ROWS = 1000;
// Keep the encoded JSON body below the default SERVER_BODY_LIMIT=10mb.
export const DEFAULT_IMPORT_BATCH_MAX_BYTES = 8 * 1024 * 1024;
const textEncoder = new TextEncoder();
/**
 * Estimates the UTF-8 byte length of the JSON payload `{ batchStartIndex: 0, samples }`.
 *
 * `batchStartIndex` is fixed at 0 here, so the result is an estimate rather than the
 * exact size of a payload carrying a different start index.
 *
 * @param samples Rows to serialise.
 * @returns Encoded size in bytes.
 */
export function estimateDatasetImportBatchBytes(samples) {
    return textEncoder.encode(JSON.stringify({ batchStartIndex: 0, samples })).length;
}
/**
 * Projects each incoming row to `columns` and groups the projected rows into batches
 * that respect both a row limit and an estimated byte limit.
 *
 * @param rows Async source of raw rows.
 * @param columns Column names passed to `projectSamplesToColumns`.
 * @param options Optional `maxRows` (default `DEFAULT_IMPORT_BATCH_MAX_ROWS`),
 *   `maxBytes` (default `DEFAULT_IMPORT_BATCH_MAX_BYTES`) and an `AbortSignal`.
 * @yields Non-empty arrays of projected rows, in input order.
 * @throws DOMException named `AbortError` when `options.signal` is aborted.
 * @throws Error `dataset_import_sample_too_large` when a single projected row alone exceeds `maxBytes`.
 */
export async function* projectSampleRowsToBatches(rows, columns, options = {}) {
    const maxRows = options.maxRows ?? DEFAULT_IMPORT_BATCH_MAX_ROWS;
    const maxBytes = options.maxBytes ?? DEFAULT_IMPORT_BATCH_MAX_BYTES;
    // Size of the payload with an empty `samples` array. A batch of n rows costs
    // envelope + sum(row bytes) + (n - 1) comma separators, so the batch size can be
    // tracked incrementally instead of re-serialising the whole batch for every row.
    const envelopeBytes = estimateDatasetImportBatchBytes([]);
    let batch = [];
    let payloadBytes = 0;
    for await (const row of rows) {
        if (options.signal?.aborted)
            throw new DOMException('aborted', 'AbortError');
        // Flush a full batch before projecting the next row; never emit an empty batch.
        if (batch.length > 0 && batch.length >= maxRows) {
            yield batch;
            batch = [];
            payloadBytes = 0;
        }
        const projected = projectSamplesToColumns([row], columns)[0] ?? {};
        const singleBytes = estimateDatasetImportBatchBytes([projected]);
        if (singleBytes > maxBytes) {
            throw new Error('dataset_import_sample_too_large');
        }
        const sampleBytes = singleBytes - envelopeBytes;
        if (batch.length > 0) {
            // Appending to n existing rows adds the row itself plus n separators in total.
            const candidateBytes = envelopeBytes + payloadBytes + sampleBytes + batch.length;
            if (batch.length + 1 > maxRows || candidateBytes > maxBytes) {
                yield batch;
                batch = [];
                payloadBytes = 0;
            }
        }
        batch.push(projected);
        payloadBytes += sampleBytes;
    }
    if (batch.length > 0) {
        if (options.signal?.aborted)
            throw new DOMException('aborted', 'AbortError');
        yield batch;
    }
}
|
|
2
39
|
// Orchestrates a large-file import: create session -> append batches in order -> complete (atomic promote).
|
|
3
40
|
// Any failure or abort before complete deletes the session + staging (best-effort), per "中断即删干净" ("an interrupted import is cleaned up completely").
|
|
4
41
|
export async function runDatasetImport(options) {
|
|
@@ -30,4 +67,23 @@ export async function runDatasetImport(options) {
|
|
|
30
67
|
throw error;
|
|
31
68
|
}
|
|
32
69
|
}
|
|
70
|
+
/**
 * Runs a raw-file import: create the import session, upload the file, then complete it.
 *
 * Any failure after the session exists (including a throwing `onCreated` / `onUploaded`
 * callback or an abort) triggers a best-effort `abortDatasetImport` before the original
 * error is rethrown.
 *
 * @param options `projectId`, `createBody`, `file`, plus optional `signal`, `onCreated`,
 *   `onUploaded` and a `client` override (defaults to `datasetImportClient`).
 * @returns The value resolved by `client.completeDatasetImport`.
 * @throws DOMException named `AbortError` when `signal` is aborted before upload or before completion.
 */
export async function runRawDatasetImport(options) {
    const client = options.client ?? datasetImportClient;
    const { projectId, createBody, file, signal } = options;
    const { import: session, uploadSession } = await client.createRawDatasetImport(projectId, createBody);
    try {
        // Inside the try block so a throwing callback cannot leak the freshly created session.
        options.onCreated?.(session.id);
        if (signal?.aborted)
            throw new DOMException('aborted', 'AbortError');
        await client.uploadRawDatasetFile(uploadSession, file, { signal });
        options.onUploaded?.();
        if (signal?.aborted)
            throw new DOMException('aborted', 'AbortError');
        return await client.completeDatasetImport(projectId, session.id);
    }
    catch (error) {
        // Best-effort cleanup: a failed abort must not mask the original error.
        await client.abortDatasetImport(projectId, session.id).catch(() => undefined);
        throw error;
    }
}
|
|
33
89
|
//# sourceMappingURL=dataset-import-runner.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataset-import-runner.js","sourceRoot":"","sources":["../../../src/screens/datasets/dataset-import-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAA4B,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"dataset-import-runner.js","sourceRoot":"","sources":["../../../src/screens/datasets/dataset-import-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAA4B,MAAM,wBAAwB,CAAC;AAMvF,OAAO,EAAE,uBAAuB,EAAE,MAAM,yBAAyB,CAAC;AAOlE,MAAM,CAAC,MAAM,6BAA6B,GAAG,IAAI,CAAC;AAClD,uEAAuE;AACvE,MAAM,CAAC,MAAM,8BAA8B,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;AAE9D,MAAM,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC;AAEtC,MAAM,UAAU,+BAA+B,CAAC,OAAuC;IACrF,OAAO,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;AACpF,CAAC;AAED,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,0BAA0B,CAC/C,IAA4C,EAC5C,OAAiB,EACjB,UAII,EAAE;IAEN,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,6BAA6B,CAAC;IACjE,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,8BAA8B,CAAC;IACpE,IAAI,KAAK,GAAmC,EAAE,CAAC;IAE/C,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,MAAM,EAAE,OAAO;YAAE,MAAM,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;QAC7E,IAAI,KAAK,CAAC,MAAM,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,KAAK,CAAC;YACZ,KAAK,GAAG,EAAE,CAAC;QACb,CAAC;QAED,MAAM,SAAS,GAAG,uBAAuB,CAAC,CAAC,GAAG,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACnE,MAAM,WAAW,GAAG,+BAA+B,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;QACjE,IAAI,WAAW,GAAG,QAAQ,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,SAAS,GAAG,CAAC,GAAG,KAAK,EAAE,SAAS,CAAC,CAAC;QACxC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,OAAO,IAAI,+BAA+B,CAAC,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE,CAAC;YAC9G,MAAM,KAAK,CAAC;YACZ,KAAK,GAAG,EAAE,CAAC;QACb,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxB,CAAC;IAED,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,IAAI,OAAO,CAAC,MAAM,EAAE,OAAO;YAAE,MAAM,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;QAC7E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAwBD,4GAA4G;AAC5G,kGAAkG;AAClG,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,OAAgC;IACrE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,mBAAmB,CAAC;IACrD,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAE3D,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;IACxE,OAAO,CAAC,S
AAS,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAEhC,IAAI,CAAC;QACH,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAClC,IAAI,MAAM,EAAE,OAAO;gBAAE,MAAM,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;YACrE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YACjC,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE;gBACpF,eAAe;gBACf,OAAO,EAAE,KAAK;aACf,CAAC,CAAC;YACH,eAAe,IAAI,KAAK,CAAC,MAAM,CAAC;YAChC,OAAO,CAAC,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC,CAAC;QAC7D,CAAC;QACD,IAAI,MAAM,EAAE,OAAO;YAAE,MAAM,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;QACrE,OAAO,CAAC,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,eAAe,EAAE,CAAC,CAAC;QAC7E,OAAO,MAAM,MAAM,CAAC,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;IACnE,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,MAAM,CAAC,kBAAkB,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;QAC9E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,OAAmC;IAEnC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,mBAAmB,CAAC;IACrD,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAExD,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;IACtG,OAAO,CAAC,SAAS,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAEhC,IAAI,CAAC;QACH,IAAI,MAAM,EAAE,OAAO;YAAE,MAAM,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;QACrE,MAAM,MAAM,CAAC,oBAAoB,CAAC,aAAa,EAAE,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;QACnE,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;QACvB,IAAI,MAAM,EAAE,OAAO;YAAE,MAAM,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;QACrE,OAAO,MAAM,MAAM,CAAC,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;IACnE,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,MAAM,CAAC,kBAAkB,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;QAC9E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
import type { CreateDatasetDto, DatasetRawImportCapabilitiesDto } from '@proofhound/shared';
|
|
2
|
+
import { type TranslationKey } from '../../i18n';
|
|
3
|
+
export declare const DATASET_IMAGE_SAMPLE_DOWNLOADS: Array<{
|
|
4
|
+
labelKey: TranslationKey;
|
|
5
|
+
href: string;
|
|
6
|
+
fileName: string;
|
|
7
|
+
}>;
|
|
8
|
+
export declare function formatFileSize(bytes: number): string;
|
|
9
|
+
export declare function estimateUploadProgressBytes(sourceFile: CreateDatasetDto['uploadSource']): number;
|
|
10
|
+
export type DatasetUploadImportPath = 'sync' | 'buffered' | 'streaming' | 'raw';
|
|
11
|
+
export declare function selectDatasetUploadImportPath({ file, isLargeFile, parsedSampleCount, rawImportCapabilities, }: {
|
|
12
|
+
file: Pick<File, 'name' | 'size'>;
|
|
13
|
+
isLargeFile: boolean;
|
|
14
|
+
parsedSampleCount: number;
|
|
15
|
+
rawImportCapabilities: DatasetRawImportCapabilitiesDto | null;
|
|
16
|
+
}): DatasetUploadImportPath;
|
|
17
|
+
export declare function projectBufferedSampleBatches(samples: Array<Record<string, unknown>>, columns: string[], size: number, signal?: AbortSignal): AsyncGenerator<Record<string, unknown>[], void, any>;
|
|
1
18
|
export declare function DatasetUploadPage({ projectId }: {
|
|
2
19
|
projectId: string;
|
|
3
20
|
}): import("react/jsx-runtime").JSX.Element;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataset-upload-page.d.ts","sourceRoot":"","sources":["../../../src/screens/datasets/dataset-upload-page.tsx"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"dataset-upload-page.d.ts","sourceRoot":"","sources":["../../../src/screens/datasets/dataset-upload-page.tsx"],"names":[],"mappings":"AAKA,OAAO,KAAK,EACV,gBAAgB,EAIhB,+BAA+B,EAChC,MAAM,oBAAoB,CAAC;AA8B5B,OAAO,EAAW,KAAK,cAAc,EAAE,MAAM,YAAY,CAAC;AA8B1D,eAAO,MAAM,8BAA8B,EAAE,KAAK,CAAC;IACjD,QAAQ,EAAE,cAAc,CAAC;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB,CAqBA,CAAC;AAqDF,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,UAK3C;AA+ED,wBAAgB,2BAA2B,CAAC,UAAU,EAAE,gBAAgB,CAAC,cAAc,CAAC,UAEvF;AAYD,MAAM,MAAM,uBAAuB,GAAG,MAAM,GAAG,UAAU,GAAG,WAAW,GAAG,KAAK,CAAC;AAchF,wBAAgB,6BAA6B,CAAC,EAC5C,IAAI,EACJ,WAAW,EACX,iBAAiB,EACjB,qBAAqB,GACtB,EAAE;IACD,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC,CAAC;IAClC,WAAW,EAAE,OAAO,CAAC;IACrB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,qBAAqB,EAAE,+BAA+B,GAAG,IAAI,CAAC;CAC/D,GAAG,uBAAuB,CAc1B;AAkBD,wBAAuB,4BAA4B,CACjD,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,EACvC,OAAO,EAAE,MAAM,EAAE,EACjB,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,WAAW,wDAYrB;AAoFD,wBAAgB,iBAAiB,CAAC,EAAE,SAAS,EAAE,EAAE;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,2CA6xBrE"}
|
|
@@ -4,15 +4,15 @@ import { Link } from '../../components/navigation/link';
|
|
|
4
4
|
import { useRouter } from '../../hooks/use-router';
|
|
5
5
|
import { useEffect, useId, useMemo, useRef, useState } from 'react';
|
|
6
6
|
import { datasetImportClient } from '@proofhound/api-client';
|
|
7
|
-
import { AlertTriangle, Check, ChevronLeft, ChevronRight, FileText, Loader2, Upload } from 'lucide-react';
|
|
8
|
-
import { Button, Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle, Progress, formatProgressLabel, cn, } from '@proofhound/ui';
|
|
7
|
+
import { AlertTriangle, Check, ChevronLeft, ChevronRight, Download, FileText, Info, Loader2, Upload, } from 'lucide-react';
|
|
8
|
+
import { Button, Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle, Progress, Tooltip, TooltipContent, TooltipProvider, TooltipTrigger, formatProgressLabel, cn, } from '@proofhound/ui';
|
|
9
9
|
import { Main } from '@proofhound/ui/layout';
|
|
10
10
|
import { useCreateDataset } from '../../hooks';
|
|
11
11
|
import { useI18n } from '../../i18n';
|
|
12
|
-
import { runDatasetImport } from './dataset-import-runner';
|
|
12
|
+
import { projectSampleRowsToBatches, runDatasetImport, runRawDatasetImport } from './dataset-import-runner';
|
|
13
13
|
import { DatasetTransferProgressPanel, useDatasetTransferProgress } from './dataset-transfer-progress';
|
|
14
14
|
import { RoleArrowLabel, RolePill } from './dataset-ui';
|
|
15
|
-
import { FORMAT_CHIPS, PREVIEW_LIMIT, getDatasetNameFromFile, getDisplayValue, getUploadFilePath, inferRole,
|
|
15
|
+
import { FORMAT_CHIPS, PREVIEW_LIMIT, getDatasetNameFromFile, getDisplayValue, getUploadFilePath, inferRole, isStreamingImportFile, parseDatasetFile, parseStreamingPrefix, projectSamplesToColumns, selectDatasetFile, streamDatasetRows, } from './dataset-upload-parser';
|
|
16
16
|
const ROLE_OPTIONS = [
|
|
17
17
|
{ role: 'id', labelKey: 'datasets.role.id' },
|
|
18
18
|
{ role: 'text', labelKey: 'datasets.role.text' },
|
|
@@ -21,6 +21,28 @@ const ROLE_OPTIONS = [
|
|
|
21
21
|
{ role: 'metadata', labelKey: 'datasets.role.metadata' },
|
|
22
22
|
];
|
|
23
23
|
const directoryInputProps = { webkitdirectory: '', directory: '' };
|
|
24
|
+
export const DATASET_IMAGE_SAMPLE_DOWNLOADS = [
|
|
25
|
+
{
|
|
26
|
+
labelKey: 'datasets.upload.imageSamples.urlFields',
|
|
27
|
+
href: '/examples/datasets/images/image-url-fields.csv',
|
|
28
|
+
fileName: 'proofhound-image-url-fields.csv',
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
labelKey: 'datasets.upload.imageSamples.urlArray',
|
|
32
|
+
href: '/examples/datasets/images/image-url-array.csv',
|
|
33
|
+
fileName: 'proofhound-image-url-array.csv',
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
labelKey: 'datasets.upload.imageSamples.base64',
|
|
37
|
+
href: '/examples/datasets/images/image-base64.jsonl',
|
|
38
|
+
fileName: 'proofhound-image-base64.jsonl',
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
labelKey: 'datasets.upload.imageSamples.zip',
|
|
42
|
+
href: '/examples/datasets/images/image-zip-relative-paths.zip',
|
|
43
|
+
fileName: 'proofhound-image-zip-relative-paths.zip',
|
|
44
|
+
},
|
|
45
|
+
];
|
|
24
46
|
function normalizeExpectedRoles(roles, preferredColumn) {
|
|
25
47
|
let expectedColumn = preferredColumn && roles[preferredColumn] === 'expected' ? preferredColumn : null;
|
|
26
48
|
if (!expectedColumn) {
|
|
@@ -37,12 +59,22 @@ function SectionNumber({ value }) {
|
|
|
37
59
|
function Section({ number, title, hint, children, className, }) {
|
|
38
60
|
return (_jsxs("section", { className: cn('rounded-lg border bg-card', className), children: [_jsxs("div", { className: "flex items-center gap-2 border-b px-4 py-3", children: [_jsx(SectionNumber, { value: number }), _jsx("h2", { className: "text-[14.5px] font-semibold", children: title }), _jsx("span", { className: "ml-auto text-[11.5px] text-muted-foreground", children: hint })] }), _jsx("div", { className: "p-4", children: children })] }));
|
|
39
61
|
}
|
|
40
|
-
function formatFileSize(bytes) {
|
|
62
|
+
/**
 * Formats a byte count for display using binary units (B, KB, MB, GB).
 *
 * Values below 1024 are printed as-is with a `B` suffix; larger values are printed with
 * one decimal. When rounding to one decimal would display 1024.0 of a unit, the value is
 * promoted to the next unit (for example 1048575 bytes reads "1.0 MB", not "1024.0 KB").
 * GB is the largest unit.
 *
 * @param bytes Size in bytes.
 * @returns Human-readable size string.
 */
export function formatFileSize(bytes) {
    if (bytes < 1024)
        return `${bytes} B`;
    const scaledUnits = ['KB', 'MB', 'GB'];
    let value = bytes / 1024;
    let unitIndex = 0;
    // Compare the rounded figure, not the raw one, so the displayed number never reaches 1024.0.
    while (unitIndex < scaledUnits.length - 1 && Number(value.toFixed(1)) >= 1024) {
        value /= 1024;
        unitIndex += 1;
    }
    return `${value.toFixed(1)} ${scaledUnits[unitIndex]}`;
}
/**
 * Formats a byte limit for display.
 *
 * Produces the same output as `formatFileSize`, so it delegates instead of repeating
 * the GB branch.
 *
 * @param bytes Limit in bytes.
 * @returns Human-readable size string.
 */
function formatByteLimit(bytes) {
    return formatFileSize(bytes);
}
|
|
76
|
+
/**
 * Substitutes `{key}` placeholders in `template` with the matching entries of `values`.
 *
 * Substitution happens in a single pass with a replacer function, so:
 * - `$`-sequences in a value (`$&`, `$$`, `$1`, ...) are inserted literally, and
 * - placeholder-looking text inside a substituted value is not expanded again.
 * Placeholders with no matching own key in `values` are left untouched.
 *
 * @param template Text containing `{key}` placeholders.
 * @param values Map from placeholder name to the value to insert (stringified with `String`).
 * @returns The template with known placeholders replaced.
 */
function formatTemplate(template, values) {
    return template.replace(/\{([^{}]*)\}/g, (placeholder, key) => Object.prototype.hasOwnProperty.call(values, key) ? String(values[key]) : placeholder);
}
|
|
47
79
|
function withRelativePath(file, relativePath) {
|
|
48
80
|
Object.defineProperty(file, 'proofhoundRelativePath', {
|
|
@@ -96,12 +128,12 @@ async function getDroppedFiles(dataTransfer) {
|
|
|
96
128
|
/**
 * Maps a parse error code to the translation key shown to the user.
 * Unrecognised codes fall back to the generic parse-failure key.
 */
function getParseErrorKey(parseError) {
    switch (parseError) {
        case 'unsupported_file_type':
            return 'datasets.upload.unsupportedFile';
        case 'large_requires_streaming_format':
            return 'datasets.upload.largeRequiresStreamingFormat';
        default:
            return 'datasets.upload.parseFailed';
    }
}
|
|
103
|
-
function
|
|
104
|
-
return
|
|
135
|
+
/**
 * Returns the byte total used for upload progress, never less than 1.
 *
 * `Math.max` propagates `NaN`, so a missing or non-numeric `fileSizeBytes` would
 * otherwise yield `NaN`; such inputs (and a missing `sourceFile`) fall back to 1.
 *
 * @param sourceFile Upload source descriptor; only `fileSizeBytes` is read.
 * @returns `fileSizeBytes` clamped to a minimum of 1, or 1 when it is not a finite number.
 */
export function estimateUploadProgressBytes(sourceFile) {
    const declaredBytes = Number(sourceFile?.fileSizeBytes);
    return Number.isFinite(declaredBytes) ? Math.max(1, declaredBytes) : 1;
}
|
|
106
138
|
// Files larger than this are not parsed whole on drop: only a head prefix is read for preview,
|
|
107
139
|
// and on import they stream off disk through the dataset-import session.
|
|
@@ -110,6 +142,8 @@ const SYNC_MAX_FILE_BYTES = 10 * 1024 * 1024;
|
|
|
110
142
|
// this many samples, because the synchronous POST /datasets path is capped server-side.
|
|
111
143
|
const SYNC_MAX_SAMPLES = 5000;
|
|
112
144
|
const IMPORT_BATCH_SIZE = 1000;
|
|
145
|
+
const DEFAULT_RAW_UPLOAD_MAX_BYTES = 2 * 1024 * 1024 * 1024;
|
|
146
|
+
const RAW_IMPORT_MIN_FILE_BYTES = 256 * 1024 * 1024;
|
|
113
147
|
function toImportSourceFormat(fileName) {
|
|
114
148
|
const lower = fileName.toLowerCase();
|
|
115
149
|
if (lower.endsWith('.csv'))
|
|
@@ -118,16 +152,68 @@ function toImportSourceFormat(fileName) {
|
|
|
118
152
|
return 'tsv';
|
|
119
153
|
return 'jsonl';
|
|
120
154
|
}
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
155
|
+
/**
 * Reports whether a file name ends in one of the line-oriented extensions
 * (.jsonl, .csv, .tsv), compared case-insensitively.
 */
function isStreamingImportFileName(fileName) {
    const normalized = fileName.toLowerCase();
    return ['.jsonl', '.csv', '.tsv'].some((extension) => normalized.endsWith(extension));
}
|
|
159
|
+
/**
 * Chooses which import path to use for the selected file.
 *
 * - Files not flagged as large: 'buffered' when more than SYNC_MAX_SAMPLES rows were parsed, else 'sync'.
 * - Large files: 'raw' when raw import is reported as supported, the name has a streaming
 *   extension, and the size is above RAW_IMPORT_MIN_FILE_BYTES and within the reported
 *   `maxBytes`; otherwise 'streaming'.
 */
export function selectDatasetUploadImportPath({ file, isLargeFile, parsedSampleCount, rawImportCapabilities, }) {
    if (!isLargeFile) {
        return parsedSampleCount > SYNC_MAX_SAMPLES ? 'buffered' : 'sync';
    }
    const rawEligible = rawImportCapabilities?.supported === true &&
        isStreamingImportFileName(file.name) &&
        file.size > RAW_IMPORT_MIN_FILE_BYTES &&
        file.size <= rawImportCapabilities.maxBytes;
    return rawEligible ? 'raw' : 'streaming';
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
171
|
+
/**
 * Resolves after giving the host a chance to run other work (and, in a visible page, to paint).
 *
 * In a page with `requestAnimationFrame` it waits for two animation frames. Because
 * browsers pause animation-frame callbacks for hidden pages, a hidden page uses a
 * zero-delay timer instead, and a short fallback timer guarantees resolution if the
 * frames never fire (for example when the tab is hidden mid-wait). Without `window`
 * or `requestAnimationFrame` it uses a zero-delay timer.
 *
 * @returns Promise that resolves with no value.
 */
function yieldToBrowser() {
    return new Promise((resolve) => {
        const canAnimate = typeof window !== 'undefined' && typeof window.requestAnimationFrame === 'function';
        const pageHidden = typeof document !== 'undefined' && document.visibilityState === 'hidden';
        if (!canAnimate || pageHidden) {
            setTimeout(resolve, 0);
            return;
        }
        // Safety net: animation frames may never arrive if the page stops rendering.
        const fallbackTimer = setTimeout(() => resolve(), 100);
        window.requestAnimationFrame(() => window.requestAnimationFrame(() => {
            clearTimeout(fallbackTimer);
            resolve();
        }));
    });
}
|
|
180
|
+
/**
 * Throws a DOMException named 'AbortError' when the given signal is already aborted.
 * Does nothing when the signal is missing or still active.
 */
function throwIfAborted(signal) {
    if (signal?.aborted) {
        throw new DOMException('aborted', 'AbortError');
    }
}
|
|
185
|
+
/**
 * Feeds already-parsed samples through projectSampleRowsToBatches, using `size` as the row limit.
 *
 * The in-memory rows are exposed as an async source that checks the abort signal for
 * every row and calls yieldToBrowser() each time `size` rows have been handed over.
 * Missing entries are passed on as empty objects.
 */
export async function* projectBufferedSampleBatches(samples, columns, size, signal) {
    const bufferedRows = async function* () {
        let position = 0;
        while (position < samples.length) {
            throwIfAborted(signal);
            const atBatchBoundary = position > 0 && position % size === 0;
            if (atBatchBoundary) {
                await yieldToBrowser();
            }
            // Re-check: the signal may have been aborted while yielding.
            throwIfAborted(signal);
            yield samples[position] ?? {};
            position += 1;
        }
    };
    const batchOptions = { maxRows: size, signal };
    yield* projectSampleRowsToBatches(bufferedRows(), columns, batchOptions);
}
|
|
197
|
+
// Reads rows from a large JSONL/CSV/TSV file via streamDatasetRows and regroups them,
// projected to the selected columns, into batches of at most `size` rows.
async function* projectedStreamingFileBatches(file, columns, size, onBytes, signal) {
    const fileRows = streamDatasetRows(file, onBytes, signal);
    const batchOptions = { maxRows: size, signal };
    yield* projectSampleRowsToBatches(fileRows, columns, batchOptions);
}
|
|
201
|
+
/**
 * Info button with a tooltip that lists the upload size limits.
 * `rawMaxBytes` is formatted into the raw-upload line; the sync line uses SYNC_MAX_FILE_BYTES.
 */
function UploadLimitInfoIcon({ rawMaxBytes }) {
    const { t } = useI18n();
    const rawLimit = formatByteLimit(rawMaxBytes);
    const syncLimit = formatByteLimit(SYNC_MAX_FILE_BYTES);
    // Trigger: a small icon-only button, labelled for assistive technology.
    const infoButton = _jsx("button", {
        type: "button",
        className: "inline-flex size-5 items-center justify-center rounded-full text-muted-foreground transition-colors hover:text-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring",
        "aria-label": t('datasets.upload.limitInfoLabel'),
        "data-testid": "dataset-upload-limit-info",
        children: _jsx(Info, { className: "size-3.5" }),
    });
    const trigger = _jsx(TooltipTrigger, { asChild: true, children: infoButton });
    // Content: a title followed by one paragraph per limit.
    const heading = _jsx("div", {
        className: "text-[11.5px] font-semibold",
        children: t('datasets.upload.limitInfoTitle'),
    });
    const paragraphs = _jsxs("div", {
        className: "mt-1.5 space-y-1 text-[11px] leading-relaxed",
        children: [
            _jsx("p", { children: formatTemplate(t('datasets.upload.limitInfoSmall'), { syncLimit }) }),
            _jsx("p", { children: t('datasets.upload.limitInfoStreaming') }),
            _jsx("p", { children: formatTemplate(t('datasets.upload.limitInfoRaw'), { rawLimit }) }),
            _jsx("p", { children: t('datasets.upload.limitInfoJsonZip') }),
        ],
    });
    const content = _jsxs(TooltipContent, {
        side: "top",
        className: "max-w-[340px] text-left",
        children: [heading, paragraphs],
    });
    return (_jsx(TooltipProvider, { delayDuration: 140, children: _jsxs(Tooltip, { children: [trigger, content] }) }));
}
|
|
211
|
+
/**
 * Section listing one download link per entry of DATASET_IMAGE_SAMPLE_DOWNLOADS,
 * under a translated title and hint.
 */
function ImageSampleDownloads() {
    const { t } = useI18n();
    // One anchor per sample; the translated label is both the visible text and part of the aria-label.
    const renderSampleLink = (sample) => {
        const label = t(sample.labelKey);
        return (_jsxs("a", {
            className: "inline-flex h-8 items-center gap-1.5 rounded-md border bg-background px-2.5 text-[11.5px] font-medium text-foreground transition-colors hover:bg-muted",
            href: sample.href,
            download: sample.fileName,
            "aria-label": formatTemplate(t('datasets.upload.imageSamples.downloadAria'), { name: label }),
            "data-testid": `dataset-image-sample-${sample.fileName}`,
            children: [_jsx(Download, { className: "size-3.5" }), label],
        }, sample.href));
    };
    const header = _jsxs("div", {
        className: "flex flex-col gap-1 sm:flex-row sm:items-center sm:justify-between",
        children: [
            _jsx("div", { className: "text-[12px] font-semibold", children: t('datasets.upload.imageSamples.title') }),
            _jsx("div", { className: "text-[11.5px] text-muted-foreground", children: t('datasets.upload.imageSamples.hint') }),
        ],
    });
    const links = _jsx("div", {
        className: "mt-2 flex flex-wrap gap-2",
        children: DATASET_IMAGE_SAMPLE_DOWNLOADS.map((sample) => renderSampleLink(sample)),
    });
    return (_jsxs("div", {
        className: "border-t pt-3",
        "data-testid": "dataset-upload-image-samples",
        children: [header, links],
    }));
}
|
|
132
218
|
export function DatasetUploadPage({ projectId }) {
|
|
133
219
|
const { t } = useI18n();
|
|
@@ -146,12 +232,29 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
146
232
|
const [isDragOver, setIsDragOver] = useState(false);
|
|
147
233
|
const [isImporting, setIsImporting] = useState(false);
|
|
148
234
|
const [isLargeFile, setIsLargeFile] = useState(false);
|
|
235
|
+
const [rawImportCapabilities, setRawImportCapabilities] = useState(null);
|
|
149
236
|
const importAbortRef = useRef(null);
|
|
150
237
|
const importIdRef = useRef(null);
|
|
151
238
|
const leaveActionRef = useRef(null);
|
|
152
239
|
const [leaveDialogOpen, setLeaveDialogOpen] = useState(false);
|
|
153
240
|
// Leaving the page mid-import aborts the session so the server clears its staging rows (中断即删干净).
|
|
154
241
|
useEffect(() => () => importAbortRef.current?.abort(), []);
|
|
242
|
+
useEffect(() => {
|
|
243
|
+
let cancelled = false;
|
|
244
|
+
datasetImportClient
|
|
245
|
+
.getRawImportCapabilities(projectId)
|
|
246
|
+
.then((capabilities) => {
|
|
247
|
+
if (!cancelled)
|
|
248
|
+
setRawImportCapabilities(capabilities);
|
|
249
|
+
})
|
|
250
|
+
.catch(() => {
|
|
251
|
+
if (!cancelled)
|
|
252
|
+
setRawImportCapabilities({ supported: false, maxBytes: 1 });
|
|
253
|
+
});
|
|
254
|
+
return () => {
|
|
255
|
+
cancelled = true;
|
|
256
|
+
};
|
|
257
|
+
}, [projectId]);
|
|
155
258
|
// While an import is in flight, guard every way to leave so the user is warned before losing it.
|
|
156
259
|
useEffect(() => {
|
|
157
260
|
if (!isImporting)
|
|
@@ -248,12 +351,12 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
248
351
|
try {
|
|
249
352
|
const file = await selectDatasetFile(files);
|
|
250
353
|
const large = file.size > SYNC_MAX_FILE_BYTES;
|
|
251
|
-
if (large && !
|
|
252
|
-
//
|
|
253
|
-
throw new Error('
|
|
354
|
+
if (large && !isStreamingImportFile(file)) {
|
|
355
|
+
// Large JSON arrays / ZIPs are not parsed whole on drop until they have real streaming parsers.
|
|
356
|
+
throw new Error('large_requires_streaming_format');
|
|
254
357
|
}
|
|
255
358
|
// Large files: read only a head prefix for preview/mapping, never the whole file.
|
|
256
|
-
const parsed = large ? await
|
|
359
|
+
const parsed = large ? await parseStreamingPrefix(file) : await parseDatasetFile(file);
|
|
257
360
|
setSelectedFile(file);
|
|
258
361
|
setParsedFile(parsed);
|
|
259
362
|
setIsLargeFile(large);
|
|
@@ -283,17 +386,18 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
283
386
|
description: description.trim() || null,
|
|
284
387
|
fieldMappings,
|
|
285
388
|
sourceFile,
|
|
286
|
-
sourceFormat:
|
|
389
|
+
sourceFormat: toImportSourceFormat(sourceFile.fileName),
|
|
287
390
|
};
|
|
288
391
|
const controller = new AbortController();
|
|
289
392
|
importAbortRef.current = controller;
|
|
290
393
|
setIsImporting(true);
|
|
291
394
|
uploadProgress.start(t('datasets.transfer.uploadTitle'), totalBytes);
|
|
395
|
+
await yieldToBrowser();
|
|
292
396
|
try {
|
|
293
397
|
await runDatasetImport({
|
|
294
398
|
projectId,
|
|
295
399
|
createBody,
|
|
296
|
-
batches:
|
|
400
|
+
batches: projectedStreamingFileBatches(file, selectedColumns, IMPORT_BATCH_SIZE, (readBytes) => uploadProgress.update({ loadedBytes: readBytes, totalBytes }), controller.signal),
|
|
297
401
|
signal: controller.signal,
|
|
298
402
|
onCreated: (id) => {
|
|
299
403
|
importIdRef.current = id;
|
|
@@ -311,9 +415,46 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
311
415
|
importIdRef.current = null;
|
|
312
416
|
}
|
|
313
417
|
};
|
|
314
|
-
const
|
|
418
|
+
const importRawDataset = async (fieldMappings, sourceFile, file) => {
|
|
419
|
+
const totalBytes = file.size;
|
|
420
|
+
const createBody = {
|
|
421
|
+
name: datasetName.trim(),
|
|
422
|
+
description: description.trim() || null,
|
|
423
|
+
fieldMappings,
|
|
424
|
+
sourceFile,
|
|
425
|
+
sourceFormat: toImportSourceFormat(sourceFile.fileName),
|
|
426
|
+
};
|
|
427
|
+
const controller = new AbortController();
|
|
428
|
+
importAbortRef.current = controller;
|
|
429
|
+
setIsImporting(true);
|
|
430
|
+
uploadProgress.start(t('datasets.transfer.uploadTitle'), totalBytes);
|
|
431
|
+
await yieldToBrowser();
|
|
432
|
+
try {
|
|
433
|
+
await runRawDatasetImport({
|
|
434
|
+
projectId,
|
|
435
|
+
createBody,
|
|
436
|
+
file,
|
|
437
|
+
signal: controller.signal,
|
|
438
|
+
onCreated: (id) => {
|
|
439
|
+
importIdRef.current = id;
|
|
440
|
+
},
|
|
441
|
+
onUploaded: () => uploadProgress.update({ loadedBytes: totalBytes, totalBytes }),
|
|
442
|
+
});
|
|
443
|
+
uploadProgress.complete(totalBytes);
|
|
444
|
+
router.push(`/datasets`);
|
|
445
|
+
}
|
|
446
|
+
catch {
|
|
447
|
+
uploadProgress.fail();
|
|
448
|
+
}
|
|
449
|
+
finally {
|
|
450
|
+
setIsImporting(false);
|
|
451
|
+
importAbortRef.current = null;
|
|
452
|
+
importIdRef.current = null;
|
|
453
|
+
}
|
|
454
|
+
};
|
|
455
|
+
const importBufferedDataset = async (fieldMappings, sourceFile, samples, columns) => {
|
|
315
456
|
const totalRows = samples.length;
|
|
316
|
-
const estimatedBytes =
|
|
457
|
+
const estimatedBytes = estimateUploadProgressBytes(sourceFile);
|
|
317
458
|
const createBody = {
|
|
318
459
|
name: datasetName.trim(),
|
|
319
460
|
description: description.trim() || null,
|
|
@@ -326,11 +467,12 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
326
467
|
importAbortRef.current = controller;
|
|
327
468
|
setIsImporting(true);
|
|
328
469
|
uploadProgress.start(t('datasets.transfer.uploadTitle'), estimatedBytes);
|
|
470
|
+
await yieldToBrowser();
|
|
329
471
|
try {
|
|
330
472
|
await runDatasetImport({
|
|
331
473
|
projectId,
|
|
332
474
|
createBody,
|
|
333
|
-
batches:
|
|
475
|
+
batches: projectBufferedSampleBatches(samples, columns, IMPORT_BATCH_SIZE, controller.signal),
|
|
334
476
|
signal: controller.signal,
|
|
335
477
|
onCreated: (id) => {
|
|
336
478
|
importIdRef.current = id;
|
|
@@ -364,15 +506,25 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
364
506
|
fileSizeBytes: selectedFile.size,
|
|
365
507
|
contentType: selectedFile.type || undefined,
|
|
366
508
|
};
|
|
367
|
-
|
|
509
|
+
const importPath = selectDatasetUploadImportPath({
|
|
510
|
+
file: selectedFile,
|
|
511
|
+
isLargeFile,
|
|
512
|
+
parsedSampleCount: parsedFile.samples.length,
|
|
513
|
+
rawImportCapabilities,
|
|
514
|
+
});
|
|
515
|
+
if (importPath === 'raw') {
|
|
516
|
+
await importRawDataset(fieldMappings, sourceFile, selectedFile);
|
|
517
|
+
return;
|
|
518
|
+
}
|
|
519
|
+
if (importPath === 'streaming') {
|
|
368
520
|
await importStreamingDataset(fieldMappings, sourceFile, selectedFile);
|
|
369
521
|
return;
|
|
370
522
|
}
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
await importBufferedDataset(fieldMappings, sourceFile, samples);
|
|
523
|
+
if (importPath === 'buffered') {
|
|
524
|
+
await importBufferedDataset(fieldMappings, sourceFile, parsedFile.samples, selectedColumns);
|
|
374
525
|
return;
|
|
375
526
|
}
|
|
527
|
+
const samples = projectSamplesToColumns(parsedFile.samples, selectedColumns);
|
|
376
528
|
const body = {
|
|
377
529
|
name: datasetName.trim(),
|
|
378
530
|
description: description.trim() || null,
|
|
@@ -380,7 +532,7 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
380
532
|
fieldMappings,
|
|
381
533
|
samples,
|
|
382
534
|
};
|
|
383
|
-
const estimatedBytes =
|
|
535
|
+
const estimatedBytes = estimateUploadProgressBytes(sourceFile);
|
|
384
536
|
uploadProgress.start(t('datasets.transfer.uploadTitle'), estimatedBytes);
|
|
385
537
|
try {
|
|
386
538
|
await createDataset.mutateAsync({
|
|
@@ -403,11 +555,9 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
403
555
|
? t('datasets.upload.uploadReady')
|
|
404
556
|
: isDragOver
|
|
405
557
|
? t('datasets.upload.dropHere')
|
|
406
|
-
: t('datasets.upload.waitingForFile') }), _jsxs("div", { className: "flex items-center gap-2 text-[11.5px]", children: [_jsx("label", { className: "cursor-pointer text-muted-foreground hover:text-foreground", htmlFor: fileInputId, children: selectedFile ? t('datasets.action.replaceFile') : t('datasets.upload.browse') }), _jsx("span", { className: "text-muted-foreground", children: "\u00B7" }), _jsx("label", { className: "cursor-pointer text-muted-foreground hover:text-foreground", htmlFor: folderInputId, children: t('datasets.upload.browseFolder') })] })] })] })] })] }), parseError && (_jsxs("div", { className: "flex gap-2 rounded-md border border-destructive/35 bg-destructive/10 p-3 text-[12px] text-destructive", children: [_jsx(AlertTriangle, { className: "mt-0.5 size-4 shrink-0" }), _jsx("div", { children: t(parseErrorKey) })] })), _jsxs("div", { className: "flex flex-wrap items-center gap-2", children: [_jsx("span", { className: "font-mono text-[11px] text-muted-foreground", children: t('datasets.upload.supportedFormats') }), FORMAT_CHIPS.map((format) => (_jsx("span", { className: "inline-flex rounded-[5px] border bg-muted px-2 py-0.5 font-mono text-[11px]", children: format }, format)))] })] }) }), _jsx(Section, { number: 2, title: t('datasets.upload.basicInfo'), hint: t('datasets.upload.basicInfoHint'), children: _jsxs("div", { className: "space-y-4", children: [_jsxs("div", { children: [_jsxs("label", { className: "mb-1.5 block text-xs font-medium", children: [t('datasets.upload.name'), " ", _jsx("span", { className: "text-destructive", children: "*" })] }), _jsx("input", { className: "flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm outline-none focus-visible:ring-2 focus-visible:ring-ring", value: datasetName, onChange: (event) => setDatasetName(event.target.value), placeholder: "risk-eval-v4" }), _jsx("div", { className: "mt-1 text-[11px] text-muted-foreground", children: t('datasets.upload.nameHelp') })] }), _jsxs("div", { 
children: [_jsx("label", { className: "mb-1.5 block text-xs font-medium", children: t('datasets.upload.description') }), _jsx("textarea", { className: "min-h-24 w-full rounded-md border border-input bg-background px-3 py-2 text-sm outline-none focus-visible:ring-2 focus-visible:ring-ring", value: description, onChange: (event) => setDescription(event.target.value), placeholder: t('datasets.upload.descriptionPlaceholder') })] })] }) }), _jsx(Section, { number: 3, title: t('datasets.upload.previewAndMapping'), hint: parsedFile
|
|
558
|
+
: t('datasets.upload.waitingForFile') }), _jsxs("div", { className: "flex items-center gap-2 text-[11.5px]", children: [_jsx("label", { className: "cursor-pointer text-muted-foreground hover:text-foreground", htmlFor: fileInputId, children: selectedFile ? t('datasets.action.replaceFile') : t('datasets.upload.browse') }), _jsx("span", { className: "text-muted-foreground", children: "\u00B7" }), _jsx("label", { className: "cursor-pointer text-muted-foreground hover:text-foreground", htmlFor: folderInputId, children: t('datasets.upload.browseFolder') })] })] })] })] })] }), parseError && (_jsxs("div", { className: "flex gap-2 rounded-md border border-destructive/35 bg-destructive/10 p-3 text-[12px] text-destructive", children: [_jsx(AlertTriangle, { className: "mt-0.5 size-4 shrink-0" }), _jsx("div", { children: t(parseErrorKey) })] })), _jsxs("div", { className: "flex flex-wrap items-center gap-2", children: [_jsx("span", { className: "font-mono text-[11px] text-muted-foreground", children: t('datasets.upload.supportedFormats') }), _jsx(UploadLimitInfoIcon, { rawMaxBytes: rawImportCapabilities?.maxBytes ?? 
DEFAULT_RAW_UPLOAD_MAX_BYTES }), FORMAT_CHIPS.map((format) => (_jsx("span", { className: "inline-flex rounded-[5px] border bg-muted px-2 py-0.5 font-mono text-[11px]", children: format }, format)))] }), _jsx(ImageSampleDownloads, {})] }) }), _jsx(Section, { number: 2, title: t('datasets.upload.basicInfo'), hint: t('datasets.upload.basicInfoHint'), children: _jsxs("div", { className: "space-y-4", children: [_jsxs("div", { children: [_jsxs("label", { className: "mb-1.5 block text-xs font-medium", children: [t('datasets.upload.name'), " ", _jsx("span", { className: "text-destructive", children: "*" })] }), _jsx("input", { className: "flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm outline-none focus-visible:ring-2 focus-visible:ring-ring", value: datasetName, onChange: (event) => setDatasetName(event.target.value), placeholder: "risk-eval-v4" }), _jsx("div", { className: "mt-1 text-[11px] text-muted-foreground", children: t('datasets.upload.nameHelp') })] }), _jsxs("div", { children: [_jsx("label", { className: "mb-1.5 block text-xs font-medium", children: t('datasets.upload.description') }), _jsx("textarea", { className: "min-h-24 w-full rounded-md border border-input bg-background px-3 py-2 text-sm outline-none focus-visible:ring-2 focus-visible:ring-ring", value: description, onChange: (event) => setDescription(event.target.value), placeholder: t('datasets.upload.descriptionPlaceholder') })] })] }) }), _jsx(Section, { number: 3, title: t('datasets.upload.previewAndMapping'), hint: parsedFile
|
|
407
559
|
? `${parsedFile.columns.length} ${t('datasets.detail.fields')} · ${sampleCountLabel}`
|
|
408
|
-
: t('datasets.upload.previewAndMappingHint'), className: "xl:col-span-2", children: !parsedFile ? (_jsx("div", { className: "rounded-md border border-dashed bg-muted/30 p-8 text-center text-sm text-muted-foreground", children: t('datasets.upload.noPreview') })) : (_jsxs("div", { className: "-m-4", children: [_jsxs("div", { className: "border-b", children: [_jsxs("div", { className: "flex flex-col gap-2 bg-muted/30 px-4 py-2.5 sm:flex-row sm:items-center sm:justify-between", children: [_jsxs("div", { className: "flex items-center gap-2", children: [_jsx("span", { className: "text-xs font-semibold", children: t('datasets.upload.samplePreview') }), _jsx("span", { className: "text-[11px] text-muted-foreground", children: t('datasets.upload.samplePreviewHint') })] }), _jsx("span", { className: "font-mono text-[11px] text-muted-foreground", children: t('datasets.upload.fieldRoleHint') })] }), _jsx("div", { className: "overflow-x-auto", children: _jsxs("table", { className: "w-full min-w-[880px] text-sm", children: [_jsx("thead", { children: _jsx("tr", { className: "border-b bg-muted/60 text-left text-xs font-medium text-muted-foreground", children: parsedFile.columns.map((column) => (_jsx("th", { className: cn('px-3 py-3', !selectedFields[column] && 'opacity-45'), children: _jsxs("div", { className: "flex flex-col", children: [_jsx("span", { children: column }), selectedFields[column] ? (_jsx(RoleArrowLabel, { role: fieldRoles[column] ?? 
'metadata' })) : (_jsxs("span", { className: "font-mono text-[10px] font-normal text-muted-foreground", children: ['->', " ", t('datasets.upload.notImported')] }))] }) }, column))) }) }), _jsx("tbody", { children: previewRows.map((row, index) => (_jsx("tr", { className: "border-b last:border-b-0 hover:bg-muted/35", children: parsedFile.columns.map((column) => (_jsx("td", { className: "max-w-[280px] truncate px-3 py-3 font-mono text-[12px]", children: getDisplayValue(row[column]) }, column))) }, index))) })] }) }), _jsxs("div", { className: "flex items-center justify-between border-t px-4 py-2.5 text-xs text-muted-foreground", children: [_jsxs("div", { className: "flex items-center gap-1.5", children: [_jsx(Button, { type: "button", variant: "ghost", size: "icon", className: "size-7", "aria-label": t('common.previousPage'), disabled: true, children: _jsx(ChevronLeft, { className: "size-3.5" }) }), _jsxs("span", { className: "font-mono", children: ["1-", previewRows.length, ' ', isLargeFile
|
|
409
|
-
? `· ${t('datasets.upload.previewPrefixOnly')}`
|
|
410
|
-
: `/ ${parsedFile.samples.length}`] }), _jsx(Button, { type: "button", variant: "ghost", size: "icon", className: "size-7", "aria-label": t('common.nextPage'), disabled: true, children: _jsx(ChevronRight, { className: "size-3.5" }) })] }), _jsxs("span", { className: "font-mono text-[11.5px]", children: [sampleCountLabel, " \u00B7 ", selectedColumns.length, ' ', t('datasets.detail.fields'), ' ', selectedColumns.length > 0
|
|
560
|
+
: t('datasets.upload.previewAndMappingHint'), className: "xl:col-span-2", children: !parsedFile ? (_jsx("div", { className: "rounded-md border border-dashed bg-muted/30 p-8 text-center text-sm text-muted-foreground", children: t('datasets.upload.noPreview') })) : (_jsxs("div", { className: "-m-4", children: [_jsxs("div", { className: "border-b", children: [_jsxs("div", { className: "flex flex-col gap-2 bg-muted/30 px-4 py-2.5 sm:flex-row sm:items-center sm:justify-between", children: [_jsxs("div", { className: "flex items-center gap-2", children: [_jsx("span", { className: "text-xs font-semibold", children: t('datasets.upload.samplePreview') }), _jsx("span", { className: "text-[11px] text-muted-foreground", children: t('datasets.upload.samplePreviewHint') })] }), _jsx("span", { className: "font-mono text-[11px] text-muted-foreground", children: t('datasets.upload.fieldRoleHint') })] }), _jsx("div", { className: "overflow-x-auto", children: _jsxs("table", { className: "w-full min-w-[880px] text-sm", children: [_jsx("thead", { children: _jsx("tr", { className: "border-b bg-muted/60 text-left text-xs font-medium text-muted-foreground", children: parsedFile.columns.map((column) => (_jsx("th", { className: cn('px-3 py-3', !selectedFields[column] && 'opacity-45'), children: _jsxs("div", { className: "flex flex-col", children: [_jsx("span", { children: column }), selectedFields[column] ? (_jsx(RoleArrowLabel, { role: fieldRoles[column] ?? 
'metadata' })) : (_jsxs("span", { className: "font-mono text-[10px] font-normal text-muted-foreground", children: ['->', " ", t('datasets.upload.notImported')] }))] }) }, column))) }) }), _jsx("tbody", { children: previewRows.map((row, index) => (_jsx("tr", { className: "border-b last:border-b-0 hover:bg-muted/35", children: parsedFile.columns.map((column) => (_jsx("td", { className: "max-w-[280px] truncate px-3 py-3 font-mono text-[12px]", children: getDisplayValue(row[column]) }, column))) }, index))) })] }) }), _jsxs("div", { className: "flex items-center justify-between border-t px-4 py-2.5 text-xs text-muted-foreground", children: [_jsxs("div", { className: "flex items-center gap-1.5", children: [_jsx(Button, { type: "button", variant: "ghost", size: "icon", className: "size-7", "aria-label": t('common.previousPage'), disabled: true, children: _jsx(ChevronLeft, { className: "size-3.5" }) }), _jsxs("span", { className: "font-mono", children: ["1-", previewRows.length, ' ', isLargeFile ? `· ${t('datasets.upload.previewPrefixOnly')}` : `/ ${parsedFile.samples.length}`] }), _jsx(Button, { type: "button", variant: "ghost", size: "icon", className: "size-7", "aria-label": t('common.nextPage'), disabled: true, children: _jsx(ChevronRight, { className: "size-3.5" }) })] }), _jsxs("span", { className: "font-mono text-[11.5px]", children: [sampleCountLabel, " \u00B7 ", selectedColumns.length, " ", t('datasets.detail.fields'), ' ', selectedColumns.length > 0
|
|
411
561
|
? t('datasets.upload.readyToImport')
|
|
412
562
|
: t('datasets.upload.noSelectedFields')] })] })] }), _jsxs("div", { children: [_jsxs("div", { className: "flex flex-col gap-2 bg-muted/30 px-4 py-2.5 sm:flex-row sm:items-center sm:justify-between", children: [_jsxs("div", { className: "flex items-center gap-2", children: [_jsx("span", { className: "text-xs font-semibold", children: t('datasets.upload.fieldMapping') }), _jsx("span", { className: "text-[11px] text-muted-foreground", children: t('datasets.upload.fieldMappingHint') })] }), _jsxs("span", { className: "font-mono text-[11px] text-muted-foreground", children: [t('datasets.upload.selectedFields'), ": ", selectedColumns.length, " / ", parsedFile.columns.length] }), _jsx("div", { className: "flex flex-wrap items-center gap-1.5", children: ROLE_OPTIONS.map((option) => (_jsx(RolePill, { role: option.role }, option.role))) })] }), _jsxs("div", { className: "grid grid-cols-[44px_96px_minmax(0,1fr)_minmax(0,1.2fr)_200px] border-t bg-muted/60 px-4 py-2.5 text-xs font-medium text-muted-foreground", children: [_jsx("div", { children: "#" }), _jsx("div", { children: t('datasets.upload.importField') }), _jsx("div", { children: t('datasets.upload.originalColumn') }), _jsx("div", { children: t('datasets.upload.firstRow') }), _jsx("div", { children: t('datasets.upload.role') })] }), parsedFile.columns.map((column, index) => (_jsxs("div", { className: cn('grid grid-cols-[44px_96px_minmax(0,1fr)_minmax(0,1.2fr)_200px] items-center border-t px-4 py-3 text-sm', !selectedFields[column] && 'bg-muted/25 text-muted-foreground'), children: [_jsx("span", { className: "flex size-6 items-center justify-center rounded bg-muted font-mono text-[11px] text-muted-foreground", children: index + 1 }), _jsxs("label", { className: "inline-flex items-center gap-2 text-xs font-medium", children: [_jsx("input", { type: "checkbox", checked: selectedFields[column] ?? false, onChange: (event) => setSelectedFields((current) => ({
|
|
413
563
|
...current,
|
|
@@ -415,7 +565,7 @@ export function DatasetUploadPage({ projectId }) {
|
|
|
415
565
|
})), className: "size-4 accent-primary", "aria-label": `${t('datasets.upload.importField')}: ${column}` }), selectedFields[column] ? t('datasets.upload.importField') : t('datasets.upload.notImported')] }), _jsx("div", { className: "min-w-0", children: _jsx("div", { className: "truncate font-mono text-[12.5px] font-semibold", children: column }) }), _jsx("div", { className: "truncate rounded-md bg-muted/45 px-2 py-1 font-mono text-[11.5px] text-muted-foreground", children: getDisplayValue(parsedFile.samples[0]?.[column]) }), _jsx("select", { value: fieldRoles[column] ?? 'metadata', onChange: (event) => setFieldRoles((current) => normalizeExpectedRoles({
|
|
416
566
|
...current,
|
|
417
567
|
[column]: event.target.value,
|
|
418
|
-
}, column)), disabled: !selectedFields[column], className: "h-8 rounded-md border bg-background px-2 text-sm outline-none focus-visible:ring-2 focus-visible:ring-ring", "aria-label": `${t('datasets.upload.role')}: ${column}`, children: ROLE_OPTIONS.map((option) => (_jsx("option", { value: option.role, children: t(option.labelKey) }, option.role))) })] }, column)))] })] })) })] })] }), _jsx("div", { className: "fixed bottom-0 left-0 right-0 z-20 border-t bg-background/95 px-4 py-3 shadow-lg backdrop-blur supports-[backdrop-filter]:bg-background/75 md:left-[var(--sidebar-width)]", children: _jsxs("div", { className: "mx-auto flex w-full max-w-[1440px] flex-col gap-3 sm:flex-row sm:items-center sm:justify-between", children: [_jsx("div", { className: "min-w-0 truncate font-mono text-[11.5px] text-muted-foreground", children: parsedFile ? (_jsxs("span", { children: [sampleCountLabel, " \u00B7 ", selectedColumns.length,
|
|
568
|
+
}, column)), disabled: !selectedFields[column], className: "h-8 rounded-md border bg-background px-2 text-sm outline-none focus-visible:ring-2 focus-visible:ring-ring", "aria-label": `${t('datasets.upload.role')}: ${column}`, children: ROLE_OPTIONS.map((option) => (_jsx("option", { value: option.role, children: t(option.labelKey) }, option.role))) })] }, column)))] })] })) })] })] }), _jsx("div", { className: "fixed bottom-0 left-0 right-0 z-20 border-t bg-background/95 px-4 py-3 shadow-lg backdrop-blur supports-[backdrop-filter]:bg-background/75 md:left-[var(--sidebar-width)]", children: _jsxs("div", { className: "mx-auto flex w-full max-w-[1440px] flex-col gap-3 sm:flex-row sm:items-center sm:justify-between", children: [_jsx("div", { className: "min-w-0 truncate font-mono text-[11.5px] text-muted-foreground", children: parsedFile ? (_jsxs("span", { children: [sampleCountLabel, " \u00B7 ", selectedColumns.length, " ", t('datasets.detail.fields'), " \u00B7", ' ', selectedColumns.length > 0
|
|
419
569
|
? t('datasets.upload.readyToImport')
|
|
420
570
|
: t('datasets.upload.noSelectedFields')] })) : (_jsx("span", { children: t('datasets.upload.waitingForFile') })) }), _jsxs("div", { className: "flex w-full flex-col-reverse gap-2 sm:w-auto sm:flex-row sm:items-center", children: [_jsx(Button, { asChild: true, variant: "outline", size: "sm", className: "h-9 w-full sm:w-auto", children: _jsx(Link, { href: `/datasets`, children: t('common.cancel') }) }), _jsxs(Button, { type: "button", size: "sm", className: "h-9 w-full sm:w-auto", disabled: !canImport, "aria-busy": isSubmitting, onClick: () => void importDataset(), children: [isSubmitting ? _jsx(Loader2, { className: "size-4 animate-spin" }) : _jsx(Check, { className: "size-4" }), importButtonLabel] })] })] }) }), _jsx(Dialog, { open: leaveDialogOpen, onOpenChange: (open) => {
|
|
421
571
|
if (!open)
|