@ekairos/dataset 1.22.79-beta.development.0 → 1.22.81-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,82 @@
1
- import type { AnyDatasetRuntime, DatasetBuilderState, InternalSource } from "./types.js";
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalSource } from "./types.js";
2
+ import type { SandboxState } from "../file/file-dataset.types.js";
3
+ import type { FilePreviewContext } from "../file/filepreview.types.js";
4
+ import type { TransformSandboxState, TransformSourcePreviewContext } from "../transform/transform-dataset.types.js";
2
5
  export declare function resolveDatasetAgentDurable(requestedDurable?: boolean): Promise<boolean>;
6
+ type PreparedFileDatasetContext = {
7
+ kind: "file";
8
+ datasetId: string;
9
+ sandboxId: string;
10
+ fileId: string;
11
+ sandboxState: SandboxState;
12
+ filePreview?: FilePreviewContext;
13
+ schema?: DatasetSchemaInput | null;
14
+ };
15
+ type PreparedTransformDatasetContext = {
16
+ kind: "transform";
17
+ datasetId: string;
18
+ sandboxId: string;
19
+ sourceDatasetIds: string[];
20
+ outputSchema: DatasetSchemaInput;
21
+ sandboxState: TransformSandboxState;
22
+ sourcePreviews?: Array<{
23
+ datasetId: string;
24
+ preview: TransformSourcePreviewContext;
25
+ }>;
26
+ };
27
+ type PreparedDatasetContext = PreparedFileDatasetContext | PreparedTransformDatasetContext;
28
+ type DatasetContextInitialization = PreparedDatasetContext & {
29
+ prompt: string;
30
+ instructions?: string;
31
+ };
32
+ export declare function initializeDatasetStep<Runtime extends AnyDatasetRuntime>(params: {
33
+ runtime: Runtime;
34
+ datasetId: string;
35
+ sandboxId: string;
36
+ title?: string;
37
+ instructions?: string;
38
+ sources: any[];
39
+ sourceKinds: string[];
40
+ schema?: DatasetSchemaInput;
41
+ }): Promise<{
42
+ datasetId: string;
43
+ sandboxId: string;
44
+ }>;
45
+ export declare function prepareDatasetSourcesStep<Runtime extends AnyDatasetRuntime>(params: {
46
+ kind: "file";
47
+ runtime: Runtime;
48
+ datasetId: string;
49
+ sandboxId: string;
50
+ source: Extract<InternalSource, {
51
+ kind: "file" | "text";
52
+ }>;
53
+ schema?: DatasetSchemaInput;
54
+ } | {
55
+ kind: "transform";
56
+ runtime: Runtime;
57
+ datasetId: string;
58
+ sandboxId: string;
59
+ sourceDatasetIds: string[];
60
+ outputSchema: DatasetSchemaInput;
61
+ }): Promise<PreparedDatasetContext>;
62
+ export declare function initializeDatasetContextStep(params: {
63
+ prepared: PreparedDatasetContext;
64
+ instructions?: string;
65
+ outputSchema?: DatasetSchemaInput;
66
+ }): Promise<DatasetContextInitialization>;
67
+ export declare function completeDatasetStep<Runtime extends AnyDatasetRuntime>(params: {
68
+ runtime: Runtime;
69
+ datasetId: string;
70
+ schema?: DatasetSchemaInput;
71
+ first: boolean;
72
+ }): Promise<{
73
+ datasetId: string;
74
+ dataset: any;
75
+ previewRows: any[];
76
+ firstRow: any;
77
+ }>;
3
78
  export declare function materializeSingleFileLikeSource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, source: Extract<InternalSource, {
4
79
  kind: "file" | "text";
5
80
  }>, targetDatasetId: string): Promise<string>;
6
81
  export declare function materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;
82
+ export {};
@@ -1,14 +1,16 @@
1
1
  import { createFileParseContext } from "../file/file-dataset.agent.js";
2
2
  import { readInstantFileStep } from "../file/steps.js";
3
+ import { generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "../file/file-dataset.steps.js";
3
4
  import { createTransformDatasetContext } from "../transform/transform-dataset.agent.js";
4
- import { datasetInferAndUpdateSchemaStep, datasetReadOneStep, } from "../dataset/steps.js";
5
- import { getDatasetOutputPath, getDatasetWorkstation } from "../datasetFiles.js";
5
+ import { ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "../transform/transform-dataset.steps.js";
6
+ import { datasetGetByIdStep, datasetInferAndUpdateSchemaStep, datasetPreviewRowsStep, datasetReadOneStep, } from "../dataset/steps.js";
7
+ import { getDatasetOutputPath, getDatasetScriptsDir, getDatasetSourcesDir, getDatasetStandardDirs, } from "../datasetFiles.js";
6
8
  import { registerDatasetAgentMaterializers } from "./agentMaterializers.js";
7
9
  import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransformInstructions, } from "./instructions.js";
8
10
  import { createOrUpdateDatasetMetadata, materializeRowsToDataset, uploadInlineTextSource, } from "./persistence.js";
9
11
  import { getDomainDescriptor } from "./sourceRows.js";
10
12
  import { materializeQuerySource } from "./materializeQuery.js";
11
- import { createDatasetSandboxStep, readDatasetSandboxTextFileStep, runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "../sandbox/steps.js";
13
+ import { readDatasetSandboxTextFileStep, runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "../sandbox/steps.js";
12
14
  function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
13
15
  return `${targetDatasetId}__${sourceKind}_${index}`;
14
16
  }
@@ -101,17 +103,16 @@ async function tryMaterializeRawPdfFileSource(state, source, targetDatasetId) {
101
103
  const file = await readInstantFileStep({ runtime: state.runtime, fileId: source.fileId });
102
104
  if (!isPdfContentDisposition(file.contentDisposition))
103
105
  return null;
104
- const sandboxId = await resolveDatasetSandboxId(state, targetDatasetId);
105
- const workstation = getDatasetWorkstation(targetDatasetId);
106
+ const sandboxId = resolveDatasetSandboxId(state, targetDatasetId);
106
107
  const outputPath = getDatasetOutputPath(targetDatasetId);
107
108
  const fileName = sanitizePdfFileName(parseContentDispositionFileName(file.contentDisposition), `${source.fileId}.pdf`);
108
- const sourcePath = `${workstation}/${fileName}`;
109
- const scriptPath = `${workstation}/extract_pdf_text.py`;
109
+ const sourcePath = `${getDatasetSourcesDir(targetDatasetId)}/${fileName}`;
110
+ const scriptPath = `${getDatasetScriptsDir(targetDatasetId)}/extract_pdf_text.py`;
110
111
  await runDatasetSandboxCommandStep({
111
112
  runtime: state.runtime,
112
113
  sandboxId,
113
114
  cmd: "mkdir",
114
- args: ["-p", workstation],
115
+ args: ["-p", ...getDatasetStandardDirs(targetDatasetId)],
115
116
  });
116
117
  await writeDatasetSandboxFilesStep({
117
118
  runtime: state.runtime,
@@ -222,24 +223,11 @@ async function materializeRawTextSource(state, source, targetDatasetId) {
222
223
  });
223
224
  return targetDatasetId;
224
225
  }
225
- async function resolveDatasetSandboxId(state, targetDatasetId) {
226
+ function resolveDatasetSandboxId(state, _targetDatasetId) {
226
227
  const sandboxId = String(state.sandboxId ?? "").trim();
227
228
  if (sandboxId)
228
229
  return sandboxId;
229
- const created = await createDatasetSandboxStep({
230
- runtime: state.runtime,
231
- provider: "vercel",
232
- sandboxRuntime: "python3.13",
233
- timeoutMs: 20 * 60 * 1000,
234
- resources: { vcpus: 2 },
235
- purpose: "dataset.materialize",
236
- params: { datasetId: targetDatasetId },
237
- vercel: {
238
- profile: "ephemeral",
239
- deleteOnStop: true,
240
- },
241
- });
242
- return created.sandboxId;
230
+ throw new Error("dataset_sandbox_required");
243
231
  }
244
232
  export async function resolveDatasetAgentDurable(requestedDurable) {
245
233
  if (!requestedDurable)
@@ -255,20 +243,150 @@ export async function resolveDatasetAgentDurable(requestedDurable) {
255
243
  }
256
244
  return true;
257
245
  }
246
+ export async function initializeDatasetStep(params) {
247
+ "use step";
248
+ await createOrUpdateDatasetMetadata(params.runtime, {
249
+ datasetId: params.datasetId,
250
+ sandboxId: params.sandboxId,
251
+ title: params.title ?? params.datasetId,
252
+ instructions: params.instructions,
253
+ sources: params.sources,
254
+ sourceKinds: params.sourceKinds,
255
+ schema: params.schema,
256
+ status: "building",
257
+ });
258
+ return {
259
+ datasetId: params.datasetId,
260
+ sandboxId: params.sandboxId,
261
+ };
262
+ }
263
+ export async function prepareDatasetSourcesStep(params) {
264
+ "use step";
265
+ if (params.kind === "file") {
266
+ const fileId = params.source.kind === "file"
267
+ ? params.source.fileId
268
+ : await uploadInlineTextSource(params.runtime, params.datasetId, params.source);
269
+ const initialized = await initializeFileParseSandboxStep({
270
+ runtime: params.runtime,
271
+ sandboxId: params.sandboxId,
272
+ datasetId: params.datasetId,
273
+ fileId,
274
+ state: { initialized: false, filePath: "" },
275
+ });
276
+ const filePreview = await generateFileParsePreviewStep({
277
+ runtime: params.runtime,
278
+ sandboxId: params.sandboxId,
279
+ sandboxFilePath: initialized.filePath,
280
+ datasetId: params.datasetId,
281
+ });
282
+ return {
283
+ kind: "file",
284
+ datasetId: params.datasetId,
285
+ sandboxId: params.sandboxId,
286
+ fileId,
287
+ sandboxState: initialized.state,
288
+ filePreview,
289
+ schema: params.schema ?? null,
290
+ };
291
+ }
292
+ const initialized = await ensureTransformSourcesInSandboxStep({
293
+ runtime: params.runtime,
294
+ sandboxId: params.sandboxId,
295
+ datasetId: params.datasetId,
296
+ sourceDatasetIds: params.sourceDatasetIds,
297
+ state: { initialized: false, sourcePaths: [] },
298
+ });
299
+ const sourcePreviews = await generateTransformSourcePreviewsStep({
300
+ runtime: params.runtime,
301
+ sandboxId: params.sandboxId,
302
+ datasetId: params.datasetId,
303
+ sourcePaths: initialized.sourcePaths,
304
+ });
305
+ return {
306
+ kind: "transform",
307
+ datasetId: params.datasetId,
308
+ sandboxId: params.sandboxId,
309
+ sourceDatasetIds: params.sourceDatasetIds,
310
+ outputSchema: params.outputSchema,
311
+ sandboxState: initialized.state,
312
+ sourcePreviews,
313
+ };
314
+ }
315
+ export async function initializeDatasetContextStep(params) {
316
+ "use step";
317
+ if (params.prepared.kind === "file") {
318
+ return {
319
+ ...params.prepared,
320
+ instructions: params.instructions ?? buildFileDefaultInstructions(params.outputSchema),
321
+ prompt: "generate a dataset for this file",
322
+ };
323
+ }
324
+ return {
325
+ ...params.prepared,
326
+ instructions: params.instructions,
327
+ prompt: params.prepared.sourceDatasetIds.length === 1
328
+ ? "Transform the source dataset into a new dataset matching the provided output schema"
329
+ : `Transform ${params.prepared.sourceDatasetIds.length} source datasets into a new dataset matching the provided output schema`,
330
+ };
331
+ }
332
+ export async function completeDatasetStep(params) {
333
+ "use step";
334
+ let datasetResult = await datasetGetByIdStep({
335
+ runtime: params.runtime,
336
+ datasetId: params.datasetId,
337
+ });
338
+ if (!datasetResult.ok)
339
+ throw new Error(datasetResult.error);
340
+ if (!params.schema && !datasetResult.data?.schema) {
341
+ await datasetInferAndUpdateSchemaStep({
342
+ runtime: params.runtime,
343
+ datasetId: params.datasetId,
344
+ title: `${params.datasetId}Row`,
345
+ description: "One dataset row",
346
+ });
347
+ datasetResult = await datasetGetByIdStep({
348
+ runtime: params.runtime,
349
+ datasetId: params.datasetId,
350
+ });
351
+ if (!datasetResult.ok)
352
+ throw new Error(datasetResult.error);
353
+ }
354
+ const previewResult = await datasetPreviewRowsStep({
355
+ runtime: params.runtime,
356
+ datasetId: params.datasetId,
357
+ limit: 20,
358
+ });
359
+ if (!params.first) {
360
+ return {
361
+ datasetId: params.datasetId,
362
+ dataset: datasetResult.data,
363
+ previewRows: previewResult.rows,
364
+ firstRow: undefined,
365
+ };
366
+ }
367
+ const firstResult = await datasetReadOneStep({
368
+ runtime: params.runtime,
369
+ datasetId: params.datasetId,
370
+ });
371
+ return {
372
+ datasetId: params.datasetId,
373
+ dataset: datasetResult.data,
374
+ previewRows: previewResult.rows,
375
+ firstRow: firstResult.row,
376
+ };
377
+ }
258
378
  export async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
259
379
  if (source.kind === "file" && !state.outputSchema) {
260
380
  const materializedPdf = await tryMaterializeRawPdfFileSource(state, source, targetDatasetId);
261
381
  if (materializedPdf)
262
382
  return materializedPdf;
263
383
  }
384
+ const sandboxId = resolveDatasetSandboxId(state, targetDatasetId);
264
385
  if (!state.reactor) {
265
386
  throw new Error("dataset_reactor_required");
266
387
  }
267
- const sandboxId = await resolveDatasetSandboxId(state, targetDatasetId);
268
- const fileId = source.kind === "file"
269
- ? source.fileId
270
- : await uploadInlineTextSource(state.runtime, targetDatasetId, source);
271
- await createOrUpdateDatasetMetadata(state.runtime, {
388
+ await initializeDatasetStep({
389
+ runtime: state.runtime,
272
390
  datasetId: targetDatasetId,
273
391
  sandboxId,
274
392
  title: state.title ?? targetDatasetId,
@@ -285,28 +403,45 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
285
403
  ],
286
404
  sourceKinds: [source.kind],
287
405
  schema: state.outputSchema,
288
- status: "building",
289
406
  });
290
- const parseContext = createFileParseContext(fileId, {
407
+ const prepared = await prepareDatasetSourcesStep({
408
+ kind: "file",
409
+ runtime: state.runtime,
291
410
  datasetId: targetDatasetId,
292
- instructions: state.instructions ?? buildFileDefaultInstructions(state.outputSchema),
293
- reactor: state.reactor,
294
411
  sandboxId,
412
+ source,
413
+ schema: state.outputSchema,
414
+ });
415
+ const context = await initializeDatasetContextStep({
416
+ prepared,
417
+ instructions: state.instructions,
418
+ outputSchema: state.outputSchema,
419
+ });
420
+ if (context.kind !== "file") {
421
+ throw new Error("dataset_context_kind_mismatch:file");
422
+ }
423
+ const parseContext = createFileParseContext(context.fileId, {
424
+ datasetId: context.datasetId,
425
+ instructions: context.instructions,
426
+ reactor: state.reactor,
427
+ sandboxId: context.sandboxId,
428
+ sandboxState: context.sandboxState,
429
+ filePreview: context.filePreview,
430
+ schema: context.schema,
295
431
  });
296
432
  await parseContext.parse(state.runtime, {
297
433
  durable: await resolveDatasetAgentDurable(state.durable),
434
+ prompt: context.prompt,
435
+ initialContent: {
436
+ datasetId: context.datasetId,
437
+ fileId: context.fileId,
438
+ instructions: context.instructions ?? "",
439
+ sandboxId: context.sandboxId,
440
+ sandboxState: context.sandboxState,
441
+ filePreview: context.filePreview,
442
+ schema: context.schema,
443
+ },
298
444
  });
299
- if (!state.outputSchema) {
300
- await datasetInferAndUpdateSchemaStep({
301
- runtime: state.runtime,
302
- datasetId: targetDatasetId,
303
- title: `${targetDatasetId}Row`,
304
- description: "One dataset row",
305
- });
306
- }
307
- if (state.first) {
308
- await datasetReadOneStep({ runtime: state.runtime, datasetId: targetDatasetId });
309
- }
310
445
  return targetDatasetId;
311
446
  }
312
447
  async function normalizeSourceToDatasetId(state, source, targetDatasetId, sourceIndex) {
@@ -345,7 +480,7 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
345
480
  if (!state.reactor) {
346
481
  throw new Error("dataset_reactor_required");
347
482
  }
348
- const sandboxId = await resolveDatasetSandboxId(state, targetDatasetId);
483
+ const sandboxId = resolveDatasetSandboxId(state, targetDatasetId);
349
484
  const stateWithSandbox = { ...state, sandboxId };
350
485
  const normalizedSources = [];
351
486
  for (let index = 0; index < stateWithSandbox.sources.length; index++) {
@@ -361,7 +496,8 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
361
496
  properties: {},
362
497
  },
363
498
  };
364
- await createOrUpdateDatasetMetadata(stateWithSandbox.runtime, {
499
+ await initializeDatasetStep({
500
+ runtime: stateWithSandbox.runtime,
365
501
  datasetId: targetDatasetId,
366
502
  sandboxId,
367
503
  title: stateWithSandbox.title ?? targetDatasetId,
@@ -377,30 +513,46 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
377
513
  : source),
378
514
  sourceKinds: stateWithSandbox.sources.map((source) => source.kind),
379
515
  schema: transformSchema,
380
- status: "building",
381
516
  });
382
- const transformContext = createTransformDatasetContext({
517
+ const prepared = await prepareDatasetSourcesStep({
518
+ kind: "transform",
519
+ runtime: stateWithSandbox.runtime,
520
+ datasetId: targetDatasetId,
521
+ sandboxId,
383
522
  sourceDatasetIds: normalizedSources,
384
523
  outputSchema: transformSchema,
524
+ });
525
+ const context = await initializeDatasetContextStep({
526
+ prepared,
385
527
  instructions: buildTransformInstructions(normalizedSources.length, stateWithSandbox.instructions, stateWithSandbox.outputSchema),
386
- datasetId: targetDatasetId,
528
+ outputSchema: transformSchema,
529
+ });
530
+ if (context.kind !== "transform") {
531
+ throw new Error("dataset_context_kind_mismatch:transform");
532
+ }
533
+ const transformContext = createTransformDatasetContext({
534
+ sourceDatasetIds: context.sourceDatasetIds,
535
+ outputSchema: context.outputSchema,
536
+ instructions: context.instructions,
537
+ datasetId: context.datasetId,
387
538
  reactor: stateWithSandbox.reactor,
388
- sandboxId,
539
+ sandboxId: context.sandboxId,
540
+ sandboxState: context.sandboxState,
541
+ sourcePreviews: context.sourcePreviews,
389
542
  });
390
543
  await transformContext.transform(stateWithSandbox.runtime, {
391
544
  durable: await resolveDatasetAgentDurable(stateWithSandbox.durable),
545
+ prompt: context.prompt,
546
+ initialContent: {
547
+ datasetId: context.datasetId,
548
+ sourceDatasetIds: context.sourceDatasetIds,
549
+ outputSchema: context.outputSchema,
550
+ instructions: context.instructions,
551
+ sandboxId: context.sandboxId,
552
+ sandboxState: context.sandboxState,
553
+ sourcePreviews: context.sourcePreviews,
554
+ },
392
555
  });
393
- if (!stateWithSandbox.outputSchema) {
394
- await datasetInferAndUpdateSchemaStep({
395
- runtime: stateWithSandbox.runtime,
396
- datasetId: targetDatasetId,
397
- title: `${targetDatasetId}Row`,
398
- description: "One dataset row",
399
- });
400
- }
401
- if (stateWithSandbox.first) {
402
- await datasetReadOneStep({ runtime: stateWithSandbox.runtime, datasetId: targetDatasetId });
403
- }
404
556
  return targetDatasetId;
405
557
  }
406
558
  registerDatasetAgentMaterializers({
@@ -15,3 +15,9 @@ export declare function createOrUpdateDatasetMetadata<Runtime extends AnyDataset
15
15
  export declare function materializeRowsToDataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: MaterializeRowsParams): Promise<string>;
16
16
  export declare function uploadInlineTextSource<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, source: DatasetTextSourceInput): Promise<string>;
17
17
  export declare function finalizeBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, withFirst: boolean): Promise<DatasetBuildResult>;
18
+ export declare function createDatasetBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: {
19
+ datasetId: string;
20
+ dataset: any;
21
+ previewRows: any[];
22
+ firstRow?: any | null;
23
+ }): DatasetBuildResult;
@@ -128,3 +128,25 @@ export async function finalizeBuildResult(runtime, datasetId, withFirst) {
128
128
  firstRow: firstResult.row,
129
129
  };
130
130
  }
131
+ export function createDatasetBuildResult(runtime, params) {
132
+ const reader = {
133
+ async read(cursorOrParams, limit) {
134
+ const readParams = typeof cursorOrParams === "object" && cursorOrParams !== null
135
+ ? cursorOrParams
136
+ : { cursor: cursorOrParams, limit };
137
+ return await datasetReadRowsStep({
138
+ runtime,
139
+ datasetId: params.datasetId,
140
+ cursor: readParams.cursor,
141
+ limit: readParams.limit,
142
+ });
143
+ },
144
+ };
145
+ return {
146
+ datasetId: params.datasetId,
147
+ dataset: params.dataset,
148
+ previewRows: params.previewRows,
149
+ reader,
150
+ ...(params.firstRow !== undefined ? { firstRow: params.firstRow } : {}),
151
+ };
152
+ }
@@ -0,0 +1,87 @@
1
+ export interface PersistDatasetStepParams {
2
+ datasetId: string;
3
+ sandboxId: string;
4
+ runtime: any;
5
+ summary?: string;
6
+ }
7
+ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary }: PersistDatasetStepParams): Promise<{
8
+ success: boolean;
9
+ validation?: RowValidationEntry[];
10
+ validationTruncated?: number;
11
+ failureSummary?: ValidationFailureSummary;
12
+ repairInstructions?: string[];
13
+ validRowCount?: number;
14
+ rowRecordCount?: number;
15
+ error?: string;
16
+ status?: string;
17
+ message?: string;
18
+ } | {
19
+ success: boolean;
20
+ status: string;
21
+ validRows: number;
22
+ rowRecordCount: number;
23
+ validation: RowValidationEntry[] | undefined;
24
+ error: string;
25
+ message: string;
26
+ fileId?: undefined;
27
+ storagePath?: undefined;
28
+ } | {
29
+ success: boolean;
30
+ status: string;
31
+ validRows: number;
32
+ rowRecordCount: number;
33
+ fileId: string;
34
+ storagePath: string;
35
+ message: string;
36
+ validation?: undefined;
37
+ error?: undefined;
38
+ }>;
39
+ type RowValidationEntry = {
40
+ index: number;
41
+ valid: boolean;
42
+ errors?: string[];
43
+ errorDetails?: Array<{
44
+ path: string;
45
+ keyword: string;
46
+ message: string;
47
+ params?: Record<string, unknown>;
48
+ schemaPath?: string;
49
+ }>;
50
+ dataKeys?: string[];
51
+ };
52
+ type ValidationFailureSummary = {
53
+ rowRecordCount: number;
54
+ validRowCount: number;
55
+ invalidRowCount: number;
56
+ expectedTopLevelKeys: string[];
57
+ requiredTopLevelKeys: string[];
58
+ requiredPaths: string[];
59
+ enumConstraints: Array<{
60
+ path: string;
61
+ values: unknown[];
62
+ }>;
63
+ topErrors: Array<{
64
+ message: string;
65
+ count: number;
66
+ }>;
67
+ missingRequiredProperties: Array<{
68
+ property: string;
69
+ count: number;
70
+ }>;
71
+ additionalProperties: Array<{
72
+ property: string;
73
+ count: number;
74
+ }>;
75
+ enumFailures: Array<{
76
+ path: string;
77
+ allowedValues: unknown[];
78
+ count: number;
79
+ }>;
80
+ observedTopLevelKeys: string[];
81
+ sampleInvalidRows: Array<{
82
+ index: number;
83
+ dataKeys?: string[];
84
+ errors?: string[];
85
+ }>;
86
+ };
87
+ export {};