@ekairos/dataset 1.22.85-beta.development.0 → 1.22.86-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/context.d.ts +8 -0
- package/dist/builder/context.js +68 -9
- package/dist/builder/instructions.js +3 -2
- package/dist/builder/materialize.js +11 -25
- package/dist/builder/types.d.ts +2 -1
- package/dist/completeDataset.steps.d.ts +29 -0
- package/dist/completeDataset.steps.js +32 -1
- package/dist/completeDataset.tool.d.ts +41 -0
- package/dist/completeDataset.tool.js +6 -3
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +7 -0
- package/dist/contextWorkspace.js +17 -1
- package/dist/dataset/steps.js +12 -0
- package/dist/dataset.js +1 -0
- package/dist/executeCommand.tool.d.ts +1 -4
- package/dist/executeCommand.tool.js +113 -31
- package/dist/sandbox/steps.js +4 -2
- package/dist/service.d.ts +4 -0
- package/dist/service.js +59 -2
- package/dist/transform/prompts.js +37 -21
- package/dist/transform/transform-dataset.agent.d.ts +1 -0
- package/dist/transform/transform-dataset.agent.js +25 -25
- package/dist/transform/transform-dataset.types.d.ts +4 -1
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +4 -4
|
@@ -2,6 +2,14 @@ import type { AnyDatasetRuntime, InternalDatasetResource } from "./types.js";
|
|
|
2
2
|
type DatasetContextResolution = {
|
|
3
3
|
contextId: string;
|
|
4
4
|
resources: InternalDatasetResource[];
|
|
5
|
+
contextResources: DatasetContextResourceRecord[];
|
|
6
|
+
};
|
|
7
|
+
type DatasetContextResourceRecord = {
|
|
8
|
+
key: string;
|
|
9
|
+
type: string;
|
|
10
|
+
name: string;
|
|
11
|
+
description: string;
|
|
12
|
+
[key: string]: unknown;
|
|
5
13
|
};
|
|
6
14
|
export declare function resolveDatasetResourceContext<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, resources: InternalDatasetResource[]): Promise<DatasetContextResolution>;
|
|
7
15
|
export {};
|
package/dist/builder/context.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { eventsDomain } from "@ekairos/events";
|
|
2
2
|
import { createDatasetId } from "../id.js";
|
|
3
|
+
import { datasetDomain } from "../schema.js";
|
|
4
|
+
import { DatasetService } from "../service.js";
|
|
3
5
|
import { getDomainDescriptor } from "./rows.js";
|
|
4
6
|
function getContextWhere(context) {
|
|
5
7
|
return "id" in context ? { id: context.id } : { key: context.key };
|
|
@@ -8,6 +10,10 @@ async function getEventsDb(runtime) {
|
|
|
8
10
|
const scoped = await runtime.use(eventsDomain);
|
|
9
11
|
return scoped.db;
|
|
10
12
|
}
|
|
13
|
+
async function getDatasetDb(runtime) {
|
|
14
|
+
const scoped = await runtime.use(datasetDomain);
|
|
15
|
+
return scoped.db;
|
|
16
|
+
}
|
|
11
17
|
function resourceKey(index, resource) {
|
|
12
18
|
if (resource.kind === "file")
|
|
13
19
|
return `file:${index}:${resource.fileId}`;
|
|
@@ -81,27 +87,63 @@ function resourceToContextResource(index, resource) {
|
|
|
81
87
|
async function createDatasetResourceContextStep(params) {
|
|
82
88
|
"use step";
|
|
83
89
|
const db = await getEventsDb(params.runtime);
|
|
84
|
-
const
|
|
90
|
+
const contextKey = `dataset:${params.datasetId}`;
|
|
91
|
+
const existing = await db.query({
|
|
92
|
+
event_contexts: {
|
|
93
|
+
$: { where: { key: contextKey }, limit: 1 },
|
|
94
|
+
},
|
|
95
|
+
});
|
|
96
|
+
const contextId = existing.event_contexts?.[0]?.id ?? createDatasetId();
|
|
85
97
|
const now = new Date();
|
|
98
|
+
const resources = await enrichDatasetContextResources(params.runtime, params.resources);
|
|
86
99
|
await db.transact([
|
|
87
|
-
db.tx.event_contexts[contextId].
|
|
100
|
+
db.tx.event_contexts[contextId].update({
|
|
101
|
+
key: contextKey,
|
|
88
102
|
createdAt: now,
|
|
89
103
|
updatedAt: now,
|
|
90
|
-
name: `Dataset ${params.datasetId}
|
|
104
|
+
name: `Dataset ${params.datasetId}`,
|
|
91
105
|
status: "open_idle",
|
|
92
106
|
content: {
|
|
93
107
|
datasetId: params.datasetId,
|
|
94
|
-
resourceCount:
|
|
108
|
+
resourceCount: resources.length,
|
|
95
109
|
},
|
|
96
|
-
resources
|
|
97
|
-
description: `Dataset
|
|
98
|
-
goal: "
|
|
110
|
+
resources,
|
|
111
|
+
description: `Dataset execution context for ${params.datasetId}.`,
|
|
112
|
+
goal: "Produce the dataset output from the resources declared in this context.",
|
|
99
113
|
}),
|
|
100
114
|
]);
|
|
101
115
|
return {
|
|
102
116
|
contextId,
|
|
103
117
|
};
|
|
104
118
|
}
|
|
119
|
+
async function enrichDatasetContextResources(runtime, resources) {
|
|
120
|
+
const datasetResources = resources.filter((resource) => resource.type === "dataset" && typeof resource.datasetId === "string");
|
|
121
|
+
if (datasetResources.length === 0)
|
|
122
|
+
return resources;
|
|
123
|
+
const db = await getDatasetDb(runtime);
|
|
124
|
+
const service = new DatasetService(db);
|
|
125
|
+
const enriched = [];
|
|
126
|
+
for (const resource of resources) {
|
|
127
|
+
if (resource.type !== "dataset" || typeof resource.datasetId !== "string") {
|
|
128
|
+
enriched.push(resource);
|
|
129
|
+
continue;
|
|
130
|
+
}
|
|
131
|
+
const preview = await service.previewRows(resource.datasetId, 20);
|
|
132
|
+
if (!preview.ok) {
|
|
133
|
+
enriched.push({
|
|
134
|
+
...resource,
|
|
135
|
+
previewError: preview.error,
|
|
136
|
+
});
|
|
137
|
+
continue;
|
|
138
|
+
}
|
|
139
|
+
enriched.push({
|
|
140
|
+
...resource,
|
|
141
|
+
previewRows: preview.data,
|
|
142
|
+
previewLimit: 20,
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
return enriched;
|
|
146
|
+
}
|
|
105
147
|
function contextResourceToDatasetResource(resource) {
|
|
106
148
|
if (resource.type === "file" && typeof resource.fileId === "string" && resource.fileId.trim()) {
|
|
107
149
|
return {
|
|
@@ -163,9 +205,16 @@ async function readExistingContext(params) {
|
|
|
163
205
|
if (resources.length === 0) {
|
|
164
206
|
throw new Error("dataset_context_resources_required");
|
|
165
207
|
}
|
|
208
|
+
const sourceContextId = String(row.id);
|
|
209
|
+
const copiedResources = resources.map((resource) => ({
|
|
210
|
+
...resource,
|
|
211
|
+
sourceContextId: resource.sourceContextId ?? sourceContextId,
|
|
212
|
+
sourceResourceKey: resource.sourceResourceKey ?? resource.key,
|
|
213
|
+
}));
|
|
166
214
|
return {
|
|
167
|
-
contextId:
|
|
215
|
+
contextId: sourceContextId,
|
|
168
216
|
resources: resources.map((resource) => contextResourceToDatasetResource(resource)),
|
|
217
|
+
contextResources: copiedResources,
|
|
169
218
|
};
|
|
170
219
|
}
|
|
171
220
|
export async function resolveDatasetResourceContext(runtime, datasetId, resources) {
|
|
@@ -177,7 +226,16 @@ export async function resolveDatasetResourceContext(runtime, datasetId, resource
|
|
|
177
226
|
if (resources.length > 1) {
|
|
178
227
|
throw new Error("dataset_context_resource_is_exclusive");
|
|
179
228
|
}
|
|
180
|
-
|
|
229
|
+
const source = await readExistingContext({ runtime, context: contextRefs[0] });
|
|
230
|
+
const created = await createDatasetResourceContextStep({
|
|
231
|
+
runtime,
|
|
232
|
+
datasetId,
|
|
233
|
+
resources: source.contextResources,
|
|
234
|
+
});
|
|
235
|
+
return {
|
|
236
|
+
...source,
|
|
237
|
+
contextId: created.contextId,
|
|
238
|
+
};
|
|
181
239
|
}
|
|
182
240
|
const contextResourceRecords = resources.map((resource, index) => resourceToContextResource(index, resource));
|
|
183
241
|
const created = await createDatasetResourceContextStep({
|
|
@@ -188,5 +246,6 @@ export async function resolveDatasetResourceContext(runtime, datasetId, resource
|
|
|
188
246
|
return {
|
|
189
247
|
contextId: created.contextId,
|
|
190
248
|
resources,
|
|
249
|
+
contextResources: contextResourceRecords,
|
|
191
250
|
};
|
|
192
251
|
}
|
|
@@ -29,8 +29,9 @@ export function buildObjectOutputInstructions(userInstructions) {
|
|
|
29
29
|
const base = String(userInstructions ?? "").trim();
|
|
30
30
|
const objectContract = [
|
|
31
31
|
"Output mode is object.",
|
|
32
|
-
"Produce exactly one
|
|
33
|
-
"
|
|
32
|
+
"Produce exactly one final object.",
|
|
33
|
+
"completeObject({ data: <the final object>, summary }) is available to complete the dataset directly.",
|
|
34
|
+
"If you use output.jsonl instead, produce exactly one row: {\"type\":\"row\",\"data\":<the final object>}.",
|
|
34
35
|
"Do not emit multiple rows, headers, summaries, or metadata rows.",
|
|
35
36
|
].join("\n");
|
|
36
37
|
if (!base)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { createFileParseContext } from "../file/file-dataset.agent.js";
|
|
2
2
|
import { readInstantFileStep } from "../file/steps.js";
|
|
3
3
|
import { createTransformDatasetContext } from "../transform/transform-dataset.agent.js";
|
|
4
|
-
import { ensureTransformInputsInSandboxStep, generateTransformInputPreviewsStep, } from "../transform/transform-dataset.steps.js";
|
|
5
4
|
import { datasetGetByIdStep, datasetInferAndUpdateSchemaStep, datasetPreviewRowsStep, datasetReadOneStep, } from "../dataset/steps.js";
|
|
6
5
|
import { getDatasetOutputPath, getDatasetScriptsDir, getDatasetResourcesDir, getDatasetStandardDirs, } from "../datasetFiles.js";
|
|
7
6
|
import { registerDatasetAgentMaterializers } from "./agentMaterializers.js";
|
|
@@ -288,27 +287,14 @@ export async function prepareDatasetResourcesStep(params) {
|
|
|
288
287
|
mediaType: params.resource.kind === "file" ? params.resource.mediaType : params.resource.mimeType,
|
|
289
288
|
};
|
|
290
289
|
}
|
|
291
|
-
const initialized = await ensureTransformInputsInSandboxStep({
|
|
292
|
-
runtime: params.runtime,
|
|
293
|
-
sandboxId: params.sandboxId,
|
|
294
|
-
datasetId: params.datasetId,
|
|
295
|
-
inputDatasetIds: params.inputDatasetIds,
|
|
296
|
-
state: { initialized: false, inputPaths: [] },
|
|
297
|
-
});
|
|
298
|
-
const inputPreviews = await generateTransformInputPreviewsStep({
|
|
299
|
-
runtime: params.runtime,
|
|
300
|
-
sandboxId: params.sandboxId,
|
|
301
|
-
datasetId: params.datasetId,
|
|
302
|
-
inputPaths: initialized.inputPaths,
|
|
303
|
-
});
|
|
304
290
|
return {
|
|
305
291
|
kind: "transform",
|
|
306
292
|
datasetId: params.datasetId,
|
|
307
293
|
sandboxId: params.sandboxId,
|
|
308
294
|
inputDatasetIds: params.inputDatasetIds,
|
|
309
295
|
outputSchema: params.outputSchema,
|
|
310
|
-
sandboxState: initialized
|
|
311
|
-
inputPreviews,
|
|
296
|
+
sandboxState: { initialized: false, inputPaths: [] },
|
|
297
|
+
inputPreviews: undefined,
|
|
312
298
|
};
|
|
313
299
|
}
|
|
314
300
|
export async function initializeDatasetContextStep(params) {
|
|
@@ -492,10 +478,7 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
492
478
|
}
|
|
493
479
|
const sandboxId = resolveDatasetSandboxId(state, targetDatasetId);
|
|
494
480
|
const stateWithSandbox = { ...state, sandboxId };
|
|
495
|
-
const
|
|
496
|
-
for (let index = 0; index < stateWithSandbox.resources.length; index++) {
|
|
497
|
-
normalizedResources.push(await normalizeResourceToDatasetId(stateWithSandbox, stateWithSandbox.resources[index], targetDatasetId, index));
|
|
498
|
-
}
|
|
481
|
+
const inputDatasetIds = (stateWithSandbox.contextResources ?? []).map((resource, index) => String(resource.datasetId ?? resource.key ?? `resource_${index + 1}`));
|
|
499
482
|
const transformSchema = stateWithSandbox.outputSchema ??
|
|
500
483
|
{
|
|
501
484
|
title: "DatasetRow",
|
|
@@ -515,17 +498,18 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
515
498
|
contextId: stateWithSandbox.contextId ?? "",
|
|
516
499
|
schema: transformSchema,
|
|
517
500
|
});
|
|
518
|
-
const prepared =
|
|
501
|
+
const prepared = {
|
|
519
502
|
kind: "transform",
|
|
520
|
-
runtime: stateWithSandbox.runtime,
|
|
521
503
|
datasetId: targetDatasetId,
|
|
522
504
|
sandboxId,
|
|
523
|
-
inputDatasetIds
|
|
505
|
+
inputDatasetIds,
|
|
524
506
|
outputSchema: transformSchema,
|
|
525
|
-
|
|
507
|
+
sandboxState: { initialized: false, inputPaths: [] },
|
|
508
|
+
inputPreviews: undefined,
|
|
509
|
+
};
|
|
526
510
|
const context = await initializeDatasetContextStep({
|
|
527
511
|
prepared,
|
|
528
|
-
instructions: buildTransformInstructions(
|
|
512
|
+
instructions: buildTransformInstructions(inputDatasetIds.length, stateWithSandbox.instructions, stateWithSandbox.outputSchema),
|
|
529
513
|
outputSchema: transformSchema,
|
|
530
514
|
});
|
|
531
515
|
if (context.kind !== "transform") {
|
|
@@ -540,6 +524,7 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
540
524
|
sandboxId: context.sandboxId,
|
|
541
525
|
sandboxState: context.sandboxState,
|
|
542
526
|
inputPreviews: context.inputPreviews,
|
|
527
|
+
contextResources: stateWithSandbox.contextResources ?? [],
|
|
543
528
|
});
|
|
544
529
|
await transformContext.transform(stateWithSandbox.runtime, {
|
|
545
530
|
durable: await resolveDatasetAgentDurable(stateWithSandbox.durable),
|
|
@@ -552,6 +537,7 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
552
537
|
sandboxId: context.sandboxId,
|
|
553
538
|
sandboxState: context.sandboxState,
|
|
554
539
|
inputPreviews: context.inputPreviews,
|
|
540
|
+
contextResources: stateWithSandbox.contextResources ?? [],
|
|
555
541
|
},
|
|
556
542
|
});
|
|
557
543
|
return targetDatasetId;
|
package/dist/builder/types.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { InstaQLParams, ValidQuery } from "@instantdb/core";
|
|
2
2
|
import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
|
|
3
3
|
import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
|
|
4
|
-
import type { ContextIdentifier, ContextReactor } from "@ekairos/events";
|
|
4
|
+
import type { ContextIdentifier, ContextReactor, StoredContextResource } from "@ekairos/events";
|
|
5
5
|
import { datasetDomain } from "../schema.js";
|
|
6
6
|
export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
|
|
7
7
|
query: InstaQLParams<DomainInstantSchema<D>>;
|
|
@@ -92,6 +92,7 @@ export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
|
|
|
92
92
|
runtime: Runtime;
|
|
93
93
|
env: Runtime["env"] & DatasetRuntimeEnv;
|
|
94
94
|
resources: InternalDatasetResource[];
|
|
95
|
+
contextResources?: StoredContextResource[];
|
|
95
96
|
title?: string;
|
|
96
97
|
sandboxId?: string;
|
|
97
98
|
contextId?: string;
|
|
@@ -6,6 +6,9 @@ export interface PersistDatasetStepParams {
|
|
|
6
6
|
outputPath?: string;
|
|
7
7
|
}
|
|
8
8
|
export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }: PersistDatasetStepParams): Promise<{
|
|
9
|
+
rowSource: string;
|
|
10
|
+
outputPath: string;
|
|
11
|
+
storagePath: string;
|
|
9
12
|
success: boolean;
|
|
10
13
|
validation?: RowValidationEntry[];
|
|
11
14
|
validationTruncated?: number;
|
|
@@ -16,21 +19,47 @@ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summ
|
|
|
16
19
|
error?: string;
|
|
17
20
|
status?: string;
|
|
18
21
|
message?: string;
|
|
22
|
+
validRows?: undefined;
|
|
23
|
+
dataFileId?: undefined;
|
|
24
|
+
records?: undefined;
|
|
25
|
+
summary?: undefined;
|
|
26
|
+
} | {
|
|
27
|
+
success: boolean;
|
|
28
|
+
status: string;
|
|
29
|
+
rowSource: string;
|
|
30
|
+
validRows: number;
|
|
31
|
+
rowRecordCount: number;
|
|
32
|
+
validation: RowValidationEntry[] | undefined;
|
|
33
|
+
error: string;
|
|
34
|
+
message: string;
|
|
35
|
+
outputPath: string;
|
|
36
|
+
storagePath: string;
|
|
37
|
+
dataFileId?: undefined;
|
|
38
|
+
records?: undefined;
|
|
39
|
+
summary?: undefined;
|
|
19
40
|
} | {
|
|
20
41
|
success: boolean;
|
|
21
42
|
status: string;
|
|
43
|
+
rowSource: string;
|
|
22
44
|
validRows: number;
|
|
23
45
|
rowRecordCount: number;
|
|
24
46
|
validation: RowValidationEntry[] | undefined;
|
|
25
47
|
error: string;
|
|
26
48
|
message: string;
|
|
49
|
+
outputPath: string;
|
|
50
|
+
storagePath: string;
|
|
51
|
+
dataFileId: string;
|
|
27
52
|
records?: undefined;
|
|
28
53
|
summary?: undefined;
|
|
29
54
|
} | {
|
|
30
55
|
success: boolean;
|
|
31
56
|
status: string;
|
|
57
|
+
rowSource: string;
|
|
32
58
|
records: number;
|
|
33
59
|
summary: string;
|
|
60
|
+
outputPath: string;
|
|
61
|
+
storagePath: string;
|
|
62
|
+
dataFileId: string;
|
|
34
63
|
validRows?: undefined;
|
|
35
64
|
rowRecordCount?: undefined;
|
|
36
65
|
validation?: undefined;
|
|
@@ -29,11 +29,14 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
29
29
|
return {
|
|
30
30
|
success: false,
|
|
31
31
|
status: "missing_output",
|
|
32
|
+
rowSource: "jsonl",
|
|
32
33
|
validRows: 0,
|
|
33
34
|
rowRecordCount: 0,
|
|
34
35
|
validation: [],
|
|
35
36
|
error: message,
|
|
36
37
|
message,
|
|
38
|
+
outputPath: resolvedOutputPath,
|
|
39
|
+
storagePath,
|
|
37
40
|
};
|
|
38
41
|
}
|
|
39
42
|
console.log(`[Dataset ${datasetId}] Validating dataset rows against schema`);
|
|
@@ -45,11 +48,14 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
45
48
|
return {
|
|
46
49
|
success: false,
|
|
47
50
|
status: "dataset_not_found",
|
|
51
|
+
rowSource: "jsonl",
|
|
48
52
|
validRows: 0,
|
|
49
53
|
rowRecordCount: 0,
|
|
50
54
|
validation: [],
|
|
51
55
|
error: datasetResult.error,
|
|
52
56
|
message: datasetResult.error,
|
|
57
|
+
outputPath: resolvedOutputPath,
|
|
58
|
+
storagePath,
|
|
53
59
|
};
|
|
54
60
|
}
|
|
55
61
|
const datasetRecord = datasetResult.data;
|
|
@@ -58,11 +64,14 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
58
64
|
return {
|
|
59
65
|
success: false,
|
|
60
66
|
status: "schema_missing",
|
|
67
|
+
rowSource: "jsonl",
|
|
61
68
|
validRows: 0,
|
|
62
69
|
rowRecordCount: 0,
|
|
63
70
|
validation: [],
|
|
64
71
|
error: "Schema not found in database. Please generate schema first.",
|
|
65
72
|
message: "Schema not found in database. Please generate schema first.",
|
|
73
|
+
outputPath: resolvedOutputPath,
|
|
74
|
+
storagePath,
|
|
66
75
|
};
|
|
67
76
|
}
|
|
68
77
|
const schemaJson = datasetRecord.schema.schema;
|
|
@@ -76,11 +85,14 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
76
85
|
return {
|
|
77
86
|
success: false,
|
|
78
87
|
status: "schema_invalid",
|
|
88
|
+
rowSource: "jsonl",
|
|
79
89
|
validRows: 0,
|
|
80
90
|
rowRecordCount: 0,
|
|
81
91
|
validation: [],
|
|
82
92
|
error: `Failed to compile schema: ${message}`,
|
|
83
93
|
message: `Failed to compile schema: ${message}`,
|
|
94
|
+
outputPath: resolvedOutputPath,
|
|
95
|
+
storagePath,
|
|
84
96
|
};
|
|
85
97
|
}
|
|
86
98
|
const validationResult = await validateJsonlRows({
|
|
@@ -92,7 +104,12 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
92
104
|
datasetId,
|
|
93
105
|
});
|
|
94
106
|
if (!validationResult.success) {
|
|
95
|
-
return
|
|
107
|
+
return {
|
|
108
|
+
...validationResult,
|
|
109
|
+
rowSource: "jsonl",
|
|
110
|
+
outputPath: resolvedOutputPath,
|
|
111
|
+
storagePath,
|
|
112
|
+
};
|
|
96
113
|
}
|
|
97
114
|
const totalValidRows = validationResult.validRowCount ?? 0;
|
|
98
115
|
const rowRecordCount = validationResult.rowRecordCount ?? totalValidRows;
|
|
@@ -103,11 +120,14 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
103
120
|
return {
|
|
104
121
|
success: false,
|
|
105
122
|
status: "empty_output",
|
|
123
|
+
rowSource: "jsonl",
|
|
106
124
|
validRows: 0,
|
|
107
125
|
rowRecordCount: 0,
|
|
108
126
|
validation: [],
|
|
109
127
|
error: "Empty file content",
|
|
110
128
|
message: "Empty file content",
|
|
129
|
+
outputPath: resolvedOutputPath,
|
|
130
|
+
storagePath,
|
|
111
131
|
};
|
|
112
132
|
}
|
|
113
133
|
console.log(`[Dataset ${datasetId}] Uploading file to InstantDB storage`);
|
|
@@ -121,11 +141,14 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
121
141
|
return {
|
|
122
142
|
success: false,
|
|
123
143
|
status: "upload_failed",
|
|
144
|
+
rowSource: "jsonl",
|
|
124
145
|
validRows: totalValidRows,
|
|
125
146
|
rowRecordCount,
|
|
126
147
|
validation: validationResult.validation,
|
|
127
148
|
error: uploadResult.error,
|
|
128
149
|
message: uploadResult.error,
|
|
150
|
+
outputPath: resolvedOutputPath,
|
|
151
|
+
storagePath,
|
|
129
152
|
};
|
|
130
153
|
}
|
|
131
154
|
console.log(`[Dataset ${datasetId}] File uploaded successfully: ${uploadResult.data.fileId}`);
|
|
@@ -140,11 +163,15 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
140
163
|
return {
|
|
141
164
|
success: false,
|
|
142
165
|
status: "status_update_failed",
|
|
166
|
+
rowSource: "jsonl",
|
|
143
167
|
validRows: totalValidRows,
|
|
144
168
|
rowRecordCount,
|
|
145
169
|
validation: validationResult.validation,
|
|
146
170
|
error: statusResult.error,
|
|
147
171
|
message: statusResult.error,
|
|
172
|
+
outputPath: resolvedOutputPath,
|
|
173
|
+
storagePath,
|
|
174
|
+
dataFileId: uploadResult.data.fileId,
|
|
148
175
|
};
|
|
149
176
|
}
|
|
150
177
|
console.log(`[Dataset ${datasetId}] Dataset marked as COMPLETED (${totalValidRows} valid rows)`);
|
|
@@ -152,8 +179,12 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
152
179
|
return {
|
|
153
180
|
success: true,
|
|
154
181
|
status: "completed",
|
|
182
|
+
rowSource: "jsonl",
|
|
155
183
|
records: totalValidRows,
|
|
156
184
|
summary: summary ?? `Dataset completed with ${totalValidRows} records.`,
|
|
185
|
+
outputPath: resolvedOutputPath,
|
|
186
|
+
storagePath,
|
|
187
|
+
dataFileId: uploadResult.data.fileId,
|
|
157
188
|
};
|
|
158
189
|
}
|
|
159
190
|
function resolveExecutionStoragePath(outputPath, datasetId) {
|
|
@@ -7,6 +7,9 @@ interface CompleteDatasetToolParams {
|
|
|
7
7
|
export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }: CompleteDatasetToolParams): import("ai").Tool<{
|
|
8
8
|
summary: string;
|
|
9
9
|
}, {
|
|
10
|
+
rowSource: string;
|
|
11
|
+
outputPath: string;
|
|
12
|
+
storagePath: string;
|
|
10
13
|
success: boolean;
|
|
11
14
|
validation?: {
|
|
12
15
|
index: number;
|
|
@@ -63,9 +66,40 @@ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtim
|
|
|
63
66
|
error?: string;
|
|
64
67
|
status?: string;
|
|
65
68
|
message?: string;
|
|
69
|
+
validRows?: undefined;
|
|
70
|
+
dataFileId?: undefined;
|
|
71
|
+
records?: undefined;
|
|
72
|
+
summary?: undefined;
|
|
73
|
+
} | {
|
|
74
|
+
success: boolean;
|
|
75
|
+
status: string;
|
|
76
|
+
rowSource: string;
|
|
77
|
+
validRows: number;
|
|
78
|
+
rowRecordCount: number;
|
|
79
|
+
validation: {
|
|
80
|
+
index: number;
|
|
81
|
+
valid: boolean;
|
|
82
|
+
errors?: string[];
|
|
83
|
+
errorDetails?: Array<{
|
|
84
|
+
path: string;
|
|
85
|
+
keyword: string;
|
|
86
|
+
message: string;
|
|
87
|
+
params?: Record<string, unknown>;
|
|
88
|
+
schemaPath?: string;
|
|
89
|
+
}>;
|
|
90
|
+
dataKeys?: string[];
|
|
91
|
+
}[] | undefined;
|
|
92
|
+
error: string;
|
|
93
|
+
message: string;
|
|
94
|
+
outputPath: string;
|
|
95
|
+
storagePath: string;
|
|
96
|
+
dataFileId?: undefined;
|
|
97
|
+
records?: undefined;
|
|
98
|
+
summary?: undefined;
|
|
66
99
|
} | {
|
|
67
100
|
success: boolean;
|
|
68
101
|
status: string;
|
|
102
|
+
rowSource: string;
|
|
69
103
|
validRows: number;
|
|
70
104
|
rowRecordCount: number;
|
|
71
105
|
validation: {
|
|
@@ -83,13 +117,20 @@ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtim
|
|
|
83
117
|
}[] | undefined;
|
|
84
118
|
error: string;
|
|
85
119
|
message: string;
|
|
120
|
+
outputPath: string;
|
|
121
|
+
storagePath: string;
|
|
122
|
+
dataFileId: string;
|
|
86
123
|
records?: undefined;
|
|
87
124
|
summary?: undefined;
|
|
88
125
|
} | {
|
|
89
126
|
success: boolean;
|
|
90
127
|
status: string;
|
|
128
|
+
rowSource: string;
|
|
91
129
|
records: number;
|
|
92
130
|
summary: string;
|
|
131
|
+
outputPath: string;
|
|
132
|
+
storagePath: string;
|
|
133
|
+
dataFileId: string;
|
|
93
134
|
validRows?: undefined;
|
|
94
135
|
rowRecordCount?: undefined;
|
|
95
136
|
validation?: undefined;
|
|
@@ -3,7 +3,7 @@ import { z } from "zod";
|
|
|
3
3
|
import { persistDatasetStep } from "./completeDataset.steps.js";
|
|
4
4
|
export function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }) {
|
|
5
5
|
return tool({
|
|
6
|
-
description: "
|
|
6
|
+
description: "Validate and complete the dataset from output.jsonl. The result includes the JSONL outputPath and storagePath used for completion.",
|
|
7
7
|
inputSchema: z.object({
|
|
8
8
|
summary: z.string().describe("Summary of the completed dataset including record count and structure"),
|
|
9
9
|
}),
|
|
@@ -25,11 +25,14 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outpu
|
|
|
25
25
|
export function didCompleteDatasetSucceed(event) {
|
|
26
26
|
const parts = Array.isArray(event?.content?.parts) ? event.content.parts : [];
|
|
27
27
|
return parts.some((part) => {
|
|
28
|
-
if (part?.type === "action" &&
|
|
28
|
+
if (part?.type === "action" &&
|
|
29
|
+
["completeDataset", "completeObject", "replaceRows"].includes(part?.content?.actionName)) {
|
|
29
30
|
const output = part.content.output;
|
|
30
31
|
return part.content.status === "completed" && output?.success === true && output?.status === "completed";
|
|
31
32
|
}
|
|
32
|
-
if (part?.type === "tool-completeDataset"
|
|
33
|
+
if (part?.type === "tool-completeDataset" ||
|
|
34
|
+
part?.type === "tool-completeObject" ||
|
|
35
|
+
part?.type === "tool-replaceRows") {
|
|
33
36
|
const output = part.output ?? part.result;
|
|
34
37
|
return part.state === "output-available" && output?.success === true && output?.status === "completed";
|
|
35
38
|
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { StoredContextResource } from "@ekairos/events";
|
|
2
|
+
type MaterializedContextResource = {
|
|
3
|
+
key: string;
|
|
4
|
+
type: string;
|
|
5
|
+
name: string;
|
|
6
|
+
description: string;
|
|
7
|
+
dir: string;
|
|
8
|
+
metadataPath: string;
|
|
9
|
+
files: Array<{
|
|
10
|
+
path: string;
|
|
11
|
+
role: string;
|
|
12
|
+
mediaType?: string;
|
|
13
|
+
}>;
|
|
14
|
+
status: "materialized" | "metadata_only" | "skipped";
|
|
15
|
+
reason?: string;
|
|
16
|
+
};
|
|
17
|
+
export type ContextResourcesMaterialization = {
|
|
18
|
+
contextId: string;
|
|
19
|
+
sandboxId: string;
|
|
20
|
+
resourcesDir: string;
|
|
21
|
+
manifestPath: string;
|
|
22
|
+
resources: MaterializedContextResource[];
|
|
23
|
+
};
|
|
24
|
+
export declare function materializeContextResourcesStep(params: {
|
|
25
|
+
runtime: any;
|
|
26
|
+
sandboxId: string;
|
|
27
|
+
contextId: string;
|
|
28
|
+
resources: StoredContextResource[];
|
|
29
|
+
resourceKeys?: string[] | null;
|
|
30
|
+
}): Promise<ContextResourcesMaterialization>;
|
|
31
|
+
export {};
|