@ekairos/dataset 1.22.49-beta.development.0 → 1.22.51-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/materialize.d.ts +1 -11
- package/dist/builder/materialize.js +25 -77
- package/dist/builder/materializeQuery.d.ts +11 -0
- package/dist/builder/materializeQuery.js +40 -0
- package/dist/builder/persistence.js +13 -21
- package/dist/builder/types.d.ts +3 -0
- package/dist/clearDataset.tool.d.ts +2 -2
- package/dist/clearDataset.tool.js +3 -3
- package/dist/completeDataset.tool.d.ts +31 -3
- package/dist/completeDataset.tool.js +101 -13
- package/dist/dataset/steps.d.ts +32 -8
- package/dist/dataset/steps.js +69 -13
- package/dist/dataset.js +13 -7
- package/dist/executeCommand.tool.d.ts +2 -2
- package/dist/executeCommand.tool.js +3 -3
- package/dist/file/file-dataset.agent.d.ts +17 -11
- package/dist/file/file-dataset.agent.js +54 -47
- package/dist/file/filepreview.d.ts +2 -2
- package/dist/file/filepreview.js +13 -13
- package/dist/file/generateSchema.tool.d.ts +2 -2
- package/dist/file/generateSchema.tool.js +2 -2
- package/dist/file/prompts.d.ts +2 -2
- package/dist/file/prompts.js +6 -1
- package/dist/file/steps.d.ts +1 -1
- package/dist/file/steps.js +8 -2
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/query/queryDomain.d.ts +3 -3
- package/dist/query/queryDomain.js +3 -3
- package/dist/query/queryDomain.step.d.ts +1 -0
- package/dist/query/queryDomain.step.js +8 -4
- package/dist/sandbox/steps.d.ts +6 -6
- package/dist/sandbox/steps.js +16 -12
- package/dist/transform/filepreview.d.ts +1 -1
- package/dist/transform/filepreview.js +6 -6
- package/dist/transform/index.d.ts +1 -1
- package/dist/transform/index.js +1 -1
- package/dist/transform/prompts.js +4 -1
- package/dist/transform/transform-dataset.agent.d.ts +9 -3
- package/dist/transform/transform-dataset.agent.js +39 -32
- package/dist/transform/transformDataset.d.ts +3 -2
- package/dist/transform/transformDataset.js +10 -9
- package/package.json +19 -5
- package/dist/eventsReactRuntime.d.ts +0 -21
- package/dist/eventsReactRuntime.js +0 -25
|
package/dist/completeDataset.tool.js
CHANGED
|
@@ -14,7 +14,7 @@ function getAjv() {
|
|
|
14
14
|
}
|
|
15
15
|
return ajvInstance;
|
|
16
16
|
}
|
|
17
|
-
export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
17
|
+
export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
|
|
18
18
|
return tool({
|
|
19
19
|
description: "Mark the dataset as completed. Use only when output.jsonl has been successfully generated and is ready for validation.",
|
|
20
20
|
inputSchema: z.object({
|
|
@@ -27,23 +27,33 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
27
27
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
28
28
|
const outputPath = getDatasetOutputPath(datasetId);
|
|
29
29
|
try {
|
|
30
|
-
await ensureFileExists(env, sandboxId, outputPath);
|
|
30
|
+
await ensureFileExists(runtime, sandboxId, outputPath);
|
|
31
31
|
}
|
|
32
32
|
catch (error) {
|
|
33
33
|
const message = error instanceof Error ? error.message : String(error);
|
|
34
34
|
console.error(`[Dataset ${datasetId}] Missing output file:`, message);
|
|
35
35
|
return {
|
|
36
36
|
success: false,
|
|
37
|
+
status: "missing_output",
|
|
38
|
+
validRows: 0,
|
|
39
|
+
rowRecordCount: 0,
|
|
40
|
+
validation: [],
|
|
37
41
|
error: message,
|
|
42
|
+
message,
|
|
38
43
|
};
|
|
39
44
|
}
|
|
40
45
|
console.log(`[Dataset ${datasetId}] Validating dataset rows against schema`);
|
|
41
|
-
const datasetResult = await datasetGetByIdStep({
|
|
46
|
+
const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
|
|
42
47
|
if (!datasetResult.ok) {
|
|
43
48
|
console.error(`[Dataset ${datasetId}] ${datasetResult.error}`);
|
|
44
49
|
return {
|
|
45
50
|
success: false,
|
|
51
|
+
status: "dataset_not_found",
|
|
52
|
+
validRows: 0,
|
|
53
|
+
rowRecordCount: 0,
|
|
54
|
+
validation: [],
|
|
46
55
|
error: datasetResult.error,
|
|
56
|
+
message: datasetResult.error,
|
|
47
57
|
};
|
|
48
58
|
}
|
|
49
59
|
const datasetRecord = datasetResult.data;
|
|
@@ -51,7 +61,12 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
51
61
|
console.error(`[Dataset ${datasetId}] Schema not found in database`);
|
|
52
62
|
return {
|
|
53
63
|
success: false,
|
|
64
|
+
status: "schema_missing",
|
|
65
|
+
validRows: 0,
|
|
66
|
+
rowRecordCount: 0,
|
|
67
|
+
validation: [],
|
|
54
68
|
error: "Schema not found in database. Please generate schema first.",
|
|
69
|
+
message: "Schema not found in database. Please generate schema first.",
|
|
55
70
|
};
|
|
56
71
|
}
|
|
57
72
|
const schemaJson = datasetRecord.schema.schema;
|
|
@@ -64,11 +79,16 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
64
79
|
console.error(`[Dataset ${datasetId}] Failed to compile schema:`, message);
|
|
65
80
|
return {
|
|
66
81
|
success: false,
|
|
82
|
+
status: "schema_invalid",
|
|
83
|
+
validRows: 0,
|
|
84
|
+
rowRecordCount: 0,
|
|
85
|
+
validation: [],
|
|
67
86
|
error: `Failed to compile schema: ${message}`,
|
|
87
|
+
message: `Failed to compile schema: ${message}`,
|
|
68
88
|
};
|
|
69
89
|
}
|
|
70
90
|
const validationResult = await validateJsonlRows({
|
|
71
|
-
|
|
91
|
+
runtime,
|
|
72
92
|
sandboxId,
|
|
73
93
|
outputPath,
|
|
74
94
|
validator,
|
|
@@ -77,29 +97,40 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
77
97
|
if (!validationResult.success) {
|
|
78
98
|
return validationResult;
|
|
79
99
|
}
|
|
80
|
-
const totalValidRows = validationResult.validRowCount;
|
|
100
|
+
const totalValidRows = validationResult.validRowCount ?? 0;
|
|
101
|
+
const rowRecordCount = validationResult.rowRecordCount ?? totalValidRows;
|
|
81
102
|
console.log(`[Dataset ${datasetId}] Reading file content for upload`);
|
|
82
|
-
const fileRead = await readDatasetSandboxFileStep({
|
|
103
|
+
const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: outputPath });
|
|
83
104
|
if (!fileRead.contentBase64) {
|
|
84
105
|
console.error(`[Dataset ${datasetId}] Empty file content`);
|
|
85
106
|
return {
|
|
86
107
|
success: false,
|
|
108
|
+
status: "empty_output",
|
|
109
|
+
validRows: 0,
|
|
110
|
+
rowRecordCount: 0,
|
|
111
|
+
validation: [],
|
|
87
112
|
error: "Empty file content",
|
|
113
|
+
message: "Empty file content",
|
|
88
114
|
};
|
|
89
115
|
}
|
|
90
116
|
const fileBuffer = Buffer.from(fileRead.contentBase64, "base64");
|
|
91
117
|
console.log(`[Dataset ${datasetId}] Uploading file to InstantDB storage`);
|
|
92
|
-
const uploadResult = await datasetUploadOutputFileStep({
|
|
118
|
+
const uploadResult = await datasetUploadOutputFileStep({ runtime, datasetId, fileBuffer });
|
|
93
119
|
if (!uploadResult.ok) {
|
|
94
120
|
console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
|
|
95
121
|
return {
|
|
96
122
|
success: false,
|
|
123
|
+
status: "upload_failed",
|
|
124
|
+
validRows: totalValidRows,
|
|
125
|
+
rowRecordCount,
|
|
126
|
+
validation: validationResult.validation,
|
|
97
127
|
error: uploadResult.error,
|
|
128
|
+
message: uploadResult.error,
|
|
98
129
|
};
|
|
99
130
|
}
|
|
100
131
|
console.log(`[Dataset ${datasetId}] File uploaded successfully: ${uploadResult.data.fileId}`);
|
|
101
132
|
const statusResult = await datasetUpdateStatusStep({
|
|
102
|
-
|
|
133
|
+
runtime,
|
|
103
134
|
datasetId,
|
|
104
135
|
status: "completed",
|
|
105
136
|
calculatedTotalRows: totalValidRows,
|
|
@@ -109,14 +140,21 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
109
140
|
console.error(`[Dataset ${datasetId}] Failed to update status: ${statusResult.error}`);
|
|
110
141
|
return {
|
|
111
142
|
success: false,
|
|
143
|
+
status: "status_update_failed",
|
|
144
|
+
validRows: totalValidRows,
|
|
145
|
+
rowRecordCount,
|
|
146
|
+
validation: validationResult.validation,
|
|
112
147
|
error: statusResult.error,
|
|
148
|
+
message: statusResult.error,
|
|
113
149
|
};
|
|
114
150
|
}
|
|
115
151
|
console.log(`[Dataset ${datasetId}] Dataset marked as COMPLETED (${totalValidRows} valid rows)`);
|
|
116
152
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
117
153
|
return {
|
|
118
154
|
success: true,
|
|
155
|
+
status: "completed",
|
|
119
156
|
validRows: totalValidRows,
|
|
157
|
+
rowRecordCount,
|
|
120
158
|
fileId: uploadResult.data.fileId,
|
|
121
159
|
storagePath: uploadResult.data.storagePath,
|
|
122
160
|
message: "Dataset creation completed and uploaded to storage",
|
|
@@ -124,9 +162,23 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
124
162
|
},
|
|
125
163
|
});
|
|
126
164
|
}
|
|
127
|
-
async function ensureFileExists(env, sandboxId, path) {
|
|
165
|
+
export function didCompleteDatasetSucceed(event) {
|
|
166
|
+
const parts = Array.isArray(event?.content?.parts) ? event.content.parts : [];
|
|
167
|
+
return parts.some((part) => {
|
|
168
|
+
if (part?.type === "action" && part?.content?.actionName === "completeDataset") {
|
|
169
|
+
const output = part.content.output;
|
|
170
|
+
return part.content.status === "completed" && output?.success === true && output?.status === "completed";
|
|
171
|
+
}
|
|
172
|
+
if (part?.type === "tool-completeDataset") {
|
|
173
|
+
const output = part.output ?? part.result;
|
|
174
|
+
return part.state === "output-available" && output?.success === true && output?.status === "completed";
|
|
175
|
+
}
|
|
176
|
+
return false;
|
|
177
|
+
});
|
|
178
|
+
}
|
|
179
|
+
async function ensureFileExists(runtime, sandboxId, path) {
|
|
128
180
|
const result = await runDatasetSandboxCommandStep({
|
|
129
|
-
|
|
181
|
+
runtime,
|
|
130
182
|
sandboxId,
|
|
131
183
|
cmd: "test",
|
|
132
184
|
args: ["-f", path],
|
|
@@ -135,14 +187,23 @@ async function ensureFileExists(env, sandboxId, path) {
|
|
|
135
187
|
throw new Error(`Required file not found: ${path}`);
|
|
136
188
|
}
|
|
137
189
|
}
|
|
138
|
-
async function validateJsonlRows({ env, sandboxId, outputPath, validator, datasetId }) {
|
|
190
|
+
async function validateJsonlRows({ runtime, sandboxId, outputPath, validator, datasetId }) {
|
|
139
191
|
const validation = [];
|
|
140
192
|
let validRowCount = 0;
|
|
193
|
+
let rowRecordCount = 0;
|
|
141
194
|
console.log(`[Dataset ${datasetId}] Reading and validating JSONL file from sandbox`);
|
|
142
|
-
const fileRead = await readDatasetSandboxFileStep({
|
|
195
|
+
const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: outputPath });
|
|
143
196
|
if (!fileRead.contentBase64) {
|
|
144
197
|
console.log(`[Dataset ${datasetId}] Empty output file`);
|
|
145
|
-
return {
|
|
198
|
+
return {
|
|
199
|
+
success: false,
|
|
200
|
+
status: "empty_output",
|
|
201
|
+
validation,
|
|
202
|
+
validRowCount: 0,
|
|
203
|
+
rowRecordCount: 0,
|
|
204
|
+
error: "output.jsonl is empty",
|
|
205
|
+
message: "output.jsonl is empty",
|
|
206
|
+
};
|
|
146
207
|
}
|
|
147
208
|
const fileContent = Buffer.from(fileRead.contentBase64, "base64").toString();
|
|
148
209
|
const lines = fileContent.split("\n");
|
|
@@ -167,8 +228,14 @@ async function validateJsonlRows({ env, sandboxId, outputPath, validator, datase
|
|
|
167
228
|
continue;
|
|
168
229
|
}
|
|
169
230
|
if (record.type !== "row") {
|
|
231
|
+
validation.push({
|
|
232
|
+
index,
|
|
233
|
+
valid: false,
|
|
234
|
+
errors: ["Every non-empty output line must be a JSON object with type 'row'"],
|
|
235
|
+
});
|
|
170
236
|
continue;
|
|
171
237
|
}
|
|
238
|
+
rowRecordCount++;
|
|
172
239
|
const data = record.data;
|
|
173
240
|
if (data === undefined || data === null) {
|
|
174
241
|
validation.push({
|
|
@@ -187,6 +254,7 @@ async function validateJsonlRows({ env, sandboxId, outputPath, validator, datase
|
|
|
187
254
|
index,
|
|
188
255
|
valid: false,
|
|
189
256
|
errors,
|
|
257
|
+
dataKeys: data && typeof data === "object" && !Array.isArray(data) ? Object.keys(data) : [],
|
|
190
258
|
});
|
|
191
259
|
continue;
|
|
192
260
|
}
|
|
@@ -197,9 +265,29 @@ async function validateJsonlRows({ env, sandboxId, outputPath, validator, datase
|
|
|
197
265
|
validRowCount++;
|
|
198
266
|
}
|
|
199
267
|
console.log(`[Dataset ${datasetId}] Validation completed: ${validRowCount} valid rows`);
|
|
268
|
+
const invalidRows = validation.filter((entry) => !entry.valid);
|
|
269
|
+
if (rowRecordCount === 0 || validRowCount === 0 || invalidRows.length > 0) {
|
|
270
|
+
const message = rowRecordCount === 0
|
|
271
|
+
? "output.jsonl does not contain any type='row' records"
|
|
272
|
+
: validRowCount === 0
|
|
273
|
+
? "No dataset rows matched the stored schema"
|
|
274
|
+
: `${invalidRows.length} dataset row(s) failed schema validation`;
|
|
275
|
+
console.error(`[Dataset ${datasetId}] Validation failed: ${message}`);
|
|
276
|
+
return {
|
|
277
|
+
success: false,
|
|
278
|
+
status: "validation_failed",
|
|
279
|
+
validation,
|
|
280
|
+
validRowCount,
|
|
281
|
+
rowRecordCount,
|
|
282
|
+
error: message,
|
|
283
|
+
message,
|
|
284
|
+
};
|
|
285
|
+
}
|
|
200
286
|
return {
|
|
201
287
|
success: true,
|
|
288
|
+
status: "completed",
|
|
202
289
|
validation,
|
|
203
290
|
validRowCount,
|
|
291
|
+
rowRecordCount,
|
|
204
292
|
};
|
|
205
293
|
}
|
package/dist/dataset/steps.d.ts
CHANGED
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
export declare function getDatasetServiceDb(
|
|
1
|
+
export declare function getDatasetServiceDb(runtime: any): Promise<any>;
|
|
2
2
|
export declare function datasetGetByIdStep(params: {
|
|
3
|
-
|
|
3
|
+
runtime: any;
|
|
4
4
|
datasetId: string;
|
|
5
5
|
}): Promise<import("../service.js").ServiceResult<any>>;
|
|
6
6
|
export declare function datasetReadOutputJsonlStep(params: {
|
|
7
|
-
|
|
7
|
+
runtime: any;
|
|
8
8
|
datasetId: string;
|
|
9
9
|
}): Promise<{
|
|
10
10
|
contentBase64: string;
|
|
11
11
|
}>;
|
|
12
12
|
export declare function datasetUpdateSchemaStep(params: {
|
|
13
|
-
|
|
13
|
+
runtime: any;
|
|
14
14
|
datasetId: string;
|
|
15
15
|
schema: any;
|
|
16
16
|
status?: string;
|
|
17
17
|
}): Promise<import("../service.js").ServiceResult<void>>;
|
|
18
18
|
export declare function datasetUploadOutputFileStep(params: {
|
|
19
|
-
|
|
19
|
+
runtime: any;
|
|
20
20
|
datasetId: string;
|
|
21
21
|
fileBuffer: Buffer;
|
|
22
22
|
}): Promise<import("../service.js").ServiceResult<{
|
|
@@ -24,22 +24,46 @@ export declare function datasetUploadOutputFileStep(params: {
|
|
|
24
24
|
storagePath: string;
|
|
25
25
|
}>>;
|
|
26
26
|
export declare function datasetUpdateStatusStep(params: {
|
|
27
|
-
|
|
27
|
+
runtime: any;
|
|
28
28
|
datasetId: string;
|
|
29
29
|
status: string;
|
|
30
30
|
calculatedTotalRows?: number;
|
|
31
31
|
actualGeneratedRowCount?: number;
|
|
32
32
|
}): Promise<import("../service.js").ServiceResult<void>>;
|
|
33
33
|
export declare function datasetClearStep(params: {
|
|
34
|
-
|
|
34
|
+
runtime: any;
|
|
35
35
|
datasetId: string;
|
|
36
36
|
}): Promise<import("../service.js").ServiceResult<{
|
|
37
37
|
deletedCount: number;
|
|
38
38
|
}>>;
|
|
39
39
|
export declare function datasetPreviewRowsStep(params: {
|
|
40
|
-
|
|
40
|
+
runtime: any;
|
|
41
41
|
datasetId: string;
|
|
42
42
|
limit?: number;
|
|
43
43
|
}): Promise<{
|
|
44
44
|
rows: any[];
|
|
45
45
|
}>;
|
|
46
|
+
export declare function datasetReadRowsStep(params: {
|
|
47
|
+
runtime: any;
|
|
48
|
+
datasetId: string;
|
|
49
|
+
cursor?: number;
|
|
50
|
+
limit?: number;
|
|
51
|
+
}): Promise<{
|
|
52
|
+
rows: any[];
|
|
53
|
+
cursor: number;
|
|
54
|
+
done: boolean;
|
|
55
|
+
}>;
|
|
56
|
+
export declare function datasetReadOneStep(params: {
|
|
57
|
+
runtime: any;
|
|
58
|
+
datasetId: string;
|
|
59
|
+
}): Promise<{
|
|
60
|
+
row: any | null;
|
|
61
|
+
}>;
|
|
62
|
+
export declare function datasetInferAndUpdateSchemaStep(params: {
|
|
63
|
+
runtime: any;
|
|
64
|
+
datasetId: string;
|
|
65
|
+
title?: string;
|
|
66
|
+
description?: string;
|
|
67
|
+
}): Promise<{
|
|
68
|
+
schema: import("../dataset.js").DatasetSchemaInput;
|
|
69
|
+
}>;
|
package/dist/dataset/steps.js
CHANGED
|
@@ -1,22 +1,31 @@
|
|
|
1
|
-
import { getContextRuntime, getContextEnv } from "@ekairos/events/runtime";
|
|
2
1
|
import { DatasetService } from "../service.js";
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
import { datasetDomain } from "../schema.js";
|
|
3
|
+
import { inferDatasetSchema } from "../builder/schemaInference.js";
|
|
4
|
+
async function getRuntimeDb(runtime) {
|
|
5
|
+
if (!runtime) {
|
|
6
|
+
throw new Error("Dataset step requires runtime.");
|
|
7
|
+
}
|
|
8
|
+
if (typeof runtime.use === "function") {
|
|
9
|
+
const scoped = await runtime.use(datasetDomain);
|
|
10
|
+
const scopedDb = scoped.db;
|
|
11
|
+
return typeof scopedDb === "function" ? await scopedDb.call(scoped) : scopedDb;
|
|
12
|
+
}
|
|
13
|
+
const db = runtime.db;
|
|
14
|
+
return typeof db === "function" ? await db.call(runtime) : db;
|
|
5
15
|
}
|
|
6
|
-
export async function getDatasetServiceDb(
|
|
16
|
+
export async function getDatasetServiceDb(runtime) {
|
|
7
17
|
"use step";
|
|
8
|
-
|
|
9
|
-
return runtime.db;
|
|
18
|
+
return await getRuntimeDb(runtime);
|
|
10
19
|
}
|
|
11
20
|
export async function datasetGetByIdStep(params) {
|
|
12
21
|
"use step";
|
|
13
|
-
const db =
|
|
22
|
+
const db = await getRuntimeDb(params.runtime);
|
|
14
23
|
const service = new DatasetService(db);
|
|
15
24
|
return await service.getDatasetById(params.datasetId);
|
|
16
25
|
}
|
|
17
26
|
export async function datasetReadOutputJsonlStep(params) {
|
|
18
27
|
"use step";
|
|
19
|
-
const db =
|
|
28
|
+
const db = await getRuntimeDb(params.runtime);
|
|
20
29
|
for (let attempt = 1; attempt <= 20; attempt++) {
|
|
21
30
|
const query = await db.query({
|
|
22
31
|
dataset_datasets: {
|
|
@@ -37,7 +46,7 @@ export async function datasetReadOutputJsonlStep(params) {
|
|
|
37
46
|
}
|
|
38
47
|
export async function datasetUpdateSchemaStep(params) {
|
|
39
48
|
"use step";
|
|
40
|
-
const db =
|
|
49
|
+
const db = await getRuntimeDb(params.runtime);
|
|
41
50
|
const service = new DatasetService(db);
|
|
42
51
|
return await service.updateDatasetSchema({
|
|
43
52
|
datasetId: params.datasetId,
|
|
@@ -47,7 +56,7 @@ export async function datasetUpdateSchemaStep(params) {
|
|
|
47
56
|
}
|
|
48
57
|
export async function datasetUploadOutputFileStep(params) {
|
|
49
58
|
"use step";
|
|
50
|
-
const db =
|
|
59
|
+
const db = await getRuntimeDb(params.runtime);
|
|
51
60
|
const service = new DatasetService(db);
|
|
52
61
|
return await service.uploadDatasetOutputFile({
|
|
53
62
|
datasetId: params.datasetId,
|
|
@@ -56,7 +65,7 @@ export async function datasetUploadOutputFileStep(params) {
|
|
|
56
65
|
}
|
|
57
66
|
export async function datasetUpdateStatusStep(params) {
|
|
58
67
|
"use step";
|
|
59
|
-
const db =
|
|
68
|
+
const db = await getRuntimeDb(params.runtime);
|
|
60
69
|
const service = new DatasetService(db);
|
|
61
70
|
return await service.updateDatasetStatus({
|
|
62
71
|
datasetId: params.datasetId,
|
|
@@ -67,13 +76,13 @@ export async function datasetUpdateStatusStep(params) {
|
|
|
67
76
|
}
|
|
68
77
|
export async function datasetClearStep(params) {
|
|
69
78
|
"use step";
|
|
70
|
-
const db =
|
|
79
|
+
const db = await getRuntimeDb(params.runtime);
|
|
71
80
|
const service = new DatasetService(db);
|
|
72
81
|
return await service.clearDataset(params.datasetId);
|
|
73
82
|
}
|
|
74
83
|
export async function datasetPreviewRowsStep(params) {
|
|
75
84
|
"use step";
|
|
76
|
-
const db =
|
|
85
|
+
const db = await getRuntimeDb(params.runtime);
|
|
77
86
|
const service = new DatasetService(db);
|
|
78
87
|
const rowsResult = await service.previewRows(params.datasetId, params.limit ?? 20);
|
|
79
88
|
if (!rowsResult.ok) {
|
|
@@ -81,3 +90,50 @@ export async function datasetPreviewRowsStep(params) {
|
|
|
81
90
|
}
|
|
82
91
|
return { rows: rowsResult.data };
|
|
83
92
|
}
|
|
93
|
+
export async function datasetReadRowsStep(params) {
|
|
94
|
+
"use step";
|
|
95
|
+
const db = await getRuntimeDb(params.runtime);
|
|
96
|
+
const service = new DatasetService(db);
|
|
97
|
+
const rowsResult = await service.readRows({
|
|
98
|
+
datasetId: params.datasetId,
|
|
99
|
+
cursor: params.cursor,
|
|
100
|
+
limit: params.limit,
|
|
101
|
+
});
|
|
102
|
+
if (!rowsResult.ok) {
|
|
103
|
+
throw new Error(rowsResult.error);
|
|
104
|
+
}
|
|
105
|
+
return rowsResult.data;
|
|
106
|
+
}
|
|
107
|
+
export async function datasetReadOneStep(params) {
|
|
108
|
+
"use step";
|
|
109
|
+
const db = await getRuntimeDb(params.runtime);
|
|
110
|
+
const service = new DatasetService(db);
|
|
111
|
+
const firstResult = await service.readOne(params.datasetId);
|
|
112
|
+
if (!firstResult.ok) {
|
|
113
|
+
throw new Error(firstResult.error);
|
|
114
|
+
}
|
|
115
|
+
return { row: firstResult.data };
|
|
116
|
+
}
|
|
117
|
+
export async function datasetInferAndUpdateSchemaStep(params) {
|
|
118
|
+
"use step";
|
|
119
|
+
const db = await getRuntimeDb(params.runtime);
|
|
120
|
+
const service = new DatasetService(db);
|
|
121
|
+
const readResult = await service.readRows({
|
|
122
|
+
datasetId: params.datasetId,
|
|
123
|
+
cursor: 0,
|
|
124
|
+
limit: 1000,
|
|
125
|
+
});
|
|
126
|
+
if (!readResult.ok) {
|
|
127
|
+
throw new Error(readResult.error);
|
|
128
|
+
}
|
|
129
|
+
const inferred = inferDatasetSchema(readResult.data.rows, params.title ?? `${params.datasetId}Row`, params.description ?? "One dataset row");
|
|
130
|
+
const updateResult = await service.updateDatasetSchema({
|
|
131
|
+
datasetId: params.datasetId,
|
|
132
|
+
schema: inferred,
|
|
133
|
+
status: "completed",
|
|
134
|
+
});
|
|
135
|
+
if (!updateResult.ok) {
|
|
136
|
+
throw new Error(updateResult.error);
|
|
137
|
+
}
|
|
138
|
+
return { schema: inferred };
|
|
139
|
+
}
|
package/dist/dataset.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { id as newId } from "@instantdb/admin";
|
|
2
2
|
import { buildObjectOutputInstructions } from "./builder/instructions.js";
|
|
3
|
-
import {
|
|
3
|
+
import { getDatasetAgentMaterializers } from "./builder/agentMaterializers.js";
|
|
4
|
+
import { materializeQuerySource } from "./builder/materializeQuery.js";
|
|
4
5
|
import { finalizeBuildResult } from "./builder/persistence.js";
|
|
5
6
|
export function dataset(runtime, options = {}) {
|
|
6
7
|
const datasetId = normalizeDatasetId(options.datasetId);
|
|
@@ -11,6 +12,7 @@ export function dataset(runtime, options = {}) {
|
|
|
11
12
|
sources: [],
|
|
12
13
|
output: "rows",
|
|
13
14
|
inferSchema: false,
|
|
15
|
+
durable: options.durable,
|
|
14
16
|
first: false,
|
|
15
17
|
};
|
|
16
18
|
const api = {
|
|
@@ -100,13 +102,17 @@ export function dataset(runtime, options = {}) {
|
|
|
100
102
|
const targetDatasetId = options?.datasetId
|
|
101
103
|
? normalizeDatasetId(options.datasetId)
|
|
102
104
|
: datasetId;
|
|
103
|
-
const
|
|
105
|
+
const stateWithBuildOptions = {
|
|
106
|
+
...state,
|
|
107
|
+
durable: options?.durable ?? state.durable,
|
|
108
|
+
};
|
|
109
|
+
const effectiveState = stateWithBuildOptions.output === "object"
|
|
104
110
|
? {
|
|
105
|
-
...
|
|
111
|
+
...stateWithBuildOptions,
|
|
106
112
|
first: true,
|
|
107
|
-
instructions: buildObjectOutputInstructions(
|
|
113
|
+
instructions: buildObjectOutputInstructions(stateWithBuildOptions.instructions),
|
|
108
114
|
}
|
|
109
|
-
:
|
|
115
|
+
: stateWithBuildOptions;
|
|
110
116
|
const onlySource = effectiveState.sources[0];
|
|
111
117
|
const isSingleSource = effectiveState.sources.length === 1;
|
|
112
118
|
const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
|
|
@@ -128,7 +134,7 @@ export function dataset(runtime, options = {}) {
|
|
|
128
134
|
if (!effectiveState.reactor) {
|
|
129
135
|
throw new Error("dataset_reactor_required");
|
|
130
136
|
}
|
|
131
|
-
await materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
|
|
137
|
+
await getDatasetAgentMaterializers().materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
|
|
132
138
|
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
133
139
|
}
|
|
134
140
|
if (!effectiveState.sandboxId) {
|
|
@@ -137,7 +143,7 @@ export function dataset(runtime, options = {}) {
|
|
|
137
143
|
if (!effectiveState.reactor) {
|
|
138
144
|
throw new Error("dataset_reactor_required");
|
|
139
145
|
}
|
|
140
|
-
await materializeDerivedDataset(effectiveState, targetDatasetId);
|
|
146
|
+
await getDatasetAgentMaterializers().materializeDerivedDataset(effectiveState, targetDatasetId);
|
|
141
147
|
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
142
148
|
},
|
|
143
149
|
};
|
|
|
package/dist/executeCommand.tool.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
interface ExecuteCommandToolParams {
|
|
2
2
|
datasetId: string;
|
|
3
3
|
sandboxId: string;
|
|
4
|
-
|
|
4
|
+
runtime: any;
|
|
5
5
|
}
|
|
6
|
-
export declare function createExecuteCommandTool({ datasetId, sandboxId, env }: ExecuteCommandToolParams): import("ai").Tool<{
|
|
6
|
+
export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): import("ai").Tool<{
|
|
7
7
|
pythonCode: string;
|
|
8
8
|
scriptName: string;
|
|
9
9
|
}, {
|
|
|
package/dist/executeCommand.tool.js
CHANGED
|
@@ -6,7 +6,7 @@ import { getDatasetWorkstation } from "./datasetFiles.js";
|
|
|
6
6
|
// The tool's return payload exposes stdout (capped) plus the on-disk script path.
|
|
7
7
|
const MAX_STDOUT_CHARS = 20000;
|
|
8
8
|
const MAX_STDERR_CHARS = 5000;
|
|
9
|
-
export function createExecuteCommandTool({ datasetId, sandboxId, env }) {
|
|
9
|
+
export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
10
10
|
return tool({
|
|
11
11
|
description: "Execute Python scripts in the sandbox. Always saves script to a file before executing. The tool's output is EXACTLY the script's stdout and includes the script file path for traceability. CRITICAL: Print concise, human-readable summaries only; do NOT print raw large data. For big results, write artifacts to files in the workstation and print their file paths. Always include progress/result prints (e.g., 'Processing file X...', 'Found Y records', 'Generated output.csv').",
|
|
12
12
|
inputSchema: z.object({
|
|
@@ -25,7 +25,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, env }) {
|
|
|
25
25
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
26
26
|
try {
|
|
27
27
|
await writeDatasetSandboxFilesStep({
|
|
28
|
-
|
|
28
|
+
runtime,
|
|
29
29
|
sandboxId,
|
|
30
30
|
files: [
|
|
31
31
|
{
|
|
@@ -37,7 +37,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, env }) {
|
|
|
37
37
|
console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
|
|
38
38
|
console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
|
|
39
39
|
const result = await runDatasetSandboxCommandStep({
|
|
40
|
-
|
|
40
|
+
runtime,
|
|
41
41
|
sandboxId,
|
|
42
42
|
cmd: "python",
|
|
43
43
|
args: [scriptFile],
|
|
|
package/dist/file/file-dataset.agent.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { createContext, type ContextReactor } from "@ekairos/events";
|
|
2
2
|
import { FilePreviewContext } from "./filepreview.js";
|
|
3
|
-
export type FileParseStoryContext = {
|
|
3
|
+
export type FileParseContext = {
|
|
4
4
|
datasetId: string;
|
|
5
5
|
fileId: string;
|
|
6
6
|
instructions: string;
|
|
@@ -15,7 +15,7 @@ export type FileParseStoryContext = {
|
|
|
15
15
|
iterationCount: number;
|
|
16
16
|
filePreview?: FilePreviewContext;
|
|
17
17
|
};
|
|
18
|
-
export type FileParseStoryParams = {
|
|
18
|
+
export type FileParseContextParams = {
|
|
19
19
|
fileId: string;
|
|
20
20
|
instructions?: string;
|
|
21
21
|
sandboxId?: string;
|
|
@@ -23,11 +23,15 @@ export type FileParseStoryParams = {
|
|
|
23
23
|
model?: string;
|
|
24
24
|
reactor?: ContextReactor<any, any>;
|
|
25
25
|
};
|
|
26
|
-
export type
|
|
26
|
+
export type FileParseRunOptions = {
|
|
27
|
+
prompt?: string;
|
|
28
|
+
durable?: boolean;
|
|
29
|
+
};
|
|
30
|
+
export type FileParseContextBuilder<Env extends {
|
|
27
31
|
orgId: string;
|
|
28
32
|
}> = {
|
|
29
33
|
datasetId: string;
|
|
30
|
-
|
|
34
|
+
context: ReturnType<ReturnType<typeof createContext<Env>>["context"]> extends any ? any : any;
|
|
31
35
|
};
|
|
32
36
|
export type DatasetResult = {
|
|
33
37
|
id: string;
|
|
@@ -44,13 +48,13 @@ export type DatasetResult = {
|
|
|
44
48
|
* Factory (DX-first):
|
|
45
49
|
*
|
|
46
50
|
* Usage:
|
|
47
|
-
* const { datasetId } = await
|
|
51
|
+
* const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
|
|
48
52
|
*
|
|
49
|
-
* -
|
|
50
|
-
* - All I/O happens in `"use step"` functions via Ekairos runtime
|
|
51
|
-
* - `parse()` is the entrypoint; it calls `
|
|
53
|
+
* - Uses the caller runtime; no secondary runtime is created.
|
|
54
|
+
* - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
|
|
55
|
+
* - `parse()` is the entrypoint; it calls `context.react(...)` internally.
|
|
52
56
|
*/
|
|
53
|
-
export declare function createFileParseStory<Env extends {
|
|
57
|
+
export declare function createFileParseContext<Env extends {
|
|
54
58
|
orgId: string;
|
|
55
59
|
}>(fileId: string, opts?: {
|
|
56
60
|
instructions?: string;
|
|
@@ -60,8 +64,10 @@ export declare function createFileParseStory<Env extends {
|
|
|
60
64
|
reactor?: ContextReactor<any, any>;
|
|
61
65
|
}): {
|
|
62
66
|
datasetId: string;
|
|
63
|
-
parse(
|
|
67
|
+
parse(runtime: {
|
|
68
|
+
env: Env;
|
|
69
|
+
}, options?: FileParseRunOptions): Promise<{
|
|
64
70
|
datasetId: string;
|
|
65
71
|
}>;
|
|
66
|
-
|
|
72
|
+
context: any;
|
|
67
73
|
};
|