@ekairos/dataset 1.22.56-beta.development.0 → 1.22.58-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/completeDataset.tool.d.ts +5 -0
- package/dist/completeDataset.tool.js +34 -7
- package/dist/dataset/steps.d.ts +1 -1
- package/dist/dataset/steps.js +1 -1
- package/dist/dataset.js +5 -5
- package/dist/executeCommand.tool.d.ts +18 -0
- package/dist/executeCommand.tool.js +49 -7
- package/dist/file/file-dataset.agent.js +8 -4
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -1
- package/dist/query/queryDomain.step.js +2 -2
- package/dist/sandbox/steps.d.ts +15 -0
- package/dist/sandbox/steps.js +32 -0
- package/dist/service.d.ts +1 -1
- package/dist/service.js +4 -4
- package/dist/transform/transform-dataset.agent.js +8 -4
- package/package.json +4 -4
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { tool } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
-
import { readDatasetSandboxFileStep, runDatasetSandboxCommandStep } from "./sandbox/steps.js";
|
|
3
|
+
import { readDatasetSandboxFileStep, readDatasetSandboxTextFileStep, runDatasetSandboxCommandStep } from "./sandbox/steps.js";
|
|
4
4
|
import Ajv from "ajv";
|
|
5
5
|
import { getDatasetOutputPath, } from "./datasetFiles.js";
|
|
6
6
|
import { datasetGetByIdStep, datasetUpdateStatusStep, datasetUploadOutputFileStep } from "./dataset/steps.js";
|
|
@@ -113,9 +113,12 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
|
|
|
113
113
|
message: "Empty file content",
|
|
114
114
|
};
|
|
115
115
|
}
|
|
116
|
-
const fileBuffer = Buffer.from(fileRead.contentBase64, "base64");
|
|
117
116
|
console.log(`[Dataset ${datasetId}] Uploading file to InstantDB storage`);
|
|
118
|
-
const uploadResult = await datasetUploadOutputFileStep({
|
|
117
|
+
const uploadResult = await datasetUploadOutputFileStep({
|
|
118
|
+
runtime,
|
|
119
|
+
datasetId,
|
|
120
|
+
contentBase64: fileRead.contentBase64,
|
|
121
|
+
});
|
|
119
122
|
if (!uploadResult.ok) {
|
|
120
123
|
console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
|
|
121
124
|
return {
|
|
@@ -176,6 +179,31 @@ export function didCompleteDatasetSucceed(event) {
|
|
|
176
179
|
return false;
|
|
177
180
|
});
|
|
178
181
|
}
|
|
182
|
+
export function getDatasetFatalFailure(event) {
|
|
183
|
+
const parts = Array.isArray(event?.content?.parts) ? event.content.parts : [];
|
|
184
|
+
for (const part of parts) {
|
|
185
|
+
let actionName;
|
|
186
|
+
let output;
|
|
187
|
+
if (part?.type === "action") {
|
|
188
|
+
actionName = part.content?.actionName;
|
|
189
|
+
output = part.content?.output;
|
|
190
|
+
}
|
|
191
|
+
else if (typeof part?.type === "string" && part.type.startsWith("tool-")) {
|
|
192
|
+
actionName = part.type.slice("tool-".length);
|
|
193
|
+
output = part.output ?? part.result;
|
|
194
|
+
}
|
|
195
|
+
if (!output || output.fatal !== true) {
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
const message = typeof output.error === "string" && output.error.trim()
|
|
199
|
+
? output.error.trim()
|
|
200
|
+
: typeof output.message === "string" && output.message.trim()
|
|
201
|
+
? output.message.trim()
|
|
202
|
+
: "Dataset action failed fatally";
|
|
203
|
+
return actionName ? `${actionName}: ${message}` : message;
|
|
204
|
+
}
|
|
205
|
+
return null;
|
|
206
|
+
}
|
|
179
207
|
async function ensureFileExists(runtime, sandboxId, path) {
|
|
180
208
|
const result = await runDatasetSandboxCommandStep({
|
|
181
209
|
runtime,
|
|
@@ -192,8 +220,8 @@ async function validateJsonlRows({ runtime, sandboxId, outputPath, validator, da
|
|
|
192
220
|
let validRowCount = 0;
|
|
193
221
|
let rowRecordCount = 0;
|
|
194
222
|
console.log(`[Dataset ${datasetId}] Reading and validating JSONL file from sandbox`);
|
|
195
|
-
const fileRead = await
|
|
196
|
-
if (!fileRead.
|
|
223
|
+
const fileRead = await readDatasetSandboxTextFileStep({ runtime, sandboxId, path: outputPath });
|
|
224
|
+
if (!fileRead.content) {
|
|
197
225
|
console.log(`[Dataset ${datasetId}] Empty output file`);
|
|
198
226
|
return {
|
|
199
227
|
success: false,
|
|
@@ -205,8 +233,7 @@ async function validateJsonlRows({ runtime, sandboxId, outputPath, validator, da
|
|
|
205
233
|
message: "output.jsonl is empty",
|
|
206
234
|
};
|
|
207
235
|
}
|
|
208
|
-
const
|
|
209
|
-
const lines = fileContent.split("\n");
|
|
236
|
+
const lines = fileRead.content.split("\n");
|
|
210
237
|
console.log(`[Dataset ${datasetId}] Validating ${lines.length} lines`);
|
|
211
238
|
for (let index = 0; index < lines.length; index++) {
|
|
212
239
|
const line = lines[index];
|
package/dist/dataset/steps.d.ts
CHANGED
|
@@ -18,7 +18,7 @@ export declare function datasetUpdateSchemaStep(params: {
|
|
|
18
18
|
export declare function datasetUploadOutputFileStep(params: {
|
|
19
19
|
runtime: any;
|
|
20
20
|
datasetId: string;
|
|
21
|
-
|
|
21
|
+
contentBase64: string;
|
|
22
22
|
}): Promise<import("../service.js").ServiceResult<{
|
|
23
23
|
fileId: string;
|
|
24
24
|
storagePath: string;
|
package/dist/dataset/steps.js
CHANGED
|
@@ -60,7 +60,7 @@ export async function datasetUploadOutputFileStep(params) {
|
|
|
60
60
|
const service = new DatasetService(db);
|
|
61
61
|
return await service.uploadDatasetOutputFile({
|
|
62
62
|
datasetId: params.datasetId,
|
|
63
|
-
fileBuffer: params.
|
|
63
|
+
fileBuffer: Buffer.from(params.contentBase64, "base64"),
|
|
64
64
|
});
|
|
65
65
|
}
|
|
66
66
|
export async function datasetUpdateStatusStep(params) {
|
package/dist/dataset.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { id as newId } from "@instantdb/admin";
|
|
2
1
|
import { buildObjectOutputInstructions } from "./builder/instructions.js";
|
|
3
|
-
import {
|
|
2
|
+
import { createDatasetId } from "./id.js";
|
|
3
|
+
import { materializeDerivedDataset, materializeSingleFileLikeSource, } from "./builder/materialize.js";
|
|
4
4
|
import { materializeQuerySource } from "./builder/materializeQuery.js";
|
|
5
5
|
import { finalizeBuildResult } from "./builder/persistence.js";
|
|
6
6
|
export function dataset(runtime, options = {}) {
|
|
@@ -131,20 +131,20 @@ export function dataset(runtime, options = {}) {
|
|
|
131
131
|
if (!effectiveState.reactor) {
|
|
132
132
|
throw new Error("dataset_reactor_required");
|
|
133
133
|
}
|
|
134
|
-
await
|
|
134
|
+
await materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
|
|
135
135
|
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
136
136
|
}
|
|
137
137
|
if (!effectiveState.reactor) {
|
|
138
138
|
throw new Error("dataset_reactor_required");
|
|
139
139
|
}
|
|
140
|
-
await
|
|
140
|
+
await materializeDerivedDataset(effectiveState, targetDatasetId);
|
|
141
141
|
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
142
142
|
},
|
|
143
143
|
};
|
|
144
144
|
return api;
|
|
145
145
|
}
|
|
146
146
|
function normalizeDatasetId(datasetId) {
|
|
147
|
-
const normalized = String(datasetId ??
|
|
147
|
+
const normalized = String(datasetId ?? createDatasetId()).trim();
|
|
148
148
|
if (!normalized) {
|
|
149
149
|
throw new Error("dataset_id_required");
|
|
150
150
|
}
|
|
@@ -7,6 +7,20 @@ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime
|
|
|
7
7
|
pythonCode: string;
|
|
8
8
|
scriptName: string;
|
|
9
9
|
}, {
|
|
10
|
+
success: boolean;
|
|
11
|
+
fatal: boolean;
|
|
12
|
+
status: string;
|
|
13
|
+
error: string;
|
|
14
|
+
stdout: string;
|
|
15
|
+
stderr: string;
|
|
16
|
+
exitCode: number;
|
|
17
|
+
scriptPath: string;
|
|
18
|
+
stdoutTruncated: boolean;
|
|
19
|
+
stderrTruncated: boolean;
|
|
20
|
+
stdoutOriginalLength: number;
|
|
21
|
+
stderrOriginalLength: number;
|
|
22
|
+
message?: undefined;
|
|
23
|
+
} | {
|
|
10
24
|
success: boolean;
|
|
11
25
|
exitCode: number;
|
|
12
26
|
stdout: string;
|
|
@@ -17,6 +31,8 @@ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime
|
|
|
17
31
|
stderrTruncated: boolean;
|
|
18
32
|
stdoutOriginalLength: number;
|
|
19
33
|
stderrOriginalLength: number;
|
|
34
|
+
fatal?: undefined;
|
|
35
|
+
status?: undefined;
|
|
20
36
|
message?: undefined;
|
|
21
37
|
} | {
|
|
22
38
|
success: boolean;
|
|
@@ -29,6 +45,8 @@ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime
|
|
|
29
45
|
stderrTruncated: boolean;
|
|
30
46
|
stdoutOriginalLength: number;
|
|
31
47
|
stderrOriginalLength: number;
|
|
48
|
+
fatal?: undefined;
|
|
49
|
+
status?: undefined;
|
|
32
50
|
error?: undefined;
|
|
33
51
|
}>;
|
|
34
52
|
export {};
|
|
@@ -1,39 +1,81 @@
|
|
|
1
1
|
import { tool } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
-
import { runDatasetSandboxCommandStep,
|
|
3
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep } from "./sandbox/steps.js";
|
|
4
4
|
import { getDatasetWorkstation } from "./datasetFiles.js";
|
|
5
5
|
// To keep responses predictable for big data scenarios, we cap stdout/stderr.
|
|
6
6
|
// The tool's return payload exposes stdout (capped) plus the on-disk script path.
|
|
7
7
|
const MAX_STDOUT_CHARS = 20000;
|
|
8
8
|
const MAX_STDERR_CHARS = 5000;
|
|
9
|
+
function normalizeScriptName(scriptName) {
|
|
10
|
+
const normalized = String(scriptName ?? "")
|
|
11
|
+
.trim()
|
|
12
|
+
.replace(/[^a-zA-Z0-9_.-]/g, "_")
|
|
13
|
+
.replace(/_+/g, "_")
|
|
14
|
+
.slice(0, 80);
|
|
15
|
+
return normalized || "script";
|
|
16
|
+
}
|
|
17
|
+
function stableScriptHash(value) {
|
|
18
|
+
let hash = 2166136261;
|
|
19
|
+
for (let index = 0; index < value.length; index++) {
|
|
20
|
+
hash ^= value.charCodeAt(index);
|
|
21
|
+
hash = Math.imul(hash, 16777619);
|
|
22
|
+
}
|
|
23
|
+
return (hash >>> 0).toString(36);
|
|
24
|
+
}
|
|
9
25
|
export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
10
26
|
return tool({
|
|
11
27
|
description: "Execute Python scripts in the sandbox. Always saves script to a file before executing. The tool's output is EXACTLY the script's stdout and includes the script file path for traceability. CRITICAL: Print concise, human-readable summaries only; do NOT print raw large data. For big results, write artifacts to files in the workstation and print their file paths. Always include progress/result prints (e.g., 'Processing file X...', 'Found Y records', 'Generated output.csv').",
|
|
12
28
|
inputSchema: z.object({
|
|
13
29
|
pythonCode: z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
|
|
14
|
-
scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A
|
|
30
|
+
scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
|
|
15
31
|
}),
|
|
16
32
|
execute: async ({ pythonCode, scriptName }) => {
|
|
17
|
-
const uuid = `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
|
|
18
33
|
const workstation = getDatasetWorkstation(datasetId);
|
|
19
|
-
const
|
|
34
|
+
const normalizedScriptName = normalizeScriptName(scriptName);
|
|
35
|
+
const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
|
|
36
|
+
const scriptFile = `${workstation}/${normalizedScriptName}-${scriptHash}.py`;
|
|
20
37
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
21
38
|
console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
|
|
22
|
-
console.log(`[Dataset ${datasetId}] Script: ${
|
|
39
|
+
console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
|
|
23
40
|
console.log(`[Dataset ${datasetId}] File: ${scriptFile}`);
|
|
24
41
|
console.log(`[Dataset ${datasetId}] Code length: ${pythonCode.length} chars`);
|
|
25
42
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
26
43
|
try {
|
|
27
|
-
await
|
|
44
|
+
await writeDatasetSandboxTextFilesStep({
|
|
28
45
|
runtime,
|
|
29
46
|
sandboxId,
|
|
30
47
|
files: [
|
|
31
48
|
{
|
|
32
49
|
path: scriptFile,
|
|
33
|
-
|
|
50
|
+
content: pythonCode,
|
|
34
51
|
},
|
|
35
52
|
],
|
|
36
53
|
});
|
|
54
|
+
const written = await runDatasetSandboxCommandStep({
|
|
55
|
+
runtime,
|
|
56
|
+
sandboxId,
|
|
57
|
+
cmd: "test",
|
|
58
|
+
args: ["-f", scriptFile],
|
|
59
|
+
});
|
|
60
|
+
if (written.exitCode !== 0) {
|
|
61
|
+
const error = `Script write verification failed: ${scriptFile}`;
|
|
62
|
+
console.error(`[Dataset ${datasetId}] ${error}`);
|
|
63
|
+
console.error(`[Dataset ${datasetId}] ========================================`);
|
|
64
|
+
return {
|
|
65
|
+
success: false,
|
|
66
|
+
fatal: true,
|
|
67
|
+
status: "script_write_failed",
|
|
68
|
+
error,
|
|
69
|
+
stdout: written.stdout || "",
|
|
70
|
+
stderr: written.stderr || "",
|
|
71
|
+
exitCode: written.exitCode,
|
|
72
|
+
scriptPath: scriptFile,
|
|
73
|
+
stdoutTruncated: false,
|
|
74
|
+
stderrTruncated: false,
|
|
75
|
+
stdoutOriginalLength: 0,
|
|
76
|
+
stderrOriginalLength: 0,
|
|
77
|
+
};
|
|
78
|
+
}
|
|
37
79
|
console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
|
|
38
80
|
console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
|
|
39
81
|
const result = await runDatasetSandboxCommandStep({
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
2
|
-
import { id } from "@instantdb/admin";
|
|
3
2
|
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
4
|
-
import { createCompleteDatasetTool, didCompleteDatasetSucceed, } from "../completeDataset.tool.js";
|
|
3
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
|
|
5
4
|
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
6
5
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
7
6
|
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
8
7
|
import { buildFileDatasetPromptStep, generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
|
|
8
|
+
import { createDatasetId } from "../id.js";
|
|
9
9
|
async function awaitContextRun(run) {
|
|
10
10
|
if (!run)
|
|
11
11
|
return;
|
|
@@ -136,6 +136,10 @@ function createFileParseContextDefinition(params) {
|
|
|
136
136
|
return actions;
|
|
137
137
|
})
|
|
138
138
|
.shouldContinue(({ reactionEvent }) => {
|
|
139
|
+
const fatalFailure = getDatasetFatalFailure(reactionEvent);
|
|
140
|
+
if (fatalFailure) {
|
|
141
|
+
throw new Error(fatalFailure);
|
|
142
|
+
}
|
|
139
143
|
return !didCompleteDatasetSucceed(reactionEvent);
|
|
140
144
|
});
|
|
141
145
|
if (params.reactor) {
|
|
@@ -148,7 +152,7 @@ function createFileParseContextDefinition(params) {
|
|
|
148
152
|
return { datasetId: fallbackDatasetId ?? "", context };
|
|
149
153
|
}
|
|
150
154
|
export function createFileParseContext(fileId, opts) {
|
|
151
|
-
const datasetId = opts?.datasetId ??
|
|
155
|
+
const datasetId = opts?.datasetId ?? createDatasetId();
|
|
152
156
|
const params = {
|
|
153
157
|
fileId,
|
|
154
158
|
instructions: opts?.instructions,
|
|
@@ -162,7 +166,7 @@ export function createFileParseContext(fileId, opts) {
|
|
|
162
166
|
datasetId,
|
|
163
167
|
async parse(runtime, options = {}) {
|
|
164
168
|
const triggerEvent = {
|
|
165
|
-
id:
|
|
169
|
+
id: createDatasetId(),
|
|
166
170
|
type: INPUT_TEXT_ITEM_TYPE,
|
|
167
171
|
channel: WEB_CHANNEL,
|
|
168
172
|
createdAt: new Date().toISOString(),
|
package/dist/id.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function createDatasetId(): string;
|
package/dist/id.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export function createDatasetId() {
|
|
2
|
+
const uuid = globalThis.crypto?.randomUUID?.();
|
|
3
|
+
if (typeof uuid === "string" && uuid.length > 0)
|
|
4
|
+
return uuid;
|
|
5
|
+
return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
|
|
6
|
+
const r = (Math.random() * 16) | 0;
|
|
7
|
+
const v = c === "x" ? r : (r & 0x3) | 0x8;
|
|
8
|
+
return v.toString(16);
|
|
9
|
+
});
|
|
10
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import "./builder/materialize.js";
|
|
2
1
|
export * from "./dataset.js";
|
|
3
2
|
export * from "./domain.js";
|
|
4
3
|
export * from "./materializeDataset.tool.js";
|
|
5
4
|
export * from "./schema.js";
|
|
6
5
|
export * from "./service.js";
|
|
6
|
+
export { registerFileParseContext } from "./file/file-dataset.agent.js";
|
|
7
|
+
export { registerTransformDatasetContext } from "./transform/index.js";
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import "./builder/materialize.js";
|
|
2
1
|
export * from "./dataset.js";
|
|
3
2
|
export * from "./domain.js";
|
|
4
3
|
export * from "./materializeDataset.tool.js";
|
|
5
4
|
export * from "./schema.js";
|
|
6
5
|
export * from "./service.js";
|
|
6
|
+
export { registerFileParseContext } from "./file/file-dataset.agent.js";
|
|
7
|
+
export { registerTransformDatasetContext } from "./transform/index.js";
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { id as newId } from "@instantdb/admin";
|
|
2
1
|
import { DatasetService } from "../service.js";
|
|
2
|
+
import { createDatasetId } from "../id.js";
|
|
3
3
|
function normalizeRows(result) {
|
|
4
4
|
if (!result || typeof result !== "object")
|
|
5
5
|
return [];
|
|
@@ -56,7 +56,7 @@ export async function queryDomainStep(params) {
|
|
|
56
56
|
"use step";
|
|
57
57
|
const db = await getRuntimeDb(params.runtime);
|
|
58
58
|
const service = new DatasetService(db);
|
|
59
|
-
const datasetId = params.datasetId ??
|
|
59
|
+
const datasetId = params.datasetId ?? createDatasetId();
|
|
60
60
|
const queryResult = await db.query(params.query);
|
|
61
61
|
const rows = normalizeRows(queryResult);
|
|
62
62
|
const previewRows = rows.slice(0, 20);
|
package/dist/sandbox/steps.d.ts
CHANGED
|
@@ -27,6 +27,14 @@ export declare function writeDatasetSandboxFilesStep(params: {
|
|
|
27
27
|
contentBase64: string;
|
|
28
28
|
}>;
|
|
29
29
|
}): Promise<void>;
|
|
30
|
+
export declare function writeDatasetSandboxTextFilesStep(params: {
|
|
31
|
+
runtime: any;
|
|
32
|
+
sandboxId: DatasetSandboxId;
|
|
33
|
+
files: Array<{
|
|
34
|
+
path: string;
|
|
35
|
+
content: string;
|
|
36
|
+
}>;
|
|
37
|
+
}): Promise<void>;
|
|
30
38
|
export declare function readDatasetSandboxFileStep(params: {
|
|
31
39
|
runtime: any;
|
|
32
40
|
sandboxId: DatasetSandboxId;
|
|
@@ -34,6 +42,13 @@ export declare function readDatasetSandboxFileStep(params: {
|
|
|
34
42
|
}): Promise<{
|
|
35
43
|
contentBase64: string;
|
|
36
44
|
}>;
|
|
45
|
+
export declare function readDatasetSandboxTextFileStep(params: {
|
|
46
|
+
runtime: any;
|
|
47
|
+
sandboxId: DatasetSandboxId;
|
|
48
|
+
path: string;
|
|
49
|
+
}): Promise<{
|
|
50
|
+
content: string;
|
|
51
|
+
}>;
|
|
37
52
|
export declare function stopDatasetSandboxStep(params: {
|
|
38
53
|
runtime: any;
|
|
39
54
|
sandboxId: DatasetSandboxId;
|
package/dist/sandbox/steps.js
CHANGED
|
@@ -117,6 +117,25 @@ export async function writeDatasetSandboxFilesStep(params) {
|
|
|
117
117
|
if (!result.ok)
|
|
118
118
|
throw new Error(result.error);
|
|
119
119
|
}
|
|
120
|
+
export async function writeDatasetSandboxTextFilesStep(params) {
|
|
121
|
+
"use step";
|
|
122
|
+
if (isLocalDatasetSandboxMode()) {
|
|
123
|
+
for (const file of params.files) {
|
|
124
|
+
await fs.mkdir(path.dirname(file.path), { recursive: true });
|
|
125
|
+
await fs.writeFile(file.path, file.content, "utf-8");
|
|
126
|
+
}
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
const db = await getRuntimeDb(params.runtime);
|
|
130
|
+
const service = new SandboxService(db);
|
|
131
|
+
const files = params.files.map((file) => ({
|
|
132
|
+
path: file.path,
|
|
133
|
+
contentBase64: Buffer.from(file.content, "utf-8").toString("base64"),
|
|
134
|
+
}));
|
|
135
|
+
const result = await service.writeFiles(params.sandboxId, files);
|
|
136
|
+
if (!result.ok)
|
|
137
|
+
throw new Error(result.error);
|
|
138
|
+
}
|
|
120
139
|
export async function readDatasetSandboxFileStep(params) {
|
|
121
140
|
"use step";
|
|
122
141
|
if (isLocalDatasetSandboxMode()) {
|
|
@@ -130,6 +149,19 @@ export async function readDatasetSandboxFileStep(params) {
|
|
|
130
149
|
throw new Error(result.error);
|
|
131
150
|
return result.data;
|
|
132
151
|
}
|
|
152
|
+
export async function readDatasetSandboxTextFileStep(params) {
|
|
153
|
+
"use step";
|
|
154
|
+
if (isLocalDatasetSandboxMode()) {
|
|
155
|
+
const content = await fs.readFile(params.path, "utf-8");
|
|
156
|
+
return { content };
|
|
157
|
+
}
|
|
158
|
+
const db = await getRuntimeDb(params.runtime);
|
|
159
|
+
const service = new SandboxService(db);
|
|
160
|
+
const result = await service.readFile(params.sandboxId, params.path);
|
|
161
|
+
if (!result.ok)
|
|
162
|
+
throw new Error(result.error);
|
|
163
|
+
return { content: Buffer.from(result.data.contentBase64, "base64").toString("utf-8") };
|
|
164
|
+
}
|
|
133
165
|
export async function stopDatasetSandboxStep(params) {
|
|
134
166
|
"use step";
|
|
135
167
|
if (isLocalDatasetSandboxMode()) {
|
package/dist/service.d.ts
CHANGED
package/dist/service.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { createDatasetId } from "./id.js";
|
|
2
2
|
export class DatasetService {
|
|
3
3
|
constructor(db) {
|
|
4
4
|
this.db = db;
|
|
@@ -27,9 +27,9 @@ export class DatasetService {
|
|
|
27
27
|
}
|
|
28
28
|
async createDataset(params) {
|
|
29
29
|
try {
|
|
30
|
-
const datasetId = params.id ??
|
|
30
|
+
const datasetId = params.id ?? createDatasetId();
|
|
31
31
|
const existing = await this.resolveDatasetEntityId(datasetId);
|
|
32
|
-
const entityId = existing.ok ? existing.data :
|
|
32
|
+
const entityId = existing.ok ? existing.data : createDatasetId();
|
|
33
33
|
const mutations = [];
|
|
34
34
|
mutations.push(this.db.tx.dataset_datasets[entityId].update({
|
|
35
35
|
datasetId,
|
|
@@ -73,7 +73,7 @@ export class DatasetService {
|
|
|
73
73
|
return resolved;
|
|
74
74
|
const mutations = [];
|
|
75
75
|
for (const record of params.records) {
|
|
76
|
-
const recordId =
|
|
76
|
+
const recordId = createDatasetId();
|
|
77
77
|
mutations.push(this.db.tx.dataset_records[recordId].update({
|
|
78
78
|
rowContent: record.rowContent,
|
|
79
79
|
order: record.order,
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
2
|
-
import { id } from "@instantdb/admin";
|
|
3
2
|
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
4
|
-
import { createCompleteDatasetTool, didCompleteDatasetSucceed, } from "../completeDataset.tool.js";
|
|
3
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
|
|
5
4
|
import { datasetUpdateSchemaStep } from "../dataset/steps.js";
|
|
6
5
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
7
6
|
import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
|
|
7
|
+
import { createDatasetId } from "../id.js";
|
|
8
8
|
async function awaitContextRun(run) {
|
|
9
9
|
if (!run)
|
|
10
10
|
return;
|
|
@@ -130,6 +130,10 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
130
130
|
};
|
|
131
131
|
})
|
|
132
132
|
.shouldContinue(({ reactionEvent }) => {
|
|
133
|
+
const fatalFailure = getDatasetFatalFailure(reactionEvent);
|
|
134
|
+
if (fatalFailure) {
|
|
135
|
+
throw new Error(fatalFailure);
|
|
136
|
+
}
|
|
133
137
|
return !didCompleteDatasetSucceed(reactionEvent);
|
|
134
138
|
});
|
|
135
139
|
if (params.reactor) {
|
|
@@ -142,7 +146,7 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
142
146
|
return { datasetId: fallbackDatasetId ?? "", context };
|
|
143
147
|
}
|
|
144
148
|
export function createTransformDatasetContext(params) {
|
|
145
|
-
const datasetId = params.datasetId ??
|
|
149
|
+
const datasetId = params.datasetId ?? createDatasetId();
|
|
146
150
|
const { context } = createTransformDatasetContextDefinition({
|
|
147
151
|
sourceDatasetIds: params.sourceDatasetIds,
|
|
148
152
|
outputSchema: params.outputSchema,
|
|
@@ -159,7 +163,7 @@ export function createTransformDatasetContext(params) {
|
|
|
159
163
|
? "the source dataset"
|
|
160
164
|
: `${params.sourceDatasetIds.length} source datasets`;
|
|
161
165
|
const triggerEvent = {
|
|
162
|
-
id:
|
|
166
|
+
id: createDatasetId(),
|
|
163
167
|
type: INPUT_TEXT_ITEM_TYPE,
|
|
164
168
|
channel: WEB_CHANNEL,
|
|
165
169
|
createdAt: new Date().toISOString(),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.
|
|
3
|
+
"version": "1.22.58-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.58-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.58-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.58-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|