@ekairos/dataset 1.22.56-beta.development.0 → 1.22.57-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,4 +49,9 @@ export declare function didCompleteDatasetSucceed(event: {
49
49
  parts?: any[];
50
50
  };
51
51
  }): boolean;
52
+ export declare function getDatasetFatalFailure(event: {
53
+ content?: {
54
+ parts?: any[];
55
+ };
56
+ }): string | null;
52
57
  export {};
@@ -1,6 +1,6 @@
1
1
  import { tool } from "ai";
2
2
  import { z } from "zod";
3
- import { readDatasetSandboxFileStep, runDatasetSandboxCommandStep } from "./sandbox/steps.js";
3
+ import { readDatasetSandboxFileStep, readDatasetSandboxTextFileStep, runDatasetSandboxCommandStep } from "./sandbox/steps.js";
4
4
  import Ajv from "ajv";
5
5
  import { getDatasetOutputPath, } from "./datasetFiles.js";
6
6
  import { datasetGetByIdStep, datasetUpdateStatusStep, datasetUploadOutputFileStep } from "./dataset/steps.js";
@@ -113,9 +113,12 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
113
113
  message: "Empty file content",
114
114
  };
115
115
  }
116
- const fileBuffer = Buffer.from(fileRead.contentBase64, "base64");
117
116
  console.log(`[Dataset ${datasetId}] Uploading file to InstantDB storage`);
118
- const uploadResult = await datasetUploadOutputFileStep({ runtime, datasetId, fileBuffer });
117
+ const uploadResult = await datasetUploadOutputFileStep({
118
+ runtime,
119
+ datasetId,
120
+ contentBase64: fileRead.contentBase64,
121
+ });
119
122
  if (!uploadResult.ok) {
120
123
  console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
121
124
  return {
@@ -176,6 +179,31 @@ export function didCompleteDatasetSucceed(event) {
176
179
  return false;
177
180
  });
178
181
  }
182
+ export function getDatasetFatalFailure(event) {
183
+ const parts = Array.isArray(event?.content?.parts) ? event.content.parts : [];
184
+ for (const part of parts) {
185
+ let actionName;
186
+ let output;
187
+ if (part?.type === "action") {
188
+ actionName = part.content?.actionName;
189
+ output = part.content?.output;
190
+ }
191
+ else if (typeof part?.type === "string" && part.type.startsWith("tool-")) {
192
+ actionName = part.type.slice("tool-".length);
193
+ output = part.output ?? part.result;
194
+ }
195
+ if (!output || output.fatal !== true) {
196
+ continue;
197
+ }
198
+ const message = typeof output.error === "string" && output.error.trim()
199
+ ? output.error.trim()
200
+ : typeof output.message === "string" && output.message.trim()
201
+ ? output.message.trim()
202
+ : "Dataset action failed fatally";
203
+ return actionName ? `${actionName}: ${message}` : message;
204
+ }
205
+ return null;
206
+ }
179
207
  async function ensureFileExists(runtime, sandboxId, path) {
180
208
  const result = await runDatasetSandboxCommandStep({
181
209
  runtime,
@@ -192,8 +220,8 @@ async function validateJsonlRows({ runtime, sandboxId, outputPath, validator, da
192
220
  let validRowCount = 0;
193
221
  let rowRecordCount = 0;
194
222
  console.log(`[Dataset ${datasetId}] Reading and validating JSONL file from sandbox`);
195
- const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: outputPath });
196
- if (!fileRead.contentBase64) {
223
+ const fileRead = await readDatasetSandboxTextFileStep({ runtime, sandboxId, path: outputPath });
224
+ if (!fileRead.content) {
197
225
  console.log(`[Dataset ${datasetId}] Empty output file`);
198
226
  return {
199
227
  success: false,
@@ -205,8 +233,7 @@ async function validateJsonlRows({ runtime, sandboxId, outputPath, validator, da
205
233
  message: "output.jsonl is empty",
206
234
  };
207
235
  }
208
- const fileContent = Buffer.from(fileRead.contentBase64, "base64").toString();
209
- const lines = fileContent.split("\n");
236
+ const lines = fileRead.content.split("\n");
210
237
  console.log(`[Dataset ${datasetId}] Validating ${lines.length} lines`);
211
238
  for (let index = 0; index < lines.length; index++) {
212
239
  const line = lines[index];
@@ -18,7 +18,7 @@ export declare function datasetUpdateSchemaStep(params: {
18
18
  export declare function datasetUploadOutputFileStep(params: {
19
19
  runtime: any;
20
20
  datasetId: string;
21
- fileBuffer: Buffer;
21
+ contentBase64: string;
22
22
  }): Promise<import("../service.js").ServiceResult<{
23
23
  fileId: string;
24
24
  storagePath: string;
@@ -60,7 +60,7 @@ export async function datasetUploadOutputFileStep(params) {
60
60
  const service = new DatasetService(db);
61
61
  return await service.uploadDatasetOutputFile({
62
62
  datasetId: params.datasetId,
63
- fileBuffer: params.fileBuffer,
63
+ fileBuffer: Buffer.from(params.contentBase64, "base64"),
64
64
  });
65
65
  }
66
66
  export async function datasetUpdateStatusStep(params) {
package/dist/dataset.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { id as newId } from "@instantdb/admin";
2
2
  import { buildObjectOutputInstructions } from "./builder/instructions.js";
3
- import { getDatasetAgentMaterializers } from "./builder/agentMaterializers.js";
3
+ import { materializeDerivedDataset, materializeSingleFileLikeSource, } from "./builder/materialize.js";
4
4
  import { materializeQuerySource } from "./builder/materializeQuery.js";
5
5
  import { finalizeBuildResult } from "./builder/persistence.js";
6
6
  export function dataset(runtime, options = {}) {
@@ -131,13 +131,13 @@ export function dataset(runtime, options = {}) {
131
131
  if (!effectiveState.reactor) {
132
132
  throw new Error("dataset_reactor_required");
133
133
  }
134
- await getDatasetAgentMaterializers().materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
134
+ await materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
135
135
  return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
136
136
  }
137
137
  if (!effectiveState.reactor) {
138
138
  throw new Error("dataset_reactor_required");
139
139
  }
140
- await getDatasetAgentMaterializers().materializeDerivedDataset(effectiveState, targetDatasetId);
140
+ await materializeDerivedDataset(effectiveState, targetDatasetId);
141
141
  return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
142
142
  },
143
143
  };
@@ -7,6 +7,20 @@ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime
7
7
  pythonCode: string;
8
8
  scriptName: string;
9
9
  }, {
10
+ success: boolean;
11
+ fatal: boolean;
12
+ status: string;
13
+ error: string;
14
+ stdout: string;
15
+ stderr: string;
16
+ exitCode: number;
17
+ scriptPath: string;
18
+ stdoutTruncated: boolean;
19
+ stderrTruncated: boolean;
20
+ stdoutOriginalLength: number;
21
+ stderrOriginalLength: number;
22
+ message?: undefined;
23
+ } | {
10
24
  success: boolean;
11
25
  exitCode: number;
12
26
  stdout: string;
@@ -17,6 +31,8 @@ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime
17
31
  stderrTruncated: boolean;
18
32
  stdoutOriginalLength: number;
19
33
  stderrOriginalLength: number;
34
+ fatal?: undefined;
35
+ status?: undefined;
20
36
  message?: undefined;
21
37
  } | {
22
38
  success: boolean;
@@ -29,6 +45,8 @@ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime
29
45
  stderrTruncated: boolean;
30
46
  stdoutOriginalLength: number;
31
47
  stderrOriginalLength: number;
48
+ fatal?: undefined;
49
+ status?: undefined;
32
50
  error?: undefined;
33
51
  }>;
34
52
  export {};
@@ -1,39 +1,81 @@
1
1
  import { tool } from "ai";
2
2
  import { z } from "zod";
3
- import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "./sandbox/steps.js";
3
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep } from "./sandbox/steps.js";
4
4
  import { getDatasetWorkstation } from "./datasetFiles.js";
5
5
  // To keep responses predictable for big data scenarios, we cap stdout/stderr.
6
6
  // The tool's return payload exposes stdout (capped) plus the on-disk script path.
7
7
  const MAX_STDOUT_CHARS = 20000;
8
8
  const MAX_STDERR_CHARS = 5000;
9
+ function normalizeScriptName(scriptName) {
10
+ const normalized = String(scriptName ?? "")
11
+ .trim()
12
+ .replace(/[^a-zA-Z0-9_.-]/g, "_")
13
+ .replace(/_+/g, "_")
14
+ .slice(0, 80);
15
+ return normalized || "script";
16
+ }
17
+ function stableScriptHash(value) {
18
+ let hash = 2166136261;
19
+ for (let index = 0; index < value.length; index++) {
20
+ hash ^= value.charCodeAt(index);
21
+ hash = Math.imul(hash, 16777619);
22
+ }
23
+ return (hash >>> 0).toString(36);
24
+ }
9
25
  export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
10
26
  return tool({
11
27
  description: "Execute Python scripts in the sandbox. Always saves script to a file before executing. The tool's output is EXACTLY the script's stdout and includes the script file path for traceability. CRITICAL: Print concise, human-readable summaries only; do NOT print raw large data. For big results, write artifacts to files in the workstation and print their file paths. Always include progress/result prints (e.g., 'Processing file X...', 'Found Y records', 'Generated output.csv').",
12
28
  inputSchema: z.object({
13
29
  pythonCode: z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
14
- scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A UUID will be appended automatically."),
30
+ scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
15
31
  }),
16
32
  execute: async ({ pythonCode, scriptName }) => {
17
- const uuid = `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
18
33
  const workstation = getDatasetWorkstation(datasetId);
19
- const scriptFile = `${workstation}/${scriptName}-${uuid}.py`;
34
+ const normalizedScriptName = normalizeScriptName(scriptName);
35
+ const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
36
+ const scriptFile = `${workstation}/${normalizedScriptName}-${scriptHash}.py`;
20
37
  console.log(`[Dataset ${datasetId}] ========================================`);
21
38
  console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
22
- console.log(`[Dataset ${datasetId}] Script: ${scriptName}`);
39
+ console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
23
40
  console.log(`[Dataset ${datasetId}] File: ${scriptFile}`);
24
41
  console.log(`[Dataset ${datasetId}] Code length: ${pythonCode.length} chars`);
25
42
  console.log(`[Dataset ${datasetId}] ========================================`);
26
43
  try {
27
- await writeDatasetSandboxFilesStep({
44
+ await writeDatasetSandboxTextFilesStep({
28
45
  runtime,
29
46
  sandboxId,
30
47
  files: [
31
48
  {
32
49
  path: scriptFile,
33
- contentBase64: Buffer.from(pythonCode, "utf-8").toString("base64"),
50
+ content: pythonCode,
34
51
  },
35
52
  ],
36
53
  });
54
+ const written = await runDatasetSandboxCommandStep({
55
+ runtime,
56
+ sandboxId,
57
+ cmd: "test",
58
+ args: ["-f", scriptFile],
59
+ });
60
+ if (written.exitCode !== 0) {
61
+ const error = `Script write verification failed: ${scriptFile}`;
62
+ console.error(`[Dataset ${datasetId}] ${error}`);
63
+ console.error(`[Dataset ${datasetId}] ========================================`);
64
+ return {
65
+ success: false,
66
+ fatal: true,
67
+ status: "script_write_failed",
68
+ error,
69
+ stdout: written.stdout || "",
70
+ stderr: written.stderr || "",
71
+ exitCode: written.exitCode,
72
+ scriptPath: scriptFile,
73
+ stdoutTruncated: false,
74
+ stderrTruncated: false,
75
+ stdoutOriginalLength: 0,
76
+ stderrOriginalLength: 0,
77
+ };
78
+ }
37
79
  console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
38
80
  console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
39
81
  const result = await runDatasetSandboxCommandStep({
@@ -1,7 +1,7 @@
1
1
  import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
2
2
  import { id } from "@instantdb/admin";
3
3
  import { createClearDatasetTool } from "../clearDataset.tool.js";
4
- import { createCompleteDatasetTool, didCompleteDatasetSucceed, } from "../completeDataset.tool.js";
4
+ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
5
5
  import { datasetGetByIdStep } from "../dataset/steps.js";
6
6
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
7
  import { createGenerateSchemaTool } from "./generateSchema.tool.js";
@@ -136,6 +136,10 @@ function createFileParseContextDefinition(params) {
136
136
  return actions;
137
137
  })
138
138
  .shouldContinue(({ reactionEvent }) => {
139
+ const fatalFailure = getDatasetFatalFailure(reactionEvent);
140
+ if (fatalFailure) {
141
+ throw new Error(fatalFailure);
142
+ }
139
143
  return !didCompleteDatasetSucceed(reactionEvent);
140
144
  });
141
145
  if (params.reactor) {
package/dist/index.d.ts CHANGED
@@ -1,6 +1,7 @@
1
- import "./builder/materialize.js";
2
1
  export * from "./dataset.js";
3
2
  export * from "./domain.js";
4
3
  export * from "./materializeDataset.tool.js";
5
4
  export * from "./schema.js";
6
5
  export * from "./service.js";
6
+ export { registerFileParseContext } from "./file/file-dataset.agent.js";
7
+ export { registerTransformDatasetContext } from "./transform/index.js";
package/dist/index.js CHANGED
@@ -1,6 +1,7 @@
1
- import "./builder/materialize.js";
2
1
  export * from "./dataset.js";
3
2
  export * from "./domain.js";
4
3
  export * from "./materializeDataset.tool.js";
5
4
  export * from "./schema.js";
6
5
  export * from "./service.js";
6
+ export { registerFileParseContext } from "./file/file-dataset.agent.js";
7
+ export { registerTransformDatasetContext } from "./transform/index.js";
@@ -27,6 +27,14 @@ export declare function writeDatasetSandboxFilesStep(params: {
27
27
  contentBase64: string;
28
28
  }>;
29
29
  }): Promise<void>;
30
+ export declare function writeDatasetSandboxTextFilesStep(params: {
31
+ runtime: any;
32
+ sandboxId: DatasetSandboxId;
33
+ files: Array<{
34
+ path: string;
35
+ content: string;
36
+ }>;
37
+ }): Promise<void>;
30
38
  export declare function readDatasetSandboxFileStep(params: {
31
39
  runtime: any;
32
40
  sandboxId: DatasetSandboxId;
@@ -34,6 +42,13 @@ export declare function readDatasetSandboxFileStep(params: {
34
42
  }): Promise<{
35
43
  contentBase64: string;
36
44
  }>;
45
+ export declare function readDatasetSandboxTextFileStep(params: {
46
+ runtime: any;
47
+ sandboxId: DatasetSandboxId;
48
+ path: string;
49
+ }): Promise<{
50
+ content: string;
51
+ }>;
37
52
  export declare function stopDatasetSandboxStep(params: {
38
53
  runtime: any;
39
54
  sandboxId: DatasetSandboxId;
@@ -117,6 +117,25 @@ export async function writeDatasetSandboxFilesStep(params) {
117
117
  if (!result.ok)
118
118
  throw new Error(result.error);
119
119
  }
120
+ export async function writeDatasetSandboxTextFilesStep(params) {
121
+ "use step";
122
+ if (isLocalDatasetSandboxMode()) {
123
+ for (const file of params.files) {
124
+ await fs.mkdir(path.dirname(file.path), { recursive: true });
125
+ await fs.writeFile(file.path, file.content, "utf-8");
126
+ }
127
+ return;
128
+ }
129
+ const db = await getRuntimeDb(params.runtime);
130
+ const service = new SandboxService(db);
131
+ const files = params.files.map((file) => ({
132
+ path: file.path,
133
+ contentBase64: Buffer.from(file.content, "utf-8").toString("base64"),
134
+ }));
135
+ const result = await service.writeFiles(params.sandboxId, files);
136
+ if (!result.ok)
137
+ throw new Error(result.error);
138
+ }
120
139
  export async function readDatasetSandboxFileStep(params) {
121
140
  "use step";
122
141
  if (isLocalDatasetSandboxMode()) {
@@ -130,6 +149,19 @@ export async function readDatasetSandboxFileStep(params) {
130
149
  throw new Error(result.error);
131
150
  return result.data;
132
151
  }
152
+ export async function readDatasetSandboxTextFileStep(params) {
153
+ "use step";
154
+ if (isLocalDatasetSandboxMode()) {
155
+ const content = await fs.readFile(params.path, "utf-8");
156
+ return { content };
157
+ }
158
+ const db = await getRuntimeDb(params.runtime);
159
+ const service = new SandboxService(db);
160
+ const result = await service.readFile(params.sandboxId, params.path);
161
+ if (!result.ok)
162
+ throw new Error(result.error);
163
+ return { content: Buffer.from(result.data.contentBase64, "base64").toString("utf-8") };
164
+ }
133
165
  export async function stopDatasetSandboxStep(params) {
134
166
  "use step";
135
167
  if (isLocalDatasetSandboxMode()) {
@@ -1,7 +1,7 @@
1
1
  import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
2
2
  import { id } from "@instantdb/admin";
3
3
  import { createClearDatasetTool } from "../clearDataset.tool.js";
4
- import { createCompleteDatasetTool, didCompleteDatasetSucceed, } from "../completeDataset.tool.js";
4
+ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
5
5
  import { datasetUpdateSchemaStep } from "../dataset/steps.js";
6
6
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
7
  import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
@@ -130,6 +130,10 @@ function createTransformDatasetContextDefinition(params) {
130
130
  };
131
131
  })
132
132
  .shouldContinue(({ reactionEvent }) => {
133
+ const fatalFailure = getDatasetFatalFailure(reactionEvent);
134
+ if (fatalFailure) {
135
+ throw new Error(fatalFailure);
136
+ }
133
137
  return !didCompleteDatasetSucceed(reactionEvent);
134
138
  });
135
139
  if (params.reactor) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.56-beta.development.0",
3
+ "version": "1.22.57-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.56-beta.development.0",
69
- "@ekairos/events": "^1.22.56-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.56-beta.development.0",
68
+ "@ekairos/domain": "^1.22.57-beta.development.0",
69
+ "@ekairos/events": "^1.22.57-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.57-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",