@ekairos/dataset 1.22.81-beta.development.0 → 1.22.83-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,8 @@ type PreparedFileDatasetContext = {
11
11
  sandboxState: SandboxState;
12
12
  filePreview?: FilePreviewContext;
13
13
  schema?: DatasetSchemaInput | null;
14
+ filename?: string;
15
+ mediaType?: string;
14
16
  };
15
17
  type PreparedTransformDatasetContext = {
16
18
  kind: "transform";
@@ -1,6 +1,5 @@
1
1
  import { createFileParseContext } from "../file/file-dataset.agent.js";
2
2
  import { readInstantFileStep } from "../file/steps.js";
3
- import { generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "../file/file-dataset.steps.js";
4
3
  import { createTransformDatasetContext } from "../transform/transform-dataset.agent.js";
5
4
  import { ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "../transform/transform-dataset.steps.js";
6
5
  import { datasetGetByIdStep, datasetInferAndUpdateSchemaStep, datasetPreviewRowsStep, datasetReadOneStep, } from "../dataset/steps.js";
@@ -266,27 +265,16 @@ export async function prepareDatasetSourcesStep(params) {
266
265
  const fileId = params.source.kind === "file"
267
266
  ? params.source.fileId
268
267
  : await uploadInlineTextSource(params.runtime, params.datasetId, params.source);
269
- const initialized = await initializeFileParseSandboxStep({
270
- runtime: params.runtime,
271
- sandboxId: params.sandboxId,
272
- datasetId: params.datasetId,
273
- fileId,
274
- state: { initialized: false, filePath: "" },
275
- });
276
- const filePreview = await generateFileParsePreviewStep({
277
- runtime: params.runtime,
278
- sandboxId: params.sandboxId,
279
- sandboxFilePath: initialized.filePath,
280
- datasetId: params.datasetId,
281
- });
282
268
  return {
283
269
  kind: "file",
284
270
  datasetId: params.datasetId,
285
271
  sandboxId: params.sandboxId,
286
272
  fileId,
287
- sandboxState: initialized.state,
288
- filePreview,
273
+ sandboxState: { initialized: false, filePath: "" },
274
+ filePreview: undefined,
289
275
  schema: params.schema ?? null,
276
+ filename: params.source.kind === "file" ? params.source.filename : params.source.name,
277
+ mediaType: params.source.kind === "file" ? params.source.mediaType : params.source.mimeType,
290
278
  };
291
279
  }
292
280
  const initialized = await ensureTransformSourcesInSandboxStep({
@@ -393,7 +381,13 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
393
381
  instructions: state.instructions,
394
382
  sources: [
395
383
  source.kind === "file"
396
- ? { kind: "file", fileId: source.fileId, description: source.description }
384
+ ? {
385
+ kind: "file",
386
+ fileId: source.fileId,
387
+ description: source.description,
388
+ filename: source.filename,
389
+ mediaType: source.mediaType,
390
+ }
397
391
  : {
398
392
  kind: "text",
399
393
  mimeType: source.mimeType,
@@ -428,6 +422,8 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
428
422
  sandboxState: context.sandboxState,
429
423
  filePreview: context.filePreview,
430
424
  schema: context.schema,
425
+ filename: context.filename,
426
+ mediaType: context.mediaType,
431
427
  });
432
428
  await parseContext.parse(state.runtime, {
433
429
  durable: await resolveDatasetAgentDurable(state.durable),
@@ -440,6 +436,8 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
440
436
  sandboxState: context.sandboxState,
441
437
  filePreview: context.filePreview,
442
438
  schema: context.schema,
439
+ filename: context.filename,
440
+ mediaType: context.mediaType,
443
441
  },
444
442
  });
445
443
  return targetDatasetId;
@@ -12,6 +12,8 @@ export type DatasetQuerySourceInput<D extends DomainSchemaResult = DomainSchemaR
12
12
  export type DatasetFileSourceInput = {
13
13
  fileId: string;
14
14
  description?: string;
15
+ filename?: string;
16
+ mediaType?: string;
15
17
  };
16
18
  export type DatasetTextSourceInput = {
17
19
  text: string;
@@ -3,8 +3,9 @@ export interface PersistDatasetStepParams {
3
3
  sandboxId: string;
4
4
  runtime: any;
5
5
  summary?: string;
6
+ outputPath?: string;
6
7
  }
7
- export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary }: PersistDatasetStepParams): Promise<{
8
+ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }: PersistDatasetStepParams): Promise<{
8
9
  success: boolean;
9
10
  validation?: RowValidationEntry[];
10
11
  validationTruncated?: number;
@@ -23,18 +24,18 @@ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summ
23
24
  validation: RowValidationEntry[] | undefined;
24
25
  error: string;
25
26
  message: string;
26
- fileId?: undefined;
27
- storagePath?: undefined;
27
+ records?: undefined;
28
+ summary?: undefined;
28
29
  } | {
29
30
  success: boolean;
30
31
  status: string;
31
- validRows: number;
32
- rowRecordCount: number;
33
- fileId: string;
34
- storagePath: string;
35
- message: string;
32
+ records: number;
33
+ summary: string;
34
+ validRows?: undefined;
35
+ rowRecordCount?: undefined;
36
36
  validation?: undefined;
37
37
  error?: undefined;
38
+ message?: undefined;
38
39
  }>;
39
40
  type RowValidationEntry = {
40
41
  index: number;
@@ -13,14 +13,15 @@ function getAjv() {
13
13
  }
14
14
  return ajvInstance;
15
15
  }
16
- export async function persistDatasetStep({ runtime, datasetId, sandboxId, summary }) {
16
+ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }) {
17
17
  "use step";
18
- const outputPath = getDatasetOutputPath(datasetId);
18
+ const resolvedOutputPath = outputPath ?? getDatasetOutputPath(datasetId);
19
+ const storagePath = resolveExecutionStoragePath(resolvedOutputPath, datasetId);
19
20
  if (summary) {
20
21
  console.log(`[Dataset ${datasetId}] Persisting completed dataset: ${summary}`);
21
22
  }
22
23
  try {
23
- await ensureFileExists(runtime, sandboxId, outputPath);
24
+ await ensureFileExists(runtime, sandboxId, resolvedOutputPath);
24
25
  }
25
26
  catch (error) {
26
27
  const message = error instanceof Error ? error.message : String(error);
@@ -85,7 +86,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
85
86
  const validationResult = await validateJsonlRows({
86
87
  runtime,
87
88
  sandboxId,
88
- outputPath,
89
+ outputPath: resolvedOutputPath,
89
90
  validator,
90
91
  schema: schemaJson,
91
92
  datasetId,
@@ -96,7 +97,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
96
97
  const totalValidRows = validationResult.validRowCount ?? 0;
97
98
  const rowRecordCount = validationResult.rowRecordCount ?? totalValidRows;
98
99
  console.log(`[Dataset ${datasetId}] Reading file content for upload`);
99
- const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: outputPath });
100
+ const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: resolvedOutputPath });
100
101
  if (!fileRead.contentBase64) {
101
102
  console.error(`[Dataset ${datasetId}] Empty file content`);
102
103
  return {
@@ -113,6 +114,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
113
114
  const uploadResult = await service.uploadDatasetOutputFile({
114
115
  datasetId,
115
116
  fileBuffer: Buffer.from(fileRead.contentBase64, "base64"),
117
+ storagePath,
116
118
  });
117
119
  if (!uploadResult.ok) {
118
120
  console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
@@ -150,13 +152,18 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
150
152
  return {
151
153
  success: true,
152
154
  status: "completed",
153
- validRows: totalValidRows,
154
- rowRecordCount,
155
- fileId: uploadResult.data.fileId,
156
- storagePath: uploadResult.data.storagePath,
157
- message: "Dataset creation completed and uploaded to storage",
155
+ records: totalValidRows,
156
+ summary: summary ?? `Dataset completed with ${totalValidRows} records.`,
158
157
  };
159
158
  }
159
+ function resolveExecutionStoragePath(outputPath, datasetId) {
160
+ const normalized = String(outputPath ?? "").replace(/\\/g, "/");
161
+ const marker = "/tmp/ekairos/contexts/";
162
+ if (normalized.startsWith(marker)) {
163
+ return normalized.slice("/tmp/ekairos".length);
164
+ }
165
+ return `/dataset/${datasetId}/output.jsonl`;
166
+ }
160
167
  async function ensureFileExists(runtime, sandboxId, path) {
161
168
  const result = await runDatasetSandboxCommandStep({
162
169
  runtime,
@@ -2,8 +2,9 @@ interface CompleteDatasetToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
4
  runtime: any;
5
+ outputPath?: string;
5
6
  }
6
- export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime }: CompleteDatasetToolParams): import("ai").Tool<{
7
+ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }: CompleteDatasetToolParams): import("ai").Tool<{
7
8
  summary: string;
8
9
  }, {
9
10
  success: boolean;
@@ -82,18 +83,18 @@ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtim
82
83
  }[] | undefined;
83
84
  error: string;
84
85
  message: string;
85
- fileId?: undefined;
86
- storagePath?: undefined;
86
+ records?: undefined;
87
+ summary?: undefined;
87
88
  } | {
88
89
  success: boolean;
89
90
  status: string;
90
- validRows: number;
91
- rowRecordCount: number;
92
- fileId: string;
93
- storagePath: string;
94
- message: string;
91
+ records: number;
92
+ summary: string;
93
+ validRows?: undefined;
94
+ rowRecordCount?: undefined;
95
95
  validation?: undefined;
96
96
  error?: undefined;
97
+ message?: undefined;
97
98
  }>;
98
99
  export declare function didCompleteDatasetSucceed(event: {
99
100
  content?: {
@@ -1,7 +1,7 @@
1
1
  import { tool } from "ai";
2
2
  import { z } from "zod";
3
3
  import { persistDatasetStep } from "./completeDataset.steps.js";
4
- export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
4
+ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }) {
5
5
  return tool({
6
6
  description: "Mark the dataset as completed. Use only when output.jsonl has been successfully generated and is ready for validation.",
7
7
  inputSchema: z.object({
@@ -17,6 +17,7 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
17
17
  datasetId,
18
18
  sandboxId,
19
19
  summary,
20
+ outputPath,
20
21
  });
21
22
  },
22
23
  });
@@ -0,0 +1,72 @@
1
+ export type ContextWorkspaceFileRole = "input" | "output" | "artifact";
2
+ export type ContextWorkspaceFileInput = {
3
+ fileId: string;
4
+ filename?: string;
5
+ mediaType?: string;
6
+ role?: ContextWorkspaceFileRole;
7
+ sourceEventId?: string;
8
+ sourcePartIndex?: number;
9
+ };
10
+ export type PreparedContextWorkspaceFile = {
11
+ fileId: string;
12
+ filename: string;
13
+ mediaType?: string;
14
+ role: ContextWorkspaceFileRole;
15
+ path: string;
16
+ sourceEventId?: string;
17
+ sourcePartIndex?: number;
18
+ };
19
+ export type PreparedContextExecutionWorkspace = {
20
+ contextId: string;
21
+ executionId: string;
22
+ sandboxId: string;
23
+ root: string;
24
+ contextRoot: string;
25
+ eventsDir: string;
26
+ outputDir: string;
27
+ scriptsDir: string;
28
+ tmpDir: string;
29
+ manifestPath: string;
30
+ files: PreparedContextWorkspaceFile[];
31
+ };
32
+ export declare function getContextWorkspaceBase(): string;
33
+ export declare function getContextExecutionWorkspaceRoot(params: {
34
+ contextId: string;
35
+ executionId: string;
36
+ root?: string;
37
+ }): string;
38
+ export declare function getContextWorkspaceRoot(params: {
39
+ contextId: string;
40
+ root?: string;
41
+ }): string;
42
+ export declare function getContextEventsDir(params: {
43
+ contextId: string;
44
+ root?: string;
45
+ }): string;
46
+ export declare function getContextExecutionWorkspaceDirs(params: {
47
+ contextId: string;
48
+ executionId: string;
49
+ root?: string;
50
+ }): {
51
+ root: string;
52
+ contextRoot: string;
53
+ eventsDir: string;
54
+ outputDir: string;
55
+ scriptsDir: string;
56
+ tmpDir: string;
57
+ manifestPath: string;
58
+ };
59
+ export declare function getContextExecutionWorkspaceStandardDirs(params: {
60
+ contextId: string;
61
+ executionId: string;
62
+ root?: string;
63
+ }): string[];
64
+ export declare function extractContextWorkspaceFilesFromEventItems(eventItems: unknown[]): ContextWorkspaceFileInput[];
65
+ export declare function prepareContextExecutionWorkspaceStep(params: {
66
+ runtime: any;
67
+ sandboxId: string;
68
+ contextId: string;
69
+ executionId: string;
70
+ files: ContextWorkspaceFileInput[];
71
+ root?: string;
72
+ }): Promise<PreparedContextExecutionWorkspace>;
@@ -0,0 +1,218 @@
1
+ import { readInstantFileStep } from "./file/steps.js";
2
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
3
+ const CONTEXT_WORKSPACE_BASE = "/tmp/ekairos/contexts";
4
+ const WORKSPACE_MANIFEST_FILE_NAME = "manifest.json";
5
+ function trimTrailingSlash(value) {
6
+ return value.endsWith("/") ? value.slice(0, -1) : value;
7
+ }
8
+ function sanitizePathSegment(value, fallback) {
9
+ const parts = String(value ?? "")
10
+ .trim()
11
+ .replace(/\\/g, "/")
12
+ .split("/")
13
+ .filter(Boolean);
14
+ const normalized = parts[parts.length - 1]
15
+ ?.replace(/[^a-zA-Z0-9_.-]/g, "_")
16
+ .replace(/_+/g, "_")
17
+ .slice(0, 160);
18
+ return normalized || fallback;
19
+ }
20
+ function filenameFromContentDisposition(value, fallback) {
21
+ const raw = String(value ?? "").trim();
22
+ if (!raw)
23
+ return fallback;
24
+ const filenameStar = raw.match(/filename\*=UTF-8''([^;]+)/i)?.[1];
25
+ if (filenameStar) {
26
+ return sanitizePathSegment(decodeURIComponent(filenameStar), fallback);
27
+ }
28
+ const filename = raw.match(/filename="?([^";]+)"?/i)?.[1];
29
+ return sanitizePathSegment(filename ?? raw, fallback);
30
+ }
31
+ function resolveContextEventPartDir(params) {
32
+ const sourceEventId = sanitizePathSegment(params.sourceEventId, "event");
33
+ const sourcePartIndex = Number.isFinite(params.sourcePartIndex)
34
+ ? Math.max(0, Math.floor(params.sourcePartIndex))
35
+ : 0;
36
+ return `${params.eventsDir}/${sourceEventId}/parts/${sourcePartIndex}`;
37
+ }
38
+ function resolveWorkspaceFilePath(params) {
39
+ return `${resolveContextEventPartDir(params)}/file`;
40
+ }
41
+ export function getContextWorkspaceBase() {
42
+ return trimTrailingSlash(CONTEXT_WORKSPACE_BASE);
43
+ }
44
+ export function getContextExecutionWorkspaceRoot(params) {
45
+ if (params.root)
46
+ return trimTrailingSlash(params.root);
47
+ const contextId = sanitizePathSegment(params.contextId, "context");
48
+ const executionId = sanitizePathSegment(params.executionId, "execution");
49
+ return `${getContextWorkspaceBase()}/${contextId}/executions/${executionId}`;
50
+ }
51
+ export function getContextWorkspaceRoot(params) {
52
+ if (params.root)
53
+ return trimTrailingSlash(params.root);
54
+ const contextId = sanitizePathSegment(params.contextId, "context");
55
+ return `${getContextWorkspaceBase()}/${contextId}`;
56
+ }
57
+ export function getContextEventsDir(params) {
58
+ return `${getContextWorkspaceRoot(params)}/events`;
59
+ }
60
+ export function getContextExecutionWorkspaceDirs(params) {
61
+ const root = getContextExecutionWorkspaceRoot(params);
62
+ const contextRoot = getContextWorkspaceRoot(params);
63
+ const eventsDir = getContextEventsDir(params);
64
+ return {
65
+ root,
66
+ contextRoot,
67
+ eventsDir,
68
+ outputDir: `${root}/output`,
69
+ scriptsDir: `${root}/scripts`,
70
+ tmpDir: `${root}/tmp`,
71
+ manifestPath: `${root}/${WORKSPACE_MANIFEST_FILE_NAME}`,
72
+ };
73
+ }
74
+ export function getContextExecutionWorkspaceStandardDirs(params) {
75
+ const dirs = getContextExecutionWorkspaceDirs(params);
76
+ return [dirs.contextRoot, dirs.eventsDir, dirs.root, dirs.outputDir, dirs.scriptsDir, dirs.tmpDir];
77
+ }
78
+ export function extractContextWorkspaceFilesFromEventItems(eventItems) {
79
+ const files = [];
80
+ for (const item of eventItems) {
81
+ const itemRecord = asRecord(item);
82
+ const parts = Array.isArray(asRecord(itemRecord?.content)?.parts)
83
+ ? asRecord(itemRecord?.content)?.parts
84
+ : [];
85
+ parts.forEach((part, partIndex) => {
86
+ collectPartFiles(part, {
87
+ files,
88
+ sourceEventId: asText(itemRecord?.id),
89
+ sourcePartIndex: partIndex,
90
+ });
91
+ });
92
+ }
93
+ return files;
94
+ }
95
+ export async function prepareContextExecutionWorkspaceStep(params) {
96
+ "use step";
97
+ const dirs = getContextExecutionWorkspaceDirs(params);
98
+ const filePartDirs = Array.from(new Set(params.files.map((fileInput) => resolveContextEventPartDir({
99
+ eventsDir: dirs.eventsDir,
100
+ sourceEventId: fileInput.sourceEventId ?? fileInput.fileId,
101
+ sourcePartIndex: fileInput.sourcePartIndex ?? 0,
102
+ }))));
103
+ await runDatasetSandboxCommandStep({
104
+ runtime: params.runtime,
105
+ sandboxId: params.sandboxId,
106
+ cmd: "mkdir",
107
+ args: ["-p", ...getContextExecutionWorkspaceStandardDirs(params), ...filePartDirs],
108
+ });
109
+ const preparedFiles = [];
110
+ for (const fileInput of params.files) {
111
+ const fileId = String(fileInput.fileId ?? "").trim();
112
+ if (!fileId)
113
+ continue;
114
+ const file = await readInstantFileStep({ runtime: params.runtime, fileId });
115
+ const filename = sanitizePathSegment(fileInput.filename ??
116
+ filenameFromContentDisposition(file.contentDisposition, `${fileId}.bin`), `${fileId}.bin`);
117
+ const path = resolveWorkspaceFilePath({
118
+ eventsDir: dirs.eventsDir,
119
+ sourceEventId: fileInput.sourceEventId ?? fileId,
120
+ sourcePartIndex: fileInput.sourcePartIndex ?? 0,
121
+ });
122
+ const metadataPath = `${resolveContextEventPartDir({
123
+ eventsDir: dirs.eventsDir,
124
+ sourceEventId: fileInput.sourceEventId ?? fileId,
125
+ sourcePartIndex: fileInput.sourcePartIndex ?? 0,
126
+ })}/metadata.json`;
127
+ await writeDatasetSandboxFilesStep({
128
+ runtime: params.runtime,
129
+ sandboxId: params.sandboxId,
130
+ files: [{ path, contentBase64: file.contentBase64 }],
131
+ });
132
+ await writeDatasetSandboxTextFilesStep({
133
+ runtime: params.runtime,
134
+ sandboxId: params.sandboxId,
135
+ files: [
136
+ {
137
+ path: metadataPath,
138
+ content: JSON.stringify({
139
+ fileId,
140
+ filename,
141
+ mediaType: fileInput.mediaType,
142
+ role: fileInput.role ?? "input",
143
+ sourceEventId: fileInput.sourceEventId,
144
+ sourcePartIndex: fileInput.sourcePartIndex,
145
+ }, null, 2),
146
+ },
147
+ ],
148
+ });
149
+ preparedFiles.push({
150
+ fileId,
151
+ filename,
152
+ mediaType: fileInput.mediaType,
153
+ role: fileInput.role ?? "input",
154
+ path,
155
+ sourceEventId: fileInput.sourceEventId,
156
+ sourcePartIndex: fileInput.sourcePartIndex,
157
+ });
158
+ }
159
+ const manifest = {
160
+ contextId: params.contextId,
161
+ executionId: params.executionId,
162
+ sandboxId: params.sandboxId,
163
+ ...dirs,
164
+ files: preparedFiles,
165
+ };
166
+ await writeDatasetSandboxTextFilesStep({
167
+ runtime: params.runtime,
168
+ sandboxId: params.sandboxId,
169
+ files: [
170
+ {
171
+ path: dirs.manifestPath,
172
+ content: JSON.stringify(manifest, null, 2),
173
+ },
174
+ ],
175
+ });
176
+ return manifest;
177
+ }
178
+ function collectPartFiles(value, params) {
179
+ const record = asRecord(value);
180
+ if (!record)
181
+ return;
182
+ if (record.type === "file") {
183
+ pushFileRecord(record, params);
184
+ return;
185
+ }
186
+ const content = asRecord(record.content);
187
+ if (!content)
188
+ return;
189
+ if (Array.isArray(content.blocks)) {
190
+ for (const block of content.blocks) {
191
+ const blockRecord = asRecord(block);
192
+ if (blockRecord?.type === "file") {
193
+ pushFileRecord(blockRecord, params);
194
+ }
195
+ }
196
+ }
197
+ }
198
+ function pushFileRecord(record, params) {
199
+ const fileId = asText(record.fileId);
200
+ if (!fileId)
201
+ return;
202
+ params.files.push({
203
+ fileId,
204
+ filename: asText(record.filename),
205
+ mediaType: asText(record.mediaType),
206
+ role: "input",
207
+ sourceEventId: params.sourceEventId,
208
+ sourcePartIndex: params.sourcePartIndex,
209
+ });
210
+ }
211
+ function asRecord(value) {
212
+ return value && typeof value === "object" && !Array.isArray(value)
213
+ ? value
214
+ : null;
215
+ }
216
+ function asText(value) {
217
+ return typeof value === "string" && value.trim() ? value.trim() : undefined;
218
+ }
@@ -6,47 +6,5 @@ interface ExecuteCommandToolParams {
6
6
  export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): import("ai").Tool<{
7
7
  pythonCode: string;
8
8
  scriptName: string;
9
- }, {
10
- success: boolean;
11
- fatal: boolean;
12
- status: string;
13
- error: string;
14
- stdout: string;
15
- stderr: string;
16
- exitCode: number;
17
- scriptPath: string;
18
- stdoutTruncated: boolean;
19
- stderrTruncated: boolean;
20
- stdoutOriginalLength: number;
21
- stderrOriginalLength: number;
22
- message?: undefined;
23
- } | {
24
- success: boolean;
25
- exitCode: number;
26
- stdout: string;
27
- stderr: string;
28
- scriptPath: string;
29
- error: string;
30
- stdoutTruncated: boolean;
31
- stderrTruncated: boolean;
32
- stdoutOriginalLength: number;
33
- stderrOriginalLength: number;
34
- fatal?: undefined;
35
- status?: undefined;
36
- message?: undefined;
37
- } | {
38
- success: boolean;
39
- exitCode: number;
40
- stdout: string;
41
- stderr: string;
42
- scriptPath: string;
43
- message: string;
44
- stdoutTruncated: boolean;
45
- stderrTruncated: boolean;
46
- stdoutOriginalLength: number;
47
- stderrOriginalLength: number;
48
- fatal?: undefined;
49
- status?: undefined;
50
- error?: undefined;
51
- }>;
9
+ }, unknown>;
52
10
  export {};
@@ -2,6 +2,7 @@ import { tool } from "ai";
2
2
  import { z } from "zod";
3
3
  import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep } from "./sandbox/steps.js";
4
4
  import { getDatasetScriptsDir } from "./datasetFiles.js";
5
+ import { getContextExecutionWorkspaceDirs } from "./contextWorkspace.js";
5
6
  // To keep responses predictable for big data scenarios, we cap stdout/stderr.
6
7
  // The tool's return payload exposes stdout (capped) plus the on-disk script path.
7
8
  const MAX_STDOUT_CHARS = 20000;
@@ -29,10 +30,16 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
29
30
  pythonCode: z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
30
31
  scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
31
32
  }),
32
- execute: async ({ pythonCode, scriptName }) => {
33
+ execute: (async ({ pythonCode, scriptName }, actionContext) => {
33
34
  const normalizedScriptName = normalizeScriptName(scriptName);
34
35
  const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
35
- const scriptFile = `${getDatasetScriptsDir(datasetId)}/${normalizedScriptName}-${scriptHash}.py`;
36
+ const scriptsDir = actionContext?.contextId && actionContext.executionId
37
+ ? getContextExecutionWorkspaceDirs({
38
+ contextId: actionContext.contextId,
39
+ executionId: actionContext.executionId,
40
+ }).scriptsDir
41
+ : getDatasetScriptsDir(datasetId);
42
+ const scriptFile = `${scriptsDir}/${normalizedScriptName}-${scriptHash}.py`;
36
43
  console.log(`[Dataset ${datasetId}] ========================================`);
37
44
  console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
38
45
  console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
@@ -162,6 +169,6 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
162
169
  stderrOriginalLength: 0,
163
170
  };
164
171
  }
165
- },
172
+ }),
166
173
  });
167
174
  }
@@ -12,6 +12,8 @@ export declare function createFileParseContext<Env extends {
12
12
  sandboxState?: SandboxState;
13
13
  filePreview?: FileParseContext["filePreview"];
14
14
  schema?: any | null;
15
+ filename?: string;
16
+ mediaType?: string;
15
17
  }): {
16
18
  datasetId: string;
17
19
  parse(runtime: {
@@ -4,7 +4,7 @@ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFa
4
4
  import { datasetGetByIdStep } from "../dataset/steps.js";
5
5
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
6
6
  import { createGenerateSchemaTool } from "./generateSchema.tool.js";
7
- import { buildFileDatasetPromptStep, generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
7
+ import { buildFileDatasetPromptStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
8
8
  import { createDatasetId } from "../id.js";
9
9
  async function awaitContextRun(run) {
10
10
  if (!run)
@@ -27,6 +27,15 @@ function createFileParseContextDefinition(params) {
27
27
  const fileId = previous?.fileId ?? params.fileId ?? "";
28
28
  const instructions = previous?.instructions ?? params.instructions ?? "";
29
29
  const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
30
+ const contextRun = runtime?.__ekairosContextRun ?? {};
31
+ const contextId = String(contextRun.contextId ?? stored?.id ?? "").trim();
32
+ const executionId = String(contextRun.executionId ?? previous?.executionId ?? "").trim();
33
+ const sourceEventId = String(previous?.sourceEventId ?? params.sourceEventId ?? "").trim();
34
+ const sourcePartIndex = typeof previous?.sourcePartIndex === "number"
35
+ ? previous.sourcePartIndex
36
+ : typeof params.sourcePartIndex === "number"
37
+ ? params.sourcePartIndex
38
+ : 0;
30
39
  if (!datasetId) {
31
40
  throw new Error("dataset_id_required");
32
41
  }
@@ -36,30 +45,29 @@ function createFileParseContextDefinition(params) {
36
45
  if (!sandboxId) {
37
46
  throw new Error("dataset_sandbox_required");
38
47
  }
48
+ if (!contextId) {
49
+ throw new Error("dataset_context_id_required");
50
+ }
51
+ if (!executionId) {
52
+ throw new Error("dataset_execution_id_required");
53
+ }
39
54
  const initialized = sandboxState.initialized && sandboxState.filePath
40
55
  ? { filePath: sandboxState.filePath, state: sandboxState }
41
56
  : await initializeFileParseSandboxStep({
42
57
  runtime,
43
58
  sandboxId,
59
+ contextId,
60
+ executionId,
44
61
  datasetId,
45
62
  fileId,
63
+ sourceEventId,
64
+ sourcePartIndex,
65
+ filename: previous?.filename ?? params.filename,
66
+ mediaType: previous?.mediaType ?? params.mediaType,
46
67
  state: sandboxState,
47
68
  });
48
69
  const sandboxFilePath = initialized.filePath;
49
70
  let filePreview = previous?.filePreview ?? previous?.ctx?.filePreview ?? params.filePreview;
50
- if (!filePreview) {
51
- try {
52
- filePreview = await generateFileParsePreviewStep({
53
- runtime,
54
- sandboxId,
55
- sandboxFilePath,
56
- datasetId,
57
- });
58
- }
59
- catch {
60
- // Preview is optional; parsing can still proceed from the file path.
61
- }
62
- }
63
71
  let schema = previous?.ctx?.schema ?? previous?.schema ?? params.schema ?? null;
64
72
  const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
65
73
  if (datasetResult.ok && datasetResult.data.schema) {
@@ -69,7 +77,12 @@ function createFileParseContextDefinition(params) {
69
77
  datasetId,
70
78
  fileId,
71
79
  instructions,
72
- sandboxConfig: { filePath: sandboxFilePath },
80
+ sandboxConfig: {
81
+ filePath: sandboxFilePath,
82
+ outputPath: initialized.state.outputPath,
83
+ scriptsDir: initialized.state.scriptsDir,
84
+ manifestPath: initialized.state.manifestPath,
85
+ },
73
86
  analysis: [],
74
87
  schema,
75
88
  plan: null,
@@ -84,6 +97,11 @@ function createFileParseContextDefinition(params) {
84
97
  fileId,
85
98
  instructions,
86
99
  sandboxId,
100
+ executionId,
101
+ sourceEventId,
102
+ sourcePartIndex,
103
+ filename: previous?.filename ?? params.filename,
104
+ mediaType: previous?.mediaType ?? params.mediaType,
87
105
  sandboxState: initialized.state,
88
106
  filePreview,
89
107
  ctx,
@@ -109,6 +127,7 @@ function createFileParseContextDefinition(params) {
109
127
  const datasetId = _stored?.content?.datasetId ?? fallbackDatasetId ?? "";
110
128
  const fileId = _stored?.content?.fileId ?? params.fileId ?? "";
111
129
  const sandboxId = _stored?.content?.sandboxId ?? params.sandboxId ?? "";
130
+ const outputPath = _stored?.content?.ctx?.sandboxConfig?.outputPath;
112
131
  if (!datasetId)
113
132
  throw new Error("dataset_id_required");
114
133
  if (!fileId)
@@ -125,6 +144,7 @@ function createFileParseContextDefinition(params) {
125
144
  datasetId,
126
145
  sandboxId,
127
146
  runtime,
147
+ outputPath,
128
148
  }),
129
149
  clearDataset: createClearDatasetTool({
130
150
  datasetId,
@@ -169,6 +189,8 @@ export function createFileParseContext(fileId, opts) {
169
189
  sandboxState: opts?.sandboxState,
170
190
  filePreview: opts?.filePreview,
171
191
  schema: opts?.schema,
192
+ filename: opts?.filename,
193
+ mediaType: opts?.mediaType,
172
194
  };
173
195
  const { context } = createFileParseContextDefinition(params);
174
196
  return {
@@ -185,9 +207,19 @@ export function createFileParseContext(fileId, opts) {
185
207
  type: "text",
186
208
  text: options.prompt ?? "generate a dataset for this file",
187
209
  },
210
+ {
211
+ type: "file",
212
+ fileId,
213
+ filename: opts?.filename ?? "source-file",
214
+ mediaType: opts?.mediaType ?? "application/octet-stream",
215
+ },
188
216
  ],
189
217
  },
190
218
  };
219
+ params.sourceEventId = triggerEvent.id;
220
+ params.sourcePartIndex = 1;
221
+ params.filename = opts?.filename ?? "source-file";
222
+ params.mediaType = opts?.mediaType ?? "application/octet-stream";
191
223
  const shell = await context.react(triggerEvent, {
192
224
  runtime: runtime,
193
225
  context: { key: `dataset:${datasetId}` },
@@ -203,6 +235,10 @@ export function createFileParseContext(fileId, opts) {
203
235
  ...(options.initialContent ?? {}),
204
236
  datasetId,
205
237
  fileId,
238
+ sourceEventId: triggerEvent.id,
239
+ sourcePartIndex: 1,
240
+ filename: opts?.filename ?? "source-file",
241
+ mediaType: opts?.mediaType ?? "application/octet-stream",
206
242
  instructions: opts?.instructions ?? "",
207
243
  sandboxId: opts?.sandboxId ?? "",
208
244
  sandboxState: opts?.sandboxState ?? { initialized: false, filePath: "" },
@@ -3,8 +3,14 @@ import type { FilePreviewContext } from "./filepreview.types.js";
3
3
  export declare function initializeFileParseSandboxStep(params: {
4
4
  runtime: any;
5
5
  sandboxId: string;
6
+ contextId: string;
7
+ executionId: string;
6
8
  datasetId: string;
7
9
  fileId: string;
10
+ sourceEventId?: string;
11
+ sourcePartIndex?: number;
12
+ filename?: string;
13
+ mediaType?: string;
8
14
  state: SandboxState;
9
15
  }): Promise<{
10
16
  filePath: string;
@@ -1,42 +1,39 @@
1
- import { getDatasetSourcesDir, getDatasetStandardDirs, getDatasetWorkstation, } from "../datasetFiles.js";
2
- import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
1
+ import { DATASET_OUTPUT_FILE_NAME } from "../datasetFiles.js";
2
+ import { prepareContextExecutionWorkspaceStep } from "../contextWorkspace.js";
3
3
  import { buildFileDatasetPrompt } from "./prompts.js";
4
4
  import { generateFilePreview } from "./filepreview.js";
5
- import { readInstantFileStep } from "./steps.js";
6
5
  export async function initializeFileParseSandboxStep(params) {
7
6
  "use step";
8
7
  if (params.state.initialized) {
9
8
  return { filePath: params.state.filePath, state: params.state };
10
9
  }
11
- console.log(`[FileParseContext ${params.datasetId}] Preparing source file in sandbox...`);
12
- console.log(`[FileParseContext ${params.datasetId}] Fetching file from InstantDB...`);
13
- const file = await readInstantFileStep({ runtime: params.runtime, fileId: params.fileId });
14
- console.log(`[FileParseContext ${params.datasetId}] Creating dataset workstation...`);
15
- const workstation = getDatasetWorkstation(params.datasetId);
16
- await runDatasetSandboxCommandStep({
17
- runtime: params.runtime,
18
- sandboxId: params.sandboxId,
19
- cmd: "mkdir",
20
- args: ["-p", ...getDatasetStandardDirs(params.datasetId)],
21
- });
22
- const fileName = file.contentDisposition ?? "";
23
- const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
24
- const sandboxFilePath = `${getDatasetSourcesDir(params.datasetId)}/${params.fileId}${fileExtension}`;
25
- await writeDatasetSandboxFilesStep({
10
+ console.log(`[FileParseContext ${params.datasetId}] Preparing context execution workspace...`);
11
+ const workspace = await prepareContextExecutionWorkspaceStep({
26
12
  runtime: params.runtime,
27
13
  sandboxId: params.sandboxId,
14
+ contextId: params.contextId,
15
+ executionId: params.executionId,
28
16
  files: [
29
17
  {
30
- path: sandboxFilePath,
31
- contentBase64: file.contentBase64,
18
+ fileId: params.fileId,
19
+ filename: params.filename,
20
+ mediaType: params.mediaType,
21
+ sourceEventId: params.sourceEventId,
22
+ sourcePartIndex: params.sourcePartIndex,
32
23
  },
33
24
  ],
34
25
  });
35
- console.log(`[FileParseContext ${params.datasetId}] Workstation created: ${workstation}`);
26
+ const sandboxFilePath = workspace.files[0]?.path ?? "";
27
+ if (!sandboxFilePath)
28
+ throw new Error("dataset_workspace_file_missing");
29
+ console.log(`[FileParseContext ${params.datasetId}] Context workspace created: ${workspace.root}`);
36
30
  console.log(`[FileParseContext ${params.datasetId}] File saved: ${sandboxFilePath}`);
37
31
  const state = {
38
32
  initialized: true,
39
33
  filePath: sandboxFilePath,
34
+ outputPath: `${workspace.outputDir}/${DATASET_OUTPUT_FILE_NAME}`,
35
+ scriptsDir: workspace.scriptsDir,
36
+ manifestPath: workspace.manifestPath,
40
37
  };
41
38
  return { filePath: sandboxFilePath, state };
42
39
  }
@@ -3,6 +3,9 @@ import type { FilePreviewContext } from "./filepreview.types.js";
3
3
  export type SandboxState = {
4
4
  initialized: boolean;
5
5
  filePath: string;
6
+ outputPath?: string;
7
+ scriptsDir?: string;
8
+ manifestPath?: string;
6
9
  };
7
10
  export type FileParseContext = {
8
11
  datasetId: string;
@@ -10,6 +13,9 @@ export type FileParseContext = {
10
13
  instructions: string;
11
14
  sandboxConfig: {
12
15
  filePath: string;
16
+ outputPath?: string;
17
+ scriptsDir?: string;
18
+ manifestPath?: string;
13
19
  };
14
20
  analysis: any[];
15
21
  schema: any | null;
@@ -29,6 +35,10 @@ export type FileParseContextParams = {
29
35
  sandboxState?: SandboxState;
30
36
  filePreview?: FilePreviewContext;
31
37
  schema?: any | null;
38
+ sourceEventId?: string;
39
+ sourcePartIndex?: number;
40
+ filename?: string;
41
+ mediaType?: string;
32
42
  };
33
43
  export type FileParseRunOptions = {
34
44
  prompt?: string;
@@ -223,8 +223,10 @@ function buildSchemaSection(context) {
223
223
  return xml.end({ prettyPrint: true, headless: true });
224
224
  }
225
225
  function buildInstructions(context) {
226
- const datasetWorkstation = getDatasetWorkstation(context.datasetId);
227
- const outputPath = getDatasetOutputPath(context.datasetId);
226
+ const datasetWorkstation = context.sandboxConfig.scriptsDir
227
+ ? context.sandboxConfig.scriptsDir.replace(/\/scripts$/, "")
228
+ : getDatasetWorkstation(context.datasetId);
229
+ const outputPath = context.sandboxConfig.outputPath ?? getDatasetOutputPath(context.datasetId);
228
230
  const hasProvidedSchema = Boolean(context.schema?.schema);
229
231
  const currentTask = hasProvidedSchema
230
232
  ? "Review FilePreview section, use the provided schema as the output contract, then parse the file and generate the dataset"
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./dataset.js";
2
+ export * from "./contextWorkspace.js";
2
3
  export * from "./domain.js";
3
4
  export * from "./materializeDataset.tool.js";
4
5
  export * from "./schema.js";
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./dataset.js";
2
+ export * from "./contextWorkspace.js";
2
3
  export * from "./domain.js";
3
4
  export * from "./materializeDataset.tool.js";
4
5
  export * from "./schema.js";
package/dist/service.d.ts CHANGED
@@ -64,6 +64,7 @@ export declare class DatasetService {
64
64
  uploadDatasetOutputFile(params: {
65
65
  datasetId: string;
66
66
  fileBuffer: Buffer;
67
+ storagePath?: string;
67
68
  }): Promise<ServiceResult<{
68
69
  fileId: string;
69
70
  storagePath: string;
package/dist/service.js CHANGED
@@ -308,7 +308,7 @@ export class DatasetService {
308
308
  }
309
309
  async uploadDatasetOutputFile(params) {
310
310
  try {
311
- const storagePath = `/dataset/${params.datasetId}/output.jsonl`;
311
+ const storagePath = params.storagePath ?? `/dataset/${params.datasetId}/output.jsonl`;
312
312
  const uploadResult = await this.db.storage.uploadFile(storagePath, params.fileBuffer, {
313
313
  contentType: "application/x-ndjson",
314
314
  contentDisposition: "output.jsonl",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.81-beta.development.0",
3
+ "version": "1.22.83-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.81-beta.development.0",
69
- "@ekairos/events": "^1.22.81-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.81-beta.development.0",
68
+ "@ekairos/domain": "^1.22.83-beta.development.0",
69
+ "@ekairos/events": "^1.22.83-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.83-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",
@@ -80,10 +80,10 @@
80
80
  "@ekairos/openai-reactor": "workspace:*",
81
81
  "@ekairos/tsconfig": "workspace:*",
82
82
  "@types/node": "^24.5.0",
83
- "@workflow/serde": "5.0.0-beta.0",
84
- "@workflow/vitest": "5.0.0-beta.1",
83
+ "@workflow/serde": "5.0.0-beta.1",
84
+ "@workflow/vitest": "5.0.0-beta.5",
85
85
  "dotenv": "^17.2.3",
86
86
  "typescript": "^5.9.2",
87
- "workflow": "5.0.0-beta.1"
87
+ "workflow": "5.0.0-beta.5"
88
88
  }
89
89
  }