@ekairos/dataset 1.22.85-beta.development.0 → 1.22.87-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
1
+ import { datasetReadOutputJsonlStep } from "./dataset/steps.js";
2
+ import { readInstantFileStep } from "./file/steps.js";
3
+ import { getContextResourcesDir, sanitizeContextWorkspacePathSegment, } from "./contextWorkspace.js";
4
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
5
+ function asRecord(value) {
6
+ return value && typeof value === "object" && !Array.isArray(value)
7
+ ? value
8
+ : null;
9
+ }
10
+ function asString(value) {
11
+ return typeof value === "string" ? value.trim() : "";
12
+ }
13
+ function contentFileName(resource) {
14
+ const filename = asString(resource.filename);
15
+ if (filename) {
16
+ return sanitizeContextWorkspacePathSegment(filename, "resource");
17
+ }
18
+ if (resource.type === "dataset")
19
+ return "resource.jsonl";
20
+ if (resource.type === "text")
21
+ return "resource.txt";
22
+ return "resource";
23
+ }
24
+ function selectResources(resources, resourceKeys) {
25
+ const requested = new Set(Array.isArray(resourceKeys)
26
+ ? resourceKeys.map((key) => String(key).trim()).filter(Boolean)
27
+ : []);
28
+ if (requested.size === 0)
29
+ return resources;
30
+ return resources.filter((resource) => requested.has(resource.key));
31
+ }
32
+ export async function materializeContextResourcesStep(params) {
33
+ "use step";
34
+ const resourcesDir = getContextResourcesDir({ contextId: params.contextId });
35
+ const manifestPath = `${resourcesDir}/manifest.json`;
36
+ const selectedResources = selectResources(params.resources, params.resourceKeys);
37
+ const resourceDirs = selectedResources.map((resource, index) => {
38
+ const segment = sanitizeContextWorkspacePathSegment(resource.key, `resource_${index + 1}`);
39
+ return `${resourcesDir}/${segment}`;
40
+ });
41
+ await runDatasetSandboxCommandStep({
42
+ runtime: params.runtime,
43
+ sandboxId: params.sandboxId,
44
+ cmd: "mkdir",
45
+ args: ["-p", resourcesDir, ...resourceDirs],
46
+ });
47
+ const materialized = [];
48
+ for (let index = 0; index < selectedResources.length; index++) {
49
+ const resource = selectedResources[index];
50
+ const resourceDir = resourceDirs[index];
51
+ const metadataPath = `${resourceDir}/metadata.json`;
52
+ const files = [];
53
+ let status = "metadata_only";
54
+ let reason;
55
+ if (resource.type === "file" && asString(resource.fileId)) {
56
+ const file = await readInstantFileStep({
57
+ runtime: params.runtime,
58
+ fileId: asString(resource.fileId),
59
+ });
60
+ const path = `${resourceDir}/${contentFileName(resource)}`;
61
+ await writeDatasetSandboxFilesStep({
62
+ runtime: params.runtime,
63
+ sandboxId: params.sandboxId,
64
+ files: [{ path, contentBase64: file.contentBase64 }],
65
+ });
66
+ files.push({ path, role: "content", mediaType: asString(resource.mediaType) || undefined });
67
+ status = "materialized";
68
+ }
69
+ else if (resource.type === "dataset" && asString(resource.datasetId)) {
70
+ const datasetId = asString(resource.datasetId);
71
+ const content = await datasetReadOutputJsonlStep({
72
+ runtime: params.runtime,
73
+ datasetId,
74
+ });
75
+ const path = `${resourceDir}/resource.jsonl`;
76
+ await writeDatasetSandboxFilesStep({
77
+ runtime: params.runtime,
78
+ sandboxId: params.sandboxId,
79
+ files: [{ path, contentBase64: content.contentBase64 }],
80
+ });
81
+ files.push({ path, role: "content", mediaType: "application/x-ndjson" });
82
+ status = "materialized";
83
+ }
84
+ else if (resource.type === "text" && typeof resource.text === "string") {
85
+ const path = `${resourceDir}/${contentFileName(resource)}`;
86
+ await writeDatasetSandboxTextFilesStep({
87
+ runtime: params.runtime,
88
+ sandboxId: params.sandboxId,
89
+ files: [{ path, content: String(resource.text) }],
90
+ });
91
+ files.push({
92
+ path,
93
+ role: "content",
94
+ mediaType: asString(resource.mimeType) || "text/plain",
95
+ });
96
+ status = "materialized";
97
+ }
98
+ else {
99
+ reason =
100
+ resource.type === "file"
101
+ ? "file resource has no fileId"
102
+ : resource.type === "dataset"
103
+ ? "dataset resource has no datasetId"
104
+ : resource.type === "link" || resource.type === "repository" || resource.type === "external"
105
+ ? `${resource.type} resources are metadata-only until an approved adapter materializes them`
106
+ : "resource type is metadata-only";
107
+ }
108
+ const metadata = {
109
+ ...(asRecord(resource) ?? {}),
110
+ key: resource.key,
111
+ type: resource.type,
112
+ name: resource.name,
113
+ description: resource.description,
114
+ materialized: {
115
+ status,
116
+ reason,
117
+ dir: resourceDir,
118
+ files,
119
+ },
120
+ };
121
+ await writeDatasetSandboxTextFilesStep({
122
+ runtime: params.runtime,
123
+ sandboxId: params.sandboxId,
124
+ files: [{ path: metadataPath, content: JSON.stringify(metadata, null, 2) }],
125
+ });
126
+ materialized.push({
127
+ key: resource.key,
128
+ type: resource.type,
129
+ name: resource.name,
130
+ description: resource.description,
131
+ dir: resourceDir,
132
+ metadataPath,
133
+ files,
134
+ status,
135
+ reason,
136
+ });
137
+ }
138
+ const manifest = {
139
+ contextId: params.contextId,
140
+ sandboxId: params.sandboxId,
141
+ resourcesDir,
142
+ manifestPath,
143
+ resources: materialized,
144
+ };
145
+ await writeDatasetSandboxTextFilesStep({
146
+ runtime: params.runtime,
147
+ sandboxId: params.sandboxId,
148
+ files: [{ path: manifestPath, content: JSON.stringify(manifest, null, 2) }],
149
+ });
150
+ return manifest;
151
+ }
@@ -23,12 +23,14 @@ export type PreparedContextExecutionWorkspace = {
23
23
  root: string;
24
24
  contextRoot: string;
25
25
  eventsDir: string;
26
+ resourcesDir: string;
26
27
  outputDir: string;
27
28
  scriptsDir: string;
28
29
  tmpDir: string;
29
30
  manifestPath: string;
30
31
  files: PreparedContextWorkspaceFile[];
31
32
  };
33
+ export declare function sanitizeContextWorkspacePathSegment(value: string, fallback: string): string;
32
34
  export declare function getContextWorkspaceBase(): string;
33
35
  export declare function getContextExecutionWorkspaceRoot(params: {
34
36
  contextId: string;
@@ -43,6 +45,10 @@ export declare function getContextEventsDir(params: {
43
45
  contextId: string;
44
46
  root?: string;
45
47
  }): string;
48
+ export declare function getContextResourcesDir(params: {
49
+ contextId: string;
50
+ root?: string;
51
+ }): string;
46
52
  export declare function getContextExecutionWorkspaceDirs(params: {
47
53
  contextId: string;
48
54
  executionId: string;
@@ -51,6 +57,7 @@ export declare function getContextExecutionWorkspaceDirs(params: {
51
57
  root: string;
52
58
  contextRoot: string;
53
59
  eventsDir: string;
60
+ resourcesDir: string;
54
61
  outputDir: string;
55
62
  scriptsDir: string;
56
63
  tmpDir: string;
@@ -17,6 +17,9 @@ function sanitizePathSegment(value, fallback) {
17
17
  .slice(0, 160);
18
18
  return normalized || fallback;
19
19
  }
20
+ export function sanitizeContextWorkspacePathSegment(value, fallback) {
21
+ return sanitizePathSegment(value, fallback);
22
+ }
20
23
  function filenameFromContentDisposition(value, fallback) {
21
24
  const raw = String(value ?? "").trim();
22
25
  if (!raw)
@@ -57,14 +60,19 @@ export function getContextWorkspaceRoot(params) {
57
60
  export function getContextEventsDir(params) {
58
61
  return `${getContextWorkspaceRoot(params)}/events`;
59
62
  }
63
+ export function getContextResourcesDir(params) {
64
+ return `${getContextWorkspaceRoot(params)}/resources`;
65
+ }
60
66
  export function getContextExecutionWorkspaceDirs(params) {
61
67
  const root = getContextExecutionWorkspaceRoot(params);
62
68
  const contextRoot = getContextWorkspaceRoot(params);
63
69
  const eventsDir = getContextEventsDir(params);
70
+ const resourcesDir = getContextResourcesDir(params);
64
71
  return {
65
72
  root,
66
73
  contextRoot,
67
74
  eventsDir,
75
+ resourcesDir,
68
76
  outputDir: `${root}/output`,
69
77
  scriptsDir: `${root}/scripts`,
70
78
  tmpDir: `${root}/tmp`,
@@ -73,7 +81,15 @@ export function getContextExecutionWorkspaceDirs(params) {
73
81
  }
74
82
  export function getContextExecutionWorkspaceStandardDirs(params) {
75
83
  const dirs = getContextExecutionWorkspaceDirs(params);
76
- return [dirs.contextRoot, dirs.eventsDir, dirs.root, dirs.outputDir, dirs.scriptsDir, dirs.tmpDir];
84
+ return [
85
+ dirs.contextRoot,
86
+ dirs.eventsDir,
87
+ dirs.resourcesDir,
88
+ dirs.root,
89
+ dirs.outputDir,
90
+ dirs.scriptsDir,
91
+ dirs.tmpDir,
92
+ ];
77
93
  }
78
94
  export function extractContextWorkspaceFilesFromEventItems(eventItems) {
79
95
  const files = [];
@@ -1,6 +1,7 @@
1
1
  import { DatasetService } from "../service.js";
2
2
  import { datasetDomain } from "../schema.js";
3
3
  import { inferDatasetSchema } from "../builder/schemaInference.js";
4
+ import { rowsToJsonl } from "../builder/rows.js";
4
5
  export async function getDatasetRuntimeDb(runtime) {
5
6
  if (!runtime) {
6
7
  throw new Error("Dataset step requires runtime.");
@@ -26,6 +27,7 @@ export async function datasetGetByIdStep(params) {
26
27
  export async function datasetReadOutputJsonlStep(params) {
27
28
  "use step";
28
29
  const db = await getDatasetRuntimeDb(params.runtime);
30
+ const service = new DatasetService(db);
29
31
  for (let attempt = 1; attempt <= 20; attempt++) {
30
32
  const query = await db.query({
31
33
  dataset_datasets: {
@@ -40,6 +42,16 @@ export async function datasetReadOutputJsonlStep(params) {
40
42
  const fileBuffer = await fetch(url).then((r) => r.arrayBuffer());
41
43
  return { contentBase64: Buffer.from(fileBuffer).toString("base64") };
42
44
  }
45
+ const directRows = await service.readRows({
46
+ datasetId: params.datasetId,
47
+ cursor: 0,
48
+ limit: 100000,
49
+ });
50
+ if (directRows.ok && directRows.data.rows.length > 0) {
51
+ return {
52
+ contentBase64: Buffer.from(rowsToJsonl(directRows.data.rows), "utf-8").toString("base64"),
53
+ };
54
+ }
43
55
  await new Promise((resolve) => setTimeout(resolve, 250 * attempt));
44
56
  }
45
57
  throw new Error("Dataset output file not found");
package/dist/dataset.js CHANGED
@@ -118,6 +118,7 @@ export function dataset(runtime, options = {}) {
118
118
  const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
119
119
  stateWithBuildOptions.resources = context.resources;
120
120
  stateWithBuildOptions.contextId = context.contextId;
121
+ stateWithBuildOptions.contextResources = context.contextResources;
121
122
  const effectiveState = stateWithBuildOptions.output === "object"
122
123
  ? {
123
124
  ...stateWithBuildOptions,
@@ -3,8 +3,5 @@ interface ExecuteCommandToolParams {
3
3
  sandboxId: string;
4
4
  runtime: any;
5
5
  }
6
- export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): import("ai").Tool<{
7
- pythonCode: string;
8
- scriptName: string;
9
- }, unknown>;
6
+ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): any;
10
7
  export {};
@@ -1,10 +1,9 @@
1
- import { tool } from "ai";
1
+ import { defineAction } from "@ekairos/events";
2
2
  import { z } from "zod";
3
- import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep } from "./sandbox/steps.js";
4
- import { getDatasetScriptsDir } from "./datasetFiles.js";
3
+ import { materializeContextResourcesStep } from "./contextResources.js";
4
+ import { getDatasetScriptsDir, getDatasetStandardDirs } from "./datasetFiles.js";
5
5
  import { getContextExecutionWorkspaceDirs } from "./contextWorkspace.js";
6
- // To keep responses predictable for big data scenarios, we cap stdout/stderr.
7
- // The tool's return payload exposes stdout (capped) plus the on-disk script path.
6
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
8
7
  const MAX_STDOUT_CHARS = 20000;
9
8
  const MAX_STDERR_CHARS = 5000;
10
9
  function normalizeScriptName(scriptName) {
@@ -23,39 +22,96 @@ function stableScriptHash(value) {
23
22
  }
24
23
  return (hash >>> 0).toString(36);
25
24
  }
25
+ const executeCommandInputSchema = z.object({
26
+ commandDescription: z
27
+ .string()
28
+ .min(1)
29
+ .describe("Required pre-execution description of the command. Describe the inputs/resources it will use, the operation it will perform, the expected output, and why a command is the right tool instead of direct completion. Invalid descriptions include rereading resources whose descriptor/preview already contains the needed evidence, merely formatting JSON, constructing the final object, writing output.jsonl, or making completion easier."),
30
+ pythonCode: z
31
+ .string()
32
+ .describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. If context resources are materialized, read os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] to discover files and metadata. Do not install packages, download dependencies, use pip/npm/apt/curl/wget, or access the network. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
33
+ scriptName: z
34
+ .string()
35
+ .describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
36
+ resourceKeys: z
37
+ .array(z.string())
38
+ .optional()
39
+ .describe("Optional context resource keys to materialize before running the script. Omit to materialize every context resource."),
40
+ });
41
+ const materializedResourceSchema = z.object({
42
+ key: z.string(),
43
+ type: z.string(),
44
+ status: z.string(),
45
+ dir: z.string(),
46
+ files: z.array(z.object({
47
+ path: z.string(),
48
+ role: z.string(),
49
+ mediaType: z.string().optional(),
50
+ })),
51
+ reason: z.string().optional(),
52
+ });
53
+ const executeCommandOutputSchema = z
54
+ .object({
55
+ success: z.boolean(),
56
+ fatal: z.boolean().optional(),
57
+ status: z.string().optional(),
58
+ exitCode: z.number().optional(),
59
+ stdout: z.string(),
60
+ stderr: z.string(),
61
+ scriptPath: z.string(),
62
+ message: z.string().optional(),
63
+ error: z.string().optional(),
64
+ resourcesDir: z.string().optional(),
65
+ resourcesManifestPath: z.string().optional(),
66
+ materializedResources: z.array(materializedResourceSchema).optional(),
67
+ stdoutTruncated: z.boolean(),
68
+ stderrTruncated: z.boolean(),
69
+ stdoutOriginalLength: z.number(),
70
+ stderrOriginalLength: z.number(),
71
+ })
72
+ .passthrough();
26
73
  export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
27
- return tool({
28
- description: "Execute Python scripts in the sandbox. Always saves script to a file before executing. The tool's output is EXACTLY the script's stdout and includes the script file path for traceability. CRITICAL: Print concise, human-readable summaries only; do NOT print raw large data. For big results, write artifacts to files in the workstation and print their file paths. Always include progress/result prints (e.g., 'Processing file X...', 'Found Y records', 'Generated output.csv').",
29
- inputSchema: z.object({
30
- pythonCode: z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
31
- scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
32
- }),
33
- execute: (async ({ pythonCode, scriptName }, actionContext) => {
74
+ return defineAction({
75
+ description: "Execute Python scripts in the sandbox only when command execution is necessary to inspect, parse, aggregate, join, or compute over context resources that are not sufficiently represented in the visible context, resource descriptors, or previews. This is a high-cost computation tool, not a completion tool. Do not use it merely to reread resources whose descriptor/preview already contains the needed evidence, format JSON, build the final object, write output.jsonl, or make completion easier when completeObject or replaceRows can return the result directly. Before the script runs, requested context resources are materialized into /tmp/ekairos/contexts/{contextId}/resources and a manifest.json is written there. The Python process receives EKAIROS_CONTEXT_RESOURCES_DIR and EKAIROS_CONTEXT_RESOURCES_MANIFEST environment variables when resources are available; manifest entries expose files as resources[].files[].path. Do not install packages, download dependencies, use pip/npm/apt/curl/wget, or access the network; use only the available runtime and standard library unless a dependency is already present. Print concise progress and results only; do not dump large data.",
76
+ input: executeCommandInputSchema,
77
+ output: executeCommandOutputSchema,
78
+ execute: async ({ input, context, contextId, executionId, }) => {
79
+ const { commandDescription, pythonCode, resourceKeys, scriptName } = input;
34
80
  const normalizedScriptName = normalizeScriptName(scriptName);
35
81
  const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
36
- const scriptsDir = actionContext?.contextId && actionContext.executionId
37
- ? getContextExecutionWorkspaceDirs({
38
- contextId: actionContext.contextId,
39
- executionId: actionContext.executionId,
40
- }).scriptsDir
82
+ const scriptsDir = contextId && executionId
83
+ ? getContextExecutionWorkspaceDirs({ contextId, executionId }).scriptsDir
41
84
  : getDatasetScriptsDir(datasetId);
42
85
  const scriptFile = `${scriptsDir}/${normalizedScriptName}-${scriptHash}.py`;
86
+ let resourcesManifest = null;
43
87
  console.log(`[Dataset ${datasetId}] ========================================`);
44
- console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
88
+ console.log(`[Dataset ${datasetId}] Action: executeCommand`);
89
+ console.log(`[Dataset ${datasetId}] Description: ${commandDescription}`);
45
90
  console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
46
91
  console.log(`[Dataset ${datasetId}] File: ${scriptFile}`);
47
92
  console.log(`[Dataset ${datasetId}] Code length: ${pythonCode.length} chars`);
48
93
  console.log(`[Dataset ${datasetId}] ========================================`);
49
94
  try {
95
+ if (contextId && Array.isArray(context?.resources) && context.resources.length > 0) {
96
+ resourcesManifest = await materializeContextResourcesStep({
97
+ runtime,
98
+ sandboxId,
99
+ contextId,
100
+ resources: context.resources,
101
+ resourceKeys,
102
+ });
103
+ console.log(`[Dataset ${datasetId}] Resources manifest: ${resourcesManifest.manifestPath}`);
104
+ }
105
+ await runDatasetSandboxCommandStep({
106
+ runtime,
107
+ sandboxId,
108
+ cmd: "mkdir",
109
+ args: ["-p", ...getDatasetStandardDirs(datasetId), scriptsDir],
110
+ });
50
111
  await writeDatasetSandboxTextFilesStep({
51
112
  runtime,
52
113
  sandboxId,
53
- files: [
54
- {
55
- path: scriptFile,
56
- content: pythonCode,
57
- },
58
- ],
114
+ files: [{ path: scriptFile, content: pythonCode }],
59
115
  });
60
116
  const written = await runDatasetSandboxCommandStep({
61
117
  runtime,
@@ -76,19 +132,33 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
76
132
  stderr: written.stderr || "",
77
133
  exitCode: written.exitCode,
78
134
  scriptPath: scriptFile,
135
+ resourcesDir: resourcesManifest?.resourcesDir,
136
+ resourcesManifestPath: resourcesManifest?.manifestPath,
137
+ materializedResources: resourcesManifest?.resources,
79
138
  stdoutTruncated: false,
80
139
  stderrTruncated: false,
81
140
  stdoutOriginalLength: 0,
82
141
  stderrOriginalLength: 0,
83
142
  };
84
143
  }
144
+ const pythonArgs = resourcesManifest
145
+ ? [
146
+ "-c",
147
+ [
148
+ "import os, runpy",
149
+ `os.environ["EKAIROS_CONTEXT_RESOURCES_DIR"] = ${JSON.stringify(resourcesManifest.resourcesDir)}`,
150
+ `os.environ["EKAIROS_CONTEXT_RESOURCES_MANIFEST"] = ${JSON.stringify(resourcesManifest.manifestPath)}`,
151
+ `runpy.run_path(${JSON.stringify(scriptFile)}, run_name="__main__")`,
152
+ ].join("; "),
153
+ ]
154
+ : [scriptFile];
85
155
  console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
86
- console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
156
+ console.log(`[Dataset ${datasetId}] Executing: python ${resourcesManifest ? "<with context resources env>" : scriptFile}`);
87
157
  const result = await runDatasetSandboxCommandStep({
88
158
  runtime,
89
159
  sandboxId,
90
160
  cmd: "python",
91
- args: [scriptFile],
161
+ args: pythonArgs,
92
162
  });
93
163
  const stdout = result.stdout || "";
94
164
  const stderr = result.stderr || "";
@@ -98,7 +168,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
98
168
  const stdoutCapped = isStdoutTruncated ? stdout.slice(0, MAX_STDOUT_CHARS) : stdout;
99
169
  const stderrCapped = isStderrTruncated ? stderr.slice(0, MAX_STDERR_CHARS) : stderr;
100
170
  if (exitCode !== 0) {
101
- console.error(`[Dataset ${datasetId}] Command failed with exit code ${exitCode}`);
171
+ console.error(`[Dataset ${datasetId}] Command failed with exit code ${exitCode}`);
102
172
  console.error(`[Dataset ${datasetId}] Stderr:`, stderrCapped.substring(0, 500));
103
173
  console.error(`[Dataset ${datasetId}] ========================================`);
104
174
  return {
@@ -108,6 +178,9 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
108
178
  stderr: stderrCapped,
109
179
  scriptPath: scriptFile,
110
180
  error: `Command failed with exit code ${exitCode}`,
181
+ resourcesDir: resourcesManifest?.resourcesDir,
182
+ resourcesManifestPath: resourcesManifest?.manifestPath,
183
+ materializedResources: resourcesManifest?.resources,
111
184
  stdoutTruncated: isStdoutTruncated,
112
185
  stderrTruncated: isStderrTruncated,
113
186
  stdoutOriginalLength: stdout.length,
@@ -115,7 +188,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
115
188
  };
116
189
  }
117
190
  if (stderr && (stderr.includes("Traceback") || stderr.toLowerCase().includes("error"))) {
118
- console.error(`[Dataset ${datasetId}] Python error detected`);
191
+ console.error(`[Dataset ${datasetId}] Python error detected`);
119
192
  console.error(`[Dataset ${datasetId}] Stderr:`, stderrCapped.substring(0, 500));
120
193
  console.error(`[Dataset ${datasetId}] ========================================`);
121
194
  return {
@@ -125,17 +198,20 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
125
198
  stderr: stderrCapped,
126
199
  scriptPath: scriptFile,
127
200
  error: "Python error detected in stderr",
201
+ resourcesDir: resourcesManifest?.resourcesDir,
202
+ resourcesManifestPath: resourcesManifest?.manifestPath,
203
+ materializedResources: resourcesManifest?.resources,
128
204
  stdoutTruncated: isStdoutTruncated,
129
205
  stderrTruncated: isStderrTruncated,
130
206
  stdoutOriginalLength: stdout.length,
131
207
  stderrOriginalLength: stderr.length,
132
208
  };
133
209
  }
134
- console.log(`[Dataset ${datasetId}] Command executed successfully`);
210
+ console.log(`[Dataset ${datasetId}] Command executed successfully`);
135
211
  if (stdout) {
136
212
  console.log(`[Dataset ${datasetId}] Output length: ${stdout.length} chars`);
137
213
  if (isStdoutTruncated) {
138
- console.log(`[Dataset ${datasetId}] ⚠️ Stdout truncated to ${MAX_STDOUT_CHARS} chars`);
214
+ console.log(`[Dataset ${datasetId}] Stdout truncated to ${MAX_STDOUT_CHARS} chars`);
139
215
  }
140
216
  }
141
217
  console.log(`[Dataset ${datasetId}] ========================================`);
@@ -146,6 +222,9 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
146
222
  stderr: stderrCapped,
147
223
  scriptPath: scriptFile,
148
224
  message: "Command executed successfully",
225
+ resourcesDir: resourcesManifest?.resourcesDir,
226
+ resourcesManifestPath: resourcesManifest?.manifestPath,
227
+ materializedResources: resourcesManifest?.resources,
149
228
  stdoutTruncated: isStdoutTruncated,
150
229
  stderrTruncated: isStderrTruncated,
151
230
  stdoutOriginalLength: stdout.length,
@@ -163,12 +242,15 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
163
242
  stderr: "",
164
243
  exitCode: -1,
165
244
  scriptPath: scriptFile,
245
+ resourcesDir: resourcesManifest?.resourcesDir,
246
+ resourcesManifestPath: resourcesManifest?.manifestPath,
247
+ materializedResources: resourcesManifest?.resources,
166
248
  stdoutTruncated: false,
167
249
  stderrTruncated: false,
168
250
  stdoutOriginalLength: 0,
169
251
  stderrOriginalLength: 0,
170
252
  };
171
253
  }
172
- }),
254
+ },
173
255
  });
174
256
  }
@@ -30,8 +30,10 @@ async function runLocalSandboxCommand(params) {
30
30
  const cmd = String(params.cmd ?? "").trim();
31
31
  const args = params.args ?? [];
32
32
  if (cmd === "mkdir") {
33
- const target = args[args.length - 1];
34
- await fs.mkdir(String(target ?? ""), { recursive: true });
33
+ const targets = args.filter((arg) => !String(arg).startsWith("-"));
34
+ for (const target of targets) {
35
+ await fs.mkdir(String(target ?? ""), { recursive: true });
36
+ }
35
37
  return { exitCode: 0, stdout: "", stderr: "" };
36
38
  }
37
39
  if (cmd === "rm") {
package/dist/service.d.ts CHANGED
@@ -74,6 +74,10 @@ export declare class DatasetService {
74
74
  storagePath: string;
75
75
  }): Promise<ServiceResult<void>>;
76
76
  readRecordsFromFile(datasetId: string): Promise<ServiceResult<AsyncGenerator<any, void, unknown>>>;
77
+ readLinkedRecords(datasetId: string): Promise<ServiceResult<Array<{
78
+ rowContent: any;
79
+ order: number;
80
+ }>>>;
77
81
  previewRows(datasetId: string, limit?: number): Promise<ServiceResult<any[]>>;
78
82
  readRows(params: {
79
83
  datasetId: string;
package/dist/service.js CHANGED
@@ -432,8 +432,51 @@ export class DatasetService {
432
432
  return { ok: false, error: message };
433
433
  }
434
434
  }
435
+ async readLinkedRecords(datasetId) {
436
+ try {
437
+ const query = await this.db.query({
438
+ dataset_datasets: {
439
+ $: {
440
+ where: { datasetId },
441
+ limit: 1,
442
+ },
443
+ records: {},
444
+ },
445
+ });
446
+ const datasetRecord = query.dataset_datasets?.[0];
447
+ if (!datasetRecord) {
448
+ return { ok: false, error: `Dataset not found with id: ${datasetId}` };
449
+ }
450
+ const linkedRecords = Array.isArray(datasetRecord?.records)
451
+ ? datasetRecord.records
452
+ : [];
453
+ return {
454
+ ok: true,
455
+ data: linkedRecords
456
+ .slice()
457
+ .sort((a, b) => Number(a?.order ?? 0) - Number(b?.order ?? 0))
458
+ .map((record) => ({
459
+ rowContent: record?.rowContent,
460
+ order: Number(record?.order ?? 0),
461
+ })),
462
+ };
463
+ }
464
+ catch (error) {
465
+ const message = error instanceof Error ? error.message : String(error);
466
+ return { ok: false, error: message };
467
+ }
468
+ }
435
469
  async previewRows(datasetId, limit = 20) {
436
470
  try {
471
+ const linkedRecords = await this.readLinkedRecords(datasetId);
472
+ if (linkedRecords.ok && linkedRecords.data.length > 0) {
473
+ return {
474
+ ok: true,
475
+ data: linkedRecords.data
476
+ .slice(0, Math.max(0, Number(limit ?? 20)))
477
+ .map((record) => record.rowContent),
478
+ };
479
+ }
437
480
  const readResult = await this.readRecordsFromFile(datasetId);
438
481
  if (!readResult.ok) {
439
482
  return readResult;
@@ -454,12 +497,26 @@ export class DatasetService {
454
497
  }
455
498
  async readRows(params) {
456
499
  try {
500
+ const start = Math.max(0, Number(params.cursor ?? 0));
501
+ const limit = Math.max(1, Number(params.limit ?? 200));
502
+ const linkedRecords = await this.readLinkedRecords(params.datasetId);
503
+ if (linkedRecords.ok && linkedRecords.data.length > 0) {
504
+ const rows = linkedRecords.data
505
+ .slice(start, start + limit)
506
+ .map((record) => record.rowContent);
507
+ return {
508
+ ok: true,
509
+ data: {
510
+ rows,
511
+ cursor: start + rows.length,
512
+ done: start + rows.length >= linkedRecords.data.length,
513
+ },
514
+ };
515
+ }
457
516
  const readResult = await this.readRecordsFromFile(params.datasetId);
458
517
  if (!readResult.ok) {
459
518
  return readResult;
460
519
  }
461
- const start = Math.max(0, Number(params.cursor ?? 0));
462
- const limit = Math.max(1, Number(params.limit ?? 200));
463
520
  const rows = [];
464
521
  let index = 0;
465
522
  let hasMore = false;