npm - @ekairos/dataset - Versions diffs - 1.22.85-beta.development.0 → 1.22.86-beta.development.0 - Mend

@ekairos/dataset 1.22.85-beta.development.0 → 1.22.86-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/dist/builder/context.d.ts +8 -0
package/dist/builder/context.js +68 -9
package/dist/builder/instructions.js +3 -2
package/dist/builder/materialize.js +11 -25
package/dist/builder/types.d.ts +2 -1
package/dist/completeDataset.steps.d.ts +29 -0
package/dist/completeDataset.steps.js +32 -1
package/dist/completeDataset.tool.d.ts +41 -0
package/dist/completeDataset.tool.js +6 -3
package/dist/contextResources.d.ts +31 -0
package/dist/contextResources.js +151 -0
package/dist/contextWorkspace.d.ts +7 -0
package/dist/contextWorkspace.js +17 -1
package/dist/dataset/steps.js +12 -0
package/dist/dataset.js +1 -0
package/dist/executeCommand.tool.d.ts +1 -4
package/dist/executeCommand.tool.js +113 -31
package/dist/sandbox/steps.js +4 -2
package/dist/service.d.ts +4 -0
package/dist/service.js +59 -2
package/dist/transform/prompts.js +37 -21
package/dist/transform/transform-dataset.agent.d.ts +1 -0
package/dist/transform/transform-dataset.agent.js +25 -25
package/dist/transform/transform-dataset.types.d.ts +4 -1
package/dist/writeDatasetRows.tool.d.ts +188 -0
package/dist/writeDatasetRows.tool.js +258 -0
package/package.json +4 -4

package/dist/contextResources.js ADDED Viewed

@@ -0,0 +1,151 @@
+import { datasetReadOutputJsonlStep } from "./dataset/steps.js";
+import { readInstantFileStep } from "./file/steps.js";
+import { getContextResourcesDir, sanitizeContextWorkspacePathSegment, } from "./contextWorkspace.js";
+import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
+/**
+ * Narrows an arbitrary value to a plain (non-array) object.
+ *
+ * @param {unknown} value - Candidate value.
+ * @returns {object|null} `value` itself when it is a non-null, non-array
+ *   object; `null` for everything else (primitives, functions, arrays,
+ *   null/undefined).
+ */
+function asRecord(value) {
+    // `typeof null` is "object", so null has to be excluded explicitly.
+    if (!value || typeof value !== "object" || Array.isArray(value)) {
+        return null;
+    }
+    return value;
+}
+/**
+ * Reads a value as a trimmed string.
+ *
+ * @param {unknown} value - Candidate value.
+ * @returns {string} The trimmed string, or "" when `value` is not a string.
+ */
+function asString(value) {
+    if (typeof value !== "string") {
+        return "";
+    }
+    return value.trim();
+}
+/**
+ * Chooses the file name used for a resource's content inside its directory.
+ *
+ * A non-blank `resource.filename` wins and is passed through the workspace
+ * path-segment sanitizer (fallback "resource"). Otherwise the name is derived
+ * from `resource.type`.
+ *
+ * @param {{filename?: unknown, type?: unknown}} resource - Resource descriptor.
+ * @returns {string} File name (no directory part).
+ */
+function contentFileName(resource) {
+    const declaredName = asString(resource.filename);
+    if (declaredName) {
+        return sanitizeContextWorkspacePathSegment(declaredName, "resource");
+    }
+    switch (resource.type) {
+        case "dataset":
+            return "resource.jsonl";
+        case "text":
+            return "resource.txt";
+        default:
+            return "resource";
+    }
+}
+/**
+ * Narrows the context resources to the keys the caller asked for.
+ *
+ * @param {Array<{key: unknown}>} resources - All resources of the context.
+ * @param {unknown} resourceKeys - Requested keys; anything that is not an
+ *   array, or an array holding only blank entries, means "no selection".
+ * @returns {Array} The `resources` array itself when there is no selection;
+ *   otherwise a new array with the resources whose `key` was requested.
+ */
+function selectResources(resources, resourceKeys) {
+    const wantedKeys = new Set();
+    if (Array.isArray(resourceKeys)) {
+        resourceKeys.forEach((rawKey) => {
+            // Keys are compared after stringifying and trimming; blanks are dropped.
+            const key = String(rawKey).trim();
+            if (key) {
+                wantedKeys.add(key);
+            }
+        });
+    }
+    if (wantedKeys.size > 0) {
+        return resources.filter((resource) => wantedKeys.has(resource.key));
+    }
+    return resources;
+}
+/**
+ * Writes the selected context resources into the sandbox and records them in
+ * a manifest.
+ *
+ * For each selected resource a directory is created under the context's
+ * resources directory. The resource content is written there when it can be
+ * obtained (file, dataset or text resources), and a `metadata.json` is always
+ * written. Finally a `manifest.json` listing every processed resource is
+ * written to the resources directory.
+ *
+ * @param params
+ * @param params.runtime - Forwarded unchanged to every step called here.
+ * @param params.sandboxId - Sandbox that receives the files.
+ * @param params.contextId - Used to resolve the resources directory.
+ * @param params.resources - All resources of the context.
+ * @param params.resourceKeys - Optional subset of resource keys to process;
+ *   see `selectResources`.
+ * @returns The manifest object that was serialized to `manifest.json`.
+ */
+export async function materializeContextResourcesStep(params) {
+    // Directive string; presumably consumed by the workflow tooling that runs
+    // steps (TODO confirm). It is the first statement of the body.
+    "use step";
+    const resourcesDir = getContextResourcesDir({ contextId: params.contextId });
+    const manifestPath = `${resourcesDir}/manifest.json`;
+    const selectedResources = selectResources(params.resources, params.resourceKeys);
+    // One directory per resource, named after the sanitized key (1-based
+    // `resource_N` is the fallback handed to the sanitizer).
+    // NOTE(review): two keys that sanitize to the same segment would share a
+    // directory and overwrite each other's metadata.json — confirm whether
+    // keys are guaranteed unique after sanitizing.
+    const resourceDirs = selectedResources.map((resource, index) => {
+        const segment = sanitizeContextWorkspacePathSegment(resource.key, `resource_${index + 1}`);
+        return `${resourcesDir}/${segment}`;
+    });
+    // Create the root and every per-resource directory with one mkdir call.
+    await runDatasetSandboxCommandStep({
+        runtime: params.runtime,
+        sandboxId: params.sandboxId,
+        cmd: "mkdir",
+        args: ["-p", resourcesDir, ...resourceDirs],
+    });
+    const materialized = [];
+    for (let index = 0; index < selectedResources.length; index++) {
+        const resource = selectedResources[index];
+        const resourceDir = resourceDirs[index];
+        const metadataPath = `${resourceDir}/metadata.json`;
+        const files = [];
+        // Stays "metadata_only" unless one of the content branches below runs.
+        let status = "metadata_only";
+        let reason;
+        // File resource: fetch the stored file and write its base64 content.
+        if (resource.type === "file" && asString(resource.fileId)) {
+            const file = await readInstantFileStep({
+                runtime: params.runtime,
+                fileId: asString(resource.fileId),
+            });
+            const path = `${resourceDir}/${contentFileName(resource)}`;
+            await writeDatasetSandboxFilesStep({
+                runtime: params.runtime,
+                sandboxId: params.sandboxId,
+                files: [{ path, contentBase64: file.contentBase64 }],
+            });
+            // NOTE(review): this branch reads `resource.mediaType` while the
+            // text branch below reads `resource.mimeType` — confirm which
+            // field name the resource descriptors actually carry.
+            files.push({ path, role: "content", mediaType: asString(resource.mediaType) || undefined });
+            status = "materialized";
+        }
+        // Dataset resource: always written as `resource.jsonl`, regardless of
+        // any `filename` on the descriptor.
+        else if (resource.type === "dataset" && asString(resource.datasetId)) {
+            const datasetId = asString(resource.datasetId);
+            const content = await datasetReadOutputJsonlStep({
+                runtime: params.runtime,
+                datasetId,
+            });
+            const path = `${resourceDir}/resource.jsonl`;
+            await writeDatasetSandboxFilesStep({
+                runtime: params.runtime,
+                sandboxId: params.sandboxId,
+                files: [{ path, contentBase64: content.contentBase64 }],
+            });
+            files.push({ path, role: "content", mediaType: "application/x-ndjson" });
+            status = "materialized";
+        }
+        // Text resource: the inline text is written as-is (an empty string
+        // still counts, because only the type of `resource.text` is checked).
+        else if (resource.type === "text" && typeof resource.text === "string") {
+            const path = `${resourceDir}/${contentFileName(resource)}`;
+            await writeDatasetSandboxTextFilesStep({
+                runtime: params.runtime,
+                sandboxId: params.sandboxId,
+                files: [{ path, content: String(resource.text) }],
+            });
+            files.push({
+                path,
+                role: "content",
+                mediaType: asString(resource.mimeType) || "text/plain",
+            });
+            status = "materialized";
+        }
+        // Nothing to write: record why the resource stays metadata-only.
+        else {
+            reason =
+                resource.type === "file"
+                    ? "file resource has no fileId"
+                    : resource.type === "dataset"
+                        ? "dataset resource has no datasetId"
+                        : resource.type === "link" || resource.type === "repository" || resource.type === "external"
+                            ? `${resource.type} resources are metadata-only until an approved adapter materializes them`
+                            : "resource type is metadata-only";
+        }
+        // The descriptor's own fields are copied first; the explicit keys
+        // that follow take precedence over same-named descriptor fields.
+        const metadata = {
+            ...(asRecord(resource) ?? {}),
+            key: resource.key,
+            type: resource.type,
+            name: resource.name,
+            description: resource.description,
+            materialized: {
+                status,
+                reason,
+                dir: resourceDir,
+                files,
+            },
+        };
+        await writeDatasetSandboxTextFilesStep({
+            runtime: params.runtime,
+            sandboxId: params.sandboxId,
+            files: [{ path: metadataPath, content: JSON.stringify(metadata, null, 2) }],
+        });
+        // Manifest entry: a summary of the resource plus where it was written.
+        materialized.push({
+            key: resource.key,
+            type: resource.type,
+            name: resource.name,
+            description: resource.description,
+            dir: resourceDir,
+            metadataPath,
+            files,
+            status,
+            reason,
+        });
+    }
+    const manifest = {
+        contextId: params.contextId,
+        sandboxId: params.sandboxId,
+        resourcesDir,
+        manifestPath,
+        resources: materialized,
+    };
+    await writeDatasetSandboxTextFilesStep({
+        runtime: params.runtime,
+        sandboxId: params.sandboxId,
+        files: [{ path: manifestPath, content: JSON.stringify(manifest, null, 2) }],
+    });
+    return manifest;
+}

package/dist/contextWorkspace.d.ts CHANGED Viewed

@@ -23,12 +23,14 @@ export type PreparedContextExecutionWorkspace = {
     root: string;
     contextRoot: string;
     eventsDir: string;
+    resourcesDir: string;
     outputDir: string;
     scriptsDir: string;
     tmpDir: string;
     manifestPath: string;
     files: PreparedContextWorkspaceFile[];
 };
+/**
+ * Exported entry point to the module's path-segment sanitizer.
+ *
+ * @param value - Raw text to turn into a single path segment.
+ * @param fallback - Passed to the sanitizer as the value to use when
+ *   sanitizing leaves nothing usable.
+ * @returns The sanitized segment.
+ */
+export declare function sanitizeContextWorkspacePathSegment(value: string, fallback: string): string;
 export declare function getContextWorkspaceBase(): string;
 export declare function getContextExecutionWorkspaceRoot(params: {
     contextId: string;
@@ -43,6 +45,10 @@ export declare function getContextEventsDir(params: {
     contextId: string;
     root?: string;
 }): string;
+/**
+ * Resolves the directory that holds a context's materialized resources:
+ * the `resources` directory directly under the context workspace root.
+ *
+ * @param params.contextId - Context whose workspace is addressed.
+ * @param params.root - Optional; forwarded unchanged to the workspace-root
+ *   resolver.
+ */
+export declare function getContextResourcesDir(params: {
+    contextId: string;
+    root?: string;
+}): string;
 export declare function getContextExecutionWorkspaceDirs(params: {
     contextId: string;
     executionId: string;
@@ -51,6 +57,7 @@ export declare function getContextExecutionWorkspaceDirs(params: {
     root: string;
     contextRoot: string;
     eventsDir: string;
+    resourcesDir: string;
     outputDir: string;
     scriptsDir: string;
     tmpDir: string;

package/dist/contextWorkspace.js CHANGED Viewed

@@ -17,6 +17,9 @@ function sanitizePathSegment(value, fallback) {
         .slice(0, 160);
     return normalized || fallback;
 }
+/**
+ * Exported wrapper that delegates directly to the module-private
+ * `sanitizePathSegment`, so other modules can build workspace paths with the
+ * same sanitizing rules.
+ *
+ * @param {string} value - Raw text to turn into a single path segment.
+ * @param {string} fallback - Returned by the sanitizer when the normalized
+ *   value is empty.
+ * @returns {string} The sanitized segment.
+ */
+export function sanitizeContextWorkspacePathSegment(value, fallback) {
+    return sanitizePathSegment(value, fallback);
+}
 function filenameFromContentDisposition(value, fallback) {
     const raw = String(value ?? "").trim();
     if (!raw)
@@ -57,14 +60,19 @@ export function getContextWorkspaceRoot(params) {
 export function getContextEventsDir(params) {
     return `${getContextWorkspaceRoot(params)}/events`;
 }
+/**
+ * Resolves the directory that holds a context's materialized resources.
+ *
+ * @param params - Forwarded unchanged to `getContextWorkspaceRoot`.
+ * @returns {string} The workspace root followed by `/resources`.
+ */
+export function getContextResourcesDir(params) {
+    return `${getContextWorkspaceRoot(params)}/resources`;
+}
 export function getContextExecutionWorkspaceDirs(params) {
     const root = getContextExecutionWorkspaceRoot(params);
     const contextRoot = getContextWorkspaceRoot(params);
     const eventsDir = getContextEventsDir(params);
+    const resourcesDir = getContextResourcesDir(params);
     return {
         root,
         contextRoot,
         eventsDir,
+        resourcesDir,
         outputDir: `${root}/output`,
         scriptsDir: `${root}/scripts`,
         tmpDir: `${root}/tmp`,
@@ -73,7 +81,15 @@ export function getContextExecutionWorkspaceDirs(params) {
 }
 export function getContextExecutionWorkspaceStandardDirs(params) {
     const dirs = getContextExecutionWorkspaceDirs(params);
-    return [dirs.contextRoot, dirs.eventsDir, dirs.root, dirs.outputDir, dirs.scriptsDir, dirs.tmpDir];
+    return [
+        dirs.contextRoot,
+        dirs.eventsDir,
+        dirs.resourcesDir,
+        dirs.root,
+        dirs.outputDir,
+        dirs.scriptsDir,
+        dirs.tmpDir,
+    ];
 }
 export function extractContextWorkspaceFilesFromEventItems(eventItems) {
     const files = [];

package/dist/dataset/steps.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { DatasetService } from "../service.js";
 import { datasetDomain } from "../schema.js";
 import { inferDatasetSchema } from "../builder/schemaInference.js";
+import { rowsToJsonl } from "../builder/rows.js";
 export async function getDatasetRuntimeDb(runtime) {
     if (!runtime) {
         throw new Error("Dataset step requires runtime.");
@@ -26,6 +27,7 @@ export async function datasetGetByIdStep(params) {
 export async function datasetReadOutputJsonlStep(params) {
     "use step";
     const db = await getDatasetRuntimeDb(params.runtime);
+    const service = new DatasetService(db);
     for (let attempt = 1; attempt <= 20; attempt++) {
         const query = await db.query({
             dataset_datasets: {
@@ -40,6 +42,16 @@ export async function datasetReadOutputJsonlStep(params) {
             const fileBuffer = await fetch(url).then((r) => r.arrayBuffer());
             return { contentBase64: Buffer.from(fileBuffer).toString("base64") };
         }
+        const directRows = await service.readRows({
+            datasetId: params.datasetId,
+            cursor: 0,
+            limit: 100000,
+        });
+        if (directRows.ok && directRows.data.rows.length > 0) {
+            return {
+                contentBase64: Buffer.from(rowsToJsonl(directRows.data.rows), "utf-8").toString("base64"),
+            };
+        }
         await new Promise((resolve) => setTimeout(resolve, 250 * attempt));
     }
     throw new Error("Dataset output file not found");

package/dist/dataset.js CHANGED Viewed

@@ -118,6 +118,7 @@ export function dataset(runtime, options = {}) {
             const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
             stateWithBuildOptions.resources = context.resources;
             stateWithBuildOptions.contextId = context.contextId;
+            stateWithBuildOptions.contextResources = context.contextResources;
             const effectiveState = stateWithBuildOptions.output === "object"
                 ? {
                     ...stateWithBuildOptions,

package/dist/executeCommand.tool.d.ts CHANGED Viewed

@@ -3,8 +3,5 @@ interface ExecuteCommandToolParams {
     sandboxId: string;
     runtime: any;
 }
-export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): import("ai").Tool<{
-    pythonCode: string;
-    scriptName: string;
-}, unknown>;
+export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): any;
 export {};

package/dist/executeCommand.tool.js CHANGED Viewed

@@ -1,10 +1,9 @@
-import { tool } from "ai";
+import { defineAction } from "@ekairos/events";
 import { z } from "zod";
-import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep } from "./sandbox/steps.js";
-import { getDatasetScriptsDir } from "./datasetFiles.js";
+import { materializeContextResourcesStep } from "./contextResources.js";
+import { getDatasetScriptsDir, getDatasetStandardDirs } from "./datasetFiles.js";
 import { getContextExecutionWorkspaceDirs } from "./contextWorkspace.js";
-// To keep responses predictable for big data scenarios, we cap stdout/stderr.
-// The tool's return payload exposes stdout (capped) plus the on-disk script path.
+import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
 const MAX_STDOUT_CHARS = 20000;
 const MAX_STDERR_CHARS = 5000;
 function normalizeScriptName(scriptName) {
@@ -23,39 +22,96 @@ function stableScriptHash(value) {
     }
     return (hash >>> 0).toString(36);
 }
+// Input accepted by the executeCommand action. The `.describe()` texts are
+// part of the schema handed to `defineAction`, so they are runtime strings,
+// not comments.
+const executeCommandInputSchema = z.object({
+    // Required, non-empty justification for running a command.
+    commandDescription: z
+        .string()
+        .min(1)
+        .describe("Required pre-execution description of the command. Describe the inputs/resources it will use, the operation it will perform, the expected output, and why a command is the right tool instead of direct completion. Invalid descriptions include rereading resources whose descriptor/preview already contains the needed evidence, merely formatting JSON, constructing the final object, writing output.jsonl, or making completion easier."),
+    // Source of the Python script to run.
+    pythonCode: z
+        .string()
+        .describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. If context resources are materialized, read os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] to discover files and metadata. Do not install packages, download dependencies, use pip/npm/apt/curl/wget, or access the network. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
+    // Base name of the script file.
+    scriptName: z
+        .string()
+        .describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
+    // Optional subset of context resource keys to materialize.
+    resourceKeys: z
+        .array(z.string())
+        .optional()
+        .describe("Optional context resource keys to materialize before running the script. Omit to materialize every context resource."),
+});
+// Shape of one entry of `materializedResources` in the action output.
+// NOTE(review): the entries built by materializeContextResourcesStep also
+// carry `name`, `description` and `metadataPath`, which are not declared
+// here — confirm whether output validation drops or rejects them.
+const materializedResourceSchema = z.object({
+    key: z.string(),
+    type: z.string(),
+    status: z.string(),
+    dir: z.string(),
+    files: z.array(z.object({
+        path: z.string(),
+        role: z.string(),
+        mediaType: z.string().optional(),
+    })),
+    reason: z.string().optional(),
+});
+// Result object of the executeCommand action. The stdout/stderr fields and
+// the four truncation/length fields are required; the rest are optional.
+const executeCommandOutputSchema = z
+    .object({
+    success: z.boolean(),
+    fatal: z.boolean().optional(),
+    status: z.string().optional(),
+    exitCode: z.number().optional(),
+    stdout: z.string(),
+    stderr: z.string(),
+    scriptPath: z.string(),
+    message: z.string().optional(),
+    error: z.string().optional(),
+    resourcesDir: z.string().optional(),
+    resourcesManifestPath: z.string().optional(),
+    materializedResources: z.array(materializedResourceSchema).optional(),
+    stdoutTruncated: z.boolean(),
+    stderrTruncated: z.boolean(),
+    stdoutOriginalLength: z.number(),
+    stderrOriginalLength: z.number(),
+})
+    // passthrough: keys not declared above are kept rather than stripped.
+    .passthrough();
 export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
-    return tool({
-        description: "Execute Python scripts in the sandbox. Always saves script to a file before executing. The tool's output is EXACTLY the script's stdout and includes the script file path for traceability. CRITICAL: Print concise, human-readable summaries only; do NOT print raw large data. For big results, write artifacts to files in the workstation and print their file paths. Always include progress/result prints (e.g., 'Processing file X...', 'Found Y records', 'Generated output.csv').",
-        inputSchema: z.object({
-            pythonCode: z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
-            scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
-        }),
-        execute: (async ({ pythonCode, scriptName }, actionContext) => {
+    return defineAction({
+        description: "Execute Python scripts in the sandbox only when command execution is necessary to inspect, parse, aggregate, join, or compute over context resources that are not sufficiently represented in the visible context, resource descriptors, or previews. This is a high-cost computation tool, not a completion tool. Do not use it merely to reread resources whose descriptor/preview already contains the needed evidence, format JSON, build the final object, write output.jsonl, or make completion easier when completeObject or replaceRows can return the result directly. Before the script runs, requested context resources are materialized into /tmp/ekairos/contexts/{contextId}/resources and a manifest.json is written there. The Python process receives EKAIROS_CONTEXT_RESOURCES_DIR and EKAIROS_CONTEXT_RESOURCES_MANIFEST environment variables when resources are available; manifest entries expose files as resources[].files[].path. Do not install packages, download dependencies, use pip/npm/apt/curl/wget, or access the network; use only the available runtime and standard library unless a dependency is already present. Print concise progress and results only; do not dump large data.",
+        input: executeCommandInputSchema,
+        output: executeCommandOutputSchema,
+        execute: async ({ input, context, contextId, executionId, }) => {
+            const { commandDescription, pythonCode, resourceKeys, scriptName } = input;
             const normalizedScriptName = normalizeScriptName(scriptName);
             const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
-            const scriptsDir = actionContext?.contextId && actionContext.executionId
-                ? getContextExecutionWorkspaceDirs({
-                    contextId: actionContext.contextId,
-                    executionId: actionContext.executionId,
-                }).scriptsDir
+            const scriptsDir = contextId && executionId
+                ? getContextExecutionWorkspaceDirs({ contextId, executionId }).scriptsDir
                 : getDatasetScriptsDir(datasetId);
             const scriptFile = `${scriptsDir}/${normalizedScriptName}-${scriptHash}.py`;
+            let resourcesManifest = null;
             console.log(`[Dataset ${datasetId}] ========================================`);
-            console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
+            console.log(`[Dataset ${datasetId}] Action: executeCommand`);
+            console.log(`[Dataset ${datasetId}] Description: ${commandDescription}`);
             console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
             console.log(`[Dataset ${datasetId}] File: ${scriptFile}`);
             console.log(`[Dataset ${datasetId}] Code length: ${pythonCode.length} chars`);
             console.log(`[Dataset ${datasetId}] ========================================`);
             try {
+                if (contextId && Array.isArray(context?.resources) && context.resources.length > 0) {
+                    resourcesManifest = await materializeContextResourcesStep({
+                        runtime,
+                        sandboxId,
+                        contextId,
+                        resources: context.resources,
+                        resourceKeys,
+                    });
+                    console.log(`[Dataset ${datasetId}] Resources manifest: ${resourcesManifest.manifestPath}`);
+                }
+                await runDatasetSandboxCommandStep({
+                    runtime,
+                    sandboxId,
+                    cmd: "mkdir",
+                    args: ["-p", ...getDatasetStandardDirs(datasetId), scriptsDir],
+                });
                 await writeDatasetSandboxTextFilesStep({
                     runtime,
                     sandboxId,
-                    files: [
-                        {
-                            path: scriptFile,
-                            content: pythonCode,
-                        },
-                    ],
+                    files: [{ path: scriptFile, content: pythonCode }],
                 });
                 const written = await runDatasetSandboxCommandStep({
                     runtime,
@@ -76,19 +132,33 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
                         stderr: written.stderr || "",
                         exitCode: written.exitCode,
                         scriptPath: scriptFile,
+                        resourcesDir: resourcesManifest?.resourcesDir,
+                        resourcesManifestPath: resourcesManifest?.manifestPath,
+                        materializedResources: resourcesManifest?.resources,
                         stdoutTruncated: false,
                         stderrTruncated: false,
                         stdoutOriginalLength: 0,
                         stderrOriginalLength: 0,
                     };
                 }
+                const pythonArgs = resourcesManifest
+                    ? [
+                        "-c",
+                        [
+                            "import os, runpy",
+                            `os.environ["EKAIROS_CONTEXT_RESOURCES_DIR"] = ${JSON.stringify(resourcesManifest.resourcesDir)}`,
+                            `os.environ["EKAIROS_CONTEXT_RESOURCES_MANIFEST"] = ${JSON.stringify(resourcesManifest.manifestPath)}`,
+                            `runpy.run_path(${JSON.stringify(scriptFile)}, run_name="__main__")`,
+                        ].join("; "),
+                    ]
+                    : [scriptFile];
                 console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
-                console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
+                console.log(`[Dataset ${datasetId}] Executing: python ${resourcesManifest ? "<with context resources env>" : scriptFile}`);
                 const result = await runDatasetSandboxCommandStep({
                     runtime,
                     sandboxId,
                     cmd: "python",
-                    args: [scriptFile],
+                    args: pythonArgs,
                 });
                 const stdout = result.stdout || "";
                 const stderr = result.stderr || "";
@@ -98,7 +168,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
                 const stdoutCapped = isStdoutTruncated ? stdout.slice(0, MAX_STDOUT_CHARS) : stdout;
                 const stderrCapped = isStderrTruncated ? stderr.slice(0, MAX_STDERR_CHARS) : stderr;
                 if (exitCode !== 0) {
-                    console.error(`[Dataset ${datasetId}] ❌ Command failed with exit code ${exitCode}`);
+                    console.error(`[Dataset ${datasetId}] Command failed with exit code ${exitCode}`);
                     console.error(`[Dataset ${datasetId}] Stderr:`, stderrCapped.substring(0, 500));
                     console.error(`[Dataset ${datasetId}] ========================================`);
                     return {
@@ -108,6 +178,9 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
                         stderr: stderrCapped,
                         scriptPath: scriptFile,
                         error: `Command failed with exit code ${exitCode}`,
+                        resourcesDir: resourcesManifest?.resourcesDir,
+                        resourcesManifestPath: resourcesManifest?.manifestPath,
+                        materializedResources: resourcesManifest?.resources,
                         stdoutTruncated: isStdoutTruncated,
                         stderrTruncated: isStderrTruncated,
                         stdoutOriginalLength: stdout.length,
@@ -115,7 +188,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
                     };
                 }
                 if (stderr && (stderr.includes("Traceback") || stderr.toLowerCase().includes("error"))) {
-                    console.error(`[Dataset ${datasetId}] ❌ Python error detected`);
+                    console.error(`[Dataset ${datasetId}] Python error detected`);
                     console.error(`[Dataset ${datasetId}] Stderr:`, stderrCapped.substring(0, 500));
                     console.error(`[Dataset ${datasetId}] ========================================`);
                     return {
@@ -125,17 +198,20 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
                         stderr: stderrCapped,
                         scriptPath: scriptFile,
                         error: "Python error detected in stderr",
+                        resourcesDir: resourcesManifest?.resourcesDir,
+                        resourcesManifestPath: resourcesManifest?.manifestPath,
+                        materializedResources: resourcesManifest?.resources,
                         stdoutTruncated: isStdoutTruncated,
                         stderrTruncated: isStderrTruncated,
                         stdoutOriginalLength: stdout.length,
                         stderrOriginalLength: stderr.length,
                     };
                 }
-                console.log(`[Dataset ${datasetId}] ✅ Command executed successfully`);
+                console.log(`[Dataset ${datasetId}] Command executed successfully`);
                 if (stdout) {
                     console.log(`[Dataset ${datasetId}] Output length: ${stdout.length} chars`);
                     if (isStdoutTruncated) {
-                        console.log(`[Dataset ${datasetId}] ⚠️  Stdout truncated to ${MAX_STDOUT_CHARS} chars`);
+                        console.log(`[Dataset ${datasetId}] Stdout truncated to ${MAX_STDOUT_CHARS} chars`);
                     }
                 }
                 console.log(`[Dataset ${datasetId}] ========================================`);
@@ -146,6 +222,9 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
                     stderr: stderrCapped,
                     scriptPath: scriptFile,
                     message: "Command executed successfully",
+                    resourcesDir: resourcesManifest?.resourcesDir,
+                    resourcesManifestPath: resourcesManifest?.manifestPath,
+                    materializedResources: resourcesManifest?.resources,
                     stdoutTruncated: isStdoutTruncated,
                     stderrTruncated: isStderrTruncated,
                     stdoutOriginalLength: stdout.length,
@@ -163,12 +242,15 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
                     stderr: "",
                     exitCode: -1,
                     scriptPath: scriptFile,
+                    resourcesDir: resourcesManifest?.resourcesDir,
+                    resourcesManifestPath: resourcesManifest?.manifestPath,
+                    materializedResources: resourcesManifest?.resources,
                     stdoutTruncated: false,
                     stderrTruncated: false,
                     stdoutOriginalLength: 0,
                     stderrOriginalLength: 0,
                 };
             }
-        }),
+        },
     });
 }

package/dist/sandbox/steps.js CHANGED Viewed

@@ -30,8 +30,10 @@ async function runLocalSandboxCommand(params) {
     const cmd = String(params.cmd ?? "").trim();
     const args = params.args ?? [];
     if (cmd === "mkdir") {
-        const target = args[args.length - 1];
-        await fs.mkdir(String(target ?? ""), { recursive: true });
+        const targets = args.filter((arg) => !String(arg).startsWith("-"));
+        for (const target of targets) {
+            await fs.mkdir(String(target ?? ""), { recursive: true });
+        }
         return { exitCode: 0, stdout: "", stderr: "" };
     }
     if (cmd === "rm") {

package/dist/service.d.ts CHANGED Viewed

@@ -74,6 +74,10 @@ export declare class DatasetService {
         storagePath: string;
     }): Promise<ServiceResult<void>>;
     readRecordsFromFile(datasetId: string): Promise<ServiceResult<AsyncGenerator<any, void, unknown>>>;
+    /**
+     * Reads the records linked to a dataset, sorted ascending by `order`.
+     *
+     * @param datasetId - Identifier of the dataset to look up.
+     * @returns A service result holding each record's `rowContent` and
+     *   numeric `order`, or a failure result with an error message.
+     */
+    readLinkedRecords(datasetId: string): Promise<ServiceResult<Array<{
+        rowContent: any;
+        order: number;
+    }>>>;
     previewRows(datasetId: string, limit?: number): Promise<ServiceResult<any[]>>;
     readRows(params: {
         datasetId: string;

package/dist/service.js CHANGED Viewed

@@ -432,8 +432,51 @@ export class DatasetService {
             return { ok: false, error: message };
         }
     }
+    /**
+     * Loads the records linked to a dataset, sorted ascending by `order`.
+     *
+     * Never throws: query errors are converted into a failure result.
+     *
+     * @param {string} datasetId - Matched against the `datasetId` field of
+     *   `dataset_datasets`.
+     * @returns {Promise<{ok: true, data: Array<{rowContent: any, order: number}>} | {ok: false, error: string}>}
+     *   Failure when no dataset matches or the query throws; otherwise the
+     *   linked rows (an empty list when the dataset has no `records` array).
+     */
+    async readLinkedRecords(datasetId) {
+        // A missing (null/undefined) order counts as 0.
+        const orderOf = (record) => Number(record?.order ?? 0);
+        try {
+            const result = await this.db.query({
+                dataset_datasets: {
+                    $: {
+                        where: { datasetId },
+                        limit: 1,
+                    },
+                    records: {},
+                },
+            });
+            const datasetRecord = result.dataset_datasets?.[0];
+            if (!datasetRecord) {
+                return { ok: false, error: `Dataset not found with id: ${datasetId}` };
+            }
+            const linked = Array.isArray(datasetRecord.records) ? datasetRecord.records : [];
+            // Sort a copy so the array returned by the query stays untouched.
+            const ordered = linked
+                .slice()
+                .sort((left, right) => orderOf(left) - orderOf(right));
+            const data = ordered.map((record) => ({
+                rowContent: record?.rowContent,
+                order: orderOf(record),
+            }));
+            return { ok: true, data };
+        }
+        catch (error) {
+            return {
+                ok: false,
+                error: error instanceof Error ? error.message : String(error),
+            };
+        }
+    }
     async previewRows(datasetId, limit = 20) {
         try {
+            const linkedRecords = await this.readLinkedRecords(datasetId);
+            if (linkedRecords.ok && linkedRecords.data.length > 0) {
+                return {
+                    ok: true,
+                    data: linkedRecords.data
+                        .slice(0, Math.max(0, Number(limit ?? 20)))
+                        .map((record) => record.rowContent),
+                };
+            }
             const readResult = await this.readRecordsFromFile(datasetId);
             if (!readResult.ok) {
                 return readResult;
@@ -454,12 +497,26 @@ export class DatasetService {
     }
     async readRows(params) {
         try {
+            const start = Math.max(0, Number(params.cursor ?? 0));
+            const limit = Math.max(1, Number(params.limit ?? 200));
+            const linkedRecords = await this.readLinkedRecords(params.datasetId);
+            if (linkedRecords.ok && linkedRecords.data.length > 0) {
+                const rows = linkedRecords.data
+                    .slice(start, start + limit)
+                    .map((record) => record.rowContent);
+                return {
+                    ok: true,
+                    data: {
+                        rows,
+                        cursor: start + rows.length,
+                        done: start + rows.length >= linkedRecords.data.length,
+                    },
+                };
+            }
             const readResult = await this.readRecordsFromFile(params.datasetId);
             if (!readResult.ok) {
                 return readResult;
             }
-            const start = Math.max(0, Number(params.cursor ?? 0));
-            const limit = Math.max(1, Number(params.limit ?? 200));
             const rows = [];
             let index = 0;
             let hasMore = false;