npm - @ekairos/dataset - Versions diffs - 1.22.82-beta.development.0 → 1.22.84-beta.development.0 - Mend

@ekairos/dataset 1.22.82-beta.development.0 → 1.22.84-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/dist/builder/agentMaterializers.d.ts +2 -2
package/dist/builder/context.d.ts +7 -0
package/dist/builder/context.js +192 -0
package/dist/builder/instructions.d.ts +3 -3
package/dist/builder/instructions.js +10 -10
package/dist/builder/materialize.d.ts +12 -11
package/dist/builder/materialize.js +122 -121
package/dist/builder/materializeQuery.d.ts +3 -2
package/dist/builder/materializeQuery.js +10 -19
package/dist/builder/persistence.d.ts +4 -5
package/dist/builder/persistence.js +20 -19
package/dist/builder/types.d.ts +31 -24
package/dist/completeDataset.steps.d.ts +9 -8
package/dist/completeDataset.steps.js +18 -11
package/dist/completeDataset.tool.d.ts +9 -8
package/dist/completeDataset.tool.js +2 -1
package/dist/contextWorkspace.d.ts +72 -0
package/dist/contextWorkspace.js +218 -0
package/dist/dataset.d.ts +1 -1
package/dist/dataset.js +42 -29
package/dist/datasetFiles.d.ts +1 -1
package/dist/datasetFiles.js +3 -3
package/dist/executeCommand.tool.d.ts +1 -43
package/dist/executeCommand.tool.js +10 -3
package/dist/file/file-dataset.agent.d.ts +2 -0
package/dist/file/file-dataset.agent.js +51 -16
package/dist/file/file-dataset.steps.d.ts +6 -0
package/dist/file/file-dataset.steps.js +18 -21
package/dist/file/file-dataset.types.d.ts +10 -0
package/dist/file/prompts.js +16 -14
package/dist/index.d.ts +1 -0
package/dist/index.js +1 -0
package/dist/materializeDataset.tool.d.ts +34 -26
package/dist/materializeDataset.tool.js +40 -29
package/dist/schema.d.ts +12 -2
package/dist/schema.js +6 -3
package/dist/service.d.ts +2 -2
package/dist/service.js +6 -3
package/dist/transform/filepreview.d.ts +2 -2
package/dist/transform/filepreview.js +3 -3
package/dist/transform/prompts.js +25 -25
package/dist/transform/transform-dataset.agent.d.ts +4 -4
package/dist/transform/transform-dataset.agent.js +29 -30
package/dist/transform/transform-dataset.steps.d.ts +7 -7
package/dist/transform/transform-dataset.steps.js +20 -20
package/dist/transform/transform-dataset.types.d.ts +13 -13
package/dist/transform/transformDataset.js +4 -4
package/package.json +4 -4
/package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
/package/dist/builder/{sourceRows.js → rows.js} +0 -0

package/dist/dataset.js CHANGED Viewed

@@ -1,7 +1,8 @@
 import { buildObjectOutputInstructions } from "./builder/instructions.js";
+import { resolveDatasetResourceContext } from "./builder/context.js";
 import { createDatasetId } from "./id.js";
-import { completeDatasetStep, materializeDerivedDataset, materializeSingleFileLikeSource, } from "./builder/materialize.js";
-import { materializeQuerySource } from "./builder/materializeQuery.js";
+import { completeDatasetStep, materializeDerivedDataset, materializeSingleFileLikeResource, } from "./builder/materialize.js";
+import { materializeQueryResource } from "./builder/materializeQuery.js";
 import { createDatasetBuildResult, finalizeBuildResult, } from "./builder/persistence.js";
 export function dataset(runtime, options = {}) {
     const datasetId = normalizeDatasetId(options.datasetId);
@@ -9,7 +10,7 @@ export function dataset(runtime, options = {}) {
     const state = {
         runtime: typedRuntime,
         env: typedRuntime.env,
-        sources: [],
+        resources: [],
         output: "rows",
         inferSchema: false,
         durable: options.durable,
@@ -17,38 +18,46 @@ export function dataset(runtime, options = {}) {
     };
     const api = {
         datasetId,
-        fromFile(source) {
-            state.sources.push({ kind: "file", ...source });
+        fromFile(resource) {
+            state.resources.push({ kind: "file", ...resource });
             return api;
         },
-        fromText(source) {
-            state.sources.push({ kind: "text", ...source });
+        fromText(resource) {
+            state.resources.push({ kind: "text", ...resource });
             return api;
         },
-        fromDataset(source) {
-            state.sources.push({ kind: "dataset", ...source });
+        fromDataset(resource) {
+            state.resources.push({ kind: "dataset", ...resource });
             return api;
         },
-        from(...sources) {
-            for (const source of sources) {
-                if ("kind" in source) {
-                    state.sources.push(source);
+        fromContext(context) {
+            state.resources.push({ kind: "context", ...context });
+            return api;
+        },
+        from(...resources) {
+            for (const resource of resources) {
+                if ("kind" in resource) {
+                    state.resources.push(resource);
+                    continue;
+                }
+                if ("fileId" in resource) {
+                    state.resources.push({ kind: "file", ...resource });
                     continue;
                 }
-                if ("fileId" in source) {
-                    state.sources.push({ kind: "file", ...source });
+                if ("datasetId" in resource) {
+                    state.resources.push({ kind: "dataset", ...resource });
                     continue;
                 }
-                if ("datasetId" in source) {
-                    state.sources.push({ kind: "dataset", ...source });
+                if ("id" in resource || "key" in resource) {
+                    state.resources.push({ kind: "context", ...resource });
                     continue;
                 }
-                state.sources.push({ kind: "text", ...source });
+                state.resources.push({ kind: "text", ...resource });
             }
             return api;
         },
-        fromQuery(domain, source) {
-            state.sources.push({ kind: "query", domain, ...source });
+        fromQuery(domain, resource) {
+            state.resources.push({ kind: "query", domain, ...resource });
             return api;
         },
         title(title) {
@@ -96,8 +105,8 @@ export function dataset(runtime, options = {}) {
             return api;
         },
         async build(options) {
-            if (state.sources.length === 0) {
-                throw new Error("dataset_sources_required");
+            if (state.resources.length === 0) {
+                throw new Error("dataset_resources_required");
             }
             const targetDatasetId = options?.datasetId
                 ? normalizeDatasetId(options.datasetId)
@@ -106,6 +115,9 @@ export function dataset(runtime, options = {}) {
                 ...state,
                 durable: options?.durable ?? state.durable,
             };
+            const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
+            stateWithBuildOptions.resources = context.resources;
+            stateWithBuildOptions.contextId = context.contextId;
             const effectiveState = stateWithBuildOptions.output === "object"
                 ? {
                     ...stateWithBuildOptions,
@@ -113,25 +125,26 @@ export function dataset(runtime, options = {}) {
                     instructions: buildObjectOutputInstructions(stateWithBuildOptions.instructions),
                 }
                 : stateWithBuildOptions;
-            const onlySource = effectiveState.sources[0];
-            const isSingleSource = effectiveState.sources.length === 1;
+            const onlyResource = effectiveState.resources[0];
+            const isSingleResource = effectiveState.resources.length === 1;
             const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
-            if (isSingleSource && onlySource.kind === "query" && !hasInstructions) {
-                await materializeQuerySource(effectiveState.runtime, onlySource, {
+            if (isSingleResource && onlyResource.kind === "query" && !hasInstructions) {
+                await materializeQueryResource(effectiveState.runtime, onlyResource, {
                     datasetId: targetDatasetId,
                     sandboxId: effectiveState.sandboxId,
                     schema: effectiveState.outputSchema,
-                    title: effectiveState.title ?? onlySource.title,
+                    title: effectiveState.title ?? onlyResource.title,
                     instructions: effectiveState.instructions,
                     first: effectiveState.first,
+                    contextId: effectiveState.contextId ?? "",
                 });
                 return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
             }
-            if (isSingleSource && (onlySource.kind === "file" || onlySource.kind === "text")) {
+            if (isSingleResource && (onlyResource.kind === "file" || onlyResource.kind === "text")) {
                 if (!effectiveState.reactor) {
                     throw new Error("dataset_reactor_required");
                 }
-                await materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
+                await materializeSingleFileLikeResource(effectiveState, onlyResource, targetDatasetId);
                 const completed = await completeDatasetStep({
                     runtime: effectiveState.runtime,
                     datasetId: targetDatasetId,

package/dist/datasetFiles.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 export declare const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
 export declare function getDatasetWorkdirBase(): string;
 export declare function getDatasetWorkstation(datasetId: string): string;
-export declare function getDatasetSourcesDir(datasetId: string): string;
+export declare function getDatasetResourcesDir(datasetId: string): string;
 export declare function getDatasetScriptsDir(datasetId: string): string;
 export declare function getDatasetArtifactsDir(datasetId: string): string;
 export declare function getDatasetLogsDir(datasetId: string): string;

package/dist/datasetFiles.js CHANGED Viewed

@@ -9,8 +9,8 @@ export function getDatasetWorkdirBase() {
 export function getDatasetWorkstation(datasetId) {
     return `${getDatasetWorkdirBase()}/${datasetId}`;
 }
-export function getDatasetSourcesDir(datasetId) {
-    return `${getDatasetWorkstation(datasetId)}/sources`;
+export function getDatasetResourcesDir(datasetId) {
+    return `${getDatasetWorkstation(datasetId)}/resources`;
 }
 export function getDatasetScriptsDir(datasetId) {
     return `${getDatasetWorkstation(datasetId)}/scripts`;
@@ -24,7 +24,7 @@ export function getDatasetLogsDir(datasetId) {
 export function getDatasetStandardDirs(datasetId) {
     return [
         getDatasetWorkstation(datasetId),
-        getDatasetSourcesDir(datasetId),
+        getDatasetResourcesDir(datasetId),
         getDatasetScriptsDir(datasetId),
         getDatasetArtifactsDir(datasetId),
         getDatasetLogsDir(datasetId),

package/dist/executeCommand.tool.d.ts CHANGED Viewed

@@ -6,47 +6,5 @@ interface ExecuteCommandToolParams {
 export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): import("ai").Tool<{
     pythonCode: string;
     scriptName: string;
-}, {
-    success: boolean;
-    fatal: boolean;
-    status: string;
-    error: string;
-    stdout: string;
-    stderr: string;
-    exitCode: number;
-    scriptPath: string;
-    stdoutTruncated: boolean;
-    stderrTruncated: boolean;
-    stdoutOriginalLength: number;
-    stderrOriginalLength: number;
-    message?: undefined;
-} | {
-    success: boolean;
-    exitCode: number;
-    stdout: string;
-    stderr: string;
-    scriptPath: string;
-    error: string;
-    stdoutTruncated: boolean;
-    stderrTruncated: boolean;
-    stdoutOriginalLength: number;
-    stderrOriginalLength: number;
-    fatal?: undefined;
-    status?: undefined;
-    message?: undefined;
-} | {
-    success: boolean;
-    exitCode: number;
-    stdout: string;
-    stderr: string;
-    scriptPath: string;
-    message: string;
-    stdoutTruncated: boolean;
-    stderrTruncated: boolean;
-    stdoutOriginalLength: number;
-    stderrOriginalLength: number;
-    fatal?: undefined;
-    status?: undefined;
-    error?: undefined;
-}>;
+}, unknown>;
 export {};

package/dist/executeCommand.tool.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { tool } from "ai";
 import { z } from "zod";
 import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep } from "./sandbox/steps.js";
 import { getDatasetScriptsDir } from "./datasetFiles.js";
+import { getContextExecutionWorkspaceDirs } from "./contextWorkspace.js";
 // To keep responses predictable for big data scenarios, we cap stdout/stderr.
 // The tool's return payload exposes stdout (capped) plus the on-disk script path.
 const MAX_STDOUT_CHARS = 20000;
@@ -29,10 +30,16 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
             pythonCode: z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
             scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
         }),
-        execute: async ({ pythonCode, scriptName }) => {
+        execute: (async ({ pythonCode, scriptName }, actionContext) => {
             const normalizedScriptName = normalizeScriptName(scriptName);
             const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
-            const scriptFile = `${getDatasetScriptsDir(datasetId)}/${normalizedScriptName}-${scriptHash}.py`;
+            const scriptsDir = actionContext?.contextId && actionContext.executionId
+                ? getContextExecutionWorkspaceDirs({
+                    contextId: actionContext.contextId,
+                    executionId: actionContext.executionId,
+                }).scriptsDir
+                : getDatasetScriptsDir(datasetId);
+            const scriptFile = `${scriptsDir}/${normalizedScriptName}-${scriptHash}.py`;
             console.log(`[Dataset ${datasetId}] ========================================`);
             console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
             console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
@@ -162,6 +169,6 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
                     stderrOriginalLength: 0,
                 };
             }
-        },
+        }),
     });
 }

package/dist/file/file-dataset.agent.d.ts CHANGED Viewed

@@ -12,6 +12,8 @@ export declare function createFileParseContext<Env extends {
     sandboxState?: SandboxState;
     filePreview?: FileParseContext["filePreview"];
     schema?: any | null;
+    filename?: string;
+    mediaType?: string;
 }): {
     datasetId: string;
     parse(runtime: {

package/dist/file/file-dataset.agent.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFa
 import { datasetGetByIdStep } from "../dataset/steps.js";
 import { createExecuteCommandTool } from "../executeCommand.tool.js";
 import { createGenerateSchemaTool } from "./generateSchema.tool.js";
-import { buildFileDatasetPromptStep, generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
+import { buildFileDatasetPromptStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
 import { createDatasetId } from "../id.js";
 async function awaitContextRun(run) {
     if (!run)
@@ -27,6 +27,15 @@ function createFileParseContextDefinition(params) {
         const fileId = previous?.fileId ?? params.fileId ?? "";
         const instructions = previous?.instructions ?? params.instructions ?? "";
         const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
+        const contextRun = runtime?.__ekairosContextRun ?? {};
+        const contextId = String(contextRun.contextId ?? stored?.id ?? "").trim();
+        const executionId = String(contextRun.executionId ?? previous?.executionId ?? "").trim();
+        const sourceEventId = String(previous?.sourceEventId ?? params.sourceEventId ?? "").trim();
+        const sourcePartIndex = typeof previous?.sourcePartIndex === "number"
+            ? previous.sourcePartIndex
+            : typeof params.sourcePartIndex === "number"
+                ? params.sourcePartIndex
+                : 0;
         if (!datasetId) {
             throw new Error("dataset_id_required");
         }
@@ -36,30 +45,29 @@ function createFileParseContextDefinition(params) {
         if (!sandboxId) {
             throw new Error("dataset_sandbox_required");
         }
+        if (!contextId) {
+            throw new Error("dataset_context_id_required");
+        }
+        if (!executionId) {
+            throw new Error("dataset_execution_id_required");
+        }
         const initialized = sandboxState.initialized && sandboxState.filePath
             ? { filePath: sandboxState.filePath, state: sandboxState }
             : await initializeFileParseSandboxStep({
                 runtime,
                 sandboxId,
+                contextId,
+                executionId,
                 datasetId,
                 fileId,
+                sourceEventId,
+                sourcePartIndex,
+                filename: previous?.filename ?? params.filename,
+                mediaType: previous?.mediaType ?? params.mediaType,
                 state: sandboxState,
             });
         const sandboxFilePath = initialized.filePath;
         let filePreview = previous?.filePreview ?? previous?.ctx?.filePreview ?? params.filePreview;
-        if (!filePreview) {
-            try {
-                filePreview = await generateFileParsePreviewStep({
-                    runtime,
-                    sandboxId,
-                    sandboxFilePath,
-                    datasetId,
-                });
-            }
-            catch {
-                // Preview is optional; parsing can still proceed from the file path.
-            }
-        }
         let schema = previous?.ctx?.schema ?? previous?.schema ?? params.schema ?? null;
         const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
         if (datasetResult.ok && datasetResult.data.schema) {
@@ -69,7 +77,12 @@ function createFileParseContextDefinition(params) {
             datasetId,
             fileId,
             instructions,
-            sandboxConfig: { filePath: sandboxFilePath },
+            sandboxConfig: {
+                filePath: sandboxFilePath,
+                outputPath: initialized.state.outputPath,
+                scriptsDir: initialized.state.scriptsDir,
+                manifestPath: initialized.state.manifestPath,
+            },
             analysis: [],
             schema,
             plan: null,
@@ -84,6 +97,11 @@ function createFileParseContextDefinition(params) {
             fileId,
             instructions,
             sandboxId,
+            executionId,
+            sourceEventId,
+            sourcePartIndex,
+            filename: previous?.filename ?? params.filename,
+            mediaType: previous?.mediaType ?? params.mediaType,
             sandboxState: initialized.state,
             filePreview,
             ctx,
@@ -109,6 +127,7 @@ function createFileParseContextDefinition(params) {
         const datasetId = _stored?.content?.datasetId ?? fallbackDatasetId ?? "";
         const fileId = _stored?.content?.fileId ?? params.fileId ?? "";
         const sandboxId = _stored?.content?.sandboxId ?? params.sandboxId ?? "";
+        const outputPath = _stored?.content?.ctx?.sandboxConfig?.outputPath;
         if (!datasetId)
             throw new Error("dataset_id_required");
         if (!fileId)
@@ -125,6 +144,7 @@ function createFileParseContextDefinition(params) {
                 datasetId,
                 sandboxId,
                 runtime,
+                outputPath,
             }),
             clearDataset: createClearDatasetTool({
                 datasetId,
@@ -169,6 +189,8 @@ export function createFileParseContext(fileId, opts) {
         sandboxState: opts?.sandboxState,
         filePreview: opts?.filePreview,
         schema: opts?.schema,
+        filename: opts?.filename,
+        mediaType: opts?.mediaType,
     };
     const { context } = createFileParseContextDefinition(params);
     return {
@@ -185,15 +207,24 @@ export function createFileParseContext(fileId, opts) {
                             type: "text",
                             text: options.prompt ?? "generate a dataset for this file",
                         },
+                        {
+                            type: "file",
+                            fileId,
+                            filename: opts?.filename ?? "resource-file",
+                            mediaType: opts?.mediaType ?? "application/octet-stream",
+                        },
                     ],
                 },
             };
+            params.sourceEventId = triggerEvent.id;
+            params.sourcePartIndex = 1;
+            params.filename = opts?.filename ?? "resource-file";
+            params.mediaType = opts?.mediaType ?? "application/octet-stream";
             const shell = await context.react(triggerEvent, {
                 runtime: runtime,
                 context: { key: `dataset:${datasetId}` },
                 durable: options.durable ?? false,
                 options: {
-                    silent: true,
                     preventClose: true,
                     sendFinish: false,
                     maxIterations: 20,
@@ -203,6 +234,10 @@ export function createFileParseContext(fileId, opts) {
                     ...(options.initialContent ?? {}),
                     datasetId,
                     fileId,
+                    sourceEventId: triggerEvent.id,
+                    sourcePartIndex: 1,
+                    filename: opts?.filename ?? "resource-file",
+                    mediaType: opts?.mediaType ?? "application/octet-stream",
                     instructions: opts?.instructions ?? "",
                     sandboxId: opts?.sandboxId ?? "",
                     sandboxState: opts?.sandboxState ?? { initialized: false, filePath: "" },

package/dist/file/file-dataset.steps.d.ts CHANGED Viewed

@@ -3,8 +3,14 @@ import type { FilePreviewContext } from "./filepreview.types.js";
 export declare function initializeFileParseSandboxStep(params: {
     runtime: any;
     sandboxId: string;
+    contextId: string;
+    executionId: string;
     datasetId: string;
     fileId: string;
+    sourceEventId?: string;
+    sourcePartIndex?: number;
+    filename?: string;
+    mediaType?: string;
     state: SandboxState;
 }): Promise<{
     filePath: string;

package/dist/file/file-dataset.steps.js CHANGED Viewed

@@ -1,42 +1,39 @@
-import { getDatasetSourcesDir, getDatasetStandardDirs, getDatasetWorkstation, } from "../datasetFiles.js";
-import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
+import { DATASET_OUTPUT_FILE_NAME } from "../datasetFiles.js";
+import { prepareContextExecutionWorkspaceStep } from "../contextWorkspace.js";
 import { buildFileDatasetPrompt } from "./prompts.js";
 import { generateFilePreview } from "./filepreview.js";
-import { readInstantFileStep } from "./steps.js";
 export async function initializeFileParseSandboxStep(params) {
     "use step";
     if (params.state.initialized) {
         return { filePath: params.state.filePath, state: params.state };
     }
-    console.log(`[FileParseContext ${params.datasetId}] Preparing source file in sandbox...`);
-    console.log(`[FileParseContext ${params.datasetId}] Fetching file from InstantDB...`);
-    const file = await readInstantFileStep({ runtime: params.runtime, fileId: params.fileId });
-    console.log(`[FileParseContext ${params.datasetId}] Creating dataset workstation...`);
-    const workstation = getDatasetWorkstation(params.datasetId);
-    await runDatasetSandboxCommandStep({
-        runtime: params.runtime,
-        sandboxId: params.sandboxId,
-        cmd: "mkdir",
-        args: ["-p", ...getDatasetStandardDirs(params.datasetId)],
-    });
-    const fileName = file.contentDisposition ?? "";
-    const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
-    const sandboxFilePath = `${getDatasetSourcesDir(params.datasetId)}/${params.fileId}${fileExtension}`;
-    await writeDatasetSandboxFilesStep({
+    console.log(`[FileParseContext ${params.datasetId}] Preparing context execution workspace...`);
+    const workspace = await prepareContextExecutionWorkspaceStep({
         runtime: params.runtime,
         sandboxId: params.sandboxId,
+        contextId: params.contextId,
+        executionId: params.executionId,
         files: [
             {
-                path: sandboxFilePath,
-                contentBase64: file.contentBase64,
+                fileId: params.fileId,
+                filename: params.filename,
+                mediaType: params.mediaType,
+                sourceEventId: params.sourceEventId,
+                sourcePartIndex: params.sourcePartIndex,
             },
         ],
     });
-    console.log(`[FileParseContext ${params.datasetId}] Workstation created: ${workstation}`);
+    const sandboxFilePath = workspace.files[0]?.path ?? "";
+    if (!sandboxFilePath)
+        throw new Error("dataset_workspace_file_missing");
+    console.log(`[FileParseContext ${params.datasetId}] Context workspace created: ${workspace.root}`);
     console.log(`[FileParseContext ${params.datasetId}] File saved: ${sandboxFilePath}`);
     const state = {
         initialized: true,
         filePath: sandboxFilePath,
+        outputPath: `${workspace.outputDir}/${DATASET_OUTPUT_FILE_NAME}`,
+        scriptsDir: workspace.scriptsDir,
+        manifestPath: workspace.manifestPath,
     };
     return { filePath: sandboxFilePath, state };
 }

package/dist/file/file-dataset.types.d.ts CHANGED Viewed

@@ -3,6 +3,9 @@ import type { FilePreviewContext } from "./filepreview.types.js";
 export type SandboxState = {
     initialized: boolean;
     filePath: string;
+    outputPath?: string;
+    scriptsDir?: string;
+    manifestPath?: string;
 };
 export type FileParseContext = {
     datasetId: string;
@@ -10,6 +13,9 @@ export type FileParseContext = {
     instructions: string;
     sandboxConfig: {
         filePath: string;
+        outputPath?: string;
+        scriptsDir?: string;
+        manifestPath?: string;
     };
     analysis: any[];
     schema: any | null;
@@ -29,6 +35,10 @@ export type FileParseContextParams = {
     sandboxState?: SandboxState;
     filePreview?: FilePreviewContext;
     schema?: any | null;
+    sourceEventId?: string;
+    sourcePartIndex?: number;
+    filename?: string;
+    mediaType?: string;
 };
 export type FileParseRunOptions = {
     prompt?: string;

package/dist/file/prompts.js CHANGED Viewed

@@ -11,13 +11,13 @@ function buildRole() {
 function buildGoal() {
     let xml = create()
         .ele("Goal")
-        .txt("Convert the source file into a validated JSONL dataset (output.jsonl) where each line is a JSON object conforming to a generated schema. The schema describes ONE data record structure. Extract ONLY data records; exclude any header sections, metadata, or summary information from the file.")
+        .txt("Convert the input file into a validated JSONL dataset (output.jsonl) where each line is a JSON object conforming to a generated schema. The schema describes ONE data record structure. Extract ONLY data records; exclude any header sections, metadata, or summary information from the file.")
         .up();
     return xml.end({ prettyPrint: true, headless: true });
 }
-function buildSourceInfo(context) {
+function buildResourceInfo(context) {
     let xml = create()
-        .ele("Source")
+        .ele("FileResource")
         .ele("Type").txt("file").up()
         .ele("FileId").txt(context.fileId).up()
         .ele("DatasetId").txt(context.datasetId).up()
@@ -90,7 +90,7 @@ function buildErrorsSection(errors) {
     }
     let xml = create()
         .ele("PreviousErrors")
-        .ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch source column names into schema keys piecemeal.").up();
+        .ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch input column names into schema keys piecemeal.").up();
     for (const error of errors) {
         xml = xml.ele("Error").txt(error).up();
     }
@@ -100,8 +100,8 @@ function buildErrorsSection(errors) {
 function buildContextSection(context) {
     let xml = create()
         .ele("Context");
-    const sourceXml = buildSourceInfo(context);
-    xml = xml.import(sourceXml.first());
+    const resourceXml = buildResourceInfo(context);
+    xml = xml.import(resourceXml.first());
     if (context.filePreview) {
         const previewXml = buildFilePreviewSection(context.filePreview);
         xml = xml.import(previewXml.first());
@@ -195,9 +195,9 @@ function buildSchemaSection(context) {
     xml = xml
         .ele("SchemaContract")
         .ele("Purpose").txt("Compact output contract derived from JSON Schema. Use this before writing output.jsonl.").up()
-        .ele("Rule").txt("Use only schema property keys in data objects. Source headers are input labels, not output keys.").up()
+        .ele("Rule").txt("Use only schema property keys in data objects. Input headers are input labels, not output keys.").up()
         .ele("Rule").txt("Required paths are required everywhere, including nested objects and array items.").up()
-        .ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize source labels to the closest valid enum literal; never emit a value outside the enum.").up();
+        .ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize input labels to the closest valid enum literal; never emit a value outside the enum.").up();
     xml = appendLimitedList(xml, "RequiredPaths", "Path", contract.requiredPaths, 120);
     xml = appendLimitedList(xml, "PropertyPaths", "Path", contract.propertyPaths, 160);
     let enumsXml = xml.ele("EnumConstraints");
@@ -223,8 +223,10 @@ function buildSchemaSection(context) {
     return xml.end({ prettyPrint: true, headless: true });
 }
 function buildInstructions(context) {
-    const datasetWorkstation = getDatasetWorkstation(context.datasetId);
-    const outputPath = getDatasetOutputPath(context.datasetId);
+    const datasetWorkstation = context.sandboxConfig.scriptsDir
+        ? context.sandboxConfig.scriptsDir.replace(/\/scripts$/, "")
+        : getDatasetWorkstation(context.datasetId);
+    const outputPath = context.sandboxConfig.outputPath ?? getDatasetOutputPath(context.datasetId);
     const hasProvidedSchema = Boolean(context.schema?.schema);
     const currentTask = hasProvidedSchema
         ? "Review FilePreview section, use the provided schema as the output contract, then parse the file and generate the dataset"
@@ -243,10 +245,10 @@ function buildInstructions(context) {
             .ele("Requirements")
             .ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
             .ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
-            .ele("Requirement").txt("Build a schema-first mapping from source columns to schema fields before writing output.jsonl. Do not use raw source headers as JSON keys unless they are exactly schema keys").up()
+            .ele("Requirement").txt("Build a schema-first mapping from input columns to schema fields before writing output.jsonl. Do not use raw input headers as JSON keys unless they are exactly schema keys").up()
             .ele("Requirement").txt("For nested required fields, populate the required child keys inside each nested object or array item; top-level validity is not enough").up()
             .ele("Requirement").txt("For enum fields, emit exactly one allowed enum literal from SchemaContract; normalize labels or abbreviations into allowed literals").up()
-            .ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the source language").up()
+            .ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the input language").up()
             .ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
             .up()
             .up();
@@ -284,8 +286,8 @@ function buildInstructions(context) {
         .up()
         .ele("Rules")
         .ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
-        .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the source language").up()
-        .ele("Rule").txt("Original/source language applies to extracted values only, not to JSON object keys").up()
+        .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the input language").up()
+        .ele("Rule").txt("Original/input language applies to extracted values only, not to JSON object keys").up()
         .ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
         .ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
         .ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 export * from "./dataset.js";
+export * from "./contextWorkspace.js";
 export * from "./domain.js";
 export * from "./materializeDataset.tool.js";
 export * from "./schema.js";

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,5 @@
 export * from "./dataset.js";
+export * from "./contextWorkspace.js";
 export * from "./domain.js";
 export * from "./materializeDataset.tool.js";
 export * from "./schema.js";