@ekairos/dataset 1.22.85-beta.development.0 → 1.22.86-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/context.d.ts +8 -0
- package/dist/builder/context.js +68 -9
- package/dist/builder/instructions.js +3 -2
- package/dist/builder/materialize.js +11 -25
- package/dist/builder/types.d.ts +2 -1
- package/dist/completeDataset.steps.d.ts +29 -0
- package/dist/completeDataset.steps.js +32 -1
- package/dist/completeDataset.tool.d.ts +41 -0
- package/dist/completeDataset.tool.js +6 -3
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +7 -0
- package/dist/contextWorkspace.js +17 -1
- package/dist/dataset/steps.js +12 -0
- package/dist/dataset.js +1 -0
- package/dist/executeCommand.tool.d.ts +1 -4
- package/dist/executeCommand.tool.js +113 -31
- package/dist/sandbox/steps.js +4 -2
- package/dist/service.d.ts +4 -0
- package/dist/service.js +59 -2
- package/dist/transform/prompts.js +37 -21
- package/dist/transform/transform-dataset.agent.d.ts +1 -0
- package/dist/transform/transform-dataset.agent.js +25 -25
- package/dist/transform/transform-dataset.types.d.ts +4 -1
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +4 -4
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { datasetReadOutputJsonlStep } from "./dataset/steps.js";
|
|
2
|
+
import { readInstantFileStep } from "./file/steps.js";
|
|
3
|
+
import { getContextResourcesDir, sanitizeContextWorkspacePathSegment, } from "./contextWorkspace.js";
|
|
4
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
|
|
5
|
+
/**
 * Narrows an arbitrary value to a record-like object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object|null} `value` itself when it is a truthy, non-array object;
 *   `null` for everything else (primitives, functions, arrays, null/undefined).
 */
function asRecord(value) {
    if (!value) {
        return null;
    }
    if (typeof value !== "object" || Array.isArray(value)) {
        return null;
    }
    return value;
}
|
|
10
|
+
/**
 * Coerces a value to a trimmed string.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string} The trimmed string when `value` is a string, otherwise `""`.
 */
function asString(value) {
    if (typeof value !== "string") {
        return "";
    }
    return value.trim();
}
|
|
13
|
+
/**
 * Picks the file name used for a resource's content inside its resource directory.
 *
 * A non-blank `resource.filename` wins and is passed through the workspace
 * path-segment sanitizer (fallback "resource"). Otherwise a default is chosen
 * from `resource.type`.
 *
 * @param {object} resource - Context resource descriptor.
 * @returns {string} File name (single path segment) for the content file.
 */
function contentFileName(resource) {
    const explicitName = asString(resource.filename);
    if (explicitName) {
        return sanitizeContextWorkspacePathSegment(explicitName, "resource");
    }
    switch (resource.type) {
        case "dataset":
            return "resource.jsonl";
        case "text":
            return "resource.txt";
        default:
            return "resource";
    }
}
|
|
24
|
+
/**
 * Filters context resources down to the requested keys.
 *
 * Requested keys are stringified and trimmed; blank entries are ignored. When
 * no usable key is requested (missing, not an array, or only blanks) the
 * original `resources` array is returned unchanged, meaning "select everything".
 *
 * Resource keys are normalized the same way as requested keys before the
 * comparison, so a resource whose key carries surrounding whitespace (or is not
 * a string) can still be selected by that key. Resources without a key are
 * never matched by an explicit request.
 *
 * @param {Array<object>} resources - Available context resources.
 * @param {Array<unknown>} [resourceKeys] - Keys to keep.
 * @returns {Array<object>} `resources` itself, or a filtered copy.
 */
function selectResources(resources, resourceKeys) {
    const normalizeKey = (key) => String(key).trim();
    const requested = new Set(Array.isArray(resourceKeys)
        ? resourceKeys.map(normalizeKey).filter(Boolean)
        : []);
    if (requested.size === 0) {
        return resources;
    }
    return resources.filter((resource) => resource.key != null && requested.has(normalizeKey(resource.key)));
}
|
|
32
|
+
/**
 * Materializes context resources into the sandbox and writes a manifest for them.
 *
 * For every selected resource a directory is created under the context
 * resources directory. Content is written there when the resource can be
 * materialized:
 *   - `file` with a non-blank `fileId`: content read via `readInstantFileStep`;
 *   - `dataset` with a non-blank `datasetId`: JSONL read via `datasetReadOutputJsonlStep`;
 *   - `text` with a string `text`: the text itself.
 * Anything else is recorded as `metadata_only` with a human-readable reason.
 * A `metadata.json` is written into each resource directory and a
 * `manifest.json` listing all resources is written at the resources root.
 *
 * @param {object} params
 * @param {*} params.runtime - Forwarded unchanged to the step helpers.
 * @param {string} params.sandboxId - Target sandbox.
 * @param {string} params.contextId - Context whose resources directory is used.
 * @param {Array<object>} params.resources - Available context resources.
 * @param {Array<string>} [params.resourceKeys] - Optional subset of keys; omit for all.
 * @returns {Promise<object>} The manifest: `{ contextId, sandboxId, resourcesDir,
 *   manifestPath, resources }`.
 */
export async function materializeContextResourcesStep(params) {
    "use step";
    const resourcesDir = getContextResourcesDir({ contextId: params.contextId });
    const manifestPath = `${resourcesDir}/manifest.json`;
    const selectedResources = selectResources(params.resources, params.resourceKeys);
    // Sanitizing can map distinct keys to the same segment (and keys may repeat),
    // which would make a later resource overwrite an earlier one. The first
    // occurrence keeps its plain segment; later ones get a numeric suffix.
    // "manifest.json" is reserved so a resource directory can never take the
    // manifest's own path.
    const usedSegments = new Set(["manifest.json"]);
    const resourceDirs = selectedResources.map((resource, index) => {
        const baseSegment = sanitizeContextWorkspacePathSegment(resource.key, `resource_${index + 1}`);
        let segment = baseSegment;
        for (let suffix = 2; usedSegments.has(segment); suffix++) {
            segment = `${baseSegment}_${suffix}`;
        }
        usedSegments.add(segment);
        return `${resourcesDir}/${segment}`;
    });
    await runDatasetSandboxCommandStep({
        runtime: params.runtime,
        sandboxId: params.sandboxId,
        cmd: "mkdir",
        args: ["-p", resourcesDir, ...resourceDirs],
    });
    const materialized = [];
    for (let index = 0; index < selectedResources.length; index++) {
        const resource = selectedResources[index];
        const resourceDir = resourceDirs[index];
        const metadataPath = `${resourceDir}/metadata.json`;
        const files = [];
        let status = "metadata_only";
        let reason;
        if (resource.type === "file" && asString(resource.fileId)) {
            const file = await readInstantFileStep({
                runtime: params.runtime,
                fileId: asString(resource.fileId),
            });
            const path = `${resourceDir}/${contentFileName(resource)}`;
            await writeDatasetSandboxFilesStep({
                runtime: params.runtime,
                sandboxId: params.sandboxId,
                files: [{ path, contentBase64: file.contentBase64 }],
            });
            files.push({
                path,
                role: "content",
                // `mediaType` stays authoritative; `mimeType` (the name the text
                // branch reads) is accepted as a fallback so both spellings work.
                mediaType: asString(resource.mediaType) || asString(resource.mimeType) || undefined,
            });
            status = "materialized";
        }
        else if (resource.type === "dataset" && asString(resource.datasetId)) {
            const datasetId = asString(resource.datasetId);
            const content = await datasetReadOutputJsonlStep({
                runtime: params.runtime,
                datasetId,
            });
            const path = `${resourceDir}/resource.jsonl`;
            await writeDatasetSandboxFilesStep({
                runtime: params.runtime,
                sandboxId: params.sandboxId,
                files: [{ path, contentBase64: content.contentBase64 }],
            });
            files.push({ path, role: "content", mediaType: "application/x-ndjson" });
            status = "materialized";
        }
        else if (resource.type === "text" && typeof resource.text === "string") {
            const path = `${resourceDir}/${contentFileName(resource)}`;
            await writeDatasetSandboxTextFilesStep({
                runtime: params.runtime,
                sandboxId: params.sandboxId,
                files: [{ path, content: String(resource.text) }],
            });
            files.push({
                path,
                role: "content",
                // `mimeType` stays authoritative here; `mediaType` is the fallback.
                mediaType: asString(resource.mimeType) || asString(resource.mediaType) || "text/plain",
            });
            status = "materialized";
        }
        else {
            reason =
                resource.type === "file"
                    ? "file resource has no fileId"
                    : resource.type === "dataset"
                        ? "dataset resource has no datasetId"
                        : resource.type === "link" || resource.type === "repository" || resource.type === "external"
                            ? `${resource.type} resources are metadata-only until an approved adapter materializes them`
                            : "resource type is metadata-only";
        }
        // The full descriptor is kept, with the core fields and the
        // materialization outcome written last so they cannot be shadowed.
        const metadata = {
            ...(asRecord(resource) ?? {}),
            key: resource.key,
            type: resource.type,
            name: resource.name,
            description: resource.description,
            materialized: {
                status,
                reason,
                dir: resourceDir,
                files,
            },
        };
        await writeDatasetSandboxTextFilesStep({
            runtime: params.runtime,
            sandboxId: params.sandboxId,
            files: [{ path: metadataPath, content: JSON.stringify(metadata, null, 2) }],
        });
        materialized.push({
            key: resource.key,
            type: resource.type,
            name: resource.name,
            description: resource.description,
            dir: resourceDir,
            metadataPath,
            files,
            status,
            reason,
        });
    }
    const manifest = {
        contextId: params.contextId,
        sandboxId: params.sandboxId,
        resourcesDir,
        manifestPath,
        resources: materialized,
    };
    await writeDatasetSandboxTextFilesStep({
        runtime: params.runtime,
        sandboxId: params.sandboxId,
        files: [{ path: manifestPath, content: JSON.stringify(manifest, null, 2) }],
    });
    return manifest;
}
|
|
@@ -23,12 +23,14 @@ export type PreparedContextExecutionWorkspace = {
|
|
|
23
23
|
root: string;
|
|
24
24
|
contextRoot: string;
|
|
25
25
|
eventsDir: string;
|
|
26
|
+
resourcesDir: string;
|
|
26
27
|
outputDir: string;
|
|
27
28
|
scriptsDir: string;
|
|
28
29
|
tmpDir: string;
|
|
29
30
|
manifestPath: string;
|
|
30
31
|
files: PreparedContextWorkspaceFile[];
|
|
31
32
|
};
|
|
33
|
+
export declare function sanitizeContextWorkspacePathSegment(value: string, fallback: string): string;
|
|
32
34
|
export declare function getContextWorkspaceBase(): string;
|
|
33
35
|
export declare function getContextExecutionWorkspaceRoot(params: {
|
|
34
36
|
contextId: string;
|
|
@@ -43,6 +45,10 @@ export declare function getContextEventsDir(params: {
|
|
|
43
45
|
contextId: string;
|
|
44
46
|
root?: string;
|
|
45
47
|
}): string;
|
|
48
|
+
export declare function getContextResourcesDir(params: {
|
|
49
|
+
contextId: string;
|
|
50
|
+
root?: string;
|
|
51
|
+
}): string;
|
|
46
52
|
export declare function getContextExecutionWorkspaceDirs(params: {
|
|
47
53
|
contextId: string;
|
|
48
54
|
executionId: string;
|
|
@@ -51,6 +57,7 @@ export declare function getContextExecutionWorkspaceDirs(params: {
|
|
|
51
57
|
root: string;
|
|
52
58
|
contextRoot: string;
|
|
53
59
|
eventsDir: string;
|
|
60
|
+
resourcesDir: string;
|
|
54
61
|
outputDir: string;
|
|
55
62
|
scriptsDir: string;
|
|
56
63
|
tmpDir: string;
|
package/dist/contextWorkspace.js
CHANGED
|
@@ -17,6 +17,9 @@ function sanitizePathSegment(value, fallback) {
|
|
|
17
17
|
.slice(0, 160);
|
|
18
18
|
return normalized || fallback;
|
|
19
19
|
}
|
|
20
|
+
/**
 * Public entry point for the module's path-segment sanitizer.
 *
 * @param {string} value - Raw segment candidate.
 * @param {string} fallback - Returned by the sanitizer when nothing usable remains.
 * @returns {string} Whatever `sanitizePathSegment(value, fallback)` returns.
 */
export function sanitizeContextWorkspacePathSegment(value, fallback) {
    const segment = sanitizePathSegment(value, fallback);
    return segment;
}
|
|
20
23
|
function filenameFromContentDisposition(value, fallback) {
|
|
21
24
|
const raw = String(value ?? "").trim();
|
|
22
25
|
if (!raw)
|
|
@@ -57,14 +60,19 @@ export function getContextWorkspaceRoot(params) {
|
|
|
57
60
|
export function getContextEventsDir(params) {
|
|
58
61
|
return `${getContextWorkspaceRoot(params)}/events`;
|
|
59
62
|
}
|
|
63
|
+
/**
 * Resolves the directory that holds materialized resources for a context.
 *
 * @param {object} params - Forwarded unchanged to `getContextWorkspaceRoot`.
 * @returns {string} The context workspace root followed by `/resources`.
 */
export function getContextResourcesDir(params) {
    const contextRoot = getContextWorkspaceRoot(params);
    return `${contextRoot}/resources`;
}
|
|
60
66
|
export function getContextExecutionWorkspaceDirs(params) {
|
|
61
67
|
const root = getContextExecutionWorkspaceRoot(params);
|
|
62
68
|
const contextRoot = getContextWorkspaceRoot(params);
|
|
63
69
|
const eventsDir = getContextEventsDir(params);
|
|
70
|
+
const resourcesDir = getContextResourcesDir(params);
|
|
64
71
|
return {
|
|
65
72
|
root,
|
|
66
73
|
contextRoot,
|
|
67
74
|
eventsDir,
|
|
75
|
+
resourcesDir,
|
|
68
76
|
outputDir: `${root}/output`,
|
|
69
77
|
scriptsDir: `${root}/scripts`,
|
|
70
78
|
tmpDir: `${root}/tmp`,
|
|
@@ -73,7 +81,15 @@ export function getContextExecutionWorkspaceDirs(params) {
|
|
|
73
81
|
}
|
|
74
82
|
export function getContextExecutionWorkspaceStandardDirs(params) {
|
|
75
83
|
const dirs = getContextExecutionWorkspaceDirs(params);
|
|
76
|
-
return [
|
|
84
|
+
return [
|
|
85
|
+
dirs.contextRoot,
|
|
86
|
+
dirs.eventsDir,
|
|
87
|
+
dirs.resourcesDir,
|
|
88
|
+
dirs.root,
|
|
89
|
+
dirs.outputDir,
|
|
90
|
+
dirs.scriptsDir,
|
|
91
|
+
dirs.tmpDir,
|
|
92
|
+
];
|
|
77
93
|
}
|
|
78
94
|
export function extractContextWorkspaceFilesFromEventItems(eventItems) {
|
|
79
95
|
const files = [];
|
package/dist/dataset/steps.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { DatasetService } from "../service.js";
|
|
2
2
|
import { datasetDomain } from "../schema.js";
|
|
3
3
|
import { inferDatasetSchema } from "../builder/schemaInference.js";
|
|
4
|
+
import { rowsToJsonl } from "../builder/rows.js";
|
|
4
5
|
export async function getDatasetRuntimeDb(runtime) {
|
|
5
6
|
if (!runtime) {
|
|
6
7
|
throw new Error("Dataset step requires runtime.");
|
|
@@ -26,6 +27,7 @@ export async function datasetGetByIdStep(params) {
|
|
|
26
27
|
export async function datasetReadOutputJsonlStep(params) {
|
|
27
28
|
"use step";
|
|
28
29
|
const db = await getDatasetRuntimeDb(params.runtime);
|
|
30
|
+
const service = new DatasetService(db);
|
|
29
31
|
for (let attempt = 1; attempt <= 20; attempt++) {
|
|
30
32
|
const query = await db.query({
|
|
31
33
|
dataset_datasets: {
|
|
@@ -40,6 +42,16 @@ export async function datasetReadOutputJsonlStep(params) {
|
|
|
40
42
|
const fileBuffer = await fetch(url).then((r) => r.arrayBuffer());
|
|
41
43
|
return { contentBase64: Buffer.from(fileBuffer).toString("base64") };
|
|
42
44
|
}
|
|
45
|
+
const directRows = await service.readRows({
|
|
46
|
+
datasetId: params.datasetId,
|
|
47
|
+
cursor: 0,
|
|
48
|
+
limit: 100000,
|
|
49
|
+
});
|
|
50
|
+
if (directRows.ok && directRows.data.rows.length > 0) {
|
|
51
|
+
return {
|
|
52
|
+
contentBase64: Buffer.from(rowsToJsonl(directRows.data.rows), "utf-8").toString("base64"),
|
|
53
|
+
};
|
|
54
|
+
}
|
|
43
55
|
await new Promise((resolve) => setTimeout(resolve, 250 * attempt));
|
|
44
56
|
}
|
|
45
57
|
throw new Error("Dataset output file not found");
|
package/dist/dataset.js
CHANGED
|
@@ -118,6 +118,7 @@ export function dataset(runtime, options = {}) {
|
|
|
118
118
|
const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
|
|
119
119
|
stateWithBuildOptions.resources = context.resources;
|
|
120
120
|
stateWithBuildOptions.contextId = context.contextId;
|
|
121
|
+
stateWithBuildOptions.contextResources = context.contextResources;
|
|
121
122
|
const effectiveState = stateWithBuildOptions.output === "object"
|
|
122
123
|
? {
|
|
123
124
|
...stateWithBuildOptions,
|
|
@@ -3,8 +3,5 @@ interface ExecuteCommandToolParams {
|
|
|
3
3
|
sandboxId: string;
|
|
4
4
|
runtime: any;
|
|
5
5
|
}
|
|
6
|
-
export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams):
|
|
7
|
-
pythonCode: string;
|
|
8
|
-
scriptName: string;
|
|
9
|
-
}, unknown>;
|
|
6
|
+
export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): any;
|
|
10
7
|
export {};
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { defineAction } from "@ekairos/events";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
-
import {
|
|
4
|
-
import { getDatasetScriptsDir } from "./datasetFiles.js";
|
|
3
|
+
import { materializeContextResourcesStep } from "./contextResources.js";
|
|
4
|
+
import { getDatasetScriptsDir, getDatasetStandardDirs } from "./datasetFiles.js";
|
|
5
5
|
import { getContextExecutionWorkspaceDirs } from "./contextWorkspace.js";
|
|
6
|
-
|
|
7
|
-
// The tool's return payload exposes stdout (capped) plus the on-disk script path.
|
|
6
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
|
|
8
7
|
const MAX_STDOUT_CHARS = 20000;
|
|
9
8
|
const MAX_STDERR_CHARS = 5000;
|
|
10
9
|
function normalizeScriptName(scriptName) {
|
|
@@ -23,39 +22,96 @@ function stableScriptHash(value) {
|
|
|
23
22
|
}
|
|
24
23
|
return (hash >>> 0).toString(36);
|
|
25
24
|
}
|
|
25
|
+
const executeCommandInputSchema = z.object({
|
|
26
|
+
commandDescription: z
|
|
27
|
+
.string()
|
|
28
|
+
.min(1)
|
|
29
|
+
.describe("Required pre-execution description of the command. Describe the inputs/resources it will use, the operation it will perform, the expected output, and why a command is the right tool instead of direct completion. Invalid descriptions include rereading resources whose descriptor/preview already contains the needed evidence, merely formatting JSON, constructing the final object, writing output.jsonl, or making completion easier."),
|
|
30
|
+
pythonCode: z
|
|
31
|
+
.string()
|
|
32
|
+
.describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. If context resources are materialized, read os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] to discover files and metadata. Do not install packages, download dependencies, use pip/npm/apt/curl/wget, or access the network. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
|
|
33
|
+
scriptName: z
|
|
34
|
+
.string()
|
|
35
|
+
.describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
|
|
36
|
+
resourceKeys: z
|
|
37
|
+
.array(z.string())
|
|
38
|
+
.optional()
|
|
39
|
+
.describe("Optional context resource keys to materialize before running the script. Omit to materialize every context resource."),
|
|
40
|
+
});
|
|
41
|
+
const materializedResourceSchema = z.object({
|
|
42
|
+
key: z.string(),
|
|
43
|
+
type: z.string(),
|
|
44
|
+
status: z.string(),
|
|
45
|
+
dir: z.string(),
|
|
46
|
+
files: z.array(z.object({
|
|
47
|
+
path: z.string(),
|
|
48
|
+
role: z.string(),
|
|
49
|
+
mediaType: z.string().optional(),
|
|
50
|
+
})),
|
|
51
|
+
reason: z.string().optional(),
|
|
52
|
+
});
|
|
53
|
+
const executeCommandOutputSchema = z
|
|
54
|
+
.object({
|
|
55
|
+
success: z.boolean(),
|
|
56
|
+
fatal: z.boolean().optional(),
|
|
57
|
+
status: z.string().optional(),
|
|
58
|
+
exitCode: z.number().optional(),
|
|
59
|
+
stdout: z.string(),
|
|
60
|
+
stderr: z.string(),
|
|
61
|
+
scriptPath: z.string(),
|
|
62
|
+
message: z.string().optional(),
|
|
63
|
+
error: z.string().optional(),
|
|
64
|
+
resourcesDir: z.string().optional(),
|
|
65
|
+
resourcesManifestPath: z.string().optional(),
|
|
66
|
+
materializedResources: z.array(materializedResourceSchema).optional(),
|
|
67
|
+
stdoutTruncated: z.boolean(),
|
|
68
|
+
stderrTruncated: z.boolean(),
|
|
69
|
+
stdoutOriginalLength: z.number(),
|
|
70
|
+
stderrOriginalLength: z.number(),
|
|
71
|
+
})
|
|
72
|
+
.passthrough();
|
|
26
73
|
export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
27
|
-
return
|
|
28
|
-
description: "Execute Python scripts in the sandbox
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
execute: (async ({ pythonCode, scriptName }, actionContext) => {
|
|
74
|
+
return defineAction({
|
|
75
|
+
description: "Execute Python scripts in the sandbox only when command execution is necessary to inspect, parse, aggregate, join, or compute over context resources that are not sufficiently represented in the visible context, resource descriptors, or previews. This is a high-cost computation tool, not a completion tool. Do not use it merely to reread resources whose descriptor/preview already contains the needed evidence, format JSON, build the final object, write output.jsonl, or make completion easier when completeObject or replaceRows can return the result directly. Before the script runs, requested context resources are materialized into /tmp/ekairos/contexts/{contextId}/resources and a manifest.json is written there. The Python process receives EKAIROS_CONTEXT_RESOURCES_DIR and EKAIROS_CONTEXT_RESOURCES_MANIFEST environment variables when resources are available; manifest entries expose files as resources[].files[].path. Do not install packages, download dependencies, use pip/npm/apt/curl/wget, or access the network; use only the available runtime and standard library unless a dependency is already present. Print concise progress and results only; do not dump large data.",
|
|
76
|
+
input: executeCommandInputSchema,
|
|
77
|
+
output: executeCommandOutputSchema,
|
|
78
|
+
execute: async ({ input, context, contextId, executionId, }) => {
|
|
79
|
+
const { commandDescription, pythonCode, resourceKeys, scriptName } = input;
|
|
34
80
|
const normalizedScriptName = normalizeScriptName(scriptName);
|
|
35
81
|
const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
|
|
36
|
-
const scriptsDir =
|
|
37
|
-
? getContextExecutionWorkspaceDirs({
|
|
38
|
-
contextId: actionContext.contextId,
|
|
39
|
-
executionId: actionContext.executionId,
|
|
40
|
-
}).scriptsDir
|
|
82
|
+
const scriptsDir = contextId && executionId
|
|
83
|
+
? getContextExecutionWorkspaceDirs({ contextId, executionId }).scriptsDir
|
|
41
84
|
: getDatasetScriptsDir(datasetId);
|
|
42
85
|
const scriptFile = `${scriptsDir}/${normalizedScriptName}-${scriptHash}.py`;
|
|
86
|
+
let resourcesManifest = null;
|
|
43
87
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
44
|
-
console.log(`[Dataset ${datasetId}]
|
|
88
|
+
console.log(`[Dataset ${datasetId}] Action: executeCommand`);
|
|
89
|
+
console.log(`[Dataset ${datasetId}] Description: ${commandDescription}`);
|
|
45
90
|
console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
|
|
46
91
|
console.log(`[Dataset ${datasetId}] File: ${scriptFile}`);
|
|
47
92
|
console.log(`[Dataset ${datasetId}] Code length: ${pythonCode.length} chars`);
|
|
48
93
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
49
94
|
try {
|
|
95
|
+
if (contextId && Array.isArray(context?.resources) && context.resources.length > 0) {
|
|
96
|
+
resourcesManifest = await materializeContextResourcesStep({
|
|
97
|
+
runtime,
|
|
98
|
+
sandboxId,
|
|
99
|
+
contextId,
|
|
100
|
+
resources: context.resources,
|
|
101
|
+
resourceKeys,
|
|
102
|
+
});
|
|
103
|
+
console.log(`[Dataset ${datasetId}] Resources manifest: ${resourcesManifest.manifestPath}`);
|
|
104
|
+
}
|
|
105
|
+
await runDatasetSandboxCommandStep({
|
|
106
|
+
runtime,
|
|
107
|
+
sandboxId,
|
|
108
|
+
cmd: "mkdir",
|
|
109
|
+
args: ["-p", ...getDatasetStandardDirs(datasetId), scriptsDir],
|
|
110
|
+
});
|
|
50
111
|
await writeDatasetSandboxTextFilesStep({
|
|
51
112
|
runtime,
|
|
52
113
|
sandboxId,
|
|
53
|
-
files: [
|
|
54
|
-
{
|
|
55
|
-
path: scriptFile,
|
|
56
|
-
content: pythonCode,
|
|
57
|
-
},
|
|
58
|
-
],
|
|
114
|
+
files: [{ path: scriptFile, content: pythonCode }],
|
|
59
115
|
});
|
|
60
116
|
const written = await runDatasetSandboxCommandStep({
|
|
61
117
|
runtime,
|
|
@@ -76,19 +132,33 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
76
132
|
stderr: written.stderr || "",
|
|
77
133
|
exitCode: written.exitCode,
|
|
78
134
|
scriptPath: scriptFile,
|
|
135
|
+
resourcesDir: resourcesManifest?.resourcesDir,
|
|
136
|
+
resourcesManifestPath: resourcesManifest?.manifestPath,
|
|
137
|
+
materializedResources: resourcesManifest?.resources,
|
|
79
138
|
stdoutTruncated: false,
|
|
80
139
|
stderrTruncated: false,
|
|
81
140
|
stdoutOriginalLength: 0,
|
|
82
141
|
stderrOriginalLength: 0,
|
|
83
142
|
};
|
|
84
143
|
}
|
|
144
|
+
const pythonArgs = resourcesManifest
|
|
145
|
+
? [
|
|
146
|
+
"-c",
|
|
147
|
+
[
|
|
148
|
+
"import os, runpy",
|
|
149
|
+
`os.environ["EKAIROS_CONTEXT_RESOURCES_DIR"] = ${JSON.stringify(resourcesManifest.resourcesDir)}`,
|
|
150
|
+
`os.environ["EKAIROS_CONTEXT_RESOURCES_MANIFEST"] = ${JSON.stringify(resourcesManifest.manifestPath)}`,
|
|
151
|
+
`runpy.run_path(${JSON.stringify(scriptFile)}, run_name="__main__")`,
|
|
152
|
+
].join("; "),
|
|
153
|
+
]
|
|
154
|
+
: [scriptFile];
|
|
85
155
|
console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
|
|
86
|
-
console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
|
|
156
|
+
console.log(`[Dataset ${datasetId}] Executing: python ${resourcesManifest ? "<with context resources env>" : scriptFile}`);
|
|
87
157
|
const result = await runDatasetSandboxCommandStep({
|
|
88
158
|
runtime,
|
|
89
159
|
sandboxId,
|
|
90
160
|
cmd: "python",
|
|
91
|
-
args:
|
|
161
|
+
args: pythonArgs,
|
|
92
162
|
});
|
|
93
163
|
const stdout = result.stdout || "";
|
|
94
164
|
const stderr = result.stderr || "";
|
|
@@ -98,7 +168,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
98
168
|
const stdoutCapped = isStdoutTruncated ? stdout.slice(0, MAX_STDOUT_CHARS) : stdout;
|
|
99
169
|
const stderrCapped = isStderrTruncated ? stderr.slice(0, MAX_STDERR_CHARS) : stderr;
|
|
100
170
|
if (exitCode !== 0) {
|
|
101
|
-
console.error(`[Dataset ${datasetId}]
|
|
171
|
+
console.error(`[Dataset ${datasetId}] Command failed with exit code ${exitCode}`);
|
|
102
172
|
console.error(`[Dataset ${datasetId}] Stderr:`, stderrCapped.substring(0, 500));
|
|
103
173
|
console.error(`[Dataset ${datasetId}] ========================================`);
|
|
104
174
|
return {
|
|
@@ -108,6 +178,9 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
108
178
|
stderr: stderrCapped,
|
|
109
179
|
scriptPath: scriptFile,
|
|
110
180
|
error: `Command failed with exit code ${exitCode}`,
|
|
181
|
+
resourcesDir: resourcesManifest?.resourcesDir,
|
|
182
|
+
resourcesManifestPath: resourcesManifest?.manifestPath,
|
|
183
|
+
materializedResources: resourcesManifest?.resources,
|
|
111
184
|
stdoutTruncated: isStdoutTruncated,
|
|
112
185
|
stderrTruncated: isStderrTruncated,
|
|
113
186
|
stdoutOriginalLength: stdout.length,
|
|
@@ -115,7 +188,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
115
188
|
};
|
|
116
189
|
}
|
|
117
190
|
if (stderr && (stderr.includes("Traceback") || stderr.toLowerCase().includes("error"))) {
|
|
118
|
-
console.error(`[Dataset ${datasetId}]
|
|
191
|
+
console.error(`[Dataset ${datasetId}] Python error detected`);
|
|
119
192
|
console.error(`[Dataset ${datasetId}] Stderr:`, stderrCapped.substring(0, 500));
|
|
120
193
|
console.error(`[Dataset ${datasetId}] ========================================`);
|
|
121
194
|
return {
|
|
@@ -125,17 +198,20 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
125
198
|
stderr: stderrCapped,
|
|
126
199
|
scriptPath: scriptFile,
|
|
127
200
|
error: "Python error detected in stderr",
|
|
201
|
+
resourcesDir: resourcesManifest?.resourcesDir,
|
|
202
|
+
resourcesManifestPath: resourcesManifest?.manifestPath,
|
|
203
|
+
materializedResources: resourcesManifest?.resources,
|
|
128
204
|
stdoutTruncated: isStdoutTruncated,
|
|
129
205
|
stderrTruncated: isStderrTruncated,
|
|
130
206
|
stdoutOriginalLength: stdout.length,
|
|
131
207
|
stderrOriginalLength: stderr.length,
|
|
132
208
|
};
|
|
133
209
|
}
|
|
134
|
-
console.log(`[Dataset ${datasetId}]
|
|
210
|
+
console.log(`[Dataset ${datasetId}] Command executed successfully`);
|
|
135
211
|
if (stdout) {
|
|
136
212
|
console.log(`[Dataset ${datasetId}] Output length: ${stdout.length} chars`);
|
|
137
213
|
if (isStdoutTruncated) {
|
|
138
|
-
console.log(`[Dataset ${datasetId}]
|
|
214
|
+
console.log(`[Dataset ${datasetId}] Stdout truncated to ${MAX_STDOUT_CHARS} chars`);
|
|
139
215
|
}
|
|
140
216
|
}
|
|
141
217
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
@@ -146,6 +222,9 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
146
222
|
stderr: stderrCapped,
|
|
147
223
|
scriptPath: scriptFile,
|
|
148
224
|
message: "Command executed successfully",
|
|
225
|
+
resourcesDir: resourcesManifest?.resourcesDir,
|
|
226
|
+
resourcesManifestPath: resourcesManifest?.manifestPath,
|
|
227
|
+
materializedResources: resourcesManifest?.resources,
|
|
149
228
|
stdoutTruncated: isStdoutTruncated,
|
|
150
229
|
stderrTruncated: isStderrTruncated,
|
|
151
230
|
stdoutOriginalLength: stdout.length,
|
|
@@ -163,12 +242,15 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
163
242
|
stderr: "",
|
|
164
243
|
exitCode: -1,
|
|
165
244
|
scriptPath: scriptFile,
|
|
245
|
+
resourcesDir: resourcesManifest?.resourcesDir,
|
|
246
|
+
resourcesManifestPath: resourcesManifest?.manifestPath,
|
|
247
|
+
materializedResources: resourcesManifest?.resources,
|
|
166
248
|
stdoutTruncated: false,
|
|
167
249
|
stderrTruncated: false,
|
|
168
250
|
stdoutOriginalLength: 0,
|
|
169
251
|
stderrOriginalLength: 0,
|
|
170
252
|
};
|
|
171
253
|
}
|
|
172
|
-
}
|
|
254
|
+
},
|
|
173
255
|
});
|
|
174
256
|
}
|
package/dist/sandbox/steps.js
CHANGED
|
@@ -30,8 +30,10 @@ async function runLocalSandboxCommand(params) {
|
|
|
30
30
|
const cmd = String(params.cmd ?? "").trim();
|
|
31
31
|
const args = params.args ?? [];
|
|
32
32
|
if (cmd === "mkdir") {
|
|
33
|
-
const
|
|
34
|
-
|
|
33
|
+
const targets = args.filter((arg) => !String(arg).startsWith("-"));
|
|
34
|
+
for (const target of targets) {
|
|
35
|
+
await fs.mkdir(String(target ?? ""), { recursive: true });
|
|
36
|
+
}
|
|
35
37
|
return { exitCode: 0, stdout: "", stderr: "" };
|
|
36
38
|
}
|
|
37
39
|
if (cmd === "rm") {
|
package/dist/service.d.ts
CHANGED
|
@@ -74,6 +74,10 @@ export declare class DatasetService {
|
|
|
74
74
|
storagePath: string;
|
|
75
75
|
}): Promise<ServiceResult<void>>;
|
|
76
76
|
readRecordsFromFile(datasetId: string): Promise<ServiceResult<AsyncGenerator<any, void, unknown>>>;
|
|
77
|
+
readLinkedRecords(datasetId: string): Promise<ServiceResult<Array<{
|
|
78
|
+
rowContent: any;
|
|
79
|
+
order: number;
|
|
80
|
+
}>>>;
|
|
77
81
|
previewRows(datasetId: string, limit?: number): Promise<ServiceResult<any[]>>;
|
|
78
82
|
readRows(params: {
|
|
79
83
|
datasetId: string;
|
package/dist/service.js
CHANGED
|
@@ -432,8 +432,51 @@ export class DatasetService {
|
|
|
432
432
|
return { ok: false, error: message };
|
|
433
433
|
}
|
|
434
434
|
}
|
|
435
|
+
async readLinkedRecords(datasetId) {
|
|
436
|
+
try {
|
|
437
|
+
const query = await this.db.query({
|
|
438
|
+
dataset_datasets: {
|
|
439
|
+
$: {
|
|
440
|
+
where: { datasetId },
|
|
441
|
+
limit: 1,
|
|
442
|
+
},
|
|
443
|
+
records: {},
|
|
444
|
+
},
|
|
445
|
+
});
|
|
446
|
+
const datasetRecord = query.dataset_datasets?.[0];
|
|
447
|
+
if (!datasetRecord) {
|
|
448
|
+
return { ok: false, error: `Dataset not found with id: ${datasetId}` };
|
|
449
|
+
}
|
|
450
|
+
const linkedRecords = Array.isArray(datasetRecord?.records)
|
|
451
|
+
? datasetRecord.records
|
|
452
|
+
: [];
|
|
453
|
+
return {
|
|
454
|
+
ok: true,
|
|
455
|
+
data: linkedRecords
|
|
456
|
+
.slice()
|
|
457
|
+
.sort((a, b) => Number(a?.order ?? 0) - Number(b?.order ?? 0))
|
|
458
|
+
.map((record) => ({
|
|
459
|
+
rowContent: record?.rowContent,
|
|
460
|
+
order: Number(record?.order ?? 0),
|
|
461
|
+
})),
|
|
462
|
+
};
|
|
463
|
+
}
|
|
464
|
+
catch (error) {
|
|
465
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
466
|
+
return { ok: false, error: message };
|
|
467
|
+
}
|
|
468
|
+
}
|
|
435
469
|
async previewRows(datasetId, limit = 20) {
|
|
436
470
|
try {
|
|
471
|
+
const linkedRecords = await this.readLinkedRecords(datasetId);
|
|
472
|
+
if (linkedRecords.ok && linkedRecords.data.length > 0) {
|
|
473
|
+
return {
|
|
474
|
+
ok: true,
|
|
475
|
+
data: linkedRecords.data
|
|
476
|
+
.slice(0, Math.max(0, Number(limit ?? 20)))
|
|
477
|
+
.map((record) => record.rowContent),
|
|
478
|
+
};
|
|
479
|
+
}
|
|
437
480
|
const readResult = await this.readRecordsFromFile(datasetId);
|
|
438
481
|
if (!readResult.ok) {
|
|
439
482
|
return readResult;
|
|
@@ -454,12 +497,26 @@ export class DatasetService {
|
|
|
454
497
|
}
|
|
455
498
|
async readRows(params) {
|
|
456
499
|
try {
|
|
500
|
+
const start = Math.max(0, Number(params.cursor ?? 0));
|
|
501
|
+
const limit = Math.max(1, Number(params.limit ?? 200));
|
|
502
|
+
const linkedRecords = await this.readLinkedRecords(params.datasetId);
|
|
503
|
+
if (linkedRecords.ok && linkedRecords.data.length > 0) {
|
|
504
|
+
const rows = linkedRecords.data
|
|
505
|
+
.slice(start, start + limit)
|
|
506
|
+
.map((record) => record.rowContent);
|
|
507
|
+
return {
|
|
508
|
+
ok: true,
|
|
509
|
+
data: {
|
|
510
|
+
rows,
|
|
511
|
+
cursor: start + rows.length,
|
|
512
|
+
done: start + rows.length >= linkedRecords.data.length,
|
|
513
|
+
},
|
|
514
|
+
};
|
|
515
|
+
}
|
|
457
516
|
const readResult = await this.readRecordsFromFile(params.datasetId);
|
|
458
517
|
if (!readResult.ok) {
|
|
459
518
|
return readResult;
|
|
460
519
|
}
|
|
461
|
-
const start = Math.max(0, Number(params.cursor ?? 0));
|
|
462
|
-
const limit = Math.max(1, Number(params.limit ?? 200));
|
|
463
520
|
const rows = [];
|
|
464
521
|
let index = 0;
|
|
465
522
|
let hasMore = false;
|