@ekairos/structure 1.21.74-beta.0 → 1.21.77-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dataset/steps.js +6 -0
- package/dist/datasetFiles.d.ts +6 -0
- package/dist/datasetFiles.js +25 -1
- package/dist/prompts.d.ts +7 -0
- package/dist/prompts.js +13 -0
- package/dist/rowsOutputPaging.js +9 -0
- package/dist/sandbox/steps.d.ts +4 -9
- package/dist/sandbox/steps.js +192 -3
- package/dist/structure.d.ts +2 -0
- package/dist/structure.js +133 -23
- package/package.json +2 -2
- package/dist/datasetReader.d.ts +0 -16
- package/dist/datasetReader.js +0 -25
- package/dist/rowsOutputPaging.steps.d.ts +0 -37
- package/dist/rowsOutputPaging.steps.js +0 -125
- package/dist/rowsPagination.steps.d.ts +0 -59
- package/dist/rowsPagination.steps.js +0 -190
package/dist/dataset/steps.js
CHANGED
@@ -63,6 +63,7 @@ export async function structurePatchContextContentStep(params) {
 }
 export async function structureUploadRowsOutputJsonlStep(params) {
     "use step";
+    const startedAt = Date.now();
     try {
         const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
         const runtime = await resolveStoryRuntime(params.env);
@@ -76,6 +77,7 @@ export async function structureUploadRowsOutputJsonlStep(params) {
         const fileId = uploadResult?.data?.id;
         if (!fileId)
             return { ok: false, error: "Failed to upload file to storage" };
+        console.log(`[structure:upload-jsonl] structureId=${params.structureId} bytes=${fileBuffer.byteLength} elapsedMs=${Date.now() - startedAt}`);
         return { ok: true, data: { fileId, storagePath } };
     }
     catch (error) {
@@ -85,6 +87,7 @@ export async function structureUploadRowsOutputJsonlStep(params) {
 }
 export async function structureLinkRowsOutputFileToContextStep(params) {
     "use step";
+    const startedAt = Date.now();
     try {
         const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
         const runtime = await resolveStoryRuntime(params.env);
@@ -95,6 +98,7 @@ export async function structureLinkRowsOutputFileToContextStep(params) {
         if (!ctxId)
             return { ok: false, error: "Context not found" };
         await db.transact([db.tx.context_contexts[ctxId].link({ structure_output_file: params.fileId })]);
+        console.log(`[structure:link-jsonl] contextKey=${params.contextKey} fileId=${params.fileId} elapsedMs=${Date.now() - startedAt}`);
         return { ok: true };
     }
     catch (error) {
@@ -145,6 +149,7 @@ export async function structureGetContextWithRowsOutputFileStep(params) {
 }
 export async function structureReadRowsOutputJsonlStep(params) {
     "use step";
+    const startedAt = Date.now();
     try {
         const contextKey = `structure:${params.structureId}`;
         const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
@@ -164,6 +169,7 @@ export async function structureReadRowsOutputJsonlStep(params) {
         if (!url)
             return { ok: false, error: "Rows output file not found" };
         const fileBuffer = await fetchArrayBufferWithRetry(url, { attempts: 4, timeoutMs: 90000 });
+        console.log(`[structure:read-jsonl] structureId=${params.structureId} bytes=${fileBuffer.byteLength} elapsedMs=${Date.now() - startedAt}`);
         return { ok: true, data: { contentBase64: Buffer.from(fileBuffer).toString("base64") } };
     }
     catch (error) {
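All four steps in this file now share one instrumentation pattern: capture `startedAt` on entry and emit a single tagged log line on the success path. A minimal sketch of that pattern, with the step body as a placeholder rather than the package's real code:

```ts
// Sketch of the timing pattern added above; `runUpload` stands in for the real work.
async function exampleTimedStep(structureId: string, runUpload: () => Promise<{ byteLength: number }>) {
    const startedAt = Date.now();
    const fileBuffer = await runUpload();
    // One log line per step, tagged [structure:<step>] with payload size and elapsed time.
    console.log(`[structure:upload-jsonl] structureId=${structureId} bytes=${fileBuffer.byteLength} elapsedMs=${Date.now() - startedAt}`);
}
```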
package/dist/datasetFiles.d.ts
CHANGED
@@ -1,5 +1,11 @@
 export declare const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
 export declare const DATASET_OUTPUT_SCHEMA_FILE_NAME = "output_schema.json";
+export declare const DEFAULT_DAYTONA_VOLUME_MOUNT_PATH = "/home/daytona/.ekairos";
+export declare const DEFAULT_DAYTONA_VOLUME_NAME = "ekairos-structure";
+export declare const DEFAULT_DATASET_WORKDIR_BASE = "/home/daytona/.ekairos/datasets";
+export declare function getDaytonaVolumeMountPath(): string;
+export declare function getDaytonaVolumeName(): string;
+export declare function getDatasetWorkdirBase(): string;
 export declare function getDatasetWorkstation(datasetId: string): string;
 export declare function getDatasetOutputPath(datasetId: string): string;
 export declare function getDatasetOutputSchemaPath(datasetId: string): string;
package/dist/datasetFiles.js
CHANGED
@@ -1,7 +1,31 @@
 export const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
 export const DATASET_OUTPUT_SCHEMA_FILE_NAME = "output_schema.json";
+export const DEFAULT_DAYTONA_VOLUME_MOUNT_PATH = "/home/daytona/.ekairos";
+export const DEFAULT_DAYTONA_VOLUME_NAME = "ekairos-structure";
+export const DEFAULT_DATASET_WORKDIR_BASE = `${DEFAULT_DAYTONA_VOLUME_MOUNT_PATH}/datasets`;
+function trimTrailingSlash(value) {
+    return value.endsWith("/") ? value.slice(0, -1) : value;
+}
+export function getDaytonaVolumeMountPath() {
+    const fromEnv = String(process.env.STRUCTURE_DAYTONA_VOLUME_MOUNT_PATH ?? "").trim();
+    if (fromEnv)
+        return trimTrailingSlash(fromEnv);
+    return DEFAULT_DAYTONA_VOLUME_MOUNT_PATH;
+}
+export function getDaytonaVolumeName() {
+    const fromEnv = String(process.env.STRUCTURE_DAYTONA_VOLUME_NAME ?? "").trim();
+    if (fromEnv)
+        return fromEnv;
+    return DEFAULT_DAYTONA_VOLUME_NAME;
+}
+export function getDatasetWorkdirBase() {
+    const fromEnv = String(process.env.STRUCTURE_SANDBOX_WORKDIR_BASE ?? "").trim();
+    if (fromEnv)
+        return trimTrailingSlash(fromEnv);
+    return `${getDaytonaVolumeMountPath()}/datasets`;
+}
 export function getDatasetWorkstation(datasetId) {
-    return
+    return `${getDatasetWorkdirBase()}/${datasetId}`;
 }
 export function getDatasetOutputPath(datasetId) {
     return `${getDatasetWorkstation(datasetId)}/${DATASET_OUTPUT_FILE_NAME}`;
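Each getter consults its environment variable at call time and falls back to the compiled-in default, so overrides and defaults compose into per-dataset paths. A hedged sketch (the env var names come from this diff; the import specifier is an assumption, since the package's export map is not shown here):

```ts
import { getDatasetOutputPath } from "@ekairos/structure/dist/datasetFiles.js"; // specifier assumed

// Trailing slashes are trimmed, so the base resolves to "/mnt/shared/base".
process.env.STRUCTURE_SANDBOX_WORKDIR_BASE = "/mnt/shared/base/";
console.log(getDatasetOutputPath("abc")); // "/mnt/shared/base/abc/output.jsonl"

// With no overrides set, the chain bottoms out at the Daytona volume defaults:
delete process.env.STRUCTURE_SANDBOX_WORKDIR_BASE;
console.log(getDatasetOutputPath("abc")); // "/home/daytona/.ekairos/datasets/abc/output.jsonl"
```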
package/dist/prompts.d.ts
CHANGED
@@ -13,5 +13,12 @@ export type StructurePromptContext = {
     sources: PreparedSource[];
     workstation: string;
     outputPath: string;
+    sandboxProvider?: string;
+    sandboxRuntime?: string;
+    sandboxEphemeral?: boolean;
+    sandboxVolumeName?: string;
+    sandboxVolumeMountPath?: string;
+    sandboxSnapshot?: string;
+    sandboxImage?: string;
 };
 export declare function buildStructurePrompt(ctx: StructurePromptContext): string;
package/dist/prompts.js
CHANGED
@@ -24,6 +24,19 @@ export function buildStructurePrompt(ctx) {
     }
     lines.push("");
     lines.push("## SANDBOX");
+    if (ctx.sandboxProvider)
+        lines.push(`Provider: ${ctx.sandboxProvider}`);
+    if (ctx.sandboxRuntime)
+        lines.push(`Runtime: ${ctx.sandboxRuntime}`);
+    if (ctx.sandboxEphemeral !== undefined)
+        lines.push(`Ephemeral: ${ctx.sandboxEphemeral ? "true" : "false"}`);
+    if (ctx.sandboxVolumeName || ctx.sandboxVolumeMountPath) {
+        lines.push(`Volume: ${ctx.sandboxVolumeName ?? "unknown"} -> ${ctx.sandboxVolumeMountPath ?? "unknown"}`);
+    }
+    if (ctx.sandboxSnapshot)
+        lines.push(`Snapshot: ${ctx.sandboxSnapshot}`);
+    if (ctx.sandboxImage)
+        lines.push(`Image: ${ctx.sandboxImage}`);
     lines.push(`Workstation: ${ctx.workstation}`);
     lines.push(`OutputPath: ${ctx.outputPath}`);
     lines.push("");
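Given a fully populated context, the new branch emits one `key: value` line per known field. A worked example, with values mirroring the Daytona defaults visible elsewhere in this diff:

```ts
// Hypothetical context; the field values are illustrative, not package constants.
const prompt = buildStructurePrompt({
    sources: [],
    workstation: "/home/daytona/.ekairos/datasets/abc",
    outputPath: "/home/daytona/.ekairos/datasets/abc/output.jsonl",
    sandboxProvider: "daytona",
    sandboxRuntime: "python3.13",
    sandboxEphemeral: true,
    sandboxVolumeName: "ekairos-structure",
    sandboxVolumeMountPath: "/home/daytona/.ekairos",
});
// The SANDBOX section of `prompt` then reads:
//   ## SANDBOX
//   Provider: daytona
//   Runtime: python3.13
//   Ephemeral: true
//   Volume: ekairos-structure -> /home/daytona/.ekairos
//   Workstation: /home/daytona/.ekairos/datasets/abc
//   OutputPath: /home/daytona/.ekairos/datasets/abc/output.jsonl
```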
package/dist/rowsOutputPaging.js
CHANGED
@@ -28,6 +28,15 @@ export async function structureDownloadRowsOutputToSandboxStep(params) {
         cmd: "mkdir",
         args: ["-p", workstation],
     });
+    const exists = await runDatasetSandboxCommandStep({
+        env: params.env,
+        sandboxId,
+        cmd: "test",
+        args: ["-f", localPath],
+    });
+    if (exists.exitCode === 0) {
+        return { sandboxId, localPath };
+    }
     const storyRuntime = await getStoryRuntime(params.env);
     const db = storyRuntime.db;
     const contextKey = `structure:${params.structureId}`;
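The new guard makes the download idempotent: on a warm sandbox (or a persistent mounted volume) `test -f` exits 0, so the step returns the existing path before touching Instant storage. A hedged sketch of the caller-visible effect (parameter shape inferred from the deleted datasetReader.js wrapper further down):

```ts
declare const env: unknown; // caller-provided runtime env, assumed

// Both calls return { sandboxId, localPath }; only the first hits storage.
const first = await structureDownloadRowsOutputToSandboxStep({ env, structureId: "abc" });
const second = await structureDownloadRowsOutputToSandboxStep({ env, structureId: "abc", sandboxId: first.sandboxId });
```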
package/dist/sandbox/steps.d.ts
CHANGED
@@ -1,13 +1,8 @@
+import type { SandboxConfig } from "@ekairos/sandbox";
 export type DatasetSandboxId = string;
-export type CreateDatasetSandboxParams = {
-
-
-    ports?: number[];
-    resources?: {
-        vcpus?: number;
-    };
-    purpose?: string;
-    params?: Record<string, any>;
+export type CreateDatasetSandboxParams = Omit<SandboxConfig, "provider" | "daytona"> & {
+    provider?: SandboxConfig["provider"];
+    daytona?: SandboxConfig["daytona"];
 };
 export type DatasetSandboxRunCommandResult = {
     exitCode: number;
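The hand-rolled parameter type is gone: callers now get every `SandboxConfig` field, with only `provider` and `daytona` relaxed to optional because the step supplies defaults for both. For example (field names as used elsewhere in this diff):

```ts
// Valid params under the new type; provider and daytona may be omitted.
// CreateDatasetSandboxParams is the type declared above; import in real code.
const params: CreateDatasetSandboxParams = {
    runtime: "python3.13",
    timeoutMs: 10 * 60 * 1000,
    purpose: "structure.dataset",
    params: { datasetId: "abc" },
    // provider defaults to "daytona" inside createDatasetSandboxStep;
    // daytona settings default from the STRUCTURE_DAYTONA_* env vars.
};
```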
package/dist/sandbox/steps.js
CHANGED
@@ -1,16 +1,186 @@
+import { getDatasetWorkdirBase, getDaytonaVolumeMountPath, getDaytonaVolumeName } from "../datasetFiles";
+function parseOptionalNumber(value) {
+    const parsed = Number(value);
+    if (!Number.isFinite(parsed))
+        return undefined;
+    return parsed;
+}
+function parseOptionalBoolean(value) {
+    const normalized = String(value ?? "").trim().toLowerCase();
+    if (!normalized)
+        return undefined;
+    if (["1", "true", "yes", "y", "on"].includes(normalized))
+        return true;
+    if (["0", "false", "no", "n", "off"].includes(normalized))
+        return false;
+    return undefined;
+}
+function shouldLogDaytonaResources() {
+    const explicit = parseOptionalBoolean(process.env.STRUCTURE_DAYTONA_LOG_RESOURCES);
+    return explicit ?? false;
+}
+function getDaytonaConfigFromEnv() {
+    const apiKey = String(process.env.DAYTONA_API_KEY ?? "").trim();
+    const apiUrl = String(process.env.DAYTONA_API_URL ?? "").trim() ||
+        String(process.env.DAYTONA_SERVER_URL ?? "").trim();
+    const jwtToken = String(process.env.DAYTONA_JWT_TOKEN ?? "").trim();
+    const organizationId = String(process.env.DAYTONA_ORGANIZATION_ID ?? "").trim();
+    const target = String(process.env.DAYTONA_TARGET ?? "").trim();
+    if (!apiUrl)
+        return null;
+    if (!apiKey && !(jwtToken && organizationId))
+        return null;
+    return {
+        apiUrl,
+        apiKey: apiKey || undefined,
+        jwtToken: jwtToken || undefined,
+        organizationId: organizationId || undefined,
+        target: target || undefined,
+    };
+}
+async function logDaytonaResources(label) {
+    if (!shouldLogDaytonaResources())
+        return;
+    const cfg = getDaytonaConfigFromEnv();
+    if (!cfg) {
+        console.log(`[daytona:${label}] missing Daytona env config`);
+        return;
+    }
+    try {
+        const moduleName = "@daytonaio/sdk";
+        const importer = new Function("m", "return import(m)");
+        const { Daytona } = (await importer(moduleName));
+        const daytona = new Daytona(cfg);
+        const list = await daytona.list(undefined, 1, 50);
+        const total = list?.total ?? list?.items?.length ?? 0;
+        console.log(`[daytona:${label}] sandboxes total=${total} page=${list?.page ?? 1}`);
+        const items = Array.isArray(list?.items) ? list.items : [];
+        for (const sb of items.slice(0, 25)) {
+            console.log(`[daytona:${label}] sandbox id=${sb?.id} state=${sb?.state} disk=${sb?.disk} cpu=${sb?.cpu} memory=${sb?.memory} autoDelete=${sb?.autoDeleteInterval} autoStop=${sb?.autoStopInterval} snapshot=${sb?.snapshot ?? ""} createdAt=${sb?.createdAt ?? ""}`);
+        }
+        try {
+            const volumes = await daytona.volume.list();
+            const names = volumes.map((v) => v?.name).filter(Boolean);
+            console.log(`[daytona:${label}] volumes count=${volumes.length} names=${names.slice(0, 20).join(",")}`);
+        }
+        catch (e) {
+            console.log(`[daytona:${label}] volumes list error: ${e instanceof Error ? e.message : String(e)}`);
+        }
+    }
+    catch (e) {
+        console.log(`[daytona:${label}] list error: ${e instanceof Error ? e.message : String(e)}`);
+    }
+}
+function getStructureDaytonaDefaults() {
+    const snapshot = String(process.env.STRUCTURE_DAYTONA_SNAPSHOT ?? "").trim();
+    const image = String(process.env.STRUCTURE_DAYTONA_IMAGE ?? "").trim();
+    const declarative = String(process.env.STRUCTURE_DAYTONA_DECLARATIVE_IMAGE ?? "").trim();
+    const ephemeralEnv = parseOptionalBoolean(process.env.STRUCTURE_DAYTONA_EPHEMERAL);
+    const ephemeral = ephemeralEnv ?? true;
+    const autoStopIntervalMin = parseOptionalNumber(process.env.STRUCTURE_DAYTONA_AUTO_STOP_MIN);
+    const autoArchiveIntervalMin = parseOptionalNumber(process.env.STRUCTURE_DAYTONA_AUTO_ARCHIVE_MIN);
+    const autoDeleteIntervalMin = parseOptionalNumber(process.env.STRUCTURE_DAYTONA_AUTO_DELETE_MIN);
+    const volumeName = getDaytonaVolumeName();
+    const mountPath = getDaytonaVolumeMountPath();
+    const volumes = volumeName && mountPath
+        ? [
+            {
+                volumeName,
+                mountPath,
+            },
+        ]
+        : [];
+    return {
+        snapshot: snapshot || undefined,
+        image: image || (declarative ? "declarative" : undefined),
+        ephemeral,
+        autoStopIntervalMin,
+        autoArchiveIntervalMin,
+        autoDeleteIntervalMin,
+        volumes,
+    };
+}
 export async function createDatasetSandboxStep(params) {
     "use step";
+    const startedAt = Date.now();
+    const { env, ...configInput } = params;
     const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
-    const db = (await resolveStoryRuntime(
+    const db = (await resolveStoryRuntime(env)).db;
     const { SandboxService } = (await import("@ekairos/sandbox"));
     const service = new SandboxService(db);
-    const
-
+    const daytonaDefaults = getStructureDaytonaDefaults();
+    const explicitVolumes = configInput.daytona?.volumes;
+    const mergedDaytona = {
+        ...daytonaDefaults,
+        ...(configInput.daytona ?? {}),
+        volumes: Array.isArray(explicitVolumes) ? explicitVolumes : daytonaDefaults.volumes,
+    };
+    const vcpusOverride = parseOptionalNumber(process.env.STRUCTURE_DAYTONA_VCPUS);
+    const mergedResources = configInput.resources ?? (vcpusOverride ? { vcpus: vcpusOverride } : undefined);
+    const config = {
+        ...configInput,
+        provider: "daytona",
+        resources: mergedResources,
+        daytona: mergedDaytona,
+    };
+    if (shouldLogDaytonaResources()) {
+        console.log(`[daytona:create] config runtime=${config.runtime ?? ""} purpose=${config.purpose ?? ""} params=${JSON.stringify(config.params ?? {})} snapshot=${config.daytona?.snapshot ?? ""} image=${config.daytona?.image ?? ""} ephemeral=${config.daytona?.ephemeral} autoStop=${config.daytona?.autoStopIntervalMin ?? ""} autoDelete=${config.daytona?.autoDeleteIntervalMin ?? ""} volumes=${JSON.stringify(config.daytona?.volumes ?? [])}`);
+        console.log(`[daytona:create] ts=${new Date(startedAt).toISOString()} startMs=${startedAt}`);
+    }
+    await logDaytonaResources("before_create");
+    const created = await service.createSandbox(config);
+    if (!created.ok) {
+        await logDaytonaResources("create_failed");
         throw new Error(created.error);
+    }
+    await logDaytonaResources("after_create");
+    if (shouldLogDaytonaResources()) {
+        const elapsedMs = Date.now() - startedAt;
+        console.log(`[daytona:create] doneMs=${Date.now()} elapsedMs=${elapsedMs}`);
+    }
+    if (shouldLogDaytonaResources()) {
+        try {
+            const info = await service.reconnectToSandbox(created.data.sandboxId);
+            if (info.ok && !info.data.sandbox.sandboxId) {
+                const sb = info.data.sandbox;
+                console.log(`[daytona:after_create] sandbox id=${sb?.id} state=${sb?.state} disk=${sb?.disk} cpu=${sb?.cpu} memory=${sb?.memory} autoDelete=${sb?.autoDeleteInterval} autoStop=${sb?.autoStopInterval}`);
+            }
+        }
+        catch (e) {
+            console.log(`[daytona:after_create] reconnect error: ${e instanceof Error ? e.message : String(e)}`);
+        }
+        try {
+            const df = await service.runCommand(created.data.sandboxId, "df", ["-h"]);
+            if (df.ok) {
+                console.log(`[sandbox:${created.data.sandboxId}] df -h\n${df.data.output}`);
+            }
+            else {
+                console.log(`[sandbox:${created.data.sandboxId}] df error: ${df.error}`);
+            }
+        }
+        catch (e) {
+            console.log(`[sandbox:${created.data.sandboxId}] df error: ${e instanceof Error ? e.message : String(e)}`);
+        }
+        try {
+            const mountPath = getDaytonaVolumeMountPath();
+            const basePath = getDatasetWorkdirBase();
+            const du = await service.runCommand(created.data.sandboxId, "du", ["-sh", mountPath, basePath]);
+            if (du.ok) {
+                console.log(`[sandbox:${created.data.sandboxId}] du -sh\n${du.data.output}`);
+            }
+            else {
+                console.log(`[sandbox:${created.data.sandboxId}] du error: ${du.error}`);
+            }
+        }
+        catch (e) {
+            console.log(`[sandbox:${created.data.sandboxId}] du error: ${e instanceof Error ? e.message : String(e)}`);
+        }
+    }
     return { sandboxId: created.data.sandboxId };
 }
 export async function runDatasetSandboxCommandStep(params) {
     "use step";
+    const startedAt = Date.now();
     const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
     const db = (await resolveStoryRuntime(params.env)).db;
     const { SandboxService } = (await import("@ekairos/sandbox"));
@@ -23,10 +193,15 @@ export async function runDatasetSandboxCommandStep(params) {
         stdout: result.data.output ?? "",
         stderr: result.data.error ?? "",
     };
+    if (shouldLogDaytonaResources()) {
+        const elapsedMs = Date.now() - startedAt;
+        console.log(`[daytona:cmd] sandboxId=${params.sandboxId} cmd=${params.cmd} args=${JSON.stringify(params.args ?? [])} elapsedMs=${elapsedMs}`);
+    }
     return normalized;
 }
 export async function writeDatasetSandboxFilesStep(params) {
     "use step";
+    const startedAt = Date.now();
     const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
     const db = (await resolveStoryRuntime(params.env)).db;
     const { SandboxService } = (await import("@ekairos/sandbox"));
@@ -34,6 +209,10 @@ export async function writeDatasetSandboxFilesStep(params) {
     const result = await service.writeFiles(params.sandboxId, params.files);
     if (!result.ok)
         throw new Error(result.error);
+    if (shouldLogDaytonaResources()) {
+        const elapsedMs = Date.now() - startedAt;
+        console.log(`[daytona:write] sandboxId=${params.sandboxId} files=${params.files.length} elapsedMs=${elapsedMs}`);
+    }
 }
 /**
  * Workflow-safe helper:
@@ -53,6 +232,7 @@ export async function writeDatasetSandboxTextFileStep(params) {
 }
 export async function readDatasetSandboxFileStep(params) {
     "use step";
+    const startedAt = Date.now();
     const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
     const db = (await resolveStoryRuntime(params.env)).db;
     const { SandboxService } = (await import("@ekairos/sandbox"));
@@ -60,6 +240,10 @@ export async function readDatasetSandboxFileStep(params) {
     const result = await service.readFile(params.sandboxId, params.path);
     if (!result.ok)
         throw new Error(result.error);
+    if (shouldLogDaytonaResources()) {
+        const elapsedMs = Date.now() - startedAt;
+        console.log(`[daytona:read] sandboxId=${params.sandboxId} path=${params.path} bytes=${result.data.contentBase64?.length ?? 0} elapsedMs=${elapsedMs}`);
+    }
     return result.data;
 }
 /**
@@ -77,6 +261,7 @@ export async function readDatasetSandboxTextFileStep(params) {
 }
 export async function stopDatasetSandboxStep(params) {
     "use step";
+    const startedAt = Date.now();
     const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
     const db = (await resolveStoryRuntime(params.env)).db;
     const { SandboxService } = (await import("@ekairos/sandbox"));
@@ -84,4 +269,8 @@ export async function stopDatasetSandboxStep(params) {
     const result = await service.stopSandbox(params.sandboxId);
     if (!result.ok)
         throw new Error(result.error);
+    if (shouldLogDaytonaResources()) {
+        const elapsedMs = Date.now() - startedAt;
+        console.log(`[daytona:stop] sandboxId=${params.sandboxId} elapsedMs=${elapsedMs}`);
+    }
 }
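Note that `parseOptionalBoolean` is deliberately three-valued: unset or unrecognized strings return `undefined`, letting each call site pick its own default (`?? true` for `STRUCTURE_DAYTONA_EPHEMERAL`, `?? false` for `STRUCTURE_DAYTONA_LOG_RESOURCES`). The helper is module-private; this snippet just pins down its semantics per the implementation above:

```ts
// Expected results per the implementation above.
parseOptionalBoolean("yes");   // true   — accepts "1", "true", "yes", "y", "on"
parseOptionalBoolean("OFF");   // false  — accepts "0", "false", "no", "n", "off" (case-insensitive)
parseOptionalBoolean("");      // undefined — caller's ?? default applies
parseOptionalBoolean("maybe"); // undefined — unrecognized values also fall through
```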
package/dist/structure.d.ts
CHANGED
@@ -1,4 +1,5 @@
 import { type StructureRowsOutputPagingCursor } from "./rowsOutputPaging";
+import type { SandboxConfig } from "@ekairos/sandbox";
 export type StructureSource = {
     kind: "file";
     fileId: string;
@@ -69,6 +70,7 @@ export declare function structure<Env extends {
     orgId: string;
 }>(env: Env, opts?: {
     datasetId?: string;
+    sandboxConfig?: SandboxConfig;
 }): {
     datasetId: string;
     from(...src: StructureSource[]): /*elided*/ any;
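Callers can now shape sandbox provisioning per run; anything left out of `sandboxConfig` falls back to the defaults merged in `structure.js` below. A hedged sketch (assumes `structure` is the package's public export and `env` satisfies the `Env` constraint shown above):

```ts
import { structure } from "@ekairos/structure";

declare const env: { orgId: string }; // plus whatever else Env requires

const run = structure(env, {
    datasetId: "abc",
    sandboxConfig: {
        provider: "daytona",
        runtime: "python3.13",
        daytona: {
            ephemeral: false,        // keep the sandbox around for debugging
            autoStopIntervalMin: 15, // loosen the 5-minute default
        },
    },
});
```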
package/dist/structure.js
CHANGED
@@ -1,5 +1,5 @@
 import { createStory, didToolExecute, USER_MESSAGE_TYPE, WEB_CHANNEL } from "@ekairos/story";
-import { getDatasetOutputPath, getDatasetOutputSchemaPath, getDatasetWorkstation } from "./datasetFiles";
+import { getDatasetOutputPath, getDatasetOutputSchemaPath, getDatasetWorkstation, getDaytonaVolumeMountPath, getDaytonaVolumeName, } from "./datasetFiles";
 import { structureDownloadRowsOutputToSandboxStep, structureReadRowsOutputPageFromSandboxStep, } from "./rowsOutputPaging";
 import { structureSplitRowsOutputToDatasetStep } from "./rowsOutputSplit";
 import { createDatasetSandboxStep, readDatasetSandboxFileStep, readDatasetSandboxTextFileStep, runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFileStep, } from "./sandbox/steps";
@@ -50,6 +50,88 @@ function guessTextFileExtension(mimeType, name) {
         return ".yaml";
     return ".txt";
 }
+function shouldSkipPipInstall() {
+    const explicit = String(process.env.STRUCTURE_DAYTONA_SKIP_PIP_INSTALL ?? "").trim().toLowerCase();
+    if (explicit === "1" || explicit === "true" || explicit === "yes")
+        return true;
+    const declarative = String(process.env.STRUCTURE_DAYTONA_DECLARATIVE_IMAGE ?? "").trim().toLowerCase();
+    if (declarative === "1" || declarative === "true" || declarative === "yes")
+        return true;
+    const snapshot = String(process.env.STRUCTURE_DAYTONA_SNAPSHOT ?? "").trim();
+    return Boolean(snapshot);
+}
+function getDefaultSandboxConfig(datasetId) {
+    const volumeName = getDaytonaVolumeName();
+    const mountPath = getDaytonaVolumeMountPath();
+    const volumes = volumeName && mountPath
+        ? [
+            {
+                volumeName,
+                mountPath,
+            },
+        ]
+        : [];
+    return {
+        provider: "daytona",
+        runtime: "python3.13",
+        timeoutMs: 10 * 60 * 1000,
+        purpose: "structure.dataset",
+        params: { datasetId },
+        daytona: {
+            ephemeral: true,
+            autoStopIntervalMin: 5,
+            volumes,
+        },
+    };
+}
+function mergeSandboxConfig(base, override) {
+    if (!override)
+        return base;
+    const mergedParams = {
+        ...(base.params ?? {}),
+        ...(override.params ?? {}),
+    };
+    const mergedDaytona = {
+        ...(base.daytona ?? {}),
+        ...(override.daytona ?? {}),
+    };
+    if (override.daytona && "volumes" in override.daytona) {
+        mergedDaytona.volumes = override.daytona?.volumes;
+    }
+    return {
+        ...base,
+        ...override,
+        params: mergedParams,
+        daytona: mergedDaytona,
+    };
+}
+async function sandboxFileExists(env, sandboxId, path) {
+    const res = await runDatasetSandboxCommandStep({
+        env,
+        sandboxId,
+        cmd: "test",
+        args: ["-f", path],
+    });
+    return res.exitCode === 0;
+}
+async function sandboxFindFirstMatch(env, sandboxId, pattern) {
+    const py = [
+        "import sys, glob",
+        "pattern = sys.argv[1]",
+        "matches = glob.glob(pattern)",
+        "print(matches[0] if matches else '')",
+    ].join("\n");
+    const res = await runDatasetSandboxCommandStep({
+        env,
+        sandboxId,
+        cmd: "python",
+        args: ["-c", py, pattern],
+    });
+    if (res.exitCode !== 0)
+        return null;
+    const out = String(res.stdout ?? "").trim();
+    return out ? out : null;
+}
 async function ensureSandboxPrepared(params) {
     const { env, datasetId, sandboxId, sources, state } = params;
     const workstation = getDatasetWorkstation(datasetId);
@@ -60,25 +142,33 @@ async function ensureSandboxPrepared(params) {
     const mkdirRes = await runDatasetSandboxCommandStep({ env, sandboxId, cmd: "mkdir", args: ["-p", workstation] });
     // Align with dataset sandbox behavior: install python deps up-front (once per dataset sandbox).
     // This avoids tool-level "install if used" heuristics and ensures scripts can import pandas.
-
-
-
-
-
-
-
-
-
-
+    if (!shouldSkipPipInstall()) {
+        const pipInstall = await runDatasetSandboxCommandStep({
+            env,
+            sandboxId,
+            cmd: "python",
+            // NOTE: pandas needs openpyxl to read .xlsx files.
+            args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
+        });
+        const installStderr = pipInstall.stderr ?? "";
+        if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
+            throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
+        }
     }
     const prepared = [];
     for (let i = 0; i < sources.length; i++) {
         const src = sources[i];
         if (src.kind === "file") {
+            const basePath = `${workstation}/file_${i}_${src.fileId}`;
+            const existingPath = await sandboxFindFirstMatch(env, sandboxId, `${basePath}*`);
+            if (existingPath) {
+                prepared.push({ kind: "file", id: src.fileId, path: existingPath });
+                continue;
+            }
             const file = await readInstantFileStep({ env, fileId: src.fileId });
             const fileName = String(file.contentDisposition ?? "");
             const ext = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
-            const path = `${
+            const path = `${basePath}${ext}`;
             await writeDatasetSandboxFilesStep({
                 env,
                 sandboxId,
@@ -88,16 +178,19 @@ async function ensureSandboxPrepared(params) {
             continue;
         }
         if (src.kind === "dataset") {
-            const content = await structureReadRowsOutputJsonlStep({ env, structureId: src.datasetId });
-            if (!content.ok) {
-                throw new Error(content.error);
-            }
             const path = `${workstation}/dataset_${src.datasetId}.jsonl`;
-            await
-
-
-
-
+            const exists = await sandboxFileExists(env, sandboxId, path);
+            if (!exists) {
+                const content = await structureReadRowsOutputJsonlStep({ env, structureId: src.datasetId });
+                if (!content.ok) {
+                    throw new Error(content.error);
+                }
+                await writeDatasetSandboxFilesStep({
+                    env,
+                    sandboxId,
+                    files: [{ path, contentBase64: content.data.contentBase64 }],
+                });
+            }
             prepared.push({ kind: "dataset", id: src.datasetId, path });
             continue;
         }
@@ -105,7 +198,10 @@
             const ext = guessTextFileExtension(src.mimeType, src.name);
             const textId = `text_${i}`;
             const path = `${workstation}/${textId}${ext}`;
-            await
+            const exists = await sandboxFileExists(env, sandboxId, path);
+            if (!exists) {
+                await writeDatasetSandboxTextFileStep({ env, sandboxId, path, text: String(src.text ?? "") });
+            }
             prepared.push({ kind: "text", id: textId, path, name: src.name, mimeType: src.mimeType });
             continue;
         }
@@ -149,6 +245,8 @@ async function readSchemaFromSandboxIfPresent(params) {
 function createStructureStoryDefinition(config) {
     const datasetId = config.datasetId;
     const model = config.model ?? "openai/gpt-5.2";
+    const defaultSandboxConfig = getDefaultSandboxConfig(datasetId);
+    const resolvedSandboxConfig = mergeSandboxConfig(defaultSandboxConfig, config.sandboxConfig);
     const story = createStory("ekairos.structure")
         .context(async (stored, env) => {
         const prev = stored?.content ?? {};
@@ -156,7 +254,10 @@ function createStructureStoryDefinition(config) {
         const existingSandboxId = prev.sandboxId ?? config.sandboxId ?? "";
         let sandboxId = existingSandboxId;
         if (!sandboxId) {
-            const created = await createDatasetSandboxStep({
+            const created = await createDatasetSandboxStep({
+                env,
+                ...resolvedSandboxConfig,
+            });
             sandboxId = created.sandboxId;
         }
         const { preparedSources, workstation, outputPath } = await ensureSandboxPrepared({
@@ -176,6 +277,13 @@
             sources: preparedSources,
             workstation,
             outputPath,
+            sandboxProvider: resolvedSandboxConfig.provider ?? "daytona",
+            sandboxRuntime: resolvedSandboxConfig.runtime ?? "python3.13",
+            sandboxEphemeral: resolvedSandboxConfig.daytona?.ephemeral ?? true,
+            sandboxVolumeName: resolvedSandboxConfig.daytona?.volumes?.[0]?.volumeName,
+            sandboxVolumeMountPath: resolvedSandboxConfig.daytona?.volumes?.[0]?.mountPath,
+            sandboxSnapshot: resolvedSandboxConfig.daytona?.snapshot,
+            sandboxImage: resolvedSandboxConfig.daytona?.image,
         };
         const contextKey = `structure:${datasetId}`;
         // IMPORTANT:
@@ -274,6 +382,7 @@ export function structure(env, opts) {
     let mode = "auto";
     let output = "rows";
     let outputSchema;
+    const sandboxConfig = opts?.sandboxConfig;
     const api = {
         datasetId,
         from(...src) {
@@ -314,6 +423,7 @@ export function structure(env, opts) {
         mode,
         output,
         outputSchema,
+        sandboxConfig,
     };
     const { story } = createStructureStoryDefinition(storyConfig);
     function makeUserMessageEvent(text) {
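`mergeSandboxConfig` shallow-merges `params` and `daytona` but special-cases `volumes`: the override's volume list wins only when the `volumes` key is explicitly present (the `"volumes" in override.daytona` check), so an override that merely flips `ephemeral` keeps the default volume mount. Illustrative expectations against the module-private helpers above:

```ts
const base = getDefaultSandboxConfig("abc"); // daytona.volumes = [{ volumeName, mountPath }]

// No "volumes" key: the default volume mount survives the shallow merge.
const a = mergeSandboxConfig(base, { daytona: { ephemeral: false } });
// a.daytona.volumes still holds the default ekairos-structure mount.

// Explicit "volumes" key: replaces the default outright; [] disables the mount.
const b = mergeSandboxConfig(base, { daytona: { volumes: [] } });
// b.daytona.volumes is [].
```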
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@ekairos/structure",
-  "version": "1.21.74-beta.0",
+  "version": "1.21.77-beta.0",
   "description": "Ekairos Structure - Unified structured extraction (rows or object) from file/text/dataset inputs",
   "type": "module",
   "main": "dist/index.js",
@@ -36,7 +36,7 @@
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
-    "@ekairos/domain": "^1.21.
+    "@ekairos/domain": "^1.21.77-beta.0",
     "@ekairos/sandbox": "^1.21.60-beta.0",
     "@instantdb/admin": "^0.22.13",
     "@instantdb/core": "^0.22.13",
package/dist/datasetReader.d.ts
DELETED
@@ -1,16 +0,0 @@
-import { type StructureRowsOutputPagingCursor, type StructureRowsOutputSandboxRef } from "./rowsOutputPaging";
-export declare function datasetReader(env: any, opts: {
-    datasetId: string;
-    sandboxId?: string;
-    runtime?: string;
-    timeoutMs?: number;
-}): {
-    datasetId: string;
-    download(): Promise<StructureRowsOutputSandboxRef>;
-    readPage(params: {
-        sandboxId: string;
-        localPath: string;
-        cursor?: Partial<StructureRowsOutputPagingCursor>;
-        limit: number;
-    }): Promise<import("./rowsOutputPaging").StructureReadRowsOutputPageFromSandboxResult>;
-};
package/dist/datasetReader.js
DELETED
@@ -1,25 +0,0 @@
-import { structureDownloadRowsOutputToSandboxStep, structureReadRowsOutputPageFromSandboxStep, } from "./rowsOutputPaging";
-export function datasetReader(env, opts) {
-    const datasetId = opts.datasetId;
-    return {
-        datasetId,
-        async download() {
-            return await structureDownloadRowsOutputToSandboxStep({
-                env,
-                structureId: datasetId,
-                sandboxId: opts.sandboxId,
-                runtime: opts.runtime,
-                timeoutMs: opts.timeoutMs,
-            });
-        },
-        async readPage(params) {
-            return await structureReadRowsOutputPageFromSandboxStep({
-                env,
-                sandboxId: params.sandboxId,
-                localPath: params.localPath,
-                cursor: params.cursor,
-                limit: params.limit,
-            });
-        },
-    };
-}
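The deleted `datasetReader` facade was a thin pass-through; the two steps it wrapped still live in `./rowsOutputPaging` (and `structure.js` above still imports them), so former callers can invoke the steps directly. Roughly the equivalent of the removed `download()`/`readPage()` pair (the import specifier is an assumption):

```ts
import {
    structureDownloadRowsOutputToSandboxStep,
    structureReadRowsOutputPageFromSandboxStep,
} from "@ekairos/structure/dist/rowsOutputPaging.js"; // specifier assumed

declare const env: unknown;
const datasetId = "abc";

// The facade mapped datasetId -> structureId when calling the download step.
const { sandboxId, localPath } = await structureDownloadRowsOutputToSandboxStep({
    env,
    structureId: datasetId,
});
// cursor is optional (cursor?: Partial<StructureRowsOutputPagingCursor> per the deleted declaration).
const page = await structureReadRowsOutputPageFromSandboxStep({
    env,
    sandboxId,
    localPath,
    limit: 100,
});
```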
package/dist/rowsOutputPaging.steps.d.ts
DELETED
@@ -1,37 +0,0 @@
-export type RowsOutputSandboxDownloadResult = {
-    sandboxId: string;
-    localPath: string;
-};
-export type RowsOutputChunkResult<T = any> = {
-    rows: T[];
-    nextByteOffset: number;
-    nextRowOffset: number;
-    done: boolean;
-};
-/**
- * Step 1/2:
- * Download the rows output.jsonl from Instant storage into a sandbox file.
- *
- * This isolates network flakiness (e.g. undici `TypeError: terminated`) into a single step
- * and makes subsequent reads purely sandbox-local.
- */
-export declare function structureDownloadRowsOutputToSandboxStep(params: {
-    env: any;
-    structureId: string;
-    runtime?: string;
-    timeoutMs?: number;
-}): Promise<RowsOutputSandboxDownloadResult>;
-/**
- * Step 2/2:
- * Read the next chunk of ROW records from the sandbox-local output.jsonl, bounded by `limit`.
- *
- * Pagination state is passed explicitly via `{ byteOffset, rowOffset }` and returned as next offsets.
- */
-export declare function structureReadRowsOutputChunkStep<T = any>(params: {
-    env: any;
-    sandboxId: string;
-    localPath: string;
-    byteOffset: number;
-    rowOffset: number;
-    limit: number;
-}): Promise<RowsOutputChunkResult<T>>;
package/dist/rowsOutputPaging.steps.js
DELETED
@@ -1,125 +0,0 @@
-import { getDatasetOutputPath, getDatasetWorkstation } from "./datasetFiles";
-import { createDatasetSandboxStep, runDatasetSandboxCommandStep } from "./sandbox/steps";
-import { getStoryRuntime } from "./runtime";
-/**
- * Step 1/2:
- * Download the rows output.jsonl from Instant storage into a sandbox file.
- *
- * This isolates network flakiness (e.g. undici `TypeError: terminated`) into a single step
- * and makes subsequent reads purely sandbox-local.
- */
-export async function structureDownloadRowsOutputToSandboxStep(params) {
-    "use step";
-    const runtime = params.runtime ?? "python3.13";
-    const timeoutMs = params.timeoutMs ?? 10 * 60 * 1000;
-    const { sandboxId } = await createDatasetSandboxStep({
-        env: params.env,
-        runtime,
-        timeoutMs,
-        purpose: "structure.rows-output.reader",
-        params: { structureId: params.structureId },
-    });
-    const workstation = getDatasetWorkstation(params.structureId);
-    const localPath = getDatasetOutputPath(params.structureId);
-    await runDatasetSandboxCommandStep({
-        env: params.env,
-        sandboxId,
-        cmd: "mkdir",
-        args: ["-p", workstation],
-    });
-    const storyRuntime = await getStoryRuntime(params.env);
-    const db = storyRuntime.db;
-    const contextKey = `structure:${params.structureId}`;
-    const query = (await db.query({
-        context_contexts: {
-            $: { where: { key: contextKey }, limit: 1 },
-            structure_output_file: {},
-        },
-    }));
-    const ctx = query.context_contexts?.[0];
-    const linked = Array.isArray(ctx?.structure_output_file) ? ctx.structure_output_file[0] : ctx.structure_output_file;
-    const url = linked?.url;
-    if (!url) {
-        throw new Error("Rows output file not found");
-    }
-    const py = [
-        "import sys, urllib.request",
-        "url = sys.argv[1]",
-        "out_path = sys.argv[2]",
-        "with urllib.request.urlopen(url) as r:",
-        "    data = r.read()",
-        "with open(out_path, 'wb') as f:",
-        "    f.write(data)",
-        "print('ok', len(data))",
-    ].join("\n");
-    const res = await runDatasetSandboxCommandStep({
-        env: params.env,
-        sandboxId,
-        cmd: "python",
-        args: ["-c", py, String(url), localPath],
-    });
-    if (res.exitCode !== 0) {
-        throw new Error(res.stderr || "Failed to download rows output to sandbox");
-    }
-    return { sandboxId, localPath };
-}
-/**
- * Step 2/2:
- * Read the next chunk of ROW records from the sandbox-local output.jsonl, bounded by `limit`.
- *
- * Pagination state is passed explicitly via `{ byteOffset, rowOffset }` and returned as next offsets.
- */
-export async function structureReadRowsOutputChunkStep(params) {
-    "use step";
-    const py = [
-        "import sys, json",
-        "path = sys.argv[1]",
-        "byte_offset = int(sys.argv[2])",
-        "row_offset = int(sys.argv[3])",
-        "limit = int(sys.argv[4])",
-        "rows = []",
-        "next_byte = byte_offset",
-        "next_row = row_offset",
-        "with open(path, 'rb') as f:",
-        "    f.seek(byte_offset)",
-        "    while len(rows) < limit:",
-        "        line = f.readline()",
-        "        if not line:",
-        "            break",
-        "        next_byte = f.tell()",
-        "        try:",
-        "            obj = json.loads(line.decode('utf-8'))",
-        "        except Exception:",
-        "            continue",
-        "        if obj.get('type') != 'row':",
-        "            continue",
-        "        rows.append(obj.get('data'))",
-        "        next_row += 1",
-        "done = len(rows) < limit",
-        "print(json.dumps({",
-        "    'rows': rows,",
-        "    'nextByteOffset': next_byte,",
-        "    'nextRowOffset': next_row,",
-        "    'done': done,",
-        "}))",
-    ].join("\n");
-    const res = await runDatasetSandboxCommandStep({
-        env: params.env,
-        sandboxId: params.sandboxId,
-        cmd: "python",
-        args: [
-            "-c",
-            py,
-            params.localPath,
-            String(params.byteOffset ?? 0),
-            String(params.rowOffset ?? 0),
-            String(params.limit),
-        ],
-    });
-    if (res.exitCode !== 0) {
-        throw new Error(res.stderr || "Failed to read rows chunk from sandbox");
-    }
-    const out = String(res.stdout ?? "").trim();
-    const parsed = JSON.parse(out);
-    return parsed;
-}
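For reference, the deleted chunk reader carried its cursor as explicit `{ byteOffset, rowOffset }` offsets, with `nextByteOffset` taken from `f.tell()` after the last line read. A full scan against this (now removed) API looked like:

```ts
// Sketch against the deleted exports above; handleRows is a placeholder consumer.
declare const env: unknown;
declare const sandboxId: string;
declare const localPath: string;
declare function handleRows(rows: unknown[]): void;

let byteOffset = 0;
let rowOffset = 0;
for (;;) {
    const chunk = await structureReadRowsOutputChunkStep({
        env, sandboxId, localPath, byteOffset, rowOffset, limit: 500,
    });
    handleRows(chunk.rows);
    if (chunk.done) break;             // done means the last read came up short of limit
    byteOffset = chunk.nextByteOffset; // resume exactly where f.tell() left off
    rowOffset = chunk.nextRowOffset;
}
```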
package/dist/rowsPagination.steps.d.ts
DELETED
@@ -1,59 +0,0 @@
-import { type DatasetSandboxId } from "./sandbox/steps";
-/**
- * Step 1: Download Structure rows output file (output.jsonl) into a sandbox.
- *
- * This enables pagination by reading chunks from the local sandbox filesystem.
- */
-export declare function downloadStructureRowsOutputToSandboxStep(params: {
-    env: any;
-    sandboxId: DatasetSandboxId;
-    structureId: string;
-}): Promise<{
-    filePath: string;
-}>;
-/**
- * @deprecated Prefer `downloadStructureRowsOutputToSandboxStep` (kept for symmetry with chunk naming).
- *
- * Note: The name includes `RowsOutputJsonl` to be explicit about the file format.
- */
-export declare function structureDownloadRowsOutputJsonlToSandboxStep(params: {
-    env: any;
-    sandboxId: DatasetSandboxId;
-    structureId: string;
-}): Promise<{
-    filePath: string;
-}>;
-/**
- * Step 2: Read a chunk/page of JSONL records from the downloaded sandbox file.
- *
- * Naming (consistent):
- * - `structureReadRowsOutputJsonlStep` reads the whole file (base64)
- * - `structureReadRowsOutputJsonlChunkStep` reads a paginated chunk from sandbox
- *
- * Offset/limit are line-based (0-indexed).
- */
-export declare function structureReadRowsOutputJsonlChunkStep(params: {
-    env: any;
-    sandboxId: DatasetSandboxId;
-    structureId: string;
-    offset: number;
-    limit: number;
-}): Promise<{
-    records: any[];
-    nextOffset: number;
-    done: boolean;
-}>;
-/**
- * @deprecated Use `structureReadRowsOutputJsonlChunkStep` instead (naming consistency).
- */
-export declare function readStructureRowsChunkFromSandboxStep(params: {
-    env: any;
-    sandboxId: DatasetSandboxId;
-    structureId: string;
-    offset: number;
-    limit: number;
-}): Promise<{
-    records: any[];
-    nextOffset: number;
-    done: boolean;
-}>;
package/dist/rowsPagination.steps.js
DELETED
@@ -1,190 +0,0 @@
-import { getDatasetWorkstation } from "./datasetFiles";
-import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFileStep, } from "./sandbox/steps";
-async function getRowsOutputUrl(params) {
-    const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
-    const runtime = (await resolveStoryRuntime(params.env));
-    const db = runtime.db;
-    const contextKey = `structure:${params.structureId}`;
-    const query = (await db.query({
-        context_contexts: {
-            $: { where: { key: contextKey }, limit: 1 },
-            structure_output_file: {},
-        },
-    }));
-    const ctx = query.context_contexts?.[0];
-    const linked = Array.isArray(ctx?.structure_output_file) ? ctx.structure_output_file[0] : ctx?.structure_output_file;
-    const url = linked?.url;
-    if (!url) {
-        throw new Error("Rows output file not found");
-    }
-    return String(url);
-}
-/**
- * Step 1: Download Structure rows output file (output.jsonl) into a sandbox.
- *
- * This enables pagination by reading chunks from the local sandbox filesystem.
- */
-export async function downloadStructureRowsOutputToSandboxStep(params) {
-    "use step";
-    const workstation = getDatasetWorkstation(params.structureId);
-    const filePath = `${workstation}/rows_output.jsonl`;
-    const scriptPath = `${workstation}/download_rows_output.py`;
-    // Ensure directory exists
-    await runDatasetSandboxCommandStep({
-        env: params.env,
-        sandboxId: params.sandboxId,
-        cmd: "mkdir",
-        args: ["-p", workstation],
-    });
-    const url = await getRowsOutputUrl({ env: params.env, structureId: params.structureId });
-    // Write a deterministic script to the sandbox and run it (no external deps).
-    await writeDatasetSandboxTextFileStep({
-        env: params.env,
-        sandboxId: params.sandboxId,
-        path: scriptPath,
-        text: [
-            "import argparse",
-            "import urllib.request",
-            "",
-            "def main():",
-            "    p = argparse.ArgumentParser()",
-            "    p.add_argument('--url', required=True)",
-            "    p.add_argument('--out', required=True)",
-            "    args = p.parse_args()",
-            "",
-            "    # Download to local sandbox file",
-            "    with urllib.request.urlopen(args.url, timeout=60) as r:",
-            "        data = r.read()",
-            "    with open(args.out, 'wb') as f:",
-            "        f.write(data)",
-            "",
-            "    print('ok')",
-            "    print('bytes', len(data))",
-            "",
-            "if __name__ == '__main__':",
-            "    main()",
-            "",
-        ].join("\n"),
-    });
-    const res = await runDatasetSandboxCommandStep({
-        env: params.env,
-        sandboxId: params.sandboxId,
-        cmd: "python",
-        args: [scriptPath, "--url", url, "--out", filePath],
-    });
-    if (res.exitCode !== 0) {
-        throw new Error(res.stderr || "Failed to download rows output into sandbox");
-    }
-    return { filePath };
-}
-/**
- * @deprecated Prefer `downloadStructureRowsOutputToSandboxStep` (kept for symmetry with chunk naming).
- *
- * Note: The name includes `RowsOutputJsonl` to be explicit about the file format.
- */
-export async function structureDownloadRowsOutputJsonlToSandboxStep(params) {
-    "use step";
-    return await downloadStructureRowsOutputToSandboxStep(params);
-}
-/**
- * Step 2: Read a chunk/page of JSONL records from the downloaded sandbox file.
- *
- * Naming (consistent):
- * - `structureReadRowsOutputJsonlStep` reads the whole file (base64)
- * - `structureReadRowsOutputJsonlChunkStep` reads a paginated chunk from sandbox
- *
- * Offset/limit are line-based (0-indexed).
- */
-export async function structureReadRowsOutputJsonlChunkStep(params) {
-    "use step";
-    const workstation = getDatasetWorkstation(params.structureId);
-    const filePath = `${workstation}/rows_output.jsonl`;
-    const scriptPath = `${workstation}/read_rows_chunk.py`;
-    await writeDatasetSandboxTextFileStep({
-        env: params.env,
-        sandboxId: params.sandboxId,
-        path: scriptPath,
-        text: [
-            "import argparse",
-            "import json",
-            "",
-            "def main():",
-            "    p = argparse.ArgumentParser()",
-            "    p.add_argument('--path', required=True)",
-            "    p.add_argument('--offset', type=int, required=True)",
-            "    p.add_argument('--limit', type=int, required=True)",
-            "    args = p.parse_args()",
-            "",
-            "    records = []",
-            "    current = 0",
-            "    taken = 0",
-            "    done = True",
-            "",
-            "    with open(args.path, 'r', encoding='utf-8', errors='replace') as f:",
-            "        for line in f:",
-            "            if current < args.offset:",
-            "                current += 1",
-            "                continue",
-            "            if taken >= args.limit:",
-            "                done = False",
-            "                break",
-            "            line = line.strip()",
-            "            if not line:",
-            "                current += 1",
-            "                continue",
-            "            try:",
-            "                records.append(json.loads(line))",
-            "            except Exception:",
-            "                # Skip invalid JSON line",
-            "                pass",
-            "            taken += 1",
-            "            current += 1",
-            "",
-            "    out = {",
-            "        'records': records,",
-            "        'nextOffset': args.offset + taken,",
-            "        'done': done,",
-            "    }",
-            "    print(json.dumps(out, ensure_ascii=False))",
-            "",
-            "if __name__ == '__main__':",
-            "    main()",
-            "",
-        ].join("\n"),
-    });
-    const res = await runDatasetSandboxCommandStep({
-        env: params.env,
-        sandboxId: params.sandboxId,
-        cmd: "python",
-        args: [
-            scriptPath,
-            "--path",
-            filePath,
-            "--offset",
-            String(Math.max(0, params.offset ?? 0)),
-            "--limit",
-            String(Math.max(1, params.limit ?? 1)),
-        ],
-    });
-    if (res.exitCode !== 0) {
-        throw new Error(res.stderr || "Failed to read rows chunk from sandbox");
-    }
-    const text = (res.stdout ?? "").trim();
-    if (!text) {
-        return { records: [], nextOffset: params.offset, done: true };
-    }
-    // The script prints a single JSON object.
-    const parsed = JSON.parse(text);
-    return {
-        records: Array.isArray(parsed?.records) ? parsed.records : [],
-        nextOffset: Number(parsed?.nextOffset ?? params.offset),
-        done: Boolean(parsed?.done),
-    };
-}
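One design note on the two deleted readers: this line-offset variant re-opens the file and skips `offset` lines from the top on every call, while the byte-offset variant in `rowsOutputPaging.steps.js` resumed with `f.seek(byte_offset)`. Over a full scan that difference is quadratic versus linear:

```ts
// Cost sketch, assuming an N-line file paged k lines at a time.
const N = 1_000_000;
const k = 500;
// Line-offset reader: call i skips i*k lines, then reads k; summed over N/k calls
// that is roughly N^2 / (2k) line reads.
const lineOffsetReads = (N * N) / (2 * k); // = 1e9
// Byte-offset reader: seek() resumes directly, so each line is read once.
const byteOffsetReads = N; // = 1e6
```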