@ekairos/structure 1.21.54-beta.0 → 1.21.57-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/clearDataset.tool.d.ts +0 -1
- package/dist/clearDataset.tool.js +0 -1
- package/dist/completeObject.tool.d.ts +0 -1
- package/dist/completeObject.tool.js +0 -1
- package/dist/completeRows.tool.d.ts +0 -1
- package/dist/completeRows.tool.js +0 -1
- package/dist/dataset/steps.d.ts +0 -1
- package/dist/dataset/steps.js +48 -12
- package/dist/datasetFiles.d.ts +0 -1
- package/dist/datasetFiles.js +0 -1
- package/dist/datasetReader.d.ts +16 -0
- package/dist/datasetReader.js +25 -0
- package/dist/domain.d.ts +0 -1
- package/dist/domain.js +0 -1
- package/dist/executeCommand.tool.d.ts +0 -1
- package/dist/executeCommand.tool.js +0 -1
- package/dist/file/steps.d.ts +0 -1
- package/dist/file/steps.js +3 -4
- package/dist/generateSchema.tool.d.ts +0 -1
- package/dist/generateSchema.tool.js +0 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/prompts.d.ts +0 -1
- package/dist/prompts.js +0 -1
- package/dist/rowsOutputPaging.d.ts +57 -0
- package/dist/rowsOutputPaging.js +148 -0
- package/dist/rowsOutputPaging.steps.d.ts +37 -0
- package/dist/rowsOutputPaging.steps.js +125 -0
- package/dist/rowsPagination.steps.d.ts +59 -0
- package/dist/rowsPagination.steps.js +190 -0
- package/dist/runtime.d.ts +1 -2
- package/dist/runtime.js +1 -2
- package/dist/sandbox/steps.d.ts +0 -1
- package/dist/sandbox/steps.js +10 -7
- package/dist/schema.d.ts +0 -1
- package/dist/schema.js +3 -2
- package/dist/service.d.ts +0 -1
- package/dist/service.js +43 -34
- package/dist/steps/commitFromEvents.step.d.ts +0 -1
- package/dist/steps/commitFromEvents.step.js +2 -3
- package/dist/steps/persistObjectFromStory.step.d.ts +0 -1
- package/dist/steps/persistObjectFromStory.step.js +3 -4
- package/dist/structure.d.ts +29 -4
- package/dist/structure.js +35 -5
- package/package.json +14 -4
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeObject.tool.d.ts.map +0 -1
- package/dist/completeObject.tool.js.map +0 -1
- package/dist/completeRows.tool.d.ts.map +0 -1
- package/dist/completeRows.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/generateSchema.tool.d.ts.map +0 -1
- package/dist/generateSchema.tool.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/prompts.d.ts.map +0 -1
- package/dist/prompts.js.map +0 -1
- package/dist/runtime.d.ts.map +0 -1
- package/dist/runtime.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/steps/commitFromEvents.step.d.ts.map +0 -1
- package/dist/steps/commitFromEvents.step.js.map +0 -1
- package/dist/steps/persistObjectFromStory.step.d.ts.map +0 -1
- package/dist/steps/persistObjectFromStory.step.js.map +0 -1
- package/dist/structure.d.ts.map +0 -1
- package/dist/structure.js.map +0 -1
- package/dist/types/runtime.d.ts +0 -56
- package/dist/types/runtime.d.ts.map +0 -1
- package/dist/types/runtime.js +0 -2
- package/dist/types/runtime.js.map +0 -1
package/dist/dataset/steps.d.ts
CHANGED
package/dist/dataset/steps.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { getStoryRuntime } from "../runtime";
|
|
2
1
|
export async function structureGetOrCreateContextStep(params) {
|
|
3
2
|
"use step";
|
|
4
3
|
try {
|
|
5
|
-
const
|
|
4
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
5
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
6
6
|
const ctx = await runtime.store.getOrCreateContext({ key: params.contextKey });
|
|
7
7
|
return { ok: true, data: ctx };
|
|
8
8
|
}
|
|
@@ -14,7 +14,8 @@ export async function structureGetOrCreateContextStep(params) {
|
|
|
14
14
|
export async function structureGetContextStep(params) {
|
|
15
15
|
"use step";
|
|
16
16
|
try {
|
|
17
|
-
const
|
|
17
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
18
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
18
19
|
const ctx = await runtime.store.getContext({ key: params.contextKey });
|
|
19
20
|
if (!ctx)
|
|
20
21
|
return { ok: false, error: "Context not found" };
|
|
@@ -28,7 +29,8 @@ export async function structureGetContextStep(params) {
|
|
|
28
29
|
export async function structureUpdateContextContentStep(params) {
|
|
29
30
|
"use step";
|
|
30
31
|
try {
|
|
31
|
-
const
|
|
32
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
33
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
32
34
|
const updated = await runtime.store.updateContextContent({ key: params.contextKey }, params.content);
|
|
33
35
|
return { ok: true, data: updated };
|
|
34
36
|
}
|
|
@@ -40,7 +42,8 @@ export async function structureUpdateContextContentStep(params) {
|
|
|
40
42
|
export async function structurePatchContextContentStep(params) {
|
|
41
43
|
"use step";
|
|
42
44
|
try {
|
|
43
|
-
const
|
|
45
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
46
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
44
47
|
const existing = await runtime.store.getOrCreateContext({ key: params.contextKey });
|
|
45
48
|
const existingContent = (existing?.content ?? {});
|
|
46
49
|
const existingStructure = (existingContent?.structure ?? {});
|
|
@@ -61,7 +64,8 @@ export async function structurePatchContextContentStep(params) {
|
|
|
61
64
|
export async function structureUploadRowsOutputJsonlStep(params) {
|
|
62
65
|
"use step";
|
|
63
66
|
try {
|
|
64
|
-
const
|
|
67
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
68
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
65
69
|
const db = runtime.db;
|
|
66
70
|
const storagePath = `/structure/${params.structureId}/output.jsonl`;
|
|
67
71
|
const fileBuffer = Buffer.from(params.contentBase64 ?? "", "base64");
|
|
@@ -82,7 +86,8 @@ export async function structureUploadRowsOutputJsonlStep(params) {
|
|
|
82
86
|
export async function structureLinkRowsOutputFileToContextStep(params) {
|
|
83
87
|
"use step";
|
|
84
88
|
try {
|
|
85
|
-
const
|
|
89
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
90
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
86
91
|
const store = runtime.store;
|
|
87
92
|
const db = runtime.db;
|
|
88
93
|
const ctx = await store.getOrCreateContext({ key: params.contextKey });
|
|
@@ -100,7 +105,8 @@ export async function structureLinkRowsOutputFileToContextStep(params) {
|
|
|
100
105
|
export async function structureUnlinkRowsOutputFileFromContextStep(params) {
|
|
101
106
|
"use step";
|
|
102
107
|
try {
|
|
103
|
-
const
|
|
108
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
109
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
104
110
|
const store = runtime.store;
|
|
105
111
|
const db = runtime.db;
|
|
106
112
|
const ctx = await store.getOrCreateContext({ key: params.contextKey });
|
|
@@ -118,7 +124,8 @@ export async function structureUnlinkRowsOutputFileFromContextStep(params) {
|
|
|
118
124
|
export async function structureGetContextWithRowsOutputFileStep(params) {
|
|
119
125
|
"use step";
|
|
120
126
|
try {
|
|
121
|
-
const
|
|
127
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
128
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
122
129
|
const db = runtime.db;
|
|
123
130
|
const query = (await db.query({
|
|
124
131
|
context_contexts: {
|
|
@@ -140,7 +147,8 @@ export async function structureReadRowsOutputJsonlStep(params) {
|
|
|
140
147
|
"use step";
|
|
141
148
|
try {
|
|
142
149
|
const contextKey = `structure:${params.structureId}`;
|
|
143
|
-
const
|
|
150
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
151
|
+
const runtime = await resolveStoryRuntime(params.env);
|
|
144
152
|
const db = runtime.db;
|
|
145
153
|
const query = (await db.query({
|
|
146
154
|
context_contexts: {
|
|
@@ -155,7 +163,7 @@ export async function structureReadRowsOutputJsonlStep(params) {
|
|
|
155
163
|
const url = linked?.url;
|
|
156
164
|
if (!url)
|
|
157
165
|
return { ok: false, error: "Rows output file not found" };
|
|
158
|
-
const fileBuffer = await
|
|
166
|
+
const fileBuffer = await fetchArrayBufferWithRetry(url, { attempts: 4, timeoutMs: 90000 });
|
|
159
167
|
return { ok: true, data: { contentBase64: Buffer.from(fileBuffer).toString("base64") } };
|
|
160
168
|
}
|
|
161
169
|
catch (error) {
|
|
@@ -163,4 +171,32 @@ export async function structureReadRowsOutputJsonlStep(params) {
|
|
|
163
171
|
return { ok: false, error: message };
|
|
164
172
|
}
|
|
165
173
|
}
|
|
166
|
-
|
|
174
|
+
async function sleep(ms) {
|
|
175
|
+
return new Promise((r) => setTimeout(r, ms));
|
|
176
|
+
}
|
|
177
|
+
async function fetchArrayBufferWithRetry(url, opts) {
|
|
178
|
+
let lastError = null;
|
|
179
|
+
for (let attempt = 1; attempt <= opts.attempts; attempt++) {
|
|
180
|
+
const controller = new AbortController();
|
|
181
|
+
const timer = setTimeout(() => controller.abort(), opts.timeoutMs);
|
|
182
|
+
try {
|
|
183
|
+
const res = await fetch(url, { signal: controller.signal });
|
|
184
|
+
if (!res.ok) {
|
|
185
|
+
throw new Error(`Failed to download rows output file (HTTP ${res.status})`);
|
|
186
|
+
}
|
|
187
|
+
return await res.arrayBuffer();
|
|
188
|
+
}
|
|
189
|
+
catch (e) {
|
|
190
|
+
lastError = e;
|
|
191
|
+
if (attempt < opts.attempts) {
|
|
192
|
+
await sleep(250 * Math.pow(2, attempt - 1));
|
|
193
|
+
continue;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
finally {
|
|
197
|
+
clearTimeout(timer);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
const message = lastError instanceof Error ? lastError.message : String(lastError);
|
|
201
|
+
throw new Error(message || "Failed to download rows output file");
|
|
202
|
+
}
|
package/dist/datasetFiles.d.ts
CHANGED
|
@@ -3,4 +3,3 @@ export declare const DATASET_OUTPUT_SCHEMA_FILE_NAME = "output_schema.json";
|
|
|
3
3
|
export declare function getDatasetWorkstation(datasetId: string): string;
|
|
4
4
|
export declare function getDatasetOutputPath(datasetId: string): string;
|
|
5
5
|
export declare function getDatasetOutputSchemaPath(datasetId: string): string;
|
|
6
|
-
//# sourceMappingURL=datasetFiles.d.ts.map
|
package/dist/datasetFiles.js
CHANGED
|
@@ -0,0 +1,16 @@ package/dist/datasetReader.d.ts (new file)
|
|
|
1
|
+
import { type StructureRowsOutputPagingCursor, type StructureRowsOutputSandboxRef } from "./rowsOutputPaging";
|
|
2
|
+
/**
 * Builds a reader object for the rows output of one dataset.
 *
 * @param env - Environment value handed unchanged to the underlying steps.
 * @param opts - `datasetId` selects the dataset. `sandboxId`, `runtime` and `timeoutMs`
 *   are optional settings for the download.
 * @returns An object exposing the dataset id, a `download()` call that yields a sandbox
 *   reference, and a `readPage()` call that reads one page of rows from a sandbox file.
 */
export declare function datasetReader(env: any, opts: {
    datasetId: string;
    sandboxId?: string;
    runtime?: string;
    timeoutMs?: number;
}): {
    datasetId: string;
    download(): Promise<StructureRowsOutputSandboxRef>;
    readPage(params: {
        sandboxId: string;
        localPath: string;
        cursor?: Partial<StructureRowsOutputPagingCursor>;
        limit: number;
    }): Promise<import("./rowsOutputPaging").StructureReadRowsOutputPageFromSandboxResult>;
};
|
|
@@ -0,0 +1,25 @@ package/dist/datasetReader.js (new file)
|
|
|
1
|
+
import { structureDownloadRowsOutputToSandboxStep, structureReadRowsOutputPageFromSandboxStep, } from "./rowsOutputPaging";
|
|
2
|
+
export function datasetReader(env, opts) {
|
|
3
|
+
const datasetId = opts.datasetId;
|
|
4
|
+
return {
|
|
5
|
+
datasetId,
|
|
6
|
+
async download() {
|
|
7
|
+
return await structureDownloadRowsOutputToSandboxStep({
|
|
8
|
+
env,
|
|
9
|
+
structureId: datasetId,
|
|
10
|
+
sandboxId: opts.sandboxId,
|
|
11
|
+
runtime: opts.runtime,
|
|
12
|
+
timeoutMs: opts.timeoutMs,
|
|
13
|
+
});
|
|
14
|
+
},
|
|
15
|
+
async readPage(params) {
|
|
16
|
+
return await structureReadRowsOutputPageFromSandboxStep({
|
|
17
|
+
env,
|
|
18
|
+
sandboxId: params.sandboxId,
|
|
19
|
+
localPath: params.localPath,
|
|
20
|
+
cursor: params.cursor,
|
|
21
|
+
limit: params.limit,
|
|
22
|
+
});
|
|
23
|
+
},
|
|
24
|
+
};
|
|
25
|
+
}
|
package/dist/domain.d.ts
CHANGED
package/dist/domain.js
CHANGED
package/dist/file/steps.d.ts
CHANGED
package/dist/file/steps.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { getStoryRuntime } from "../runtime";
|
|
2
|
-
import { DatasetService } from "../service";
|
|
3
1
|
export async function readInstantFileStep(params) {
|
|
4
2
|
"use step";
|
|
5
|
-
const
|
|
3
|
+
const { resolveStoryRuntime } = await import("@ekairos/story/runtime");
|
|
4
|
+
const runtime = (await resolveStoryRuntime(params.env));
|
|
6
5
|
const db = runtime.db;
|
|
6
|
+
const { DatasetService } = await import("../service");
|
|
7
7
|
const service = new DatasetService(db);
|
|
8
8
|
const file = await service.getFileById(params.fileId);
|
|
9
9
|
const fileRow = file?.$files?.[0];
|
|
@@ -17,4 +17,3 @@ export async function readInstantFileStep(params) {
|
|
|
17
17
|
contentDisposition: fileRow["content-disposition"],
|
|
18
18
|
};
|
|
19
19
|
}
|
|
20
|
-
//# sourceMappingURL=steps.js.map
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/prompts.d.ts
CHANGED
package/dist/prompts.js
CHANGED
|
@@ -0,0 +1,57 @@ package/dist/rowsOutputPaging.d.ts (new file)
|
|
|
1
|
+
/**
 * Position inside a sandbox-local rows output file, used to resume a paged read.
 */
export type StructureRowsOutputPagingCursor = {
    /** Byte position in the file at which the next read starts. */
    byteOffset: number;
    /** Running count of row records returned so far. */
    rowOffset: number;
};
|
|
5
|
+
/**
 * Points at a rows output file that has been downloaded into a sandbox.
 */
export type StructureRowsOutputSandboxRef = {
    /** Identifier of the sandbox holding the file. */
    sandboxId: string;
    /** Path of the downloaded file inside that sandbox. */
    localPath: string;
};
|
|
9
|
+
/**
 * Step 1 of 2: copy the rows `output.jsonl` from Instant storage into a file inside a sandbox.
 *
 * Doing the download in one dedicated step keeps network flakiness
 * (e.g. undici `TypeError: terminated`) in a single place; later reads only touch
 * the sandbox-local file.
 *
 * @param params - `structureId` selects the output to download. `sandboxId` reuses an
 *   existing sandbox; when omitted a sandbox is created using `runtime` and `timeoutMs`.
 * @returns The sandbox id and the path of the downloaded file inside it.
 */
export declare function structureDownloadRowsOutputToSandboxStep(params: {
    env: any;
    structureId: string;
    sandboxId?: string;
    runtime?: string;
    timeoutMs?: number;
}): Promise<StructureRowsOutputSandboxRef>;
|
|
23
|
+
/**
 * One page of rows read from a sandbox-local rows output file.
 */
export type StructureReadRowsOutputPageFromSandboxResult = {
    /** The `data` payload of each row record found on this page. */
    rows: any[];
    /** Cursor to pass to the next read in order to continue after this page. */
    nextCursor: StructureRowsOutputPagingCursor;
    /** True when the page came back with fewer rows than the requested limit. */
    done: boolean;
};
|
|
28
|
+
/**
 * Step 2 of 2: read the next page of ROW records from the sandbox-local `output.jsonl`.
 *
 * At most `limit` rows are returned. Paging state is explicit: the caller supplies
 * `cursor` and receives `nextCursor` for the following call.
 *
 * @param params - `sandboxId` and `localPath` locate the file; `cursor` is optional and
 *   missing offsets default to 0; `limit` bounds the number of rows per page.
 * @returns The rows of this page, the cursor for the next page and a completion flag.
 */
export declare function structureReadRowsOutputPageFromSandboxStep(params: {
    env: any;
    sandboxId: string;
    localPath: string;
    cursor?: Partial<StructureRowsOutputPagingCursor>;
    limit: number;
}): Promise<StructureReadRowsOutputPageFromSandboxResult>;
|
|
41
|
+
/**
 * Back-compat alias kept for the older naming.
 * New code should call `structureReadRowsOutputPageFromSandboxStep` instead.
 *
 * Takes the two offsets as separate optional fields (instead of a cursor object) and
 * returns the next offsets as flat fields.
 */
export declare function structureReadRowsOutputChunkStep(params: {
    env: any;
    sandboxId: string;
    localPath: string;
    byteOffset?: number;
    rowOffset?: number;
    limit: number;
}): Promise<{
    rows: any[];
    nextByteOffset: number;
    nextRowOffset: number;
    done: boolean;
}>;
|
|
@@ -0,0 +1,148 @@ package/dist/rowsOutputPaging.js (new file)
|
|
|
1
|
+
import { getDatasetOutputPath, getDatasetWorkstation } from "./datasetFiles";
|
|
2
|
+
import { createDatasetSandboxStep, runDatasetSandboxCommandStep } from "./sandbox/steps";
|
|
3
|
+
import { getStoryRuntime } from "./runtime";
|
|
4
|
+
/**
|
|
5
|
+
* Step 1/2:
|
|
6
|
+
* Download the rows output.jsonl from Instant storage into a sandbox file.
|
|
7
|
+
*
|
|
8
|
+
* This isolates network flakiness (e.g. undici `TypeError: terminated`) into a single step
|
|
9
|
+
* and makes subsequent reads purely sandbox-local.
|
|
10
|
+
*/
|
|
11
|
+
export async function structureDownloadRowsOutputToSandboxStep(params) {
|
|
12
|
+
"use step";
|
|
13
|
+
const runtime = params.runtime ?? "python3.13";
|
|
14
|
+
const timeoutMs = params.timeoutMs ?? 10 * 60 * 1000;
|
|
15
|
+
const sandboxId = params.sandboxId ??
|
|
16
|
+
(await createDatasetSandboxStep({
|
|
17
|
+
env: params.env,
|
|
18
|
+
runtime,
|
|
19
|
+
timeoutMs,
|
|
20
|
+
purpose: "structure.rows-output.reader",
|
|
21
|
+
params: { structureId: params.structureId },
|
|
22
|
+
})).sandboxId;
|
|
23
|
+
const workstation = getDatasetWorkstation(params.structureId);
|
|
24
|
+
const localPath = getDatasetOutputPath(params.structureId);
|
|
25
|
+
await runDatasetSandboxCommandStep({
|
|
26
|
+
env: params.env,
|
|
27
|
+
sandboxId,
|
|
28
|
+
cmd: "mkdir",
|
|
29
|
+
args: ["-p", workstation],
|
|
30
|
+
});
|
|
31
|
+
const storyRuntime = await getStoryRuntime(params.env);
|
|
32
|
+
const db = storyRuntime.db;
|
|
33
|
+
const contextKey = `structure:${params.structureId}`;
|
|
34
|
+
const query = (await db.query({
|
|
35
|
+
context_contexts: {
|
|
36
|
+
$: { where: { key: contextKey }, limit: 1 },
|
|
37
|
+
structure_output_file: {},
|
|
38
|
+
},
|
|
39
|
+
}));
|
|
40
|
+
const ctx = query.context_contexts?.[0];
|
|
41
|
+
const linked = Array.isArray(ctx?.structure_output_file) ? ctx.structure_output_file[0] : ctx.structure_output_file;
|
|
42
|
+
const url = linked?.url;
|
|
43
|
+
if (!url) {
|
|
44
|
+
throw new Error("Rows output file not found");
|
|
45
|
+
}
|
|
46
|
+
// Download inside the sandbox runtime (python) to avoid streaming aborts in the Node step runtime.
|
|
47
|
+
const py = [
|
|
48
|
+
"import sys, urllib.request",
|
|
49
|
+
"url = sys.argv[1]",
|
|
50
|
+
"out_path = sys.argv[2]",
|
|
51
|
+
"with urllib.request.urlopen(url) as r:",
|
|
52
|
+
" data = r.read()",
|
|
53
|
+
"with open(out_path, 'wb') as f:",
|
|
54
|
+
" f.write(data)",
|
|
55
|
+
"print('ok', len(data))",
|
|
56
|
+
].join("\n");
|
|
57
|
+
const res = await runDatasetSandboxCommandStep({
|
|
58
|
+
env: params.env,
|
|
59
|
+
sandboxId,
|
|
60
|
+
cmd: "python",
|
|
61
|
+
args: ["-c", py, String(url), localPath],
|
|
62
|
+
});
|
|
63
|
+
if (res.exitCode !== 0) {
|
|
64
|
+
throw new Error(res.stderr || "Failed to download rows output to sandbox");
|
|
65
|
+
}
|
|
66
|
+
return { sandboxId, localPath };
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Step 2/2:
|
|
70
|
+
* Read the next page of ROW records from the sandbox-local output.jsonl, bounded by `limit`.
|
|
71
|
+
*
|
|
72
|
+
* Pagination state is passed explicitly via `cursor` and returned as `nextCursor`.
|
|
73
|
+
*/
|
|
74
|
+
export async function structureReadRowsOutputPageFromSandboxStep(params) {
|
|
75
|
+
"use step";
|
|
76
|
+
const byteOffset = params.cursor?.byteOffset ?? 0;
|
|
77
|
+
const rowOffset = params.cursor?.rowOffset ?? 0;
|
|
78
|
+
const py = [
|
|
79
|
+
"import sys, json",
|
|
80
|
+
"path = sys.argv[1]",
|
|
81
|
+
"byte_offset = int(sys.argv[2])",
|
|
82
|
+
"row_offset = int(sys.argv[3])",
|
|
83
|
+
"limit = int(sys.argv[4])",
|
|
84
|
+
"rows = []",
|
|
85
|
+
"next_byte = byte_offset",
|
|
86
|
+
"next_row = row_offset",
|
|
87
|
+
"with open(path, 'rb') as f:",
|
|
88
|
+
" f.seek(byte_offset)",
|
|
89
|
+
" while len(rows) < limit:",
|
|
90
|
+
" line = f.readline()",
|
|
91
|
+
" if not line:",
|
|
92
|
+
" break",
|
|
93
|
+
" next_byte = f.tell()",
|
|
94
|
+
" try:",
|
|
95
|
+
" obj = json.loads(line.decode('utf-8'))",
|
|
96
|
+
" except Exception:",
|
|
97
|
+
" continue",
|
|
98
|
+
" if obj.get('type') != 'row':",
|
|
99
|
+
" continue",
|
|
100
|
+
" rows.append(obj.get('data'))",
|
|
101
|
+
" next_row += 1",
|
|
102
|
+
"done = len(rows) < limit",
|
|
103
|
+
"print(json.dumps({",
|
|
104
|
+
" 'rows': rows,",
|
|
105
|
+
" 'nextByteOffset': next_byte,",
|
|
106
|
+
" 'nextRowOffset': next_row,",
|
|
107
|
+
" 'done': done,",
|
|
108
|
+
"}))",
|
|
109
|
+
].join("\n");
|
|
110
|
+
const res = await runDatasetSandboxCommandStep({
|
|
111
|
+
env: params.env,
|
|
112
|
+
sandboxId: params.sandboxId,
|
|
113
|
+
cmd: "python",
|
|
114
|
+
args: ["-c", py, params.localPath, String(byteOffset), String(rowOffset), String(params.limit)],
|
|
115
|
+
});
|
|
116
|
+
if (res.exitCode !== 0) {
|
|
117
|
+
throw new Error(res.stderr || "Failed to read rows page from sandbox");
|
|
118
|
+
}
|
|
119
|
+
const out = String(res.stdout ?? "").trim();
|
|
120
|
+
const parsed = JSON.parse(out);
|
|
121
|
+
return {
|
|
122
|
+
rows: parsed.rows ?? [],
|
|
123
|
+
nextCursor: {
|
|
124
|
+
byteOffset: parsed.nextByteOffset ?? byteOffset,
|
|
125
|
+
rowOffset: parsed.nextRowOffset ?? rowOffset,
|
|
126
|
+
},
|
|
127
|
+
done: Boolean(parsed.done),
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Back-compat alias (older naming).
|
|
132
|
+
* Prefer `structureReadRowsOutputPageFromSandboxStep`.
|
|
133
|
+
*/
|
|
134
|
+
export async function structureReadRowsOutputChunkStep(params) {
|
|
135
|
+
const res = await structureReadRowsOutputPageFromSandboxStep({
|
|
136
|
+
env: params.env,
|
|
137
|
+
sandboxId: params.sandboxId,
|
|
138
|
+
localPath: params.localPath,
|
|
139
|
+
cursor: { byteOffset: params.byteOffset ?? 0, rowOffset: params.rowOffset ?? 0 },
|
|
140
|
+
limit: params.limit,
|
|
141
|
+
});
|
|
142
|
+
return {
|
|
143
|
+
rows: res.rows,
|
|
144
|
+
nextByteOffset: res.nextCursor.byteOffset,
|
|
145
|
+
nextRowOffset: res.nextCursor.rowOffset,
|
|
146
|
+
done: res.done,
|
|
147
|
+
};
|
|
148
|
+
}
|
|
@@ -0,0 +1,37 @@ package/dist/rowsOutputPaging.steps.d.ts (new file)
|
|
|
1
|
+
/**
 * Result of downloading a rows output file into a sandbox.
 */
export type RowsOutputSandboxDownloadResult = {
    /** Identifier of the sandbox holding the file. */
    sandboxId: string;
    /** Path of the file inside that sandbox (presumably the downloaded output.jsonl). */
    localPath: string;
};
|
|
5
|
+
/**
 * One chunk of rows together with the offsets needed to request the following chunk.
 *
 * @typeParam T - Shape of a single row; defaults to `any`.
 */
export type RowsOutputChunkResult<T = any> = {
    /** Rows contained in this chunk. */
    rows: T[];
    /** Byte offset to pass when requesting the next chunk. */
    nextByteOffset: number;
    /** Row offset to pass when requesting the next chunk. */
    nextRowOffset: number;
    /** Completion flag; presumably true once no further rows remain. */
    done: boolean;
};
|
|
11
|
+
/**
 * Step 1 of 2: copy the rows `output.jsonl` from Instant storage into a file inside a sandbox.
 *
 * Doing the download in one dedicated step keeps network flakiness
 * (e.g. undici `TypeError: terminated`) in a single place; later reads only touch
 * the sandbox-local file.
 *
 * @param params - `structureId` selects the output to download; `runtime` and `timeoutMs`
 *   are optional (presumably sandbox settings; the implementation is not part of this file).
 * @returns The sandbox id and the path of the downloaded file inside it.
 */
export declare function structureDownloadRowsOutputToSandboxStep(params: {
    env: any;
    structureId: string;
    runtime?: string;
    timeoutMs?: number;
}): Promise<RowsOutputSandboxDownloadResult>;
|
|
24
|
+
/**
 * Step 2 of 2: read the next chunk of ROW records from the sandbox-local `output.jsonl`.
 *
 * At most `limit` rows are returned. Paging state is explicit: the caller supplies
 * `{ byteOffset, rowOffset }` and receives the next offsets in the result.
 *
 * @typeParam T - Shape of a single row; defaults to `any`.
 * @param params - `sandboxId` and `localPath` locate the file; `byteOffset` and `rowOffset`
 *   give the position to continue from; `limit` bounds the number of rows per chunk.
 * @returns The rows of this chunk, the next offsets and a completion flag.
 */
export declare function structureReadRowsOutputChunkStep<T = any>(params: {
    env: any;
    sandboxId: string;
    localPath: string;
    byteOffset: number;
    rowOffset: number;
    limit: number;
}): Promise<RowsOutputChunkResult<T>>;
|