@ekairos/dataset 1.22.81-beta.development.0 → 1.22.83-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/materialize.d.ts +2 -0
- package/dist/builder/materialize.js +15 -17
- package/dist/builder/types.d.ts +2 -0
- package/dist/completeDataset.steps.d.ts +9 -8
- package/dist/completeDataset.steps.js +17 -10
- package/dist/completeDataset.tool.d.ts +9 -8
- package/dist/completeDataset.tool.js +2 -1
- package/dist/contextWorkspace.d.ts +72 -0
- package/dist/contextWorkspace.js +218 -0
- package/dist/executeCommand.tool.d.ts +1 -43
- package/dist/executeCommand.tool.js +10 -3
- package/dist/file/file-dataset.agent.d.ts +2 -0
- package/dist/file/file-dataset.agent.js +51 -15
- package/dist/file/file-dataset.steps.d.ts +6 -0
- package/dist/file/file-dataset.steps.js +18 -21
- package/dist/file/file-dataset.types.d.ts +10 -0
- package/dist/file/prompts.js +4 -2
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/service.d.ts +1 -0
- package/dist/service.js +1 -1
- package/package.json +7 -7
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { createFileParseContext } from "../file/file-dataset.agent.js";
|
|
2
2
|
import { readInstantFileStep } from "../file/steps.js";
|
|
3
|
-
import { generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "../file/file-dataset.steps.js";
|
|
4
3
|
import { createTransformDatasetContext } from "../transform/transform-dataset.agent.js";
|
|
5
4
|
import { ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "../transform/transform-dataset.steps.js";
|
|
6
5
|
import { datasetGetByIdStep, datasetInferAndUpdateSchemaStep, datasetPreviewRowsStep, datasetReadOneStep, } from "../dataset/steps.js";
|
|
@@ -266,27 +265,16 @@ export async function prepareDatasetSourcesStep(params) {
|
|
|
266
265
|
const fileId = params.source.kind === "file"
|
|
267
266
|
? params.source.fileId
|
|
268
267
|
: await uploadInlineTextSource(params.runtime, params.datasetId, params.source);
|
|
269
|
-
const initialized = await initializeFileParseSandboxStep({
|
|
270
|
-
runtime: params.runtime,
|
|
271
|
-
sandboxId: params.sandboxId,
|
|
272
|
-
datasetId: params.datasetId,
|
|
273
|
-
fileId,
|
|
274
|
-
state: { initialized: false, filePath: "" },
|
|
275
|
-
});
|
|
276
|
-
const filePreview = await generateFileParsePreviewStep({
|
|
277
|
-
runtime: params.runtime,
|
|
278
|
-
sandboxId: params.sandboxId,
|
|
279
|
-
sandboxFilePath: initialized.filePath,
|
|
280
|
-
datasetId: params.datasetId,
|
|
281
|
-
});
|
|
282
268
|
return {
|
|
283
269
|
kind: "file",
|
|
284
270
|
datasetId: params.datasetId,
|
|
285
271
|
sandboxId: params.sandboxId,
|
|
286
272
|
fileId,
|
|
287
|
-
sandboxState: initialized
|
|
288
|
-
filePreview,
|
|
273
|
+
sandboxState: { initialized: false, filePath: "" },
|
|
274
|
+
filePreview: undefined,
|
|
289
275
|
schema: params.schema ?? null,
|
|
276
|
+
filename: params.source.kind === "file" ? params.source.filename : params.source.name,
|
|
277
|
+
mediaType: params.source.kind === "file" ? params.source.mediaType : params.source.mimeType,
|
|
290
278
|
};
|
|
291
279
|
}
|
|
292
280
|
const initialized = await ensureTransformSourcesInSandboxStep({
|
|
@@ -393,7 +381,13 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
|
|
|
393
381
|
instructions: state.instructions,
|
|
394
382
|
sources: [
|
|
395
383
|
source.kind === "file"
|
|
396
|
-
? {
|
|
384
|
+
? {
|
|
385
|
+
kind: "file",
|
|
386
|
+
fileId: source.fileId,
|
|
387
|
+
description: source.description,
|
|
388
|
+
filename: source.filename,
|
|
389
|
+
mediaType: source.mediaType,
|
|
390
|
+
}
|
|
397
391
|
: {
|
|
398
392
|
kind: "text",
|
|
399
393
|
mimeType: source.mimeType,
|
|
@@ -428,6 +422,8 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
|
|
|
428
422
|
sandboxState: context.sandboxState,
|
|
429
423
|
filePreview: context.filePreview,
|
|
430
424
|
schema: context.schema,
|
|
425
|
+
filename: context.filename,
|
|
426
|
+
mediaType: context.mediaType,
|
|
431
427
|
});
|
|
432
428
|
await parseContext.parse(state.runtime, {
|
|
433
429
|
durable: await resolveDatasetAgentDurable(state.durable),
|
|
@@ -440,6 +436,8 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
|
|
|
440
436
|
sandboxState: context.sandboxState,
|
|
441
437
|
filePreview: context.filePreview,
|
|
442
438
|
schema: context.schema,
|
|
439
|
+
filename: context.filename,
|
|
440
|
+
mediaType: context.mediaType,
|
|
443
441
|
},
|
|
444
442
|
});
|
|
445
443
|
return targetDatasetId;
|
package/dist/builder/types.d.ts
CHANGED
|
@@ -12,6 +12,8 @@ export type DatasetQuerySourceInput<D extends DomainSchemaResult = DomainSchemaR
|
|
|
12
12
|
export type DatasetFileSourceInput = {
|
|
13
13
|
fileId: string;
|
|
14
14
|
description?: string;
|
|
15
|
+
filename?: string;
|
|
16
|
+
mediaType?: string;
|
|
15
17
|
};
|
|
16
18
|
export type DatasetTextSourceInput = {
|
|
17
19
|
text: string;
|
|
@@ -3,8 +3,9 @@ export interface PersistDatasetStepParams {
|
|
|
3
3
|
sandboxId: string;
|
|
4
4
|
runtime: any;
|
|
5
5
|
summary?: string;
|
|
6
|
+
outputPath?: string;
|
|
6
7
|
}
|
|
7
|
-
export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary }: PersistDatasetStepParams): Promise<{
|
|
8
|
+
export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }: PersistDatasetStepParams): Promise<{
|
|
8
9
|
success: boolean;
|
|
9
10
|
validation?: RowValidationEntry[];
|
|
10
11
|
validationTruncated?: number;
|
|
@@ -23,18 +24,18 @@ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summ
|
|
|
23
24
|
validation: RowValidationEntry[] | undefined;
|
|
24
25
|
error: string;
|
|
25
26
|
message: string;
|
|
26
|
-
|
|
27
|
-
|
|
27
|
+
records?: undefined;
|
|
28
|
+
summary?: undefined;
|
|
28
29
|
} | {
|
|
29
30
|
success: boolean;
|
|
30
31
|
status: string;
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
message: string;
|
|
32
|
+
records: number;
|
|
33
|
+
summary: string;
|
|
34
|
+
validRows?: undefined;
|
|
35
|
+
rowRecordCount?: undefined;
|
|
36
36
|
validation?: undefined;
|
|
37
37
|
error?: undefined;
|
|
38
|
+
message?: undefined;
|
|
38
39
|
}>;
|
|
39
40
|
type RowValidationEntry = {
|
|
40
41
|
index: number;
|
|
@@ -13,14 +13,15 @@ function getAjv() {
|
|
|
13
13
|
}
|
|
14
14
|
return ajvInstance;
|
|
15
15
|
}
|
|
16
|
-
export async function persistDatasetStep({ runtime, datasetId, sandboxId, summary }) {
|
|
16
|
+
export async function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }) {
|
|
17
17
|
"use step";
|
|
18
|
-
const
|
|
18
|
+
const resolvedOutputPath = outputPath ?? getDatasetOutputPath(datasetId);
|
|
19
|
+
const storagePath = resolveExecutionStoragePath(resolvedOutputPath, datasetId);
|
|
19
20
|
if (summary) {
|
|
20
21
|
console.log(`[Dataset ${datasetId}] Persisting completed dataset: ${summary}`);
|
|
21
22
|
}
|
|
22
23
|
try {
|
|
23
|
-
await ensureFileExists(runtime, sandboxId,
|
|
24
|
+
await ensureFileExists(runtime, sandboxId, resolvedOutputPath);
|
|
24
25
|
}
|
|
25
26
|
catch (error) {
|
|
26
27
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -85,7 +86,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
85
86
|
const validationResult = await validateJsonlRows({
|
|
86
87
|
runtime,
|
|
87
88
|
sandboxId,
|
|
88
|
-
outputPath,
|
|
89
|
+
outputPath: resolvedOutputPath,
|
|
89
90
|
validator,
|
|
90
91
|
schema: schemaJson,
|
|
91
92
|
datasetId,
|
|
@@ -96,7 +97,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
96
97
|
const totalValidRows = validationResult.validRowCount ?? 0;
|
|
97
98
|
const rowRecordCount = validationResult.rowRecordCount ?? totalValidRows;
|
|
98
99
|
console.log(`[Dataset ${datasetId}] Reading file content for upload`);
|
|
99
|
-
const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path:
|
|
100
|
+
const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: resolvedOutputPath });
|
|
100
101
|
if (!fileRead.contentBase64) {
|
|
101
102
|
console.error(`[Dataset ${datasetId}] Empty file content`);
|
|
102
103
|
return {
|
|
@@ -113,6 +114,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
113
114
|
const uploadResult = await service.uploadDatasetOutputFile({
|
|
114
115
|
datasetId,
|
|
115
116
|
fileBuffer: Buffer.from(fileRead.contentBase64, "base64"),
|
|
117
|
+
storagePath,
|
|
116
118
|
});
|
|
117
119
|
if (!uploadResult.ok) {
|
|
118
120
|
console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
|
|
@@ -150,13 +152,18 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
150
152
|
return {
|
|
151
153
|
success: true,
|
|
152
154
|
status: "completed",
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
fileId: uploadResult.data.fileId,
|
|
156
|
-
storagePath: uploadResult.data.storagePath,
|
|
157
|
-
message: "Dataset creation completed and uploaded to storage",
|
|
155
|
+
records: totalValidRows,
|
|
156
|
+
summary: summary ?? `Dataset completed with ${totalValidRows} records.`,
|
|
158
157
|
};
|
|
159
158
|
}
|
|
159
|
+
function resolveExecutionStoragePath(outputPath, datasetId) {
|
|
160
|
+
const normalized = String(outputPath ?? "").replace(/\\/g, "/");
|
|
161
|
+
const marker = "/tmp/ekairos/contexts/";
|
|
162
|
+
if (normalized.startsWith(marker)) {
|
|
163
|
+
return normalized.slice("/tmp/ekairos".length);
|
|
164
|
+
}
|
|
165
|
+
return `/dataset/${datasetId}/output.jsonl`;
|
|
166
|
+
}
|
|
160
167
|
async function ensureFileExists(runtime, sandboxId, path) {
|
|
161
168
|
const result = await runDatasetSandboxCommandStep({
|
|
162
169
|
runtime,
|
|
@@ -2,8 +2,9 @@ interface CompleteDatasetToolParams {
|
|
|
2
2
|
datasetId: string;
|
|
3
3
|
sandboxId: string;
|
|
4
4
|
runtime: any;
|
|
5
|
+
outputPath?: string;
|
|
5
6
|
}
|
|
6
|
-
export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime }: CompleteDatasetToolParams): import("ai").Tool<{
|
|
7
|
+
export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }: CompleteDatasetToolParams): import("ai").Tool<{
|
|
7
8
|
summary: string;
|
|
8
9
|
}, {
|
|
9
10
|
success: boolean;
|
|
@@ -82,18 +83,18 @@ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtim
|
|
|
82
83
|
}[] | undefined;
|
|
83
84
|
error: string;
|
|
84
85
|
message: string;
|
|
85
|
-
|
|
86
|
-
|
|
86
|
+
records?: undefined;
|
|
87
|
+
summary?: undefined;
|
|
87
88
|
} | {
|
|
88
89
|
success: boolean;
|
|
89
90
|
status: string;
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
message: string;
|
|
91
|
+
records: number;
|
|
92
|
+
summary: string;
|
|
93
|
+
validRows?: undefined;
|
|
94
|
+
rowRecordCount?: undefined;
|
|
95
95
|
validation?: undefined;
|
|
96
96
|
error?: undefined;
|
|
97
|
+
message?: undefined;
|
|
97
98
|
}>;
|
|
98
99
|
export declare function didCompleteDatasetSucceed(event: {
|
|
99
100
|
content?: {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { tool } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { persistDatasetStep } from "./completeDataset.steps.js";
|
|
4
|
-
export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
|
|
4
|
+
export function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }) {
|
|
5
5
|
return tool({
|
|
6
6
|
description: "Mark the dataset as completed. Use only when output.jsonl has been successfully generated and is ready for validation.",
|
|
7
7
|
inputSchema: z.object({
|
|
@@ -17,6 +17,7 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
|
|
|
17
17
|
datasetId,
|
|
18
18
|
sandboxId,
|
|
19
19
|
summary,
|
|
20
|
+
outputPath,
|
|
20
21
|
});
|
|
21
22
|
},
|
|
22
23
|
});
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
export type ContextWorkspaceFileRole = "input" | "output" | "artifact";
|
|
2
|
+
export type ContextWorkspaceFileInput = {
|
|
3
|
+
fileId: string;
|
|
4
|
+
filename?: string;
|
|
5
|
+
mediaType?: string;
|
|
6
|
+
role?: ContextWorkspaceFileRole;
|
|
7
|
+
sourceEventId?: string;
|
|
8
|
+
sourcePartIndex?: number;
|
|
9
|
+
};
|
|
10
|
+
export type PreparedContextWorkspaceFile = {
|
|
11
|
+
fileId: string;
|
|
12
|
+
filename: string;
|
|
13
|
+
mediaType?: string;
|
|
14
|
+
role: ContextWorkspaceFileRole;
|
|
15
|
+
path: string;
|
|
16
|
+
sourceEventId?: string;
|
|
17
|
+
sourcePartIndex?: number;
|
|
18
|
+
};
|
|
19
|
+
export type PreparedContextExecutionWorkspace = {
|
|
20
|
+
contextId: string;
|
|
21
|
+
executionId: string;
|
|
22
|
+
sandboxId: string;
|
|
23
|
+
root: string;
|
|
24
|
+
contextRoot: string;
|
|
25
|
+
eventsDir: string;
|
|
26
|
+
outputDir: string;
|
|
27
|
+
scriptsDir: string;
|
|
28
|
+
tmpDir: string;
|
|
29
|
+
manifestPath: string;
|
|
30
|
+
files: PreparedContextWorkspaceFile[];
|
|
31
|
+
};
|
|
32
|
+
export declare function getContextWorkspaceBase(): string;
|
|
33
|
+
export declare function getContextExecutionWorkspaceRoot(params: {
|
|
34
|
+
contextId: string;
|
|
35
|
+
executionId: string;
|
|
36
|
+
root?: string;
|
|
37
|
+
}): string;
|
|
38
|
+
export declare function getContextWorkspaceRoot(params: {
|
|
39
|
+
contextId: string;
|
|
40
|
+
root?: string;
|
|
41
|
+
}): string;
|
|
42
|
+
export declare function getContextEventsDir(params: {
|
|
43
|
+
contextId: string;
|
|
44
|
+
root?: string;
|
|
45
|
+
}): string;
|
|
46
|
+
export declare function getContextExecutionWorkspaceDirs(params: {
|
|
47
|
+
contextId: string;
|
|
48
|
+
executionId: string;
|
|
49
|
+
root?: string;
|
|
50
|
+
}): {
|
|
51
|
+
root: string;
|
|
52
|
+
contextRoot: string;
|
|
53
|
+
eventsDir: string;
|
|
54
|
+
outputDir: string;
|
|
55
|
+
scriptsDir: string;
|
|
56
|
+
tmpDir: string;
|
|
57
|
+
manifestPath: string;
|
|
58
|
+
};
|
|
59
|
+
export declare function getContextExecutionWorkspaceStandardDirs(params: {
|
|
60
|
+
contextId: string;
|
|
61
|
+
executionId: string;
|
|
62
|
+
root?: string;
|
|
63
|
+
}): string[];
|
|
64
|
+
export declare function extractContextWorkspaceFilesFromEventItems(eventItems: unknown[]): ContextWorkspaceFileInput[];
|
|
65
|
+
export declare function prepareContextExecutionWorkspaceStep(params: {
|
|
66
|
+
runtime: any;
|
|
67
|
+
sandboxId: string;
|
|
68
|
+
contextId: string;
|
|
69
|
+
executionId: string;
|
|
70
|
+
files: ContextWorkspaceFileInput[];
|
|
71
|
+
root?: string;
|
|
72
|
+
}): Promise<PreparedContextExecutionWorkspace>;
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import { readInstantFileStep } from "./file/steps.js";
|
|
2
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
|
|
3
|
+
const CONTEXT_WORKSPACE_BASE = "/tmp/ekairos/contexts";
|
|
4
|
+
const WORKSPACE_MANIFEST_FILE_NAME = "manifest.json";
|
|
5
|
+
function trimTrailingSlash(value) {
|
|
6
|
+
return value.endsWith("/") ? value.slice(0, -1) : value;
|
|
7
|
+
}
|
|
8
|
+
function sanitizePathSegment(value, fallback) {
|
|
9
|
+
const parts = String(value ?? "")
|
|
10
|
+
.trim()
|
|
11
|
+
.replace(/\\/g, "/")
|
|
12
|
+
.split("/")
|
|
13
|
+
.filter(Boolean);
|
|
14
|
+
const normalized = parts[parts.length - 1]
|
|
15
|
+
?.replace(/[^a-zA-Z0-9_.-]/g, "_")
|
|
16
|
+
.replace(/_+/g, "_")
|
|
17
|
+
.slice(0, 160);
|
|
18
|
+
return normalized || fallback;
|
|
19
|
+
}
|
|
20
|
+
function filenameFromContentDisposition(value, fallback) {
|
|
21
|
+
const raw = String(value ?? "").trim();
|
|
22
|
+
if (!raw)
|
|
23
|
+
return fallback;
|
|
24
|
+
const filenameStar = raw.match(/filename\*=UTF-8''([^;]+)/i)?.[1];
|
|
25
|
+
if (filenameStar) {
|
|
26
|
+
return sanitizePathSegment(decodeURIComponent(filenameStar), fallback);
|
|
27
|
+
}
|
|
28
|
+
const filename = raw.match(/filename="?([^";]+)"?/i)?.[1];
|
|
29
|
+
return sanitizePathSegment(filename ?? raw, fallback);
|
|
30
|
+
}
|
|
31
|
+
function resolveContextEventPartDir(params) {
|
|
32
|
+
const sourceEventId = sanitizePathSegment(params.sourceEventId, "event");
|
|
33
|
+
const sourcePartIndex = Number.isFinite(params.sourcePartIndex)
|
|
34
|
+
? Math.max(0, Math.floor(params.sourcePartIndex))
|
|
35
|
+
: 0;
|
|
36
|
+
return `${params.eventsDir}/${sourceEventId}/parts/${sourcePartIndex}`;
|
|
37
|
+
}
|
|
38
|
+
function resolveWorkspaceFilePath(params) {
|
|
39
|
+
return `${resolveContextEventPartDir(params)}/file`;
|
|
40
|
+
}
|
|
41
|
+
export function getContextWorkspaceBase() {
|
|
42
|
+
return trimTrailingSlash(CONTEXT_WORKSPACE_BASE);
|
|
43
|
+
}
|
|
44
|
+
export function getContextExecutionWorkspaceRoot(params) {
|
|
45
|
+
if (params.root)
|
|
46
|
+
return trimTrailingSlash(params.root);
|
|
47
|
+
const contextId = sanitizePathSegment(params.contextId, "context");
|
|
48
|
+
const executionId = sanitizePathSegment(params.executionId, "execution");
|
|
49
|
+
return `${getContextWorkspaceBase()}/${contextId}/executions/${executionId}`;
|
|
50
|
+
}
|
|
51
|
+
export function getContextWorkspaceRoot(params) {
|
|
52
|
+
if (params.root)
|
|
53
|
+
return trimTrailingSlash(params.root);
|
|
54
|
+
const contextId = sanitizePathSegment(params.contextId, "context");
|
|
55
|
+
return `${getContextWorkspaceBase()}/${contextId}`;
|
|
56
|
+
}
|
|
57
|
+
export function getContextEventsDir(params) {
|
|
58
|
+
return `${getContextWorkspaceRoot(params)}/events`;
|
|
59
|
+
}
|
|
60
|
+
export function getContextExecutionWorkspaceDirs(params) {
|
|
61
|
+
const root = getContextExecutionWorkspaceRoot(params);
|
|
62
|
+
const contextRoot = getContextWorkspaceRoot(params);
|
|
63
|
+
const eventsDir = getContextEventsDir(params);
|
|
64
|
+
return {
|
|
65
|
+
root,
|
|
66
|
+
contextRoot,
|
|
67
|
+
eventsDir,
|
|
68
|
+
outputDir: `${root}/output`,
|
|
69
|
+
scriptsDir: `${root}/scripts`,
|
|
70
|
+
tmpDir: `${root}/tmp`,
|
|
71
|
+
manifestPath: `${root}/${WORKSPACE_MANIFEST_FILE_NAME}`,
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
export function getContextExecutionWorkspaceStandardDirs(params) {
|
|
75
|
+
const dirs = getContextExecutionWorkspaceDirs(params);
|
|
76
|
+
return [dirs.contextRoot, dirs.eventsDir, dirs.root, dirs.outputDir, dirs.scriptsDir, dirs.tmpDir];
|
|
77
|
+
}
|
|
78
|
+
export function extractContextWorkspaceFilesFromEventItems(eventItems) {
|
|
79
|
+
const files = [];
|
|
80
|
+
for (const item of eventItems) {
|
|
81
|
+
const itemRecord = asRecord(item);
|
|
82
|
+
const parts = Array.isArray(asRecord(itemRecord?.content)?.parts)
|
|
83
|
+
? asRecord(itemRecord?.content)?.parts
|
|
84
|
+
: [];
|
|
85
|
+
parts.forEach((part, partIndex) => {
|
|
86
|
+
collectPartFiles(part, {
|
|
87
|
+
files,
|
|
88
|
+
sourceEventId: asText(itemRecord?.id),
|
|
89
|
+
sourcePartIndex: partIndex,
|
|
90
|
+
});
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
return files;
|
|
94
|
+
}
|
|
95
|
+
export async function prepareContextExecutionWorkspaceStep(params) {
|
|
96
|
+
"use step";
|
|
97
|
+
const dirs = getContextExecutionWorkspaceDirs(params);
|
|
98
|
+
const filePartDirs = Array.from(new Set(params.files.map((fileInput) => resolveContextEventPartDir({
|
|
99
|
+
eventsDir: dirs.eventsDir,
|
|
100
|
+
sourceEventId: fileInput.sourceEventId ?? fileInput.fileId,
|
|
101
|
+
sourcePartIndex: fileInput.sourcePartIndex ?? 0,
|
|
102
|
+
}))));
|
|
103
|
+
await runDatasetSandboxCommandStep({
|
|
104
|
+
runtime: params.runtime,
|
|
105
|
+
sandboxId: params.sandboxId,
|
|
106
|
+
cmd: "mkdir",
|
|
107
|
+
args: ["-p", ...getContextExecutionWorkspaceStandardDirs(params), ...filePartDirs],
|
|
108
|
+
});
|
|
109
|
+
const preparedFiles = [];
|
|
110
|
+
for (const fileInput of params.files) {
|
|
111
|
+
const fileId = String(fileInput.fileId ?? "").trim();
|
|
112
|
+
if (!fileId)
|
|
113
|
+
continue;
|
|
114
|
+
const file = await readInstantFileStep({ runtime: params.runtime, fileId });
|
|
115
|
+
const filename = sanitizePathSegment(fileInput.filename ??
|
|
116
|
+
filenameFromContentDisposition(file.contentDisposition, `${fileId}.bin`), `${fileId}.bin`);
|
|
117
|
+
const path = resolveWorkspaceFilePath({
|
|
118
|
+
eventsDir: dirs.eventsDir,
|
|
119
|
+
sourceEventId: fileInput.sourceEventId ?? fileId,
|
|
120
|
+
sourcePartIndex: fileInput.sourcePartIndex ?? 0,
|
|
121
|
+
});
|
|
122
|
+
const metadataPath = `${resolveContextEventPartDir({
|
|
123
|
+
eventsDir: dirs.eventsDir,
|
|
124
|
+
sourceEventId: fileInput.sourceEventId ?? fileId,
|
|
125
|
+
sourcePartIndex: fileInput.sourcePartIndex ?? 0,
|
|
126
|
+
})}/metadata.json`;
|
|
127
|
+
await writeDatasetSandboxFilesStep({
|
|
128
|
+
runtime: params.runtime,
|
|
129
|
+
sandboxId: params.sandboxId,
|
|
130
|
+
files: [{ path, contentBase64: file.contentBase64 }],
|
|
131
|
+
});
|
|
132
|
+
await writeDatasetSandboxTextFilesStep({
|
|
133
|
+
runtime: params.runtime,
|
|
134
|
+
sandboxId: params.sandboxId,
|
|
135
|
+
files: [
|
|
136
|
+
{
|
|
137
|
+
path: metadataPath,
|
|
138
|
+
content: JSON.stringify({
|
|
139
|
+
fileId,
|
|
140
|
+
filename,
|
|
141
|
+
mediaType: fileInput.mediaType,
|
|
142
|
+
role: fileInput.role ?? "input",
|
|
143
|
+
sourceEventId: fileInput.sourceEventId,
|
|
144
|
+
sourcePartIndex: fileInput.sourcePartIndex,
|
|
145
|
+
}, null, 2),
|
|
146
|
+
},
|
|
147
|
+
],
|
|
148
|
+
});
|
|
149
|
+
preparedFiles.push({
|
|
150
|
+
fileId,
|
|
151
|
+
filename,
|
|
152
|
+
mediaType: fileInput.mediaType,
|
|
153
|
+
role: fileInput.role ?? "input",
|
|
154
|
+
path,
|
|
155
|
+
sourceEventId: fileInput.sourceEventId,
|
|
156
|
+
sourcePartIndex: fileInput.sourcePartIndex,
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
const manifest = {
|
|
160
|
+
contextId: params.contextId,
|
|
161
|
+
executionId: params.executionId,
|
|
162
|
+
sandboxId: params.sandboxId,
|
|
163
|
+
...dirs,
|
|
164
|
+
files: preparedFiles,
|
|
165
|
+
};
|
|
166
|
+
await writeDatasetSandboxTextFilesStep({
|
|
167
|
+
runtime: params.runtime,
|
|
168
|
+
sandboxId: params.sandboxId,
|
|
169
|
+
files: [
|
|
170
|
+
{
|
|
171
|
+
path: dirs.manifestPath,
|
|
172
|
+
content: JSON.stringify(manifest, null, 2),
|
|
173
|
+
},
|
|
174
|
+
],
|
|
175
|
+
});
|
|
176
|
+
return manifest;
|
|
177
|
+
}
|
|
178
|
+
function collectPartFiles(value, params) {
|
|
179
|
+
const record = asRecord(value);
|
|
180
|
+
if (!record)
|
|
181
|
+
return;
|
|
182
|
+
if (record.type === "file") {
|
|
183
|
+
pushFileRecord(record, params);
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
186
|
+
const content = asRecord(record.content);
|
|
187
|
+
if (!content)
|
|
188
|
+
return;
|
|
189
|
+
if (Array.isArray(content.blocks)) {
|
|
190
|
+
for (const block of content.blocks) {
|
|
191
|
+
const blockRecord = asRecord(block);
|
|
192
|
+
if (blockRecord?.type === "file") {
|
|
193
|
+
pushFileRecord(blockRecord, params);
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
function pushFileRecord(record, params) {
|
|
199
|
+
const fileId = asText(record.fileId);
|
|
200
|
+
if (!fileId)
|
|
201
|
+
return;
|
|
202
|
+
params.files.push({
|
|
203
|
+
fileId,
|
|
204
|
+
filename: asText(record.filename),
|
|
205
|
+
mediaType: asText(record.mediaType),
|
|
206
|
+
role: "input",
|
|
207
|
+
sourceEventId: params.sourceEventId,
|
|
208
|
+
sourcePartIndex: params.sourcePartIndex,
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
function asRecord(value) {
|
|
212
|
+
return value && typeof value === "object" && !Array.isArray(value)
|
|
213
|
+
? value
|
|
214
|
+
: null;
|
|
215
|
+
}
|
|
216
|
+
function asText(value) {
|
|
217
|
+
return typeof value === "string" && value.trim() ? value.trim() : undefined;
|
|
218
|
+
}
|
|
@@ -6,47 +6,5 @@ interface ExecuteCommandToolParams {
|
|
|
6
6
|
export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): import("ai").Tool<{
|
|
7
7
|
pythonCode: string;
|
|
8
8
|
scriptName: string;
|
|
9
|
-
},
|
|
10
|
-
success: boolean;
|
|
11
|
-
fatal: boolean;
|
|
12
|
-
status: string;
|
|
13
|
-
error: string;
|
|
14
|
-
stdout: string;
|
|
15
|
-
stderr: string;
|
|
16
|
-
exitCode: number;
|
|
17
|
-
scriptPath: string;
|
|
18
|
-
stdoutTruncated: boolean;
|
|
19
|
-
stderrTruncated: boolean;
|
|
20
|
-
stdoutOriginalLength: number;
|
|
21
|
-
stderrOriginalLength: number;
|
|
22
|
-
message?: undefined;
|
|
23
|
-
} | {
|
|
24
|
-
success: boolean;
|
|
25
|
-
exitCode: number;
|
|
26
|
-
stdout: string;
|
|
27
|
-
stderr: string;
|
|
28
|
-
scriptPath: string;
|
|
29
|
-
error: string;
|
|
30
|
-
stdoutTruncated: boolean;
|
|
31
|
-
stderrTruncated: boolean;
|
|
32
|
-
stdoutOriginalLength: number;
|
|
33
|
-
stderrOriginalLength: number;
|
|
34
|
-
fatal?: undefined;
|
|
35
|
-
status?: undefined;
|
|
36
|
-
message?: undefined;
|
|
37
|
-
} | {
|
|
38
|
-
success: boolean;
|
|
39
|
-
exitCode: number;
|
|
40
|
-
stdout: string;
|
|
41
|
-
stderr: string;
|
|
42
|
-
scriptPath: string;
|
|
43
|
-
message: string;
|
|
44
|
-
stdoutTruncated: boolean;
|
|
45
|
-
stderrTruncated: boolean;
|
|
46
|
-
stdoutOriginalLength: number;
|
|
47
|
-
stderrOriginalLength: number;
|
|
48
|
-
fatal?: undefined;
|
|
49
|
-
status?: undefined;
|
|
50
|
-
error?: undefined;
|
|
51
|
-
}>;
|
|
9
|
+
}, unknown>;
|
|
52
10
|
export {};
|
|
@@ -2,6 +2,7 @@ import { tool } from "ai";
|
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep } from "./sandbox/steps.js";
|
|
4
4
|
import { getDatasetScriptsDir } from "./datasetFiles.js";
|
|
5
|
+
import { getContextExecutionWorkspaceDirs } from "./contextWorkspace.js";
|
|
5
6
|
// To keep responses predictable for big data scenarios, we cap stdout/stderr.
|
|
6
7
|
// The tool's return payload exposes stdout (capped) plus the on-disk script path.
|
|
7
8
|
const MAX_STDOUT_CHARS = 20000;
|
|
@@ -29,10 +30,16 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
29
30
|
pythonCode: z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
|
|
30
31
|
scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
|
|
31
32
|
}),
|
|
32
|
-
execute: async ({ pythonCode, scriptName }) => {
|
|
33
|
+
execute: (async ({ pythonCode, scriptName }, actionContext) => {
|
|
33
34
|
const normalizedScriptName = normalizeScriptName(scriptName);
|
|
34
35
|
const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
|
|
35
|
-
const
|
|
36
|
+
const scriptsDir = actionContext?.contextId && actionContext.executionId
|
|
37
|
+
? getContextExecutionWorkspaceDirs({
|
|
38
|
+
contextId: actionContext.contextId,
|
|
39
|
+
executionId: actionContext.executionId,
|
|
40
|
+
}).scriptsDir
|
|
41
|
+
: getDatasetScriptsDir(datasetId);
|
|
42
|
+
const scriptFile = `${scriptsDir}/${normalizedScriptName}-${scriptHash}.py`;
|
|
36
43
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
37
44
|
console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
|
|
38
45
|
console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
|
|
@@ -162,6 +169,6 @@ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
|
|
|
162
169
|
stderrOriginalLength: 0,
|
|
163
170
|
};
|
|
164
171
|
}
|
|
165
|
-
},
|
|
172
|
+
}),
|
|
166
173
|
});
|
|
167
174
|
}
|
|
@@ -12,6 +12,8 @@ export declare function createFileParseContext<Env extends {
|
|
|
12
12
|
sandboxState?: SandboxState;
|
|
13
13
|
filePreview?: FileParseContext["filePreview"];
|
|
14
14
|
schema?: any | null;
|
|
15
|
+
filename?: string;
|
|
16
|
+
mediaType?: string;
|
|
15
17
|
}): {
|
|
16
18
|
datasetId: string;
|
|
17
19
|
parse(runtime: {
|
|
@@ -4,7 +4,7 @@ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFa
|
|
|
4
4
|
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
5
5
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
6
6
|
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
7
|
-
import { buildFileDatasetPromptStep,
|
|
7
|
+
import { buildFileDatasetPromptStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
|
|
8
8
|
import { createDatasetId } from "../id.js";
|
|
9
9
|
async function awaitContextRun(run) {
|
|
10
10
|
if (!run)
|
|
@@ -27,6 +27,15 @@ function createFileParseContextDefinition(params) {
|
|
|
27
27
|
const fileId = previous?.fileId ?? params.fileId ?? "";
|
|
28
28
|
const instructions = previous?.instructions ?? params.instructions ?? "";
|
|
29
29
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
30
|
+
const contextRun = runtime?.__ekairosContextRun ?? {};
|
|
31
|
+
const contextId = String(contextRun.contextId ?? stored?.id ?? "").trim();
|
|
32
|
+
const executionId = String(contextRun.executionId ?? previous?.executionId ?? "").trim();
|
|
33
|
+
const sourceEventId = String(previous?.sourceEventId ?? params.sourceEventId ?? "").trim();
|
|
34
|
+
const sourcePartIndex = typeof previous?.sourcePartIndex === "number"
|
|
35
|
+
? previous.sourcePartIndex
|
|
36
|
+
: typeof params.sourcePartIndex === "number"
|
|
37
|
+
? params.sourcePartIndex
|
|
38
|
+
: 0;
|
|
30
39
|
if (!datasetId) {
|
|
31
40
|
throw new Error("dataset_id_required");
|
|
32
41
|
}
|
|
@@ -36,30 +45,29 @@ function createFileParseContextDefinition(params) {
|
|
|
36
45
|
if (!sandboxId) {
|
|
37
46
|
throw new Error("dataset_sandbox_required");
|
|
38
47
|
}
|
|
48
|
+
if (!contextId) {
|
|
49
|
+
throw new Error("dataset_context_id_required");
|
|
50
|
+
}
|
|
51
|
+
if (!executionId) {
|
|
52
|
+
throw new Error("dataset_execution_id_required");
|
|
53
|
+
}
|
|
39
54
|
const initialized = sandboxState.initialized && sandboxState.filePath
|
|
40
55
|
? { filePath: sandboxState.filePath, state: sandboxState }
|
|
41
56
|
: await initializeFileParseSandboxStep({
|
|
42
57
|
runtime,
|
|
43
58
|
sandboxId,
|
|
59
|
+
contextId,
|
|
60
|
+
executionId,
|
|
44
61
|
datasetId,
|
|
45
62
|
fileId,
|
|
63
|
+
sourceEventId,
|
|
64
|
+
sourcePartIndex,
|
|
65
|
+
filename: previous?.filename ?? params.filename,
|
|
66
|
+
mediaType: previous?.mediaType ?? params.mediaType,
|
|
46
67
|
state: sandboxState,
|
|
47
68
|
});
|
|
48
69
|
const sandboxFilePath = initialized.filePath;
|
|
49
70
|
let filePreview = previous?.filePreview ?? previous?.ctx?.filePreview ?? params.filePreview;
|
|
50
|
-
if (!filePreview) {
|
|
51
|
-
try {
|
|
52
|
-
filePreview = await generateFileParsePreviewStep({
|
|
53
|
-
runtime,
|
|
54
|
-
sandboxId,
|
|
55
|
-
sandboxFilePath,
|
|
56
|
-
datasetId,
|
|
57
|
-
});
|
|
58
|
-
}
|
|
59
|
-
catch {
|
|
60
|
-
// Preview is optional; parsing can still proceed from the file path.
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
71
|
let schema = previous?.ctx?.schema ?? previous?.schema ?? params.schema ?? null;
|
|
64
72
|
const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
|
|
65
73
|
if (datasetResult.ok && datasetResult.data.schema) {
|
|
@@ -69,7 +77,12 @@ function createFileParseContextDefinition(params) {
|
|
|
69
77
|
datasetId,
|
|
70
78
|
fileId,
|
|
71
79
|
instructions,
|
|
72
|
-
sandboxConfig: {
|
|
80
|
+
sandboxConfig: {
|
|
81
|
+
filePath: sandboxFilePath,
|
|
82
|
+
outputPath: initialized.state.outputPath,
|
|
83
|
+
scriptsDir: initialized.state.scriptsDir,
|
|
84
|
+
manifestPath: initialized.state.manifestPath,
|
|
85
|
+
},
|
|
73
86
|
analysis: [],
|
|
74
87
|
schema,
|
|
75
88
|
plan: null,
|
|
@@ -84,6 +97,11 @@ function createFileParseContextDefinition(params) {
|
|
|
84
97
|
fileId,
|
|
85
98
|
instructions,
|
|
86
99
|
sandboxId,
|
|
100
|
+
executionId,
|
|
101
|
+
sourceEventId,
|
|
102
|
+
sourcePartIndex,
|
|
103
|
+
filename: previous?.filename ?? params.filename,
|
|
104
|
+
mediaType: previous?.mediaType ?? params.mediaType,
|
|
87
105
|
sandboxState: initialized.state,
|
|
88
106
|
filePreview,
|
|
89
107
|
ctx,
|
|
@@ -109,6 +127,7 @@ function createFileParseContextDefinition(params) {
|
|
|
109
127
|
const datasetId = _stored?.content?.datasetId ?? fallbackDatasetId ?? "";
|
|
110
128
|
const fileId = _stored?.content?.fileId ?? params.fileId ?? "";
|
|
111
129
|
const sandboxId = _stored?.content?.sandboxId ?? params.sandboxId ?? "";
|
|
130
|
+
const outputPath = _stored?.content?.ctx?.sandboxConfig?.outputPath;
|
|
112
131
|
if (!datasetId)
|
|
113
132
|
throw new Error("dataset_id_required");
|
|
114
133
|
if (!fileId)
|
|
@@ -125,6 +144,7 @@ function createFileParseContextDefinition(params) {
|
|
|
125
144
|
datasetId,
|
|
126
145
|
sandboxId,
|
|
127
146
|
runtime,
|
|
147
|
+
outputPath,
|
|
128
148
|
}),
|
|
129
149
|
clearDataset: createClearDatasetTool({
|
|
130
150
|
datasetId,
|
|
@@ -169,6 +189,8 @@ export function createFileParseContext(fileId, opts) {
|
|
|
169
189
|
sandboxState: opts?.sandboxState,
|
|
170
190
|
filePreview: opts?.filePreview,
|
|
171
191
|
schema: opts?.schema,
|
|
192
|
+
filename: opts?.filename,
|
|
193
|
+
mediaType: opts?.mediaType,
|
|
172
194
|
};
|
|
173
195
|
const { context } = createFileParseContextDefinition(params);
|
|
174
196
|
return {
|
|
@@ -185,9 +207,19 @@ export function createFileParseContext(fileId, opts) {
|
|
|
185
207
|
type: "text",
|
|
186
208
|
text: options.prompt ?? "generate a dataset for this file",
|
|
187
209
|
},
|
|
210
|
+
{
|
|
211
|
+
type: "file",
|
|
212
|
+
fileId,
|
|
213
|
+
filename: opts?.filename ?? "source-file",
|
|
214
|
+
mediaType: opts?.mediaType ?? "application/octet-stream",
|
|
215
|
+
},
|
|
188
216
|
],
|
|
189
217
|
},
|
|
190
218
|
};
|
|
219
|
+
params.sourceEventId = triggerEvent.id;
|
|
220
|
+
params.sourcePartIndex = 1;
|
|
221
|
+
params.filename = opts?.filename ?? "source-file";
|
|
222
|
+
params.mediaType = opts?.mediaType ?? "application/octet-stream";
|
|
191
223
|
const shell = await context.react(triggerEvent, {
|
|
192
224
|
runtime: runtime,
|
|
193
225
|
context: { key: `dataset:${datasetId}` },
|
|
@@ -203,6 +235,10 @@ export function createFileParseContext(fileId, opts) {
|
|
|
203
235
|
...(options.initialContent ?? {}),
|
|
204
236
|
datasetId,
|
|
205
237
|
fileId,
|
|
238
|
+
sourceEventId: triggerEvent.id,
|
|
239
|
+
sourcePartIndex: 1,
|
|
240
|
+
filename: opts?.filename ?? "source-file",
|
|
241
|
+
mediaType: opts?.mediaType ?? "application/octet-stream",
|
|
206
242
|
instructions: opts?.instructions ?? "",
|
|
207
243
|
sandboxId: opts?.sandboxId ?? "",
|
|
208
244
|
sandboxState: opts?.sandboxState ?? { initialized: false, filePath: "" },
|
|
@@ -3,8 +3,14 @@ import type { FilePreviewContext } from "./filepreview.types.js";
|
|
|
3
3
|
export declare function initializeFileParseSandboxStep(params: {
|
|
4
4
|
runtime: any;
|
|
5
5
|
sandboxId: string;
|
|
6
|
+
contextId: string;
|
|
7
|
+
executionId: string;
|
|
6
8
|
datasetId: string;
|
|
7
9
|
fileId: string;
|
|
10
|
+
sourceEventId?: string;
|
|
11
|
+
sourcePartIndex?: number;
|
|
12
|
+
filename?: string;
|
|
13
|
+
mediaType?: string;
|
|
8
14
|
state: SandboxState;
|
|
9
15
|
}): Promise<{
|
|
10
16
|
filePath: string;
|
|
@@ -1,42 +1,39 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { DATASET_OUTPUT_FILE_NAME } from "../datasetFiles.js";
|
|
2
|
+
import { prepareContextExecutionWorkspaceStep } from "../contextWorkspace.js";
|
|
3
3
|
import { buildFileDatasetPrompt } from "./prompts.js";
|
|
4
4
|
import { generateFilePreview } from "./filepreview.js";
|
|
5
|
-
import { readInstantFileStep } from "./steps.js";
|
|
6
5
|
export async function initializeFileParseSandboxStep(params) {
|
|
7
6
|
"use step";
|
|
8
7
|
if (params.state.initialized) {
|
|
9
8
|
return { filePath: params.state.filePath, state: params.state };
|
|
10
9
|
}
|
|
11
|
-
console.log(`[FileParseContext ${params.datasetId}] Preparing
|
|
12
|
-
|
|
13
|
-
const file = await readInstantFileStep({ runtime: params.runtime, fileId: params.fileId });
|
|
14
|
-
console.log(`[FileParseContext ${params.datasetId}] Creating dataset workstation...`);
|
|
15
|
-
const workstation = getDatasetWorkstation(params.datasetId);
|
|
16
|
-
await runDatasetSandboxCommandStep({
|
|
17
|
-
runtime: params.runtime,
|
|
18
|
-
sandboxId: params.sandboxId,
|
|
19
|
-
cmd: "mkdir",
|
|
20
|
-
args: ["-p", ...getDatasetStandardDirs(params.datasetId)],
|
|
21
|
-
});
|
|
22
|
-
const fileName = file.contentDisposition ?? "";
|
|
23
|
-
const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
|
|
24
|
-
const sandboxFilePath = `${getDatasetSourcesDir(params.datasetId)}/${params.fileId}${fileExtension}`;
|
|
25
|
-
await writeDatasetSandboxFilesStep({
|
|
10
|
+
console.log(`[FileParseContext ${params.datasetId}] Preparing context execution workspace...`);
|
|
11
|
+
const workspace = await prepareContextExecutionWorkspaceStep({
|
|
26
12
|
runtime: params.runtime,
|
|
27
13
|
sandboxId: params.sandboxId,
|
|
14
|
+
contextId: params.contextId,
|
|
15
|
+
executionId: params.executionId,
|
|
28
16
|
files: [
|
|
29
17
|
{
|
|
30
|
-
|
|
31
|
-
|
|
18
|
+
fileId: params.fileId,
|
|
19
|
+
filename: params.filename,
|
|
20
|
+
mediaType: params.mediaType,
|
|
21
|
+
sourceEventId: params.sourceEventId,
|
|
22
|
+
sourcePartIndex: params.sourcePartIndex,
|
|
32
23
|
},
|
|
33
24
|
],
|
|
34
25
|
});
|
|
35
|
-
|
|
26
|
+
const sandboxFilePath = workspace.files[0]?.path ?? "";
|
|
27
|
+
if (!sandboxFilePath)
|
|
28
|
+
throw new Error("dataset_workspace_file_missing");
|
|
29
|
+
console.log(`[FileParseContext ${params.datasetId}] Context workspace created: ${workspace.root}`);
|
|
36
30
|
console.log(`[FileParseContext ${params.datasetId}] File saved: ${sandboxFilePath}`);
|
|
37
31
|
const state = {
|
|
38
32
|
initialized: true,
|
|
39
33
|
filePath: sandboxFilePath,
|
|
34
|
+
outputPath: `${workspace.outputDir}/${DATASET_OUTPUT_FILE_NAME}`,
|
|
35
|
+
scriptsDir: workspace.scriptsDir,
|
|
36
|
+
manifestPath: workspace.manifestPath,
|
|
40
37
|
};
|
|
41
38
|
return { filePath: sandboxFilePath, state };
|
|
42
39
|
}
|
|
@@ -3,6 +3,9 @@ import type { FilePreviewContext } from "./filepreview.types.js";
|
|
|
3
3
|
export type SandboxState = {
|
|
4
4
|
initialized: boolean;
|
|
5
5
|
filePath: string;
|
|
6
|
+
outputPath?: string;
|
|
7
|
+
scriptsDir?: string;
|
|
8
|
+
manifestPath?: string;
|
|
6
9
|
};
|
|
7
10
|
export type FileParseContext = {
|
|
8
11
|
datasetId: string;
|
|
@@ -10,6 +13,9 @@ export type FileParseContext = {
|
|
|
10
13
|
instructions: string;
|
|
11
14
|
sandboxConfig: {
|
|
12
15
|
filePath: string;
|
|
16
|
+
outputPath?: string;
|
|
17
|
+
scriptsDir?: string;
|
|
18
|
+
manifestPath?: string;
|
|
13
19
|
};
|
|
14
20
|
analysis: any[];
|
|
15
21
|
schema: any | null;
|
|
@@ -29,6 +35,10 @@ export type FileParseContextParams = {
|
|
|
29
35
|
sandboxState?: SandboxState;
|
|
30
36
|
filePreview?: FilePreviewContext;
|
|
31
37
|
schema?: any | null;
|
|
38
|
+
sourceEventId?: string;
|
|
39
|
+
sourcePartIndex?: number;
|
|
40
|
+
filename?: string;
|
|
41
|
+
mediaType?: string;
|
|
32
42
|
};
|
|
33
43
|
export type FileParseRunOptions = {
|
|
34
44
|
prompt?: string;
|
package/dist/file/prompts.js
CHANGED
|
@@ -223,8 +223,10 @@ function buildSchemaSection(context) {
|
|
|
223
223
|
return xml.end({ prettyPrint: true, headless: true });
|
|
224
224
|
}
|
|
225
225
|
function buildInstructions(context) {
|
|
226
|
-
const datasetWorkstation =
|
|
227
|
-
|
|
226
|
+
const datasetWorkstation = context.sandboxConfig.scriptsDir
|
|
227
|
+
? context.sandboxConfig.scriptsDir.replace(/\/scripts$/, "")
|
|
228
|
+
: getDatasetWorkstation(context.datasetId);
|
|
229
|
+
const outputPath = context.sandboxConfig.outputPath ?? getDatasetOutputPath(context.datasetId);
|
|
228
230
|
const hasProvidedSchema = Boolean(context.schema?.schema);
|
|
229
231
|
const currentTask = hasProvidedSchema
|
|
230
232
|
? "Review FilePreview section, use the provided schema as the output contract, then parse the file and generate the dataset"
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/service.d.ts
CHANGED
package/dist/service.js
CHANGED
|
@@ -308,7 +308,7 @@ export class DatasetService {
|
|
|
308
308
|
}
|
|
309
309
|
async uploadDatasetOutputFile(params) {
|
|
310
310
|
try {
|
|
311
|
-
const storagePath = `/dataset/${params.datasetId}/output.jsonl`;
|
|
311
|
+
const storagePath = params.storagePath ?? `/dataset/${params.datasetId}/output.jsonl`;
|
|
312
312
|
const uploadResult = await this.db.storage.uploadFile(storagePath, params.fileBuffer, {
|
|
313
313
|
contentType: "application/x-ndjson",
|
|
314
314
|
contentDisposition: "output.jsonl",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.81-beta.development.0",
|
|
3
|
+
"version": "1.22.83-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.83-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.83-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.83-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|
|
@@ -80,10 +80,10 @@
|
|
|
80
80
|
"@ekairos/openai-reactor": "workspace:*",
|
|
81
81
|
"@ekairos/tsconfig": "workspace:*",
|
|
82
82
|
"@types/node": "^24.5.0",
|
|
83
|
-
"@workflow/serde": "5.0.0-beta.
|
|
84
|
-
"@workflow/vitest": "5.0.0-beta.
|
|
83
|
+
"@workflow/serde": "5.0.0-beta.1",
|
|
84
|
+
"@workflow/vitest": "5.0.0-beta.5",
|
|
85
85
|
"dotenv": "^17.2.3",
|
|
86
86
|
"typescript": "^5.9.2",
|
|
87
|
-
"workflow": "5.0.0-beta.
|
|
87
|
+
"workflow": "5.0.0-beta.5"
|
|
88
88
|
}
|
|
89
89
|
}
|