@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +347 -0
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +5 -0
- package/dist/builder/instructions.js +40 -0
- package/dist/builder/materialize.d.ts +83 -0
- package/dist/builder/materialize.js +548 -0
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +22 -0
- package/dist/builder/persistence.js +192 -0
- package/dist/builder/rows.d.ts +7 -0
- package/dist/builder/rows.js +56 -0
- package/dist/builder/schemaInference.d.ts +3 -0
- package/dist/builder/schemaInference.js +61 -0
- package/dist/builder/types.d.ts +144 -0
- package/dist/builder/types.js +1 -0
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +537 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +3 -67
- package/dist/dataset.js +129 -521
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/defineNotation.tool.d.ts +49 -0
- package/dist/defineNotation.tool.js +154 -0
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +181 -134
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +152 -32
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +9 -7
- package/dist/index.js +9 -23
- package/dist/materializeDataset.tool.d.ts +51 -31
- package/dist/materializeDataset.tool.js +81 -65
- package/dist/notation.d.ts +205 -0
- package/dist/notation.js +424 -0
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +24 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +15 -13
- package/dist/schema.js +27 -37
- package/dist/service.d.ts +12 -5
- package/dist/service.js +88 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +66 -46
- package/dist/transform/transform-dataset.agent.d.ts +20 -45
- package/dist/transform/transform-dataset.agent.js +151 -91
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +95 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +33 -8
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts +0 -22
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js +0 -29
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
|
@@ -1,56 +1,7 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
export type
|
|
4
|
-
|
|
5
|
-
fileId: string;
|
|
6
|
-
instructions: string;
|
|
7
|
-
sandboxConfig: {
|
|
8
|
-
filePath: string;
|
|
9
|
-
};
|
|
10
|
-
analysis: any[];
|
|
11
|
-
schema: any | null;
|
|
12
|
-
plan: any | null;
|
|
13
|
-
executionResult: any | null;
|
|
14
|
-
errors: string[];
|
|
15
|
-
iterationCount: number;
|
|
16
|
-
filePreview?: FilePreviewContext;
|
|
17
|
-
};
|
|
18
|
-
export type FileParseStoryParams = {
|
|
19
|
-
fileId: string;
|
|
20
|
-
instructions?: string;
|
|
21
|
-
sandboxId?: string;
|
|
22
|
-
datasetId?: string;
|
|
23
|
-
model?: string;
|
|
24
|
-
reactor?: ContextReactor<any, any>;
|
|
25
|
-
};
|
|
26
|
-
export type FileParseStoryBuilder<Env extends {
|
|
27
|
-
orgId: string;
|
|
28
|
-
}> = {
|
|
29
|
-
datasetId: string;
|
|
30
|
-
story: ReturnType<ReturnType<typeof createContext<Env>>["context"]> extends any ? any : any;
|
|
31
|
-
};
|
|
32
|
-
export type DatasetResult = {
|
|
33
|
-
id: string;
|
|
34
|
-
status?: string;
|
|
35
|
-
title?: string;
|
|
36
|
-
schema?: any;
|
|
37
|
-
analysis?: any;
|
|
38
|
-
calculatedTotalRows?: number;
|
|
39
|
-
actualGeneratedRowCount?: number;
|
|
40
|
-
createdAt?: number;
|
|
41
|
-
updatedAt?: number;
|
|
42
|
-
};
|
|
43
|
-
/**
|
|
44
|
-
* Factory (DX-first):
|
|
45
|
-
*
|
|
46
|
-
* Usage:
|
|
47
|
-
* const { datasetId } = await createFileParseStory(fileId, { instructions }).parse(env)
|
|
48
|
-
*
|
|
49
|
-
* - No `db` is accepted/stored (workflow-safe).
|
|
50
|
-
* - All I/O happens in `"use step"` functions via Ekairos runtime (`getContextRuntime(env).db`).
|
|
51
|
-
* - `parse()` is the entrypoint; it calls `story.react(...)` internally.
|
|
52
|
-
*/
|
|
53
|
-
export declare function createFileParseStory<Env extends {
|
|
1
|
+
import { type ContextReactor } from "@ekairos/events";
|
|
2
|
+
import type { FileParseContext, FileParseRunOptions, SandboxState } from "./file-dataset.types.js";
|
|
3
|
+
export type { DatasetResult, FileParseContext, FileParseContextBuilder, FileParseContextParams, FileParseRunOptions, SandboxState, } from "./file-dataset.types.js";
|
|
4
|
+
export declare function createFileParseContext<Env extends {
|
|
54
5
|
orgId: string;
|
|
55
6
|
}>(fileId: string, opts?: {
|
|
56
7
|
instructions?: string;
|
|
@@ -58,11 +9,23 @@ export declare function createFileParseStory<Env extends {
|
|
|
58
9
|
datasetId?: string;
|
|
59
10
|
model?: string;
|
|
60
11
|
reactor?: ContextReactor<any, any>;
|
|
12
|
+
sandboxState?: SandboxState;
|
|
13
|
+
filePreview?: FileParseContext["filePreview"];
|
|
14
|
+
schema?: any | null;
|
|
15
|
+
filename?: string;
|
|
16
|
+
mediaType?: string;
|
|
61
17
|
}): {
|
|
62
18
|
datasetId: string;
|
|
63
|
-
parse(
|
|
19
|
+
parse(runtime: {
|
|
20
|
+
env: Env;
|
|
21
|
+
}, options?: FileParseRunOptions): Promise<{
|
|
64
22
|
datasetId: string;
|
|
65
23
|
}>;
|
|
66
|
-
|
|
24
|
+
context: any;
|
|
67
25
|
};
|
|
68
|
-
|
|
26
|
+
export declare function registerFileParseContext<Env extends {
|
|
27
|
+
orgId: string;
|
|
28
|
+
}>(opts?: {
|
|
29
|
+
model?: string;
|
|
30
|
+
reactor?: ContextReactor<any, any>;
|
|
31
|
+
}): void;
|
|
@@ -1,103 +1,89 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
const eventsReactRuntime_1 = require("../eventsReactRuntime");
|
|
17
|
-
async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
18
|
-
if (state.initialized) {
|
|
19
|
-
return state.filePath;
|
|
1
|
+
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
2
|
+
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
3
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
|
|
4
|
+
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
5
|
+
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
6
|
+
import { createDefineNotationTool } from "../defineNotation.tool.js";
|
|
7
|
+
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
8
|
+
import { buildFileDatasetPromptStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
|
|
9
|
+
import { createDatasetId } from "../id.js";
|
|
10
|
+
async function awaitContextRun(run) {
|
|
11
|
+
if (!run)
|
|
12
|
+
return;
|
|
13
|
+
if (run.returnValue) {
|
|
14
|
+
await run.returnValue;
|
|
15
|
+
return;
|
|
20
16
|
}
|
|
21
|
-
|
|
22
|
-
await (0, filepreview_1.ensurePreviewScriptsAvailable)(env, sandboxId);
|
|
23
|
-
console.log(`[FileParseStory ${datasetId}] Installing Python dependencies...`);
|
|
24
|
-
const pipInstall = await (0, steps_1.runDatasetSandboxCommandStep)({
|
|
25
|
-
env,
|
|
26
|
-
sandboxId,
|
|
27
|
-
cmd: "python",
|
|
28
|
-
args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
|
|
29
|
-
});
|
|
30
|
-
const installStderr = pipInstall.stderr;
|
|
31
|
-
if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
|
|
32
|
-
throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
|
|
33
|
-
}
|
|
34
|
-
console.log(`[FileParseStory ${datasetId}] Fetching file from InstantDB...`);
|
|
35
|
-
const file = await (0, steps_2.readInstantFileStep)({ env, fileId });
|
|
36
|
-
console.log(`[FileParseStory ${datasetId}] Creating dataset workstation...`);
|
|
37
|
-
const workstation = (0, datasetFiles_1.getDatasetWorkstation)(datasetId);
|
|
38
|
-
await (0, steps_1.runDatasetSandboxCommandStep)({
|
|
39
|
-
env,
|
|
40
|
-
sandboxId,
|
|
41
|
-
cmd: "mkdir",
|
|
42
|
-
args: ["-p", workstation],
|
|
43
|
-
});
|
|
44
|
-
const fileName = file.contentDisposition ?? "";
|
|
45
|
-
const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
|
|
46
|
-
const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
|
|
47
|
-
await (0, steps_1.writeDatasetSandboxFilesStep)({
|
|
48
|
-
env,
|
|
49
|
-
sandboxId,
|
|
50
|
-
files: [
|
|
51
|
-
{
|
|
52
|
-
path: sandboxFilePath,
|
|
53
|
-
contentBase64: file.contentBase64,
|
|
54
|
-
},
|
|
55
|
-
],
|
|
56
|
-
});
|
|
57
|
-
console.log(`[FileParseStory ${datasetId}] ✅ Workstation created: ${workstation}`);
|
|
58
|
-
console.log(`[FileParseStory ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
|
|
59
|
-
state.filePath = sandboxFilePath;
|
|
60
|
-
state.initialized = true;
|
|
61
|
-
return sandboxFilePath;
|
|
17
|
+
await run;
|
|
62
18
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
*
|
|
66
|
-
* Uso:
|
|
67
|
-
* - Crear una instancia con `fileId`, `instructions` y un `sandbox`
|
|
68
|
-
* - Llamar `getDataset()` para crear un dataset nuevo (crea un datasetId interno)
|
|
69
|
-
* - Llamar `followUp(datasetId, feedback)` para iterar el mismo dataset con feedback
|
|
70
|
-
*
|
|
71
|
-
* Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
|
|
72
|
-
*/
|
|
73
|
-
function createFileParseStoryDefinition(params) {
|
|
74
|
-
const datasetId = params.datasetId ?? (0, admin_1.id)();
|
|
19
|
+
function createFileParseContextDefinition(params) {
|
|
20
|
+
const fallbackDatasetId = params.datasetId;
|
|
75
21
|
const model = params.model ?? "openai/gpt-5";
|
|
76
|
-
let
|
|
77
|
-
.context(async (stored,
|
|
22
|
+
let contextBuilder = createContext("file.parse")
|
|
23
|
+
.context(async (stored, _env, runtime) => {
|
|
78
24
|
const previous = stored?.content ?? {};
|
|
79
|
-
const sandboxState = previous?.sandboxState ??
|
|
25
|
+
const sandboxState = previous?.sandboxState ??
|
|
26
|
+
params.sandboxState ?? { initialized: false, filePath: "" };
|
|
27
|
+
const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
|
|
28
|
+
const fileId = previous?.fileId ?? params.fileId ?? "";
|
|
29
|
+
const instructions = previous?.instructions ?? params.instructions ?? "";
|
|
80
30
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
31
|
+
const contextRun = runtime?.__ekairosContextRun ?? {};
|
|
32
|
+
const contextId = String(contextRun.contextId ?? stored?.id ?? "").trim();
|
|
33
|
+
const executionId = String(contextRun.executionId ?? previous?.executionId ?? "").trim();
|
|
34
|
+
const sourceEventId = String(previous?.sourceEventId ?? params.sourceEventId ?? "").trim();
|
|
35
|
+
const sourcePartIndex = typeof previous?.sourcePartIndex === "number"
|
|
36
|
+
? previous.sourcePartIndex
|
|
37
|
+
: typeof params.sourcePartIndex === "number"
|
|
38
|
+
? params.sourcePartIndex
|
|
39
|
+
: 0;
|
|
40
|
+
if (!datasetId) {
|
|
41
|
+
throw new Error("dataset_id_required");
|
|
42
|
+
}
|
|
43
|
+
if (!fileId) {
|
|
44
|
+
throw new Error("dataset_file_id_required");
|
|
45
|
+
}
|
|
81
46
|
if (!sandboxId) {
|
|
82
47
|
throw new Error("dataset_sandbox_required");
|
|
83
48
|
}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
try {
|
|
87
|
-
filePreview = await (0, filepreview_1.generateFilePreview)(env, sandboxId, sandboxFilePath, datasetId);
|
|
49
|
+
if (!contextId) {
|
|
50
|
+
throw new Error("dataset_context_id_required");
|
|
88
51
|
}
|
|
89
|
-
|
|
90
|
-
|
|
52
|
+
if (!executionId) {
|
|
53
|
+
throw new Error("dataset_execution_id_required");
|
|
91
54
|
}
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
55
|
+
const initialized = sandboxState.initialized && sandboxState.filePath
|
|
56
|
+
? { filePath: sandboxState.filePath, state: sandboxState }
|
|
57
|
+
: await initializeFileParseSandboxStep({
|
|
58
|
+
runtime,
|
|
59
|
+
sandboxId,
|
|
60
|
+
contextId,
|
|
61
|
+
executionId,
|
|
62
|
+
datasetId,
|
|
63
|
+
fileId,
|
|
64
|
+
sourceEventId,
|
|
65
|
+
sourcePartIndex,
|
|
66
|
+
filename: previous?.filename ?? params.filename,
|
|
67
|
+
mediaType: previous?.mediaType ?? params.mediaType,
|
|
68
|
+
state: sandboxState,
|
|
69
|
+
});
|
|
70
|
+
const sandboxFilePath = initialized.filePath;
|
|
71
|
+
let filePreview = previous?.filePreview ?? previous?.ctx?.filePreview ?? params.filePreview;
|
|
72
|
+
let schema = previous?.ctx?.schema ?? previous?.schema ?? params.schema ?? null;
|
|
73
|
+
const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
|
|
74
|
+
if (datasetResult.ok && datasetResult.data.schema) {
|
|
95
75
|
schema = datasetResult.data.schema;
|
|
76
|
+
}
|
|
96
77
|
const ctx = {
|
|
97
78
|
datasetId,
|
|
98
|
-
fileId
|
|
99
|
-
instructions
|
|
100
|
-
sandboxConfig: {
|
|
79
|
+
fileId,
|
|
80
|
+
instructions,
|
|
81
|
+
sandboxConfig: {
|
|
82
|
+
filePath: sandboxFilePath,
|
|
83
|
+
outputPath: initialized.state.outputPath,
|
|
84
|
+
scriptsDir: initialized.state.scriptsDir,
|
|
85
|
+
manifestPath: initialized.state.manifestPath,
|
|
86
|
+
},
|
|
101
87
|
analysis: [],
|
|
102
88
|
schema,
|
|
103
89
|
plan: null,
|
|
@@ -109,16 +95,22 @@ function createFileParseStoryDefinition(params) {
|
|
|
109
95
|
return {
|
|
110
96
|
...previous,
|
|
111
97
|
datasetId,
|
|
112
|
-
fileId
|
|
113
|
-
instructions
|
|
98
|
+
fileId,
|
|
99
|
+
instructions,
|
|
114
100
|
sandboxId,
|
|
115
|
-
|
|
101
|
+
executionId,
|
|
102
|
+
sourceEventId,
|
|
103
|
+
sourcePartIndex,
|
|
104
|
+
filename: previous?.filename ?? params.filename,
|
|
105
|
+
mediaType: previous?.mediaType ?? params.mediaType,
|
|
106
|
+
sandboxState: initialized.state,
|
|
107
|
+
filePreview,
|
|
116
108
|
ctx,
|
|
117
109
|
};
|
|
118
110
|
})
|
|
119
111
|
.narrative(async (stored) => {
|
|
120
112
|
const ctx = stored?.content?.ctx;
|
|
121
|
-
const base = (
|
|
113
|
+
const base = await buildFileDatasetPromptStep({ context: ctx });
|
|
122
114
|
const userInstructions = String(ctx?.instructions ?? "").trim();
|
|
123
115
|
if (!userInstructions)
|
|
124
116
|
return base;
|
|
@@ -131,88 +123,143 @@ function createFileParseStoryDefinition(params) {
|
|
|
131
123
|
base,
|
|
132
124
|
].join("\n");
|
|
133
125
|
})
|
|
134
|
-
.actions(async (_stored,
|
|
126
|
+
.actions(async (_stored, _env, runtime) => {
|
|
135
127
|
const existingSchema = _stored?.content?.ctx?.schema?.schema;
|
|
128
|
+
const datasetId = _stored?.content?.datasetId ?? fallbackDatasetId ?? "";
|
|
129
|
+
const fileId = _stored?.content?.fileId ?? params.fileId ?? "";
|
|
130
|
+
const sandboxId = _stored?.content?.sandboxId ?? params.sandboxId ?? "";
|
|
131
|
+
const outputPath = _stored?.content?.ctx?.sandboxConfig?.outputPath;
|
|
132
|
+
if (!datasetId)
|
|
133
|
+
throw new Error("dataset_id_required");
|
|
134
|
+
if (!fileId)
|
|
135
|
+
throw new Error("dataset_file_id_required");
|
|
136
|
+
if (!sandboxId)
|
|
137
|
+
throw new Error("dataset_sandbox_required");
|
|
136
138
|
const actions = {
|
|
137
|
-
executeCommand:
|
|
139
|
+
executeCommand: createExecuteCommandTool({
|
|
138
140
|
datasetId,
|
|
139
|
-
sandboxId
|
|
140
|
-
|
|
141
|
+
sandboxId,
|
|
142
|
+
runtime,
|
|
141
143
|
}),
|
|
142
|
-
completeDataset:
|
|
144
|
+
completeDataset: createCompleteDatasetTool({
|
|
143
145
|
datasetId,
|
|
144
|
-
sandboxId
|
|
145
|
-
|
|
146
|
+
sandboxId,
|
|
147
|
+
runtime,
|
|
148
|
+
outputPath,
|
|
146
149
|
}),
|
|
147
|
-
clearDataset:
|
|
150
|
+
clearDataset: createClearDatasetTool({
|
|
148
151
|
datasetId,
|
|
149
|
-
sandboxId
|
|
150
|
-
|
|
152
|
+
sandboxId,
|
|
153
|
+
runtime,
|
|
154
|
+
}),
|
|
155
|
+
defineNotation: createDefineNotationTool({
|
|
156
|
+
datasetId,
|
|
157
|
+
runtime,
|
|
151
158
|
}),
|
|
152
159
|
};
|
|
153
160
|
if (!existingSchema) {
|
|
154
|
-
actions.generateSchema =
|
|
161
|
+
actions.generateSchema = createGenerateSchemaTool({
|
|
155
162
|
datasetId,
|
|
156
|
-
fileId
|
|
157
|
-
|
|
163
|
+
fileId,
|
|
164
|
+
runtime,
|
|
158
165
|
});
|
|
159
166
|
}
|
|
160
167
|
return actions;
|
|
161
168
|
})
|
|
162
169
|
.shouldContinue(({ reactionEvent }) => {
|
|
163
|
-
|
|
170
|
+
const fatalFailure = getDatasetFatalFailure(reactionEvent);
|
|
171
|
+
if (fatalFailure) {
|
|
172
|
+
throw new Error(fatalFailure);
|
|
173
|
+
}
|
|
174
|
+
return !didCompleteDatasetSucceed(reactionEvent);
|
|
164
175
|
});
|
|
165
176
|
if (params.reactor) {
|
|
166
|
-
|
|
177
|
+
contextBuilder = contextBuilder.reactor(params.reactor);
|
|
167
178
|
}
|
|
168
179
|
else {
|
|
169
|
-
|
|
180
|
+
contextBuilder = contextBuilder.model(model);
|
|
170
181
|
}
|
|
171
|
-
const
|
|
172
|
-
return { datasetId,
|
|
182
|
+
const context = contextBuilder.build();
|
|
183
|
+
return { datasetId: fallbackDatasetId ?? "", context };
|
|
173
184
|
}
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
*
|
|
177
|
-
* Usage:
|
|
178
|
-
* const { datasetId } = await createFileParseStory(fileId, { instructions }).parse(env)
|
|
179
|
-
*
|
|
180
|
-
* - No `db` is accepted/stored (workflow-safe).
|
|
181
|
-
* - All I/O happens in `"use step"` functions via Ekairos runtime (`getContextRuntime(env).db`).
|
|
182
|
-
* - `parse()` is the entrypoint; it calls `story.react(...)` internally.
|
|
183
|
-
*/
|
|
184
|
-
function createFileParseStory(fileId, opts) {
|
|
185
|
+
export function createFileParseContext(fileId, opts) {
|
|
186
|
+
const datasetId = opts?.datasetId ?? createDatasetId();
|
|
185
187
|
const params = {
|
|
186
188
|
fileId,
|
|
187
189
|
instructions: opts?.instructions,
|
|
188
190
|
sandboxId: opts?.sandboxId,
|
|
189
|
-
datasetId
|
|
191
|
+
datasetId,
|
|
190
192
|
model: opts?.model,
|
|
191
193
|
reactor: opts?.reactor,
|
|
194
|
+
sandboxState: opts?.sandboxState,
|
|
195
|
+
filePreview: opts?.filePreview,
|
|
196
|
+
schema: opts?.schema,
|
|
197
|
+
filename: opts?.filename,
|
|
198
|
+
mediaType: opts?.mediaType,
|
|
192
199
|
};
|
|
193
|
-
const {
|
|
200
|
+
const { context } = createFileParseContextDefinition(params);
|
|
194
201
|
return {
|
|
195
202
|
datasetId,
|
|
196
|
-
async parse(
|
|
203
|
+
async parse(runtime, options = {}) {
|
|
197
204
|
const triggerEvent = {
|
|
198
|
-
id: (
|
|
199
|
-
type:
|
|
200
|
-
channel:
|
|
205
|
+
id: createDatasetId(),
|
|
206
|
+
type: INPUT_TEXT_ITEM_TYPE,
|
|
207
|
+
channel: WEB_CHANNEL,
|
|
201
208
|
createdAt: new Date().toISOString(),
|
|
202
209
|
content: {
|
|
203
|
-
parts: [
|
|
210
|
+
parts: [
|
|
211
|
+
{
|
|
212
|
+
type: "text",
|
|
213
|
+
text: options.prompt ?? "generate a dataset for this file",
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
type: "file",
|
|
217
|
+
fileId,
|
|
218
|
+
filename: opts?.filename ?? "resource-file",
|
|
219
|
+
mediaType: opts?.mediaType ?? "application/octet-stream",
|
|
220
|
+
},
|
|
221
|
+
],
|
|
204
222
|
},
|
|
205
223
|
};
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
224
|
+
params.sourceEventId = triggerEvent.id;
|
|
225
|
+
params.sourcePartIndex = 1;
|
|
226
|
+
params.filename = opts?.filename ?? "resource-file";
|
|
227
|
+
params.mediaType = opts?.mediaType ?? "application/octet-stream";
|
|
228
|
+
const shell = await context.react(triggerEvent, {
|
|
229
|
+
runtime: runtime,
|
|
209
230
|
context: { key: `dataset:${datasetId}` },
|
|
210
|
-
|
|
231
|
+
durable: options.durable ?? false,
|
|
232
|
+
options: {
|
|
233
|
+
preventClose: true,
|
|
234
|
+
sendFinish: false,
|
|
235
|
+
maxIterations: 20,
|
|
236
|
+
maxModelSteps: 5,
|
|
237
|
+
},
|
|
238
|
+
__initialContent: {
|
|
239
|
+
...(options.initialContent ?? {}),
|
|
240
|
+
datasetId,
|
|
241
|
+
fileId,
|
|
242
|
+
sourceEventId: triggerEvent.id,
|
|
243
|
+
sourcePartIndex: 1,
|
|
244
|
+
filename: opts?.filename ?? "resource-file",
|
|
245
|
+
mediaType: opts?.mediaType ?? "application/octet-stream",
|
|
246
|
+
instructions: opts?.instructions ?? "",
|
|
247
|
+
sandboxId: opts?.sandboxId ?? "",
|
|
248
|
+
sandboxState: opts?.sandboxState ?? { initialized: false, filePath: "" },
|
|
249
|
+
filePreview: opts?.filePreview,
|
|
250
|
+
schema: opts?.schema,
|
|
251
|
+
},
|
|
211
252
|
});
|
|
253
|
+
await awaitContextRun(shell.run);
|
|
212
254
|
return { datasetId };
|
|
213
255
|
},
|
|
214
|
-
|
|
215
|
-
story,
|
|
256
|
+
context,
|
|
216
257
|
};
|
|
217
258
|
}
|
|
218
|
-
|
|
259
|
+
export function registerFileParseContext(opts) {
|
|
260
|
+
createFileParseContextDefinition({
|
|
261
|
+
model: opts?.model,
|
|
262
|
+
reactor: opts?.reactor,
|
|
263
|
+
}).context;
|
|
264
|
+
}
|
|
265
|
+
registerFileParseContext();
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { FileParseContext, SandboxState } from "./file-dataset.types.js";
|
|
2
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
3
|
+
export declare function initializeFileParseSandboxStep(params: {
|
|
4
|
+
runtime: any;
|
|
5
|
+
sandboxId: string;
|
|
6
|
+
contextId: string;
|
|
7
|
+
executionId: string;
|
|
8
|
+
datasetId: string;
|
|
9
|
+
fileId: string;
|
|
10
|
+
sourceEventId?: string;
|
|
11
|
+
sourcePartIndex?: number;
|
|
12
|
+
filename?: string;
|
|
13
|
+
mediaType?: string;
|
|
14
|
+
state: SandboxState;
|
|
15
|
+
}): Promise<{
|
|
16
|
+
filePath: string;
|
|
17
|
+
state: SandboxState;
|
|
18
|
+
}>;
|
|
19
|
+
export declare function generateFileParsePreviewStep(params: {
|
|
20
|
+
runtime: any;
|
|
21
|
+
sandboxId: string;
|
|
22
|
+
sandboxFilePath: string;
|
|
23
|
+
datasetId: string;
|
|
24
|
+
}): Promise<FilePreviewContext>;
|
|
25
|
+
export declare function buildFileDatasetPromptStep(params: {
|
|
26
|
+
context: FileParseContext;
|
|
27
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { DATASET_OUTPUT_FILE_NAME } from "../datasetFiles.js";
|
|
2
|
+
import { prepareContextExecutionWorkspaceStep } from "../contextWorkspace.js";
|
|
3
|
+
import { buildFileDatasetPrompt } from "./prompts.js";
|
|
4
|
+
import { generateFilePreview } from "./filepreview.js";
|
|
5
|
+
export async function initializeFileParseSandboxStep(params) {
|
|
6
|
+
"use step";
|
|
7
|
+
if (params.state.initialized) {
|
|
8
|
+
return { filePath: params.state.filePath, state: params.state };
|
|
9
|
+
}
|
|
10
|
+
console.log(`[FileParseContext ${params.datasetId}] Preparing context execution workspace...`);
|
|
11
|
+
const workspace = await prepareContextExecutionWorkspaceStep({
|
|
12
|
+
runtime: params.runtime,
|
|
13
|
+
sandboxId: params.sandboxId,
|
|
14
|
+
contextId: params.contextId,
|
|
15
|
+
executionId: params.executionId,
|
|
16
|
+
files: [
|
|
17
|
+
{
|
|
18
|
+
fileId: params.fileId,
|
|
19
|
+
filename: params.filename,
|
|
20
|
+
mediaType: params.mediaType,
|
|
21
|
+
sourceEventId: params.sourceEventId,
|
|
22
|
+
sourcePartIndex: params.sourcePartIndex,
|
|
23
|
+
},
|
|
24
|
+
],
|
|
25
|
+
});
|
|
26
|
+
const sandboxFilePath = workspace.files[0]?.path ?? "";
|
|
27
|
+
if (!sandboxFilePath)
|
|
28
|
+
throw new Error("dataset_workspace_file_missing");
|
|
29
|
+
console.log(`[FileParseContext ${params.datasetId}] Context workspace created: ${workspace.root}`);
|
|
30
|
+
console.log(`[FileParseContext ${params.datasetId}] File saved: ${sandboxFilePath}`);
|
|
31
|
+
const state = {
|
|
32
|
+
initialized: true,
|
|
33
|
+
filePath: sandboxFilePath,
|
|
34
|
+
outputPath: `${workspace.outputDir}/${DATASET_OUTPUT_FILE_NAME}`,
|
|
35
|
+
scriptsDir: workspace.scriptsDir,
|
|
36
|
+
manifestPath: workspace.manifestPath,
|
|
37
|
+
};
|
|
38
|
+
return { filePath: sandboxFilePath, state };
|
|
39
|
+
}
|
|
40
|
+
export async function generateFileParsePreviewStep(params) {
|
|
41
|
+
"use step";
|
|
42
|
+
return await generateFilePreview(params.runtime, params.sandboxId, params.sandboxFilePath, params.datasetId);
|
|
43
|
+
}
|
|
44
|
+
export async function buildFileDatasetPromptStep(params) {
|
|
45
|
+
"use step";
|
|
46
|
+
return buildFileDatasetPrompt(params.context);
|
|
47
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import type { ContextReactor } from "@ekairos/events";
|
|
2
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
3
|
+
export type SandboxState = {
|
|
4
|
+
initialized: boolean;
|
|
5
|
+
filePath: string;
|
|
6
|
+
outputPath?: string;
|
|
7
|
+
scriptsDir?: string;
|
|
8
|
+
manifestPath?: string;
|
|
9
|
+
};
|
|
10
|
+
export type FileParseContext = {
|
|
11
|
+
datasetId: string;
|
|
12
|
+
fileId: string;
|
|
13
|
+
instructions: string;
|
|
14
|
+
sandboxConfig: {
|
|
15
|
+
filePath: string;
|
|
16
|
+
outputPath?: string;
|
|
17
|
+
scriptsDir?: string;
|
|
18
|
+
manifestPath?: string;
|
|
19
|
+
};
|
|
20
|
+
analysis: any[];
|
|
21
|
+
schema: any | null;
|
|
22
|
+
plan: any | null;
|
|
23
|
+
executionResult: any | null;
|
|
24
|
+
errors: string[];
|
|
25
|
+
iterationCount: number;
|
|
26
|
+
filePreview?: FilePreviewContext;
|
|
27
|
+
};
|
|
28
|
+
export type FileParseContextParams = {
|
|
29
|
+
fileId?: string;
|
|
30
|
+
instructions?: string;
|
|
31
|
+
sandboxId?: string;
|
|
32
|
+
datasetId?: string;
|
|
33
|
+
model?: string;
|
|
34
|
+
reactor?: ContextReactor<any, any>;
|
|
35
|
+
sandboxState?: SandboxState;
|
|
36
|
+
filePreview?: FilePreviewContext;
|
|
37
|
+
schema?: any | null;
|
|
38
|
+
sourceEventId?: string;
|
|
39
|
+
sourcePartIndex?: number;
|
|
40
|
+
filename?: string;
|
|
41
|
+
mediaType?: string;
|
|
42
|
+
};
|
|
43
|
+
export type FileParseRunOptions = {
|
|
44
|
+
prompt?: string;
|
|
45
|
+
durable?: boolean;
|
|
46
|
+
initialContent?: Record<string, any>;
|
|
47
|
+
};
|
|
48
|
+
export type FileParseContextBuilder<Env extends {
|
|
49
|
+
orgId: string;
|
|
50
|
+
}> = {
|
|
51
|
+
datasetId: string;
|
|
52
|
+
context: any;
|
|
53
|
+
};
|
|
54
|
+
export type DatasetResult = {
|
|
55
|
+
id: string;
|
|
56
|
+
status?: string;
|
|
57
|
+
title?: string;
|
|
58
|
+
schema?: any;
|
|
59
|
+
analysis?: any;
|
|
60
|
+
calculatedTotalRows?: number;
|
|
61
|
+
actualGeneratedRowCount?: number;
|
|
62
|
+
createdAt?: number;
|
|
63
|
+
updatedAt?: number;
|
|
64
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,40 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
metadata?: {
|
|
4
|
-
description: string;
|
|
5
|
-
script: string;
|
|
6
|
-
command: string;
|
|
7
|
-
stdout: string;
|
|
8
|
-
stderr: string;
|
|
9
|
-
};
|
|
10
|
-
head?: {
|
|
11
|
-
description: string;
|
|
12
|
-
script: string;
|
|
13
|
-
command: string;
|
|
14
|
-
stdout: string;
|
|
15
|
-
stderr: string;
|
|
16
|
-
};
|
|
17
|
-
tail?: {
|
|
18
|
-
description: string;
|
|
19
|
-
script: string;
|
|
20
|
-
command: string;
|
|
21
|
-
stdout: string;
|
|
22
|
-
stderr: string;
|
|
23
|
-
};
|
|
24
|
-
mid?: {
|
|
25
|
-
description: string;
|
|
26
|
-
script: string;
|
|
27
|
-
command: string;
|
|
28
|
-
stdout: string;
|
|
29
|
-
stderr: string;
|
|
30
|
-
};
|
|
31
|
-
};
|
|
1
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
2
|
+
export type { FilePreviewContext } from "./filepreview.types.js";
|
|
32
3
|
interface PreviewOptions {
|
|
33
4
|
headLines?: number;
|
|
34
5
|
tailLines?: number;
|
|
35
6
|
midLines?: number;
|
|
36
7
|
}
|
|
37
|
-
export declare function
|
|
38
|
-
export declare function
|
|
39
|
-
export
|
|
40
|
-
//# sourceMappingURL=filepreview.d.ts.map
|
|
8
|
+
export declare function getEmbeddedFilePreviewScriptBase64(scriptName: string): string;
|
|
9
|
+
export declare function ensurePreviewScriptsAvailable(_runtime: any, _sandboxId: string): Promise<void>;
|
|
10
|
+
export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
|