@ekairos/dataset 1.22.34-beta.development.0 → 1.22.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +347 -0
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +5 -0
- package/dist/builder/instructions.js +40 -0
- package/dist/builder/materialize.d.ts +83 -0
- package/dist/builder/materialize.js +548 -0
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +22 -0
- package/dist/builder/persistence.js +153 -0
- package/dist/builder/rows.d.ts +7 -0
- package/dist/builder/rows.js +56 -0
- package/dist/builder/schemaInference.d.ts +3 -0
- package/dist/builder/schemaInference.js +61 -0
- package/dist/builder/types.d.ts +140 -0
- package/dist/builder/types.js +1 -0
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +487 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +3 -67
- package/dist/dataset.js +129 -520
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +176 -132
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +134 -27
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +8 -7
- package/dist/index.js +8 -23
- package/dist/materializeDataset.tool.d.ts +52 -32
- package/dist/materializeDataset.tool.js +81 -65
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +13 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +13 -13
- package/dist/schema.js +25 -37
- package/dist/service.d.ts +8 -5
- package/dist/service.js +70 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +58 -43
- package/dist/transform/transform-dataset.agent.d.ts +20 -45
- package/dist/transform/transform-dataset.agent.js +146 -89
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +95 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +36 -11
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
|
@@ -1,102 +1,88 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
const steps_3 = require("../dataset/steps");
|
|
16
|
-
async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
17
|
-
if (state.initialized) {
|
|
18
|
-
return state.filePath;
|
|
1
|
+
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
2
|
+
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
3
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
|
|
4
|
+
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
5
|
+
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
6
|
+
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
7
|
+
import { buildFileDatasetPromptStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
|
|
8
|
+
import { createDatasetId } from "../id.js";
|
|
9
|
+
async function awaitContextRun(run) {
|
|
10
|
+
if (!run)
|
|
11
|
+
return;
|
|
12
|
+
if (run.returnValue) {
|
|
13
|
+
await run.returnValue;
|
|
14
|
+
return;
|
|
19
15
|
}
|
|
20
|
-
|
|
21
|
-
await (0, filepreview_1.ensurePreviewScriptsAvailable)(env, sandboxId);
|
|
22
|
-
console.log(`[FileParseStory ${datasetId}] Installing Python dependencies...`);
|
|
23
|
-
const pipInstall = await (0, steps_1.runDatasetSandboxCommandStep)({
|
|
24
|
-
env,
|
|
25
|
-
sandboxId,
|
|
26
|
-
cmd: "python",
|
|
27
|
-
args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
|
|
28
|
-
});
|
|
29
|
-
const installStderr = pipInstall.stderr;
|
|
30
|
-
if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
|
|
31
|
-
throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
|
|
32
|
-
}
|
|
33
|
-
console.log(`[FileParseStory ${datasetId}] Fetching file from InstantDB...`);
|
|
34
|
-
const file = await (0, steps_2.readInstantFileStep)({ env, fileId });
|
|
35
|
-
console.log(`[FileParseStory ${datasetId}] Creating dataset workstation...`);
|
|
36
|
-
const workstation = (0, datasetFiles_1.getDatasetWorkstation)(datasetId);
|
|
37
|
-
await (0, steps_1.runDatasetSandboxCommandStep)({
|
|
38
|
-
env,
|
|
39
|
-
sandboxId,
|
|
40
|
-
cmd: "mkdir",
|
|
41
|
-
args: ["-p", workstation],
|
|
42
|
-
});
|
|
43
|
-
const fileName = file.contentDisposition ?? "";
|
|
44
|
-
const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
|
|
45
|
-
const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
|
|
46
|
-
await (0, steps_1.writeDatasetSandboxFilesStep)({
|
|
47
|
-
env,
|
|
48
|
-
sandboxId,
|
|
49
|
-
files: [
|
|
50
|
-
{
|
|
51
|
-
path: sandboxFilePath,
|
|
52
|
-
contentBase64: file.contentBase64,
|
|
53
|
-
},
|
|
54
|
-
],
|
|
55
|
-
});
|
|
56
|
-
console.log(`[FileParseStory ${datasetId}] ✅ Workstation created: ${workstation}`);
|
|
57
|
-
console.log(`[FileParseStory ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
|
|
58
|
-
state.filePath = sandboxFilePath;
|
|
59
|
-
state.initialized = true;
|
|
60
|
-
return sandboxFilePath;
|
|
16
|
+
await run;
|
|
61
17
|
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
*
|
|
65
|
-
* Uso:
|
|
66
|
-
* - Crear una instancia con `fileId`, `instructions` y un `sandbox`
|
|
67
|
-
* - Llamar `getDataset()` para crear un dataset nuevo (crea un datasetId interno)
|
|
68
|
-
* - Llamar `followUp(datasetId, feedback)` para iterar el mismo dataset con feedback
|
|
69
|
-
*
|
|
70
|
-
* Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
|
|
71
|
-
*/
|
|
72
|
-
function createFileParseStoryDefinition(params) {
|
|
73
|
-
const datasetId = params.datasetId ?? (0, admin_1.id)();
|
|
18
|
+
function createFileParseContextDefinition(params) {
|
|
19
|
+
const fallbackDatasetId = params.datasetId;
|
|
74
20
|
const model = params.model ?? "openai/gpt-5";
|
|
75
|
-
let
|
|
76
|
-
.context(async (stored,
|
|
21
|
+
let contextBuilder = createContext("file.parse")
|
|
22
|
+
.context(async (stored, _env, runtime) => {
|
|
77
23
|
const previous = stored?.content ?? {};
|
|
78
|
-
const sandboxState = previous?.sandboxState ??
|
|
24
|
+
const sandboxState = previous?.sandboxState ??
|
|
25
|
+
params.sandboxState ?? { initialized: false, filePath: "" };
|
|
26
|
+
const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
|
|
27
|
+
const fileId = previous?.fileId ?? params.fileId ?? "";
|
|
28
|
+
const instructions = previous?.instructions ?? params.instructions ?? "";
|
|
79
29
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
30
|
+
const contextRun = runtime?.__ekairosContextRun ?? {};
|
|
31
|
+
const contextId = String(contextRun.contextId ?? stored?.id ?? "").trim();
|
|
32
|
+
const executionId = String(contextRun.executionId ?? previous?.executionId ?? "").trim();
|
|
33
|
+
const sourceEventId = String(previous?.sourceEventId ?? params.sourceEventId ?? "").trim();
|
|
34
|
+
const sourcePartIndex = typeof previous?.sourcePartIndex === "number"
|
|
35
|
+
? previous.sourcePartIndex
|
|
36
|
+
: typeof params.sourcePartIndex === "number"
|
|
37
|
+
? params.sourcePartIndex
|
|
38
|
+
: 0;
|
|
39
|
+
if (!datasetId) {
|
|
40
|
+
throw new Error("dataset_id_required");
|
|
41
|
+
}
|
|
42
|
+
if (!fileId) {
|
|
43
|
+
throw new Error("dataset_file_id_required");
|
|
44
|
+
}
|
|
80
45
|
if (!sandboxId) {
|
|
81
46
|
throw new Error("dataset_sandbox_required");
|
|
82
47
|
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
try {
|
|
86
|
-
filePreview = await (0, filepreview_1.generateFilePreview)(env, sandboxId, sandboxFilePath, datasetId);
|
|
48
|
+
if (!contextId) {
|
|
49
|
+
throw new Error("dataset_context_id_required");
|
|
87
50
|
}
|
|
88
|
-
|
|
89
|
-
|
|
51
|
+
if (!executionId) {
|
|
52
|
+
throw new Error("dataset_execution_id_required");
|
|
90
53
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
54
|
+
const initialized = sandboxState.initialized && sandboxState.filePath
|
|
55
|
+
? { filePath: sandboxState.filePath, state: sandboxState }
|
|
56
|
+
: await initializeFileParseSandboxStep({
|
|
57
|
+
runtime,
|
|
58
|
+
sandboxId,
|
|
59
|
+
contextId,
|
|
60
|
+
executionId,
|
|
61
|
+
datasetId,
|
|
62
|
+
fileId,
|
|
63
|
+
sourceEventId,
|
|
64
|
+
sourcePartIndex,
|
|
65
|
+
filename: previous?.filename ?? params.filename,
|
|
66
|
+
mediaType: previous?.mediaType ?? params.mediaType,
|
|
67
|
+
state: sandboxState,
|
|
68
|
+
});
|
|
69
|
+
const sandboxFilePath = initialized.filePath;
|
|
70
|
+
let filePreview = previous?.filePreview ?? previous?.ctx?.filePreview ?? params.filePreview;
|
|
71
|
+
let schema = previous?.ctx?.schema ?? previous?.schema ?? params.schema ?? null;
|
|
72
|
+
const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
|
|
73
|
+
if (datasetResult.ok && datasetResult.data.schema) {
|
|
94
74
|
schema = datasetResult.data.schema;
|
|
75
|
+
}
|
|
95
76
|
const ctx = {
|
|
96
77
|
datasetId,
|
|
97
|
-
fileId
|
|
98
|
-
instructions
|
|
99
|
-
sandboxConfig: {
|
|
78
|
+
fileId,
|
|
79
|
+
instructions,
|
|
80
|
+
sandboxConfig: {
|
|
81
|
+
filePath: sandboxFilePath,
|
|
82
|
+
outputPath: initialized.state.outputPath,
|
|
83
|
+
scriptsDir: initialized.state.scriptsDir,
|
|
84
|
+
manifestPath: initialized.state.manifestPath,
|
|
85
|
+
},
|
|
100
86
|
analysis: [],
|
|
101
87
|
schema,
|
|
102
88
|
plan: null,
|
|
@@ -108,16 +94,22 @@ function createFileParseStoryDefinition(params) {
|
|
|
108
94
|
return {
|
|
109
95
|
...previous,
|
|
110
96
|
datasetId,
|
|
111
|
-
fileId
|
|
112
|
-
instructions
|
|
97
|
+
fileId,
|
|
98
|
+
instructions,
|
|
113
99
|
sandboxId,
|
|
114
|
-
|
|
100
|
+
executionId,
|
|
101
|
+
sourceEventId,
|
|
102
|
+
sourcePartIndex,
|
|
103
|
+
filename: previous?.filename ?? params.filename,
|
|
104
|
+
mediaType: previous?.mediaType ?? params.mediaType,
|
|
105
|
+
sandboxState: initialized.state,
|
|
106
|
+
filePreview,
|
|
115
107
|
ctx,
|
|
116
108
|
};
|
|
117
109
|
})
|
|
118
110
|
.narrative(async (stored) => {
|
|
119
111
|
const ctx = stored?.content?.ctx;
|
|
120
|
-
const base = (
|
|
112
|
+
const base = await buildFileDatasetPromptStep({ context: ctx });
|
|
121
113
|
const userInstructions = String(ctx?.instructions ?? "").trim();
|
|
122
114
|
if (!userInstructions)
|
|
123
115
|
return base;
|
|
@@ -130,87 +122,139 @@ function createFileParseStoryDefinition(params) {
|
|
|
130
122
|
base,
|
|
131
123
|
].join("\n");
|
|
132
124
|
})
|
|
133
|
-
.actions(async (_stored,
|
|
125
|
+
.actions(async (_stored, _env, runtime) => {
|
|
134
126
|
const existingSchema = _stored?.content?.ctx?.schema?.schema;
|
|
127
|
+
const datasetId = _stored?.content?.datasetId ?? fallbackDatasetId ?? "";
|
|
128
|
+
const fileId = _stored?.content?.fileId ?? params.fileId ?? "";
|
|
129
|
+
const sandboxId = _stored?.content?.sandboxId ?? params.sandboxId ?? "";
|
|
130
|
+
const outputPath = _stored?.content?.ctx?.sandboxConfig?.outputPath;
|
|
131
|
+
if (!datasetId)
|
|
132
|
+
throw new Error("dataset_id_required");
|
|
133
|
+
if (!fileId)
|
|
134
|
+
throw new Error("dataset_file_id_required");
|
|
135
|
+
if (!sandboxId)
|
|
136
|
+
throw new Error("dataset_sandbox_required");
|
|
135
137
|
const actions = {
|
|
136
|
-
executeCommand:
|
|
138
|
+
executeCommand: createExecuteCommandTool({
|
|
137
139
|
datasetId,
|
|
138
|
-
sandboxId
|
|
139
|
-
|
|
140
|
+
sandboxId,
|
|
141
|
+
runtime,
|
|
140
142
|
}),
|
|
141
|
-
completeDataset:
|
|
143
|
+
completeDataset: createCompleteDatasetTool({
|
|
142
144
|
datasetId,
|
|
143
|
-
sandboxId
|
|
144
|
-
|
|
145
|
+
sandboxId,
|
|
146
|
+
runtime,
|
|
147
|
+
outputPath,
|
|
145
148
|
}),
|
|
146
|
-
clearDataset:
|
|
149
|
+
clearDataset: createClearDatasetTool({
|
|
147
150
|
datasetId,
|
|
148
|
-
sandboxId
|
|
149
|
-
|
|
151
|
+
sandboxId,
|
|
152
|
+
runtime,
|
|
150
153
|
}),
|
|
151
154
|
};
|
|
152
155
|
if (!existingSchema) {
|
|
153
|
-
actions.generateSchema =
|
|
156
|
+
actions.generateSchema = createGenerateSchemaTool({
|
|
154
157
|
datasetId,
|
|
155
|
-
fileId
|
|
156
|
-
|
|
158
|
+
fileId,
|
|
159
|
+
runtime,
|
|
157
160
|
});
|
|
158
161
|
}
|
|
159
162
|
return actions;
|
|
160
163
|
})
|
|
161
164
|
.shouldContinue(({ reactionEvent }) => {
|
|
162
|
-
|
|
165
|
+
const fatalFailure = getDatasetFatalFailure(reactionEvent);
|
|
166
|
+
if (fatalFailure) {
|
|
167
|
+
throw new Error(fatalFailure);
|
|
168
|
+
}
|
|
169
|
+
return !didCompleteDatasetSucceed(reactionEvent);
|
|
163
170
|
});
|
|
164
171
|
if (params.reactor) {
|
|
165
|
-
|
|
172
|
+
contextBuilder = contextBuilder.reactor(params.reactor);
|
|
166
173
|
}
|
|
167
174
|
else {
|
|
168
|
-
|
|
175
|
+
contextBuilder = contextBuilder.model(model);
|
|
169
176
|
}
|
|
170
|
-
const
|
|
171
|
-
return { datasetId,
|
|
177
|
+
const context = contextBuilder.build();
|
|
178
|
+
return { datasetId: fallbackDatasetId ?? "", context };
|
|
172
179
|
}
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
*
|
|
176
|
-
* Usage:
|
|
177
|
-
* const { datasetId } = await createFileParseStory(fileId, { instructions }).parse(env)
|
|
178
|
-
*
|
|
179
|
-
* - No `db` is accepted/stored (workflow-safe).
|
|
180
|
-
* - All I/O happens in `"use step"` functions via Ekairos runtime (`getContextRuntime(env).db`).
|
|
181
|
-
* - `parse()` is the entrypoint; it calls `story.react(...)` internally.
|
|
182
|
-
*/
|
|
183
|
-
function createFileParseStory(fileId, opts) {
|
|
180
|
+
export function createFileParseContext(fileId, opts) {
|
|
181
|
+
const datasetId = opts?.datasetId ?? createDatasetId();
|
|
184
182
|
const params = {
|
|
185
183
|
fileId,
|
|
186
184
|
instructions: opts?.instructions,
|
|
187
185
|
sandboxId: opts?.sandboxId,
|
|
188
|
-
datasetId
|
|
186
|
+
datasetId,
|
|
189
187
|
model: opts?.model,
|
|
190
188
|
reactor: opts?.reactor,
|
|
189
|
+
sandboxState: opts?.sandboxState,
|
|
190
|
+
filePreview: opts?.filePreview,
|
|
191
|
+
schema: opts?.schema,
|
|
192
|
+
filename: opts?.filename,
|
|
193
|
+
mediaType: opts?.mediaType,
|
|
191
194
|
};
|
|
192
|
-
const {
|
|
195
|
+
const { context } = createFileParseContextDefinition(params);
|
|
193
196
|
return {
|
|
194
197
|
datasetId,
|
|
195
|
-
async parse(
|
|
198
|
+
async parse(runtime, options = {}) {
|
|
196
199
|
const triggerEvent = {
|
|
197
|
-
id: (
|
|
198
|
-
type:
|
|
199
|
-
channel:
|
|
200
|
+
id: createDatasetId(),
|
|
201
|
+
type: INPUT_TEXT_ITEM_TYPE,
|
|
202
|
+
channel: WEB_CHANNEL,
|
|
200
203
|
createdAt: new Date().toISOString(),
|
|
201
204
|
content: {
|
|
202
|
-
parts: [
|
|
205
|
+
parts: [
|
|
206
|
+
{
|
|
207
|
+
type: "text",
|
|
208
|
+
text: options.prompt ?? "generate a dataset for this file",
|
|
209
|
+
},
|
|
210
|
+
{
|
|
211
|
+
type: "file",
|
|
212
|
+
fileId,
|
|
213
|
+
filename: opts?.filename ?? "resource-file",
|
|
214
|
+
mediaType: opts?.mediaType ?? "application/octet-stream",
|
|
215
|
+
},
|
|
216
|
+
],
|
|
203
217
|
},
|
|
204
218
|
};
|
|
205
|
-
|
|
206
|
-
|
|
219
|
+
params.sourceEventId = triggerEvent.id;
|
|
220
|
+
params.sourcePartIndex = 1;
|
|
221
|
+
params.filename = opts?.filename ?? "resource-file";
|
|
222
|
+
params.mediaType = opts?.mediaType ?? "application/octet-stream";
|
|
223
|
+
const shell = await context.react(triggerEvent, {
|
|
224
|
+
runtime: runtime,
|
|
207
225
|
context: { key: `dataset:${datasetId}` },
|
|
208
|
-
|
|
226
|
+
durable: options.durable ?? false,
|
|
227
|
+
options: {
|
|
228
|
+
preventClose: true,
|
|
229
|
+
sendFinish: false,
|
|
230
|
+
maxIterations: 20,
|
|
231
|
+
maxModelSteps: 5,
|
|
232
|
+
},
|
|
233
|
+
__initialContent: {
|
|
234
|
+
...(options.initialContent ?? {}),
|
|
235
|
+
datasetId,
|
|
236
|
+
fileId,
|
|
237
|
+
sourceEventId: triggerEvent.id,
|
|
238
|
+
sourcePartIndex: 1,
|
|
239
|
+
filename: opts?.filename ?? "resource-file",
|
|
240
|
+
mediaType: opts?.mediaType ?? "application/octet-stream",
|
|
241
|
+
instructions: opts?.instructions ?? "",
|
|
242
|
+
sandboxId: opts?.sandboxId ?? "",
|
|
243
|
+
sandboxState: opts?.sandboxState ?? { initialized: false, filePath: "" },
|
|
244
|
+
filePreview: opts?.filePreview,
|
|
245
|
+
schema: opts?.schema,
|
|
246
|
+
},
|
|
209
247
|
});
|
|
248
|
+
await awaitContextRun(shell.run);
|
|
210
249
|
return { datasetId };
|
|
211
250
|
},
|
|
212
|
-
|
|
213
|
-
story,
|
|
251
|
+
context,
|
|
214
252
|
};
|
|
215
253
|
}
|
|
216
|
-
|
|
254
|
+
export function registerFileParseContext(opts) {
|
|
255
|
+
createFileParseContextDefinition({
|
|
256
|
+
model: opts?.model,
|
|
257
|
+
reactor: opts?.reactor,
|
|
258
|
+
}).context;
|
|
259
|
+
}
|
|
260
|
+
registerFileParseContext();
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { FileParseContext, SandboxState } from "./file-dataset.types.js";
|
|
2
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
3
|
+
export declare function initializeFileParseSandboxStep(params: {
|
|
4
|
+
runtime: any;
|
|
5
|
+
sandboxId: string;
|
|
6
|
+
contextId: string;
|
|
7
|
+
executionId: string;
|
|
8
|
+
datasetId: string;
|
|
9
|
+
fileId: string;
|
|
10
|
+
sourceEventId?: string;
|
|
11
|
+
sourcePartIndex?: number;
|
|
12
|
+
filename?: string;
|
|
13
|
+
mediaType?: string;
|
|
14
|
+
state: SandboxState;
|
|
15
|
+
}): Promise<{
|
|
16
|
+
filePath: string;
|
|
17
|
+
state: SandboxState;
|
|
18
|
+
}>;
|
|
19
|
+
export declare function generateFileParsePreviewStep(params: {
|
|
20
|
+
runtime: any;
|
|
21
|
+
sandboxId: string;
|
|
22
|
+
sandboxFilePath: string;
|
|
23
|
+
datasetId: string;
|
|
24
|
+
}): Promise<FilePreviewContext>;
|
|
25
|
+
export declare function buildFileDatasetPromptStep(params: {
|
|
26
|
+
context: FileParseContext;
|
|
27
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { DATASET_OUTPUT_FILE_NAME } from "../datasetFiles.js";
|
|
2
|
+
import { prepareContextExecutionWorkspaceStep } from "../contextWorkspace.js";
|
|
3
|
+
import { buildFileDatasetPrompt } from "./prompts.js";
|
|
4
|
+
import { generateFilePreview } from "./filepreview.js";
|
|
5
|
+
export async function initializeFileParseSandboxStep(params) {
|
|
6
|
+
"use step";
|
|
7
|
+
if (params.state.initialized) {
|
|
8
|
+
return { filePath: params.state.filePath, state: params.state };
|
|
9
|
+
}
|
|
10
|
+
console.log(`[FileParseContext ${params.datasetId}] Preparing context execution workspace...`);
|
|
11
|
+
const workspace = await prepareContextExecutionWorkspaceStep({
|
|
12
|
+
runtime: params.runtime,
|
|
13
|
+
sandboxId: params.sandboxId,
|
|
14
|
+
contextId: params.contextId,
|
|
15
|
+
executionId: params.executionId,
|
|
16
|
+
files: [
|
|
17
|
+
{
|
|
18
|
+
fileId: params.fileId,
|
|
19
|
+
filename: params.filename,
|
|
20
|
+
mediaType: params.mediaType,
|
|
21
|
+
sourceEventId: params.sourceEventId,
|
|
22
|
+
sourcePartIndex: params.sourcePartIndex,
|
|
23
|
+
},
|
|
24
|
+
],
|
|
25
|
+
});
|
|
26
|
+
const sandboxFilePath = workspace.files[0]?.path ?? "";
|
|
27
|
+
if (!sandboxFilePath)
|
|
28
|
+
throw new Error("dataset_workspace_file_missing");
|
|
29
|
+
console.log(`[FileParseContext ${params.datasetId}] Context workspace created: ${workspace.root}`);
|
|
30
|
+
console.log(`[FileParseContext ${params.datasetId}] File saved: ${sandboxFilePath}`);
|
|
31
|
+
const state = {
|
|
32
|
+
initialized: true,
|
|
33
|
+
filePath: sandboxFilePath,
|
|
34
|
+
outputPath: `${workspace.outputDir}/${DATASET_OUTPUT_FILE_NAME}`,
|
|
35
|
+
scriptsDir: workspace.scriptsDir,
|
|
36
|
+
manifestPath: workspace.manifestPath,
|
|
37
|
+
};
|
|
38
|
+
return { filePath: sandboxFilePath, state };
|
|
39
|
+
}
|
|
40
|
+
export async function generateFileParsePreviewStep(params) {
|
|
41
|
+
"use step";
|
|
42
|
+
return await generateFilePreview(params.runtime, params.sandboxId, params.sandboxFilePath, params.datasetId);
|
|
43
|
+
}
|
|
44
|
+
export async function buildFileDatasetPromptStep(params) {
|
|
45
|
+
"use step";
|
|
46
|
+
return buildFileDatasetPrompt(params.context);
|
|
47
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import type { ContextReactor } from "@ekairos/events";
|
|
2
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
3
|
+
export type SandboxState = {
|
|
4
|
+
initialized: boolean;
|
|
5
|
+
filePath: string;
|
|
6
|
+
outputPath?: string;
|
|
7
|
+
scriptsDir?: string;
|
|
8
|
+
manifestPath?: string;
|
|
9
|
+
};
|
|
10
|
+
export type FileParseContext = {
|
|
11
|
+
datasetId: string;
|
|
12
|
+
fileId: string;
|
|
13
|
+
instructions: string;
|
|
14
|
+
sandboxConfig: {
|
|
15
|
+
filePath: string;
|
|
16
|
+
outputPath?: string;
|
|
17
|
+
scriptsDir?: string;
|
|
18
|
+
manifestPath?: string;
|
|
19
|
+
};
|
|
20
|
+
analysis: any[];
|
|
21
|
+
schema: any | null;
|
|
22
|
+
plan: any | null;
|
|
23
|
+
executionResult: any | null;
|
|
24
|
+
errors: string[];
|
|
25
|
+
iterationCount: number;
|
|
26
|
+
filePreview?: FilePreviewContext;
|
|
27
|
+
};
|
|
28
|
+
export type FileParseContextParams = {
|
|
29
|
+
fileId?: string;
|
|
30
|
+
instructions?: string;
|
|
31
|
+
sandboxId?: string;
|
|
32
|
+
datasetId?: string;
|
|
33
|
+
model?: string;
|
|
34
|
+
reactor?: ContextReactor<any, any>;
|
|
35
|
+
sandboxState?: SandboxState;
|
|
36
|
+
filePreview?: FilePreviewContext;
|
|
37
|
+
schema?: any | null;
|
|
38
|
+
sourceEventId?: string;
|
|
39
|
+
sourcePartIndex?: number;
|
|
40
|
+
filename?: string;
|
|
41
|
+
mediaType?: string;
|
|
42
|
+
};
|
|
43
|
+
export type FileParseRunOptions = {
|
|
44
|
+
prompt?: string;
|
|
45
|
+
durable?: boolean;
|
|
46
|
+
initialContent?: Record<string, any>;
|
|
47
|
+
};
|
|
48
|
+
export type FileParseContextBuilder<Env extends {
|
|
49
|
+
orgId: string;
|
|
50
|
+
}> = {
|
|
51
|
+
datasetId: string;
|
|
52
|
+
context: any;
|
|
53
|
+
};
|
|
54
|
+
export type DatasetResult = {
|
|
55
|
+
id: string;
|
|
56
|
+
status?: string;
|
|
57
|
+
title?: string;
|
|
58
|
+
schema?: any;
|
|
59
|
+
analysis?: any;
|
|
60
|
+
calculatedTotalRows?: number;
|
|
61
|
+
actualGeneratedRowCount?: number;
|
|
62
|
+
createdAt?: number;
|
|
63
|
+
updatedAt?: number;
|
|
64
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,40 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
metadata?: {
|
|
4
|
-
description: string;
|
|
5
|
-
script: string;
|
|
6
|
-
command: string;
|
|
7
|
-
stdout: string;
|
|
8
|
-
stderr: string;
|
|
9
|
-
};
|
|
10
|
-
head?: {
|
|
11
|
-
description: string;
|
|
12
|
-
script: string;
|
|
13
|
-
command: string;
|
|
14
|
-
stdout: string;
|
|
15
|
-
stderr: string;
|
|
16
|
-
};
|
|
17
|
-
tail?: {
|
|
18
|
-
description: string;
|
|
19
|
-
script: string;
|
|
20
|
-
command: string;
|
|
21
|
-
stdout: string;
|
|
22
|
-
stderr: string;
|
|
23
|
-
};
|
|
24
|
-
mid?: {
|
|
25
|
-
description: string;
|
|
26
|
-
script: string;
|
|
27
|
-
command: string;
|
|
28
|
-
stdout: string;
|
|
29
|
-
stderr: string;
|
|
30
|
-
};
|
|
31
|
-
};
|
|
1
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
2
|
+
export type { FilePreviewContext } from "./filepreview.types.js";
|
|
32
3
|
interface PreviewOptions {
|
|
33
4
|
headLines?: number;
|
|
34
5
|
tailLines?: number;
|
|
35
6
|
midLines?: number;
|
|
36
7
|
}
|
|
37
|
-
export declare function
|
|
38
|
-
export declare function
|
|
39
|
-
export
|
|
40
|
-
//# sourceMappingURL=filepreview.d.ts.map
|
|
8
|
+
export declare function getEmbeddedFilePreviewScriptBase64(scriptName: string): string;
|
|
9
|
+
export declare function ensurePreviewScriptsAvailable(_runtime: any, _sandboxId: string): Promise<void>;
|
|
10
|
+
export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
|