@ekairos/dataset 1.22.49-beta.development.0 → 1.22.51-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/materialize.d.ts +1 -11
- package/dist/builder/materialize.js +25 -77
- package/dist/builder/materializeQuery.d.ts +11 -0
- package/dist/builder/materializeQuery.js +40 -0
- package/dist/builder/persistence.js +13 -21
- package/dist/builder/types.d.ts +3 -0
- package/dist/clearDataset.tool.d.ts +2 -2
- package/dist/clearDataset.tool.js +3 -3
- package/dist/completeDataset.tool.d.ts +31 -3
- package/dist/completeDataset.tool.js +101 -13
- package/dist/dataset/steps.d.ts +32 -8
- package/dist/dataset/steps.js +69 -13
- package/dist/dataset.js +13 -7
- package/dist/executeCommand.tool.d.ts +2 -2
- package/dist/executeCommand.tool.js +3 -3
- package/dist/file/file-dataset.agent.d.ts +17 -11
- package/dist/file/file-dataset.agent.js +54 -47
- package/dist/file/filepreview.d.ts +2 -2
- package/dist/file/filepreview.js +13 -13
- package/dist/file/generateSchema.tool.d.ts +2 -2
- package/dist/file/generateSchema.tool.js +2 -2
- package/dist/file/prompts.d.ts +2 -2
- package/dist/file/prompts.js +6 -1
- package/dist/file/steps.d.ts +1 -1
- package/dist/file/steps.js +8 -2
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/query/queryDomain.d.ts +3 -3
- package/dist/query/queryDomain.js +3 -3
- package/dist/query/queryDomain.step.d.ts +1 -0
- package/dist/query/queryDomain.step.js +8 -4
- package/dist/sandbox/steps.d.ts +6 -6
- package/dist/sandbox/steps.js +16 -12
- package/dist/transform/filepreview.d.ts +1 -1
- package/dist/transform/filepreview.js +6 -6
- package/dist/transform/index.d.ts +1 -1
- package/dist/transform/index.js +1 -1
- package/dist/transform/prompts.js +4 -1
- package/dist/transform/transform-dataset.agent.d.ts +9 -3
- package/dist/transform/transform-dataset.agent.js +39 -32
- package/dist/transform/transformDataset.d.ts +3 -2
- package/dist/transform/transformDataset.js +10 -9
- package/package.json +19 -5
- package/dist/eventsReactRuntime.d.ts +0 -21
- package/dist/eventsReactRuntime.js +0 -25
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { createContext,
|
|
1
|
+
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
|
|
2
2
|
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
3
3
|
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
4
|
-
import { createCompleteDatasetTool } from "../completeDataset.tool.js";
|
|
4
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed } from "../completeDataset.tool.js";
|
|
5
5
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
6
6
|
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
7
7
|
import { buildFileDatasetPrompt } from "./prompts.js";
|
|
@@ -10,16 +10,24 @@ import { id } from "@instantdb/admin";
|
|
|
10
10
|
import { getDatasetWorkstation } from "../datasetFiles.js";
|
|
11
11
|
import { readInstantFileStep } from "./steps.js";
|
|
12
12
|
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
async function awaitContextRun(run) {
|
|
14
|
+
if (!run)
|
|
15
|
+
return;
|
|
16
|
+
if (run.returnValue) {
|
|
17
|
+
await run.returnValue;
|
|
18
|
+
return;
|
|
19
|
+
}
|
|
20
|
+
await run;
|
|
21
|
+
}
|
|
22
|
+
async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
|
|
15
23
|
if (state.initialized) {
|
|
16
24
|
return state.filePath;
|
|
17
25
|
}
|
|
18
|
-
console.log(`[
|
|
19
|
-
await ensurePreviewScriptsAvailable(
|
|
20
|
-
console.log(`[
|
|
26
|
+
console.log(`[FileParseContext ${datasetId}] Initializing sandbox...`);
|
|
27
|
+
await ensurePreviewScriptsAvailable(runtime, sandboxId);
|
|
28
|
+
console.log(`[FileParseContext ${datasetId}] Installing Python dependencies...`);
|
|
21
29
|
const pipInstall = await runDatasetSandboxCommandStep({
|
|
22
|
-
|
|
30
|
+
runtime,
|
|
23
31
|
sandboxId,
|
|
24
32
|
cmd: "python",
|
|
25
33
|
args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
|
|
@@ -28,12 +36,12 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
28
36
|
if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
|
|
29
37
|
throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
|
|
30
38
|
}
|
|
31
|
-
console.log(`[
|
|
32
|
-
const file = await readInstantFileStep({
|
|
33
|
-
console.log(`[
|
|
39
|
+
console.log(`[FileParseContext ${datasetId}] Fetching file from InstantDB...`);
|
|
40
|
+
const file = await readInstantFileStep({ runtime, fileId });
|
|
41
|
+
console.log(`[FileParseContext ${datasetId}] Creating dataset workstation...`);
|
|
34
42
|
const workstation = getDatasetWorkstation(datasetId);
|
|
35
43
|
await runDatasetSandboxCommandStep({
|
|
36
|
-
|
|
44
|
+
runtime,
|
|
37
45
|
sandboxId,
|
|
38
46
|
cmd: "mkdir",
|
|
39
47
|
args: ["-p", workstation],
|
|
@@ -42,7 +50,7 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
42
50
|
const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
|
|
43
51
|
const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
|
|
44
52
|
await writeDatasetSandboxFilesStep({
|
|
45
|
-
|
|
53
|
+
runtime,
|
|
46
54
|
sandboxId,
|
|
47
55
|
files: [
|
|
48
56
|
{
|
|
@@ -51,14 +59,14 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
51
59
|
},
|
|
52
60
|
],
|
|
53
61
|
});
|
|
54
|
-
console.log(`[
|
|
55
|
-
console.log(`[
|
|
62
|
+
console.log(`[FileParseContext ${datasetId}] ✅ Workstation created: ${workstation}`);
|
|
63
|
+
console.log(`[FileParseContext ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
|
|
56
64
|
state.filePath = sandboxFilePath;
|
|
57
65
|
state.initialized = true;
|
|
58
66
|
return sandboxFilePath;
|
|
59
67
|
}
|
|
60
68
|
/**
|
|
61
|
-
*
|
|
69
|
+
* FileParseContext
|
|
62
70
|
*
|
|
63
71
|
* Uso:
|
|
64
72
|
* - Crear una instancia con `fileId`, `instructions` y un `sandbox`
|
|
@@ -67,27 +75,27 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
67
75
|
*
|
|
68
76
|
* Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
|
|
69
77
|
*/
|
|
70
|
-
function
|
|
78
|
+
function createFileParseContextDefinition(params) {
|
|
71
79
|
const datasetId = params.datasetId ?? id();
|
|
72
80
|
const model = params.model ?? "openai/gpt-5";
|
|
73
|
-
let
|
|
74
|
-
.context(async (stored,
|
|
81
|
+
let contextBuilder = createContext("file.parse")
|
|
82
|
+
.context(async (stored, _env, runtime) => {
|
|
75
83
|
const previous = stored?.content ?? {};
|
|
76
84
|
const sandboxState = previous?.sandboxState ?? { initialized: false, filePath: "" };
|
|
77
85
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
78
86
|
if (!sandboxId) {
|
|
79
87
|
throw new Error("dataset_sandbox_required");
|
|
80
88
|
}
|
|
81
|
-
const sandboxFilePath = await initializeSandbox(
|
|
89
|
+
const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId, params.fileId, sandboxState);
|
|
82
90
|
let filePreview = undefined;
|
|
83
91
|
try {
|
|
84
|
-
filePreview = await generateFilePreview(
|
|
92
|
+
filePreview = await generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId);
|
|
85
93
|
}
|
|
86
94
|
catch {
|
|
87
95
|
// optional
|
|
88
96
|
}
|
|
89
97
|
let schema = null;
|
|
90
|
-
const datasetResult = await datasetGetByIdStep({
|
|
98
|
+
const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
|
|
91
99
|
if (datasetResult.ok && datasetResult.data.schema)
|
|
92
100
|
schema = datasetResult.data.schema;
|
|
93
101
|
const ctx = {
|
|
@@ -128,57 +136,57 @@ function createFileParseStoryDefinition(params) {
|
|
|
128
136
|
base,
|
|
129
137
|
].join("\n");
|
|
130
138
|
})
|
|
131
|
-
.actions(async (_stored,
|
|
139
|
+
.actions(async (_stored, _env, runtime) => {
|
|
132
140
|
const existingSchema = _stored?.content?.ctx?.schema?.schema;
|
|
133
141
|
const actions = {
|
|
134
142
|
executeCommand: createExecuteCommandTool({
|
|
135
143
|
datasetId,
|
|
136
144
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
137
|
-
|
|
145
|
+
runtime,
|
|
138
146
|
}),
|
|
139
147
|
completeDataset: createCompleteDatasetTool({
|
|
140
148
|
datasetId,
|
|
141
149
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
142
|
-
|
|
150
|
+
runtime,
|
|
143
151
|
}),
|
|
144
152
|
clearDataset: createClearDatasetTool({
|
|
145
153
|
datasetId,
|
|
146
154
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
147
|
-
|
|
155
|
+
runtime,
|
|
148
156
|
}),
|
|
149
157
|
};
|
|
150
158
|
if (!existingSchema) {
|
|
151
159
|
actions.generateSchema = createGenerateSchemaTool({
|
|
152
160
|
datasetId,
|
|
153
161
|
fileId: params.fileId,
|
|
154
|
-
|
|
162
|
+
runtime,
|
|
155
163
|
});
|
|
156
164
|
}
|
|
157
165
|
return actions;
|
|
158
166
|
})
|
|
159
167
|
.shouldContinue(({ reactionEvent }) => {
|
|
160
|
-
return !
|
|
168
|
+
return !didCompleteDatasetSucceed(reactionEvent);
|
|
161
169
|
});
|
|
162
170
|
if (params.reactor) {
|
|
163
|
-
|
|
171
|
+
contextBuilder = contextBuilder.reactor(params.reactor);
|
|
164
172
|
}
|
|
165
173
|
else {
|
|
166
|
-
|
|
174
|
+
contextBuilder = contextBuilder.model(model);
|
|
167
175
|
}
|
|
168
|
-
const
|
|
169
|
-
return { datasetId,
|
|
176
|
+
const context = contextBuilder.build();
|
|
177
|
+
return { datasetId, context };
|
|
170
178
|
}
|
|
171
179
|
/**
|
|
172
180
|
* Factory (DX-first):
|
|
173
181
|
*
|
|
174
182
|
* Usage:
|
|
175
|
-
* const { datasetId } = await
|
|
183
|
+
* const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
|
|
176
184
|
*
|
|
177
|
-
* -
|
|
178
|
-
* - All I/O happens in `"use step"` functions via Ekairos runtime
|
|
179
|
-
* - `parse()` is the entrypoint; it calls `
|
|
185
|
+
* - Uses the caller runtime; no secondary runtime is created.
|
|
186
|
+
* - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
|
|
187
|
+
* - `parse()` is the entrypoint; it calls `context.react(...)` internally.
|
|
180
188
|
*/
|
|
181
|
-
export function
|
|
189
|
+
export function createFileParseContext(fileId, opts) {
|
|
182
190
|
const params = {
|
|
183
191
|
fileId,
|
|
184
192
|
instructions: opts?.instructions,
|
|
@@ -187,30 +195,29 @@ export function createFileParseStory(fileId, opts) {
|
|
|
187
195
|
model: opts?.model,
|
|
188
196
|
reactor: opts?.reactor,
|
|
189
197
|
};
|
|
190
|
-
const { datasetId,
|
|
198
|
+
const { datasetId, context } = createFileParseContextDefinition(params);
|
|
191
199
|
return {
|
|
192
200
|
datasetId,
|
|
193
|
-
async parse(
|
|
201
|
+
async parse(runtime, options = {}) {
|
|
194
202
|
const triggerEvent = {
|
|
195
203
|
id: id(),
|
|
196
204
|
type: INPUT_TEXT_ITEM_TYPE,
|
|
197
205
|
channel: WEB_CHANNEL,
|
|
198
206
|
createdAt: new Date().toISOString(),
|
|
199
207
|
content: {
|
|
200
|
-
parts: [{ type: "text", text: prompt ?? "generate a dataset for this file" }],
|
|
208
|
+
parts: [{ type: "text", text: options.prompt ?? "generate a dataset for this file" }],
|
|
201
209
|
},
|
|
202
210
|
};
|
|
203
|
-
const
|
|
204
|
-
|
|
205
|
-
runtime,
|
|
211
|
+
const shell = await context.react(triggerEvent, {
|
|
212
|
+
runtime: runtime,
|
|
206
213
|
context: { key: `dataset:${datasetId}` },
|
|
207
|
-
durable: false,
|
|
214
|
+
durable: options.durable ?? false,
|
|
208
215
|
options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
|
|
209
216
|
});
|
|
210
|
-
await shell.run;
|
|
217
|
+
await awaitContextRun(shell.run);
|
|
211
218
|
return { datasetId };
|
|
212
219
|
},
|
|
213
|
-
// Optional: expose the built
|
|
214
|
-
|
|
220
|
+
// Optional: expose the built context for advanced callers (not required for parse DX)
|
|
221
|
+
context,
|
|
215
222
|
};
|
|
216
223
|
}
|
|
@@ -34,6 +34,6 @@ interface PreviewOptions {
|
|
|
34
34
|
tailLines?: number;
|
|
35
35
|
midLines?: number;
|
|
36
36
|
}
|
|
37
|
-
export declare function ensurePreviewScriptsAvailable(
|
|
38
|
-
export declare function generateFilePreview(
|
|
37
|
+
export declare function ensurePreviewScriptsAvailable(runtime: any, sandboxId: string): Promise<void>;
|
|
38
|
+
export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
|
|
39
39
|
export {};
|
package/dist/file/filepreview.js
CHANGED
|
@@ -41,7 +41,7 @@ function validateScriptResult(result, context) {
|
|
|
41
41
|
throw new Error(`${context} failed: ${stderr.substring(0, 500)}`);
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
-
export async function ensurePreviewScriptsAvailable(
|
|
44
|
+
export async function ensurePreviewScriptsAvailable(runtime, sandboxId) {
|
|
45
45
|
if (preparedSandboxIds.has(sandboxId)) {
|
|
46
46
|
return;
|
|
47
47
|
}
|
|
@@ -53,7 +53,7 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
53
53
|
const setupPromise = (async () => {
|
|
54
54
|
try {
|
|
55
55
|
await runDatasetSandboxCommandStep({
|
|
56
|
-
|
|
56
|
+
runtime,
|
|
57
57
|
sandboxId,
|
|
58
58
|
cmd: "mkdir",
|
|
59
59
|
args: ["-p", SANDBOX_SCRIPT_DIRECTORY],
|
|
@@ -79,7 +79,7 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
79
79
|
}
|
|
80
80
|
if (filesToWrite.length > 0) {
|
|
81
81
|
await writeDatasetSandboxFilesStep({
|
|
82
|
-
|
|
82
|
+
runtime,
|
|
83
83
|
sandboxId,
|
|
84
84
|
files: filesToWrite,
|
|
85
85
|
});
|
|
@@ -95,13 +95,13 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
95
95
|
throw error;
|
|
96
96
|
}
|
|
97
97
|
}
|
|
98
|
-
export async function generateFilePreview(
|
|
98
|
+
export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId, options = {}) {
|
|
99
99
|
const context = {
|
|
100
100
|
totalRows: 0,
|
|
101
101
|
};
|
|
102
102
|
try {
|
|
103
|
-
await ensurePreviewScriptsAvailable(
|
|
104
|
-
const metadataResult = await runScript(
|
|
103
|
+
await ensurePreviewScriptsAvailable(runtime, sandboxId);
|
|
104
|
+
const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
|
|
105
105
|
context.metadata = metadataResult;
|
|
106
106
|
let isExcel = false;
|
|
107
107
|
if (metadataResult.stdout) {
|
|
@@ -127,23 +127,23 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
|
|
|
127
127
|
const midScript = isExcel ? "preview_mid_excel.py" : "preview_mid_csv.py";
|
|
128
128
|
if (totalRows <= headLines) {
|
|
129
129
|
console.log(`[Dataset ${datasetId}] File has ${totalRows} rows, reading all with head only`);
|
|
130
|
-
const headResult = await runScript(
|
|
130
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
|
|
131
131
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
132
132
|
context.head = headResult;
|
|
133
133
|
return context;
|
|
134
134
|
}
|
|
135
135
|
if (headLines + tailLines >= totalRows) {
|
|
136
136
|
console.log(`[Dataset ${datasetId}] Head + tail would cover entire file (${totalRows} rows), reading all with head only`);
|
|
137
|
-
const headResult = await runScript(
|
|
137
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
|
|
138
138
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
139
139
|
context.head = headResult;
|
|
140
140
|
return context;
|
|
141
141
|
}
|
|
142
142
|
console.log(`[Dataset ${datasetId}] Reading head (${headLines} rows) and tail (${tailLines} rows) from ${totalRows} total rows`);
|
|
143
|
-
const headResult = await runScript(
|
|
143
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(headLines)], `Reads the first ${headLines} rows of the file`);
|
|
144
144
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
145
145
|
context.head = headResult;
|
|
146
|
-
const tailResult = await runScript(
|
|
146
|
+
const tailResult = await runScript(runtime, sandboxId, tailScript, [sandboxFilePath, String(tailLines)], `Reads the last ${tailLines} rows of the file`);
|
|
147
147
|
validateScriptResult(tailResult, `preview_tail for ${datasetId}`);
|
|
148
148
|
context.tail = tailResult;
|
|
149
149
|
const midLines = options.midLines || DEFAULT_MID_LINES;
|
|
@@ -152,7 +152,7 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
|
|
|
152
152
|
const midStart = headLines;
|
|
153
153
|
const midEnd = totalRows - tailLines;
|
|
154
154
|
console.log(`[Dataset ${datasetId}] Large gap (${gapSize} rows), adding mid sample (${midLines} rows)`);
|
|
155
|
-
const midResult = await runScript(
|
|
155
|
+
const midResult = await runScript(runtime, sandboxId, midScript, [sandboxFilePath, String(midStart), String(midEnd), String(midLines)], `Samples ${midLines} rows from the middle section (rows ${midStart + 1} to ${midEnd})`);
|
|
156
156
|
validateScriptResult(midResult, `preview_mid for ${datasetId}`);
|
|
157
157
|
context.mid = midResult;
|
|
158
158
|
}
|
|
@@ -162,7 +162,7 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
|
|
|
162
162
|
}
|
|
163
163
|
return context;
|
|
164
164
|
}
|
|
165
|
-
async function runScript(
|
|
165
|
+
async function runScript(runtime, sandboxId, scriptName, args, description) {
|
|
166
166
|
const scriptPath = `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`;
|
|
167
167
|
const command = `python ${scriptPath} ${args.join(" ")}`;
|
|
168
168
|
let scriptContent = "";
|
|
@@ -175,7 +175,7 @@ async function runScript(env, sandboxId, scriptName, args, description) {
|
|
|
175
175
|
}
|
|
176
176
|
try {
|
|
177
177
|
const result = await runDatasetSandboxCommandStep({
|
|
178
|
-
|
|
178
|
+
runtime,
|
|
179
179
|
sandboxId,
|
|
180
180
|
cmd: "python",
|
|
181
181
|
args: [scriptPath, ...args],
|
|
@@ -2,9 +2,9 @@ interface GenerateSchemaToolParams {
|
|
|
2
2
|
datasetId: string;
|
|
3
3
|
isNested?: boolean;
|
|
4
4
|
fileId?: string;
|
|
5
|
-
|
|
5
|
+
runtime: any;
|
|
6
6
|
}
|
|
7
|
-
export declare function createGenerateSchemaTool({ datasetId, isNested, fileId,
|
|
7
|
+
export declare function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }: GenerateSchemaToolParams): import("ai").Tool<{
|
|
8
8
|
schemaTitle: string;
|
|
9
9
|
schemaDescription: string;
|
|
10
10
|
schemaJson: string;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { tool } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { datasetUpdateSchemaStep } from "../dataset/steps.js";
|
|
4
|
-
export function createGenerateSchemaTool({ datasetId, isNested, fileId,
|
|
4
|
+
export function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }) {
|
|
5
5
|
return tool({
|
|
6
6
|
description: `Generate a formal JSON schema for a SINGLE RECORD (row) from the file. This schema describes the structure of ONE record, not the entire dataset or array of records. Requirements:
|
|
7
7
|
1. Schema describes ONE RECORD structure only (no array wrappers)
|
|
@@ -72,7 +72,7 @@ export function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
|
|
|
72
72
|
console.log(`[Dataset ${datasetId}] Schema JSON:`);
|
|
73
73
|
console.log(JSON.stringify(parsedSchema, null, 2));
|
|
74
74
|
const updateResult = await datasetUpdateSchemaStep({
|
|
75
|
-
|
|
75
|
+
runtime,
|
|
76
76
|
datasetId,
|
|
77
77
|
schema: schemaData,
|
|
78
78
|
status: "schema_complete",
|
package/dist/file/prompts.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
2
|
-
export declare function buildFileDatasetPrompt(context:
|
|
1
|
+
import { FileParseContext } from "./file-dataset.agent.js";
|
|
2
|
+
export declare function buildFileDatasetPrompt(context: FileParseContext): string;
|
package/dist/file/prompts.js
CHANGED
|
@@ -147,6 +147,8 @@ function buildInstructions(context) {
|
|
|
147
147
|
.ele("Action").txt("Use the provided schema as the output contract for every row in output.jsonl").up()
|
|
148
148
|
.ele("Requirements")
|
|
149
149
|
.ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
|
|
150
|
+
.ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
|
|
151
|
+
.ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the source language").up()
|
|
150
152
|
.ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
|
|
151
153
|
.up()
|
|
152
154
|
.up();
|
|
@@ -170,6 +172,7 @@ function buildInstructions(context) {
|
|
|
170
172
|
.ele("Requirements")
|
|
171
173
|
.ele("Requirement").txt("Parse ALL data rows/records from the file (exclude header sections and metadata)").up()
|
|
172
174
|
.ele("Requirement").txt("Output JSONL format: each line is {\"type\": \"row\", \"data\": {...record...}}").up()
|
|
175
|
+
.ele("Requirement").txt("When a schema is provided, each data object must contain the exact required schema keys and must not use translated or synonymous keys").up()
|
|
173
176
|
.ele("Requirement").txt("Extract ONLY data records; skip any header lines, summary sections, or file metadata").up()
|
|
174
177
|
.ele("Requirement").txt(`Save output to: ${outputPath}`).up()
|
|
175
178
|
.ele("Requirement").txt("Use descriptive scriptName in snake_case (e.g., 'parse_csv_to_jsonl')").up()
|
|
@@ -177,11 +180,13 @@ function buildInstructions(context) {
|
|
|
177
180
|
.up()
|
|
178
181
|
.ele("Step", { number: "4", name: "Complete and Validate" })
|
|
179
182
|
.ele("Action").txt("Call completeDataset to validate the dataset").up()
|
|
180
|
-
.ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns
|
|
183
|
+
.ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
|
|
181
184
|
.up()
|
|
182
185
|
.up()
|
|
183
186
|
.ele("Rules")
|
|
184
187
|
.ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
|
|
188
|
+
.ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the source language").up()
|
|
189
|
+
.ele("Rule").txt("Original/source language applies to extracted values only, not to JSON object keys").up()
|
|
185
190
|
.ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
|
|
186
191
|
.ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
|
|
187
192
|
.ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()
|
package/dist/file/steps.d.ts
CHANGED
package/dist/file/steps.js
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
async function getRuntimeDb(runtime) {
|
|
2
|
+
if (!runtime) {
|
|
3
|
+
throw new Error("Dataset file step requires runtime.");
|
|
4
|
+
}
|
|
5
|
+
const db = runtime.db;
|
|
6
|
+
return typeof db === "function" ? await db.call(runtime) : db;
|
|
7
|
+
}
|
|
2
8
|
export async function readInstantFileStep(params) {
|
|
3
9
|
"use step";
|
|
4
|
-
const db =
|
|
10
|
+
const db = await getRuntimeDb(params.runtime);
|
|
5
11
|
const fileQuery = await db.query({
|
|
6
12
|
$files: { $: { where: { id: params.fileId }, limit: 1 } },
|
|
7
13
|
});
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import type
|
|
2
|
-
export type QueryDomainInput = QueryDomainStepInput
|
|
1
|
+
import { type QueryDomainStepInput, type QueryDomainStepResult } from "./queryDomain.step.js";
|
|
2
|
+
export type QueryDomainInput = Omit<QueryDomainStepInput, "runtime">;
|
|
3
3
|
export type QueryDomainResult = QueryDomainStepResult;
|
|
4
4
|
/**
|
|
5
5
|
* Workflow-compatible domain query.
|
|
6
6
|
* Always returns a dataset + preview rows.
|
|
7
7
|
*/
|
|
8
|
-
export declare function queryDomain(input: QueryDomainInput): Promise<QueryDomainResult>;
|
|
8
|
+
export declare function queryDomain(runtime: any, input: QueryDomainInput): Promise<QueryDomainResult>;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
import { queryDomainStep } from "./queryDomain.step.js";
|
|
1
2
|
/**
|
|
2
3
|
* Workflow-compatible domain query.
|
|
3
4
|
* Always returns a dataset + preview rows.
|
|
4
5
|
*/
|
|
5
|
-
export async function queryDomain(input) {
|
|
6
|
+
export async function queryDomain(runtime, input) {
|
|
6
7
|
"use step";
|
|
7
|
-
|
|
8
|
-
return await queryDomainStep(input);
|
|
8
|
+
return await queryDomainStep({ runtime, ...input });
|
|
9
9
|
}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { id as newId } from "@instantdb/admin";
|
|
2
|
-
import { getContextRuntime, getContextEnv } from "@ekairos/events/runtime";
|
|
3
2
|
import { DatasetService } from "../service.js";
|
|
4
3
|
function normalizeRows(result) {
|
|
5
4
|
if (!result || typeof result !== "object")
|
|
@@ -46,11 +45,16 @@ function inferSchema(rows) {
|
|
|
46
45
|
}
|
|
47
46
|
return { schema };
|
|
48
47
|
}
|
|
48
|
+
async function getRuntimeDb(runtime) {
|
|
49
|
+
if (!runtime) {
|
|
50
|
+
throw new Error("Dataset query step requires runtime.");
|
|
51
|
+
}
|
|
52
|
+
const db = runtime.db;
|
|
53
|
+
return typeof db === "function" ? await db.call(runtime) : db;
|
|
54
|
+
}
|
|
49
55
|
export async function queryDomainStep(params) {
|
|
50
56
|
"use step";
|
|
51
|
-
const
|
|
52
|
-
const runtime = await getContextRuntime(env);
|
|
53
|
-
const db = runtime.db;
|
|
57
|
+
const db = await getRuntimeDb(params.runtime);
|
|
54
58
|
const service = new DatasetService(db);
|
|
55
59
|
const datasetId = params.datasetId ?? newId();
|
|
56
60
|
const queryResult = await db.query(params.query);
|
package/dist/sandbox/steps.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export type DatasetSandboxId = string;
|
|
2
2
|
export type CreateDatasetSandboxParams = {
|
|
3
|
-
|
|
3
|
+
sandboxRuntime?: string;
|
|
4
4
|
timeoutMs?: number;
|
|
5
5
|
ports?: number[];
|
|
6
6
|
resources?: {
|
|
@@ -15,18 +15,18 @@ export type DatasetSandboxRunCommandResult = {
|
|
|
15
15
|
stderr: string;
|
|
16
16
|
};
|
|
17
17
|
export declare function createDatasetSandboxStep(params: {
|
|
18
|
-
|
|
18
|
+
runtime: any;
|
|
19
19
|
} & CreateDatasetSandboxParams): Promise<{
|
|
20
20
|
sandboxId: DatasetSandboxId;
|
|
21
21
|
}>;
|
|
22
22
|
export declare function runDatasetSandboxCommandStep(params: {
|
|
23
|
-
|
|
23
|
+
runtime: any;
|
|
24
24
|
sandboxId: DatasetSandboxId;
|
|
25
25
|
cmd: string;
|
|
26
26
|
args?: string[];
|
|
27
27
|
}): Promise<DatasetSandboxRunCommandResult>;
|
|
28
28
|
export declare function writeDatasetSandboxFilesStep(params: {
|
|
29
|
-
|
|
29
|
+
runtime: any;
|
|
30
30
|
sandboxId: DatasetSandboxId;
|
|
31
31
|
files: Array<{
|
|
32
32
|
path: string;
|
|
@@ -34,13 +34,13 @@ export declare function writeDatasetSandboxFilesStep(params: {
|
|
|
34
34
|
}>;
|
|
35
35
|
}): Promise<void>;
|
|
36
36
|
export declare function readDatasetSandboxFileStep(params: {
|
|
37
|
-
|
|
37
|
+
runtime: any;
|
|
38
38
|
sandboxId: DatasetSandboxId;
|
|
39
39
|
path: string;
|
|
40
40
|
}): Promise<{
|
|
41
41
|
contentBase64: string;
|
|
42
42
|
}>;
|
|
43
43
|
export declare function stopDatasetSandboxStep(params: {
|
|
44
|
-
|
|
44
|
+
runtime: any;
|
|
45
45
|
sandboxId: DatasetSandboxId;
|
|
46
46
|
}): Promise<void>;
|
package/dist/sandbox/steps.js
CHANGED
|
@@ -2,12 +2,19 @@ import { execFile } from "node:child_process";
|
|
|
2
2
|
import { promises as fs } from "node:fs";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import { promisify } from "node:util";
|
|
5
|
-
import {
|
|
5
|
+
import { SandboxService } from "@ekairos/sandbox";
|
|
6
6
|
const execFileAsync = promisify(execFile);
|
|
7
7
|
const localSandboxRoots = new Map();
|
|
8
8
|
function isLocalDatasetSandboxMode() {
|
|
9
9
|
return String(process.env.DATASET_TEST_LOCAL_SANDBOX ?? "").trim() === "1";
|
|
10
10
|
}
|
|
11
|
+
async function getRuntimeDb(runtime) {
|
|
12
|
+
if (!runtime) {
|
|
13
|
+
throw new Error("Dataset sandbox step requires runtime.");
|
|
14
|
+
}
|
|
15
|
+
const db = runtime.db;
|
|
16
|
+
return typeof db === "function" ? await db.call(runtime) : db;
|
|
17
|
+
}
|
|
11
18
|
function getLocalSandboxRoot(sandboxId) {
|
|
12
19
|
return (localSandboxRoots.get(sandboxId) ||
|
|
13
20
|
path.resolve(process.cwd(), "test-results", "dataset-sandboxes", sandboxId));
|
|
@@ -63,10 +70,11 @@ export async function createDatasetSandboxStep(params) {
|
|
|
63
70
|
await ensureLocalSandboxRoot(sandboxId);
|
|
64
71
|
return { sandboxId };
|
|
65
72
|
}
|
|
66
|
-
const db =
|
|
67
|
-
const { SandboxService } = (await import("@ekairos/sandbox"));
|
|
73
|
+
const db = await getRuntimeDb(params.runtime);
|
|
68
74
|
const service = new SandboxService(db);
|
|
69
|
-
const
|
|
75
|
+
const sandboxParams = { ...params, runtime: params.sandboxRuntime };
|
|
76
|
+
delete sandboxParams.sandboxRuntime;
|
|
77
|
+
const created = await service.createSandbox(sandboxParams);
|
|
70
78
|
if (!created.ok)
|
|
71
79
|
throw new Error(created.error);
|
|
72
80
|
return { sandboxId: created.data.sandboxId };
|
|
@@ -80,8 +88,7 @@ export async function runDatasetSandboxCommandStep(params) {
|
|
|
80
88
|
args: params.args,
|
|
81
89
|
});
|
|
82
90
|
}
|
|
83
|
-
const db =
|
|
84
|
-
const { SandboxService } = (await import("@ekairos/sandbox"));
|
|
91
|
+
const db = await getRuntimeDb(params.runtime);
|
|
85
92
|
const service = new SandboxService(db);
|
|
86
93
|
const result = await service.runCommand(params.sandboxId, params.cmd, params.args ?? []);
|
|
87
94
|
if (!result.ok)
|
|
@@ -101,8 +108,7 @@ export async function writeDatasetSandboxFilesStep(params) {
|
|
|
101
108
|
}
|
|
102
109
|
return;
|
|
103
110
|
}
|
|
104
|
-
const db =
|
|
105
|
-
const { SandboxService } = (await import("@ekairos/sandbox"));
|
|
111
|
+
const db = await getRuntimeDb(params.runtime);
|
|
106
112
|
const service = new SandboxService(db);
|
|
107
113
|
const result = await service.writeFiles(params.sandboxId, params.files);
|
|
108
114
|
if (!result.ok)
|
|
@@ -114,8 +120,7 @@ export async function readDatasetSandboxFileStep(params) {
|
|
|
114
120
|
const content = await fs.readFile(params.path);
|
|
115
121
|
return { contentBase64: Buffer.from(content).toString("base64") };
|
|
116
122
|
}
|
|
117
|
-
const db =
|
|
118
|
-
const { SandboxService } = (await import("@ekairos/sandbox"));
|
|
123
|
+
const db = await getRuntimeDb(params.runtime);
|
|
119
124
|
const service = new SandboxService(db);
|
|
120
125
|
const result = await service.readFile(params.sandboxId, params.path);
|
|
121
126
|
if (!result.ok)
|
|
@@ -130,8 +135,7 @@ export async function stopDatasetSandboxStep(params) {
|
|
|
130
135
|
localSandboxRoots.delete(params.sandboxId);
|
|
131
136
|
return;
|
|
132
137
|
}
|
|
133
|
-
const db =
|
|
134
|
-
const { SandboxService } = (await import("@ekairos/sandbox"));
|
|
138
|
+
const db = await getRuntimeDb(params.runtime);
|
|
135
139
|
const service = new SandboxService(db);
|
|
136
140
|
const result = await service.stopSandbox(params.sandboxId);
|
|
137
141
|
if (!result.ok)
|
|
@@ -18,5 +18,5 @@ export type TransformSourcePreviewContext = {
|
|
|
18
18
|
interface PreviewOptions {
|
|
19
19
|
headLines?: number;
|
|
20
20
|
}
|
|
21
|
-
export declare function generateSourcePreview(
|
|
21
|
+
export declare function generateSourcePreview(runtime: any, sandboxId: string, sourcePath: string, datasetId: string, options?: PreviewOptions): Promise<TransformSourcePreviewContext>;
|
|
22
22
|
export {};
|