@ekairos/dataset 1.22.48-beta.development.0 → 1.22.50-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/materialize.d.ts +1 -11
- package/dist/builder/materialize.js +25 -77
- package/dist/builder/materializeQuery.d.ts +11 -0
- package/dist/builder/materializeQuery.js +40 -0
- package/dist/builder/persistence.js +13 -21
- package/dist/builder/types.d.ts +3 -0
- package/dist/clearDataset.tool.d.ts +2 -2
- package/dist/clearDataset.tool.js +3 -3
- package/dist/completeDataset.tool.d.ts +31 -3
- package/dist/completeDataset.tool.js +101 -13
- package/dist/dataset/steps.d.ts +32 -8
- package/dist/dataset/steps.js +69 -13
- package/dist/dataset.js +13 -7
- package/dist/executeCommand.tool.d.ts +2 -2
- package/dist/executeCommand.tool.js +3 -3
- package/dist/file/file-dataset.agent.d.ts +17 -11
- package/dist/file/file-dataset.agent.js +54 -47
- package/dist/file/filepreview.d.ts +2 -2
- package/dist/file/filepreview.js +24 -17
- package/dist/file/generateSchema.tool.d.ts +2 -2
- package/dist/file/generateSchema.tool.js +2 -2
- package/dist/file/prompts.d.ts +2 -2
- package/dist/file/prompts.js +6 -1
- package/dist/file/steps.d.ts +1 -1
- package/dist/file/steps.js +8 -2
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/query/queryDomain.d.ts +3 -3
- package/dist/query/queryDomain.js +3 -3
- package/dist/query/queryDomain.step.d.ts +1 -0
- package/dist/query/queryDomain.step.js +8 -4
- package/dist/sandbox/steps.d.ts +6 -6
- package/dist/sandbox/steps.js +16 -12
- package/dist/transform/filepreview.d.ts +1 -1
- package/dist/transform/filepreview.js +6 -6
- package/dist/transform/index.d.ts +1 -1
- package/dist/transform/index.js +1 -1
- package/dist/transform/prompts.js +4 -1
- package/dist/transform/transform-dataset.agent.d.ts +9 -3
- package/dist/transform/transform-dataset.agent.js +39 -32
- package/dist/transform/transformDataset.d.ts +3 -2
- package/dist/transform/transformDataset.js +10 -9
- package/package.json +19 -5
- package/dist/eventsReactRuntime.d.ts +0 -21
- package/dist/eventsReactRuntime.js +0 -25
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { createContext,
|
|
1
|
+
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
|
|
2
2
|
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
3
3
|
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
4
|
-
import { createCompleteDatasetTool } from "../completeDataset.tool.js";
|
|
4
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed } from "../completeDataset.tool.js";
|
|
5
5
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
6
6
|
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
7
7
|
import { buildFileDatasetPrompt } from "./prompts.js";
|
|
@@ -10,16 +10,24 @@ import { id } from "@instantdb/admin";
|
|
|
10
10
|
import { getDatasetWorkstation } from "../datasetFiles.js";
|
|
11
11
|
import { readInstantFileStep } from "./steps.js";
|
|
12
12
|
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
async function awaitContextRun(run) {
|
|
14
|
+
if (!run)
|
|
15
|
+
return;
|
|
16
|
+
if (run.returnValue) {
|
|
17
|
+
await run.returnValue;
|
|
18
|
+
return;
|
|
19
|
+
}
|
|
20
|
+
await run;
|
|
21
|
+
}
|
|
22
|
+
async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
|
|
15
23
|
if (state.initialized) {
|
|
16
24
|
return state.filePath;
|
|
17
25
|
}
|
|
18
|
-
console.log(`[
|
|
19
|
-
await ensurePreviewScriptsAvailable(
|
|
20
|
-
console.log(`[
|
|
26
|
+
console.log(`[FileParseContext ${datasetId}] Initializing sandbox...`);
|
|
27
|
+
await ensurePreviewScriptsAvailable(runtime, sandboxId);
|
|
28
|
+
console.log(`[FileParseContext ${datasetId}] Installing Python dependencies...`);
|
|
21
29
|
const pipInstall = await runDatasetSandboxCommandStep({
|
|
22
|
-
|
|
30
|
+
runtime,
|
|
23
31
|
sandboxId,
|
|
24
32
|
cmd: "python",
|
|
25
33
|
args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
|
|
@@ -28,12 +36,12 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
28
36
|
if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
|
|
29
37
|
throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
|
|
30
38
|
}
|
|
31
|
-
console.log(`[
|
|
32
|
-
const file = await readInstantFileStep({
|
|
33
|
-
console.log(`[
|
|
39
|
+
console.log(`[FileParseContext ${datasetId}] Fetching file from InstantDB...`);
|
|
40
|
+
const file = await readInstantFileStep({ runtime, fileId });
|
|
41
|
+
console.log(`[FileParseContext ${datasetId}] Creating dataset workstation...`);
|
|
34
42
|
const workstation = getDatasetWorkstation(datasetId);
|
|
35
43
|
await runDatasetSandboxCommandStep({
|
|
36
|
-
|
|
44
|
+
runtime,
|
|
37
45
|
sandboxId,
|
|
38
46
|
cmd: "mkdir",
|
|
39
47
|
args: ["-p", workstation],
|
|
@@ -42,7 +50,7 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
42
50
|
const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
|
|
43
51
|
const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
|
|
44
52
|
await writeDatasetSandboxFilesStep({
|
|
45
|
-
|
|
53
|
+
runtime,
|
|
46
54
|
sandboxId,
|
|
47
55
|
files: [
|
|
48
56
|
{
|
|
@@ -51,14 +59,14 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
51
59
|
},
|
|
52
60
|
],
|
|
53
61
|
});
|
|
54
|
-
console.log(`[
|
|
55
|
-
console.log(`[
|
|
62
|
+
console.log(`[FileParseContext ${datasetId}] ✅ Workstation created: ${workstation}`);
|
|
63
|
+
console.log(`[FileParseContext ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
|
|
56
64
|
state.filePath = sandboxFilePath;
|
|
57
65
|
state.initialized = true;
|
|
58
66
|
return sandboxFilePath;
|
|
59
67
|
}
|
|
60
68
|
/**
|
|
61
|
-
*
|
|
69
|
+
* FileParseContext
|
|
62
70
|
*
|
|
63
71
|
* Uso:
|
|
64
72
|
* - Crear una instancia con `fileId`, `instructions` y un `sandbox`
|
|
@@ -67,27 +75,27 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
67
75
|
*
|
|
68
76
|
* Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
|
|
69
77
|
*/
|
|
70
|
-
function
|
|
78
|
+
function createFileParseContextDefinition(params) {
|
|
71
79
|
const datasetId = params.datasetId ?? id();
|
|
72
80
|
const model = params.model ?? "openai/gpt-5";
|
|
73
|
-
let
|
|
74
|
-
.context(async (stored,
|
|
81
|
+
let contextBuilder = createContext("file.parse")
|
|
82
|
+
.context(async (stored, _env, runtime) => {
|
|
75
83
|
const previous = stored?.content ?? {};
|
|
76
84
|
const sandboxState = previous?.sandboxState ?? { initialized: false, filePath: "" };
|
|
77
85
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
78
86
|
if (!sandboxId) {
|
|
79
87
|
throw new Error("dataset_sandbox_required");
|
|
80
88
|
}
|
|
81
|
-
const sandboxFilePath = await initializeSandbox(
|
|
89
|
+
const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId, params.fileId, sandboxState);
|
|
82
90
|
let filePreview = undefined;
|
|
83
91
|
try {
|
|
84
|
-
filePreview = await generateFilePreview(
|
|
92
|
+
filePreview = await generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId);
|
|
85
93
|
}
|
|
86
94
|
catch {
|
|
87
95
|
// optional
|
|
88
96
|
}
|
|
89
97
|
let schema = null;
|
|
90
|
-
const datasetResult = await datasetGetByIdStep({
|
|
98
|
+
const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
|
|
91
99
|
if (datasetResult.ok && datasetResult.data.schema)
|
|
92
100
|
schema = datasetResult.data.schema;
|
|
93
101
|
const ctx = {
|
|
@@ -128,57 +136,57 @@ function createFileParseStoryDefinition(params) {
|
|
|
128
136
|
base,
|
|
129
137
|
].join("\n");
|
|
130
138
|
})
|
|
131
|
-
.actions(async (_stored,
|
|
139
|
+
.actions(async (_stored, _env, runtime) => {
|
|
132
140
|
const existingSchema = _stored?.content?.ctx?.schema?.schema;
|
|
133
141
|
const actions = {
|
|
134
142
|
executeCommand: createExecuteCommandTool({
|
|
135
143
|
datasetId,
|
|
136
144
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
137
|
-
|
|
145
|
+
runtime,
|
|
138
146
|
}),
|
|
139
147
|
completeDataset: createCompleteDatasetTool({
|
|
140
148
|
datasetId,
|
|
141
149
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
142
|
-
|
|
150
|
+
runtime,
|
|
143
151
|
}),
|
|
144
152
|
clearDataset: createClearDatasetTool({
|
|
145
153
|
datasetId,
|
|
146
154
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
147
|
-
|
|
155
|
+
runtime,
|
|
148
156
|
}),
|
|
149
157
|
};
|
|
150
158
|
if (!existingSchema) {
|
|
151
159
|
actions.generateSchema = createGenerateSchemaTool({
|
|
152
160
|
datasetId,
|
|
153
161
|
fileId: params.fileId,
|
|
154
|
-
|
|
162
|
+
runtime,
|
|
155
163
|
});
|
|
156
164
|
}
|
|
157
165
|
return actions;
|
|
158
166
|
})
|
|
159
167
|
.shouldContinue(({ reactionEvent }) => {
|
|
160
|
-
return !
|
|
168
|
+
return !didCompleteDatasetSucceed(reactionEvent);
|
|
161
169
|
});
|
|
162
170
|
if (params.reactor) {
|
|
163
|
-
|
|
171
|
+
contextBuilder = contextBuilder.reactor(params.reactor);
|
|
164
172
|
}
|
|
165
173
|
else {
|
|
166
|
-
|
|
174
|
+
contextBuilder = contextBuilder.model(model);
|
|
167
175
|
}
|
|
168
|
-
const
|
|
169
|
-
return { datasetId,
|
|
176
|
+
const context = contextBuilder.build();
|
|
177
|
+
return { datasetId, context };
|
|
170
178
|
}
|
|
171
179
|
/**
|
|
172
180
|
* Factory (DX-first):
|
|
173
181
|
*
|
|
174
182
|
* Usage:
|
|
175
|
-
* const { datasetId } = await
|
|
183
|
+
* const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
|
|
176
184
|
*
|
|
177
|
-
* -
|
|
178
|
-
* - All I/O happens in `"use step"` functions via Ekairos runtime
|
|
179
|
-
* - `parse()` is the entrypoint; it calls `
|
|
185
|
+
* - Uses the caller runtime; no secondary runtime is created.
|
|
186
|
+
* - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
|
|
187
|
+
* - `parse()` is the entrypoint; it calls `context.react(...)` internally.
|
|
180
188
|
*/
|
|
181
|
-
export function
|
|
189
|
+
export function createFileParseContext(fileId, opts) {
|
|
182
190
|
const params = {
|
|
183
191
|
fileId,
|
|
184
192
|
instructions: opts?.instructions,
|
|
@@ -187,30 +195,29 @@ export function createFileParseStory(fileId, opts) {
|
|
|
187
195
|
model: opts?.model,
|
|
188
196
|
reactor: opts?.reactor,
|
|
189
197
|
};
|
|
190
|
-
const { datasetId,
|
|
198
|
+
const { datasetId, context } = createFileParseContextDefinition(params);
|
|
191
199
|
return {
|
|
192
200
|
datasetId,
|
|
193
|
-
async parse(
|
|
201
|
+
async parse(runtime, options = {}) {
|
|
194
202
|
const triggerEvent = {
|
|
195
203
|
id: id(),
|
|
196
204
|
type: INPUT_TEXT_ITEM_TYPE,
|
|
197
205
|
channel: WEB_CHANNEL,
|
|
198
206
|
createdAt: new Date().toISOString(),
|
|
199
207
|
content: {
|
|
200
|
-
parts: [{ type: "text", text: prompt ?? "generate a dataset for this file" }],
|
|
208
|
+
parts: [{ type: "text", text: options.prompt ?? "generate a dataset for this file" }],
|
|
201
209
|
},
|
|
202
210
|
};
|
|
203
|
-
const
|
|
204
|
-
|
|
205
|
-
runtime,
|
|
211
|
+
const shell = await context.react(triggerEvent, {
|
|
212
|
+
runtime: runtime,
|
|
206
213
|
context: { key: `dataset:${datasetId}` },
|
|
207
|
-
durable: false,
|
|
214
|
+
durable: options.durable ?? false,
|
|
208
215
|
options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
|
|
209
216
|
});
|
|
210
|
-
await shell.run;
|
|
217
|
+
await awaitContextRun(shell.run);
|
|
211
218
|
return { datasetId };
|
|
212
219
|
},
|
|
213
|
-
// Optional: expose the built
|
|
214
|
-
|
|
220
|
+
// Optional: expose the built context for advanced callers (not required for parse DX)
|
|
221
|
+
context,
|
|
215
222
|
};
|
|
216
223
|
}
|
|
@@ -34,6 +34,6 @@ interface PreviewOptions {
|
|
|
34
34
|
tailLines?: number;
|
|
35
35
|
midLines?: number;
|
|
36
36
|
}
|
|
37
|
-
export declare function ensurePreviewScriptsAvailable(
|
|
38
|
-
export declare function generateFilePreview(
|
|
37
|
+
export declare function ensurePreviewScriptsAvailable(runtime: any, sandboxId: string): Promise<void>;
|
|
38
|
+
export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
|
|
39
39
|
export {};
|
package/dist/file/filepreview.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { readFileSync } from "node:fs";
|
|
2
|
+
import { createRequire } from "node:module";
|
|
2
3
|
import { dirname, join } from "node:path";
|
|
3
4
|
import { fileURLToPath } from "node:url";
|
|
4
5
|
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
@@ -15,10 +16,16 @@ const PYTHON_SCRIPT_FILES = [
|
|
|
15
16
|
"preview_tail_csv.py",
|
|
16
17
|
"preview_tail_excel.py",
|
|
17
18
|
];
|
|
19
|
+
const require = createRequire(import.meta.url);
|
|
18
20
|
function resolveScriptPath(scriptName) {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
21
|
+
try {
|
|
22
|
+
return require.resolve(`@ekairos/dataset/file/scripts/${scriptName}`);
|
|
23
|
+
}
|
|
24
|
+
catch {
|
|
25
|
+
// Prefer local scripts in src/ (tests/dev), and after build the scripts are copied to dist/
|
|
26
|
+
// at the same relative path, so this works in both environments.
|
|
27
|
+
return join(dirname(fileURLToPath(import.meta.url)), "scripts", scriptName);
|
|
28
|
+
}
|
|
22
29
|
}
|
|
23
30
|
const preparedSandboxIds = new Set();
|
|
24
31
|
const sandboxSetupPromises = new Map();
|
|
@@ -34,7 +41,7 @@ function validateScriptResult(result, context) {
|
|
|
34
41
|
throw new Error(`${context} failed: ${stderr.substring(0, 500)}`);
|
|
35
42
|
}
|
|
36
43
|
}
|
|
37
|
-
export async function ensurePreviewScriptsAvailable(
|
|
44
|
+
export async function ensurePreviewScriptsAvailable(runtime, sandboxId) {
|
|
38
45
|
if (preparedSandboxIds.has(sandboxId)) {
|
|
39
46
|
return;
|
|
40
47
|
}
|
|
@@ -46,7 +53,7 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
46
53
|
const setupPromise = (async () => {
|
|
47
54
|
try {
|
|
48
55
|
await runDatasetSandboxCommandStep({
|
|
49
|
-
|
|
56
|
+
runtime,
|
|
50
57
|
sandboxId,
|
|
51
58
|
cmd: "mkdir",
|
|
52
59
|
args: ["-p", SANDBOX_SCRIPT_DIRECTORY],
|
|
@@ -72,7 +79,7 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
72
79
|
}
|
|
73
80
|
if (filesToWrite.length > 0) {
|
|
74
81
|
await writeDatasetSandboxFilesStep({
|
|
75
|
-
|
|
82
|
+
runtime,
|
|
76
83
|
sandboxId,
|
|
77
84
|
files: filesToWrite,
|
|
78
85
|
});
|
|
@@ -88,13 +95,13 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
88
95
|
throw error;
|
|
89
96
|
}
|
|
90
97
|
}
|
|
91
|
-
export async function generateFilePreview(
|
|
98
|
+
export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId, options = {}) {
|
|
92
99
|
const context = {
|
|
93
100
|
totalRows: 0,
|
|
94
101
|
};
|
|
95
102
|
try {
|
|
96
|
-
await ensurePreviewScriptsAvailable(
|
|
97
|
-
const metadataResult = await runScript(
|
|
103
|
+
await ensurePreviewScriptsAvailable(runtime, sandboxId);
|
|
104
|
+
const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
|
|
98
105
|
context.metadata = metadataResult;
|
|
99
106
|
let isExcel = false;
|
|
100
107
|
if (metadataResult.stdout) {
|
|
@@ -120,23 +127,23 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
|
|
|
120
127
|
const midScript = isExcel ? "preview_mid_excel.py" : "preview_mid_csv.py";
|
|
121
128
|
if (totalRows <= headLines) {
|
|
122
129
|
console.log(`[Dataset ${datasetId}] File has ${totalRows} rows, reading all with head only`);
|
|
123
|
-
const headResult = await runScript(
|
|
130
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
|
|
124
131
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
125
132
|
context.head = headResult;
|
|
126
133
|
return context;
|
|
127
134
|
}
|
|
128
135
|
if (headLines + tailLines >= totalRows) {
|
|
129
136
|
console.log(`[Dataset ${datasetId}] Head + tail would cover entire file (${totalRows} rows), reading all with head only`);
|
|
130
|
-
const headResult = await runScript(
|
|
137
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
|
|
131
138
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
132
139
|
context.head = headResult;
|
|
133
140
|
return context;
|
|
134
141
|
}
|
|
135
142
|
console.log(`[Dataset ${datasetId}] Reading head (${headLines} rows) and tail (${tailLines} rows) from ${totalRows} total rows`);
|
|
136
|
-
const headResult = await runScript(
|
|
143
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(headLines)], `Reads the first ${headLines} rows of the file`);
|
|
137
144
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
138
145
|
context.head = headResult;
|
|
139
|
-
const tailResult = await runScript(
|
|
146
|
+
const tailResult = await runScript(runtime, sandboxId, tailScript, [sandboxFilePath, String(tailLines)], `Reads the last ${tailLines} rows of the file`);
|
|
140
147
|
validateScriptResult(tailResult, `preview_tail for ${datasetId}`);
|
|
141
148
|
context.tail = tailResult;
|
|
142
149
|
const midLines = options.midLines || DEFAULT_MID_LINES;
|
|
@@ -145,7 +152,7 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
|
|
|
145
152
|
const midStart = headLines;
|
|
146
153
|
const midEnd = totalRows - tailLines;
|
|
147
154
|
console.log(`[Dataset ${datasetId}] Large gap (${gapSize} rows), adding mid sample (${midLines} rows)`);
|
|
148
|
-
const midResult = await runScript(
|
|
155
|
+
const midResult = await runScript(runtime, sandboxId, midScript, [sandboxFilePath, String(midStart), String(midEnd), String(midLines)], `Samples ${midLines} rows from the middle section (rows ${midStart + 1} to ${midEnd})`);
|
|
149
156
|
validateScriptResult(midResult, `preview_mid for ${datasetId}`);
|
|
150
157
|
context.mid = midResult;
|
|
151
158
|
}
|
|
@@ -155,8 +162,8 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
|
|
|
155
162
|
}
|
|
156
163
|
return context;
|
|
157
164
|
}
|
|
158
|
-
async function runScript(
|
|
159
|
-
const scriptPath =
|
|
165
|
+
async function runScript(runtime, sandboxId, scriptName, args, description) {
|
|
166
|
+
const scriptPath = `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`;
|
|
160
167
|
const command = `python ${scriptPath} ${args.join(" ")}`;
|
|
161
168
|
let scriptContent = "";
|
|
162
169
|
try {
|
|
@@ -168,7 +175,7 @@ async function runScript(env, sandboxId, scriptName, args, description) {
|
|
|
168
175
|
}
|
|
169
176
|
try {
|
|
170
177
|
const result = await runDatasetSandboxCommandStep({
|
|
171
|
-
|
|
178
|
+
runtime,
|
|
172
179
|
sandboxId,
|
|
173
180
|
cmd: "python",
|
|
174
181
|
args: [scriptPath, ...args],
|
|
@@ -2,9 +2,9 @@ interface GenerateSchemaToolParams {
|
|
|
2
2
|
datasetId: string;
|
|
3
3
|
isNested?: boolean;
|
|
4
4
|
fileId?: string;
|
|
5
|
-
|
|
5
|
+
runtime: any;
|
|
6
6
|
}
|
|
7
|
-
export declare function createGenerateSchemaTool({ datasetId, isNested, fileId,
|
|
7
|
+
export declare function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }: GenerateSchemaToolParams): import("ai").Tool<{
|
|
8
8
|
schemaTitle: string;
|
|
9
9
|
schemaDescription: string;
|
|
10
10
|
schemaJson: string;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { tool } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { datasetUpdateSchemaStep } from "../dataset/steps.js";
|
|
4
|
-
export function createGenerateSchemaTool({ datasetId, isNested, fileId,
|
|
4
|
+
export function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }) {
|
|
5
5
|
return tool({
|
|
6
6
|
description: `Generate a formal JSON schema for a SINGLE RECORD (row) from the file. This schema describes the structure of ONE record, not the entire dataset or array of records. Requirements:
|
|
7
7
|
1. Schema describes ONE RECORD structure only (no array wrappers)
|
|
@@ -72,7 +72,7 @@ export function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
|
|
|
72
72
|
console.log(`[Dataset ${datasetId}] Schema JSON:`);
|
|
73
73
|
console.log(JSON.stringify(parsedSchema, null, 2));
|
|
74
74
|
const updateResult = await datasetUpdateSchemaStep({
|
|
75
|
-
|
|
75
|
+
runtime,
|
|
76
76
|
datasetId,
|
|
77
77
|
schema: schemaData,
|
|
78
78
|
status: "schema_complete",
|
package/dist/file/prompts.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
2
|
-
export declare function buildFileDatasetPrompt(context:
|
|
1
|
+
import { FileParseContext } from "./file-dataset.agent.js";
|
|
2
|
+
export declare function buildFileDatasetPrompt(context: FileParseContext): string;
|
package/dist/file/prompts.js
CHANGED
|
@@ -147,6 +147,8 @@ function buildInstructions(context) {
|
|
|
147
147
|
.ele("Action").txt("Use the provided schema as the output contract for every row in output.jsonl").up()
|
|
148
148
|
.ele("Requirements")
|
|
149
149
|
.ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
|
|
150
|
+
.ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
|
|
151
|
+
.ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the source language").up()
|
|
150
152
|
.ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
|
|
151
153
|
.up()
|
|
152
154
|
.up();
|
|
@@ -170,6 +172,7 @@ function buildInstructions(context) {
|
|
|
170
172
|
.ele("Requirements")
|
|
171
173
|
.ele("Requirement").txt("Parse ALL data rows/records from the file (exclude header sections and metadata)").up()
|
|
172
174
|
.ele("Requirement").txt("Output JSONL format: each line is {\"type\": \"row\", \"data\": {...record...}}").up()
|
|
175
|
+
.ele("Requirement").txt("When a schema is provided, each data object must contain the exact required schema keys and must not use translated or synonymous keys").up()
|
|
173
176
|
.ele("Requirement").txt("Extract ONLY data records; skip any header lines, summary sections, or file metadata").up()
|
|
174
177
|
.ele("Requirement").txt(`Save output to: ${outputPath}`).up()
|
|
175
178
|
.ele("Requirement").txt("Use descriptive scriptName in snake_case (e.g., 'parse_csv_to_jsonl')").up()
|
|
@@ -177,11 +180,13 @@ function buildInstructions(context) {
|
|
|
177
180
|
.up()
|
|
178
181
|
.ele("Step", { number: "4", name: "Complete and Validate" })
|
|
179
182
|
.ele("Action").txt("Call completeDataset to validate the dataset").up()
|
|
180
|
-
.ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns
|
|
183
|
+
.ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
|
|
181
184
|
.up()
|
|
182
185
|
.up()
|
|
183
186
|
.ele("Rules")
|
|
184
187
|
.ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
|
|
188
|
+
.ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the source language").up()
|
|
189
|
+
.ele("Rule").txt("Original/source language applies to extracted values only, not to JSON object keys").up()
|
|
185
190
|
.ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
|
|
186
191
|
.ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
|
|
187
192
|
.ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()
|
package/dist/file/steps.d.ts
CHANGED
package/dist/file/steps.js
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
async function getRuntimeDb(runtime) {
|
|
2
|
+
if (!runtime) {
|
|
3
|
+
throw new Error("Dataset file step requires runtime.");
|
|
4
|
+
}
|
|
5
|
+
const db = runtime.db;
|
|
6
|
+
return typeof db === "function" ? await db.call(runtime) : db;
|
|
7
|
+
}
|
|
2
8
|
export async function readInstantFileStep(params) {
|
|
3
9
|
"use step";
|
|
4
|
-
const db =
|
|
10
|
+
const db = await getRuntimeDb(params.runtime);
|
|
5
11
|
const fileQuery = await db.query({
|
|
6
12
|
$files: { $: { where: { id: params.fileId }, limit: 1 } },
|
|
7
13
|
});
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import type
|
|
2
|
-
export type QueryDomainInput = QueryDomainStepInput
|
|
1
|
+
import { type QueryDomainStepInput, type QueryDomainStepResult } from "./queryDomain.step.js";
|
|
2
|
+
export type QueryDomainInput = Omit<QueryDomainStepInput, "runtime">;
|
|
3
3
|
export type QueryDomainResult = QueryDomainStepResult;
|
|
4
4
|
/**
|
|
5
5
|
* Workflow-compatible domain query.
|
|
6
6
|
* Always returns a dataset + preview rows.
|
|
7
7
|
*/
|
|
8
|
-
export declare function queryDomain(input: QueryDomainInput): Promise<QueryDomainResult>;
|
|
8
|
+
export declare function queryDomain(runtime: any, input: QueryDomainInput): Promise<QueryDomainResult>;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
import { queryDomainStep } from "./queryDomain.step.js";
|
|
1
2
|
/**
|
|
2
3
|
* Workflow-compatible domain query.
|
|
3
4
|
* Always returns a dataset + preview rows.
|
|
4
5
|
*/
|
|
5
|
-
export async function queryDomain(input) {
|
|
6
|
+
export async function queryDomain(runtime, input) {
|
|
6
7
|
"use step";
|
|
7
|
-
|
|
8
|
-
return await queryDomainStep(input);
|
|
8
|
+
return await queryDomainStep({ runtime, ...input });
|
|
9
9
|
}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { id as newId } from "@instantdb/admin";
|
|
2
|
-
import { getContextRuntime, getContextEnv } from "@ekairos/events/runtime";
|
|
3
2
|
import { DatasetService } from "../service.js";
|
|
4
3
|
function normalizeRows(result) {
|
|
5
4
|
if (!result || typeof result !== "object")
|
|
@@ -46,11 +45,16 @@ function inferSchema(rows) {
|
|
|
46
45
|
}
|
|
47
46
|
return { schema };
|
|
48
47
|
}
|
|
48
|
+
async function getRuntimeDb(runtime) {
|
|
49
|
+
if (!runtime) {
|
|
50
|
+
throw new Error("Dataset query step requires runtime.");
|
|
51
|
+
}
|
|
52
|
+
const db = runtime.db;
|
|
53
|
+
return typeof db === "function" ? await db.call(runtime) : db;
|
|
54
|
+
}
|
|
49
55
|
export async function queryDomainStep(params) {
|
|
50
56
|
"use step";
|
|
51
|
-
const
|
|
52
|
-
const runtime = await getContextRuntime(env);
|
|
53
|
-
const db = runtime.db;
|
|
57
|
+
const db = await getRuntimeDb(params.runtime);
|
|
54
58
|
const service = new DatasetService(db);
|
|
55
59
|
const datasetId = params.datasetId ?? newId();
|
|
56
60
|
const queryResult = await db.query(params.query);
|
package/dist/sandbox/steps.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export type DatasetSandboxId = string;
|
|
2
2
|
export type CreateDatasetSandboxParams = {
|
|
3
|
-
|
|
3
|
+
sandboxRuntime?: string;
|
|
4
4
|
timeoutMs?: number;
|
|
5
5
|
ports?: number[];
|
|
6
6
|
resources?: {
|
|
@@ -15,18 +15,18 @@ export type DatasetSandboxRunCommandResult = {
|
|
|
15
15
|
stderr: string;
|
|
16
16
|
};
|
|
17
17
|
export declare function createDatasetSandboxStep(params: {
|
|
18
|
-
|
|
18
|
+
runtime: any;
|
|
19
19
|
} & CreateDatasetSandboxParams): Promise<{
|
|
20
20
|
sandboxId: DatasetSandboxId;
|
|
21
21
|
}>;
|
|
22
22
|
export declare function runDatasetSandboxCommandStep(params: {
|
|
23
|
-
|
|
23
|
+
runtime: any;
|
|
24
24
|
sandboxId: DatasetSandboxId;
|
|
25
25
|
cmd: string;
|
|
26
26
|
args?: string[];
|
|
27
27
|
}): Promise<DatasetSandboxRunCommandResult>;
|
|
28
28
|
export declare function writeDatasetSandboxFilesStep(params: {
|
|
29
|
-
|
|
29
|
+
runtime: any;
|
|
30
30
|
sandboxId: DatasetSandboxId;
|
|
31
31
|
files: Array<{
|
|
32
32
|
path: string;
|
|
@@ -34,13 +34,13 @@ export declare function writeDatasetSandboxFilesStep(params: {
|
|
|
34
34
|
}>;
|
|
35
35
|
}): Promise<void>;
|
|
36
36
|
export declare function readDatasetSandboxFileStep(params: {
|
|
37
|
-
|
|
37
|
+
runtime: any;
|
|
38
38
|
sandboxId: DatasetSandboxId;
|
|
39
39
|
path: string;
|
|
40
40
|
}): Promise<{
|
|
41
41
|
contentBase64: string;
|
|
42
42
|
}>;
|
|
43
43
|
export declare function stopDatasetSandboxStep(params: {
|
|
44
|
-
|
|
44
|
+
runtime: any;
|
|
45
45
|
sandboxId: DatasetSandboxId;
|
|
46
46
|
}): Promise<void>;
|