@ekairos/dataset 1.22.48-beta.development.0 → 1.22.50-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/materialize.d.ts +1 -11
  6. package/dist/builder/materialize.js +25 -77
  7. package/dist/builder/materializeQuery.d.ts +11 -0
  8. package/dist/builder/materializeQuery.js +40 -0
  9. package/dist/builder/persistence.js +13 -21
  10. package/dist/builder/types.d.ts +3 -0
  11. package/dist/clearDataset.tool.d.ts +2 -2
  12. package/dist/clearDataset.tool.js +3 -3
  13. package/dist/completeDataset.tool.d.ts +31 -3
  14. package/dist/completeDataset.tool.js +101 -13
  15. package/dist/dataset/steps.d.ts +32 -8
  16. package/dist/dataset/steps.js +69 -13
  17. package/dist/dataset.js +13 -7
  18. package/dist/executeCommand.tool.d.ts +2 -2
  19. package/dist/executeCommand.tool.js +3 -3
  20. package/dist/file/file-dataset.agent.d.ts +17 -11
  21. package/dist/file/file-dataset.agent.js +54 -47
  22. package/dist/file/filepreview.d.ts +2 -2
  23. package/dist/file/filepreview.js +24 -17
  24. package/dist/file/generateSchema.tool.d.ts +2 -2
  25. package/dist/file/generateSchema.tool.js +2 -2
  26. package/dist/file/prompts.d.ts +2 -2
  27. package/dist/file/prompts.js +6 -1
  28. package/dist/file/steps.d.ts +1 -1
  29. package/dist/file/steps.js +8 -2
  30. package/dist/index.d.ts +0 -1
  31. package/dist/index.js +0 -1
  32. package/dist/query/queryDomain.d.ts +3 -3
  33. package/dist/query/queryDomain.js +3 -3
  34. package/dist/query/queryDomain.step.d.ts +1 -0
  35. package/dist/query/queryDomain.step.js +8 -4
  36. package/dist/sandbox/steps.d.ts +6 -6
  37. package/dist/sandbox/steps.js +16 -12
  38. package/dist/transform/filepreview.d.ts +1 -1
  39. package/dist/transform/filepreview.js +6 -6
  40. package/dist/transform/index.d.ts +1 -1
  41. package/dist/transform/index.js +1 -1
  42. package/dist/transform/prompts.js +4 -1
  43. package/dist/transform/transform-dataset.agent.d.ts +9 -3
  44. package/dist/transform/transform-dataset.agent.js +39 -32
  45. package/dist/transform/transformDataset.d.ts +3 -2
  46. package/dist/transform/transformDataset.js +10 -9
  47. package/package.json +19 -5
  48. package/dist/eventsReactRuntime.d.ts +0 -21
  49. package/dist/eventsReactRuntime.js +0 -25
@@ -1,7 +1,7 @@
1
- import { createContext, didToolExecute, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
1
+ import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
2
2
  import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
3
3
  import { createGenerateSchemaTool } from "./generateSchema.tool.js";
4
- import { createCompleteDatasetTool } from "../completeDataset.tool.js";
4
+ import { createCompleteDatasetTool, didCompleteDatasetSucceed } from "../completeDataset.tool.js";
5
5
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
6
6
  import { createClearDatasetTool } from "../clearDataset.tool.js";
7
7
  import { buildFileDatasetPrompt } from "./prompts.js";
@@ -10,16 +10,24 @@ import { id } from "@instantdb/admin";
10
10
  import { getDatasetWorkstation } from "../datasetFiles.js";
11
11
  import { readInstantFileStep } from "./steps.js";
12
12
  import { datasetGetByIdStep } from "../dataset/steps.js";
13
- import { createEventsReactRuntime } from "../eventsReactRuntime.js";
14
- async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
13
+ async function awaitContextRun(run) {
14
+ if (!run)
15
+ return;
16
+ if (run.returnValue) {
17
+ await run.returnValue;
18
+ return;
19
+ }
20
+ await run;
21
+ }
22
+ async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
15
23
  if (state.initialized) {
16
24
  return state.filePath;
17
25
  }
18
- console.log(`[FileParseStory ${datasetId}] Initializing sandbox...`);
19
- await ensurePreviewScriptsAvailable(env, sandboxId);
20
- console.log(`[FileParseStory ${datasetId}] Installing Python dependencies...`);
26
+ console.log(`[FileParseContext ${datasetId}] Initializing sandbox...`);
27
+ await ensurePreviewScriptsAvailable(runtime, sandboxId);
28
+ console.log(`[FileParseContext ${datasetId}] Installing Python dependencies...`);
21
29
  const pipInstall = await runDatasetSandboxCommandStep({
22
- env,
30
+ runtime,
23
31
  sandboxId,
24
32
  cmd: "python",
25
33
  args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
@@ -28,12 +36,12 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
28
36
  if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
29
37
  throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
30
38
  }
31
- console.log(`[FileParseStory ${datasetId}] Fetching file from InstantDB...`);
32
- const file = await readInstantFileStep({ env, fileId });
33
- console.log(`[FileParseStory ${datasetId}] Creating dataset workstation...`);
39
+ console.log(`[FileParseContext ${datasetId}] Fetching file from InstantDB...`);
40
+ const file = await readInstantFileStep({ runtime, fileId });
41
+ console.log(`[FileParseContext ${datasetId}] Creating dataset workstation...`);
34
42
  const workstation = getDatasetWorkstation(datasetId);
35
43
  await runDatasetSandboxCommandStep({
36
- env,
44
+ runtime,
37
45
  sandboxId,
38
46
  cmd: "mkdir",
39
47
  args: ["-p", workstation],
@@ -42,7 +50,7 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
42
50
  const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
43
51
  const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
44
52
  await writeDatasetSandboxFilesStep({
45
- env,
53
+ runtime,
46
54
  sandboxId,
47
55
  files: [
48
56
  {
@@ -51,14 +59,14 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
51
59
  },
52
60
  ],
53
61
  });
54
- console.log(`[FileParseStory ${datasetId}] ✅ Workstation created: ${workstation}`);
55
- console.log(`[FileParseStory ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
62
+ console.log(`[FileParseContext ${datasetId}] ✅ Workstation created: ${workstation}`);
63
+ console.log(`[FileParseContext ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
56
64
  state.filePath = sandboxFilePath;
57
65
  state.initialized = true;
58
66
  return sandboxFilePath;
59
67
  }
60
68
  /**
61
- * FileParseStory
69
+ * FileParseContext
62
70
  *
63
71
  * Uso:
64
72
  * - Crear una instancia con `fileId`, `instructions` y un `sandbox`
@@ -67,27 +75,27 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
67
75
  *
68
76
  * Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
69
77
  */
70
- function createFileParseStoryDefinition(params) {
78
+ function createFileParseContextDefinition(params) {
71
79
  const datasetId = params.datasetId ?? id();
72
80
  const model = params.model ?? "openai/gpt-5";
73
- let storyBuilder = createContext("file.parse")
74
- .context(async (stored, env) => {
81
+ let contextBuilder = createContext("file.parse")
82
+ .context(async (stored, _env, runtime) => {
75
83
  const previous = stored?.content ?? {};
76
84
  const sandboxState = previous?.sandboxState ?? { initialized: false, filePath: "" };
77
85
  const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
78
86
  if (!sandboxId) {
79
87
  throw new Error("dataset_sandbox_required");
80
88
  }
81
- const sandboxFilePath = await initializeSandbox(env, sandboxId, datasetId, params.fileId, sandboxState);
89
+ const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId, params.fileId, sandboxState);
82
90
  let filePreview = undefined;
83
91
  try {
84
- filePreview = await generateFilePreview(env, sandboxId, sandboxFilePath, datasetId);
92
+ filePreview = await generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId);
85
93
  }
86
94
  catch {
87
95
  // optional
88
96
  }
89
97
  let schema = null;
90
- const datasetResult = await datasetGetByIdStep({ env, datasetId });
98
+ const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
91
99
  if (datasetResult.ok && datasetResult.data.schema)
92
100
  schema = datasetResult.data.schema;
93
101
  const ctx = {
@@ -128,57 +136,57 @@ function createFileParseStoryDefinition(params) {
128
136
  base,
129
137
  ].join("\n");
130
138
  })
131
- .actions(async (_stored, env) => {
139
+ .actions(async (_stored, _env, runtime) => {
132
140
  const existingSchema = _stored?.content?.ctx?.schema?.schema;
133
141
  const actions = {
134
142
  executeCommand: createExecuteCommandTool({
135
143
  datasetId,
136
144
  sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
137
- env,
145
+ runtime,
138
146
  }),
139
147
  completeDataset: createCompleteDatasetTool({
140
148
  datasetId,
141
149
  sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
142
- env,
150
+ runtime,
143
151
  }),
144
152
  clearDataset: createClearDatasetTool({
145
153
  datasetId,
146
154
  sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
147
- env,
155
+ runtime,
148
156
  }),
149
157
  };
150
158
  if (!existingSchema) {
151
159
  actions.generateSchema = createGenerateSchemaTool({
152
160
  datasetId,
153
161
  fileId: params.fileId,
154
- env,
162
+ runtime,
155
163
  });
156
164
  }
157
165
  return actions;
158
166
  })
159
167
  .shouldContinue(({ reactionEvent }) => {
160
- return !didToolExecute(reactionEvent, "completeDataset");
168
+ return !didCompleteDatasetSucceed(reactionEvent);
161
169
  });
162
170
  if (params.reactor) {
163
- storyBuilder = storyBuilder.reactor(params.reactor);
171
+ contextBuilder = contextBuilder.reactor(params.reactor);
164
172
  }
165
173
  else {
166
- storyBuilder = storyBuilder.model(model);
174
+ contextBuilder = contextBuilder.model(model);
167
175
  }
168
- const story = storyBuilder.build();
169
- return { datasetId, story };
176
+ const context = contextBuilder.build();
177
+ return { datasetId, context };
170
178
  }
171
179
  /**
172
180
  * Factory (DX-first):
173
181
  *
174
182
  * Usage:
175
- * const { datasetId } = await createFileParseStory(fileId, { instructions }).parse(env)
183
+ * const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
176
184
  *
177
- * - No `db` is accepted/stored (workflow-safe).
178
- * - All I/O happens in `"use step"` functions via Ekairos runtime (`getContextRuntime(env).db`).
179
- * - `parse()` is the entrypoint; it calls `story.react(...)` internally.
185
+ * - Uses the caller runtime; no secondary runtime is created.
186
+ * - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
187
+ * - `parse()` is the entrypoint; it calls `context.react(...)` internally.
180
188
  */
181
- export function createFileParseStory(fileId, opts) {
189
+ export function createFileParseContext(fileId, opts) {
182
190
  const params = {
183
191
  fileId,
184
192
  instructions: opts?.instructions,
@@ -187,30 +195,29 @@ export function createFileParseStory(fileId, opts) {
187
195
  model: opts?.model,
188
196
  reactor: opts?.reactor,
189
197
  };
190
- const { datasetId, story } = createFileParseStoryDefinition(params);
198
+ const { datasetId, context } = createFileParseContextDefinition(params);
191
199
  return {
192
200
  datasetId,
193
- async parse(env, prompt) {
201
+ async parse(runtime, options = {}) {
194
202
  const triggerEvent = {
195
203
  id: id(),
196
204
  type: INPUT_TEXT_ITEM_TYPE,
197
205
  channel: WEB_CHANNEL,
198
206
  createdAt: new Date().toISOString(),
199
207
  content: {
200
- parts: [{ type: "text", text: prompt ?? "generate a dataset for this file" }],
208
+ parts: [{ type: "text", text: options.prompt ?? "generate a dataset for this file" }],
201
209
  },
202
210
  };
203
- const runtime = createEventsReactRuntime((env ?? {}));
204
- const shell = await story.react(triggerEvent, {
205
- runtime,
211
+ const shell = await context.react(triggerEvent, {
212
+ runtime: runtime,
206
213
  context: { key: `dataset:${datasetId}` },
207
- durable: false,
214
+ durable: options.durable ?? false,
208
215
  options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
209
216
  });
210
- await shell.run;
217
+ await awaitContextRun(shell.run);
211
218
  return { datasetId };
212
219
  },
213
- // Optional: expose the built story for advanced callers (not required for parse DX)
214
- story,
220
+ // Optional: expose the built context for advanced callers (not required for parse DX)
221
+ context,
215
222
  };
216
223
  }
@@ -34,6 +34,6 @@ interface PreviewOptions {
34
34
  tailLines?: number;
35
35
  midLines?: number;
36
36
  }
37
- export declare function ensurePreviewScriptsAvailable(env: any, sandboxId: string): Promise<void>;
38
- export declare function generateFilePreview(env: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
37
+ export declare function ensurePreviewScriptsAvailable(runtime: any, sandboxId: string): Promise<void>;
38
+ export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
39
39
  export {};
@@ -1,4 +1,5 @@
1
1
  import { readFileSync } from "node:fs";
2
+ import { createRequire } from "node:module";
2
3
  import { dirname, join } from "node:path";
3
4
  import { fileURLToPath } from "node:url";
4
5
  import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
@@ -15,10 +16,16 @@ const PYTHON_SCRIPT_FILES = [
15
16
  "preview_tail_csv.py",
16
17
  "preview_tail_excel.py",
17
18
  ];
19
+ const require = createRequire(import.meta.url);
18
20
  function resolveScriptPath(scriptName) {
19
- // Prefer local scripts in src/ (tests/dev), and after build the scripts are copied to dist/
20
- // at the same relative path, so this works in both environments.
21
- return join(dirname(fileURLToPath(import.meta.url)), "scripts", scriptName);
21
+ try {
22
+ return require.resolve(`@ekairos/dataset/file/scripts/${scriptName}`);
23
+ }
24
+ catch {
25
+ // Prefer local scripts in src/ (tests/dev), and after build the scripts are copied to dist/
26
+ // at the same relative path, so this works in both environments.
27
+ return join(dirname(fileURLToPath(import.meta.url)), "scripts", scriptName);
28
+ }
22
29
  }
23
30
  const preparedSandboxIds = new Set();
24
31
  const sandboxSetupPromises = new Map();
@@ -34,7 +41,7 @@ function validateScriptResult(result, context) {
34
41
  throw new Error(`${context} failed: ${stderr.substring(0, 500)}`);
35
42
  }
36
43
  }
37
- export async function ensurePreviewScriptsAvailable(env, sandboxId) {
44
+ export async function ensurePreviewScriptsAvailable(runtime, sandboxId) {
38
45
  if (preparedSandboxIds.has(sandboxId)) {
39
46
  return;
40
47
  }
@@ -46,7 +53,7 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
46
53
  const setupPromise = (async () => {
47
54
  try {
48
55
  await runDatasetSandboxCommandStep({
49
- env,
56
+ runtime,
50
57
  sandboxId,
51
58
  cmd: "mkdir",
52
59
  args: ["-p", SANDBOX_SCRIPT_DIRECTORY],
@@ -72,7 +79,7 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
72
79
  }
73
80
  if (filesToWrite.length > 0) {
74
81
  await writeDatasetSandboxFilesStep({
75
- env,
82
+ runtime,
76
83
  sandboxId,
77
84
  files: filesToWrite,
78
85
  });
@@ -88,13 +95,13 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
88
95
  throw error;
89
96
  }
90
97
  }
91
- export async function generateFilePreview(env, sandboxId, sandboxFilePath, datasetId, options = {}) {
98
+ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId, options = {}) {
92
99
  const context = {
93
100
  totalRows: 0,
94
101
  };
95
102
  try {
96
- await ensurePreviewScriptsAvailable(env, sandboxId);
97
- const metadataResult = await runScript(env, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
103
+ await ensurePreviewScriptsAvailable(runtime, sandboxId);
104
+ const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
98
105
  context.metadata = metadataResult;
99
106
  let isExcel = false;
100
107
  if (metadataResult.stdout) {
@@ -120,23 +127,23 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
120
127
  const midScript = isExcel ? "preview_mid_excel.py" : "preview_mid_csv.py";
121
128
  if (totalRows <= headLines) {
122
129
  console.log(`[Dataset ${datasetId}] File has ${totalRows} rows, reading all with head only`);
123
- const headResult = await runScript(env, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
130
+ const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
124
131
  validateScriptResult(headResult, `preview_head for ${datasetId}`);
125
132
  context.head = headResult;
126
133
  return context;
127
134
  }
128
135
  if (headLines + tailLines >= totalRows) {
129
136
  console.log(`[Dataset ${datasetId}] Head + tail would cover entire file (${totalRows} rows), reading all with head only`);
130
- const headResult = await runScript(env, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
137
+ const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
131
138
  validateScriptResult(headResult, `preview_head for ${datasetId}`);
132
139
  context.head = headResult;
133
140
  return context;
134
141
  }
135
142
  console.log(`[Dataset ${datasetId}] Reading head (${headLines} rows) and tail (${tailLines} rows) from ${totalRows} total rows`);
136
- const headResult = await runScript(env, sandboxId, headScript, [sandboxFilePath, String(headLines)], `Reads the first ${headLines} rows of the file`);
143
+ const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(headLines)], `Reads the first ${headLines} rows of the file`);
137
144
  validateScriptResult(headResult, `preview_head for ${datasetId}`);
138
145
  context.head = headResult;
139
- const tailResult = await runScript(env, sandboxId, tailScript, [sandboxFilePath, String(tailLines)], `Reads the last ${tailLines} rows of the file`);
146
+ const tailResult = await runScript(runtime, sandboxId, tailScript, [sandboxFilePath, String(tailLines)], `Reads the last ${tailLines} rows of the file`);
140
147
  validateScriptResult(tailResult, `preview_tail for ${datasetId}`);
141
148
  context.tail = tailResult;
142
149
  const midLines = options.midLines || DEFAULT_MID_LINES;
@@ -145,7 +152,7 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
145
152
  const midStart = headLines;
146
153
  const midEnd = totalRows - tailLines;
147
154
  console.log(`[Dataset ${datasetId}] Large gap (${gapSize} rows), adding mid sample (${midLines} rows)`);
148
- const midResult = await runScript(env, sandboxId, midScript, [sandboxFilePath, String(midStart), String(midEnd), String(midLines)], `Samples ${midLines} rows from the middle section (rows ${midStart + 1} to ${midEnd})`);
155
+ const midResult = await runScript(runtime, sandboxId, midScript, [sandboxFilePath, String(midStart), String(midEnd), String(midLines)], `Samples ${midLines} rows from the middle section (rows ${midStart + 1} to ${midEnd})`);
149
156
  validateScriptResult(midResult, `preview_mid for ${datasetId}`);
150
157
  context.mid = midResult;
151
158
  }
@@ -155,8 +162,8 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
155
162
  }
156
163
  return context;
157
164
  }
158
- async function runScript(env, sandboxId, scriptName, args, description) {
159
- const scriptPath = `/vercel/sandbox/lib/domain/dataset/file/scripts/${scriptName}`;
165
+ async function runScript(runtime, sandboxId, scriptName, args, description) {
166
+ const scriptPath = `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`;
160
167
  const command = `python ${scriptPath} ${args.join(" ")}`;
161
168
  let scriptContent = "";
162
169
  try {
@@ -168,7 +175,7 @@ async function runScript(env, sandboxId, scriptName, args, description) {
168
175
  }
169
176
  try {
170
177
  const result = await runDatasetSandboxCommandStep({
171
- env,
178
+ runtime,
172
179
  sandboxId,
173
180
  cmd: "python",
174
181
  args: [scriptPath, ...args],
@@ -2,9 +2,9 @@ interface GenerateSchemaToolParams {
2
2
  datasetId: string;
3
3
  isNested?: boolean;
4
4
  fileId?: string;
5
- env: any;
5
+ runtime: any;
6
6
  }
7
- export declare function createGenerateSchemaTool({ datasetId, isNested, fileId, env }: GenerateSchemaToolParams): import("ai").Tool<{
7
+ export declare function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }: GenerateSchemaToolParams): import("ai").Tool<{
8
8
  schemaTitle: string;
9
9
  schemaDescription: string;
10
10
  schemaJson: string;
@@ -1,7 +1,7 @@
1
1
  import { tool } from "ai";
2
2
  import { z } from "zod";
3
3
  import { datasetUpdateSchemaStep } from "../dataset/steps.js";
4
- export function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
4
+ export function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }) {
5
5
  return tool({
6
6
  description: `Generate a formal JSON schema for a SINGLE RECORD (row) from the file. This schema describes the structure of ONE record, not the entire dataset or array of records. Requirements:
7
7
  1. Schema describes ONE RECORD structure only (no array wrappers)
@@ -72,7 +72,7 @@ export function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
72
72
  console.log(`[Dataset ${datasetId}] Schema JSON:`);
73
73
  console.log(JSON.stringify(parsedSchema, null, 2));
74
74
  const updateResult = await datasetUpdateSchemaStep({
75
- env,
75
+ runtime,
76
76
  datasetId,
77
77
  schema: schemaData,
78
78
  status: "schema_complete",
@@ -1,2 +1,2 @@
1
- import { FileParseStoryContext } from "./file-dataset.agent.js";
2
- export declare function buildFileDatasetPrompt(context: FileParseStoryContext): string;
1
+ import { FileParseContext } from "./file-dataset.agent.js";
2
+ export declare function buildFileDatasetPrompt(context: FileParseContext): string;
@@ -147,6 +147,8 @@ function buildInstructions(context) {
147
147
  .ele("Action").txt("Use the provided schema as the output contract for every row in output.jsonl").up()
148
148
  .ele("Requirements")
149
149
  .ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
150
+ .ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
151
+ .ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the source language").up()
150
152
  .ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
151
153
  .up()
152
154
  .up();
@@ -170,6 +172,7 @@ function buildInstructions(context) {
170
172
  .ele("Requirements")
171
173
  .ele("Requirement").txt("Parse ALL data rows/records from the file (exclude header sections and metadata)").up()
172
174
  .ele("Requirement").txt("Output JSONL format: each line is {\"type\": \"row\", \"data\": {...record...}}").up()
175
+ .ele("Requirement").txt("When a schema is provided, each data object must contain the exact required schema keys and must not use translated or synonymous keys").up()
173
176
  .ele("Requirement").txt("Extract ONLY data records; skip any header lines, summary sections, or file metadata").up()
174
177
  .ele("Requirement").txt(`Save output to: ${outputPath}`).up()
175
178
  .ele("Requirement").txt("Use descriptive scriptName in snake_case (e.g., 'parse_csv_to_jsonl')").up()
@@ -177,11 +180,13 @@ function buildInstructions(context) {
177
180
  .up()
178
181
  .ele("Step", { number: "4", name: "Complete and Validate" })
179
182
  .ele("Action").txt("Call completeDataset to validate the dataset").up()
180
- .ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns error details if validation fails.").up()
183
+ .ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
181
184
  .up()
182
185
  .up()
183
186
  .ele("Rules")
184
187
  .ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
188
+ .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the source language").up()
189
+ .ele("Rule").txt("Original/source language applies to extracted values only, not to JSON object keys").up()
185
190
  .ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
186
191
  .ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
187
192
  .ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()
@@ -1,5 +1,5 @@
1
1
  export declare function readInstantFileStep(params: {
2
- env: any;
2
+ runtime: any;
3
3
  fileId: string;
4
4
  }): Promise<{
5
5
  url: string;
@@ -1,7 +1,13 @@
1
- import { getContextRuntime } from "@ekairos/events/runtime";
1
+ async function getRuntimeDb(runtime) {
2
+ if (!runtime) {
3
+ throw new Error("Dataset file step requires runtime.");
4
+ }
5
+ const db = runtime.db;
6
+ return typeof db === "function" ? await db.call(runtime) : db;
7
+ }
2
8
  export async function readInstantFileStep(params) {
3
9
  "use step";
4
- const db = (await getContextRuntime(params.env)).db;
10
+ const db = await getRuntimeDb(params.runtime);
5
11
  const fileQuery = await db.query({
6
12
  $files: { $: { where: { id: params.fileId }, limit: 1 } },
7
13
  });
package/dist/index.d.ts CHANGED
@@ -3,4 +3,3 @@ export * from "./domain.js";
3
3
  export * from "./materializeDataset.tool.js";
4
4
  export * from "./schema.js";
5
5
  export * from "./service.js";
6
- export * from "./skill.js";
package/dist/index.js CHANGED
@@ -3,4 +3,3 @@ export * from "./domain.js";
3
3
  export * from "./materializeDataset.tool.js";
4
4
  export * from "./schema.js";
5
5
  export * from "./service.js";
6
- export * from "./skill.js";
@@ -1,8 +1,8 @@
1
- import type { QueryDomainStepInput, QueryDomainStepResult } from "./queryDomain.step.js";
2
- export type QueryDomainInput = QueryDomainStepInput;
1
+ import { type QueryDomainStepInput, type QueryDomainStepResult } from "./queryDomain.step.js";
2
+ export type QueryDomainInput = Omit<QueryDomainStepInput, "runtime">;
3
3
  export type QueryDomainResult = QueryDomainStepResult;
4
4
  /**
5
5
  * Workflow-compatible domain query.
6
6
  * Always returns a dataset + preview rows.
7
7
  */
8
- export declare function queryDomain(input: QueryDomainInput): Promise<QueryDomainResult>;
8
+ export declare function queryDomain(runtime: any, input: QueryDomainInput): Promise<QueryDomainResult>;
@@ -1,9 +1,9 @@
1
+ import { queryDomainStep } from "./queryDomain.step.js";
1
2
  /**
2
3
  * Workflow-compatible domain query.
3
4
  * Always returns a dataset + preview rows.
4
5
  */
5
- export async function queryDomain(input) {
6
+ export async function queryDomain(runtime, input) {
6
7
  "use step";
7
- const { queryDomainStep } = await import("./queryDomain.step.js");
8
- return await queryDomainStep(input);
8
+ return await queryDomainStep({ runtime, ...input });
9
9
  }
@@ -1,4 +1,5 @@
1
1
  export type QueryDomainStepInput = {
2
+ runtime: any;
2
3
  query: Record<string, any>;
3
4
  explanation: string;
4
5
  title?: string;
@@ -1,5 +1,4 @@
1
1
  import { id as newId } from "@instantdb/admin";
2
- import { getContextRuntime, getContextEnv } from "@ekairos/events/runtime";
3
2
  import { DatasetService } from "../service.js";
4
3
  function normalizeRows(result) {
5
4
  if (!result || typeof result !== "object")
@@ -46,11 +45,16 @@ function inferSchema(rows) {
46
45
  }
47
46
  return { schema };
48
47
  }
48
+ async function getRuntimeDb(runtime) {
49
+ if (!runtime) {
50
+ throw new Error("Dataset query step requires runtime.");
51
+ }
52
+ const db = runtime.db;
53
+ return typeof db === "function" ? await db.call(runtime) : db;
54
+ }
49
55
  export async function queryDomainStep(params) {
50
56
  "use step";
51
- const env = await getContextEnv();
52
- const runtime = await getContextRuntime(env);
53
- const db = runtime.db;
57
+ const db = await getRuntimeDb(params.runtime);
54
58
  const service = new DatasetService(db);
55
59
  const datasetId = params.datasetId ?? newId();
56
60
  const queryResult = await db.query(params.query);
@@ -1,6 +1,6 @@
1
1
  export type DatasetSandboxId = string;
2
2
  export type CreateDatasetSandboxParams = {
3
- runtime?: string;
3
+ sandboxRuntime?: string;
4
4
  timeoutMs?: number;
5
5
  ports?: number[];
6
6
  resources?: {
@@ -15,18 +15,18 @@ export type DatasetSandboxRunCommandResult = {
15
15
  stderr: string;
16
16
  };
17
17
  export declare function createDatasetSandboxStep(params: {
18
- env: any;
18
+ runtime: any;
19
19
  } & CreateDatasetSandboxParams): Promise<{
20
20
  sandboxId: DatasetSandboxId;
21
21
  }>;
22
22
  export declare function runDatasetSandboxCommandStep(params: {
23
- env: any;
23
+ runtime: any;
24
24
  sandboxId: DatasetSandboxId;
25
25
  cmd: string;
26
26
  args?: string[];
27
27
  }): Promise<DatasetSandboxRunCommandResult>;
28
28
  export declare function writeDatasetSandboxFilesStep(params: {
29
- env: any;
29
+ runtime: any;
30
30
  sandboxId: DatasetSandboxId;
31
31
  files: Array<{
32
32
  path: string;
@@ -34,13 +34,13 @@ export declare function writeDatasetSandboxFilesStep(params: {
34
34
  }>;
35
35
  }): Promise<void>;
36
36
  export declare function readDatasetSandboxFileStep(params: {
37
- env: any;
37
+ runtime: any;
38
38
  sandboxId: DatasetSandboxId;
39
39
  path: string;
40
40
  }): Promise<{
41
41
  contentBase64: string;
42
42
  }>;
43
43
  export declare function stopDatasetSandboxStep(params: {
44
- env: any;
44
+ runtime: any;
45
45
  sandboxId: DatasetSandboxId;
46
46
  }): Promise<void>;