@ekairos/dataset 1.22.49-beta.development.0 → 1.22.51-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/materialize.d.ts +1 -11
  6. package/dist/builder/materialize.js +25 -77
  7. package/dist/builder/materializeQuery.d.ts +11 -0
  8. package/dist/builder/materializeQuery.js +40 -0
  9. package/dist/builder/persistence.js +13 -21
  10. package/dist/builder/types.d.ts +3 -0
  11. package/dist/clearDataset.tool.d.ts +2 -2
  12. package/dist/clearDataset.tool.js +3 -3
  13. package/dist/completeDataset.tool.d.ts +31 -3
  14. package/dist/completeDataset.tool.js +101 -13
  15. package/dist/dataset/steps.d.ts +32 -8
  16. package/dist/dataset/steps.js +69 -13
  17. package/dist/dataset.js +13 -7
  18. package/dist/executeCommand.tool.d.ts +2 -2
  19. package/dist/executeCommand.tool.js +3 -3
  20. package/dist/file/file-dataset.agent.d.ts +17 -11
  21. package/dist/file/file-dataset.agent.js +54 -47
  22. package/dist/file/filepreview.d.ts +2 -2
  23. package/dist/file/filepreview.js +13 -13
  24. package/dist/file/generateSchema.tool.d.ts +2 -2
  25. package/dist/file/generateSchema.tool.js +2 -2
  26. package/dist/file/prompts.d.ts +2 -2
  27. package/dist/file/prompts.js +6 -1
  28. package/dist/file/steps.d.ts +1 -1
  29. package/dist/file/steps.js +8 -2
  30. package/dist/index.d.ts +0 -1
  31. package/dist/index.js +0 -1
  32. package/dist/query/queryDomain.d.ts +3 -3
  33. package/dist/query/queryDomain.js +3 -3
  34. package/dist/query/queryDomain.step.d.ts +1 -0
  35. package/dist/query/queryDomain.step.js +8 -4
  36. package/dist/sandbox/steps.d.ts +6 -6
  37. package/dist/sandbox/steps.js +16 -12
  38. package/dist/transform/filepreview.d.ts +1 -1
  39. package/dist/transform/filepreview.js +6 -6
  40. package/dist/transform/index.d.ts +1 -1
  41. package/dist/transform/index.js +1 -1
  42. package/dist/transform/prompts.js +4 -1
  43. package/dist/transform/transform-dataset.agent.d.ts +9 -3
  44. package/dist/transform/transform-dataset.agent.js +39 -32
  45. package/dist/transform/transformDataset.d.ts +3 -2
  46. package/dist/transform/transformDataset.js +10 -9
  47. package/package.json +19 -5
  48. package/dist/eventsReactRuntime.d.ts +0 -21
  49. package/dist/eventsReactRuntime.js +0 -25
@@ -1,7 +1,7 @@
1
- import { createContext, didToolExecute, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
1
+ import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
2
2
  import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
3
3
  import { createGenerateSchemaTool } from "./generateSchema.tool.js";
4
- import { createCompleteDatasetTool } from "../completeDataset.tool.js";
4
+ import { createCompleteDatasetTool, didCompleteDatasetSucceed } from "../completeDataset.tool.js";
5
5
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
6
6
  import { createClearDatasetTool } from "../clearDataset.tool.js";
7
7
  import { buildFileDatasetPrompt } from "./prompts.js";
@@ -10,16 +10,24 @@ import { id } from "@instantdb/admin";
10
10
  import { getDatasetWorkstation } from "../datasetFiles.js";
11
11
  import { readInstantFileStep } from "./steps.js";
12
12
  import { datasetGetByIdStep } from "../dataset/steps.js";
13
- import { createEventsReactRuntime } from "../eventsReactRuntime.js";
14
- async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
13
+ async function awaitContextRun(run) {
14
+ if (!run)
15
+ return;
16
+ if (run.returnValue) {
17
+ await run.returnValue;
18
+ return;
19
+ }
20
+ await run;
21
+ }
22
+ async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
15
23
  if (state.initialized) {
16
24
  return state.filePath;
17
25
  }
18
- console.log(`[FileParseStory ${datasetId}] Initializing sandbox...`);
19
- await ensurePreviewScriptsAvailable(env, sandboxId);
20
- console.log(`[FileParseStory ${datasetId}] Installing Python dependencies...`);
26
+ console.log(`[FileParseContext ${datasetId}] Initializing sandbox...`);
27
+ await ensurePreviewScriptsAvailable(runtime, sandboxId);
28
+ console.log(`[FileParseContext ${datasetId}] Installing Python dependencies...`);
21
29
  const pipInstall = await runDatasetSandboxCommandStep({
22
- env,
30
+ runtime,
23
31
  sandboxId,
24
32
  cmd: "python",
25
33
  args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
@@ -28,12 +36,12 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
28
36
  if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
29
37
  throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
30
38
  }
31
- console.log(`[FileParseStory ${datasetId}] Fetching file from InstantDB...`);
32
- const file = await readInstantFileStep({ env, fileId });
33
- console.log(`[FileParseStory ${datasetId}] Creating dataset workstation...`);
39
+ console.log(`[FileParseContext ${datasetId}] Fetching file from InstantDB...`);
40
+ const file = await readInstantFileStep({ runtime, fileId });
41
+ console.log(`[FileParseContext ${datasetId}] Creating dataset workstation...`);
34
42
  const workstation = getDatasetWorkstation(datasetId);
35
43
  await runDatasetSandboxCommandStep({
36
- env,
44
+ runtime,
37
45
  sandboxId,
38
46
  cmd: "mkdir",
39
47
  args: ["-p", workstation],
@@ -42,7 +50,7 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
42
50
  const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
43
51
  const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
44
52
  await writeDatasetSandboxFilesStep({
45
- env,
53
+ runtime,
46
54
  sandboxId,
47
55
  files: [
48
56
  {
@@ -51,14 +59,14 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
51
59
  },
52
60
  ],
53
61
  });
54
- console.log(`[FileParseStory ${datasetId}] ✅ Workstation created: ${workstation}`);
55
- console.log(`[FileParseStory ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
62
+ console.log(`[FileParseContext ${datasetId}] ✅ Workstation created: ${workstation}`);
63
+ console.log(`[FileParseContext ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
56
64
  state.filePath = sandboxFilePath;
57
65
  state.initialized = true;
58
66
  return sandboxFilePath;
59
67
  }
60
68
  /**
61
- * FileParseStory
69
+ * FileParseContext
62
70
  *
63
71
  * Uso:
64
72
  * - Crear una instancia con `fileId`, `instructions` y un `sandbox`
@@ -67,27 +75,27 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
67
75
  *
68
76
  * Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
69
77
  */
70
- function createFileParseStoryDefinition(params) {
78
+ function createFileParseContextDefinition(params) {
71
79
  const datasetId = params.datasetId ?? id();
72
80
  const model = params.model ?? "openai/gpt-5";
73
- let storyBuilder = createContext("file.parse")
74
- .context(async (stored, env) => {
81
+ let contextBuilder = createContext("file.parse")
82
+ .context(async (stored, _env, runtime) => {
75
83
  const previous = stored?.content ?? {};
76
84
  const sandboxState = previous?.sandboxState ?? { initialized: false, filePath: "" };
77
85
  const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
78
86
  if (!sandboxId) {
79
87
  throw new Error("dataset_sandbox_required");
80
88
  }
81
- const sandboxFilePath = await initializeSandbox(env, sandboxId, datasetId, params.fileId, sandboxState);
89
+ const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId, params.fileId, sandboxState);
82
90
  let filePreview = undefined;
83
91
  try {
84
- filePreview = await generateFilePreview(env, sandboxId, sandboxFilePath, datasetId);
92
+ filePreview = await generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId);
85
93
  }
86
94
  catch {
87
95
  // optional
88
96
  }
89
97
  let schema = null;
90
- const datasetResult = await datasetGetByIdStep({ env, datasetId });
98
+ const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
91
99
  if (datasetResult.ok && datasetResult.data.schema)
92
100
  schema = datasetResult.data.schema;
93
101
  const ctx = {
@@ -128,57 +136,57 @@ function createFileParseStoryDefinition(params) {
128
136
  base,
129
137
  ].join("\n");
130
138
  })
131
- .actions(async (_stored, env) => {
139
+ .actions(async (_stored, _env, runtime) => {
132
140
  const existingSchema = _stored?.content?.ctx?.schema?.schema;
133
141
  const actions = {
134
142
  executeCommand: createExecuteCommandTool({
135
143
  datasetId,
136
144
  sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
137
- env,
145
+ runtime,
138
146
  }),
139
147
  completeDataset: createCompleteDatasetTool({
140
148
  datasetId,
141
149
  sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
142
- env,
150
+ runtime,
143
151
  }),
144
152
  clearDataset: createClearDatasetTool({
145
153
  datasetId,
146
154
  sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
147
- env,
155
+ runtime,
148
156
  }),
149
157
  };
150
158
  if (!existingSchema) {
151
159
  actions.generateSchema = createGenerateSchemaTool({
152
160
  datasetId,
153
161
  fileId: params.fileId,
154
- env,
162
+ runtime,
155
163
  });
156
164
  }
157
165
  return actions;
158
166
  })
159
167
  .shouldContinue(({ reactionEvent }) => {
160
- return !didToolExecute(reactionEvent, "completeDataset");
168
+ return !didCompleteDatasetSucceed(reactionEvent);
161
169
  });
162
170
  if (params.reactor) {
163
- storyBuilder = storyBuilder.reactor(params.reactor);
171
+ contextBuilder = contextBuilder.reactor(params.reactor);
164
172
  }
165
173
  else {
166
- storyBuilder = storyBuilder.model(model);
174
+ contextBuilder = contextBuilder.model(model);
167
175
  }
168
- const story = storyBuilder.build();
169
- return { datasetId, story };
176
+ const context = contextBuilder.build();
177
+ return { datasetId, context };
170
178
  }
171
179
  /**
172
180
  * Factory (DX-first):
173
181
  *
174
182
  * Usage:
175
- * const { datasetId } = await createFileParseStory(fileId, { instructions }).parse(env)
183
+ * const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
176
184
  *
177
- * - No `db` is accepted/stored (workflow-safe).
178
- * - All I/O happens in `"use step"` functions via Ekairos runtime (`getContextRuntime(env).db`).
179
- * - `parse()` is the entrypoint; it calls `story.react(...)` internally.
185
+ * - Uses the caller runtime; no secondary runtime is created.
186
+ * - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
187
+ * - `parse()` is the entrypoint; it calls `context.react(...)` internally.
180
188
  */
181
- export function createFileParseStory(fileId, opts) {
189
+ export function createFileParseContext(fileId, opts) {
182
190
  const params = {
183
191
  fileId,
184
192
  instructions: opts?.instructions,
@@ -187,30 +195,29 @@ export function createFileParseStory(fileId, opts) {
187
195
  model: opts?.model,
188
196
  reactor: opts?.reactor,
189
197
  };
190
- const { datasetId, story } = createFileParseStoryDefinition(params);
198
+ const { datasetId, context } = createFileParseContextDefinition(params);
191
199
  return {
192
200
  datasetId,
193
- async parse(env, prompt) {
201
+ async parse(runtime, options = {}) {
194
202
  const triggerEvent = {
195
203
  id: id(),
196
204
  type: INPUT_TEXT_ITEM_TYPE,
197
205
  channel: WEB_CHANNEL,
198
206
  createdAt: new Date().toISOString(),
199
207
  content: {
200
- parts: [{ type: "text", text: prompt ?? "generate a dataset for this file" }],
208
+ parts: [{ type: "text", text: options.prompt ?? "generate a dataset for this file" }],
201
209
  },
202
210
  };
203
- const runtime = createEventsReactRuntime((env ?? {}));
204
- const shell = await story.react(triggerEvent, {
205
- runtime,
211
+ const shell = await context.react(triggerEvent, {
212
+ runtime: runtime,
206
213
  context: { key: `dataset:${datasetId}` },
207
- durable: false,
214
+ durable: options.durable ?? false,
208
215
  options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
209
216
  });
210
- await shell.run;
217
+ await awaitContextRun(shell.run);
211
218
  return { datasetId };
212
219
  },
213
- // Optional: expose the built story for advanced callers (not required for parse DX)
214
- story,
220
+ // Optional: expose the built context for advanced callers (not required for parse DX)
221
+ context,
215
222
  };
216
223
  }
@@ -34,6 +34,6 @@ interface PreviewOptions {
34
34
  tailLines?: number;
35
35
  midLines?: number;
36
36
  }
37
- export declare function ensurePreviewScriptsAvailable(env: any, sandboxId: string): Promise<void>;
38
- export declare function generateFilePreview(env: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
37
+ export declare function ensurePreviewScriptsAvailable(runtime: any, sandboxId: string): Promise<void>;
38
+ export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
39
39
  export {};
@@ -41,7 +41,7 @@ function validateScriptResult(result, context) {
41
41
  throw new Error(`${context} failed: ${stderr.substring(0, 500)}`);
42
42
  }
43
43
  }
44
- export async function ensurePreviewScriptsAvailable(env, sandboxId) {
44
+ export async function ensurePreviewScriptsAvailable(runtime, sandboxId) {
45
45
  if (preparedSandboxIds.has(sandboxId)) {
46
46
  return;
47
47
  }
@@ -53,7 +53,7 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
53
53
  const setupPromise = (async () => {
54
54
  try {
55
55
  await runDatasetSandboxCommandStep({
56
- env,
56
+ runtime,
57
57
  sandboxId,
58
58
  cmd: "mkdir",
59
59
  args: ["-p", SANDBOX_SCRIPT_DIRECTORY],
@@ -79,7 +79,7 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
79
79
  }
80
80
  if (filesToWrite.length > 0) {
81
81
  await writeDatasetSandboxFilesStep({
82
- env,
82
+ runtime,
83
83
  sandboxId,
84
84
  files: filesToWrite,
85
85
  });
@@ -95,13 +95,13 @@ export async function ensurePreviewScriptsAvailable(env, sandboxId) {
95
95
  throw error;
96
96
  }
97
97
  }
98
- export async function generateFilePreview(env, sandboxId, sandboxFilePath, datasetId, options = {}) {
98
+ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId, options = {}) {
99
99
  const context = {
100
100
  totalRows: 0,
101
101
  };
102
102
  try {
103
- await ensurePreviewScriptsAvailable(env, sandboxId);
104
- const metadataResult = await runScript(env, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
103
+ await ensurePreviewScriptsAvailable(runtime, sandboxId);
104
+ const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
105
105
  context.metadata = metadataResult;
106
106
  let isExcel = false;
107
107
  if (metadataResult.stdout) {
@@ -127,23 +127,23 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
127
127
  const midScript = isExcel ? "preview_mid_excel.py" : "preview_mid_csv.py";
128
128
  if (totalRows <= headLines) {
129
129
  console.log(`[Dataset ${datasetId}] File has ${totalRows} rows, reading all with head only`);
130
- const headResult = await runScript(env, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
130
+ const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
131
131
  validateScriptResult(headResult, `preview_head for ${datasetId}`);
132
132
  context.head = headResult;
133
133
  return context;
134
134
  }
135
135
  if (headLines + tailLines >= totalRows) {
136
136
  console.log(`[Dataset ${datasetId}] Head + tail would cover entire file (${totalRows} rows), reading all with head only`);
137
- const headResult = await runScript(env, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
137
+ const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
138
138
  validateScriptResult(headResult, `preview_head for ${datasetId}`);
139
139
  context.head = headResult;
140
140
  return context;
141
141
  }
142
142
  console.log(`[Dataset ${datasetId}] Reading head (${headLines} rows) and tail (${tailLines} rows) from ${totalRows} total rows`);
143
- const headResult = await runScript(env, sandboxId, headScript, [sandboxFilePath, String(headLines)], `Reads the first ${headLines} rows of the file`);
143
+ const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(headLines)], `Reads the first ${headLines} rows of the file`);
144
144
  validateScriptResult(headResult, `preview_head for ${datasetId}`);
145
145
  context.head = headResult;
146
- const tailResult = await runScript(env, sandboxId, tailScript, [sandboxFilePath, String(tailLines)], `Reads the last ${tailLines} rows of the file`);
146
+ const tailResult = await runScript(runtime, sandboxId, tailScript, [sandboxFilePath, String(tailLines)], `Reads the last ${tailLines} rows of the file`);
147
147
  validateScriptResult(tailResult, `preview_tail for ${datasetId}`);
148
148
  context.tail = tailResult;
149
149
  const midLines = options.midLines || DEFAULT_MID_LINES;
@@ -152,7 +152,7 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
152
152
  const midStart = headLines;
153
153
  const midEnd = totalRows - tailLines;
154
154
  console.log(`[Dataset ${datasetId}] Large gap (${gapSize} rows), adding mid sample (${midLines} rows)`);
155
- const midResult = await runScript(env, sandboxId, midScript, [sandboxFilePath, String(midStart), String(midEnd), String(midLines)], `Samples ${midLines} rows from the middle section (rows ${midStart + 1} to ${midEnd})`);
155
+ const midResult = await runScript(runtime, sandboxId, midScript, [sandboxFilePath, String(midStart), String(midEnd), String(midLines)], `Samples ${midLines} rows from the middle section (rows ${midStart + 1} to ${midEnd})`);
156
156
  validateScriptResult(midResult, `preview_mid for ${datasetId}`);
157
157
  context.mid = midResult;
158
158
  }
@@ -162,7 +162,7 @@ export async function generateFilePreview(env, sandboxId, sandboxFilePath, datas
162
162
  }
163
163
  return context;
164
164
  }
165
- async function runScript(env, sandboxId, scriptName, args, description) {
165
+ async function runScript(runtime, sandboxId, scriptName, args, description) {
166
166
  const scriptPath = `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`;
167
167
  const command = `python ${scriptPath} ${args.join(" ")}`;
168
168
  let scriptContent = "";
@@ -175,7 +175,7 @@ async function runScript(env, sandboxId, scriptName, args, description) {
175
175
  }
176
176
  try {
177
177
  const result = await runDatasetSandboxCommandStep({
178
- env,
178
+ runtime,
179
179
  sandboxId,
180
180
  cmd: "python",
181
181
  args: [scriptPath, ...args],
@@ -2,9 +2,9 @@ interface GenerateSchemaToolParams {
2
2
  datasetId: string;
3
3
  isNested?: boolean;
4
4
  fileId?: string;
5
- env: any;
5
+ runtime: any;
6
6
  }
7
- export declare function createGenerateSchemaTool({ datasetId, isNested, fileId, env }: GenerateSchemaToolParams): import("ai").Tool<{
7
+ export declare function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }: GenerateSchemaToolParams): import("ai").Tool<{
8
8
  schemaTitle: string;
9
9
  schemaDescription: string;
10
10
  schemaJson: string;
@@ -1,7 +1,7 @@
1
1
  import { tool } from "ai";
2
2
  import { z } from "zod";
3
3
  import { datasetUpdateSchemaStep } from "../dataset/steps.js";
4
- export function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
4
+ export function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }) {
5
5
  return tool({
6
6
  description: `Generate a formal JSON schema for a SINGLE RECORD (row) from the file. This schema describes the structure of ONE record, not the entire dataset or array of records. Requirements:
7
7
  1. Schema describes ONE RECORD structure only (no array wrappers)
@@ -72,7 +72,7 @@ export function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
72
72
  console.log(`[Dataset ${datasetId}] Schema JSON:`);
73
73
  console.log(JSON.stringify(parsedSchema, null, 2));
74
74
  const updateResult = await datasetUpdateSchemaStep({
75
- env,
75
+ runtime,
76
76
  datasetId,
77
77
  schema: schemaData,
78
78
  status: "schema_complete",
@@ -1,2 +1,2 @@
1
- import { FileParseStoryContext } from "./file-dataset.agent.js";
2
- export declare function buildFileDatasetPrompt(context: FileParseStoryContext): string;
1
+ import { FileParseContext } from "./file-dataset.agent.js";
2
+ export declare function buildFileDatasetPrompt(context: FileParseContext): string;
@@ -147,6 +147,8 @@ function buildInstructions(context) {
147
147
  .ele("Action").txt("Use the provided schema as the output contract for every row in output.jsonl").up()
148
148
  .ele("Requirements")
149
149
  .ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
150
+ .ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
151
+ .ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the source language").up()
150
152
  .ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
151
153
  .up()
152
154
  .up();
@@ -170,6 +172,7 @@ function buildInstructions(context) {
170
172
  .ele("Requirements")
171
173
  .ele("Requirement").txt("Parse ALL data rows/records from the file (exclude header sections and metadata)").up()
172
174
  .ele("Requirement").txt("Output JSONL format: each line is {\"type\": \"row\", \"data\": {...record...}}").up()
175
+ .ele("Requirement").txt("When a schema is provided, each data object must contain the exact required schema keys and must not use translated or synonymous keys").up()
173
176
  .ele("Requirement").txt("Extract ONLY data records; skip any header lines, summary sections, or file metadata").up()
174
177
  .ele("Requirement").txt(`Save output to: ${outputPath}`).up()
175
178
  .ele("Requirement").txt("Use descriptive scriptName in snake_case (e.g., 'parse_csv_to_jsonl')").up()
@@ -177,11 +180,13 @@ function buildInstructions(context) {
177
180
  .up()
178
181
  .ele("Step", { number: "4", name: "Complete and Validate" })
179
182
  .ele("Action").txt("Call completeDataset to validate the dataset").up()
180
- .ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns error details if validation fails.").up()
183
+ .ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
181
184
  .up()
182
185
  .up()
183
186
  .ele("Rules")
184
187
  .ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
188
+ .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the source language").up()
189
+ .ele("Rule").txt("Original/source language applies to extracted values only, not to JSON object keys").up()
185
190
  .ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
186
191
  .ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
187
192
  .ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()
@@ -1,5 +1,5 @@
1
1
  export declare function readInstantFileStep(params: {
2
- env: any;
2
+ runtime: any;
3
3
  fileId: string;
4
4
  }): Promise<{
5
5
  url: string;
@@ -1,7 +1,13 @@
1
- import { getContextRuntime } from "@ekairos/events/runtime";
1
+ async function getRuntimeDb(runtime) {
2
+ if (!runtime) {
3
+ throw new Error("Dataset file step requires runtime.");
4
+ }
5
+ const db = runtime.db;
6
+ return typeof db === "function" ? await db.call(runtime) : db;
7
+ }
2
8
  export async function readInstantFileStep(params) {
3
9
  "use step";
4
- const db = (await getContextRuntime(params.env)).db;
10
+ const db = await getRuntimeDb(params.runtime);
5
11
  const fileQuery = await db.query({
6
12
  $files: { $: { where: { id: params.fileId }, limit: 1 } },
7
13
  });
package/dist/index.d.ts CHANGED
@@ -3,4 +3,3 @@ export * from "./domain.js";
3
3
  export * from "./materializeDataset.tool.js";
4
4
  export * from "./schema.js";
5
5
  export * from "./service.js";
6
- export * from "./skill.js";
package/dist/index.js CHANGED
@@ -3,4 +3,3 @@ export * from "./domain.js";
3
3
  export * from "./materializeDataset.tool.js";
4
4
  export * from "./schema.js";
5
5
  export * from "./service.js";
6
- export * from "./skill.js";
@@ -1,8 +1,8 @@
1
- import type { QueryDomainStepInput, QueryDomainStepResult } from "./queryDomain.step.js";
2
- export type QueryDomainInput = QueryDomainStepInput;
1
+ import { type QueryDomainStepInput, type QueryDomainStepResult } from "./queryDomain.step.js";
2
+ export type QueryDomainInput = Omit<QueryDomainStepInput, "runtime">;
3
3
  export type QueryDomainResult = QueryDomainStepResult;
4
4
  /**
5
5
  * Workflow-compatible domain query.
6
6
  * Always returns a dataset + preview rows.
7
7
  */
8
- export declare function queryDomain(input: QueryDomainInput): Promise<QueryDomainResult>;
8
+ export declare function queryDomain(runtime: any, input: QueryDomainInput): Promise<QueryDomainResult>;
@@ -1,9 +1,9 @@
1
+ import { queryDomainStep } from "./queryDomain.step.js";
1
2
  /**
2
3
  * Workflow-compatible domain query.
3
4
  * Always returns a dataset + preview rows.
4
5
  */
5
- export async function queryDomain(input) {
6
+ export async function queryDomain(runtime, input) {
6
7
  "use step";
7
- const { queryDomainStep } = await import("./queryDomain.step.js");
8
- return await queryDomainStep(input);
8
+ return await queryDomainStep({ runtime, ...input });
9
9
  }
@@ -1,4 +1,5 @@
1
1
  export type QueryDomainStepInput = {
2
+ runtime: any;
2
3
  query: Record<string, any>;
3
4
  explanation: string;
4
5
  title?: string;
@@ -1,5 +1,4 @@
1
1
  import { id as newId } from "@instantdb/admin";
2
- import { getContextRuntime, getContextEnv } from "@ekairos/events/runtime";
3
2
  import { DatasetService } from "../service.js";
4
3
  function normalizeRows(result) {
5
4
  if (!result || typeof result !== "object")
@@ -46,11 +45,16 @@ function inferSchema(rows) {
46
45
  }
47
46
  return { schema };
48
47
  }
48
+ async function getRuntimeDb(runtime) {
49
+ if (!runtime) {
50
+ throw new Error("Dataset query step requires runtime.");
51
+ }
52
+ const db = runtime.db;
53
+ return typeof db === "function" ? await db.call(runtime) : db;
54
+ }
49
55
  export async function queryDomainStep(params) {
50
56
  "use step";
51
- const env = await getContextEnv();
52
- const runtime = await getContextRuntime(env);
53
- const db = runtime.db;
57
+ const db = await getRuntimeDb(params.runtime);
54
58
  const service = new DatasetService(db);
55
59
  const datasetId = params.datasetId ?? newId();
56
60
  const queryResult = await db.query(params.query);
@@ -1,6 +1,6 @@
1
1
  export type DatasetSandboxId = string;
2
2
  export type CreateDatasetSandboxParams = {
3
- runtime?: string;
3
+ sandboxRuntime?: string;
4
4
  timeoutMs?: number;
5
5
  ports?: number[];
6
6
  resources?: {
@@ -15,18 +15,18 @@ export type DatasetSandboxRunCommandResult = {
15
15
  stderr: string;
16
16
  };
17
17
  export declare function createDatasetSandboxStep(params: {
18
- env: any;
18
+ runtime: any;
19
19
  } & CreateDatasetSandboxParams): Promise<{
20
20
  sandboxId: DatasetSandboxId;
21
21
  }>;
22
22
  export declare function runDatasetSandboxCommandStep(params: {
23
- env: any;
23
+ runtime: any;
24
24
  sandboxId: DatasetSandboxId;
25
25
  cmd: string;
26
26
  args?: string[];
27
27
  }): Promise<DatasetSandboxRunCommandResult>;
28
28
  export declare function writeDatasetSandboxFilesStep(params: {
29
- env: any;
29
+ runtime: any;
30
30
  sandboxId: DatasetSandboxId;
31
31
  files: Array<{
32
32
  path: string;
@@ -34,13 +34,13 @@ export declare function writeDatasetSandboxFilesStep(params: {
34
34
  }>;
35
35
  }): Promise<void>;
36
36
  export declare function readDatasetSandboxFileStep(params: {
37
- env: any;
37
+ runtime: any;
38
38
  sandboxId: DatasetSandboxId;
39
39
  path: string;
40
40
  }): Promise<{
41
41
  contentBase64: string;
42
42
  }>;
43
43
  export declare function stopDatasetSandboxStep(params: {
44
- env: any;
44
+ runtime: any;
45
45
  sandboxId: DatasetSandboxId;
46
46
  }): Promise<void>;
@@ -2,12 +2,19 @@ import { execFile } from "node:child_process";
2
2
  import { promises as fs } from "node:fs";
3
3
  import path from "node:path";
4
4
  import { promisify } from "node:util";
5
- import { getContextRuntime } from "@ekairos/events/runtime";
5
+ import { SandboxService } from "@ekairos/sandbox";
6
6
  const execFileAsync = promisify(execFile);
7
7
  const localSandboxRoots = new Map();
8
8
  function isLocalDatasetSandboxMode() {
9
9
  return String(process.env.DATASET_TEST_LOCAL_SANDBOX ?? "").trim() === "1";
10
10
  }
11
+ async function getRuntimeDb(runtime) {
12
+ if (!runtime) {
13
+ throw new Error("Dataset sandbox step requires runtime.");
14
+ }
15
+ const db = runtime.db;
16
+ return typeof db === "function" ? await db.call(runtime) : db;
17
+ }
11
18
  function getLocalSandboxRoot(sandboxId) {
12
19
  return (localSandboxRoots.get(sandboxId) ||
13
20
  path.resolve(process.cwd(), "test-results", "dataset-sandboxes", sandboxId));
@@ -63,10 +70,11 @@ export async function createDatasetSandboxStep(params) {
63
70
  await ensureLocalSandboxRoot(sandboxId);
64
71
  return { sandboxId };
65
72
  }
66
- const db = (await getContextRuntime(params.env)).db;
67
- const { SandboxService } = (await import("@ekairos/sandbox"));
73
+ const db = await getRuntimeDb(params.runtime);
68
74
  const service = new SandboxService(db);
69
- const created = await service.createSandbox(params);
75
+ const sandboxParams = { ...params, runtime: params.sandboxRuntime };
76
+ delete sandboxParams.sandboxRuntime;
77
+ const created = await service.createSandbox(sandboxParams);
70
78
  if (!created.ok)
71
79
  throw new Error(created.error);
72
80
  return { sandboxId: created.data.sandboxId };
@@ -80,8 +88,7 @@ export async function runDatasetSandboxCommandStep(params) {
80
88
  args: params.args,
81
89
  });
82
90
  }
83
- const db = (await getContextRuntime(params.env)).db;
84
- const { SandboxService } = (await import("@ekairos/sandbox"));
91
+ const db = await getRuntimeDb(params.runtime);
85
92
  const service = new SandboxService(db);
86
93
  const result = await service.runCommand(params.sandboxId, params.cmd, params.args ?? []);
87
94
  if (!result.ok)
@@ -101,8 +108,7 @@ export async function writeDatasetSandboxFilesStep(params) {
101
108
  }
102
109
  return;
103
110
  }
104
- const db = (await getContextRuntime(params.env)).db;
105
- const { SandboxService } = (await import("@ekairos/sandbox"));
111
+ const db = await getRuntimeDb(params.runtime);
106
112
  const service = new SandboxService(db);
107
113
  const result = await service.writeFiles(params.sandboxId, params.files);
108
114
  if (!result.ok)
@@ -114,8 +120,7 @@ export async function readDatasetSandboxFileStep(params) {
114
120
  const content = await fs.readFile(params.path);
115
121
  return { contentBase64: Buffer.from(content).toString("base64") };
116
122
  }
117
- const db = (await getContextRuntime(params.env)).db;
118
- const { SandboxService } = (await import("@ekairos/sandbox"));
123
+ const db = await getRuntimeDb(params.runtime);
119
124
  const service = new SandboxService(db);
120
125
  const result = await service.readFile(params.sandboxId, params.path);
121
126
  if (!result.ok)
@@ -130,8 +135,7 @@ export async function stopDatasetSandboxStep(params) {
130
135
  localSandboxRoots.delete(params.sandboxId);
131
136
  return;
132
137
  }
133
- const db = (await getContextRuntime(params.env)).db;
134
- const { SandboxService } = (await import("@ekairos/sandbox"));
138
+ const db = await getRuntimeDb(params.runtime);
135
139
  const service = new SandboxService(db);
136
140
  const result = await service.stopSandbox(params.sandboxId);
137
141
  if (!result.ok)
@@ -18,5 +18,5 @@ export type TransformSourcePreviewContext = {
18
18
  interface PreviewOptions {
19
19
  headLines?: number;
20
20
  }
21
- export declare function generateSourcePreview(env: any, sandboxId: string, sourcePath: string, datasetId: string, options?: PreviewOptions): Promise<TransformSourcePreviewContext>;
21
+ export declare function generateSourcePreview(runtime: any, sandboxId: string, sourcePath: string, datasetId: string, options?: PreviewOptions): Promise<TransformSourcePreviewContext>;
22
22
  export {};