@ekairos/dataset 1.22.49-beta.development.0 → 1.22.51-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/materialize.d.ts +1 -11
  6. package/dist/builder/materialize.js +25 -77
  7. package/dist/builder/materializeQuery.d.ts +11 -0
  8. package/dist/builder/materializeQuery.js +40 -0
  9. package/dist/builder/persistence.js +13 -21
  10. package/dist/builder/types.d.ts +3 -0
  11. package/dist/clearDataset.tool.d.ts +2 -2
  12. package/dist/clearDataset.tool.js +3 -3
  13. package/dist/completeDataset.tool.d.ts +31 -3
  14. package/dist/completeDataset.tool.js +101 -13
  15. package/dist/dataset/steps.d.ts +32 -8
  16. package/dist/dataset/steps.js +69 -13
  17. package/dist/dataset.js +13 -7
  18. package/dist/executeCommand.tool.d.ts +2 -2
  19. package/dist/executeCommand.tool.js +3 -3
  20. package/dist/file/file-dataset.agent.d.ts +17 -11
  21. package/dist/file/file-dataset.agent.js +54 -47
  22. package/dist/file/filepreview.d.ts +2 -2
  23. package/dist/file/filepreview.js +13 -13
  24. package/dist/file/generateSchema.tool.d.ts +2 -2
  25. package/dist/file/generateSchema.tool.js +2 -2
  26. package/dist/file/prompts.d.ts +2 -2
  27. package/dist/file/prompts.js +6 -1
  28. package/dist/file/steps.d.ts +1 -1
  29. package/dist/file/steps.js +8 -2
  30. package/dist/index.d.ts +0 -1
  31. package/dist/index.js +0 -1
  32. package/dist/query/queryDomain.d.ts +3 -3
  33. package/dist/query/queryDomain.js +3 -3
  34. package/dist/query/queryDomain.step.d.ts +1 -0
  35. package/dist/query/queryDomain.step.js +8 -4
  36. package/dist/sandbox/steps.d.ts +6 -6
  37. package/dist/sandbox/steps.js +16 -12
  38. package/dist/transform/filepreview.d.ts +1 -1
  39. package/dist/transform/filepreview.js +6 -6
  40. package/dist/transform/index.d.ts +1 -1
  41. package/dist/transform/index.js +1 -1
  42. package/dist/transform/prompts.js +4 -1
  43. package/dist/transform/transform-dataset.agent.d.ts +9 -3
  44. package/dist/transform/transform-dataset.agent.js +39 -32
  45. package/dist/transform/transformDataset.d.ts +3 -2
  46. package/dist/transform/transformDataset.js +10 -9
  47. package/package.json +19 -5
  48. package/dist/eventsReactRuntime.d.ts +0 -21
  49. package/dist/eventsReactRuntime.js +0 -25
@@ -14,7 +14,7 @@ function getAjv() {
14
14
  }
15
15
  return ajvInstance;
16
16
  }
17
- export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
17
+ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
18
18
  return tool({
19
19
  description: "Mark the dataset as completed. Use only when output.jsonl has been successfully generated and is ready for validation.",
20
20
  inputSchema: z.object({
@@ -27,23 +27,33 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
27
27
  console.log(`[Dataset ${datasetId}] ========================================`);
28
28
  const outputPath = getDatasetOutputPath(datasetId);
29
29
  try {
30
- await ensureFileExists(env, sandboxId, outputPath);
30
+ await ensureFileExists(runtime, sandboxId, outputPath);
31
31
  }
32
32
  catch (error) {
33
33
  const message = error instanceof Error ? error.message : String(error);
34
34
  console.error(`[Dataset ${datasetId}] Missing output file:`, message);
35
35
  return {
36
36
  success: false,
37
+ status: "missing_output",
38
+ validRows: 0,
39
+ rowRecordCount: 0,
40
+ validation: [],
37
41
  error: message,
42
+ message,
38
43
  };
39
44
  }
40
45
  console.log(`[Dataset ${datasetId}] Validating dataset rows against schema`);
41
- const datasetResult = await datasetGetByIdStep({ env, datasetId });
46
+ const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
42
47
  if (!datasetResult.ok) {
43
48
  console.error(`[Dataset ${datasetId}] ${datasetResult.error}`);
44
49
  return {
45
50
  success: false,
51
+ status: "dataset_not_found",
52
+ validRows: 0,
53
+ rowRecordCount: 0,
54
+ validation: [],
46
55
  error: datasetResult.error,
56
+ message: datasetResult.error,
47
57
  };
48
58
  }
49
59
  const datasetRecord = datasetResult.data;
@@ -51,7 +61,12 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
51
61
  console.error(`[Dataset ${datasetId}] Schema not found in database`);
52
62
  return {
53
63
  success: false,
64
+ status: "schema_missing",
65
+ validRows: 0,
66
+ rowRecordCount: 0,
67
+ validation: [],
54
68
  error: "Schema not found in database. Please generate schema first.",
69
+ message: "Schema not found in database. Please generate schema first.",
55
70
  };
56
71
  }
57
72
  const schemaJson = datasetRecord.schema.schema;
@@ -64,11 +79,16 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
64
79
  console.error(`[Dataset ${datasetId}] Failed to compile schema:`, message);
65
80
  return {
66
81
  success: false,
82
+ status: "schema_invalid",
83
+ validRows: 0,
84
+ rowRecordCount: 0,
85
+ validation: [],
67
86
  error: `Failed to compile schema: ${message}`,
87
+ message: `Failed to compile schema: ${message}`,
68
88
  };
69
89
  }
70
90
  const validationResult = await validateJsonlRows({
71
- env,
91
+ runtime,
72
92
  sandboxId,
73
93
  outputPath,
74
94
  validator,
@@ -77,29 +97,40 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
77
97
  if (!validationResult.success) {
78
98
  return validationResult;
79
99
  }
80
- const totalValidRows = validationResult.validRowCount;
100
+ const totalValidRows = validationResult.validRowCount ?? 0;
101
+ const rowRecordCount = validationResult.rowRecordCount ?? totalValidRows;
81
102
  console.log(`[Dataset ${datasetId}] Reading file content for upload`);
82
- const fileRead = await readDatasetSandboxFileStep({ env, sandboxId, path: outputPath });
103
+ const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: outputPath });
83
104
  if (!fileRead.contentBase64) {
84
105
  console.error(`[Dataset ${datasetId}] Empty file content`);
85
106
  return {
86
107
  success: false,
108
+ status: "empty_output",
109
+ validRows: 0,
110
+ rowRecordCount: 0,
111
+ validation: [],
87
112
  error: "Empty file content",
113
+ message: "Empty file content",
88
114
  };
89
115
  }
90
116
  const fileBuffer = Buffer.from(fileRead.contentBase64, "base64");
91
117
  console.log(`[Dataset ${datasetId}] Uploading file to InstantDB storage`);
92
- const uploadResult = await datasetUploadOutputFileStep({ env, datasetId, fileBuffer });
118
+ const uploadResult = await datasetUploadOutputFileStep({ runtime, datasetId, fileBuffer });
93
119
  if (!uploadResult.ok) {
94
120
  console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
95
121
  return {
96
122
  success: false,
123
+ status: "upload_failed",
124
+ validRows: totalValidRows,
125
+ rowRecordCount,
126
+ validation: validationResult.validation,
97
127
  error: uploadResult.error,
128
+ message: uploadResult.error,
98
129
  };
99
130
  }
100
131
  console.log(`[Dataset ${datasetId}] File uploaded successfully: ${uploadResult.data.fileId}`);
101
132
  const statusResult = await datasetUpdateStatusStep({
102
- env,
133
+ runtime,
103
134
  datasetId,
104
135
  status: "completed",
105
136
  calculatedTotalRows: totalValidRows,
@@ -109,14 +140,21 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
109
140
  console.error(`[Dataset ${datasetId}] Failed to update status: ${statusResult.error}`);
110
141
  return {
111
142
  success: false,
143
+ status: "status_update_failed",
144
+ validRows: totalValidRows,
145
+ rowRecordCount,
146
+ validation: validationResult.validation,
112
147
  error: statusResult.error,
148
+ message: statusResult.error,
113
149
  };
114
150
  }
115
151
  console.log(`[Dataset ${datasetId}] Dataset marked as COMPLETED (${totalValidRows} valid rows)`);
116
152
  console.log(`[Dataset ${datasetId}] ========================================`);
117
153
  return {
118
154
  success: true,
155
+ status: "completed",
119
156
  validRows: totalValidRows,
157
+ rowRecordCount,
120
158
  fileId: uploadResult.data.fileId,
121
159
  storagePath: uploadResult.data.storagePath,
122
160
  message: "Dataset creation completed and uploaded to storage",
@@ -124,9 +162,23 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
124
162
  },
125
163
  });
126
164
  }
127
- async function ensureFileExists(env, sandboxId, path) {
165
+ export function didCompleteDatasetSucceed(event) {
166
+ const parts = Array.isArray(event?.content?.parts) ? event.content.parts : [];
167
+ return parts.some((part) => {
168
+ if (part?.type === "action" && part?.content?.actionName === "completeDataset") {
169
+ const output = part.content.output;
170
+ return part.content.status === "completed" && output?.success === true && output?.status === "completed";
171
+ }
172
+ if (part?.type === "tool-completeDataset") {
173
+ const output = part.output ?? part.result;
174
+ return part.state === "output-available" && output?.success === true && output?.status === "completed";
175
+ }
176
+ return false;
177
+ });
178
+ }
179
+ async function ensureFileExists(runtime, sandboxId, path) {
128
180
  const result = await runDatasetSandboxCommandStep({
129
- env,
181
+ runtime,
130
182
  sandboxId,
131
183
  cmd: "test",
132
184
  args: ["-f", path],
@@ -135,14 +187,23 @@ async function ensureFileExists(env, sandboxId, path) {
135
187
  throw new Error(`Required file not found: ${path}`);
136
188
  }
137
189
  }
138
- async function validateJsonlRows({ env, sandboxId, outputPath, validator, datasetId }) {
190
+ async function validateJsonlRows({ runtime, sandboxId, outputPath, validator, datasetId }) {
139
191
  const validation = [];
140
192
  let validRowCount = 0;
193
+ let rowRecordCount = 0;
141
194
  console.log(`[Dataset ${datasetId}] Reading and validating JSONL file from sandbox`);
142
- const fileRead = await readDatasetSandboxFileStep({ env, sandboxId, path: outputPath });
195
+ const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: outputPath });
143
196
  if (!fileRead.contentBase64) {
144
197
  console.log(`[Dataset ${datasetId}] Empty output file`);
145
- return { success: true, validation, validRowCount: 0 };
198
+ return {
199
+ success: false,
200
+ status: "empty_output",
201
+ validation,
202
+ validRowCount: 0,
203
+ rowRecordCount: 0,
204
+ error: "output.jsonl is empty",
205
+ message: "output.jsonl is empty",
206
+ };
146
207
  }
147
208
  const fileContent = Buffer.from(fileRead.contentBase64, "base64").toString();
148
209
  const lines = fileContent.split("\n");
@@ -167,8 +228,14 @@ async function validateJsonlRows({ env, sandboxId, outputPath, validator, datase
167
228
  continue;
168
229
  }
169
230
  if (record.type !== "row") {
231
+ validation.push({
232
+ index,
233
+ valid: false,
234
+ errors: ["Every non-empty output line must be a JSON object with type 'row'"],
235
+ });
170
236
  continue;
171
237
  }
238
+ rowRecordCount++;
172
239
  const data = record.data;
173
240
  if (data === undefined || data === null) {
174
241
  validation.push({
@@ -187,6 +254,7 @@ async function validateJsonlRows({ env, sandboxId, outputPath, validator, datase
187
254
  index,
188
255
  valid: false,
189
256
  errors,
257
+ dataKeys: data && typeof data === "object" && !Array.isArray(data) ? Object.keys(data) : [],
190
258
  });
191
259
  continue;
192
260
  }
@@ -197,9 +265,29 @@ async function validateJsonlRows({ env, sandboxId, outputPath, validator, datase
197
265
  validRowCount++;
198
266
  }
199
267
  console.log(`[Dataset ${datasetId}] Validation completed: ${validRowCount} valid rows`);
268
+ const invalidRows = validation.filter((entry) => !entry.valid);
269
+ if (rowRecordCount === 0 || validRowCount === 0 || invalidRows.length > 0) {
270
+ const message = rowRecordCount === 0
271
+ ? "output.jsonl does not contain any type='row' records"
272
+ : validRowCount === 0
273
+ ? "No dataset rows matched the stored schema"
274
+ : `${invalidRows.length} dataset row(s) failed schema validation`;
275
+ console.error(`[Dataset ${datasetId}] Validation failed: ${message}`);
276
+ return {
277
+ success: false,
278
+ status: "validation_failed",
279
+ validation,
280
+ validRowCount,
281
+ rowRecordCount,
282
+ error: message,
283
+ message,
284
+ };
285
+ }
200
286
  return {
201
287
  success: true,
288
+ status: "completed",
202
289
  validation,
203
290
  validRowCount,
291
+ rowRecordCount,
204
292
  };
205
293
  }
@@ -1,22 +1,22 @@
1
- export declare function getDatasetServiceDb(env?: any): Promise<any>;
1
+ export declare function getDatasetServiceDb(runtime: any): Promise<any>;
2
2
  export declare function datasetGetByIdStep(params: {
3
- env?: any;
3
+ runtime: any;
4
4
  datasetId: string;
5
5
  }): Promise<import("../service.js").ServiceResult<any>>;
6
6
  export declare function datasetReadOutputJsonlStep(params: {
7
- env?: any;
7
+ runtime: any;
8
8
  datasetId: string;
9
9
  }): Promise<{
10
10
  contentBase64: string;
11
11
  }>;
12
12
  export declare function datasetUpdateSchemaStep(params: {
13
- env?: any;
13
+ runtime: any;
14
14
  datasetId: string;
15
15
  schema: any;
16
16
  status?: string;
17
17
  }): Promise<import("../service.js").ServiceResult<void>>;
18
18
  export declare function datasetUploadOutputFileStep(params: {
19
- env?: any;
19
+ runtime: any;
20
20
  datasetId: string;
21
21
  fileBuffer: Buffer;
22
22
  }): Promise<import("../service.js").ServiceResult<{
@@ -24,22 +24,46 @@ export declare function datasetUploadOutputFileStep(params: {
24
24
  storagePath: string;
25
25
  }>>;
26
26
  export declare function datasetUpdateStatusStep(params: {
27
- env?: any;
27
+ runtime: any;
28
28
  datasetId: string;
29
29
  status: string;
30
30
  calculatedTotalRows?: number;
31
31
  actualGeneratedRowCount?: number;
32
32
  }): Promise<import("../service.js").ServiceResult<void>>;
33
33
  export declare function datasetClearStep(params: {
34
- env?: any;
34
+ runtime: any;
35
35
  datasetId: string;
36
36
  }): Promise<import("../service.js").ServiceResult<{
37
37
  deletedCount: number;
38
38
  }>>;
39
39
  export declare function datasetPreviewRowsStep(params: {
40
- env?: any;
40
+ runtime: any;
41
41
  datasetId: string;
42
42
  limit?: number;
43
43
  }): Promise<{
44
44
  rows: any[];
45
45
  }>;
46
+ export declare function datasetReadRowsStep(params: {
47
+ runtime: any;
48
+ datasetId: string;
49
+ cursor?: number;
50
+ limit?: number;
51
+ }): Promise<{
52
+ rows: any[];
53
+ cursor: number;
54
+ done: boolean;
55
+ }>;
56
+ export declare function datasetReadOneStep(params: {
57
+ runtime: any;
58
+ datasetId: string;
59
+ }): Promise<{
60
+ row: any | null;
61
+ }>;
62
+ export declare function datasetInferAndUpdateSchemaStep(params: {
63
+ runtime: any;
64
+ datasetId: string;
65
+ title?: string;
66
+ description?: string;
67
+ }): Promise<{
68
+ schema: import("../dataset.js").DatasetSchemaInput;
69
+ }>;
@@ -1,22 +1,31 @@
1
- import { getContextRuntime, getContextEnv } from "@ekairos/events/runtime";
2
1
  import { DatasetService } from "../service.js";
3
- async function resolveEnv(env) {
4
- return env ?? (await getContextEnv());
2
+ import { datasetDomain } from "../schema.js";
3
+ import { inferDatasetSchema } from "../builder/schemaInference.js";
4
+ async function getRuntimeDb(runtime) {
5
+ if (!runtime) {
6
+ throw new Error("Dataset step requires runtime.");
7
+ }
8
+ if (typeof runtime.use === "function") {
9
+ const scoped = await runtime.use(datasetDomain);
10
+ const scopedDb = scoped.db;
11
+ return typeof scopedDb === "function" ? await scopedDb.call(scoped) : scopedDb;
12
+ }
13
+ const db = runtime.db;
14
+ return typeof db === "function" ? await db.call(runtime) : db;
5
15
  }
6
- export async function getDatasetServiceDb(env) {
16
+ export async function getDatasetServiceDb(runtime) {
7
17
  "use step";
8
- const runtime = (await getContextRuntime(await resolveEnv(env)));
9
- return runtime.db;
18
+ return await getRuntimeDb(runtime);
10
19
  }
11
20
  export async function datasetGetByIdStep(params) {
12
21
  "use step";
13
- const db = (await getContextRuntime(await resolveEnv(params.env))).db;
22
+ const db = await getRuntimeDb(params.runtime);
14
23
  const service = new DatasetService(db);
15
24
  return await service.getDatasetById(params.datasetId);
16
25
  }
17
26
  export async function datasetReadOutputJsonlStep(params) {
18
27
  "use step";
19
- const db = (await getContextRuntime(await resolveEnv(params.env))).db;
28
+ const db = await getRuntimeDb(params.runtime);
20
29
  for (let attempt = 1; attempt <= 20; attempt++) {
21
30
  const query = await db.query({
22
31
  dataset_datasets: {
@@ -37,7 +46,7 @@ export async function datasetReadOutputJsonlStep(params) {
37
46
  }
38
47
  export async function datasetUpdateSchemaStep(params) {
39
48
  "use step";
40
- const db = (await getContextRuntime(await resolveEnv(params.env))).db;
49
+ const db = await getRuntimeDb(params.runtime);
41
50
  const service = new DatasetService(db);
42
51
  return await service.updateDatasetSchema({
43
52
  datasetId: params.datasetId,
@@ -47,7 +56,7 @@ export async function datasetUpdateSchemaStep(params) {
47
56
  }
48
57
  export async function datasetUploadOutputFileStep(params) {
49
58
  "use step";
50
- const db = (await getContextRuntime(await resolveEnv(params.env))).db;
59
+ const db = await getRuntimeDb(params.runtime);
51
60
  const service = new DatasetService(db);
52
61
  return await service.uploadDatasetOutputFile({
53
62
  datasetId: params.datasetId,
@@ -56,7 +65,7 @@ export async function datasetUploadOutputFileStep(params) {
56
65
  }
57
66
  export async function datasetUpdateStatusStep(params) {
58
67
  "use step";
59
- const db = (await getContextRuntime(await resolveEnv(params.env))).db;
68
+ const db = await getRuntimeDb(params.runtime);
60
69
  const service = new DatasetService(db);
61
70
  return await service.updateDatasetStatus({
62
71
  datasetId: params.datasetId,
@@ -67,13 +76,13 @@ export async function datasetUpdateStatusStep(params) {
67
76
  }
68
77
  export async function datasetClearStep(params) {
69
78
  "use step";
70
- const db = (await getContextRuntime(await resolveEnv(params.env))).db;
79
+ const db = await getRuntimeDb(params.runtime);
71
80
  const service = new DatasetService(db);
72
81
  return await service.clearDataset(params.datasetId);
73
82
  }
74
83
  export async function datasetPreviewRowsStep(params) {
75
84
  "use step";
76
- const db = (await getContextRuntime(await resolveEnv(params.env))).db;
85
+ const db = await getRuntimeDb(params.runtime);
77
86
  const service = new DatasetService(db);
78
87
  const rowsResult = await service.previewRows(params.datasetId, params.limit ?? 20);
79
88
  if (!rowsResult.ok) {
@@ -81,3 +90,50 @@ export async function datasetPreviewRowsStep(params) {
81
90
  }
82
91
  return { rows: rowsResult.data };
83
92
  }
93
+ export async function datasetReadRowsStep(params) {
94
+ "use step";
95
+ const db = await getRuntimeDb(params.runtime);
96
+ const service = new DatasetService(db);
97
+ const rowsResult = await service.readRows({
98
+ datasetId: params.datasetId,
99
+ cursor: params.cursor,
100
+ limit: params.limit,
101
+ });
102
+ if (!rowsResult.ok) {
103
+ throw new Error(rowsResult.error);
104
+ }
105
+ return rowsResult.data;
106
+ }
107
+ export async function datasetReadOneStep(params) {
108
+ "use step";
109
+ const db = await getRuntimeDb(params.runtime);
110
+ const service = new DatasetService(db);
111
+ const firstResult = await service.readOne(params.datasetId);
112
+ if (!firstResult.ok) {
113
+ throw new Error(firstResult.error);
114
+ }
115
+ return { row: firstResult.data };
116
+ }
117
+ export async function datasetInferAndUpdateSchemaStep(params) {
118
+ "use step";
119
+ const db = await getRuntimeDb(params.runtime);
120
+ const service = new DatasetService(db);
121
+ const readResult = await service.readRows({
122
+ datasetId: params.datasetId,
123
+ cursor: 0,
124
+ limit: 1000,
125
+ });
126
+ if (!readResult.ok) {
127
+ throw new Error(readResult.error);
128
+ }
129
+ const inferred = inferDatasetSchema(readResult.data.rows, params.title ?? `${params.datasetId}Row`, params.description ?? "One dataset row");
130
+ const updateResult = await service.updateDatasetSchema({
131
+ datasetId: params.datasetId,
132
+ schema: inferred,
133
+ status: "completed",
134
+ });
135
+ if (!updateResult.ok) {
136
+ throw new Error(updateResult.error);
137
+ }
138
+ return { schema: inferred };
139
+ }
package/dist/dataset.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { id as newId } from "@instantdb/admin";
2
2
  import { buildObjectOutputInstructions } from "./builder/instructions.js";
3
- import { materializeDerivedDataset, materializeQuerySource, materializeSingleFileLikeSource, } from "./builder/materialize.js";
3
+ import { getDatasetAgentMaterializers } from "./builder/agentMaterializers.js";
4
+ import { materializeQuerySource } from "./builder/materializeQuery.js";
4
5
  import { finalizeBuildResult } from "./builder/persistence.js";
5
6
  export function dataset(runtime, options = {}) {
6
7
  const datasetId = normalizeDatasetId(options.datasetId);
@@ -11,6 +12,7 @@ export function dataset(runtime, options = {}) {
11
12
  sources: [],
12
13
  output: "rows",
13
14
  inferSchema: false,
15
+ durable: options.durable,
14
16
  first: false,
15
17
  };
16
18
  const api = {
@@ -100,13 +102,17 @@ export function dataset(runtime, options = {}) {
100
102
  const targetDatasetId = options?.datasetId
101
103
  ? normalizeDatasetId(options.datasetId)
102
104
  : datasetId;
103
- const effectiveState = state.output === "object"
105
+ const stateWithBuildOptions = {
106
+ ...state,
107
+ durable: options?.durable ?? state.durable,
108
+ };
109
+ const effectiveState = stateWithBuildOptions.output === "object"
104
110
  ? {
105
- ...state,
111
+ ...stateWithBuildOptions,
106
112
  first: true,
107
- instructions: buildObjectOutputInstructions(state.instructions),
113
+ instructions: buildObjectOutputInstructions(stateWithBuildOptions.instructions),
108
114
  }
109
- : state;
115
+ : stateWithBuildOptions;
110
116
  const onlySource = effectiveState.sources[0];
111
117
  const isSingleSource = effectiveState.sources.length === 1;
112
118
  const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
@@ -128,7 +134,7 @@ export function dataset(runtime, options = {}) {
128
134
  if (!effectiveState.reactor) {
129
135
  throw new Error("dataset_reactor_required");
130
136
  }
131
- await materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
137
+ await getDatasetAgentMaterializers().materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
132
138
  return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
133
139
  }
134
140
  if (!effectiveState.sandboxId) {
@@ -137,7 +143,7 @@ export function dataset(runtime, options = {}) {
137
143
  if (!effectiveState.reactor) {
138
144
  throw new Error("dataset_reactor_required");
139
145
  }
140
- await materializeDerivedDataset(effectiveState, targetDatasetId);
146
+ await getDatasetAgentMaterializers().materializeDerivedDataset(effectiveState, targetDatasetId);
141
147
  return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
142
148
  },
143
149
  };
@@ -1,9 +1,9 @@
1
1
  interface ExecuteCommandToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
- env?: any;
4
+ runtime: any;
5
5
  }
6
- export declare function createExecuteCommandTool({ datasetId, sandboxId, env }: ExecuteCommandToolParams): import("ai").Tool<{
6
+ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): import("ai").Tool<{
7
7
  pythonCode: string;
8
8
  scriptName: string;
9
9
  }, {
@@ -6,7 +6,7 @@ import { getDatasetWorkstation } from "./datasetFiles.js";
6
6
  // The tool's return payload exposes stdout (capped) plus the on-disk script path.
7
7
  const MAX_STDOUT_CHARS = 20000;
8
8
  const MAX_STDERR_CHARS = 5000;
9
- export function createExecuteCommandTool({ datasetId, sandboxId, env }) {
9
+ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
10
10
  return tool({
11
11
  description: "Execute Python scripts in the sandbox. Always saves script to a file before executing. The tool's output is EXACTLY the script's stdout and includes the script file path for traceability. CRITICAL: Print concise, human-readable summaries only; do NOT print raw large data. For big results, write artifacts to files in the workstation and print their file paths. Always include progress/result prints (e.g., 'Processing file X...', 'Found Y records', 'Generated output.csv').",
12
12
  inputSchema: z.object({
@@ -25,7 +25,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, env }) {
25
25
  console.log(`[Dataset ${datasetId}] ========================================`);
26
26
  try {
27
27
  await writeDatasetSandboxFilesStep({
28
- env,
28
+ runtime,
29
29
  sandboxId,
30
30
  files: [
31
31
  {
@@ -37,7 +37,7 @@ export function createExecuteCommandTool({ datasetId, sandboxId, env }) {
37
37
  console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
38
38
  console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
39
39
  const result = await runDatasetSandboxCommandStep({
40
- env,
40
+ runtime,
41
41
  sandboxId,
42
42
  cmd: "python",
43
43
  args: [scriptFile],
@@ -1,6 +1,6 @@
1
1
  import { createContext, type ContextReactor } from "@ekairos/events";
2
2
  import { FilePreviewContext } from "./filepreview.js";
3
- export type FileParseStoryContext = {
3
+ export type FileParseContext = {
4
4
  datasetId: string;
5
5
  fileId: string;
6
6
  instructions: string;
@@ -15,7 +15,7 @@ export type FileParseStoryContext = {
15
15
  iterationCount: number;
16
16
  filePreview?: FilePreviewContext;
17
17
  };
18
- export type FileParseStoryParams = {
18
+ export type FileParseContextParams = {
19
19
  fileId: string;
20
20
  instructions?: string;
21
21
  sandboxId?: string;
@@ -23,11 +23,15 @@ export type FileParseStoryParams = {
23
23
  model?: string;
24
24
  reactor?: ContextReactor<any, any>;
25
25
  };
26
- export type FileParseStoryBuilder<Env extends {
26
+ export type FileParseRunOptions = {
27
+ prompt?: string;
28
+ durable?: boolean;
29
+ };
30
+ export type FileParseContextBuilder<Env extends {
27
31
  orgId: string;
28
32
  }> = {
29
33
  datasetId: string;
30
- story: ReturnType<ReturnType<typeof createContext<Env>>["context"]> extends any ? any : any;
34
+ context: ReturnType<ReturnType<typeof createContext<Env>>["context"]> extends any ? any : any;
31
35
  };
32
36
  export type DatasetResult = {
33
37
  id: string;
@@ -44,13 +48,13 @@ export type DatasetResult = {
44
48
  * Factory (DX-first):
45
49
  *
46
50
  * Usage:
47
- * const { datasetId } = await createFileParseStory(fileId, { instructions }).parse(env)
51
+ * const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
48
52
  *
49
- * - No `db` is accepted/stored (workflow-safe).
50
- * - All I/O happens in `"use step"` functions via Ekairos runtime (`getContextRuntime(env).db`).
51
- * - `parse()` is the entrypoint; it calls `story.react(...)` internally.
53
+ * - Uses the caller runtime; no secondary runtime is created.
54
+ * - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
55
+ * - `parse()` is the entrypoint; it calls `context.react(...)` internally.
52
56
  */
53
- export declare function createFileParseStory<Env extends {
57
+ export declare function createFileParseContext<Env extends {
54
58
  orgId: string;
55
59
  }>(fileId: string, opts?: {
56
60
  instructions?: string;
@@ -60,8 +64,10 @@ export declare function createFileParseStory<Env extends {
60
64
  reactor?: ContextReactor<any, any>;
61
65
  }): {
62
66
  datasetId: string;
63
- parse(env?: Env, prompt?: string): Promise<{
67
+ parse(runtime: {
68
+ env: Env;
69
+ }, options?: FileParseRunOptions): Promise<{
64
70
  datasetId: string;
65
71
  }>;
66
- story: any;
72
+ context: any;
67
73
  };