@ekairos/dataset 1.22.53-beta.development.0 → 1.22.55-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ export type FileParseContext = {
16
16
  filePreview?: FilePreviewContext;
17
17
  };
18
18
  export type FileParseContextParams = {
19
- fileId: string;
19
+ fileId?: string;
20
20
  instructions?: string;
21
21
  sandboxId?: string;
22
22
  datasetId?: string;
@@ -71,3 +71,9 @@ export declare function createFileParseContext<Env extends {
71
71
  }>;
72
72
  context: any;
73
73
  };
74
+ export declare function registerFileParseContext<Env extends {
75
+ orgId: string;
76
+ }>(opts?: {
77
+ model?: string;
78
+ reactor?: ContextReactor<any, any>;
79
+ }): void;
@@ -20,6 +20,7 @@ async function awaitContextRun(run) {
20
20
  await run;
21
21
  }
22
22
  async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
23
+ "use step";
23
24
  if (state.initialized) {
24
25
  return state.filePath;
25
26
  }
@@ -76,17 +77,26 @@ async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
76
77
  * Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
77
78
  */
78
79
  function createFileParseContextDefinition(params) {
79
- const datasetId = params.datasetId ?? id();
80
+ const fallbackDatasetId = params.datasetId;
80
81
  const model = params.model ?? "openai/gpt-5";
81
82
  let contextBuilder = createContext("file.parse")
82
83
  .context(async (stored, _env, runtime) => {
83
84
  const previous = stored?.content ?? {};
84
85
  const sandboxState = previous?.sandboxState ?? { initialized: false, filePath: "" };
86
+ const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
87
+ const fileId = previous?.fileId ?? params.fileId ?? "";
88
+ const instructions = previous?.instructions ?? params.instructions ?? "";
85
89
  const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
90
+ if (!datasetId) {
91
+ throw new Error("dataset_id_required");
92
+ }
93
+ if (!fileId) {
94
+ throw new Error("dataset_file_id_required");
95
+ }
86
96
  if (!sandboxId) {
87
97
  throw new Error("dataset_sandbox_required");
88
98
  }
89
- const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId, params.fileId, sandboxState);
99
+ const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId, fileId, sandboxState);
90
100
  let filePreview = undefined;
91
101
  try {
92
102
  filePreview = await generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId);
@@ -100,8 +110,8 @@ function createFileParseContextDefinition(params) {
100
110
  schema = datasetResult.data.schema;
101
111
  const ctx = {
102
112
  datasetId,
103
- fileId: params.fileId,
104
- instructions: params.instructions ?? "",
113
+ fileId,
114
+ instructions,
105
115
  sandboxConfig: { filePath: sandboxFilePath },
106
116
  analysis: [],
107
117
  schema,
@@ -114,8 +124,8 @@ function createFileParseContextDefinition(params) {
114
124
  return {
115
125
  ...previous,
116
126
  datasetId,
117
- fileId: params.fileId,
118
- instructions: params.instructions ?? "",
127
+ fileId,
128
+ instructions,
119
129
  sandboxId,
120
130
  sandboxState,
121
131
  ctx,
@@ -138,27 +148,36 @@ function createFileParseContextDefinition(params) {
138
148
  })
139
149
  .actions(async (_stored, _env, runtime) => {
140
150
  const existingSchema = _stored?.content?.ctx?.schema?.schema;
151
+ const datasetId = _stored?.content?.datasetId ?? fallbackDatasetId ?? "";
152
+ const fileId = _stored?.content?.fileId ?? params.fileId ?? "";
153
+ const sandboxId = _stored?.content?.sandboxId ?? params.sandboxId ?? "";
154
+ if (!datasetId)
155
+ throw new Error("dataset_id_required");
156
+ if (!fileId)
157
+ throw new Error("dataset_file_id_required");
158
+ if (!sandboxId)
159
+ throw new Error("dataset_sandbox_required");
141
160
  const actions = {
142
161
  executeCommand: createExecuteCommandTool({
143
162
  datasetId,
144
- sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
163
+ sandboxId,
145
164
  runtime,
146
165
  }),
147
166
  completeDataset: createCompleteDatasetTool({
148
167
  datasetId,
149
- sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
168
+ sandboxId,
150
169
  runtime,
151
170
  }),
152
171
  clearDataset: createClearDatasetTool({
153
172
  datasetId,
154
- sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
173
+ sandboxId,
155
174
  runtime,
156
175
  }),
157
176
  };
158
177
  if (!existingSchema) {
159
178
  actions.generateSchema = createGenerateSchemaTool({
160
179
  datasetId,
161
- fileId: params.fileId,
180
+ fileId,
162
181
  runtime,
163
182
  });
164
183
  }
@@ -174,7 +193,7 @@ function createFileParseContextDefinition(params) {
174
193
  contextBuilder = contextBuilder.model(model);
175
194
  }
176
195
  const context = contextBuilder.build();
177
- return { datasetId, context };
196
+ return { datasetId: fallbackDatasetId ?? "", context };
178
197
  }
179
198
  /**
180
199
  * Factory (DX-first):
@@ -187,15 +206,16 @@ function createFileParseContextDefinition(params) {
187
206
  * - `parse()` is the entrypoint; it calls `context.react(...)` internally.
188
207
  */
189
208
  export function createFileParseContext(fileId, opts) {
209
+ const datasetId = opts?.datasetId ?? id();
190
210
  const params = {
191
211
  fileId,
192
212
  instructions: opts?.instructions,
193
213
  sandboxId: opts?.sandboxId,
194
- datasetId: opts?.datasetId,
214
+ datasetId,
195
215
  model: opts?.model,
196
216
  reactor: opts?.reactor,
197
217
  };
198
- const { datasetId, context } = createFileParseContextDefinition(params);
218
+ const { context } = createFileParseContextDefinition(params);
199
219
  return {
200
220
  datasetId,
201
221
  async parse(runtime, options = {}) {
@@ -213,6 +233,13 @@ export function createFileParseContext(fileId, opts) {
213
233
  context: { key: `dataset:${datasetId}` },
214
234
  durable: options.durable ?? false,
215
235
  options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
236
+ __initialContent: {
237
+ datasetId,
238
+ fileId,
239
+ instructions: opts?.instructions ?? "",
240
+ sandboxId: opts?.sandboxId ?? "",
241
+ sandboxState: { initialized: false, filePath: "" },
242
+ },
216
243
  });
217
244
  await awaitContextRun(shell.run);
218
245
  return { datasetId };
@@ -221,3 +248,10 @@ export function createFileParseContext(fileId, opts) {
221
248
  context,
222
249
  };
223
250
  }
251
+ export function registerFileParseContext(opts) {
252
+ createFileParseContextDefinition({
253
+ model: opts?.model,
254
+ reactor: opts?.reactor,
255
+ }).context;
256
+ }
257
+ registerFileParseContext();
@@ -20,8 +20,8 @@ export type TransformDatasetContext = {
20
20
  instructions?: string;
21
21
  };
22
22
  export type TransformDatasetAgentParams = {
23
- sourceDatasetIds: string[];
24
- outputSchema: any;
23
+ sourceDatasetIds?: string[];
24
+ outputSchema?: any;
25
25
  instructions?: string;
26
26
  datasetId?: string;
27
27
  model?: string;
@@ -62,3 +62,9 @@ export declare function createTransformDatasetContext<Env extends {
62
62
  }>;
63
63
  context: any;
64
64
  };
65
+ export declare function registerTransformDatasetContext<Env extends {
66
+ orgId: string;
67
+ }>(opts?: {
68
+ model?: string;
69
+ reactor?: ContextReactor<any, any>;
70
+ }): void;
@@ -18,6 +18,7 @@ async function awaitContextRun(run) {
18
18
  await run;
19
19
  }
20
20
  async function ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatasetIds, state) {
21
+ "use step";
21
22
  if (state.initialized) {
22
23
  return { sourcePaths: state.sourcePaths, outputPath: getDatasetOutputPath(datasetId) };
23
24
  }
@@ -39,17 +40,34 @@ async function ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatas
39
40
  return { sourcePaths, outputPath: getDatasetOutputPath(datasetId) };
40
41
  }
41
42
  function createTransformDatasetContextDefinition(params) {
42
- const datasetId = params.datasetId ?? id();
43
+ const fallbackDatasetId = params.datasetId;
43
44
  const model = params.model ?? "openai/gpt-5";
44
45
  let contextBuilder = createContext("dataset.transform")
45
46
  .context(async (stored, _env, runtime) => {
46
47
  const previous = stored?.content ?? {};
47
48
  const sandboxState = previous?.sandboxState ?? { initialized: false, sourcePaths: [] };
49
+ const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
50
+ const sourceDatasetIds = Array.isArray(previous?.sourceDatasetIds)
51
+ ? previous.sourceDatasetIds
52
+ : Array.isArray(params.sourceDatasetIds)
53
+ ? params.sourceDatasetIds
54
+ : [];
55
+ const outputSchema = previous?.outputSchema ?? params.outputSchema;
56
+ const instructions = previous?.instructions ?? params.instructions;
48
57
  const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
58
+ if (!datasetId) {
59
+ throw new Error("dataset_id_required");
60
+ }
61
+ if (sourceDatasetIds.length === 0) {
62
+ throw new Error("dataset_transform_sources_required");
63
+ }
64
+ if (!outputSchema) {
65
+ throw new Error("dataset_transform_schema_required");
66
+ }
49
67
  if (!sandboxId) {
50
68
  throw new Error("dataset_sandbox_required");
51
69
  }
52
- const { sourcePaths, outputPath } = await ensureSourcesInSandbox(runtime, sandboxId, datasetId, params.sourceDatasetIds, sandboxState);
70
+ const { sourcePaths, outputPath } = await ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatasetIds, sandboxState);
53
71
  const sourcePreviews = [];
54
72
  for (const sp of sourcePaths) {
55
73
  try {
@@ -64,19 +82,19 @@ function createTransformDatasetContextDefinition(params) {
64
82
  await datasetUpdateSchemaStep({
65
83
  runtime,
66
84
  datasetId,
67
- schema: params.outputSchema,
85
+ schema: outputSchema,
68
86
  status: "schema_complete",
69
87
  });
70
88
  const promptContext = {
71
89
  datasetId,
72
- sourceDatasetIds: params.sourceDatasetIds,
73
- outputSchema: params.outputSchema,
90
+ sourceDatasetIds,
91
+ outputSchema,
74
92
  sandboxConfig: { sourcePaths, outputPath },
75
93
  sourcePreviews: sourcePreviews.length > 0 ? sourcePreviews : undefined,
76
94
  errors: [],
77
95
  };
78
96
  const basePrompt = buildTransformDatasetPrompt(promptContext);
79
- const userInstructions = String(params.instructions ?? "").trim();
97
+ const userInstructions = String(instructions ?? "").trim();
80
98
  const system = userInstructions
81
99
  ? [
82
100
  "## USER INSTRUCTIONS",
@@ -90,6 +108,9 @@ function createTransformDatasetContextDefinition(params) {
90
108
  return {
91
109
  ...previous,
92
110
  datasetId,
111
+ sourceDatasetIds,
112
+ outputSchema,
113
+ instructions,
93
114
  sandboxId,
94
115
  sandboxState,
95
116
  system,
@@ -100,7 +121,12 @@ function createTransformDatasetContextDefinition(params) {
100
121
  return String(stored?.content?.system ?? "");
101
122
  })
102
123
  .actions(async (stored, _env, runtime) => {
124
+ const datasetId = stored?.content?.datasetId ?? fallbackDatasetId ?? "";
103
125
  const sandboxId = stored?.content?.sandboxId ?? params.sandboxId ?? "";
126
+ if (!datasetId)
127
+ throw new Error("dataset_id_required");
128
+ if (!sandboxId)
129
+ throw new Error("dataset_sandbox_required");
104
130
  return {
105
131
  executeCommand: createExecuteCommandTool({
106
132
  datasetId,
@@ -129,14 +155,15 @@ function createTransformDatasetContextDefinition(params) {
129
155
  contextBuilder = contextBuilder.model(model);
130
156
  }
131
157
  const context = contextBuilder.build();
132
- return { datasetId, context };
158
+ return { datasetId: fallbackDatasetId ?? "", context };
133
159
  }
134
160
  export function createTransformDatasetContext(params) {
135
- const { datasetId, context } = createTransformDatasetContextDefinition({
161
+ const datasetId = params.datasetId ?? id();
162
+ const { context } = createTransformDatasetContextDefinition({
136
163
  sourceDatasetIds: params.sourceDatasetIds,
137
164
  outputSchema: params.outputSchema,
138
165
  instructions: params.instructions,
139
- datasetId: params.datasetId,
166
+ datasetId,
140
167
  model: params.model,
141
168
  sandboxId: params.sandboxId,
142
169
  reactor: params.reactor,
@@ -167,6 +194,14 @@ export function createTransformDatasetContext(params) {
167
194
  context: { key: `dataset:${datasetId}` },
168
195
  durable: options.durable ?? false,
169
196
  options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
197
+ __initialContent: {
198
+ datasetId,
199
+ sourceDatasetIds: params.sourceDatasetIds,
200
+ outputSchema: params.outputSchema,
201
+ instructions: params.instructions,
202
+ sandboxId: params.sandboxId ?? "",
203
+ sandboxState: { initialized: false, sourcePaths: [] },
204
+ },
170
205
  });
171
206
  await awaitContextRun(shell.run);
172
207
  return { datasetId };
@@ -174,3 +209,10 @@ export function createTransformDatasetContext(params) {
174
209
  context,
175
210
  };
176
211
  }
212
+ export function registerTransformDatasetContext(opts) {
213
+ createTransformDatasetContextDefinition({
214
+ model: opts?.model,
215
+ reactor: opts?.reactor,
216
+ }).context;
217
+ }
218
+ registerTransformDatasetContext();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.53-beta.development.0",
3
+ "version": "1.22.55-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.53-beta.development.0",
69
- "@ekairos/events": "^1.22.53-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.53-beta.development.0",
68
+ "@ekairos/domain": "^1.22.55-beta.development.0",
69
+ "@ekairos/events": "^1.22.55-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.55-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",