@ekairos/dataset 1.22.55-beta.development.0 → 1.22.56-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,59 +1,6 @@
1
- import { createContext, type ContextReactor } from "@ekairos/events";
2
- import { FilePreviewContext } from "./filepreview.js";
3
- export type FileParseContext = {
4
- datasetId: string;
5
- fileId: string;
6
- instructions: string;
7
- sandboxConfig: {
8
- filePath: string;
9
- };
10
- analysis: any[];
11
- schema: any | null;
12
- plan: any | null;
13
- executionResult: any | null;
14
- errors: string[];
15
- iterationCount: number;
16
- filePreview?: FilePreviewContext;
17
- };
18
- export type FileParseContextParams = {
19
- fileId?: string;
20
- instructions?: string;
21
- sandboxId?: string;
22
- datasetId?: string;
23
- model?: string;
24
- reactor?: ContextReactor<any, any>;
25
- };
26
- export type FileParseRunOptions = {
27
- prompt?: string;
28
- durable?: boolean;
29
- };
30
- export type FileParseContextBuilder<Env extends {
31
- orgId: string;
32
- }> = {
33
- datasetId: string;
34
- context: ReturnType<ReturnType<typeof createContext<Env>>["context"]> extends any ? any : any;
35
- };
36
- export type DatasetResult = {
37
- id: string;
38
- status?: string;
39
- title?: string;
40
- schema?: any;
41
- analysis?: any;
42
- calculatedTotalRows?: number;
43
- actualGeneratedRowCount?: number;
44
- createdAt?: number;
45
- updatedAt?: number;
46
- };
47
- /**
48
- * Factory (DX-first):
49
- *
50
- * Usage:
51
- * const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
52
- *
53
- * - Uses the caller runtime; no secondary runtime is created.
54
- * - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
55
- * - `parse()` is the entrypoint; it calls `context.react(...)` internally.
56
- */
1
+ import { type ContextReactor } from "@ekairos/events";
2
+ import type { FileParseRunOptions } from "./file-dataset.types.js";
3
+ export type { DatasetResult, FileParseContext, FileParseContextBuilder, FileParseContextParams, FileParseRunOptions, SandboxState, } from "./file-dataset.types.js";
57
4
  export declare function createFileParseContext<Env extends {
58
5
  orgId: string;
59
6
  }>(fileId: string, opts?: {
@@ -1,15 +1,11 @@
1
- import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
2
- import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
3
- import { createGenerateSchemaTool } from "./generateSchema.tool.js";
4
- import { createCompleteDatasetTool, didCompleteDatasetSucceed } from "../completeDataset.tool.js";
5
- import { createExecuteCommandTool } from "../executeCommand.tool.js";
6
- import { createClearDatasetTool } from "../clearDataset.tool.js";
7
- import { buildFileDatasetPrompt } from "./prompts.js";
8
- import { generateFilePreview, ensurePreviewScriptsAvailable } from "./filepreview.js";
1
+ import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
9
2
  import { id } from "@instantdb/admin";
10
- import { getDatasetWorkstation } from "../datasetFiles.js";
11
- import { readInstantFileStep } from "./steps.js";
3
+ import { createClearDatasetTool } from "../clearDataset.tool.js";
4
+ import { createCompleteDatasetTool, didCompleteDatasetSucceed, } from "../completeDataset.tool.js";
12
5
  import { datasetGetByIdStep } from "../dataset/steps.js";
6
+ import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
+ import { createGenerateSchemaTool } from "./generateSchema.tool.js";
8
+ import { buildFileDatasetPromptStep, generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
13
9
  async function awaitContextRun(run) {
14
10
  if (!run)
15
11
  return;
@@ -19,63 +15,6 @@ async function awaitContextRun(run) {
19
15
  }
20
16
  await run;
21
17
  }
22
- async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
23
- "use step";
24
- if (state.initialized) {
25
- return state.filePath;
26
- }
27
- console.log(`[FileParseContext ${datasetId}] Initializing sandbox...`);
28
- await ensurePreviewScriptsAvailable(runtime, sandboxId);
29
- console.log(`[FileParseContext ${datasetId}] Installing Python dependencies...`);
30
- const pipInstall = await runDatasetSandboxCommandStep({
31
- runtime,
32
- sandboxId,
33
- cmd: "python",
34
- args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
35
- });
36
- const installStderr = pipInstall.stderr;
37
- if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
38
- throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
39
- }
40
- console.log(`[FileParseContext ${datasetId}] Fetching file from InstantDB...`);
41
- const file = await readInstantFileStep({ runtime, fileId });
42
- console.log(`[FileParseContext ${datasetId}] Creating dataset workstation...`);
43
- const workstation = getDatasetWorkstation(datasetId);
44
- await runDatasetSandboxCommandStep({
45
- runtime,
46
- sandboxId,
47
- cmd: "mkdir",
48
- args: ["-p", workstation],
49
- });
50
- const fileName = file.contentDisposition ?? "";
51
- const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
52
- const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
53
- await writeDatasetSandboxFilesStep({
54
- runtime,
55
- sandboxId,
56
- files: [
57
- {
58
- path: sandboxFilePath,
59
- contentBase64: file.contentBase64,
60
- },
61
- ],
62
- });
63
- console.log(`[FileParseContext ${datasetId}] ✅ Workstation created: ${workstation}`);
64
- console.log(`[FileParseContext ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
65
- state.filePath = sandboxFilePath;
66
- state.initialized = true;
67
- return sandboxFilePath;
68
- }
69
- /**
70
- * FileParseContext
71
- *
72
- * Uso:
73
- * - Crear una instancia con `fileId`, `instructions` y un `sandbox`
74
- * - Llamar `getDataset()` para crear un dataset nuevo (crea un datasetId interno)
75
- * - Llamar `followUp(datasetId, feedback)` para iterar el mismo dataset con feedback
76
- *
77
- * Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
78
- */
79
18
  function createFileParseContextDefinition(params) {
80
19
  const fallbackDatasetId = params.datasetId;
81
20
  const model = params.model ?? "openai/gpt-5";
@@ -96,18 +35,31 @@ function createFileParseContextDefinition(params) {
96
35
  if (!sandboxId) {
97
36
  throw new Error("dataset_sandbox_required");
98
37
  }
99
- const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId, fileId, sandboxState);
38
+ const initialized = await initializeFileParseSandboxStep({
39
+ runtime,
40
+ sandboxId,
41
+ datasetId,
42
+ fileId,
43
+ state: sandboxState,
44
+ });
45
+ const sandboxFilePath = initialized.filePath;
100
46
  let filePreview = undefined;
101
47
  try {
102
- filePreview = await generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId);
48
+ filePreview = await generateFileParsePreviewStep({
49
+ runtime,
50
+ sandboxId,
51
+ sandboxFilePath,
52
+ datasetId,
53
+ });
103
54
  }
104
55
  catch {
105
- // optional
56
+ // Preview is optional; parsing can still proceed from the file path.
106
57
  }
107
58
  let schema = null;
108
59
  const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
109
- if (datasetResult.ok && datasetResult.data.schema)
60
+ if (datasetResult.ok && datasetResult.data.schema) {
110
61
  schema = datasetResult.data.schema;
62
+ }
111
63
  const ctx = {
112
64
  datasetId,
113
65
  fileId,
@@ -127,13 +79,13 @@ function createFileParseContextDefinition(params) {
127
79
  fileId,
128
80
  instructions,
129
81
  sandboxId,
130
- sandboxState,
82
+ sandboxState: initialized.state,
131
83
  ctx,
132
84
  };
133
85
  })
134
86
  .narrative(async (stored) => {
135
87
  const ctx = stored?.content?.ctx;
136
- const base = buildFileDatasetPrompt(ctx);
88
+ const base = await buildFileDatasetPromptStep({ context: ctx });
137
89
  const userInstructions = String(ctx?.instructions ?? "").trim();
138
90
  if (!userInstructions)
139
91
  return base;
@@ -195,16 +147,6 @@ function createFileParseContextDefinition(params) {
195
147
  const context = contextBuilder.build();
196
148
  return { datasetId: fallbackDatasetId ?? "", context };
197
149
  }
198
- /**
199
- * Factory (DX-first):
200
- *
201
- * Usage:
202
- * const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
203
- *
204
- * - Uses the caller runtime; no secondary runtime is created.
205
- * - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
206
- * - `parse()` is the entrypoint; it calls `context.react(...)` internally.
207
- */
208
150
  export function createFileParseContext(fileId, opts) {
209
151
  const datasetId = opts?.datasetId ?? id();
210
152
  const params = {
@@ -225,14 +167,25 @@ export function createFileParseContext(fileId, opts) {
225
167
  channel: WEB_CHANNEL,
226
168
  createdAt: new Date().toISOString(),
227
169
  content: {
228
- parts: [{ type: "text", text: options.prompt ?? "generate a dataset for this file" }],
170
+ parts: [
171
+ {
172
+ type: "text",
173
+ text: options.prompt ?? "generate a dataset for this file",
174
+ },
175
+ ],
229
176
  },
230
177
  };
231
178
  const shell = await context.react(triggerEvent, {
232
179
  runtime: runtime,
233
180
  context: { key: `dataset:${datasetId}` },
234
181
  durable: options.durable ?? false,
235
- options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
182
+ options: {
183
+ silent: true,
184
+ preventClose: true,
185
+ sendFinish: false,
186
+ maxIterations: 20,
187
+ maxModelSteps: 5,
188
+ },
236
189
  __initialContent: {
237
190
  datasetId,
238
191
  fileId,
@@ -244,7 +197,6 @@ export function createFileParseContext(fileId, opts) {
244
197
  await awaitContextRun(shell.run);
245
198
  return { datasetId };
246
199
  },
247
- // Optional: expose the built context for advanced callers (not required for parse DX)
248
200
  context,
249
201
  };
250
202
  }
@@ -0,0 +1,21 @@
1
+ import type { FileParseContext, SandboxState } from "./file-dataset.types.js";
2
+ import type { FilePreviewContext } from "./filepreview.types.js";
3
+ export declare function initializeFileParseSandboxStep(params: {
4
+ runtime: any;
5
+ sandboxId: string;
6
+ datasetId: string;
7
+ fileId: string;
8
+ state: SandboxState;
9
+ }): Promise<{
10
+ filePath: string;
11
+ state: SandboxState;
12
+ }>;
13
+ export declare function generateFileParsePreviewStep(params: {
14
+ runtime: any;
15
+ sandboxId: string;
16
+ sandboxFilePath: string;
17
+ datasetId: string;
18
+ }): Promise<FilePreviewContext>;
19
+ export declare function buildFileDatasetPromptStep(params: {
20
+ context: FileParseContext;
21
+ }): Promise<string>;
@@ -0,0 +1,62 @@
1
+ import { getDatasetWorkstation } from "../datasetFiles.js";
2
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
3
+ import { buildFileDatasetPrompt } from "./prompts.js";
4
+ import { generateFilePreview, ensurePreviewScriptsAvailable } from "./filepreview.js";
5
+ import { readInstantFileStep } from "./steps.js";
6
+ export async function initializeFileParseSandboxStep(params) {
7
+ "use step";
8
+ if (params.state.initialized) {
9
+ return { filePath: params.state.filePath, state: params.state };
10
+ }
11
+ console.log(`[FileParseContext ${params.datasetId}] Initializing sandbox...`);
12
+ await ensurePreviewScriptsAvailable(params.runtime, params.sandboxId);
13
+ console.log(`[FileParseContext ${params.datasetId}] Installing Python dependencies...`);
14
+ const pipInstall = await runDatasetSandboxCommandStep({
15
+ runtime: params.runtime,
16
+ sandboxId: params.sandboxId,
17
+ cmd: "python",
18
+ args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
19
+ });
20
+ const installStderr = pipInstall.stderr;
21
+ if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
22
+ throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
23
+ }
24
+ console.log(`[FileParseContext ${params.datasetId}] Fetching file from InstantDB...`);
25
+ const file = await readInstantFileStep({ runtime: params.runtime, fileId: params.fileId });
26
+ console.log(`[FileParseContext ${params.datasetId}] Creating dataset workstation...`);
27
+ const workstation = getDatasetWorkstation(params.datasetId);
28
+ await runDatasetSandboxCommandStep({
29
+ runtime: params.runtime,
30
+ sandboxId: params.sandboxId,
31
+ cmd: "mkdir",
32
+ args: ["-p", workstation],
33
+ });
34
+ const fileName = file.contentDisposition ?? "";
35
+ const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
36
+ const sandboxFilePath = `${workstation}/${params.fileId}${fileExtension}`;
37
+ await writeDatasetSandboxFilesStep({
38
+ runtime: params.runtime,
39
+ sandboxId: params.sandboxId,
40
+ files: [
41
+ {
42
+ path: sandboxFilePath,
43
+ contentBase64: file.contentBase64,
44
+ },
45
+ ],
46
+ });
47
+ console.log(`[FileParseContext ${params.datasetId}] Workstation created: ${workstation}`);
48
+ console.log(`[FileParseContext ${params.datasetId}] File saved: ${sandboxFilePath}`);
49
+ const state = {
50
+ initialized: true,
51
+ filePath: sandboxFilePath,
52
+ };
53
+ return { filePath: sandboxFilePath, state };
54
+ }
55
+ export async function generateFileParsePreviewStep(params) {
56
+ "use step";
57
+ return await generateFilePreview(params.runtime, params.sandboxId, params.sandboxFilePath, params.datasetId);
58
+ }
59
+ export async function buildFileDatasetPromptStep(params) {
60
+ "use step";
61
+ return buildFileDatasetPrompt(params.context);
62
+ }
@@ -0,0 +1,50 @@
1
+ import type { ContextReactor } from "@ekairos/events";
2
+ import type { FilePreviewContext } from "./filepreview.types.js";
3
+ export type SandboxState = {
4
+ initialized: boolean;
5
+ filePath: string;
6
+ };
7
+ export type FileParseContext = {
8
+ datasetId: string;
9
+ fileId: string;
10
+ instructions: string;
11
+ sandboxConfig: {
12
+ filePath: string;
13
+ };
14
+ analysis: any[];
15
+ schema: any | null;
16
+ plan: any | null;
17
+ executionResult: any | null;
18
+ errors: string[];
19
+ iterationCount: number;
20
+ filePreview?: FilePreviewContext;
21
+ };
22
+ export type FileParseContextParams = {
23
+ fileId?: string;
24
+ instructions?: string;
25
+ sandboxId?: string;
26
+ datasetId?: string;
27
+ model?: string;
28
+ reactor?: ContextReactor<any, any>;
29
+ };
30
+ export type FileParseRunOptions = {
31
+ prompt?: string;
32
+ durable?: boolean;
33
+ };
34
+ export type FileParseContextBuilder<Env extends {
35
+ orgId: string;
36
+ }> = {
37
+ datasetId: string;
38
+ context: any;
39
+ };
40
+ export type DatasetResult = {
41
+ id: string;
42
+ status?: string;
43
+ title?: string;
44
+ schema?: any;
45
+ analysis?: any;
46
+ calculatedTotalRows?: number;
47
+ actualGeneratedRowCount?: number;
48
+ createdAt?: number;
49
+ updatedAt?: number;
50
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -1,34 +1,5 @@
1
- export type FilePreviewContext = {
2
- totalRows: number;
3
- metadata?: {
4
- description: string;
5
- script: string;
6
- command: string;
7
- stdout: string;
8
- stderr: string;
9
- };
10
- head?: {
11
- description: string;
12
- script: string;
13
- command: string;
14
- stdout: string;
15
- stderr: string;
16
- };
17
- tail?: {
18
- description: string;
19
- script: string;
20
- command: string;
21
- stdout: string;
22
- stderr: string;
23
- };
24
- mid?: {
25
- description: string;
26
- script: string;
27
- command: string;
28
- stdout: string;
29
- stderr: string;
30
- };
31
- };
1
+ import type { FilePreviewContext } from "./filepreview.types.js";
2
+ export type { FilePreviewContext } from "./filepreview.types.js";
32
3
  interface PreviewOptions {
33
4
  headLines?: number;
34
5
  tailLines?: number;
@@ -36,4 +7,3 @@ interface PreviewOptions {
36
7
  }
37
8
  export declare function ensurePreviewScriptsAvailable(runtime: any, sandboxId: string): Promise<void>;
38
9
  export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
39
- export {};
@@ -0,0 +1,31 @@
1
+ export type FilePreviewContext = {
2
+ totalRows: number;
3
+ metadata?: {
4
+ description: string;
5
+ script: string;
6
+ command: string;
7
+ stdout: string;
8
+ stderr: string;
9
+ };
10
+ head?: {
11
+ description: string;
12
+ script: string;
13
+ command: string;
14
+ stdout: string;
15
+ stderr: string;
16
+ };
17
+ tail?: {
18
+ description: string;
19
+ script: string;
20
+ command: string;
21
+ stdout: string;
22
+ stderr: string;
23
+ };
24
+ mid?: {
25
+ description: string;
26
+ script: string;
27
+ command: string;
28
+ stdout: string;
29
+ stderr: string;
30
+ };
31
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -1,2 +1,2 @@
1
- import { FileParseContext } from "./file-dataset.agent.js";
1
+ import type { FileParseContext } from "./file-dataset.types.js";
2
2
  export declare function buildFileDatasetPrompt(context: FileParseContext): string;
package/dist/index.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import "./builder/materialize.js";
1
2
  export * from "./dataset.js";
2
3
  export * from "./domain.js";
3
4
  export * from "./materializeDataset.tool.js";
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
1
+ import "./builder/materialize.js";
1
2
  export * from "./dataset.js";
2
3
  export * from "./domain.js";
3
4
  export * from "./materializeDataset.tool.js";
@@ -1,2 +1,2 @@
1
- export { createTransformDatasetContext, type TransformDatasetAgentParams, type TransformDatasetContext, } from "./transform-dataset.agent.js";
1
+ export { createTransformDatasetContext, registerTransformDatasetContext, type TransformDatasetAgentParams, type TransformDatasetContext, type TransformDatasetRunOptions, } from "./transform-dataset.agent.js";
2
2
  export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset.js";
@@ -1,2 +1,2 @@
1
- export { createTransformDatasetContext, } from "./transform-dataset.agent.js";
1
+ export { createTransformDatasetContext, registerTransformDatasetContext, } from "./transform-dataset.agent.js";
2
2
  export { transformDataset, } from "./transformDataset.js";
@@ -1,34 +1,2 @@
1
- export type TransformPromptContext = {
2
- datasetId: string;
3
- sourceDatasetIds: string[];
4
- outputSchema: any;
5
- sandboxConfig: {
6
- sourcePaths: Array<{
7
- datasetId: string;
8
- path: string;
9
- }>;
10
- outputPath: string;
11
- };
12
- sourcePreviews?: Array<{
13
- datasetId: string;
14
- preview: {
15
- totalRows: number;
16
- metadata?: {
17
- description: string;
18
- script: string;
19
- command: string;
20
- stdout: string;
21
- stderr: string;
22
- };
23
- head?: {
24
- description: string;
25
- script: string;
26
- command: string;
27
- stdout: string;
28
- stderr: string;
29
- };
30
- };
31
- }>;
32
- errors: string[];
33
- };
1
+ import type { TransformPromptContext } from "./transform-dataset.types.js";
34
2
  export declare function buildTransformDatasetPrompt(context: TransformPromptContext): string;
@@ -1,48 +1,6 @@
1
1
  import { type ContextReactor } from "@ekairos/events";
2
- import { TransformSourcePreviewContext } from "./filepreview.js";
3
- export type TransformDatasetContext = {
4
- datasetId: string;
5
- sourceDatasetIds: string[];
6
- outputSchema: any;
7
- sandboxConfig: {
8
- sourcePaths: Array<{
9
- datasetId: string;
10
- path: string;
11
- }>;
12
- outputPath: string;
13
- };
14
- sourcePreviews?: Array<{
15
- datasetId: string;
16
- preview: TransformSourcePreviewContext;
17
- }>;
18
- errors: string[];
19
- iterationCount: number;
20
- instructions?: string;
21
- };
22
- export type TransformDatasetAgentParams = {
23
- sourceDatasetIds?: string[];
24
- outputSchema?: any;
25
- instructions?: string;
26
- datasetId?: string;
27
- model?: string;
28
- sandboxId?: string;
29
- reactor?: ContextReactor<any, any>;
30
- };
31
- export type TransformDatasetRunOptions = {
32
- prompt?: string;
33
- durable?: boolean;
34
- };
35
- export type TransformDatasetResult = {
36
- id: string;
37
- status?: string;
38
- title?: string;
39
- schema?: any;
40
- analysis?: any;
41
- calculatedTotalRows?: number;
42
- actualGeneratedRowCount?: number;
43
- createdAt?: number;
44
- updatedAt?: number;
45
- };
2
+ import type { TransformDatasetRunOptions } from "./transform-dataset.types.js";
3
+ export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
46
4
  export declare function createTransformDatasetContext<Env extends {
47
5
  orgId: string;
48
6
  }>(params: {
@@ -1,13 +1,10 @@
1
- import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
2
- import { createCompleteDatasetTool, didCompleteDatasetSucceed } from "../completeDataset.tool.js";
3
- import { createExecuteCommandTool } from "../executeCommand.tool.js";
4
- import { createClearDatasetTool } from "../clearDataset.tool.js";
5
- import { buildTransformDatasetPrompt } from "./prompts.js";
6
- import { getDatasetWorkstation, getDatasetOutputPath } from "../datasetFiles.js";
1
+ import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
7
2
  import { id } from "@instantdb/admin";
8
- import { generateSourcePreview } from "./filepreview.js";
9
- import { datasetReadOutputJsonlStep, datasetUpdateSchemaStep } from "../dataset/steps.js";
10
- import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
3
+ import { createClearDatasetTool } from "../clearDataset.tool.js";
4
+ import { createCompleteDatasetTool, didCompleteDatasetSucceed, } from "../completeDataset.tool.js";
5
+ import { datasetUpdateSchemaStep } from "../dataset/steps.js";
6
+ import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
+ import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
11
8
  async function awaitContextRun(run) {
12
9
  if (!run)
13
10
  return;
@@ -17,28 +14,6 @@ async function awaitContextRun(run) {
17
14
  }
18
15
  await run;
19
16
  }
20
- async function ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatasetIds, state) {
21
- "use step";
22
- if (state.initialized) {
23
- return { sourcePaths: state.sourcePaths, outputPath: getDatasetOutputPath(datasetId) };
24
- }
25
- const workstation = getDatasetWorkstation(datasetId);
26
- await runDatasetSandboxCommandStep({ runtime, sandboxId, cmd: "mkdir", args: ["-p", workstation] });
27
- const sourcePaths = [];
28
- for (const sourceDatasetId of sourceDatasetIds) {
29
- const sourcePath = `${workstation}/source_${sourceDatasetId}.jsonl`;
30
- const source = await datasetReadOutputJsonlStep({ runtime, datasetId: sourceDatasetId });
31
- await writeDatasetSandboxFilesStep({
32
- runtime,
33
- sandboxId,
34
- files: [{ path: sourcePath, contentBase64: source.contentBase64 }],
35
- });
36
- sourcePaths.push({ datasetId: sourceDatasetId, path: sourcePath });
37
- }
38
- state.sourcePaths = sourcePaths;
39
- state.initialized = true;
40
- return { sourcePaths, outputPath: getDatasetOutputPath(datasetId) };
41
- }
42
17
  function createTransformDatasetContextDefinition(params) {
43
18
  const fallbackDatasetId = params.datasetId;
44
19
  const model = params.model ?? "openai/gpt-5";
@@ -67,18 +42,19 @@ function createTransformDatasetContextDefinition(params) {
67
42
  if (!sandboxId) {
68
43
  throw new Error("dataset_sandbox_required");
69
44
  }
70
- const { sourcePaths, outputPath } = await ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatasetIds, sandboxState);
71
- const sourcePreviews = [];
72
- for (const sp of sourcePaths) {
73
- try {
74
- const preview = await generateSourcePreview(runtime, sandboxId, sp.path, datasetId);
75
- sourcePreviews.push({ datasetId: sp.datasetId, preview });
76
- }
77
- catch {
78
- // optional
79
- }
80
- }
81
- // Persist output schema on the dataset record (so completeDataset validates against it)
45
+ const initialized = await ensureTransformSourcesInSandboxStep({
46
+ runtime,
47
+ sandboxId,
48
+ datasetId,
49
+ sourceDatasetIds,
50
+ state: sandboxState,
51
+ });
52
+ const sourcePreviews = await generateTransformSourcePreviewsStep({
53
+ runtime,
54
+ sandboxId,
55
+ datasetId,
56
+ sourcePaths: initialized.sourcePaths,
57
+ });
82
58
  await datasetUpdateSchemaStep({
83
59
  runtime,
84
60
  datasetId,
@@ -89,11 +65,16 @@ function createTransformDatasetContextDefinition(params) {
89
65
  datasetId,
90
66
  sourceDatasetIds,
91
67
  outputSchema,
92
- sandboxConfig: { sourcePaths, outputPath },
68
+ sandboxConfig: {
69
+ sourcePaths: initialized.sourcePaths,
70
+ outputPath: initialized.outputPath,
71
+ },
93
72
  sourcePreviews: sourcePreviews.length > 0 ? sourcePreviews : undefined,
94
73
  errors: [],
95
74
  };
96
- const basePrompt = buildTransformDatasetPrompt(promptContext);
75
+ const basePrompt = await buildTransformDatasetPromptStep({
76
+ context: promptContext,
77
+ });
97
78
  const userInstructions = String(instructions ?? "").trim();
98
79
  const system = userInstructions
99
80
  ? [
@@ -112,9 +93,12 @@ function createTransformDatasetContextDefinition(params) {
112
93
  outputSchema,
113
94
  instructions,
114
95
  sandboxId,
115
- sandboxState,
96
+ sandboxState: initialized.state,
116
97
  system,
117
- sandboxConfig: { sourcePaths, outputPath },
98
+ sandboxConfig: {
99
+ sourcePaths: initialized.sourcePaths,
100
+ outputPath: initialized.outputPath,
101
+ },
118
102
  };
119
103
  })
120
104
  .narrative(async (stored) => {
@@ -193,7 +177,13 @@ export function createTransformDatasetContext(params) {
193
177
  runtime: runtime,
194
178
  context: { key: `dataset:${datasetId}` },
195
179
  durable: options.durable ?? false,
196
- options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
180
+ options: {
181
+ silent: true,
182
+ preventClose: true,
183
+ sendFinish: false,
184
+ maxIterations: 20,
185
+ maxModelSteps: 5,
186
+ },
197
187
  __initialContent: {
198
188
  datasetId,
199
189
  sourceDatasetIds: params.sourceDatasetIds,
@@ -0,0 +1,30 @@
1
+ import type { TransformPromptContext, TransformSandboxState, TransformSourcePreviewContext } from "./transform-dataset.types.js";
2
+ export declare function ensureTransformSourcesInSandboxStep(params: {
3
+ runtime: any;
4
+ sandboxId: string;
5
+ datasetId: string;
6
+ sourceDatasetIds: string[];
7
+ state: TransformSandboxState;
8
+ }): Promise<{
9
+ sourcePaths: Array<{
10
+ datasetId: string;
11
+ path: string;
12
+ }>;
13
+ outputPath: string;
14
+ state: TransformSandboxState;
15
+ }>;
16
+ export declare function generateTransformSourcePreviewsStep(params: {
17
+ runtime: any;
18
+ sandboxId: string;
19
+ datasetId: string;
20
+ sourcePaths: Array<{
21
+ datasetId: string;
22
+ path: string;
23
+ }>;
24
+ }): Promise<Array<{
25
+ datasetId: string;
26
+ preview: TransformSourcePreviewContext;
27
+ }>>;
28
+ export declare function buildTransformDatasetPromptStep(params: {
29
+ context: TransformPromptContext;
30
+ }): Promise<string>;
@@ -0,0 +1,62 @@
1
+ import { getDatasetOutputPath, getDatasetWorkstation } from "../datasetFiles.js";
2
+ import { datasetReadOutputJsonlStep } from "../dataset/steps.js";
3
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
4
+ import { generateSourcePreview } from "./filepreview.js";
5
+ import { buildTransformDatasetPrompt } from "./prompts.js";
6
+ export async function ensureTransformSourcesInSandboxStep(params) {
7
+ "use step";
8
+ if (params.state.initialized) {
9
+ return {
10
+ sourcePaths: params.state.sourcePaths,
11
+ outputPath: getDatasetOutputPath(params.datasetId),
12
+ state: params.state,
13
+ };
14
+ }
15
+ const workstation = getDatasetWorkstation(params.datasetId);
16
+ await runDatasetSandboxCommandStep({
17
+ runtime: params.runtime,
18
+ sandboxId: params.sandboxId,
19
+ cmd: "mkdir",
20
+ args: ["-p", workstation],
21
+ });
22
+ const sourcePaths = [];
23
+ for (const sourceDatasetId of params.sourceDatasetIds) {
24
+ const sourcePath = `${workstation}/source_${sourceDatasetId}.jsonl`;
25
+ const source = await datasetReadOutputJsonlStep({
26
+ runtime: params.runtime,
27
+ datasetId: sourceDatasetId,
28
+ });
29
+ await writeDatasetSandboxFilesStep({
30
+ runtime: params.runtime,
31
+ sandboxId: params.sandboxId,
32
+ files: [{ path: sourcePath, contentBase64: source.contentBase64 }],
33
+ });
34
+ sourcePaths.push({ datasetId: sourceDatasetId, path: sourcePath });
35
+ }
36
+ return {
37
+ sourcePaths,
38
+ outputPath: getDatasetOutputPath(params.datasetId),
39
+ state: {
40
+ initialized: true,
41
+ sourcePaths,
42
+ },
43
+ };
44
+ }
45
+ export async function generateTransformSourcePreviewsStep(params) {
46
+ "use step";
47
+ const sourcePreviews = [];
48
+ for (const sourcePath of params.sourcePaths) {
49
+ try {
50
+ const preview = await generateSourcePreview(params.runtime, params.sandboxId, sourcePath.path, params.datasetId);
51
+ sourcePreviews.push({ datasetId: sourcePath.datasetId, preview });
52
+ }
53
+ catch {
54
+ // Source preview is optional; transformation can still read the JSONL files.
55
+ }
56
+ }
57
+ return sourcePreviews;
58
+ }
59
+ export async function buildTransformDatasetPromptStep(params) {
60
+ "use step";
61
+ return buildTransformDatasetPrompt(params.context);
62
+ }
@@ -0,0 +1,86 @@
1
+ import type { ContextReactor } from "@ekairos/events";
2
+ import type { TransformSourcePreviewContext } from "./filepreview.js";
3
+ export type { TransformSourcePreviewContext } from "./filepreview.js";
4
+ export type TransformSandboxState = {
5
+ initialized: boolean;
6
+ sourcePaths: Array<{
7
+ datasetId: string;
8
+ path: string;
9
+ }>;
10
+ };
11
+ export type TransformDatasetContext = {
12
+ datasetId: string;
13
+ sourceDatasetIds: string[];
14
+ outputSchema: any;
15
+ sandboxConfig: {
16
+ sourcePaths: Array<{
17
+ datasetId: string;
18
+ path: string;
19
+ }>;
20
+ outputPath: string;
21
+ };
22
+ sourcePreviews?: Array<{
23
+ datasetId: string;
24
+ preview: TransformSourcePreviewContext;
25
+ }>;
26
+ errors: string[];
27
+ iterationCount: number;
28
+ instructions?: string;
29
+ };
30
+ export type TransformDatasetAgentParams = {
31
+ sourceDatasetIds?: string[];
32
+ outputSchema?: any;
33
+ instructions?: string;
34
+ datasetId?: string;
35
+ model?: string;
36
+ sandboxId?: string;
37
+ reactor?: ContextReactor<any, any>;
38
+ };
39
+ export type TransformDatasetRunOptions = {
40
+ prompt?: string;
41
+ durable?: boolean;
42
+ };
43
+ export type TransformDatasetResult = {
44
+ id: string;
45
+ status?: string;
46
+ title?: string;
47
+ schema?: any;
48
+ analysis?: any;
49
+ calculatedTotalRows?: number;
50
+ actualGeneratedRowCount?: number;
51
+ createdAt?: number;
52
+ updatedAt?: number;
53
+ };
54
+ export type TransformPromptContext = {
55
+ datasetId: string;
56
+ sourceDatasetIds: string[];
57
+ outputSchema: any;
58
+ sandboxConfig: {
59
+ sourcePaths: Array<{
60
+ datasetId: string;
61
+ path: string;
62
+ }>;
63
+ outputPath: string;
64
+ };
65
+ sourcePreviews?: Array<{
66
+ datasetId: string;
67
+ preview: {
68
+ totalRows: number;
69
+ metadata?: {
70
+ description: string;
71
+ script: string;
72
+ command: string;
73
+ stdout: string;
74
+ stderr: string;
75
+ };
76
+ head?: {
77
+ description: string;
78
+ script: string;
79
+ command: string;
80
+ stdout: string;
81
+ stderr: string;
82
+ };
83
+ };
84
+ }>;
85
+ errors: string[];
86
+ };
@@ -0,0 +1 @@
1
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.55-beta.development.0",
3
+ "version": "1.22.56-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.55-beta.development.0",
69
- "@ekairos/events": "^1.22.55-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.55-beta.development.0",
68
+ "@ekairos/domain": "^1.22.56-beta.development.0",
69
+ "@ekairos/events": "^1.22.56-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.56-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",