@ekairos/dataset 1.22.40-beta.development.0 → 1.22.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/context.d.ts +15 -0
  6. package/dist/builder/context.js +251 -0
  7. package/dist/builder/instructions.d.ts +4 -5
  8. package/dist/builder/instructions.js +15 -21
  9. package/dist/builder/materialize.d.ts +77 -10
  10. package/dist/builder/materialize.js +495 -152
  11. package/dist/builder/materializeQuery.d.ts +12 -0
  12. package/dist/builder/materializeQuery.js +31 -0
  13. package/dist/builder/persistence.d.ts +10 -6
  14. package/dist/builder/persistence.js +107 -62
  15. package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -1
  16. package/dist/builder/{sourceRows.js → rows.js} +3 -9
  17. package/dist/builder/schemaInference.d.ts +1 -2
  18. package/dist/builder/schemaInference.js +4 -12
  19. package/dist/builder/types.d.ts +41 -26
  20. package/dist/builder/types.js +1 -3
  21. package/dist/clearDataset.tool.d.ts +2 -3
  22. package/dist/clearDataset.tool.js +13 -17
  23. package/dist/completeDataset.steps.d.ts +117 -0
  24. package/dist/completeDataset.steps.js +537 -0
  25. package/dist/completeDataset.tool.d.ts +132 -7
  26. package/dist/completeDataset.tool.js +46 -192
  27. package/dist/contextResources.d.ts +31 -0
  28. package/dist/contextResources.js +151 -0
  29. package/dist/contextWorkspace.d.ts +79 -0
  30. package/dist/contextWorkspace.js +234 -0
  31. package/dist/dataset/steps.d.ts +39 -15
  32. package/dist/dataset/steps.js +96 -39
  33. package/dist/dataset.d.ts +2 -3
  34. package/dist/dataset.js +73 -51
  35. package/dist/datasetFiles.d.ts +5 -1
  36. package/dist/datasetFiles.js +29 -27
  37. package/dist/defineNotation.tool.d.ts +49 -0
  38. package/dist/defineNotation.tool.js +154 -0
  39. package/dist/domain.d.ts +1 -2
  40. package/dist/domain.js +1 -6
  41. package/dist/executeCommand.tool.d.ts +2 -30
  42. package/dist/executeCommand.tool.js +165 -39
  43. package/dist/file/file-dataset.agent.d.ts +19 -56
  44. package/dist/file/file-dataset.agent.js +182 -136
  45. package/dist/file/file-dataset.steps.d.ts +27 -0
  46. package/dist/file/file-dataset.steps.js +47 -0
  47. package/dist/file/file-dataset.types.d.ts +64 -0
  48. package/dist/file/file-dataset.types.js +1 -0
  49. package/dist/file/filepreview.d.ts +5 -35
  50. package/dist/file/filepreview.js +60 -107
  51. package/dist/file/filepreview.types.d.ts +31 -0
  52. package/dist/file/filepreview.types.js +1 -0
  53. package/dist/file/generateSchema.tool.d.ts +2 -3
  54. package/dist/file/generateSchema.tool.js +11 -15
  55. package/dist/file/index.d.ts +1 -2
  56. package/dist/file/index.js +1 -18
  57. package/dist/file/prompts.d.ts +2 -3
  58. package/dist/file/prompts.js +152 -32
  59. package/dist/file/scripts.generated.d.ts +1 -0
  60. package/dist/file/scripts.generated.js +11 -0
  61. package/dist/file/steps.d.ts +1 -2
  62. package/dist/file/steps.js +9 -7
  63. package/dist/id.d.ts +1 -0
  64. package/dist/id.js +10 -0
  65. package/dist/index.d.ts +9 -7
  66. package/dist/index.js +9 -23
  67. package/dist/materializeDataset.tool.d.ts +35 -28
  68. package/dist/materializeDataset.tool.js +74 -68
  69. package/dist/notation.d.ts +205 -0
  70. package/dist/notation.js +424 -0
  71. package/dist/query/index.d.ts +1 -2
  72. package/dist/query/index.js +1 -18
  73. package/dist/query/queryDomain.d.ts +3 -4
  74. package/dist/query/queryDomain.js +3 -40
  75. package/dist/query/queryDomain.step.d.ts +1 -1
  76. package/dist/query/queryDomain.step.js +24 -13
  77. package/dist/sandbox/steps.d.ts +23 -15
  78. package/dist/sandbox/steps.js +73 -76
  79. package/dist/sandbox.steps.d.ts +1 -2
  80. package/dist/sandbox.steps.js +1 -18
  81. package/dist/schema.d.ts +14 -3
  82. package/dist/schema.js +27 -26
  83. package/dist/service.d.ts +12 -5
  84. package/dist/service.js +88 -15
  85. package/dist/skill.d.ts +0 -1
  86. package/dist/skill.js +12 -17
  87. package/dist/transform/filepreview.d.ts +2 -3
  88. package/dist/transform/filepreview.js +9 -26
  89. package/dist/transform/index.d.ts +2 -3
  90. package/dist/transform/index.js +2 -8
  91. package/dist/transform/prompts.d.ts +1 -34
  92. package/dist/transform/prompts.js +66 -46
  93. package/dist/transform/transform-dataset.agent.d.ts +21 -46
  94. package/dist/transform/transform-dataset.agent.js +152 -93
  95. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  96. package/dist/transform/transform-dataset.steps.js +61 -0
  97. package/dist/transform/transform-dataset.types.d.ts +96 -0
  98. package/dist/transform/transform-dataset.types.js +1 -0
  99. package/dist/transform/transformDataset.d.ts +3 -3
  100. package/dist/transform/transformDataset.js +15 -18
  101. package/dist/writeDatasetRows.tool.d.ts +188 -0
  102. package/dist/writeDatasetRows.tool.js +258 -0
  103. package/package.json +33 -8
  104. package/dist/builder/instructions.d.ts.map +0 -1
  105. package/dist/builder/instructions.js.map +0 -1
  106. package/dist/builder/materialize.d.ts.map +0 -1
  107. package/dist/builder/materialize.js.map +0 -1
  108. package/dist/builder/persistence.d.ts.map +0 -1
  109. package/dist/builder/persistence.js.map +0 -1
  110. package/dist/builder/schemaInference.d.ts.map +0 -1
  111. package/dist/builder/schemaInference.js.map +0 -1
  112. package/dist/builder/sourceRows.d.ts.map +0 -1
  113. package/dist/builder/sourceRows.js.map +0 -1
  114. package/dist/builder/types.d.ts.map +0 -1
  115. package/dist/builder/types.js.map +0 -1
  116. package/dist/clearDataset.tool.d.ts.map +0 -1
  117. package/dist/clearDataset.tool.js.map +0 -1
  118. package/dist/completeDataset.tool.d.ts.map +0 -1
  119. package/dist/completeDataset.tool.js.map +0 -1
  120. package/dist/dataset/steps.d.ts.map +0 -1
  121. package/dist/dataset/steps.js.map +0 -1
  122. package/dist/dataset.d.ts.map +0 -1
  123. package/dist/dataset.js.map +0 -1
  124. package/dist/datasetFiles.d.ts.map +0 -1
  125. package/dist/datasetFiles.js.map +0 -1
  126. package/dist/domain.d.ts.map +0 -1
  127. package/dist/domain.js.map +0 -1
  128. package/dist/eventsReactRuntime.d.ts +0 -22
  129. package/dist/eventsReactRuntime.d.ts.map +0 -1
  130. package/dist/eventsReactRuntime.js +0 -29
  131. package/dist/eventsReactRuntime.js.map +0 -1
  132. package/dist/executeCommand.tool.d.ts.map +0 -1
  133. package/dist/executeCommand.tool.js.map +0 -1
  134. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  135. package/dist/file/file-dataset.agent.js.map +0 -1
  136. package/dist/file/filepreview.d.ts.map +0 -1
  137. package/dist/file/filepreview.js.map +0 -1
  138. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  139. package/dist/file/generateSchema.tool.js.map +0 -1
  140. package/dist/file/index.d.ts.map +0 -1
  141. package/dist/file/index.js.map +0 -1
  142. package/dist/file/prompts.d.ts.map +0 -1
  143. package/dist/file/prompts.js.map +0 -1
  144. package/dist/file/steps.d.ts.map +0 -1
  145. package/dist/file/steps.js.map +0 -1
  146. package/dist/index.d.ts.map +0 -1
  147. package/dist/index.js.map +0 -1
  148. package/dist/materializeDataset.tool.d.ts.map +0 -1
  149. package/dist/materializeDataset.tool.js.map +0 -1
  150. package/dist/query/index.d.ts.map +0 -1
  151. package/dist/query/index.js.map +0 -1
  152. package/dist/query/queryDomain.d.ts.map +0 -1
  153. package/dist/query/queryDomain.js.map +0 -1
  154. package/dist/query/queryDomain.step.d.ts.map +0 -1
  155. package/dist/query/queryDomain.step.js.map +0 -1
  156. package/dist/sandbox/steps.d.ts.map +0 -1
  157. package/dist/sandbox/steps.js.map +0 -1
  158. package/dist/sandbox.steps.d.ts.map +0 -1
  159. package/dist/sandbox.steps.js.map +0 -1
  160. package/dist/schema.d.ts.map +0 -1
  161. package/dist/schema.js.map +0 -1
  162. package/dist/service.d.ts.map +0 -1
  163. package/dist/service.js.map +0 -1
  164. package/dist/skill.d.ts.map +0 -1
  165. package/dist/skill.js.map +0 -1
  166. package/dist/transform/filepreview.d.ts.map +0 -1
  167. package/dist/transform/filepreview.js.map +0 -1
  168. package/dist/transform/index.d.ts.map +0 -1
  169. package/dist/transform/index.js.map +0 -1
  170. package/dist/transform/prompts.d.ts.map +0 -1
  171. package/dist/transform/prompts.js.map +0 -1
  172. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  173. package/dist/transform/transform-dataset.agent.js.map +0 -1
  174. package/dist/transform/transformDataset.d.ts.map +0 -1
  175. package/dist/transform/transformDataset.js.map +0 -1
@@ -1,77 +1,78 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createTransformDatasetStory = createTransformDatasetStory;
4
- const events_1 = require("@ekairos/events");
5
- const completeDataset_tool_1 = require("../completeDataset.tool");
6
- const executeCommand_tool_1 = require("../executeCommand.tool");
7
- const clearDataset_tool_1 = require("../clearDataset.tool");
8
- const prompts_1 = require("./prompts");
9
- const datasetFiles_1 = require("../datasetFiles");
10
- const admin_1 = require("@instantdb/admin");
11
- const filepreview_1 = require("./filepreview");
12
- const steps_1 = require("../dataset/steps");
13
- const steps_2 = require("../sandbox/steps");
14
- const eventsReactRuntime_1 = require("../eventsReactRuntime");
15
- async function ensureSourcesInSandbox(env, sandboxId, datasetId, sourceDatasetIds, state) {
16
- if (state.initialized) {
17
- return { sourcePaths: state.sourcePaths, outputPath: (0, datasetFiles_1.getDatasetOutputPath)(datasetId) };
1
+ import { INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
2
+ import { createContext } from "@ekairos/reactor/context";
3
+ import { createClearDatasetTool } from "../clearDataset.tool.js";
4
+ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
5
+ import { datasetUpdateSchemaStep } from "../dataset/steps.js";
6
+ import { getDatasetOutputPath } from "../datasetFiles.js";
7
+ import { createExecuteCommandTool } from "../executeCommand.tool.js";
8
+ import { createDefineNotationTool } from "../defineNotation.tool.js";
9
+ import { createCompleteObjectTool, createReplaceRowsTool, } from "../writeDatasetRows.tool.js";
10
+ import { buildTransformDatasetPromptStep, } from "./transform-dataset.steps.js";
11
+ import { createDatasetId } from "../id.js";
12
+ async function awaitContextRun(run) {
13
+ if (!run)
14
+ return;
15
+ if (run.returnValue) {
16
+ await run.returnValue;
17
+ return;
18
18
  }
19
- const workstation = (0, datasetFiles_1.getDatasetWorkstation)(datasetId);
20
- await (0, steps_2.runDatasetSandboxCommandStep)({ env, sandboxId, cmd: "mkdir", args: ["-p", workstation] });
21
- const sourcePaths = [];
22
- for (const sourceDatasetId of sourceDatasetIds) {
23
- const sourcePath = `${workstation}/source_${sourceDatasetId}.jsonl`;
24
- const source = await (0, steps_1.datasetReadOutputJsonlStep)({ env, datasetId: sourceDatasetId });
25
- await (0, steps_2.writeDatasetSandboxFilesStep)({
26
- env,
27
- sandboxId,
28
- files: [{ path: sourcePath, contentBase64: source.contentBase64 }],
29
- });
30
- sourcePaths.push({ datasetId: sourceDatasetId, path: sourcePath });
31
- }
32
- state.sourcePaths = sourcePaths;
33
- state.initialized = true;
34
- return { sourcePaths, outputPath: (0, datasetFiles_1.getDatasetOutputPath)(datasetId) };
19
+ await run;
35
20
  }
36
- function createTransformDatasetStoryDefinition(params) {
37
- const datasetId = params.datasetId ?? (0, admin_1.id)();
21
+ function createTransformDatasetContextDefinition(params) {
22
+ const fallbackDatasetId = params.datasetId;
38
23
  const model = params.model ?? "openai/gpt-5";
39
- let storyBuilder = (0, events_1.createContext)("dataset.transform")
40
- .context(async (stored, env) => {
24
+ let contextBuilder = createContext("dataset.transform")
25
+ .context(async (stored, _env, runtime) => {
41
26
  const previous = stored?.content ?? {};
42
- const sandboxState = previous?.sandboxState ?? { initialized: false, sourcePaths: [] };
27
+ const sandboxState = previous?.sandboxState ??
28
+ params.sandboxState ?? { initialized: false, inputPaths: [] };
29
+ const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
30
+ const inputDatasetIds = Array.isArray(previous?.inputDatasetIds)
31
+ ? previous.inputDatasetIds
32
+ : Array.isArray(params.inputDatasetIds)
33
+ ? params.inputDatasetIds
34
+ : [];
35
+ const outputSchema = previous?.outputSchema ?? params.outputSchema;
36
+ const instructions = previous?.instructions ?? params.instructions;
43
37
  const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
38
+ if (!datasetId) {
39
+ throw new Error("dataset_id_required");
40
+ }
41
+ if (inputDatasetIds.length === 0) {
42
+ throw new Error("dataset_transform_inputs_required");
43
+ }
44
+ if (!outputSchema) {
45
+ throw new Error("dataset_transform_schema_required");
46
+ }
44
47
  if (!sandboxId) {
45
48
  throw new Error("dataset_sandbox_required");
46
49
  }
47
- const { sourcePaths, outputPath } = await ensureSourcesInSandbox(env, sandboxId, datasetId, params.sourceDatasetIds, sandboxState);
48
- const sourcePreviews = [];
49
- for (const sp of sourcePaths) {
50
- try {
51
- const preview = await (0, filepreview_1.generateSourcePreview)(env, sandboxId, sp.path, datasetId);
52
- sourcePreviews.push({ datasetId: sp.datasetId, preview });
53
- }
54
- catch {
55
- // optional
56
- }
57
- }
58
- // Persist output schema on the dataset record (so completeDataset validates against it)
59
- await (0, steps_1.datasetUpdateSchemaStep)({
60
- env,
50
+ const initialized = sandboxState.initialized && Array.isArray(sandboxState.inputPaths)
51
+ ? sandboxState
52
+ : { initialized: false, inputPaths: [] };
53
+ const inputPreviews = previous?.inputPreviews ?? params.inputPreviews ?? [];
54
+ await datasetUpdateSchemaStep({
55
+ runtime,
61
56
  datasetId,
62
- schema: params.outputSchema,
57
+ schema: outputSchema,
63
58
  status: "schema_complete",
64
59
  });
65
60
  const promptContext = {
66
61
  datasetId,
67
- sourceDatasetIds: params.sourceDatasetIds,
68
- outputSchema: params.outputSchema,
69
- sandboxConfig: { sourcePaths, outputPath },
70
- sourcePreviews: sourcePreviews.length > 0 ? sourcePreviews : undefined,
62
+ inputDatasetIds,
63
+ outputSchema,
64
+ sandboxConfig: {
65
+ inputPaths: initialized.inputPaths,
66
+ outputPath: previous?.sandboxConfig?.outputPath ?? getDatasetOutputPath(datasetId),
67
+ },
68
+ inputPreviews: inputPreviews.length > 0 ? inputPreviews : undefined,
69
+ contextResources: previous?.contextResources ?? params.contextResources ?? [],
71
70
  errors: [],
72
71
  };
73
- const basePrompt = (0, prompts_1.buildTransformDatasetPrompt)(promptContext);
74
- const userInstructions = String(params.instructions ?? "").trim();
72
+ const basePrompt = await buildTransformDatasetPromptStep({
73
+ context: promptContext,
74
+ });
75
+ const userInstructions = String(instructions ?? "").trim();
75
76
  const system = userInstructions
76
77
  ? [
77
78
  "## USER INSTRUCTIONS",
@@ -85,89 +86,147 @@ function createTransformDatasetStoryDefinition(params) {
85
86
  return {
86
87
  ...previous,
87
88
  datasetId,
89
+ inputDatasetIds,
90
+ outputSchema,
91
+ instructions,
88
92
  sandboxId,
89
- sandboxState,
93
+ sandboxState: initialized,
94
+ contextResources: previous?.contextResources ?? params.contextResources ?? [],
90
95
  system,
91
- sandboxConfig: { sourcePaths, outputPath },
96
+ sandboxConfig: {
97
+ inputPaths: initialized.inputPaths,
98
+ outputPath: previous?.sandboxConfig?.outputPath ?? getDatasetOutputPath(datasetId),
99
+ },
92
100
  };
93
101
  })
102
+ .resources(({ content }) => Array.isArray(content?.contextResources) ? content.contextResources : [])
94
103
  .narrative(async (stored) => {
95
104
  return String(stored?.content?.system ?? "");
96
105
  })
97
- .actions(async (stored, env) => {
106
+ .actions(async (stored, _env, runtime) => {
107
+ const datasetId = stored?.content?.datasetId ?? fallbackDatasetId ?? "";
98
108
  const sandboxId = stored?.content?.sandboxId ?? params.sandboxId ?? "";
109
+ if (!datasetId)
110
+ throw new Error("dataset_id_required");
111
+ if (!sandboxId)
112
+ throw new Error("dataset_sandbox_required");
99
113
  return {
100
- executeCommand: (0, executeCommand_tool_1.createExecuteCommandTool)({
114
+ completeObject: createCompleteObjectTool({
101
115
  datasetId,
102
116
  sandboxId,
103
- env,
117
+ runtime,
118
+ schema: stored?.content?.outputSchema,
104
119
  }),
105
- completeDataset: (0, completeDataset_tool_1.createCompleteDatasetTool)({
120
+ replaceRows: createReplaceRowsTool({
106
121
  datasetId,
107
122
  sandboxId,
108
- env,
123
+ runtime,
124
+ schema: stored?.content?.outputSchema,
109
125
  }),
110
- clearDataset: (0, clearDataset_tool_1.createClearDatasetTool)({
126
+ executeCommand: createExecuteCommandTool({
111
127
  datasetId,
112
128
  sandboxId,
113
- env,
129
+ runtime,
130
+ }),
131
+ completeDataset: createCompleteDatasetTool({
132
+ datasetId,
133
+ sandboxId,
134
+ runtime,
135
+ }),
136
+ clearDataset: createClearDatasetTool({
137
+ datasetId,
138
+ sandboxId,
139
+ runtime,
140
+ }),
141
+ defineNotation: createDefineNotationTool({
142
+ datasetId,
143
+ runtime,
114
144
  }),
115
145
  };
116
146
  })
117
147
  .shouldContinue(({ reactionEvent }) => {
118
- return !(0, events_1.didToolExecute)(reactionEvent, "completeDataset");
148
+ const fatalFailure = getDatasetFatalFailure(reactionEvent);
149
+ if (fatalFailure) {
150
+ throw new Error(fatalFailure);
151
+ }
152
+ return !didCompleteDatasetSucceed(reactionEvent);
119
153
  });
120
154
  if (params.reactor) {
121
- storyBuilder = storyBuilder.reactor(params.reactor);
155
+ contextBuilder = contextBuilder.reactor(params.reactor);
122
156
  }
123
157
  else {
124
- storyBuilder = storyBuilder.model(model);
158
+ contextBuilder = contextBuilder.model(model);
125
159
  }
126
- const story = storyBuilder.build();
127
- return { datasetId, story };
160
+ const context = contextBuilder.build();
161
+ return { datasetId: fallbackDatasetId ?? "", context };
128
162
  }
129
- function createTransformDatasetStory(params) {
130
- const { datasetId, story } = createTransformDatasetStoryDefinition({
131
- sourceDatasetIds: params.sourceDatasetIds,
163
+ export function createTransformDatasetContext(params) {
164
+ const datasetId = params.datasetId ?? createDatasetId();
165
+ const { context } = createTransformDatasetContextDefinition({
166
+ inputDatasetIds: params.inputDatasetIds,
132
167
  outputSchema: params.outputSchema,
133
168
  instructions: params.instructions,
134
- datasetId: params.datasetId,
169
+ datasetId,
135
170
  model: params.model,
136
171
  sandboxId: params.sandboxId,
137
172
  reactor: params.reactor,
173
+ sandboxState: params.sandboxState,
174
+ inputPreviews: params.inputPreviews,
175
+ contextResources: params.contextResources,
138
176
  });
139
177
  return {
140
178
  datasetId,
141
- async transform(env, prompt) {
142
- const datasetCountText = params.sourceDatasetIds.length === 1
143
- ? "the source dataset"
144
- : `${params.sourceDatasetIds.length} source datasets`;
179
+ async transform(runtime, options = {}) {
180
+ const datasetCountText = params.inputDatasetIds.length === 1
181
+ ? "the input dataset"
182
+ : `${params.inputDatasetIds.length} input datasets`;
145
183
  const triggerEvent = {
146
- id: (0, admin_1.id)(),
147
- type: events_1.INPUT_TEXT_ITEM_TYPE,
148
- channel: events_1.WEB_CHANNEL,
184
+ id: createDatasetId(),
185
+ type: INPUT_TEXT_ITEM_TYPE,
186
+ channel: WEB_CHANNEL,
149
187
  createdAt: new Date().toISOString(),
150
188
  content: {
151
189
  parts: [
152
190
  {
153
191
  type: "text",
154
- text: prompt ??
192
+ text: options.prompt ??
155
193
  `Transform ${datasetCountText} into a new dataset matching the provided output schema`,
156
194
  },
157
195
  ],
158
196
  },
159
197
  };
160
- const runtime = (0, eventsReactRuntime_1.createEventsReactRuntime)(env);
161
- const shell = await story.react(triggerEvent, {
162
- runtime,
198
+ const shell = await context.react(triggerEvent, {
199
+ runtime: runtime,
163
200
  context: { key: `dataset:${datasetId}` },
164
- durable: false,
165
- options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
201
+ durable: options.durable ?? false,
202
+ options: {
203
+ preventClose: true,
204
+ sendFinish: false,
205
+ maxIterations: 20,
206
+ maxModelSteps: 5,
207
+ },
208
+ __initialContent: {
209
+ ...(options.initialContent ?? {}),
210
+ datasetId,
211
+ inputDatasetIds: params.inputDatasetIds,
212
+ outputSchema: params.outputSchema,
213
+ instructions: params.instructions,
214
+ sandboxId: params.sandboxId ?? "",
215
+ sandboxState: params.sandboxState ?? { initialized: false, inputPaths: [] },
216
+ inputPreviews: params.inputPreviews,
217
+ contextResources: params.contextResources ?? [],
218
+ },
166
219
  });
167
- await shell.run;
220
+ await awaitContextRun(shell.run);
168
221
  return { datasetId };
169
222
  },
170
- story,
223
+ context,
171
224
  };
172
225
  }
173
- //# sourceMappingURL=transform-dataset.agent.js.map
226
+ export function registerTransformDatasetContext(opts) {
227
+ createTransformDatasetContextDefinition({
228
+ model: opts?.model,
229
+ reactor: opts?.reactor,
230
+ }).context;
231
+ }
232
+ registerTransformDatasetContext();
@@ -0,0 +1,30 @@
1
+ import type { TransformPromptContext, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
2
+ export declare function ensureTransformInputsInSandboxStep(params: {
3
+ runtime: any;
4
+ sandboxId: string;
5
+ datasetId: string;
6
+ inputDatasetIds: string[];
7
+ state: TransformSandboxState;
8
+ }): Promise<{
9
+ inputPaths: Array<{
10
+ datasetId: string;
11
+ path: string;
12
+ }>;
13
+ outputPath: string;
14
+ state: TransformSandboxState;
15
+ }>;
16
+ export declare function generateTransformInputPreviewsStep(params: {
17
+ runtime: any;
18
+ sandboxId: string;
19
+ datasetId: string;
20
+ inputPaths: Array<{
21
+ datasetId: string;
22
+ path: string;
23
+ }>;
24
+ }): Promise<Array<{
25
+ datasetId: string;
26
+ preview: TransformInputPreviewContext;
27
+ }>>;
28
+ export declare function buildTransformDatasetPromptStep(params: {
29
+ context: TransformPromptContext;
30
+ }): Promise<string>;
@@ -0,0 +1,61 @@
1
+ import { getDatasetOutputPath, getDatasetResourcesDir, getDatasetStandardDirs, } from "../datasetFiles.js";
2
+ import { datasetReadOutputJsonlStep } from "../dataset/steps.js";
3
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
4
+ import { generateInputPreview } from "./filepreview.js";
5
+ import { buildTransformDatasetPrompt } from "./prompts.js";
6
+ export async function ensureTransformInputsInSandboxStep(params) {
7
+ "use step";
8
+ if (params.state.initialized) {
9
+ return {
10
+ inputPaths: params.state.inputPaths,
11
+ outputPath: getDatasetOutputPath(params.datasetId),
12
+ state: params.state,
13
+ };
14
+ }
15
+ await runDatasetSandboxCommandStep({
16
+ runtime: params.runtime,
17
+ sandboxId: params.sandboxId,
18
+ cmd: "mkdir",
19
+ args: ["-p", ...getDatasetStandardDirs(params.datasetId)],
20
+ });
21
+ const inputPaths = [];
22
+ for (const inputDatasetId of params.inputDatasetIds) {
23
+ const inputPath = `${getDatasetResourcesDir(params.datasetId)}/resource_${inputDatasetId}.jsonl`;
24
+ const input = await datasetReadOutputJsonlStep({
25
+ runtime: params.runtime,
26
+ datasetId: inputDatasetId,
27
+ });
28
+ await writeDatasetSandboxFilesStep({
29
+ runtime: params.runtime,
30
+ sandboxId: params.sandboxId,
31
+ files: [{ path: inputPath, contentBase64: input.contentBase64 }],
32
+ });
33
+ inputPaths.push({ datasetId: inputDatasetId, path: inputPath });
34
+ }
35
+ return {
36
+ inputPaths,
37
+ outputPath: getDatasetOutputPath(params.datasetId),
38
+ state: {
39
+ initialized: true,
40
+ inputPaths,
41
+ },
42
+ };
43
+ }
44
+ export async function generateTransformInputPreviewsStep(params) {
45
+ "use step";
46
+ const inputPreviews = [];
47
+ for (const inputPath of params.inputPaths) {
48
+ try {
49
+ const preview = await generateInputPreview(params.runtime, params.sandboxId, inputPath.path, params.datasetId);
50
+ inputPreviews.push({ datasetId: inputPath.datasetId, preview });
51
+ }
52
+ catch {
53
+ // Input preview is optional; transformation can still read the JSONL files.
54
+ }
55
+ }
56
+ return inputPreviews;
57
+ }
58
+ export async function buildTransformDatasetPromptStep(params) {
59
+ "use step";
60
+ return buildTransformDatasetPrompt(params.context);
61
+ }
@@ -0,0 +1,96 @@
1
+ import type { StoredContextResource } from "@ekairos/events";
2
+ import type { ContextReactor } from "@ekairos/reactor/context";
3
+ import type { TransformInputPreviewContext } from "./filepreview.js";
4
+ export type { TransformInputPreviewContext } from "./filepreview.js";
5
+ export type TransformSandboxState = {
6
+ initialized: boolean;
7
+ inputPaths: Array<{
8
+ datasetId: string;
9
+ path: string;
10
+ }>;
11
+ };
12
+ export type TransformDatasetContext = {
13
+ datasetId: string;
14
+ inputDatasetIds: string[];
15
+ outputSchema: any;
16
+ sandboxConfig: {
17
+ inputPaths: Array<{
18
+ datasetId: string;
19
+ path: string;
20
+ }>;
21
+ outputPath: string;
22
+ };
23
+ inputPreviews?: Array<{
24
+ datasetId: string;
25
+ preview: TransformInputPreviewContext;
26
+ }>;
27
+ contextResources?: StoredContextResource[];
28
+ errors: string[];
29
+ iterationCount: number;
30
+ instructions?: string;
31
+ };
32
+ export type TransformDatasetAgentParams = {
33
+ inputDatasetIds?: string[];
34
+ outputSchema?: any;
35
+ instructions?: string;
36
+ datasetId?: string;
37
+ model?: string;
38
+ sandboxId?: string;
39
+ reactor?: ContextReactor<any, any>;
40
+ sandboxState?: TransformSandboxState;
41
+ inputPreviews?: Array<{
42
+ datasetId: string;
43
+ preview: TransformInputPreviewContext;
44
+ }>;
45
+ contextResources?: StoredContextResource[];
46
+ };
47
+ export type TransformDatasetRunOptions = {
48
+ prompt?: string;
49
+ durable?: boolean;
50
+ initialContent?: Record<string, any>;
51
+ };
52
+ export type TransformDatasetResult = {
53
+ id: string;
54
+ status?: string;
55
+ title?: string;
56
+ schema?: any;
57
+ analysis?: any;
58
+ calculatedTotalRows?: number;
59
+ actualGeneratedRowCount?: number;
60
+ createdAt?: number;
61
+ updatedAt?: number;
62
+ };
63
+ export type TransformPromptContext = {
64
+ datasetId: string;
65
+ inputDatasetIds: string[];
66
+ outputSchema: any;
67
+ sandboxConfig: {
68
+ inputPaths: Array<{
69
+ datasetId: string;
70
+ path: string;
71
+ }>;
72
+ outputPath: string;
73
+ };
74
+ inputPreviews?: Array<{
75
+ datasetId: string;
76
+ preview: {
77
+ totalRows: number;
78
+ metadata?: {
79
+ description: string;
80
+ script: string;
81
+ command: string;
82
+ stdout: string;
83
+ stderr: string;
84
+ };
85
+ head?: {
86
+ description: string;
87
+ script: string;
88
+ command: string;
89
+ stdout: string;
90
+ stderr: string;
91
+ };
92
+ };
93
+ }>;
94
+ contextResources?: StoredContextResource[];
95
+ errors: string[];
96
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -1,3 +1,4 @@
1
+ import type { AnyDatasetRuntime } from "../builder/types.js";
1
2
  export type TransformDatasetInput = {
2
3
  datasets: Array<{
3
4
  id: string;
@@ -14,7 +15,6 @@ export type TransformDatasetResult = {
14
15
  };
15
16
  /**
16
17
  * Workflow-compatible dataset transform.
17
- * Executes the transform story and returns datasetId + preview rows.
18
+ * Executes the transform context and returns datasetId + preview rows.
18
19
  */
19
- export declare function transformDataset(input: TransformDatasetInput): Promise<TransformDatasetResult>;
20
- //# sourceMappingURL=transformDataset.d.ts.map
20
+ export declare function transformDataset(runtime: AnyDatasetRuntime, input: TransformDatasetInput): Promise<TransformDatasetResult>;
@@ -1,11 +1,7 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.transformDataset = transformDataset;
4
- const runtime_1 = require("@ekairos/events/runtime");
5
- const steps_1 = require("../dataset/steps");
6
- const transform_dataset_agent_1 = require("./transform-dataset.agent");
1
+ import { datasetPreviewRowsStep } from "../dataset/steps.js";
2
+ import { createTransformDatasetContext } from "./transform-dataset.agent.js";
7
3
  function buildInstructions(input) {
8
- const sources = input.datasets
4
+ const inputs = input.datasets
9
5
  .map((d, idx) => {
10
6
  const name = d.description ? ` - ${d.description}` : "";
11
7
  return `${idx + 1}. ${d.id}${name}`;
@@ -16,8 +12,8 @@ function buildInstructions(input) {
16
12
  "Use pandas when helpful. Output must be JSONL with {type:'row', data:{...}} lines.",
17
13
  "Respect the provided output schema exactly.",
18
14
  "",
19
- "## Source Datasets",
20
- sources || "- (none)",
15
+ "## Input Datasets",
16
+ inputs || "- (none)",
21
17
  "",
22
18
  "## Transformation Description (LaTeX + sets)",
23
19
  String(input.description ?? "").trim(),
@@ -25,19 +21,20 @@ function buildInstructions(input) {
25
21
  }
26
22
  /**
27
23
  * Workflow-compatible dataset transform.
28
- * Executes the transform story and returns datasetId + preview rows.
24
+ * Executes the transform context and returns datasetId + preview rows.
29
25
  */
30
- async function transformDataset(input) {
31
- const env = await (0, runtime_1.getContextEnv)();
32
- const { datasetId, story } = (0, transform_dataset_agent_1.createTransformDatasetStory)({
33
- sourceDatasetIds: input.datasets.map((d) => d.id),
26
+ export async function transformDataset(runtime, input) {
27
+ const transformContext = createTransformDatasetContext({
28
+ inputDatasetIds: input.datasets.map((d) => d.id),
34
29
  outputSchema: input.outputSchema,
35
30
  instructions: buildInstructions(input),
36
31
  datasetId: input.datasetId,
37
32
  model: input.model,
38
33
  });
39
- await story.transform(env);
40
- const preview = await (0, steps_1.datasetPreviewRowsStep)({ datasetId });
41
- return { datasetId, previewRows: preview.rows };
34
+ await transformContext.transform(runtime);
35
+ const preview = await datasetPreviewRowsStep({
36
+ runtime,
37
+ datasetId: transformContext.datasetId,
38
+ });
39
+ return { datasetId: transformContext.datasetId, previewRows: preview.rows };
42
40
  }
43
- //# sourceMappingURL=transformDataset.js.map