@ekairos/dataset 1.22.40-beta.development.0 → 1.22.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/context.d.ts +15 -0
  6. package/dist/builder/context.js +251 -0
  7. package/dist/builder/instructions.d.ts +4 -5
  8. package/dist/builder/instructions.js +15 -21
  9. package/dist/builder/materialize.d.ts +77 -10
  10. package/dist/builder/materialize.js +495 -152
  11. package/dist/builder/materializeQuery.d.ts +12 -0
  12. package/dist/builder/materializeQuery.js +31 -0
  13. package/dist/builder/persistence.d.ts +10 -6
  14. package/dist/builder/persistence.js +107 -62
  15. package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -1
  16. package/dist/builder/{sourceRows.js → rows.js} +3 -9
  17. package/dist/builder/schemaInference.d.ts +1 -2
  18. package/dist/builder/schemaInference.js +4 -12
  19. package/dist/builder/types.d.ts +41 -26
  20. package/dist/builder/types.js +1 -3
  21. package/dist/clearDataset.tool.d.ts +2 -3
  22. package/dist/clearDataset.tool.js +13 -17
  23. package/dist/completeDataset.steps.d.ts +117 -0
  24. package/dist/completeDataset.steps.js +537 -0
  25. package/dist/completeDataset.tool.d.ts +132 -7
  26. package/dist/completeDataset.tool.js +46 -192
  27. package/dist/contextResources.d.ts +31 -0
  28. package/dist/contextResources.js +151 -0
  29. package/dist/contextWorkspace.d.ts +79 -0
  30. package/dist/contextWorkspace.js +234 -0
  31. package/dist/dataset/steps.d.ts +39 -15
  32. package/dist/dataset/steps.js +96 -39
  33. package/dist/dataset.d.ts +2 -3
  34. package/dist/dataset.js +73 -51
  35. package/dist/datasetFiles.d.ts +5 -1
  36. package/dist/datasetFiles.js +29 -27
  37. package/dist/defineNotation.tool.d.ts +49 -0
  38. package/dist/defineNotation.tool.js +154 -0
  39. package/dist/domain.d.ts +1 -2
  40. package/dist/domain.js +1 -6
  41. package/dist/executeCommand.tool.d.ts +2 -30
  42. package/dist/executeCommand.tool.js +165 -39
  43. package/dist/file/file-dataset.agent.d.ts +19 -56
  44. package/dist/file/file-dataset.agent.js +182 -136
  45. package/dist/file/file-dataset.steps.d.ts +27 -0
  46. package/dist/file/file-dataset.steps.js +47 -0
  47. package/dist/file/file-dataset.types.d.ts +64 -0
  48. package/dist/file/file-dataset.types.js +1 -0
  49. package/dist/file/filepreview.d.ts +5 -35
  50. package/dist/file/filepreview.js +60 -107
  51. package/dist/file/filepreview.types.d.ts +31 -0
  52. package/dist/file/filepreview.types.js +1 -0
  53. package/dist/file/generateSchema.tool.d.ts +2 -3
  54. package/dist/file/generateSchema.tool.js +11 -15
  55. package/dist/file/index.d.ts +1 -2
  56. package/dist/file/index.js +1 -18
  57. package/dist/file/prompts.d.ts +2 -3
  58. package/dist/file/prompts.js +152 -32
  59. package/dist/file/scripts.generated.d.ts +1 -0
  60. package/dist/file/scripts.generated.js +11 -0
  61. package/dist/file/steps.d.ts +1 -2
  62. package/dist/file/steps.js +9 -7
  63. package/dist/id.d.ts +1 -0
  64. package/dist/id.js +10 -0
  65. package/dist/index.d.ts +9 -7
  66. package/dist/index.js +9 -23
  67. package/dist/materializeDataset.tool.d.ts +35 -28
  68. package/dist/materializeDataset.tool.js +74 -68
  69. package/dist/notation.d.ts +205 -0
  70. package/dist/notation.js +424 -0
  71. package/dist/query/index.d.ts +1 -2
  72. package/dist/query/index.js +1 -18
  73. package/dist/query/queryDomain.d.ts +3 -4
  74. package/dist/query/queryDomain.js +3 -40
  75. package/dist/query/queryDomain.step.d.ts +1 -1
  76. package/dist/query/queryDomain.step.js +24 -13
  77. package/dist/sandbox/steps.d.ts +23 -15
  78. package/dist/sandbox/steps.js +73 -76
  79. package/dist/sandbox.steps.d.ts +1 -2
  80. package/dist/sandbox.steps.js +1 -18
  81. package/dist/schema.d.ts +14 -3
  82. package/dist/schema.js +27 -26
  83. package/dist/service.d.ts +12 -5
  84. package/dist/service.js +88 -15
  85. package/dist/skill.d.ts +0 -1
  86. package/dist/skill.js +12 -17
  87. package/dist/transform/filepreview.d.ts +2 -3
  88. package/dist/transform/filepreview.js +9 -26
  89. package/dist/transform/index.d.ts +2 -3
  90. package/dist/transform/index.js +2 -8
  91. package/dist/transform/prompts.d.ts +1 -34
  92. package/dist/transform/prompts.js +66 -46
  93. package/dist/transform/transform-dataset.agent.d.ts +21 -46
  94. package/dist/transform/transform-dataset.agent.js +152 -93
  95. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  96. package/dist/transform/transform-dataset.steps.js +61 -0
  97. package/dist/transform/transform-dataset.types.d.ts +96 -0
  98. package/dist/transform/transform-dataset.types.js +1 -0
  99. package/dist/transform/transformDataset.d.ts +3 -3
  100. package/dist/transform/transformDataset.js +15 -18
  101. package/dist/writeDatasetRows.tool.d.ts +188 -0
  102. package/dist/writeDatasetRows.tool.js +258 -0
  103. package/package.json +33 -8
  104. package/dist/builder/instructions.d.ts.map +0 -1
  105. package/dist/builder/instructions.js.map +0 -1
  106. package/dist/builder/materialize.d.ts.map +0 -1
  107. package/dist/builder/materialize.js.map +0 -1
  108. package/dist/builder/persistence.d.ts.map +0 -1
  109. package/dist/builder/persistence.js.map +0 -1
  110. package/dist/builder/schemaInference.d.ts.map +0 -1
  111. package/dist/builder/schemaInference.js.map +0 -1
  112. package/dist/builder/sourceRows.d.ts.map +0 -1
  113. package/dist/builder/sourceRows.js.map +0 -1
  114. package/dist/builder/types.d.ts.map +0 -1
  115. package/dist/builder/types.js.map +0 -1
  116. package/dist/clearDataset.tool.d.ts.map +0 -1
  117. package/dist/clearDataset.tool.js.map +0 -1
  118. package/dist/completeDataset.tool.d.ts.map +0 -1
  119. package/dist/completeDataset.tool.js.map +0 -1
  120. package/dist/dataset/steps.d.ts.map +0 -1
  121. package/dist/dataset/steps.js.map +0 -1
  122. package/dist/dataset.d.ts.map +0 -1
  123. package/dist/dataset.js.map +0 -1
  124. package/dist/datasetFiles.d.ts.map +0 -1
  125. package/dist/datasetFiles.js.map +0 -1
  126. package/dist/domain.d.ts.map +0 -1
  127. package/dist/domain.js.map +0 -1
  128. package/dist/eventsReactRuntime.d.ts +0 -22
  129. package/dist/eventsReactRuntime.d.ts.map +0 -1
  130. package/dist/eventsReactRuntime.js +0 -29
  131. package/dist/eventsReactRuntime.js.map +0 -1
  132. package/dist/executeCommand.tool.d.ts.map +0 -1
  133. package/dist/executeCommand.tool.js.map +0 -1
  134. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  135. package/dist/file/file-dataset.agent.js.map +0 -1
  136. package/dist/file/filepreview.d.ts.map +0 -1
  137. package/dist/file/filepreview.js.map +0 -1
  138. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  139. package/dist/file/generateSchema.tool.js.map +0 -1
  140. package/dist/file/index.d.ts.map +0 -1
  141. package/dist/file/index.js.map +0 -1
  142. package/dist/file/prompts.d.ts.map +0 -1
  143. package/dist/file/prompts.js.map +0 -1
  144. package/dist/file/steps.d.ts.map +0 -1
  145. package/dist/file/steps.js.map +0 -1
  146. package/dist/index.d.ts.map +0 -1
  147. package/dist/index.js.map +0 -1
  148. package/dist/materializeDataset.tool.d.ts.map +0 -1
  149. package/dist/materializeDataset.tool.js.map +0 -1
  150. package/dist/query/index.d.ts.map +0 -1
  151. package/dist/query/index.js.map +0 -1
  152. package/dist/query/queryDomain.d.ts.map +0 -1
  153. package/dist/query/queryDomain.js.map +0 -1
  154. package/dist/query/queryDomain.step.d.ts.map +0 -1
  155. package/dist/query/queryDomain.step.js.map +0 -1
  156. package/dist/sandbox/steps.d.ts.map +0 -1
  157. package/dist/sandbox/steps.js.map +0 -1
  158. package/dist/sandbox.steps.d.ts.map +0 -1
  159. package/dist/sandbox.steps.js.map +0 -1
  160. package/dist/schema.d.ts.map +0 -1
  161. package/dist/schema.js.map +0 -1
  162. package/dist/service.d.ts.map +0 -1
  163. package/dist/service.js.map +0 -1
  164. package/dist/skill.d.ts.map +0 -1
  165. package/dist/skill.js.map +0 -1
  166. package/dist/transform/filepreview.d.ts.map +0 -1
  167. package/dist/transform/filepreview.js.map +0 -1
  168. package/dist/transform/index.d.ts.map +0 -1
  169. package/dist/transform/index.js.map +0 -1
  170. package/dist/transform/prompts.d.ts.map +0 -1
  171. package/dist/transform/prompts.js.map +0 -1
  172. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  173. package/dist/transform/transform-dataset.agent.js.map +0 -1
  174. package/dist/transform/transformDataset.d.ts.map +0 -1
  175. package/dist/transform/transformDataset.js.map +0 -1
@@ -1,4 +1,4 @@
1
- export type TransformSourcePreviewContext = {
1
+ export type TransformInputPreviewContext = {
2
2
  totalRows: number;
3
3
  metadata?: {
4
4
  description: string;
@@ -18,6 +18,5 @@ export type TransformSourcePreviewContext = {
18
18
  interface PreviewOptions {
19
19
  headLines?: number;
20
20
  }
21
- export declare function generateSourcePreview(env: any, sandboxId: string, sourcePath: string, datasetId: string, options?: PreviewOptions): Promise<TransformSourcePreviewContext>;
21
+ export declare function generateInputPreview(runtime: any, sandboxId: string, inputPath: string, datasetId: string, options?: PreviewOptions): Promise<TransformInputPreviewContext>;
22
22
  export {};
23
- //# sourceMappingURL=filepreview.d.ts.map
@@ -1,39 +1,23 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.generateSourcePreview = generateSourcePreview;
4
- const datasetFiles_1 = require("../datasetFiles");
5
- const steps_1 = require("../sandbox/steps");
1
+ import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
6
2
  const DEFAULT_HEAD_LINES = 50;
7
- async function runPythonSnippet(env, sandboxId, datasetId, scriptName, code, args, description) {
8
- const workstation = (0, datasetFiles_1.getDatasetWorkstation)(datasetId);
9
- const scriptPath = `${workstation}/${scriptName}.py`;
10
- await (0, steps_1.writeDatasetSandboxFilesStep)({
11
- env,
12
- sandboxId,
13
- files: [
14
- {
15
- path: scriptPath,
16
- contentBase64: Buffer.from(code, "utf-8").toString("base64"),
17
- },
18
- ],
19
- });
20
- const result = await (0, steps_1.runDatasetSandboxCommandStep)({
21
- env,
3
+ async function runPythonSnippet(runtime, sandboxId, datasetId, scriptName, code, args, description) {
4
+ const result = await runDatasetSandboxCommandStep({
5
+ runtime,
22
6
  sandboxId,
23
7
  cmd: "python",
24
- args: [scriptPath, ...args],
8
+ args: ["-c", code, ...args],
25
9
  });
26
10
  const stdout = result.stdout || "";
27
11
  const stderr = result.stderr || "";
28
12
  return {
29
13
  description,
30
14
  script: code,
31
- command: `python ${scriptPath} ${args.join(" ")}`,
15
+ command: `python -c <${scriptName}.py> ${args.join(" ")}`,
32
16
  stdout,
33
17
  stderr,
34
18
  };
35
19
  }
36
- async function generateSourcePreview(env, sandboxId, sourcePath, datasetId, options = {}) {
20
+ export async function generateInputPreview(runtime, sandboxId, inputPath, datasetId, options = {}) {
37
21
  const context = {
38
22
  totalRows: 0,
39
23
  };
@@ -57,7 +41,7 @@ try:
57
41
  except Exception as e:
58
42
  print(str(e))
59
43
  `;
60
- const meta = await runPythonSnippet(env, sandboxId, datasetId, "jsonl_count", countScript, [sourcePath], "Counts number of JSONL records with type='row'");
44
+ const meta = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_count", countScript, [inputPath], "Counts number of JSONL records with type='row'");
61
45
  context.metadata = meta;
62
46
  try {
63
47
  if (meta.stdout) {
@@ -92,8 +76,7 @@ try:
92
76
  except Exception as e:
93
77
  print(str(e))
94
78
  `;
95
- const head = await runPythonSnippet(env, sandboxId, datasetId, "jsonl_head", headScript, [sourcePath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
79
+ const head = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_head", headScript, [inputPath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
96
80
  context.head = head;
97
81
  return context;
98
82
  }
99
- //# sourceMappingURL=filepreview.js.map
@@ -1,3 +1,2 @@
1
- export { createTransformDatasetStory, type TransformDatasetAgentParams, type TransformDatasetContext, } from "./transform-dataset.agent";
2
- export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset";
3
- //# sourceMappingURL=index.d.ts.map
1
+ export { createTransformDatasetContext, registerTransformDatasetContext, type TransformDatasetAgentParams, type TransformDatasetContext, type TransformDatasetRunOptions, } from "./transform-dataset.agent.js";
2
+ export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset.js";
@@ -1,8 +1,2 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.transformDataset = exports.createTransformDatasetStory = void 0;
4
- var transform_dataset_agent_1 = require("./transform-dataset.agent");
5
- Object.defineProperty(exports, "createTransformDatasetStory", { enumerable: true, get: function () { return transform_dataset_agent_1.createTransformDatasetStory; } });
6
- var transformDataset_1 = require("./transformDataset");
7
- Object.defineProperty(exports, "transformDataset", { enumerable: true, get: function () { return transformDataset_1.transformDataset; } });
8
- //# sourceMappingURL=index.js.map
1
+ export { createTransformDatasetContext, registerTransformDatasetContext, } from "./transform-dataset.agent.js";
2
+ export { transformDataset, } from "./transformDataset.js";
@@ -1,35 +1,2 @@
1
- export type TransformPromptContext = {
2
- datasetId: string;
3
- sourceDatasetIds: string[];
4
- outputSchema: any;
5
- sandboxConfig: {
6
- sourcePaths: Array<{
7
- datasetId: string;
8
- path: string;
9
- }>;
10
- outputPath: string;
11
- };
12
- sourcePreviews?: Array<{
13
- datasetId: string;
14
- preview: {
15
- totalRows: number;
16
- metadata?: {
17
- description: string;
18
- script: string;
19
- command: string;
20
- stdout: string;
21
- stderr: string;
22
- };
23
- head?: {
24
- description: string;
25
- script: string;
26
- command: string;
27
- stdout: string;
28
- stderr: string;
29
- };
30
- };
31
- }>;
32
- errors: string[];
33
- };
1
+ import type { TransformPromptContext } from "./transform-dataset.types.js";
34
2
  export declare function buildTransformDatasetPrompt(context: TransformPromptContext): string;
35
- //# sourceMappingURL=prompts.d.ts.map
@@ -1,45 +1,48 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.buildTransformDatasetPrompt = buildTransformDatasetPrompt;
4
- const xmlbuilder2_1 = require("xmlbuilder2");
1
+ import { create } from "xmlbuilder2";
5
2
  function buildRole() {
6
- let xml = (0, xmlbuilder2_1.create)()
3
+ let xml = create()
7
4
  .ele("Role")
8
- .txt("You are a dataset transformer. Your goal is to read one or more existing JSONL datasets and produce a NEW JSONL dataset whose records conform exactly to the provided output schema.")
5
+ .txt("You are a dataset transformer. Your goal is to read one or more input datasets/resources and produce a NEW dataset whose records conform exactly to the provided output schema.")
9
6
  .up();
10
7
  return xml.end({ prettyPrint: true, headless: true });
11
8
  }
12
9
  function buildGoal() {
13
- let xml = (0, xmlbuilder2_1.create)()
10
+ let xml = create()
14
11
  .ele("Goal")
15
- .txt("Transform the source dataset(s) (JSONL with {type:'row', data:{...}} per line) into a new dataset strictly matching the output schema. Save to output.jsonl in the dataset workstation. Each line must remain a single JSON object representing one record. You may need to combine, filter, or reshape data from multiple source datasets.")
12
+ .txt("Transform the input dataset(s) into a new dataset strictly matching the output schema. Use the lowest-cost direct completion tool that can produce the correct output. Use sandbox command execution only when commands are necessary to inspect, parse, aggregate, join, or compute over files/resources that cannot be handled directly from the visible context and previews.")
16
13
  .up();
17
14
  return xml.end({ prettyPrint: true, headless: true });
18
15
  }
19
16
  function buildContextSection(context) {
20
- let xml = (0, xmlbuilder2_1.create)()
17
+ let xml = create()
21
18
  .ele("Context")
22
19
  .ele("DatasetId").txt(context.datasetId).up();
23
- let sourcesXml = (0, xmlbuilder2_1.create)().ele("SourceDatasets");
24
- for (const sourceId of context.sourceDatasetIds) {
25
- sourcesXml = sourcesXml.ele("SourceDatasetId").txt(sourceId).up();
26
- }
27
- xml = xml.import(sourcesXml.first());
28
- let sandboxXml = (0, xmlbuilder2_1.create)().ele("Sandbox");
29
- for (const sourcePathInfo of context.sandboxConfig.sourcePaths) {
30
- sandboxXml = sandboxXml.ele("SourceFile")
31
- .ele("DatasetId").txt(sourcePathInfo.datasetId).up()
32
- .ele("Path").txt(sourcePathInfo.path).up()
33
- .up();
20
+ if (context.contextResources && context.contextResources.length > 0) {
21
+ let resourcesXml = create().ele("ContextResources");
22
+ for (const resource of context.contextResources) {
23
+ resourcesXml = resourcesXml
24
+ .ele("Resource")
25
+ .ele("Key").txt(String(resource.key)).up()
26
+ .ele("Type").txt(String(resource.type)).up()
27
+ .ele("Name").txt(String(resource.name)).up()
28
+ .ele("Description").txt(String(resource.description)).up()
29
+ .ele("DescriptorJson").txt(JSON.stringify(resource, null, 2)).up()
30
+ .up();
31
+ }
32
+ xml = xml.import(resourcesXml.first());
34
33
  }
34
+ let sandboxXml = create().ele("Sandbox");
35
+ sandboxXml = sandboxXml.ele("ContextResourcesPath").txt("/tmp/ekairos/contexts/{contextId}/resources").up();
36
+ sandboxXml = sandboxXml.ele("ResourcesManifest").txt("/tmp/ekairos/contexts/{contextId}/resources/manifest.json").up();
35
37
  sandboxXml = sandboxXml.ele("OutputPath").txt(context.sandboxConfig.outputPath).up();
38
+ sandboxXml = sandboxXml.ele("Note").txt("Context resources are materialized lazily only when executeCommand is called. Do not assume resource files exist unless you are using executeCommand. If executeCommand is used, read the manifest path from os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] inside Python.").up();
36
39
  xml = xml.import(sandboxXml.first());
37
- if (context.sourcePreviews && context.sourcePreviews.length > 0) {
38
- let previewsXml = (0, xmlbuilder2_1.create)().ele("SourcePreviews");
39
- for (const sourcePreviewInfo of context.sourcePreviews) {
40
- const sp = sourcePreviewInfo.preview;
41
- let px = (0, xmlbuilder2_1.create)().ele("SourcePreview")
42
- .ele("DatasetId").txt(sourcePreviewInfo.datasetId).up()
40
+ if (context.inputPreviews && context.inputPreviews.length > 0) {
41
+ let previewsXml = create().ele("InputPreviews");
42
+ for (const inputPreviewInfo of context.inputPreviews) {
43
+ const sp = inputPreviewInfo.preview;
44
+ let px = create().ele("InputPreview")
45
+ .ele("DatasetId").txt(inputPreviewInfo.datasetId).up()
43
46
  .ele("TotalRows").txt(String(sp.totalRows)).up();
44
47
  if (sp.metadata) {
45
48
  const m = sp.metadata;
@@ -71,7 +74,7 @@ function buildContextSection(context) {
71
74
  xml = xml.import(previewsXml.first());
72
75
  }
73
76
  if (Array.isArray(context.errors) && context.errors.length > 0) {
74
- let ex = (0, xmlbuilder2_1.create)().ele("PreviousErrors");
77
+ let ex = create().ele("PreviousErrors");
75
78
  for (const e of context.errors) {
76
79
  ex = ex.ele("Error").txt(e).up();
77
80
  }
@@ -81,7 +84,7 @@ function buildContextSection(context) {
81
84
  return xml.end({ prettyPrint: true, headless: true });
82
85
  }
83
86
  function buildOutputSchemaSection(context) {
84
- let xml = (0, xmlbuilder2_1.create)()
87
+ let xml = create()
85
88
  .ele("OutputSchema")
86
89
  .ele("JsonSchema").txt(JSON.stringify(context.outputSchema?.schema ?? context.outputSchema ?? {}, null, 2)).up()
87
90
  .up();
@@ -89,39 +92,57 @@ function buildOutputSchemaSection(context) {
89
92
  }
90
93
  function buildInstructions(context) {
91
94
  const outputPath = context.sandboxConfig.outputPath;
92
- const multipleSourcesNote = context.sourceDatasetIds.length > 1
93
- ? "You have multiple source datasets available. You may need to read, join, filter, or combine data from them to produce the output."
95
+ const multipleInputsNote = (context.contextResources?.length ?? context.inputDatasetIds.length) > 1
96
+ ? "You have multiple context resources available. You may need to read, join, filter, or combine data from them to produce the output."
94
97
  : "";
95
- let xml = (0, xmlbuilder2_1.create)()
98
+ let xml = create()
96
99
  .ele("Instructions")
97
100
  .ele("Workflow")
98
- .ele("Step", { number: "1", name: "Inspect Source" })
99
- .ele("Action").txt(`Review SourcePreviews to understand current record structures (data fields, shapes, edge cases). ${multipleSourcesNote}`).up()
101
+ .ele("Step", { number: "1", name: "Inspect Inputs" })
102
+ .ele("Action").txt(`Review ContextResources and any InputPreviews to understand current record structures, evidence, fields, shapes and edge cases. ${multipleInputsNote}`).up()
103
+ .ele("Note").txt("ContextResources DescriptorJson may include inline text, metadata, previewRows, or other visible evidence. Treat that visible content as already available context. Do not use executeCommand only to reread it.").up()
104
+ .up()
105
+ .ele("Step", { number: "2", name: "Define the Output Dataset (PLAN FIRST)" })
106
+ .ele("Action").txt("Call defineNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates the output set satisfies.").up()
107
+ .ele("Note").txt("The definition and the materialization (the transform code + output rows) are TWO CO-EQUAL FACES of the dataset; author the definition FIRST as the PLAN: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved). The definition is a formal proposition we trust — predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. REFINE the definition whenever inspection of the inputs reveals new sets, variables or corrections, and call defineNotation with final=true just before completing — as the RESULT it describes the produced output; any arithmetic predicates then get advisory evidence (never a verdict).").up()
100
108
  .up()
101
- .ele("Step", { number: "2", name: "Plan Mapping" })
102
- .ele("Action").txt("Plan a deterministic mapping from source data fields to the output schema fields (normalize names, types, and formats).").up()
103
- .ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple sources, decide how to combine or relate them.").up()
109
+ .ele("Step", { number: "3", name: "Plan Mapping" })
110
+ .ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
111
+ .ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple inputs, decide how to combine or relate them. Output field names must remain exactly as declared by the output schema.").up()
104
112
  .up()
105
- .ele("Step", { number: "3", name: "Transform" })
106
- .ele("Action").txt("Use executeCommand to run a Python script that reads source JSONL file(s) and writes transformed records to output.jsonl. Keep line-per-record JSON objects with { 'type': 'row', 'data': { ... } }.").up()
107
- .ele("Requirement").txt(`Write file to: ${outputPath}`).up()
113
+ .ele("Step", { number: "4", name: "Transform" })
114
+ .ele("Action").txt("For single-object output, use completeObject with the final object. For row output, use replaceRows with the final rows. Use executeCommand only when command execution is necessary, not merely convenient.").up()
115
+ .ele("Requirement").txt("Do not call completeObject until you have constructed the complete data object. completeObject requires data; a summary-only call is invalid and wastes a model iteration.").up()
116
+ .ele("Requirement").txt("Command execution is necessary only when the final output cannot be produced directly from the provided context, resource descriptors, or previews, and requires running code to inspect, parse, aggregate, join, or compute over files/resources.").up()
117
+ .ele("Requirement").txt("If the final output can be written directly from context already visible to you, do not use executeCommand. Do not use executeCommand just to format JSON, build an object, write output.jsonl, or make completion easier.").up()
118
+ .ele("Requirement").txt("Before using executeCommand, verify that direct completion is insufficient: you need file/resource contents not already visible in DescriptorJson or previews, deterministic computation over many rows, parsing/aggregation that is unreliable to do directly, or output too large/repetitive for direct completion. If none apply, command execution is not needed.").up()
119
+ .ele("Requirement").txt("When using executeCommand, provide commandDescription before the script runs. It must describe the inputs/resources used, operation performed, expected output, and why a command is the right tool.").up()
120
+ .ele("Requirement").txt("When executeCommand is used, context resources are materialized before the script runs at /tmp/ekairos/contexts/{contextId}/resources. The Python process receives EKAIROS_CONTEXT_RESOURCES_DIR and EKAIROS_CONTEXT_RESOURCES_MANIFEST environment variables. Read os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] inside the script to discover exact files and metadata. Manifest entries expose files as resource['files'][index]['path'].").up()
121
+ .ele("Requirement").txt("If only some resources are needed for a command, pass resourceKeys with the specific ContextResources keys. Omit resourceKeys only when the script truly needs all resources.").up()
122
+ .ele("Requirement").txt(`If executeCommand is used, write file to: ${outputPath}`).up()
123
+ .ele("Requirement").txt("Every data object MUST use the exact property names from OutputSchema required/properties keys. Do not translate, localize, rename, or infer alternative field names.").up()
108
124
  .ele("Requirement").txt("Do not print large data to stdout; only progress and summaries.").up()
125
+ .ele("Requirement").txt("Do not install packages, download dependencies, or access the network from executeCommand. Use only the available runtime and standard library unless a dependency is already present.").up()
109
126
  .up()
110
- .ele("Step", { number: "4", name: "Validate and Complete" })
111
- .ele("Action").txt("Call completeDataset to validate against the output schema and mark as completed.").up()
127
+ .ele("Step", { number: "5", name: "Validate and Complete" })
128
+ .ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced output), then: when using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
129
+ .ele("Behavior").txt("If any completion tool returns success:false, inspect validation details, repair the output, and call the appropriate completion tool again. Do not stop until a completion tool returns success:true.").up()
112
130
  .up()
113
131
  .up()
114
132
  .ele("Rules")
133
+ .ele("Rule").txt("The formal definition (defineNotation) and the materialization (transform code + output rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completing.").up()
115
134
  .ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
116
- .ele("Rule").txt("Each line in output.jsonl must be a standalone JSON object with {type:'row', data:{...}}.").up()
135
+ .ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
136
+ .ele("Rule").txt("Use the cheapest correct tool. completeObject and replaceRows are low-cost completion tools. executeCommand is a high-cost computation tool and requires an explicit commandDescription.").up()
137
+ .ele("Rule").txt("If using output.jsonl, each line must be a standalone JSON object with {type:'row', data:{...}}.").up()
117
138
  .ele("Rule").txt("Do not include headers, summaries, or metadata as records.").up()
118
- .ele("Rule").txt("Be robust to malformed lines in source: skip or sanitize, but do not crash.").up()
139
+ .ele("Rule").txt("Be robust to malformed lines in input: skip or sanitize, but do not crash.").up()
119
140
  .up()
120
- .ele("CurrentTask").txt("Transform source dataset(s) to match OutputSchema and write output.jsonl, then complete.").up()
141
+ .ele("CurrentTask").txt("Transform input dataset(s) to match OutputSchema and complete the dataset with the appropriate available tool.").up()
121
142
  .up();
122
143
  return xml.end({ prettyPrint: true, headless: true });
123
144
  }
124
- function buildTransformDatasetPrompt(context) {
145
+ export function buildTransformDatasetPrompt(context) {
125
146
  const sections = [];
126
147
  sections.push(buildRole());
127
148
  sections.push("");
@@ -134,4 +155,3 @@ function buildTransformDatasetPrompt(context) {
134
155
  sections.push(buildInstructions(context));
135
156
  return sections.join("\n");
136
157
  }
137
- //# sourceMappingURL=prompts.js.map
@@ -1,59 +1,34 @@
1
- import { type ContextReactor } from "@ekairos/events";
2
- import { TransformSourcePreviewContext } from "./filepreview";
3
- export type TransformDatasetContext = {
4
- datasetId: string;
5
- sourceDatasetIds: string[];
6
- outputSchema: any;
7
- sandboxConfig: {
8
- sourcePaths: Array<{
9
- datasetId: string;
10
- path: string;
11
- }>;
12
- outputPath: string;
13
- };
14
- sourcePreviews?: Array<{
15
- datasetId: string;
16
- preview: TransformSourcePreviewContext;
17
- }>;
18
- errors: string[];
19
- iterationCount: number;
20
- instructions?: string;
21
- };
22
- export type TransformDatasetAgentParams = {
23
- sourceDatasetIds: string[];
24
- outputSchema: any;
25
- instructions?: string;
26
- datasetId?: string;
27
- model?: string;
28
- sandboxId?: string;
29
- reactor?: ContextReactor<any, any>;
30
- };
31
- export type TransformDatasetResult = {
32
- id: string;
33
- status?: string;
34
- title?: string;
35
- schema?: any;
36
- analysis?: any;
37
- calculatedTotalRows?: number;
38
- actualGeneratedRowCount?: number;
39
- createdAt?: number;
40
- updatedAt?: number;
41
- };
42
- export declare function createTransformDatasetStory<Env extends {
1
+ import { type ContextReactor } from "@ekairos/reactor/context";
2
+ import type { TransformDatasetRunOptions, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
3
+ export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
4
+ export declare function createTransformDatasetContext<Env extends {
43
5
  orgId: string;
44
6
  }>(params: {
45
- sourceDatasetIds: string[];
7
+ inputDatasetIds: string[];
46
8
  outputSchema: any;
47
9
  instructions?: string;
48
10
  datasetId?: string;
49
11
  model?: string;
50
12
  sandboxId?: string;
51
13
  reactor?: ContextReactor<any, any>;
14
+ sandboxState?: TransformSandboxState;
15
+ inputPreviews?: Array<{
16
+ datasetId: string;
17
+ preview: TransformInputPreviewContext;
18
+ }>;
19
+ contextResources?: any[];
52
20
  }): {
53
21
  datasetId: string;
54
- transform(env: Env, prompt?: string): Promise<{
22
+ transform(runtime: {
23
+ env: Env;
24
+ }, options?: TransformDatasetRunOptions): Promise<{
55
25
  datasetId: string;
56
26
  }>;
57
- story: any;
27
+ context: any;
58
28
  };
59
- //# sourceMappingURL=transform-dataset.agent.d.ts.map
29
+ export declare function registerTransformDatasetContext<Env extends {
30
+ orgId: string;
31
+ }>(opts?: {
32
+ model?: string;
33
+ reactor?: ContextReactor<any, any>;
34
+ }): void;