@ekairos/dataset 1.22.37-beta.development.0 → 1.22.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/README.md +347 -0
  2. package/dist/agents.d.ts +8 -0
  3. package/dist/agents.js +8 -0
  4. package/dist/builder/agentMaterializers.d.ts +9 -0
  5. package/dist/builder/agentMaterializers.js +10 -0
  6. package/dist/builder/context.d.ts +15 -0
  7. package/dist/builder/context.js +251 -0
  8. package/dist/builder/instructions.d.ts +5 -0
  9. package/dist/builder/instructions.js +40 -0
  10. package/dist/builder/materialize.d.ts +83 -0
  11. package/dist/builder/materialize.js +548 -0
  12. package/dist/builder/materializeQuery.d.ts +12 -0
  13. package/dist/builder/materializeQuery.js +31 -0
  14. package/dist/builder/persistence.d.ts +22 -0
  15. package/dist/builder/persistence.js +153 -0
  16. package/dist/builder/rows.d.ts +7 -0
  17. package/dist/builder/rows.js +56 -0
  18. package/dist/builder/schemaInference.d.ts +3 -0
  19. package/dist/builder/schemaInference.js +61 -0
  20. package/dist/builder/types.d.ts +140 -0
  21. package/dist/builder/types.js +1 -0
  22. package/dist/clearDataset.tool.d.ts +2 -3
  23. package/dist/clearDataset.tool.js +13 -17
  24. package/dist/completeDataset.steps.d.ts +117 -0
  25. package/dist/completeDataset.steps.js +487 -0
  26. package/dist/completeDataset.tool.d.ts +132 -7
  27. package/dist/completeDataset.tool.js +46 -192
  28. package/dist/contextResources.d.ts +31 -0
  29. package/dist/contextResources.js +151 -0
  30. package/dist/contextWorkspace.d.ts +79 -0
  31. package/dist/contextWorkspace.js +234 -0
  32. package/dist/dataset/steps.d.ts +39 -15
  33. package/dist/dataset/steps.js +96 -39
  34. package/dist/dataset.d.ts +3 -67
  35. package/dist/dataset.js +129 -521
  36. package/dist/datasetFiles.d.ts +5 -1
  37. package/dist/datasetFiles.js +29 -27
  38. package/dist/domain.d.ts +1 -2
  39. package/dist/domain.js +1 -6
  40. package/dist/executeCommand.tool.d.ts +2 -30
  41. package/dist/executeCommand.tool.js +165 -39
  42. package/dist/file/file-dataset.agent.d.ts +19 -56
  43. package/dist/file/file-dataset.agent.js +176 -134
  44. package/dist/file/file-dataset.steps.d.ts +27 -0
  45. package/dist/file/file-dataset.steps.js +47 -0
  46. package/dist/file/file-dataset.types.d.ts +64 -0
  47. package/dist/file/file-dataset.types.js +1 -0
  48. package/dist/file/filepreview.d.ts +5 -35
  49. package/dist/file/filepreview.js +60 -107
  50. package/dist/file/filepreview.types.d.ts +31 -0
  51. package/dist/file/filepreview.types.js +1 -0
  52. package/dist/file/generateSchema.tool.d.ts +2 -3
  53. package/dist/file/generateSchema.tool.js +11 -15
  54. package/dist/file/index.d.ts +1 -2
  55. package/dist/file/index.js +1 -18
  56. package/dist/file/prompts.d.ts +2 -3
  57. package/dist/file/prompts.js +134 -27
  58. package/dist/file/scripts.generated.d.ts +1 -0
  59. package/dist/file/scripts.generated.js +11 -0
  60. package/dist/file/steps.d.ts +1 -2
  61. package/dist/file/steps.js +9 -7
  62. package/dist/id.d.ts +1 -0
  63. package/dist/id.js +10 -0
  64. package/dist/index.d.ts +8 -7
  65. package/dist/index.js +8 -23
  66. package/dist/materializeDataset.tool.d.ts +52 -32
  67. package/dist/materializeDataset.tool.js +81 -65
  68. package/dist/query/index.d.ts +1 -2
  69. package/dist/query/index.js +1 -18
  70. package/dist/query/queryDomain.d.ts +3 -4
  71. package/dist/query/queryDomain.js +3 -40
  72. package/dist/query/queryDomain.step.d.ts +1 -1
  73. package/dist/query/queryDomain.step.js +13 -13
  74. package/dist/sandbox/steps.d.ts +23 -15
  75. package/dist/sandbox/steps.js +73 -76
  76. package/dist/sandbox.steps.d.ts +1 -2
  77. package/dist/sandbox.steps.js +1 -18
  78. package/dist/schema.d.ts +13 -13
  79. package/dist/schema.js +25 -37
  80. package/dist/service.d.ts +8 -5
  81. package/dist/service.js +70 -15
  82. package/dist/skill.d.ts +0 -1
  83. package/dist/skill.js +12 -17
  84. package/dist/transform/filepreview.d.ts +2 -3
  85. package/dist/transform/filepreview.js +9 -26
  86. package/dist/transform/index.d.ts +2 -3
  87. package/dist/transform/index.js +2 -8
  88. package/dist/transform/prompts.d.ts +1 -34
  89. package/dist/transform/prompts.js +58 -43
  90. package/dist/transform/transform-dataset.agent.d.ts +20 -45
  91. package/dist/transform/transform-dataset.agent.js +146 -91
  92. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  93. package/dist/transform/transform-dataset.steps.js +61 -0
  94. package/dist/transform/transform-dataset.types.d.ts +95 -0
  95. package/dist/transform/transform-dataset.types.js +1 -0
  96. package/dist/transform/transformDataset.d.ts +3 -3
  97. package/dist/transform/transformDataset.js +15 -18
  98. package/dist/writeDatasetRows.tool.d.ts +188 -0
  99. package/dist/writeDatasetRows.tool.js +258 -0
  100. package/package.json +35 -10
  101. package/dist/clearDataset.tool.d.ts.map +0 -1
  102. package/dist/clearDataset.tool.js.map +0 -1
  103. package/dist/completeDataset.tool.d.ts.map +0 -1
  104. package/dist/completeDataset.tool.js.map +0 -1
  105. package/dist/dataset/steps.d.ts.map +0 -1
  106. package/dist/dataset/steps.js.map +0 -1
  107. package/dist/dataset.d.ts.map +0 -1
  108. package/dist/dataset.js.map +0 -1
  109. package/dist/datasetFiles.d.ts.map +0 -1
  110. package/dist/datasetFiles.js.map +0 -1
  111. package/dist/domain.d.ts.map +0 -1
  112. package/dist/domain.js.map +0 -1
  113. package/dist/eventsReactRuntime.d.ts +0 -22
  114. package/dist/eventsReactRuntime.d.ts.map +0 -1
  115. package/dist/eventsReactRuntime.js +0 -29
  116. package/dist/eventsReactRuntime.js.map +0 -1
  117. package/dist/executeCommand.tool.d.ts.map +0 -1
  118. package/dist/executeCommand.tool.js.map +0 -1
  119. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  120. package/dist/file/file-dataset.agent.js.map +0 -1
  121. package/dist/file/filepreview.d.ts.map +0 -1
  122. package/dist/file/filepreview.js.map +0 -1
  123. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  124. package/dist/file/generateSchema.tool.js.map +0 -1
  125. package/dist/file/index.d.ts.map +0 -1
  126. package/dist/file/index.js.map +0 -1
  127. package/dist/file/prompts.d.ts.map +0 -1
  128. package/dist/file/prompts.js.map +0 -1
  129. package/dist/file/steps.d.ts.map +0 -1
  130. package/dist/file/steps.js.map +0 -1
  131. package/dist/index.d.ts.map +0 -1
  132. package/dist/index.js.map +0 -1
  133. package/dist/materializeDataset.tool.d.ts.map +0 -1
  134. package/dist/materializeDataset.tool.js.map +0 -1
  135. package/dist/query/index.d.ts.map +0 -1
  136. package/dist/query/index.js.map +0 -1
  137. package/dist/query/queryDomain.d.ts.map +0 -1
  138. package/dist/query/queryDomain.js.map +0 -1
  139. package/dist/query/queryDomain.step.d.ts.map +0 -1
  140. package/dist/query/queryDomain.step.js.map +0 -1
  141. package/dist/sandbox/steps.d.ts.map +0 -1
  142. package/dist/sandbox/steps.js.map +0 -1
  143. package/dist/sandbox.steps.d.ts.map +0 -1
  144. package/dist/sandbox.steps.js.map +0 -1
  145. package/dist/schema.d.ts.map +0 -1
  146. package/dist/schema.js.map +0 -1
  147. package/dist/service.d.ts.map +0 -1
  148. package/dist/service.js.map +0 -1
  149. package/dist/skill.d.ts.map +0 -1
  150. package/dist/skill.js.map +0 -1
  151. package/dist/transform/filepreview.d.ts.map +0 -1
  152. package/dist/transform/filepreview.js.map +0 -1
  153. package/dist/transform/index.d.ts.map +0 -1
  154. package/dist/transform/index.js.map +0 -1
  155. package/dist/transform/prompts.d.ts.map +0 -1
  156. package/dist/transform/prompts.js.map +0 -1
  157. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  158. package/dist/transform/transform-dataset.agent.js.map +0 -1
  159. package/dist/transform/transformDataset.d.ts.map +0 -1
  160. package/dist/transform/transformDataset.js.map +0 -1
package/dist/skill.js CHANGED
@@ -1,48 +1,44 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.buildDatasetSkillPackage = buildDatasetSkillPackage;
7
- const node_fs_1 = require("node:fs");
8
- const node_path_1 = __importDefault(require("node:path"));
1
+ import { readdirSync, readFileSync, statSync } from "node:fs";
2
+ import path from "node:path";
3
+ import { fileURLToPath } from "node:url";
9
4
  function walkFiles(rootDir, currentDir = rootDir) {
10
- const entries = (0, node_fs_1.readdirSync)(currentDir, { withFileTypes: true });
5
+ const entries = readdirSync(currentDir, { withFileTypes: true });
11
6
  const files = [];
12
7
  for (const entry of entries) {
13
- const absPath = node_path_1.default.join(currentDir, entry.name);
8
+ const absPath = path.join(currentDir, entry.name);
14
9
  if (entry.isDirectory()) {
15
10
  files.push(...walkFiles(rootDir, absPath));
16
11
  continue;
17
12
  }
18
13
  if (entry.isFile()) {
19
- files.push(node_path_1.default.relative(rootDir, absPath).replace(/\\/g, "/"));
14
+ files.push(path.relative(rootDir, absPath).replace(/\\/g, "/"));
20
15
  }
21
16
  }
22
17
  return files;
23
18
  }
24
19
  function resolveDatasetSkillRoot() {
25
- const fromDist = node_path_1.default.resolve(__dirname, "..", "skill");
20
+ const currentDir = path.dirname(fileURLToPath(import.meta.url));
21
+ const fromDist = path.resolve(currentDir, "..", "skill");
26
22
  if (statExists(fromDist))
27
23
  return fromDist;
28
- const fromSrc = node_path_1.default.resolve(__dirname, "..", "..", "skill");
24
+ const fromSrc = path.resolve(currentDir, "..", "..", "skill");
29
25
  if (statExists(fromSrc))
30
26
  return fromSrc;
31
27
  throw new Error("dataset_skill_root_not_found");
32
28
  }
33
29
  function statExists(candidate) {
34
30
  try {
35
- return (0, node_fs_1.statSync)(candidate).isDirectory();
31
+ return statSync(candidate).isDirectory();
36
32
  }
37
33
  catch {
38
34
  return false;
39
35
  }
40
36
  }
41
- function buildDatasetSkillPackage() {
37
+ export function buildDatasetSkillPackage() {
42
38
  const skillRoot = resolveDatasetSkillRoot();
43
39
  const files = walkFiles(skillRoot).map((relativePath) => ({
44
40
  path: relativePath,
45
- contentBase64: (0, node_fs_1.readFileSync)(node_path_1.default.join(skillRoot, relativePath)).toString("base64"),
41
+ contentBase64: readFileSync(path.join(skillRoot, relativePath)).toString("base64"),
46
42
  }));
47
43
  return {
48
44
  name: "dataset",
@@ -50,4 +46,3 @@ function buildDatasetSkillPackage() {
50
46
  files,
51
47
  };
52
48
  }
53
- //# sourceMappingURL=skill.js.map
@@ -1,4 +1,4 @@
1
- export type TransformSourcePreviewContext = {
1
+ export type TransformInputPreviewContext = {
2
2
  totalRows: number;
3
3
  metadata?: {
4
4
  description: string;
@@ -18,6 +18,5 @@ export type TransformSourcePreviewContext = {
18
18
  interface PreviewOptions {
19
19
  headLines?: number;
20
20
  }
21
- export declare function generateSourcePreview(env: any, sandboxId: string, sourcePath: string, datasetId: string, options?: PreviewOptions): Promise<TransformSourcePreviewContext>;
21
+ export declare function generateInputPreview(runtime: any, sandboxId: string, inputPath: string, datasetId: string, options?: PreviewOptions): Promise<TransformInputPreviewContext>;
22
22
  export {};
23
- //# sourceMappingURL=filepreview.d.ts.map
@@ -1,39 +1,23 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.generateSourcePreview = generateSourcePreview;
4
- const datasetFiles_1 = require("../datasetFiles");
5
- const steps_1 = require("../sandbox/steps");
1
+ import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
6
2
  const DEFAULT_HEAD_LINES = 50;
7
- async function runPythonSnippet(env, sandboxId, datasetId, scriptName, code, args, description) {
8
- const workstation = (0, datasetFiles_1.getDatasetWorkstation)(datasetId);
9
- const scriptPath = `${workstation}/${scriptName}.py`;
10
- await (0, steps_1.writeDatasetSandboxFilesStep)({
11
- env,
12
- sandboxId,
13
- files: [
14
- {
15
- path: scriptPath,
16
- contentBase64: Buffer.from(code, "utf-8").toString("base64"),
17
- },
18
- ],
19
- });
20
- const result = await (0, steps_1.runDatasetSandboxCommandStep)({
21
- env,
3
+ async function runPythonSnippet(runtime, sandboxId, datasetId, scriptName, code, args, description) {
4
+ const result = await runDatasetSandboxCommandStep({
5
+ runtime,
22
6
  sandboxId,
23
7
  cmd: "python",
24
- args: [scriptPath, ...args],
8
+ args: ["-c", code, ...args],
25
9
  });
26
10
  const stdout = result.stdout || "";
27
11
  const stderr = result.stderr || "";
28
12
  return {
29
13
  description,
30
14
  script: code,
31
- command: `python ${scriptPath} ${args.join(" ")}`,
15
+ command: `python -c <${scriptName}.py> ${args.join(" ")}`,
32
16
  stdout,
33
17
  stderr,
34
18
  };
35
19
  }
36
- async function generateSourcePreview(env, sandboxId, sourcePath, datasetId, options = {}) {
20
+ export async function generateInputPreview(runtime, sandboxId, inputPath, datasetId, options = {}) {
37
21
  const context = {
38
22
  totalRows: 0,
39
23
  };
@@ -57,7 +41,7 @@ try:
57
41
  except Exception as e:
58
42
  print(str(e))
59
43
  `;
60
- const meta = await runPythonSnippet(env, sandboxId, datasetId, "jsonl_count", countScript, [sourcePath], "Counts number of JSONL records with type='row'");
44
+ const meta = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_count", countScript, [inputPath], "Counts number of JSONL records with type='row'");
61
45
  context.metadata = meta;
62
46
  try {
63
47
  if (meta.stdout) {
@@ -92,8 +76,7 @@ try:
92
76
  except Exception as e:
93
77
  print(str(e))
94
78
  `;
95
- const head = await runPythonSnippet(env, sandboxId, datasetId, "jsonl_head", headScript, [sourcePath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
79
+ const head = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_head", headScript, [inputPath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
96
80
  context.head = head;
97
81
  return context;
98
82
  }
99
- //# sourceMappingURL=filepreview.js.map
@@ -1,3 +1,2 @@
1
- export { createTransformDatasetStory, type TransformDatasetAgentParams, type TransformDatasetContext, } from "./transform-dataset.agent";
2
- export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset";
3
- //# sourceMappingURL=index.d.ts.map
1
+ export { createTransformDatasetContext, registerTransformDatasetContext, type TransformDatasetAgentParams, type TransformDatasetContext, type TransformDatasetRunOptions, } from "./transform-dataset.agent.js";
2
+ export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset.js";
@@ -1,8 +1,2 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.transformDataset = exports.createTransformDatasetStory = void 0;
4
- var transform_dataset_agent_1 = require("./transform-dataset.agent");
5
- Object.defineProperty(exports, "createTransformDatasetStory", { enumerable: true, get: function () { return transform_dataset_agent_1.createTransformDatasetStory; } });
6
- var transformDataset_1 = require("./transformDataset");
7
- Object.defineProperty(exports, "transformDataset", { enumerable: true, get: function () { return transformDataset_1.transformDataset; } });
8
- //# sourceMappingURL=index.js.map
1
+ export { createTransformDatasetContext, registerTransformDatasetContext, } from "./transform-dataset.agent.js";
2
+ export { transformDataset, } from "./transformDataset.js";
@@ -1,35 +1,2 @@
1
- export type TransformPromptContext = {
2
- datasetId: string;
3
- sourceDatasetIds: string[];
4
- outputSchema: any;
5
- sandboxConfig: {
6
- sourcePaths: Array<{
7
- datasetId: string;
8
- path: string;
9
- }>;
10
- outputPath: string;
11
- };
12
- sourcePreviews?: Array<{
13
- datasetId: string;
14
- preview: {
15
- totalRows: number;
16
- metadata?: {
17
- description: string;
18
- script: string;
19
- command: string;
20
- stdout: string;
21
- stderr: string;
22
- };
23
- head?: {
24
- description: string;
25
- script: string;
26
- command: string;
27
- stdout: string;
28
- stderr: string;
29
- };
30
- };
31
- }>;
32
- errors: string[];
33
- };
1
+ import type { TransformPromptContext } from "./transform-dataset.types.js";
34
2
  export declare function buildTransformDatasetPrompt(context: TransformPromptContext): string;
35
- //# sourceMappingURL=prompts.d.ts.map
@@ -1,45 +1,48 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.buildTransformDatasetPrompt = buildTransformDatasetPrompt;
4
- const xmlbuilder2_1 = require("xmlbuilder2");
1
+ import { create } from "xmlbuilder2";
5
2
  function buildRole() {
6
- let xml = (0, xmlbuilder2_1.create)()
3
+ let xml = create()
7
4
  .ele("Role")
8
- .txt("You are a dataset transformer. Your goal is to read one or more existing JSONL datasets and produce a NEW JSONL dataset whose records conform exactly to the provided output schema.")
5
+ .txt("You are a dataset transformer. Your goal is to read one or more input datasets/resources and produce a NEW dataset whose records conform exactly to the provided output schema.")
9
6
  .up();
10
7
  return xml.end({ prettyPrint: true, headless: true });
11
8
  }
12
9
  function buildGoal() {
13
- let xml = (0, xmlbuilder2_1.create)()
10
+ let xml = create()
14
11
  .ele("Goal")
15
- .txt("Transform the source dataset(s) (JSONL with {type:'row', data:{...}} per line) into a new dataset strictly matching the output schema. Save to output.jsonl in the dataset workstation. Each line must remain a single JSON object representing one record. You may need to combine, filter, or reshape data from multiple source datasets.")
12
+ .txt("Transform the input dataset(s) into a new dataset strictly matching the output schema. Use the lowest-cost direct completion tool that can produce the correct output. Use sandbox command execution only when commands are necessary to inspect, parse, aggregate, join, or compute over files/resources that cannot be handled directly from the visible context and previews.")
16
13
  .up();
17
14
  return xml.end({ prettyPrint: true, headless: true });
18
15
  }
19
16
  function buildContextSection(context) {
20
- let xml = (0, xmlbuilder2_1.create)()
17
+ let xml = create()
21
18
  .ele("Context")
22
19
  .ele("DatasetId").txt(context.datasetId).up();
23
- let sourcesXml = (0, xmlbuilder2_1.create)().ele("SourceDatasets");
24
- for (const sourceId of context.sourceDatasetIds) {
25
- sourcesXml = sourcesXml.ele("SourceDatasetId").txt(sourceId).up();
26
- }
27
- xml = xml.import(sourcesXml.first());
28
- let sandboxXml = (0, xmlbuilder2_1.create)().ele("Sandbox");
29
- for (const sourcePathInfo of context.sandboxConfig.sourcePaths) {
30
- sandboxXml = sandboxXml.ele("SourceFile")
31
- .ele("DatasetId").txt(sourcePathInfo.datasetId).up()
32
- .ele("Path").txt(sourcePathInfo.path).up()
33
- .up();
20
+ if (context.contextResources && context.contextResources.length > 0) {
21
+ let resourcesXml = create().ele("ContextResources");
22
+ for (const resource of context.contextResources) {
23
+ resourcesXml = resourcesXml
24
+ .ele("Resource")
25
+ .ele("Key").txt(String(resource.key)).up()
26
+ .ele("Type").txt(String(resource.type)).up()
27
+ .ele("Name").txt(String(resource.name)).up()
28
+ .ele("Description").txt(String(resource.description)).up()
29
+ .ele("DescriptorJson").txt(JSON.stringify(resource, null, 2)).up()
30
+ .up();
31
+ }
32
+ xml = xml.import(resourcesXml.first());
34
33
  }
34
+ let sandboxXml = create().ele("Sandbox");
35
+ sandboxXml = sandboxXml.ele("ContextResourcesPath").txt("/tmp/ekairos/contexts/{contextId}/resources").up();
36
+ sandboxXml = sandboxXml.ele("ResourcesManifest").txt("/tmp/ekairos/contexts/{contextId}/resources/manifest.json").up();
35
37
  sandboxXml = sandboxXml.ele("OutputPath").txt(context.sandboxConfig.outputPath).up();
38
+ sandboxXml = sandboxXml.ele("Note").txt("Context resources are materialized lazily only when executeCommand is called. Do not assume resource files exist unless you are using executeCommand. If executeCommand is used, read the manifest path from os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] inside Python.").up();
36
39
  xml = xml.import(sandboxXml.first());
37
- if (context.sourcePreviews && context.sourcePreviews.length > 0) {
38
- let previewsXml = (0, xmlbuilder2_1.create)().ele("SourcePreviews");
39
- for (const sourcePreviewInfo of context.sourcePreviews) {
40
- const sp = sourcePreviewInfo.preview;
41
- let px = (0, xmlbuilder2_1.create)().ele("SourcePreview")
42
- .ele("DatasetId").txt(sourcePreviewInfo.datasetId).up()
40
+ if (context.inputPreviews && context.inputPreviews.length > 0) {
41
+ let previewsXml = create().ele("InputPreviews");
42
+ for (const inputPreviewInfo of context.inputPreviews) {
43
+ const sp = inputPreviewInfo.preview;
44
+ let px = create().ele("InputPreview")
45
+ .ele("DatasetId").txt(inputPreviewInfo.datasetId).up()
43
46
  .ele("TotalRows").txt(String(sp.totalRows)).up();
44
47
  if (sp.metadata) {
45
48
  const m = sp.metadata;
@@ -71,7 +74,7 @@ function buildContextSection(context) {
71
74
  xml = xml.import(previewsXml.first());
72
75
  }
73
76
  if (Array.isArray(context.errors) && context.errors.length > 0) {
74
- let ex = (0, xmlbuilder2_1.create)().ele("PreviousErrors");
77
+ let ex = create().ele("PreviousErrors");
75
78
  for (const e of context.errors) {
76
79
  ex = ex.ele("Error").txt(e).up();
77
80
  }
@@ -81,7 +84,7 @@ function buildContextSection(context) {
81
84
  return xml.end({ prettyPrint: true, headless: true });
82
85
  }
83
86
  function buildOutputSchemaSection(context) {
84
- let xml = (0, xmlbuilder2_1.create)()
87
+ let xml = create()
85
88
  .ele("OutputSchema")
86
89
  .ele("JsonSchema").txt(JSON.stringify(context.outputSchema?.schema ?? context.outputSchema ?? {}, null, 2)).up()
87
90
  .up();
@@ -89,39 +92,52 @@ function buildOutputSchemaSection(context) {
89
92
  }
90
93
  function buildInstructions(context) {
91
94
  const outputPath = context.sandboxConfig.outputPath;
92
- const multipleSourcesNote = context.sourceDatasetIds.length > 1
93
- ? "You have multiple source datasets available. You may need to read, join, filter, or combine data from them to produce the output."
95
+ const multipleInputsNote = (context.contextResources?.length ?? context.inputDatasetIds.length) > 1
96
+ ? "You have multiple context resources available. You may need to read, join, filter, or combine data from them to produce the output."
94
97
  : "";
95
- let xml = (0, xmlbuilder2_1.create)()
98
+ let xml = create()
96
99
  .ele("Instructions")
97
100
  .ele("Workflow")
98
- .ele("Step", { number: "1", name: "Inspect Source" })
99
- .ele("Action").txt(`Review SourcePreviews to understand current record structures (data fields, shapes, edge cases). ${multipleSourcesNote}`).up()
101
+ .ele("Step", { number: "1", name: "Inspect Inputs" })
102
+ .ele("Action").txt(`Review ContextResources and any InputPreviews to understand current record structures, evidence, fields, shapes and edge cases. ${multipleInputsNote}`).up()
103
+ .ele("Note").txt("ContextResources DescriptorJson may include inline text, metadata, previewRows, or other visible evidence. Treat that visible content as already available context. Do not use executeCommand only to reread it.").up()
100
104
  .up()
101
105
  .ele("Step", { number: "2", name: "Plan Mapping" })
102
- .ele("Action").txt("Plan a deterministic mapping from source data fields to the output schema fields (normalize names, types, and formats).").up()
103
- .ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple sources, decide how to combine or relate them.").up()
106
+ .ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
107
+ .ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple inputs, decide how to combine or relate them. Output field names must remain exactly as declared by the output schema.").up()
104
108
  .up()
105
109
  .ele("Step", { number: "3", name: "Transform" })
106
- .ele("Action").txt("Use executeCommand to run a Python script that reads source JSONL file(s) and writes transformed records to output.jsonl. Keep line-per-record JSON objects with { 'type': 'row', 'data': { ... } }.").up()
107
- .ele("Requirement").txt(`Write file to: ${outputPath}`).up()
110
+ .ele("Action").txt("For single-object output, use completeObject with the final object. For row output, use replaceRows with the final rows. Use executeCommand only when command execution is necessary, not merely convenient.").up()
111
+ .ele("Requirement").txt("Do not call completeObject until you have constructed the complete data object. completeObject requires data; a summary-only call is invalid and wastes a model iteration.").up()
112
+ .ele("Requirement").txt("Command execution is necessary only when the final output cannot be produced directly from the provided context, resource descriptors, or previews, and requires running code to inspect, parse, aggregate, join, or compute over files/resources.").up()
113
+ .ele("Requirement").txt("If the final output can be written directly from context already visible to you, do not use executeCommand. Do not use executeCommand just to format JSON, build an object, write output.jsonl, or make completion easier.").up()
114
+ .ele("Requirement").txt("Before using executeCommand, verify that direct completion is insufficient: you need file/resource contents not already visible in DescriptorJson or previews, deterministic computation over many rows, parsing/aggregation that is unreliable to do directly, or output too large/repetitive for direct completion. If none apply, command execution is not needed.").up()
115
+ .ele("Requirement").txt("When using executeCommand, provide commandDescription before the script runs. It must describe the inputs/resources used, operation performed, expected output, and why a command is the right tool.").up()
116
+ .ele("Requirement").txt("When executeCommand is used, context resources are materialized before the script runs at /tmp/ekairos/contexts/{contextId}/resources. The Python process receives EKAIROS_CONTEXT_RESOURCES_DIR and EKAIROS_CONTEXT_RESOURCES_MANIFEST environment variables. Read os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] inside the script to discover exact files and metadata. Manifest entries expose files as resource['files'][index]['path'].").up()
117
+ .ele("Requirement").txt("If only some resources are needed for a command, pass resourceKeys with the specific ContextResources keys. Omit resourceKeys only when the script truly needs all resources.").up()
118
+ .ele("Requirement").txt(`If executeCommand is used, write file to: ${outputPath}`).up()
119
+ .ele("Requirement").txt("Every data object MUST use the exact property names from OutputSchema required/properties keys. Do not translate, localize, rename, or infer alternative field names.").up()
108
120
  .ele("Requirement").txt("Do not print large data to stdout; only progress and summaries.").up()
121
+ .ele("Requirement").txt("Do not install packages, download dependencies, or access the network from executeCommand. Use only the available runtime and standard library unless a dependency is already present.").up()
109
122
  .up()
110
123
  .ele("Step", { number: "4", name: "Validate and Complete" })
111
- .ele("Action").txt("Call completeDataset to validate against the output schema and mark as completed.").up()
124
+ .ele("Action").txt("When using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
125
+ .ele("Behavior").txt("If any completion tool returns success:false, inspect validation details, repair the output, and call the appropriate completion tool again. Do not stop until a completion tool returns success:true.").up()
112
126
  .up()
113
127
  .up()
114
128
  .ele("Rules")
115
129
  .ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
116
- .ele("Rule").txt("Each line in output.jsonl must be a standalone JSON object with {type:'row', data:{...}}.").up()
130
+ .ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
131
+ .ele("Rule").txt("Use the cheapest correct tool. completeObject and replaceRows are low-cost completion tools. executeCommand is a high-cost computation tool and requires an explicit commandDescription.").up()
132
+ .ele("Rule").txt("If using output.jsonl, each line must be a standalone JSON object with {type:'row', data:{...}}.").up()
117
133
  .ele("Rule").txt("Do not include headers, summaries, or metadata as records.").up()
118
- .ele("Rule").txt("Be robust to malformed lines in source: skip or sanitize, but do not crash.").up()
134
+ .ele("Rule").txt("Be robust to malformed lines in input: skip or sanitize, but do not crash.").up()
119
135
  .up()
120
- .ele("CurrentTask").txt("Transform source dataset(s) to match OutputSchema and write output.jsonl, then complete.").up()
136
+ .ele("CurrentTask").txt("Transform input dataset(s) to match OutputSchema and complete the dataset with the appropriate available tool.").up()
121
137
  .up();
122
138
  return xml.end({ prettyPrint: true, headless: true });
123
139
  }
124
- function buildTransformDatasetPrompt(context) {
140
+ export function buildTransformDatasetPrompt(context) {
125
141
  const sections = [];
126
142
  sections.push(buildRole());
127
143
  sections.push("");
@@ -134,4 +150,3 @@ function buildTransformDatasetPrompt(context) {
134
150
  sections.push(buildInstructions(context));
135
151
  return sections.join("\n");
136
152
  }
137
- //# sourceMappingURL=prompts.js.map
@@ -1,59 +1,34 @@
1
1
  import { type ContextReactor } from "@ekairos/events";
2
- import { TransformSourcePreviewContext } from "./filepreview";
3
- export type TransformDatasetContext = {
4
- datasetId: string;
5
- sourceDatasetIds: string[];
6
- outputSchema: any;
7
- sandboxConfig: {
8
- sourcePaths: Array<{
9
- datasetId: string;
10
- path: string;
11
- }>;
12
- outputPath: string;
13
- };
14
- sourcePreviews?: Array<{
15
- datasetId: string;
16
- preview: TransformSourcePreviewContext;
17
- }>;
18
- errors: string[];
19
- iterationCount: number;
20
- instructions?: string;
21
- };
22
- export type TransformDatasetAgentParams = {
23
- sourceDatasetIds: string[];
24
- outputSchema: any;
25
- instructions?: string;
26
- datasetId?: string;
27
- model?: string;
28
- sandboxId?: string;
29
- reactor?: ContextReactor<any, any>;
30
- };
31
- export type TransformDatasetResult = {
32
- id: string;
33
- status?: string;
34
- title?: string;
35
- schema?: any;
36
- analysis?: any;
37
- calculatedTotalRows?: number;
38
- actualGeneratedRowCount?: number;
39
- createdAt?: number;
40
- updatedAt?: number;
41
- };
42
- export declare function createTransformDatasetStory<Env extends {
2
+ import type { TransformDatasetRunOptions, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
3
+ export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
4
+ export declare function createTransformDatasetContext<Env extends {
43
5
  orgId: string;
44
6
  }>(params: {
45
- sourceDatasetIds: string[];
7
+ inputDatasetIds: string[];
46
8
  outputSchema: any;
47
9
  instructions?: string;
48
10
  datasetId?: string;
49
11
  model?: string;
50
12
  sandboxId?: string;
51
13
  reactor?: ContextReactor<any, any>;
14
+ sandboxState?: TransformSandboxState;
15
+ inputPreviews?: Array<{
16
+ datasetId: string;
17
+ preview: TransformInputPreviewContext;
18
+ }>;
19
+ contextResources?: any[];
52
20
  }): {
53
21
  datasetId: string;
54
- transform(env: Env, prompt?: string): Promise<{
22
+ transform(runtime: {
23
+ env: Env;
24
+ }, options?: TransformDatasetRunOptions): Promise<{
55
25
  datasetId: string;
56
26
  }>;
57
- story: any;
27
+ context: any;
58
28
  };
59
- //# sourceMappingURL=transform-dataset.agent.d.ts.map
29
+ export declare function registerTransformDatasetContext<Env extends {
30
+ orgId: string;
31
+ }>(opts?: {
32
+ model?: string;
33
+ reactor?: ContextReactor<any, any>;
34
+ }): void;