@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. package/README.md +347 -0
  2. package/dist/agents.d.ts +8 -0
  3. package/dist/agents.js +8 -0
  4. package/dist/builder/agentMaterializers.d.ts +9 -0
  5. package/dist/builder/agentMaterializers.js +10 -0
  6. package/dist/builder/context.d.ts +15 -0
  7. package/dist/builder/context.js +251 -0
  8. package/dist/builder/instructions.d.ts +5 -0
  9. package/dist/builder/instructions.js +40 -0
  10. package/dist/builder/materialize.d.ts +83 -0
  11. package/dist/builder/materialize.js +548 -0
  12. package/dist/builder/materializeQuery.d.ts +12 -0
  13. package/dist/builder/materializeQuery.js +31 -0
  14. package/dist/builder/persistence.d.ts +22 -0
  15. package/dist/builder/persistence.js +192 -0
  16. package/dist/builder/rows.d.ts +7 -0
  17. package/dist/builder/rows.js +56 -0
  18. package/dist/builder/schemaInference.d.ts +3 -0
  19. package/dist/builder/schemaInference.js +61 -0
  20. package/dist/builder/types.d.ts +144 -0
  21. package/dist/builder/types.js +1 -0
  22. package/dist/clearDataset.tool.d.ts +2 -3
  23. package/dist/clearDataset.tool.js +13 -17
  24. package/dist/completeDataset.steps.d.ts +117 -0
  25. package/dist/completeDataset.steps.js +537 -0
  26. package/dist/completeDataset.tool.d.ts +132 -7
  27. package/dist/completeDataset.tool.js +46 -192
  28. package/dist/contextResources.d.ts +31 -0
  29. package/dist/contextResources.js +151 -0
  30. package/dist/contextWorkspace.d.ts +79 -0
  31. package/dist/contextWorkspace.js +234 -0
  32. package/dist/dataset/steps.d.ts +39 -15
  33. package/dist/dataset/steps.js +96 -39
  34. package/dist/dataset.d.ts +3 -67
  35. package/dist/dataset.js +129 -521
  36. package/dist/datasetFiles.d.ts +5 -1
  37. package/dist/datasetFiles.js +29 -27
  38. package/dist/defineNotation.tool.d.ts +49 -0
  39. package/dist/defineNotation.tool.js +154 -0
  40. package/dist/domain.d.ts +1 -2
  41. package/dist/domain.js +1 -6
  42. package/dist/executeCommand.tool.d.ts +2 -30
  43. package/dist/executeCommand.tool.js +165 -39
  44. package/dist/file/file-dataset.agent.d.ts +19 -56
  45. package/dist/file/file-dataset.agent.js +181 -134
  46. package/dist/file/file-dataset.steps.d.ts +27 -0
  47. package/dist/file/file-dataset.steps.js +47 -0
  48. package/dist/file/file-dataset.types.d.ts +64 -0
  49. package/dist/file/file-dataset.types.js +1 -0
  50. package/dist/file/filepreview.d.ts +5 -35
  51. package/dist/file/filepreview.js +60 -107
  52. package/dist/file/filepreview.types.d.ts +31 -0
  53. package/dist/file/filepreview.types.js +1 -0
  54. package/dist/file/generateSchema.tool.d.ts +2 -3
  55. package/dist/file/generateSchema.tool.js +11 -15
  56. package/dist/file/index.d.ts +1 -2
  57. package/dist/file/index.js +1 -18
  58. package/dist/file/prompts.d.ts +2 -3
  59. package/dist/file/prompts.js +152 -32
  60. package/dist/file/scripts.generated.d.ts +1 -0
  61. package/dist/file/scripts.generated.js +11 -0
  62. package/dist/file/steps.d.ts +1 -2
  63. package/dist/file/steps.js +9 -7
  64. package/dist/id.d.ts +1 -0
  65. package/dist/id.js +10 -0
  66. package/dist/index.d.ts +9 -7
  67. package/dist/index.js +9 -23
  68. package/dist/materializeDataset.tool.d.ts +51 -31
  69. package/dist/materializeDataset.tool.js +81 -65
  70. package/dist/notation.d.ts +205 -0
  71. package/dist/notation.js +424 -0
  72. package/dist/query/index.d.ts +1 -2
  73. package/dist/query/index.js +1 -18
  74. package/dist/query/queryDomain.d.ts +3 -4
  75. package/dist/query/queryDomain.js +3 -40
  76. package/dist/query/queryDomain.step.d.ts +1 -1
  77. package/dist/query/queryDomain.step.js +24 -13
  78. package/dist/sandbox/steps.d.ts +23 -15
  79. package/dist/sandbox/steps.js +73 -76
  80. package/dist/sandbox.steps.d.ts +1 -2
  81. package/dist/sandbox.steps.js +1 -18
  82. package/dist/schema.d.ts +15 -13
  83. package/dist/schema.js +27 -37
  84. package/dist/service.d.ts +12 -5
  85. package/dist/service.js +88 -15
  86. package/dist/skill.d.ts +0 -1
  87. package/dist/skill.js +12 -17
  88. package/dist/transform/filepreview.d.ts +2 -3
  89. package/dist/transform/filepreview.js +9 -26
  90. package/dist/transform/index.d.ts +2 -3
  91. package/dist/transform/index.js +2 -8
  92. package/dist/transform/prompts.d.ts +1 -34
  93. package/dist/transform/prompts.js +66 -46
  94. package/dist/transform/transform-dataset.agent.d.ts +20 -45
  95. package/dist/transform/transform-dataset.agent.js +151 -91
  96. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  97. package/dist/transform/transform-dataset.steps.js +61 -0
  98. package/dist/transform/transform-dataset.types.d.ts +95 -0
  99. package/dist/transform/transform-dataset.types.js +1 -0
  100. package/dist/transform/transformDataset.d.ts +3 -3
  101. package/dist/transform/transformDataset.js +15 -18
  102. package/dist/writeDatasetRows.tool.d.ts +188 -0
  103. package/dist/writeDatasetRows.tool.js +258 -0
  104. package/package.json +33 -8
  105. package/dist/clearDataset.tool.d.ts.map +0 -1
  106. package/dist/clearDataset.tool.js.map +0 -1
  107. package/dist/completeDataset.tool.d.ts.map +0 -1
  108. package/dist/completeDataset.tool.js.map +0 -1
  109. package/dist/dataset/steps.d.ts.map +0 -1
  110. package/dist/dataset/steps.js.map +0 -1
  111. package/dist/dataset.d.ts.map +0 -1
  112. package/dist/dataset.js.map +0 -1
  113. package/dist/datasetFiles.d.ts.map +0 -1
  114. package/dist/datasetFiles.js.map +0 -1
  115. package/dist/domain.d.ts.map +0 -1
  116. package/dist/domain.js.map +0 -1
  117. package/dist/eventsReactRuntime.d.ts +0 -22
  118. package/dist/eventsReactRuntime.d.ts.map +0 -1
  119. package/dist/eventsReactRuntime.js +0 -29
  120. package/dist/eventsReactRuntime.js.map +0 -1
  121. package/dist/executeCommand.tool.d.ts.map +0 -1
  122. package/dist/executeCommand.tool.js.map +0 -1
  123. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  124. package/dist/file/file-dataset.agent.js.map +0 -1
  125. package/dist/file/filepreview.d.ts.map +0 -1
  126. package/dist/file/filepreview.js.map +0 -1
  127. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  128. package/dist/file/generateSchema.tool.js.map +0 -1
  129. package/dist/file/index.d.ts.map +0 -1
  130. package/dist/file/index.js.map +0 -1
  131. package/dist/file/prompts.d.ts.map +0 -1
  132. package/dist/file/prompts.js.map +0 -1
  133. package/dist/file/steps.d.ts.map +0 -1
  134. package/dist/file/steps.js.map +0 -1
  135. package/dist/index.d.ts.map +0 -1
  136. package/dist/index.js.map +0 -1
  137. package/dist/materializeDataset.tool.d.ts.map +0 -1
  138. package/dist/materializeDataset.tool.js.map +0 -1
  139. package/dist/query/index.d.ts.map +0 -1
  140. package/dist/query/index.js.map +0 -1
  141. package/dist/query/queryDomain.d.ts.map +0 -1
  142. package/dist/query/queryDomain.js.map +0 -1
  143. package/dist/query/queryDomain.step.d.ts.map +0 -1
  144. package/dist/query/queryDomain.step.js.map +0 -1
  145. package/dist/sandbox/steps.d.ts.map +0 -1
  146. package/dist/sandbox/steps.js.map +0 -1
  147. package/dist/sandbox.steps.d.ts.map +0 -1
  148. package/dist/sandbox.steps.js.map +0 -1
  149. package/dist/schema.d.ts.map +0 -1
  150. package/dist/schema.js.map +0 -1
  151. package/dist/service.d.ts.map +0 -1
  152. package/dist/service.js.map +0 -1
  153. package/dist/skill.d.ts.map +0 -1
  154. package/dist/skill.js.map +0 -1
  155. package/dist/transform/filepreview.d.ts.map +0 -1
  156. package/dist/transform/filepreview.js.map +0 -1
  157. package/dist/transform/index.d.ts.map +0 -1
  158. package/dist/transform/index.js.map +0 -1
  159. package/dist/transform/prompts.d.ts.map +0 -1
  160. package/dist/transform/prompts.js.map +0 -1
  161. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  162. package/dist/transform/transform-dataset.agent.js.map +0 -1
  163. package/dist/transform/transformDataset.d.ts.map +0 -1
  164. package/dist/transform/transformDataset.js.map +0 -1
package/dist/datasetFiles.d.ts CHANGED
@@ -1,5 +1,9 @@
1
1
  export declare const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
2
2
  export declare function getDatasetWorkdirBase(): string;
3
3
  export declare function getDatasetWorkstation(datasetId: string): string;
4
+ export declare function getDatasetResourcesDir(datasetId: string): string;
5
+ export declare function getDatasetScriptsDir(datasetId: string): string;
6
+ export declare function getDatasetArtifactsDir(datasetId: string): string;
7
+ export declare function getDatasetLogsDir(datasetId: string): string;
8
+ export declare function getDatasetStandardDirs(datasetId: string): string[];
4
9
  export declare function getDatasetOutputPath(datasetId: string): string;
5
- //# sourceMappingURL=datasetFiles.d.ts.map
package/dist/datasetFiles.js CHANGED
@@ -1,33 +1,35 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.DATASET_OUTPUT_FILE_NAME = void 0;
4
- exports.getDatasetWorkdirBase = getDatasetWorkdirBase;
5
- exports.getDatasetWorkstation = getDatasetWorkstation;
6
- exports.getDatasetOutputPath = getDatasetOutputPath;
7
- exports.DATASET_OUTPUT_FILE_NAME = "output.jsonl";
8
- const DEFAULT_VERCEL_WORKDIR_BASE = "/vercel/sandbox/datasets";
9
- const DEFAULT_DAYTONA_WORKDIR_BASE = "/home/daytona/.ekairos/datasets";
10
- const DEFAULT_SPRITES_WORKDIR_BASE = "/workspace/.ekairos/datasets";
1
+ export const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
2
+ const DATASET_WORKDIR_BASE = "/tmp/ekairos/dataset";
11
3
  function trimTrailingSlash(value) {
12
4
  return value.endsWith("/") ? value.slice(0, -1) : value;
13
5
  }
14
- function getDatasetWorkdirBase() {
15
- const explicit = String(process.env.DATASET_SANDBOX_WORKDIR_BASE ?? "").trim();
16
- if (explicit)
17
- return trimTrailingSlash(explicit);
18
- const provider = String(process.env.SANDBOX_PROVIDER ?? "").trim().toLowerCase();
19
- if (provider === "daytona")
20
- return DEFAULT_DAYTONA_WORKDIR_BASE;
21
- if (provider === "vercel")
22
- return DEFAULT_VERCEL_WORKDIR_BASE;
23
- if (provider === "sprites")
24
- return DEFAULT_SPRITES_WORKDIR_BASE;
25
- return DEFAULT_VERCEL_WORKDIR_BASE;
26
- }
27
- function getDatasetWorkstation(datasetId) {
6
+ export function getDatasetWorkdirBase() {
7
+ return trimTrailingSlash(DATASET_WORKDIR_BASE);
8
+ }
9
+ export function getDatasetWorkstation(datasetId) {
28
10
  return `${getDatasetWorkdirBase()}/${datasetId}`;
29
11
  }
30
- function getDatasetOutputPath(datasetId) {
31
- return `${getDatasetWorkstation(datasetId)}/${exports.DATASET_OUTPUT_FILE_NAME}`;
12
+ export function getDatasetResourcesDir(datasetId) {
13
+ return `${getDatasetWorkstation(datasetId)}/resources`;
14
+ }
15
+ export function getDatasetScriptsDir(datasetId) {
16
+ return `${getDatasetWorkstation(datasetId)}/scripts`;
17
+ }
18
+ export function getDatasetArtifactsDir(datasetId) {
19
+ return `${getDatasetWorkstation(datasetId)}/artifacts`;
20
+ }
21
+ export function getDatasetLogsDir(datasetId) {
22
+ return `${getDatasetWorkstation(datasetId)}/logs`;
23
+ }
24
+ export function getDatasetStandardDirs(datasetId) {
25
+ return [
26
+ getDatasetWorkstation(datasetId),
27
+ getDatasetResourcesDir(datasetId),
28
+ getDatasetScriptsDir(datasetId),
29
+ getDatasetArtifactsDir(datasetId),
30
+ getDatasetLogsDir(datasetId),
31
+ ];
32
+ }
33
+ export function getDatasetOutputPath(datasetId) {
34
+ return `${getDatasetWorkstation(datasetId)}/${DATASET_OUTPUT_FILE_NAME}`;
32
35
  }
33
- //# sourceMappingURL=datasetFiles.js.map
@@ -0,0 +1,49 @@
1
+ interface DefineNotationToolParams {
2
+ datasetId: string;
3
+ runtime: any;
4
+ }
5
+ /**
6
+ * defineNotation — author or REFINE the formal DEFINITION of the dataset.
7
+ *
8
+ * A dataset has two co-equal faces: its formal definition (the notation —
9
+ * the proposition that defines the set, in LaTeX) and its materialization
10
+ * (the rows + the code that produces them). They sit at the SAME level: the
11
+ * definition is not a side note about the data, it IS the dataset stated
12
+ * intensionally. The same notation is the PLAN (you state it first and the
13
+ * materialization realizes it) and, finalized, the RESULT (it describes what
14
+ * you produced).
15
+ *
16
+ * Call it FIRST with the initial definition derived from the resources, and
17
+ * AGAIN whenever the analysis discovers new sets, variables, constraints or
18
+ * corrections — every call keeps the prior version in history. Mark the last
19
+ * call with final=true so the definition describes the produced dataset.
20
+ * Predicates may be formal/semantic (trusted); the few that are arithmetic
21
+ * MAY carry optional advisory evidence.
22
+ */
23
+ export declare function createDefineNotationTool({ datasetId, runtime }: DefineNotationToolParams): import("ai").Tool<{
24
+ latex: string;
25
+ symbols: {
26
+ name: string;
27
+ kind: "function" | "set" | "variable" | "constant" | "predicate";
28
+ description: string;
29
+ latex?: string | undefined;
30
+ }[];
31
+ predicates: {
32
+ id: string;
33
+ description: string;
34
+ latex: string;
35
+ checkJson?: string | undefined;
36
+ }[];
37
+ reason: string;
38
+ final?: boolean | undefined;
39
+ }, {
40
+ success: boolean;
41
+ error: string;
42
+ } | {
43
+ warning?: string | undefined;
44
+ success: boolean;
45
+ version: number;
46
+ status: import("./notation.js").DatasetNotationStatus;
47
+ error?: undefined;
48
+ }>;
49
+ export {};
@@ -0,0 +1,154 @@
1
+ import { tool } from "ai";
2
+ import { z } from "zod";
3
+ import { DatasetService } from "./service.js";
4
+ import { datasetDomain } from "./schema.js";
5
+ import { reviseDatasetNotation, } from "./notation.js";
6
+ const symbolSchema = z.object({
7
+ name: z.string().describe("Plain identifier, e.g. 'D', 'Orders', 'w'"),
8
+ latex: z
9
+ .string()
10
+ .optional()
11
+ .describe("LaTeX for the symbol, e.g. '\\\\mathcal{D}' (defaults to the name)"),
12
+ kind: z.enum(["set", "variable", "function", "constant", "predicate"]),
13
+ description: z.string().describe("What this symbol denotes in the data"),
14
+ });
15
+ const predicateSchema = z.object({
16
+ id: z.string().describe("Stable id, e.g. 'p1', 'cardinality'"),
17
+ description: z.string().describe("The claim in plain language"),
18
+ latex: z
19
+ .string()
20
+ .describe("The claim in LaTeX, e.g. '\\\\forall r \\\\in D: r.amount > 0'"),
21
+ checkJson: z
22
+ .string()
23
+ .optional()
24
+ .describe([
25
+ "OPTIONAL arithmetic form of the claim as a JSON string, used only for",
26
+ "advisory evidence over the produced rows (not a verdict). Shapes:",
27
+ '{"kind":"row_count","op":"=","value":124}',
28
+ '{"kind":"field_type","field":"amount","type":"number","allowNull":true}',
29
+ '{"kind":"field_range","field":"amount","min":0}',
30
+ '{"kind":"field_in","field":"status","values":["paid","void"]}',
31
+ '{"kind":"field_nonnull","field":"orderId"}',
32
+ '{"kind":"field_matches","field":"sku","pattern":"^[A-Z0-9-]+$"}',
33
+ '{"kind":"unique","fields":["orderId"]}',
34
+ '{"kind":"aggregate","fn":"sum","field":"amount","op":">=","value":0}',
35
+ 'Propositional composition: {"kind":"and"|"or","checks":[...]},',
36
+ '{"kind":"not","check":...}, {"kind":"implies","if":...,"then":...}.',
37
+ "Fields support dot-paths into nested records (company.taxId).",
38
+ "Omit for formal/semantic claims (the normal case) — they are trusted.",
39
+ ].join(" ")),
40
+ });
41
+ async function getDatasetService(runtime) {
42
+ const scoped = await runtime.use(datasetDomain);
43
+ return new DatasetService(scoped.db);
44
+ }
45
+ /**
46
+ * defineNotation — author or REFINE the formal DEFINITION of the dataset.
47
+ *
48
+ * A dataset has two co-equal faces: its formal definition (the notation —
49
+ * the proposition that defines the set, in LaTeX) and its materialization
50
+ * (the rows + the code that produces them). They sit at the SAME level: the
51
+ * definition is not a side note about the data, it IS the dataset stated
52
+ * intensionally. The same notation is the PLAN (you state it first and the
53
+ * materialization realizes it) and, finalized, the RESULT (it describes what
54
+ * you produced).
55
+ *
56
+ * Call it FIRST with the initial definition derived from the resources, and
57
+ * AGAIN whenever the analysis discovers new sets, variables, constraints or
58
+ * corrections — every call keeps the prior version in history. Mark the last
59
+ * call with final=true so the definition describes the produced dataset.
60
+ * Predicates may be formal/semantic (trusted); the few that are arithmetic
61
+ * MAY carry optional advisory evidence.
62
+ */
63
+ export function createDefineNotationTool({ datasetId, runtime }) {
64
+ return tool({
65
+ description: [
66
+ "Author or refine the formal DEFINITION of the dataset: the dataset as a",
67
+ "set in LaTeX (set-builder, relational algebra, quantified or even",
68
+ "semantic predicates) plus the symbols it binds. This definition and the",
69
+ "materialization (rows + code) are TWO CO-EQUAL FACES of the dataset —",
70
+ "the definition is the dataset stated intensionally, not a comment on it.",
71
+ "It is your PLAN (state it before writing any code; the materialization",
72
+ "realizes it) and, once final, the RESULT (it describes what you",
73
+ "produced). The definition is a logical proposition, possibly derived —",
74
+ "it need not be mechanically provable; we trust the formality. State it",
75
+ "first, refine it on every discovery, and set final=true on the last",
76
+ "call. For the few predicates that are arithmetic you MAY attach a",
77
+ "checkJson for optional advisory evidence (non-blocking, never a verdict).",
78
+ ].join(" "),
79
+ inputSchema: z.object({
80
+ latex: z
81
+ .string()
82
+ .describe("Main definition of the dataset as a set, in LaTeX. Example: 'D = \\\\{(w,r,t) \\\\mid t = \\\\sum_{o \\\\in Orders} o.amount,\\\\; o.status = paid\\\\}'"),
83
+ symbols: z.array(symbolSchema).describe("Symbols bound by the definition"),
84
+ predicates: z
85
+ .array(predicateSchema)
86
+ .describe("Claims the set satisfies; attach a checkJson only when arithmetic"),
87
+ reason: z
88
+ .string()
89
+ .describe("What this revision states or what discovery triggered it (or 'initial definition')"),
90
+ final: z
91
+ .boolean()
92
+ .optional()
93
+ .describe("true when this definition describes the dataset you are about to complete (the RESULT)"),
94
+ }),
95
+ execute: async ({ latex, symbols, predicates, reason, final }) => {
96
+ try {
97
+ const service = await getDatasetService(runtime);
98
+ const existing = await service.getDatasetById(datasetId);
99
+ const previous = (existing.ok ? existing.data?.notation : null);
100
+ const parsedPredicates = [];
101
+ const checkErrors = [];
102
+ for (const predicate of predicates) {
103
+ let check;
104
+ if (predicate.checkJson) {
105
+ try {
106
+ check = JSON.parse(predicate.checkJson);
107
+ if (!check || typeof check !== "object" || !("kind" in check)) {
108
+ throw new Error("check must be an object with a 'kind'");
109
+ }
110
+ }
111
+ catch (error) {
112
+ checkErrors.push(`predicate ${predicate.id}: invalid checkJson (${String(error).slice(0, 80)})`);
113
+ check = undefined;
114
+ }
115
+ }
116
+ parsedPredicates.push({
117
+ id: predicate.id,
118
+ description: predicate.description,
119
+ latex: predicate.latex,
120
+ ...(check ? { check } : {}),
121
+ });
122
+ }
123
+ const notation = reviseDatasetNotation(previous, {
124
+ latex,
125
+ symbols: symbols,
126
+ predicates: parsedPredicates,
127
+ reason,
128
+ final,
129
+ });
130
+ const update = await service.updateDatasetNotation({ datasetId, notation });
131
+ if (!update.ok) {
132
+ return { success: false, error: update.error };
133
+ }
134
+ console.log(`[Dataset ${datasetId}] definition v${notation.version} (${notation.status}): ${reason}`);
135
+ return {
136
+ success: true,
137
+ version: notation.version,
138
+ status: notation.status,
139
+ ...(checkErrors.length
140
+ ? {
141
+ warning: `some checks were dropped: ${checkErrors.join("; ")}`,
142
+ }
143
+ : {}),
144
+ };
145
+ }
146
+ catch (error) {
147
+ return {
148
+ success: false,
149
+ error: error instanceof Error ? error.message : String(error),
150
+ };
151
+ }
152
+ },
153
+ });
154
+ }
package/dist/domain.d.ts CHANGED
@@ -1,2 +1 @@
1
- export { datasetDomain } from "./schema";
2
- //# sourceMappingURL=domain.d.ts.map
1
+ export { datasetDomain } from "./schema.js";
package/dist/domain.js CHANGED
@@ -1,6 +1 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.datasetDomain = void 0;
4
- var schema_1 = require("./schema");
5
- Object.defineProperty(exports, "datasetDomain", { enumerable: true, get: function () { return schema_1.datasetDomain; } });
6
- //# sourceMappingURL=domain.js.map
1
+ export { datasetDomain } from "./schema.js";
package/dist/executeCommand.tool.d.ts CHANGED
@@ -1,35 +1,7 @@
1
1
  interface ExecuteCommandToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
- env?: any;
4
+ runtime: any;
5
5
  }
6
- export declare function createExecuteCommandTool({ datasetId, sandboxId, env }: ExecuteCommandToolParams): import("ai").Tool<{
7
- pythonCode: string;
8
- scriptName: string;
9
- }, {
10
- success: boolean;
11
- exitCode: number;
12
- stdout: string;
13
- stderr: string;
14
- scriptPath: string;
15
- error: string;
16
- stdoutTruncated: boolean;
17
- stderrTruncated: boolean;
18
- stdoutOriginalLength: number;
19
- stderrOriginalLength: number;
20
- message?: undefined;
21
- } | {
22
- success: boolean;
23
- exitCode: number;
24
- stdout: string;
25
- stderr: string;
26
- scriptPath: string;
27
- message: string;
28
- stdoutTruncated: boolean;
29
- stderrTruncated: boolean;
30
- stdoutOriginalLength: number;
31
- stderrOriginalLength: number;
32
- error?: undefined;
33
- }>;
6
+ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): any;
34
7
  export {};
35
- //# sourceMappingURL=executeCommand.tool.d.ts.map
package/dist/executeCommand.tool.js CHANGED
@@ -1,49 +1,164 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createExecuteCommandTool = createExecuteCommandTool;
4
- const ai_1 = require("ai");
5
- const zod_1 = require("zod");
6
- const steps_1 = require("./sandbox/steps");
7
- const datasetFiles_1 = require("./datasetFiles");
8
- // To keep responses predictable for big data scenarios, we cap stdout/stderr.
9
- // The tool's return payload exposes stdout (capped) plus the on-disk script path.
1
+ import { defineAction } from "@ekairos/events";
2
+ import { z } from "zod";
3
+ import { materializeContextResourcesStep } from "./contextResources.js";
4
+ import { getDatasetScriptsDir, getDatasetStandardDirs } from "./datasetFiles.js";
5
+ import { getContextExecutionWorkspaceDirs } from "./contextWorkspace.js";
6
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
10
7
  const MAX_STDOUT_CHARS = 20000;
11
8
  const MAX_STDERR_CHARS = 5000;
12
- function createExecuteCommandTool({ datasetId, sandboxId, env }) {
13
- return (0, ai_1.tool)({
14
- description: "Execute Python scripts in the sandbox. Always saves script to a file before executing. The tool's output is EXACTLY the script's stdout and includes the script file path for traceability. CRITICAL: Print concise, human-readable summaries only; do NOT print raw large data. For big results, write artifacts to files in the workstation and print their file paths. Always include progress/result prints (e.g., 'Processing file X...', 'Found Y records', 'Generated output.csv').",
15
- inputSchema: zod_1.z.object({
16
- pythonCode: zod_1.z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
17
- scriptName: zod_1.z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A UUID will be appended automatically."),
18
- }),
19
- execute: async ({ pythonCode, scriptName }) => {
20
- const uuid = `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
21
- const workstation = (0, datasetFiles_1.getDatasetWorkstation)(datasetId);
22
- const scriptFile = `${workstation}/${scriptName}-${uuid}.py`;
9
+ function normalizeScriptName(scriptName) {
10
+ const normalized = String(scriptName ?? "")
11
+ .trim()
12
+ .replace(/[^a-zA-Z0-9_.-]/g, "_")
13
+ .replace(/_+/g, "_")
14
+ .slice(0, 80);
15
+ return normalized || "script";
16
+ }
17
+ function stableScriptHash(value) {
18
+ let hash = 2166136261;
19
+ for (let index = 0; index < value.length; index++) {
20
+ hash ^= value.charCodeAt(index);
21
+ hash = Math.imul(hash, 16777619);
22
+ }
23
+ return (hash >>> 0).toString(36);
24
+ }
25
+ const executeCommandInputSchema = z.object({
26
+ commandDescription: z
27
+ .string()
28
+ .min(1)
29
+ .describe("Required pre-execution description of the command. Describe the inputs/resources it will use, the operation it will perform, the expected output, and why a command is the right tool instead of direct completion. Invalid descriptions include rereading resources whose descriptor/preview already contains the needed evidence, merely formatting JSON, constructing the final object, writing output.jsonl, or making completion easier."),
30
+ pythonCode: z
31
+ .string()
32
+ .describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. If context resources are materialized, read os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] to discover files and metadata. Do not install packages, download dependencies, use pip/npm/apt/curl/wget, or access the network. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
33
+ scriptName: z
34
+ .string()
35
+ .describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A deterministic suffix will be appended automatically."),
36
+ resourceKeys: z
37
+ .array(z.string())
38
+ .optional()
39
+ .describe("Optional context resource keys to materialize before running the script. Omit to materialize every context resource."),
40
+ });
41
+ const materializedResourceSchema = z.object({
42
+ key: z.string(),
43
+ type: z.string(),
44
+ status: z.string(),
45
+ dir: z.string(),
46
+ files: z.array(z.object({
47
+ path: z.string(),
48
+ role: z.string(),
49
+ mediaType: z.string().optional(),
50
+ })),
51
+ reason: z.string().optional(),
52
+ });
53
+ const executeCommandOutputSchema = z
54
+ .object({
55
+ success: z.boolean(),
56
+ fatal: z.boolean().optional(),
57
+ status: z.string().optional(),
58
+ exitCode: z.number().optional(),
59
+ stdout: z.string(),
60
+ stderr: z.string(),
61
+ scriptPath: z.string(),
62
+ message: z.string().optional(),
63
+ error: z.string().optional(),
64
+ resourcesDir: z.string().optional(),
65
+ resourcesManifestPath: z.string().optional(),
66
+ materializedResources: z.array(materializedResourceSchema).optional(),
67
+ stdoutTruncated: z.boolean(),
68
+ stderrTruncated: z.boolean(),
69
+ stdoutOriginalLength: z.number(),
70
+ stderrOriginalLength: z.number(),
71
+ })
72
+ .passthrough();
73
+ export function createExecuteCommandTool({ datasetId, sandboxId, runtime }) {
74
+ return defineAction({
75
+ description: "Execute Python scripts in the sandbox only when command execution is necessary to inspect, parse, aggregate, join, or compute over context resources that are not sufficiently represented in the visible context, resource descriptors, or previews. This is a high-cost computation tool, not a completion tool. Do not use it merely to reread resources whose descriptor/preview already contains the needed evidence, format JSON, build the final object, write output.jsonl, or make completion easier when completeObject or replaceRows can return the result directly. Before the script runs, requested context resources are materialized into /tmp/ekairos/contexts/{contextId}/resources and a manifest.json is written there. The Python process receives EKAIROS_CONTEXT_RESOURCES_DIR and EKAIROS_CONTEXT_RESOURCES_MANIFEST environment variables when resources are available; manifest entries expose files as resources[].files[].path. Do not install packages, download dependencies, use pip/npm/apt/curl/wget, or access the network; use only the available runtime and standard library unless a dependency is already present. Print concise progress and results only; do not dump large data.",
76
+ input: executeCommandInputSchema,
77
+ output: executeCommandOutputSchema,
78
+ execute: async ({ input, context, contextId, executionId, }) => {
79
+ const { commandDescription, pythonCode, resourceKeys, scriptName } = input;
80
+ const normalizedScriptName = normalizeScriptName(scriptName);
81
+ const scriptHash = stableScriptHash(`${normalizedScriptName}\0${pythonCode}`);
82
+ const scriptsDir = contextId && executionId
83
+ ? getContextExecutionWorkspaceDirs({ contextId, executionId }).scriptsDir
84
+ : getDatasetScriptsDir(datasetId);
85
+ const scriptFile = `${scriptsDir}/${normalizedScriptName}-${scriptHash}.py`;
86
+ let resourcesManifest = null;
23
87
  console.log(`[Dataset ${datasetId}] ========================================`);
24
- console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
25
- console.log(`[Dataset ${datasetId}] Script: ${scriptName}`);
88
+ console.log(`[Dataset ${datasetId}] Action: executeCommand`);
89
+ console.log(`[Dataset ${datasetId}] Description: ${commandDescription}`);
90
+ console.log(`[Dataset ${datasetId}] Script: ${normalizedScriptName}`);
26
91
  console.log(`[Dataset ${datasetId}] File: ${scriptFile}`);
27
92
  console.log(`[Dataset ${datasetId}] Code length: ${pythonCode.length} chars`);
28
93
  console.log(`[Dataset ${datasetId}] ========================================`);
29
94
  try {
30
- await (0, steps_1.writeDatasetSandboxFilesStep)({
31
- env,
95
+ if (contextId && Array.isArray(context?.resources) && context.resources.length > 0) {
96
+ resourcesManifest = await materializeContextResourcesStep({
97
+ runtime,
98
+ sandboxId,
99
+ contextId,
100
+ resources: context.resources,
101
+ resourceKeys,
102
+ });
103
+ console.log(`[Dataset ${datasetId}] Resources manifest: ${resourcesManifest.manifestPath}`);
104
+ }
105
+ await runDatasetSandboxCommandStep({
106
+ runtime,
107
+ sandboxId,
108
+ cmd: "mkdir",
109
+ args: ["-p", ...getDatasetStandardDirs(datasetId), scriptsDir],
110
+ });
111
+ await writeDatasetSandboxTextFilesStep({
112
+ runtime,
113
+ sandboxId,
114
+ files: [{ path: scriptFile, content: pythonCode }],
115
+ });
116
+ const written = await runDatasetSandboxCommandStep({
117
+ runtime,
32
118
  sandboxId,
33
- files: [
34
- {
35
- path: scriptFile,
36
- contentBase64: Buffer.from(pythonCode, "utf-8").toString("base64"),
37
- },
38
- ],
119
+ cmd: "test",
120
+ args: ["-f", scriptFile],
39
121
  });
122
+ if (written.exitCode !== 0) {
123
+ const error = `Script write verification failed: ${scriptFile}`;
124
+ console.error(`[Dataset ${datasetId}] ${error}`);
125
+ console.error(`[Dataset ${datasetId}] ========================================`);
126
+ return {
127
+ success: false,
128
+ fatal: true,
129
+ status: "script_write_failed",
130
+ error,
131
+ stdout: written.stdout || "",
132
+ stderr: written.stderr || "",
133
+ exitCode: written.exitCode,
134
+ scriptPath: scriptFile,
135
+ resourcesDir: resourcesManifest?.resourcesDir,
136
+ resourcesManifestPath: resourcesManifest?.manifestPath,
137
+ materializedResources: resourcesManifest?.resources,
138
+ stdoutTruncated: false,
139
+ stderrTruncated: false,
140
+ stdoutOriginalLength: 0,
141
+ stderrOriginalLength: 0,
142
+ };
143
+ }
144
+ const pythonArgs = resourcesManifest
145
+ ? [
146
+ "-c",
147
+ [
148
+ "import os, runpy",
149
+ `os.environ["EKAIROS_CONTEXT_RESOURCES_DIR"] = ${JSON.stringify(resourcesManifest.resourcesDir)}`,
150
+ `os.environ["EKAIROS_CONTEXT_RESOURCES_MANIFEST"] = ${JSON.stringify(resourcesManifest.manifestPath)}`,
151
+ `runpy.run_path(${JSON.stringify(scriptFile)}, run_name="__main__")`,
152
+ ].join("; "),
153
+ ]
154
+ : [scriptFile];
40
155
  console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
41
- console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
42
- const result = await (0, steps_1.runDatasetSandboxCommandStep)({
43
- env,
156
+ console.log(`[Dataset ${datasetId}] Executing: python ${resourcesManifest ? "<with context resources env>" : scriptFile}`);
157
+ const result = await runDatasetSandboxCommandStep({
158
+ runtime,
44
159
  sandboxId,
45
160
  cmd: "python",
46
- args: [scriptFile],
161
+ args: pythonArgs,
47
162
  });
48
163
  const stdout = result.stdout || "";
49
164
  const stderr = result.stderr || "";
@@ -53,7 +168,7 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
53
168
  const stdoutCapped = isStdoutTruncated ? stdout.slice(0, MAX_STDOUT_CHARS) : stdout;
54
169
  const stderrCapped = isStderrTruncated ? stderr.slice(0, MAX_STDERR_CHARS) : stderr;
55
170
  if (exitCode !== 0) {
56
- console.error(`[Dataset ${datasetId}] Command failed with exit code ${exitCode}`);
171
+ console.error(`[Dataset ${datasetId}] Command failed with exit code ${exitCode}`);
57
172
  console.error(`[Dataset ${datasetId}] Stderr:`, stderrCapped.substring(0, 500));
58
173
  console.error(`[Dataset ${datasetId}] ========================================`);
59
174
  return {
@@ -63,6 +178,9 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
63
178
  stderr: stderrCapped,
64
179
  scriptPath: scriptFile,
65
180
  error: `Command failed with exit code ${exitCode}`,
181
+ resourcesDir: resourcesManifest?.resourcesDir,
182
+ resourcesManifestPath: resourcesManifest?.manifestPath,
183
+ materializedResources: resourcesManifest?.resources,
66
184
  stdoutTruncated: isStdoutTruncated,
67
185
  stderrTruncated: isStderrTruncated,
68
186
  stdoutOriginalLength: stdout.length,
@@ -70,7 +188,7 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
70
188
  };
71
189
  }
72
190
  if (stderr && (stderr.includes("Traceback") || stderr.toLowerCase().includes("error"))) {
73
- console.error(`[Dataset ${datasetId}] Python error detected`);
191
+ console.error(`[Dataset ${datasetId}] Python error detected`);
74
192
  console.error(`[Dataset ${datasetId}] Stderr:`, stderrCapped.substring(0, 500));
75
193
  console.error(`[Dataset ${datasetId}] ========================================`);
76
194
  return {
@@ -80,17 +198,20 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
80
198
  stderr: stderrCapped,
81
199
  scriptPath: scriptFile,
82
200
  error: "Python error detected in stderr",
201
+ resourcesDir: resourcesManifest?.resourcesDir,
202
+ resourcesManifestPath: resourcesManifest?.manifestPath,
203
+ materializedResources: resourcesManifest?.resources,
83
204
  stdoutTruncated: isStdoutTruncated,
84
205
  stderrTruncated: isStderrTruncated,
85
206
  stdoutOriginalLength: stdout.length,
86
207
  stderrOriginalLength: stderr.length,
87
208
  };
88
209
  }
89
- console.log(`[Dataset ${datasetId}] Command executed successfully`);
210
+ console.log(`[Dataset ${datasetId}] Command executed successfully`);
90
211
  if (stdout) {
91
212
  console.log(`[Dataset ${datasetId}] Output length: ${stdout.length} chars`);
92
213
  if (isStdoutTruncated) {
93
- console.log(`[Dataset ${datasetId}] ⚠️ Stdout truncated to ${MAX_STDOUT_CHARS} chars`);
214
+ console.log(`[Dataset ${datasetId}] Stdout truncated to ${MAX_STDOUT_CHARS} chars`);
94
215
  }
95
216
  }
96
217
  console.log(`[Dataset ${datasetId}] ========================================`);
@@ -101,6 +222,9 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
101
222
  stderr: stderrCapped,
102
223
  scriptPath: scriptFile,
103
224
  message: "Command executed successfully",
225
+ resourcesDir: resourcesManifest?.resourcesDir,
226
+ resourcesManifestPath: resourcesManifest?.manifestPath,
227
+ materializedResources: resourcesManifest?.resources,
104
228
  stdoutTruncated: isStdoutTruncated,
105
229
  stderrTruncated: isStderrTruncated,
106
230
  stdoutOriginalLength: stdout.length,
@@ -118,6 +242,9 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
118
242
  stderr: "",
119
243
  exitCode: -1,
120
244
  scriptPath: scriptFile,
245
+ resourcesDir: resourcesManifest?.resourcesDir,
246
+ resourcesManifestPath: resourcesManifest?.manifestPath,
247
+ materializedResources: resourcesManifest?.resources,
121
248
  stdoutTruncated: false,
122
249
  stderrTruncated: false,
123
250
  stdoutOriginalLength: 0,
@@ -127,4 +254,3 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
127
254
  },
128
255
  });
129
256
  }
130
- //# sourceMappingURL=executeCommand.tool.js.map