@ekairos/dataset 1.22.40-beta.development.0 → 1.22.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/context.d.ts +15 -0
  6. package/dist/builder/context.js +251 -0
  7. package/dist/builder/instructions.d.ts +4 -5
  8. package/dist/builder/instructions.js +15 -21
  9. package/dist/builder/materialize.d.ts +77 -10
  10. package/dist/builder/materialize.js +495 -152
  11. package/dist/builder/materializeQuery.d.ts +12 -0
  12. package/dist/builder/materializeQuery.js +31 -0
  13. package/dist/builder/persistence.d.ts +10 -6
  14. package/dist/builder/persistence.js +107 -62
  15. package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -1
  16. package/dist/builder/{sourceRows.js → rows.js} +3 -9
  17. package/dist/builder/schemaInference.d.ts +1 -2
  18. package/dist/builder/schemaInference.js +4 -12
  19. package/dist/builder/types.d.ts +41 -26
  20. package/dist/builder/types.js +1 -3
  21. package/dist/clearDataset.tool.d.ts +2 -3
  22. package/dist/clearDataset.tool.js +13 -17
  23. package/dist/completeDataset.steps.d.ts +117 -0
  24. package/dist/completeDataset.steps.js +537 -0
  25. package/dist/completeDataset.tool.d.ts +132 -7
  26. package/dist/completeDataset.tool.js +46 -192
  27. package/dist/contextResources.d.ts +31 -0
  28. package/dist/contextResources.js +151 -0
  29. package/dist/contextWorkspace.d.ts +79 -0
  30. package/dist/contextWorkspace.js +234 -0
  31. package/dist/dataset/steps.d.ts +39 -15
  32. package/dist/dataset/steps.js +96 -39
  33. package/dist/dataset.d.ts +2 -3
  34. package/dist/dataset.js +73 -51
  35. package/dist/datasetFiles.d.ts +5 -1
  36. package/dist/datasetFiles.js +29 -27
  37. package/dist/defineNotation.tool.d.ts +49 -0
  38. package/dist/defineNotation.tool.js +154 -0
  39. package/dist/domain.d.ts +1 -2
  40. package/dist/domain.js +1 -6
  41. package/dist/executeCommand.tool.d.ts +2 -30
  42. package/dist/executeCommand.tool.js +165 -39
  43. package/dist/file/file-dataset.agent.d.ts +19 -56
  44. package/dist/file/file-dataset.agent.js +182 -136
  45. package/dist/file/file-dataset.steps.d.ts +27 -0
  46. package/dist/file/file-dataset.steps.js +47 -0
  47. package/dist/file/file-dataset.types.d.ts +64 -0
  48. package/dist/file/file-dataset.types.js +1 -0
  49. package/dist/file/filepreview.d.ts +5 -35
  50. package/dist/file/filepreview.js +60 -107
  51. package/dist/file/filepreview.types.d.ts +31 -0
  52. package/dist/file/filepreview.types.js +1 -0
  53. package/dist/file/generateSchema.tool.d.ts +2 -3
  54. package/dist/file/generateSchema.tool.js +11 -15
  55. package/dist/file/index.d.ts +1 -2
  56. package/dist/file/index.js +1 -18
  57. package/dist/file/prompts.d.ts +2 -3
  58. package/dist/file/prompts.js +152 -32
  59. package/dist/file/scripts.generated.d.ts +1 -0
  60. package/dist/file/scripts.generated.js +11 -0
  61. package/dist/file/steps.d.ts +1 -2
  62. package/dist/file/steps.js +9 -7
  63. package/dist/id.d.ts +1 -0
  64. package/dist/id.js +10 -0
  65. package/dist/index.d.ts +9 -7
  66. package/dist/index.js +9 -23
  67. package/dist/materializeDataset.tool.d.ts +35 -28
  68. package/dist/materializeDataset.tool.js +74 -68
  69. package/dist/notation.d.ts +205 -0
  70. package/dist/notation.js +424 -0
  71. package/dist/query/index.d.ts +1 -2
  72. package/dist/query/index.js +1 -18
  73. package/dist/query/queryDomain.d.ts +3 -4
  74. package/dist/query/queryDomain.js +3 -40
  75. package/dist/query/queryDomain.step.d.ts +1 -1
  76. package/dist/query/queryDomain.step.js +24 -13
  77. package/dist/sandbox/steps.d.ts +23 -15
  78. package/dist/sandbox/steps.js +73 -76
  79. package/dist/sandbox.steps.d.ts +1 -2
  80. package/dist/sandbox.steps.js +1 -18
  81. package/dist/schema.d.ts +14 -3
  82. package/dist/schema.js +27 -26
  83. package/dist/service.d.ts +12 -5
  84. package/dist/service.js +88 -15
  85. package/dist/skill.d.ts +0 -1
  86. package/dist/skill.js +12 -17
  87. package/dist/transform/filepreview.d.ts +2 -3
  88. package/dist/transform/filepreview.js +9 -26
  89. package/dist/transform/index.d.ts +2 -3
  90. package/dist/transform/index.js +2 -8
  91. package/dist/transform/prompts.d.ts +1 -34
  92. package/dist/transform/prompts.js +66 -46
  93. package/dist/transform/transform-dataset.agent.d.ts +21 -46
  94. package/dist/transform/transform-dataset.agent.js +152 -93
  95. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  96. package/dist/transform/transform-dataset.steps.js +61 -0
  97. package/dist/transform/transform-dataset.types.d.ts +96 -0
  98. package/dist/transform/transform-dataset.types.js +1 -0
  99. package/dist/transform/transformDataset.d.ts +3 -3
  100. package/dist/transform/transformDataset.js +15 -18
  101. package/dist/writeDatasetRows.tool.d.ts +188 -0
  102. package/dist/writeDatasetRows.tool.js +258 -0
  103. package/package.json +33 -8
  104. package/dist/builder/instructions.d.ts.map +0 -1
  105. package/dist/builder/instructions.js.map +0 -1
  106. package/dist/builder/materialize.d.ts.map +0 -1
  107. package/dist/builder/materialize.js.map +0 -1
  108. package/dist/builder/persistence.d.ts.map +0 -1
  109. package/dist/builder/persistence.js.map +0 -1
  110. package/dist/builder/schemaInference.d.ts.map +0 -1
  111. package/dist/builder/schemaInference.js.map +0 -1
  112. package/dist/builder/sourceRows.d.ts.map +0 -1
  113. package/dist/builder/sourceRows.js.map +0 -1
  114. package/dist/builder/types.d.ts.map +0 -1
  115. package/dist/builder/types.js.map +0 -1
  116. package/dist/clearDataset.tool.d.ts.map +0 -1
  117. package/dist/clearDataset.tool.js.map +0 -1
  118. package/dist/completeDataset.tool.d.ts.map +0 -1
  119. package/dist/completeDataset.tool.js.map +0 -1
  120. package/dist/dataset/steps.d.ts.map +0 -1
  121. package/dist/dataset/steps.js.map +0 -1
  122. package/dist/dataset.d.ts.map +0 -1
  123. package/dist/dataset.js.map +0 -1
  124. package/dist/datasetFiles.d.ts.map +0 -1
  125. package/dist/datasetFiles.js.map +0 -1
  126. package/dist/domain.d.ts.map +0 -1
  127. package/dist/domain.js.map +0 -1
  128. package/dist/eventsReactRuntime.d.ts +0 -22
  129. package/dist/eventsReactRuntime.d.ts.map +0 -1
  130. package/dist/eventsReactRuntime.js +0 -29
  131. package/dist/eventsReactRuntime.js.map +0 -1
  132. package/dist/executeCommand.tool.d.ts.map +0 -1
  133. package/dist/executeCommand.tool.js.map +0 -1
  134. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  135. package/dist/file/file-dataset.agent.js.map +0 -1
  136. package/dist/file/filepreview.d.ts.map +0 -1
  137. package/dist/file/filepreview.js.map +0 -1
  138. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  139. package/dist/file/generateSchema.tool.js.map +0 -1
  140. package/dist/file/index.d.ts.map +0 -1
  141. package/dist/file/index.js.map +0 -1
  142. package/dist/file/prompts.d.ts.map +0 -1
  143. package/dist/file/prompts.js.map +0 -1
  144. package/dist/file/steps.d.ts.map +0 -1
  145. package/dist/file/steps.js.map +0 -1
  146. package/dist/index.d.ts.map +0 -1
  147. package/dist/index.js.map +0 -1
  148. package/dist/materializeDataset.tool.d.ts.map +0 -1
  149. package/dist/materializeDataset.tool.js.map +0 -1
  150. package/dist/query/index.d.ts.map +0 -1
  151. package/dist/query/index.js.map +0 -1
  152. package/dist/query/queryDomain.d.ts.map +0 -1
  153. package/dist/query/queryDomain.js.map +0 -1
  154. package/dist/query/queryDomain.step.d.ts.map +0 -1
  155. package/dist/query/queryDomain.step.js.map +0 -1
  156. package/dist/sandbox/steps.d.ts.map +0 -1
  157. package/dist/sandbox/steps.js.map +0 -1
  158. package/dist/sandbox.steps.d.ts.map +0 -1
  159. package/dist/sandbox.steps.js.map +0 -1
  160. package/dist/schema.d.ts.map +0 -1
  161. package/dist/schema.js.map +0 -1
  162. package/dist/service.d.ts.map +0 -1
  163. package/dist/service.js.map +0 -1
  164. package/dist/skill.d.ts.map +0 -1
  165. package/dist/skill.js.map +0 -1
  166. package/dist/transform/filepreview.d.ts.map +0 -1
  167. package/dist/transform/filepreview.js.map +0 -1
  168. package/dist/transform/index.d.ts.map +0 -1
  169. package/dist/transform/index.js.map +0 -1
  170. package/dist/transform/prompts.d.ts.map +0 -1
  171. package/dist/transform/prompts.js.map +0 -1
  172. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  173. package/dist/transform/transform-dataset.agent.js.map +0 -1
  174. package/dist/transform/transformDataset.d.ts.map +0 -1
  175. package/dist/transform/transformDataset.js.map +0 -1
package/dist/dataset.js CHANGED
@@ -1,55 +1,63 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.dataset = dataset;
4
- const admin_1 = require("@instantdb/admin");
5
- const instructions_1 = require("./builder/instructions");
6
- const materialize_1 = require("./builder/materialize");
7
- const persistence_1 = require("./builder/persistence");
8
- function dataset(runtime, options = {}) {
1
+ import { buildObjectOutputInstructions } from "./builder/instructions.js";
2
+ import { resolveDatasetResourceContext } from "./builder/context.js";
3
+ import { createDatasetId } from "./id.js";
4
+ import { completeDatasetStep, materializeDerivedDataset, materializeSingleFileLikeResource, } from "./builder/materialize.js";
5
+ import { materializeQueryResource } from "./builder/materializeQuery.js";
6
+ import { createDatasetBuildResult, finalizeBuildResult, } from "./builder/persistence.js";
7
+ export function dataset(runtime, options = {}) {
9
8
  const datasetId = normalizeDatasetId(options.datasetId);
10
9
  const typedRuntime = runtime;
11
10
  const state = {
12
11
  runtime: typedRuntime,
13
12
  env: typedRuntime.env,
14
- sources: [],
13
+ resources: [],
15
14
  output: "rows",
16
15
  inferSchema: false,
16
+ durable: options.durable,
17
17
  first: false,
18
18
  };
19
19
  const api = {
20
20
  datasetId,
21
- fromFile(source) {
22
- state.sources.push({ kind: "file", ...source });
21
+ fromFile(resource) {
22
+ state.resources.push({ kind: "file", ...resource });
23
23
  return api;
24
24
  },
25
- fromText(source) {
26
- state.sources.push({ kind: "text", ...source });
25
+ fromText(resource) {
26
+ state.resources.push({ kind: "text", ...resource });
27
27
  return api;
28
28
  },
29
- fromDataset(source) {
30
- state.sources.push({ kind: "dataset", ...source });
29
+ fromDataset(resource) {
30
+ state.resources.push({ kind: "dataset", ...resource });
31
31
  return api;
32
32
  },
33
- from(...sources) {
34
- for (const source of sources) {
35
- if ("kind" in source) {
36
- state.sources.push(source);
33
+ fromContext(context) {
34
+ state.resources.push({ kind: "context", ...context });
35
+ return api;
36
+ },
37
+ from(...resources) {
38
+ for (const resource of resources) {
39
+ if ("kind" in resource) {
40
+ state.resources.push(resource);
37
41
  continue;
38
42
  }
39
- if ("fileId" in source) {
40
- state.sources.push({ kind: "file", ...source });
43
+ if ("fileId" in resource) {
44
+ state.resources.push({ kind: "file", ...resource });
41
45
  continue;
42
46
  }
43
- if ("datasetId" in source) {
44
- state.sources.push({ kind: "dataset", ...source });
47
+ if ("datasetId" in resource) {
48
+ state.resources.push({ kind: "dataset", ...resource });
45
49
  continue;
46
50
  }
47
- state.sources.push({ kind: "text", ...source });
51
+ if ("id" in resource || "key" in resource) {
52
+ state.resources.push({ kind: "context", ...resource });
53
+ continue;
54
+ }
55
+ state.resources.push({ kind: "text", ...resource });
48
56
  }
49
57
  return api;
50
58
  },
51
- fromQuery(domain, source) {
52
- state.sources.push({ kind: "query", domain, ...source });
59
+ fromQuery(domain, resource) {
60
+ state.resources.push({ kind: "query", domain, ...resource });
53
61
  return api;
54
62
  },
55
63
  title(title) {
@@ -97,57 +105,72 @@ function dataset(runtime, options = {}) {
97
105
  return api;
98
106
  },
99
107
  async build(options) {
100
- if (state.sources.length === 0) {
101
- throw new Error("dataset_sources_required");
108
+ if (state.resources.length === 0) {
109
+ throw new Error("dataset_resources_required");
102
110
  }
103
111
  const targetDatasetId = options?.datasetId
104
112
  ? normalizeDatasetId(options.datasetId)
105
113
  : datasetId;
106
- const effectiveState = state.output === "object"
114
+ const stateWithBuildOptions = {
115
+ ...state,
116
+ durable: options?.durable ?? state.durable,
117
+ };
118
+ const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
119
+ stateWithBuildOptions.resources = context.resources;
120
+ stateWithBuildOptions.contextId = context.contextId;
121
+ stateWithBuildOptions.contextResources = context.contextResources;
122
+ const effectiveState = stateWithBuildOptions.output === "object"
107
123
  ? {
108
- ...state,
124
+ ...stateWithBuildOptions,
109
125
  first: true,
110
- instructions: (0, instructions_1.buildObjectOutputInstructions)(state.instructions),
126
+ instructions: buildObjectOutputInstructions(stateWithBuildOptions.instructions),
111
127
  }
112
- : state;
113
- const onlySource = effectiveState.sources[0];
114
- const isSingleSource = effectiveState.sources.length === 1;
128
+ : stateWithBuildOptions;
129
+ const onlyResource = effectiveState.resources[0];
130
+ const isSingleResource = effectiveState.resources.length === 1;
115
131
  const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
116
- if (isSingleSource && onlySource.kind === "query" && !hasInstructions) {
117
- await (0, materialize_1.materializeQuerySource)(effectiveState.runtime, onlySource, {
132
+ if (isSingleResource && onlyResource.kind === "query" && !hasInstructions) {
133
+ await materializeQueryResource(effectiveState.runtime, onlyResource, {
118
134
  datasetId: targetDatasetId,
119
135
  sandboxId: effectiveState.sandboxId,
120
136
  schema: effectiveState.outputSchema,
121
- title: effectiveState.title ?? onlySource.title,
137
+ title: effectiveState.title ?? onlyResource.title,
122
138
  instructions: effectiveState.instructions,
123
139
  first: effectiveState.first,
140
+ contextId: effectiveState.contextId ?? "",
124
141
  });
125
- return finalizeOutputResult(await (0, persistence_1.finalizeBuildResult)(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
142
+ return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
126
143
  }
127
- if (isSingleSource && (onlySource.kind === "file" || onlySource.kind === "text")) {
128
- if (!effectiveState.sandboxId) {
129
- throw new Error("dataset_sandbox_required");
130
- }
144
+ if (isSingleResource && (onlyResource.kind === "file" || onlyResource.kind === "text")) {
131
145
  if (!effectiveState.reactor) {
132
146
  throw new Error("dataset_reactor_required");
133
147
  }
134
- await (0, materialize_1.materializeSingleFileLikeSource)(effectiveState, onlySource, targetDatasetId);
135
- return finalizeOutputResult(await (0, persistence_1.finalizeBuildResult)(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
136
- }
137
- if (!effectiveState.sandboxId) {
138
- throw new Error("dataset_sandbox_required");
148
+ await materializeSingleFileLikeResource(effectiveState, onlyResource, targetDatasetId);
149
+ const completed = await completeDatasetStep({
150
+ runtime: effectiveState.runtime,
151
+ datasetId: targetDatasetId,
152
+ schema: effectiveState.outputSchema,
153
+ first: effectiveState.first,
154
+ });
155
+ return finalizeOutputResult(createDatasetBuildResult(effectiveState.runtime, completed), effectiveState.output);
139
156
  }
140
157
  if (!effectiveState.reactor) {
141
158
  throw new Error("dataset_reactor_required");
142
159
  }
143
- await (0, materialize_1.materializeDerivedDataset)(effectiveState, targetDatasetId);
144
- return finalizeOutputResult(await (0, persistence_1.finalizeBuildResult)(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
160
+ await materializeDerivedDataset(effectiveState, targetDatasetId);
161
+ const completed = await completeDatasetStep({
162
+ runtime: effectiveState.runtime,
163
+ datasetId: targetDatasetId,
164
+ schema: effectiveState.outputSchema,
165
+ first: effectiveState.first,
166
+ });
167
+ return finalizeOutputResult(createDatasetBuildResult(effectiveState.runtime, completed), effectiveState.output);
145
168
  },
146
169
  };
147
170
  return api;
148
171
  }
149
172
  function normalizeDatasetId(datasetId) {
150
- const normalized = String(datasetId ?? (0, admin_1.id)()).trim();
173
+ const normalized = String(datasetId ?? createDatasetId()).trim();
151
174
  if (!normalized) {
152
175
  throw new Error("dataset_id_required");
153
176
  }
@@ -161,4 +184,3 @@ function finalizeOutputResult(result, output) {
161
184
  object: result.firstRow ?? null,
162
185
  };
163
186
  }
164
- //# sourceMappingURL=dataset.js.map
package/dist/datasetFiles.d.ts CHANGED
@@ -1,5 +1,9 @@
1
1
  export declare const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
2
2
  export declare function getDatasetWorkdirBase(): string;
3
3
  export declare function getDatasetWorkstation(datasetId: string): string;
4
+ export declare function getDatasetResourcesDir(datasetId: string): string;
5
+ export declare function getDatasetScriptsDir(datasetId: string): string;
6
+ export declare function getDatasetArtifactsDir(datasetId: string): string;
7
+ export declare function getDatasetLogsDir(datasetId: string): string;
8
+ export declare function getDatasetStandardDirs(datasetId: string): string[];
4
9
  export declare function getDatasetOutputPath(datasetId: string): string;
5
- //# sourceMappingURL=datasetFiles.d.ts.map
package/dist/datasetFiles.js CHANGED
@@ -1,33 +1,35 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.DATASET_OUTPUT_FILE_NAME = void 0;
4
- exports.getDatasetWorkdirBase = getDatasetWorkdirBase;
5
- exports.getDatasetWorkstation = getDatasetWorkstation;
6
- exports.getDatasetOutputPath = getDatasetOutputPath;
7
- exports.DATASET_OUTPUT_FILE_NAME = "output.jsonl";
8
- const DEFAULT_VERCEL_WORKDIR_BASE = "/vercel/sandbox/datasets";
9
- const DEFAULT_DAYTONA_WORKDIR_BASE = "/home/daytona/.ekairos/datasets";
10
- const DEFAULT_SPRITES_WORKDIR_BASE = "/workspace/.ekairos/datasets";
1
+ export const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
2
+ const DATASET_WORKDIR_BASE = "/tmp/ekairos/dataset";
11
3
  function trimTrailingSlash(value) {
12
4
  return value.endsWith("/") ? value.slice(0, -1) : value;
13
5
  }
14
- function getDatasetWorkdirBase() {
15
- const explicit = String(process.env.DATASET_SANDBOX_WORKDIR_BASE ?? "").trim();
16
- if (explicit)
17
- return trimTrailingSlash(explicit);
18
- const provider = String(process.env.SANDBOX_PROVIDER ?? "").trim().toLowerCase();
19
- if (provider === "daytona")
20
- return DEFAULT_DAYTONA_WORKDIR_BASE;
21
- if (provider === "vercel")
22
- return DEFAULT_VERCEL_WORKDIR_BASE;
23
- if (provider === "sprites")
24
- return DEFAULT_SPRITES_WORKDIR_BASE;
25
- return DEFAULT_VERCEL_WORKDIR_BASE;
26
- }
27
- function getDatasetWorkstation(datasetId) {
6
+ export function getDatasetWorkdirBase() {
7
+ return trimTrailingSlash(DATASET_WORKDIR_BASE);
8
+ }
9
+ export function getDatasetWorkstation(datasetId) {
28
10
  return `${getDatasetWorkdirBase()}/${datasetId}`;
29
11
  }
30
- function getDatasetOutputPath(datasetId) {
31
- return `${getDatasetWorkstation(datasetId)}/${exports.DATASET_OUTPUT_FILE_NAME}`;
12
+ export function getDatasetResourcesDir(datasetId) {
13
+ return `${getDatasetWorkstation(datasetId)}/resources`;
14
+ }
15
+ export function getDatasetScriptsDir(datasetId) {
16
+ return `${getDatasetWorkstation(datasetId)}/scripts`;
17
+ }
18
+ export function getDatasetArtifactsDir(datasetId) {
19
+ return `${getDatasetWorkstation(datasetId)}/artifacts`;
20
+ }
21
+ export function getDatasetLogsDir(datasetId) {
22
+ return `${getDatasetWorkstation(datasetId)}/logs`;
23
+ }
24
+ export function getDatasetStandardDirs(datasetId) {
25
+ return [
26
+ getDatasetWorkstation(datasetId),
27
+ getDatasetResourcesDir(datasetId),
28
+ getDatasetScriptsDir(datasetId),
29
+ getDatasetArtifactsDir(datasetId),
30
+ getDatasetLogsDir(datasetId),
31
+ ];
32
+ }
33
+ export function getDatasetOutputPath(datasetId) {
34
+ return `${getDatasetWorkstation(datasetId)}/${DATASET_OUTPUT_FILE_NAME}`;
32
35
  }
33
- //# sourceMappingURL=datasetFiles.js.map
package/dist/defineNotation.tool.d.ts ADDED
@@ -0,0 +1,49 @@
1
+ interface DefineNotationToolParams {
2
+ datasetId: string;
3
+ runtime: any;
4
+ }
5
+ /**
6
+ * defineNotation — author or REFINE the formal DEFINITION of the dataset.
7
+ *
8
+ * A dataset has two co-equal faces: its formal definition (the notation —
9
+ * the proposition that defines the set, in LaTeX) and its materialization
10
+ * (the rows + the code that produces them). They sit at the SAME level: the
11
+ * definition is not a side note about the data, it IS the dataset stated
12
+ * intensionally. The same notation is the PLAN (you state it first and the
13
+ * materialization realizes it) and, finalized, the RESULT (it describes what
14
+ * you produced).
15
+ *
16
+ * Call it FIRST with the initial definition derived from the resources, and
17
+ * AGAIN whenever the analysis discovers new sets, variables, constraints or
18
+ * corrections — every call keeps the prior version in history. Mark the last
19
+ * call with final=true so the definition describes the produced dataset.
20
+ * Predicates may be formal/semantic (trusted); the few that are arithmetic
21
+ * MAY carry optional advisory evidence.
22
+ */
23
+ export declare function createDefineNotationTool({ datasetId, runtime }: DefineNotationToolParams): import("ai").Tool<{
24
+ latex: string;
25
+ symbols: {
26
+ name: string;
27
+ kind: "function" | "set" | "variable" | "constant" | "predicate";
28
+ description: string;
29
+ latex?: string | undefined;
30
+ }[];
31
+ predicates: {
32
+ id: string;
33
+ description: string;
34
+ latex: string;
35
+ checkJson?: string | undefined;
36
+ }[];
37
+ reason: string;
38
+ final?: boolean | undefined;
39
+ }, {
40
+ success: boolean;
41
+ error: string;
42
+ } | {
43
+ warning?: string | undefined;
44
+ success: boolean;
45
+ version: number;
46
+ status: import("./notation.js").DatasetNotationStatus;
47
+ error?: undefined;
48
+ }>;
49
+ export {};
package/dist/defineNotation.tool.js ADDED
@@ -0,0 +1,154 @@
1
+ import { tool } from "ai";
2
+ import { z } from "zod";
3
+ import { DatasetService } from "./service.js";
4
+ import { datasetDomain } from "./schema.js";
5
+ import { reviseDatasetNotation, } from "./notation.js";
6
+ const symbolSchema = z.object({
7
+ name: z.string().describe("Plain identifier, e.g. 'D', 'Orders', 'w'"),
8
+ latex: z
9
+ .string()
10
+ .optional()
11
+ .describe("LaTeX for the symbol, e.g. '\\\\mathcal{D}' (defaults to the name)"),
12
+ kind: z.enum(["set", "variable", "function", "constant", "predicate"]),
13
+ description: z.string().describe("What this symbol denotes in the data"),
14
+ });
15
+ const predicateSchema = z.object({
16
+ id: z.string().describe("Stable id, e.g. 'p1', 'cardinality'"),
17
+ description: z.string().describe("The claim in plain language"),
18
+ latex: z
19
+ .string()
20
+ .describe("The claim in LaTeX, e.g. '\\\\forall r \\\\in D: r.amount > 0'"),
21
+ checkJson: z
22
+ .string()
23
+ .optional()
24
+ .describe([
25
+ "OPTIONAL arithmetic form of the claim as a JSON string, used only for",
26
+ "advisory evidence over the produced rows (not a verdict). Shapes:",
27
+ '{"kind":"row_count","op":"=","value":124}',
28
+ '{"kind":"field_type","field":"amount","type":"number","allowNull":true}',
29
+ '{"kind":"field_range","field":"amount","min":0}',
30
+ '{"kind":"field_in","field":"status","values":["paid","void"]}',
31
+ '{"kind":"field_nonnull","field":"orderId"}',
32
+ '{"kind":"field_matches","field":"sku","pattern":"^[A-Z0-9-]+$"}',
33
+ '{"kind":"unique","fields":["orderId"]}',
34
+ '{"kind":"aggregate","fn":"sum","field":"amount","op":">=","value":0}',
35
+ 'Propositional composition: {"kind":"and"|"or","checks":[...]},',
36
+ '{"kind":"not","check":...}, {"kind":"implies","if":...,"then":...}.',
37
+ "Fields support dot-paths into nested records (company.taxId).",
38
+ "Omit for formal/semantic claims (the normal case) — they are trusted.",
39
+ ].join(" ")),
40
+ });
41
+ async function getDatasetService(runtime) {
42
+ const scoped = await runtime.use(datasetDomain);
43
+ return new DatasetService(scoped.db);
44
+ }
45
+ /**
46
+ * defineNotation — author or REFINE the formal DEFINITION of the dataset.
47
+ *
48
+ * A dataset has two co-equal faces: its formal definition (the notation —
49
+ * the proposition that defines the set, in LaTeX) and its materialization
50
+ * (the rows + the code that produces them). They sit at the SAME level: the
51
+ * definition is not a side note about the data, it IS the dataset stated
52
+ * intensionally. The same notation is the PLAN (you state it first and the
53
+ * materialization realizes it) and, finalized, the RESULT (it describes what
54
+ * you produced).
55
+ *
56
+ * Call it FIRST with the initial definition derived from the resources, and
57
+ * AGAIN whenever the analysis discovers new sets, variables, constraints or
58
+ * corrections — every call keeps the prior version in history. Mark the last
59
+ * call with final=true so the definition describes the produced dataset.
60
+ * Predicates may be formal/semantic (trusted); the few that are arithmetic
61
+ * MAY carry optional advisory evidence.
62
+ */
63
+ export function createDefineNotationTool({ datasetId, runtime }) {
64
+ return tool({
65
+ description: [
66
+ "Author or refine the formal DEFINITION of the dataset: the dataset as a",
67
+ "set in LaTeX (set-builder, relational algebra, quantified or even",
68
+ "semantic predicates) plus the symbols it binds. This definition and the",
69
+ "materialization (rows + code) are TWO CO-EQUAL FACES of the dataset —",
70
+ "the definition is the dataset stated intensionally, not a comment on it.",
71
+ "It is your PLAN (state it before writing any code; the materialization",
72
+ "realizes it) and, once final, the RESULT (it describes what you",
73
+ "produced). The definition is a logical proposition, possibly derived —",
74
+ "it need not be mechanically provable; we trust the formality. State it",
75
+ "first, refine it on every discovery, and set final=true on the last",
76
+ "call. For the few predicates that are arithmetic you MAY attach a",
77
+ "checkJson for optional advisory evidence (non-blocking, never a verdict).",
78
+ ].join(" "),
79
+ inputSchema: z.object({
80
+ latex: z
81
+ .string()
82
+ .describe("Main definition of the dataset as a set, in LaTeX. Example: 'D = \\\\{(w,r,t) \\\\mid t = \\\\sum_{o \\\\in Orders} o.amount,\\\\; o.status = paid\\\\}'"),
83
+ symbols: z.array(symbolSchema).describe("Symbols bound by the definition"),
84
+ predicates: z
85
+ .array(predicateSchema)
86
+ .describe("Claims the set satisfies; attach a checkJson only when arithmetic"),
87
+ reason: z
88
+ .string()
89
+ .describe("What this revision states or what discovery triggered it (or 'initial definition')"),
90
+ final: z
91
+ .boolean()
92
+ .optional()
93
+ .describe("true when this definition describes the dataset you are about to complete (the RESULT)"),
94
+ }),
95
+ execute: async ({ latex, symbols, predicates, reason, final }) => {
96
+ try {
97
+ const service = await getDatasetService(runtime);
98
+ const existing = await service.getDatasetById(datasetId);
99
+ const previous = (existing.ok ? existing.data?.notation : null);
100
+ const parsedPredicates = [];
101
+ const checkErrors = [];
102
+ for (const predicate of predicates) {
103
+ let check;
104
+ if (predicate.checkJson) {
105
+ try {
106
+ check = JSON.parse(predicate.checkJson);
107
+ if (!check || typeof check !== "object" || !("kind" in check)) {
108
+ throw new Error("check must be an object with a 'kind'");
109
+ }
110
+ }
111
+ catch (error) {
112
+ checkErrors.push(`predicate ${predicate.id}: invalid checkJson (${String(error).slice(0, 80)})`);
113
+ check = undefined;
114
+ }
115
+ }
116
+ parsedPredicates.push({
117
+ id: predicate.id,
118
+ description: predicate.description,
119
+ latex: predicate.latex,
120
+ ...(check ? { check } : {}),
121
+ });
122
+ }
123
+ const notation = reviseDatasetNotation(previous, {
124
+ latex,
125
+ symbols: symbols,
126
+ predicates: parsedPredicates,
127
+ reason,
128
+ final,
129
+ });
130
+ const update = await service.updateDatasetNotation({ datasetId, notation });
131
+ if (!update.ok) {
132
+ return { success: false, error: update.error };
133
+ }
134
+ console.log(`[Dataset ${datasetId}] definition v${notation.version} (${notation.status}): ${reason}`);
135
+ return {
136
+ success: true,
137
+ version: notation.version,
138
+ status: notation.status,
139
+ ...(checkErrors.length
140
+ ? {
141
+ warning: `some checks were dropped: ${checkErrors.join("; ")}`,
142
+ }
143
+ : {}),
144
+ };
145
+ }
146
+ catch (error) {
147
+ return {
148
+ success: false,
149
+ error: error instanceof Error ? error.message : String(error),
150
+ };
151
+ }
152
+ },
153
+ });
154
+ }
package/dist/domain.d.ts CHANGED
@@ -1,2 +1 @@
1
- export { datasetDomain } from "./schema";
2
- //# sourceMappingURL=domain.d.ts.map
1
+ export { datasetDomain } from "./schema.js";
package/dist/domain.js CHANGED
@@ -1,6 +1 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.datasetDomain = void 0;
4
- var schema_1 = require("./schema");
5
- Object.defineProperty(exports, "datasetDomain", { enumerable: true, get: function () { return schema_1.datasetDomain; } });
6
- //# sourceMappingURL=domain.js.map
1
+ export { datasetDomain } from "./schema.js";
package/dist/executeCommand.tool.d.ts CHANGED
@@ -1,35 +1,7 @@
1
1
  interface ExecuteCommandToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
- env?: any;
4
+ runtime: any;
5
5
  }
6
- export declare function createExecuteCommandTool({ datasetId, sandboxId, env }: ExecuteCommandToolParams): import("ai").Tool<{
7
- pythonCode: string;
8
- scriptName: string;
9
- }, {
10
- success: boolean;
11
- exitCode: number;
12
- stdout: string;
13
- stderr: string;
14
- scriptPath: string;
15
- error: string;
16
- stdoutTruncated: boolean;
17
- stderrTruncated: boolean;
18
- stdoutOriginalLength: number;
19
- stderrOriginalLength: number;
20
- message?: undefined;
21
- } | {
22
- success: boolean;
23
- exitCode: number;
24
- stdout: string;
25
- stderr: string;
26
- scriptPath: string;
27
- message: string;
28
- stdoutTruncated: boolean;
29
- stderrTruncated: boolean;
30
- stdoutOriginalLength: number;
31
- stderrOriginalLength: number;
32
- error?: undefined;
33
- }>;
6
+ export declare function createExecuteCommandTool({ datasetId, sandboxId, runtime }: ExecuteCommandToolParams): any;
34
7
  export {};
35
- //# sourceMappingURL=executeCommand.tool.d.ts.map