@ekairos/dataset 1.22.97-beta.development.0 → 1.22.98-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -147,10 +147,12 @@ export async function finalizeBuildResult(runtime, datasetId, withFirst) {
147
147
  });
148
148
  },
149
149
  };
150
+ const notation = (datasetResult.data?.notation ?? null);
150
151
  if (!withFirst) {
151
152
  return {
152
153
  datasetId,
153
154
  dataset: datasetResult.data,
155
+ notation,
154
156
  previewRows: previewResult.rows,
155
157
  reader,
156
158
  };
@@ -159,6 +161,7 @@ export async function finalizeBuildResult(runtime, datasetId, withFirst) {
159
161
  return {
160
162
  datasetId,
161
163
  dataset: datasetResult.data,
164
+ notation,
162
165
  previewRows: previewResult.rows,
163
166
  reader,
164
167
  firstRow: firstResult.row,
@@ -181,6 +184,7 @@ export function createDatasetBuildResult(runtime, params) {
181
184
  return {
182
185
  datasetId: params.datasetId,
183
186
  dataset: params.dataset,
187
+ notation: (params.dataset?.notation ?? null),
184
188
  previewRows: params.previewRows,
185
189
  reader,
186
190
  ...(params.firstRow !== undefined ? { firstRow: params.firstRow } : {}),
@@ -3,6 +3,7 @@ import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
3
3
  import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
4
4
  import type { ContextIdentifier, ContextReactor, StoredContextResource } from "@ekairos/events";
5
5
  import { datasetDomain } from "../schema.js";
6
+ import type { DatasetNotation } from "../notation.js";
6
7
  export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
7
8
  query: InstaQLParams<DomainInstantSchema<D>>;
8
9
  title?: string;
@@ -72,6 +73,9 @@ export type DatasetReader = {
72
73
  export type DatasetBuildResult = {
73
74
  datasetId: string;
74
75
  dataset: any;
76
+ /** the formal definition (intensional face), co-equal with the rows */
77
+ notation: DatasetNotation | null;
78
+ /** preview of the materialization (extensional face) */
75
79
  previewRows: any[];
76
80
  reader: DatasetReader;
77
81
  object?: any | null;
@@ -0,0 +1,49 @@
1
+ interface DefineNotationToolParams {
2
+ datasetId: string;
3
+ runtime: any;
4
+ }
5
+ /**
6
+ * defineNotation — author or REFINE the formal DEFINITION of the dataset.
7
+ *
8
+ * A dataset has two co-equal faces: its formal definition (the notation —
9
+ * the proposition that defines the set, in LaTeX) and its materialization
10
+ * (the rows + the code that produces them). They sit at the SAME level: the
11
+ * definition is not a side note about the data, it IS the dataset stated
12
+ * intensionally. The same notation is the PLAN (you state it first and the
13
+ * materialization realizes it) and, finalized, the RESULT (it describes what
14
+ * you produced).
15
+ *
16
+ * Call it FIRST with the initial definition derived from the resources, and
17
+ * AGAIN whenever the analysis discovers new sets, variables, constraints or
18
+ * corrections — every call keeps the prior version in history. Mark the last
19
+ * call with final=true so the definition describes the produced dataset.
20
+ * Predicates may be formal/semantic (trusted); the few that are arithmetic
21
+ * MAY carry optional advisory evidence.
22
+ */
23
+ export declare function createDefineNotationTool({ datasetId, runtime }: DefineNotationToolParams): import("ai").Tool<{
24
+ latex: string;
25
+ symbols: {
26
+ name: string;
27
+ kind: "function" | "set" | "variable" | "constant" | "predicate";
28
+ description: string;
29
+ latex?: string | undefined;
30
+ }[];
31
+ predicates: {
32
+ id: string;
33
+ description: string;
34
+ latex: string;
35
+ checkJson?: string | undefined;
36
+ }[];
37
+ reason: string;
38
+ final?: boolean | undefined;
39
+ }, {
40
+ success: boolean;
41
+ error: string;
42
+ } | {
43
+ warning?: string | undefined;
44
+ success: boolean;
45
+ version: number;
46
+ status: import("./notation.js").DatasetNotationStatus;
47
+ error?: undefined;
48
+ }>;
49
+ export {};
@@ -43,44 +43,54 @@ async function getDatasetService(runtime) {
43
43
  return new DatasetService(scoped.db);
44
44
  }
45
45
  /**
46
- * proposeNotation — declare or ITERATE the formal notation of the dataset.
46
+ * defineNotation — author or REFINE the formal DEFINITION of the dataset.
47
47
  *
48
- * The notation is the planning artifact: call it FIRST with the initial
49
- * set definition derived from the resources, and call it AGAIN whenever
50
- * the analysis discovers new sets, variables, constraints or corrections.
51
- * Every call appends a revision (the discovery trail is preserved). Mark
52
- * the last call with final=true so the notation describes the produced
53
- * dataset. Predicates may be formal/semantic (we trust them); the few that
54
- * are arithmetic get optional advisory evidence after completion.
48
+ * A dataset has two co-equal faces: its formal definition (the notation —
49
+ * the proposition that defines the set, in LaTeX) and its materialization
50
+ * (the rows + the code that produces them). They sit at the SAME level: the
51
+ * definition is not a side note about the data, it IS the dataset stated
52
+ * intensionally. The same notation is the PLAN (you state it first and the
53
+ * materialization realizes it) and, finalized, the RESULT (it describes what
54
+ * you produced).
55
+ *
56
+ * Call it FIRST with the initial definition derived from the resources, and
57
+ * AGAIN whenever the analysis discovers new sets, variables, constraints or
58
+ * corrections — every call keeps the prior version in history. Mark the last
59
+ * call with final=true so the definition describes the produced dataset.
60
+ * Predicates may be formal/semantic (trusted); the few that are arithmetic
61
+ * MAY carry optional advisory evidence.
55
62
  */
56
- export function createProposeNotationTool({ datasetId, runtime }) {
63
+ export function createDefineNotationTool({ datasetId, runtime }) {
57
64
  return tool({
58
65
  description: [
59
- "Declare or refine the FORMAL NOTATION of the dataset: the dataset as a",
60
- "set defined in LaTeX (set-builder, relational algebra, quantified or",
61
- "even semantic predicates) plus the symbols it binds. The definition is",
62
- "a logical proposition, possibly derived it does not need to be",
63
- "mechanically provable; we trust the formality. This is your PLANNING",
64
- "artifact propose it before writing any code, and revise it whenever",
65
- "the analysis discovers new sets, variables or constraints. For the few",
66
- "predicates that happen to be arithmetic you MAY attach a checkJson for",
67
- "optional advisory evidence (non-blocking, never a verdict).",
66
+ "Author or refine the formal DEFINITION of the dataset: the dataset as a",
67
+ "set in LaTeX (set-builder, relational algebra, quantified or even",
68
+ "semantic predicates) plus the symbols it binds. This definition and the",
69
+ "materialization (rows + code) are TWO CO-EQUAL FACES of the dataset ",
70
+ "the definition is the dataset stated intensionally, not a comment on it.",
71
+ "It is your PLAN (state it before writing any code; the materialization",
72
+ "realizes it) and, once final, the RESULT (it describes what you",
73
+ "produced). The definition is a logical proposition, possibly derived ",
74
+ "it need not be mechanically provable; we trust the formality. State it",
75
+ "first, refine it on every discovery, and set final=true on the last",
76
+ "call. For the few predicates that are arithmetic you MAY attach a",
77
+ "checkJson for optional advisory evidence (non-blocking, never a verdict).",
68
78
  ].join(" "),
69
79
  inputSchema: z.object({
70
80
  latex: z
71
81
  .string()
72
82
  .describe("Main definition of the dataset as a set, in LaTeX. Example: 'D = \\\\{(w,r,t) \\\\mid t = \\\\sum_{o \\\\in Orders} o.amount,\\\\; o.status = paid\\\\}'"),
73
- symbols: z.array(symbolSchema).describe("Symbols bound by the notation"),
83
+ symbols: z.array(symbolSchema).describe("Symbols bound by the definition"),
74
84
  predicates: z
75
85
  .array(predicateSchema)
76
- .describe("Claims about the dataset; include machine-checkable forms when possible"),
86
+ .describe("Claims the set satisfies; attach a checkJson only when arithmetic"),
77
87
  reason: z
78
88
  .string()
79
- .describe("What discovery triggered this revision (or 'initial proposal')"),
89
+ .describe("What this revision states or what discovery triggered it (or 'initial definition')"),
80
90
  final: z
81
91
  .boolean()
82
92
  .optional()
83
- .describe("true when this notation describes the dataset you are about to complete"),
93
+ .describe("true when this definition describes the dataset you are about to complete (the RESULT)"),
84
94
  }),
85
95
  execute: async ({ latex, symbols, predicates, reason, final }) => {
86
96
  try {
@@ -121,7 +131,7 @@ export function createProposeNotationTool({ datasetId, runtime }) {
121
131
  if (!update.ok) {
122
132
  return { success: false, error: update.error };
123
133
  }
124
- console.log(`[Dataset ${datasetId}] notation v${notation.version} (${notation.status}): ${reason}`);
134
+ console.log(`[Dataset ${datasetId}] definition v${notation.version} (${notation.status}): ${reason}`);
125
135
  return {
126
136
  success: true,
127
137
  version: notation.version,
@@ -3,7 +3,7 @@ import { createClearDatasetTool } from "../clearDataset.tool.js";
3
3
  import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
4
4
  import { datasetGetByIdStep } from "../dataset/steps.js";
5
5
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
6
- import { createProposeNotationTool } from "../proposeNotation.tool.js";
6
+ import { createDefineNotationTool } from "../defineNotation.tool.js";
7
7
  import { createGenerateSchemaTool } from "./generateSchema.tool.js";
8
8
  import { buildFileDatasetPromptStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
9
9
  import { createDatasetId } from "../id.js";
@@ -152,7 +152,7 @@ function createFileParseContextDefinition(params) {
152
152
  sandboxId,
153
153
  runtime,
154
154
  }),
155
- proposeNotation: createProposeNotationTool({
155
+ defineNotation: createDefineNotationTool({
156
156
  datasetId,
157
157
  runtime,
158
158
  }),
@@ -239,15 +239,15 @@ function buildInstructions(context) {
239
239
  .ele("Note").txt("FilePreview contains: TotalRows (total data rows), Metadata (file properties with JSON output), Head (first N raw file lines), Tail (last N lines if present), Mid (middle sample for large files). Each section shows Description, Script (full Python code), Command, Stdout (raw content), Stderr. This allows you to understand the exact file format.").up()
240
240
  .up();
241
241
  xml = xml
242
- .ele("Step", { number: "2", name: "Propose Formal Notation (PLAN FIRST)" })
243
- .ele("Action").txt("Call proposeNotation with the INITIAL formal definition of the dataset as a set, derived from the file preview: D = { r | r ∈ File ∧ <constraints> } in LaTeX, the symbols it binds (sets, variables, functions) and the predicates every row will satisfy").up()
242
+ .ele("Step", { number: "2", name: "Define the Dataset (PLAN FIRST)" })
243
+ .ele("Action").txt("Call defineNotation with the INITIAL formal definition of the dataset as a set, derived from the file preview: D = { r | r ∈ File ∧ <constraints> } in LaTeX, the symbols it binds (sets, variables, functions) and the predicates the set satisfies").up()
244
244
  .ele("Requirements")
245
- .ele("Requirement").txt("The notation is your PLANNING artifact: it comes BEFORE the schema and BEFORE any parsing code. The LaTeX that explains the dataset matters more than the code that produces it").up()
245
+ .ele("Requirement").txt("The definition and the materialization (schema + parsing code + rows) are TWO CO-EQUAL FACES of the dataset. The definition is the dataset stated intensionally author it FIRST; it is your PLAN and the code is built to realize it").up()
246
246
  .ele("Requirement").txt("Use set-builder notation, quantifiers and arithmetic in LaTeX (e.g. D = \\{(c, q, p) \\mid q \\in \\mathbb{Z}^{+},\\; p \\in \\mathbb{R}_{\\geq 0}\\})").up()
247
247
  .ele("Requirement").txt("Declare every discovered set and variable as a symbol with a one-line meaning").up()
248
248
  .ele("Requirement").txt("Predicates are formal claims we trust; they may be semantic (e.g. 'x es una frase relevante'). Only for the few that are purely arithmetic (row counts, field types, ranges, uniqueness, aggregates) you MAY add a checkJson for optional advisory evidence — leave every other claim without checkJson").up()
249
- .ele("Requirement").txt("ITERATE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call proposeNotation again with the refined notation and the reason. The notation is not definitive — discovery is the point").up()
250
- .ele("Requirement").txt("Before calling completeDataset, call proposeNotation one last time with final=true so the notation describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
249
+ .ele("Requirement").txt("REFINE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call defineNotation again with the updated definition and the reason. The definition is not fixed up front — discovery is the point").up()
250
+ .ele("Requirement").txt("Before calling completeDataset, call defineNotation one last time with final=true so the definition becomes the RESULT — it describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
251
251
  .up()
252
252
  .up();
253
253
  if (hasProvidedSchema) {
@@ -292,12 +292,12 @@ function buildInstructions(context) {
292
292
  .up()
293
293
  .up()
294
294
  .ele("Step", { number: "5", name: "Complete and Validate" })
295
- .ele("Action").txt("Call proposeNotation with final=true (refined to match the produced rows), then call completeDataset to validate the dataset").up()
295
+ .ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced rows), then call completeDataset to validate the dataset").up()
296
296
  .ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
297
297
  .up()
298
298
  .up()
299
299
  .ele("Rules")
300
- .ele("Rule").txt("The formal notation (proposeNotation) is the planning artifact: propose it first, iterate it on every discovery, finalize it before completion. The LaTeX explains the dataset; the code merely produces it").up()
300
+ .ele("Rule").txt("The formal definition (defineNotation) and the materialization (schema + code + rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completion").up()
301
301
  .ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
302
302
  .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the input language").up()
303
303
  .ele("Rule").txt("Original/input language applies to extracted values only, not to JSON object keys").up()
@@ -18,8 +18,8 @@ declare const materializeDatasetToolInputSchema: z.ZodObject<{
18
18
  }, z.core.$strip>>>;
19
19
  texts: z.ZodOptional<z.ZodArray<z.ZodObject<{
20
20
  name: z.ZodOptional<z.ZodString>;
21
- text: z.ZodString;
22
21
  description: z.ZodOptional<z.ZodString>;
22
+ text: z.ZodString;
23
23
  mimeType: z.ZodOptional<z.ZodString>;
24
24
  }, z.core.$strip>>>;
25
25
  datasets: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -1,29 +1,31 @@
1
1
  /**
2
- * Formal notation for datasets.
2
+ * Formal notation for datasets — the dataset stated intensionally.
3
3
  *
4
- * A dataset is the materialization of a set defined by FORMAL NOTATION:
5
- * LaTeX (set-builder, relational algebra, quantified predicates) that
6
- * EXPLAINS the data — what sets it draws from, what variables it binds,
7
- * what every member satisfies. The definition is a logical proposition,
8
- * possibly DERIVED (a syllogism), so it is NOT, in general, mechanically
9
- * verifiable: a predicate may be semantic ("x es una frase divertida"),
10
- * and the set is still perfectly well-formed. We TRUST that the formality
11
- * and the produced dataset are valid — formal notation is the planning
12
- * and explanatory artifact, not a proof obligation.
4
+ * A dataset has TWO CO-EQUAL FACES at the same level:
5
+ * - its formal DEFINITION (this notation: the proposition that defines the
6
+ * set, in LaTeX), and
7
+ * - its MATERIALIZATION (the rows + the code that produces them).
8
+ * The notation is not a comment about the data; it IS the dataset, written
9
+ * as a logical statement. The materialization is the same set written
10
+ * extensionally. Neither is subordinate to the other.
13
11
  *
14
- * It is the planning artifact: it starts as a proposal from the first look
15
- * at the resources and is ITERATED as the analysis discovers new sets,
16
- * variables and constraints. The notation is not definitive — discovery is
17
- * the point.
12
+ * The SAME notation plays two roles across the lifecycle: it is the PLAN
13
+ * (status "plan": stated first, the materialization is built to realize it)
14
+ * and, once finalized, the RESULT (status "result": it describes exactly
15
+ * what was produced). It is iterated in between — every revision keeps the
16
+ * prior version in `history`, so the discovery trail stays visible.
17
+ *
18
+ * The definition is a logical proposition, possibly DERIVED (a syllogism),
19
+ * so it is NOT, in general, mechanically verifiable: a predicate may be
20
+ * semantic ("x es una frase divertida") and the set is still well-formed.
21
+ * We TRUST the formality and the produced dataset — there is no verdict.
18
22
  *
19
23
  * SOME predicates happen to be arithmetic (a row count, a field type, a
20
- * preserved total). For those, and only those, we can attach OPTIONAL
21
- * arithmetic evidence computed over the produced rows. That evidence is
22
- * advisory: a contradiction is a hint worth surfacing, never a verdict
23
- * that the dataset is invalid. Predicates with no arithmetic form are
24
- * "asserted" formal claims we trust. Nothing here blocks or changes a
25
- * dataset build; the notation simply rides alongside on
26
- * dataset_datasets.notation.
24
+ * preserved total). For those, and only those, we attach OPTIONAL evidence
25
+ * computed over the rows. It is advisory: a contradiction is a hint, never
26
+ * a claim that the dataset is invalid. Predicates with no arithmetic form
27
+ * are "asserted" — trusted. Nothing here blocks or changes a build; the
28
+ * notation rides on dataset_datasets.notation.
27
29
  */
28
30
  export type DatasetNotationSymbolKind = "set" | "variable" | "function" | "constant" | "predicate";
29
31
  export type DatasetNotationSymbol = {
@@ -125,12 +127,14 @@ export type DatasetNotationRevision = {
125
127
  at: number;
126
128
  };
127
129
  /**
128
- * Lifecycle of the formal notation. There is intentionally NO
129
- * "verified"/"violated" verdict — the dataset's validity is trusted, not
130
- * proven. Advisory arithmetic evidence (when any predicate has it) lives
131
- * in `checks`, separate from this status.
130
+ * The role the notation currently plays — the two ends of its life:
131
+ * - "plan": stated before/while building; the materialization realizes it
132
+ * - "result": finalized; it describes the dataset that was produced
133
+ * There is intentionally NO "verified"/"violated" verdict — validity is
134
+ * trusted, not proven. Iteration is tracked by `version`/`history`; advisory
135
+ * arithmetic evidence lives in `checks`, separate from this role.
132
136
  */
133
- export type DatasetNotationStatus = "proposed" | "refined" | "final";
137
+ export type DatasetNotationStatus = "plan" | "result";
134
138
  export type DatasetNotation = {
135
139
  version: number;
136
140
  status: DatasetNotationStatus;
package/dist/notation.js CHANGED
@@ -1,29 +1,31 @@
1
1
  /**
2
- * Formal notation for datasets.
2
+ * Formal notation for datasets — the dataset stated intensionally.
3
3
  *
4
- * A dataset is the materialization of a set defined by FORMAL NOTATION:
5
- * LaTeX (set-builder, relational algebra, quantified predicates) that
6
- * EXPLAINS the data — what sets it draws from, what variables it binds,
7
- * what every member satisfies. The definition is a logical proposition,
8
- * possibly DERIVED (a syllogism), so it is NOT, in general, mechanically
9
- * verifiable: a predicate may be semantic ("x es una frase divertida"),
10
- * and the set is still perfectly well-formed. We TRUST that the formality
11
- * and the produced dataset are valid — formal notation is the planning
12
- * and explanatory artifact, not a proof obligation.
4
+ * A dataset has TWO CO-EQUAL FACES at the same level:
5
+ * - its formal DEFINITION (this notation: the proposition that defines the
6
+ * set, in LaTeX), and
7
+ * - its MATERIALIZATION (the rows + the code that produces them).
8
+ * The notation is not a comment about the data; it IS the dataset, written
9
+ * as a logical statement. The materialization is the same set written
10
+ * extensionally. Neither is subordinate to the other.
13
11
  *
14
- * It is the planning artifact: it starts as a proposal from the first look
15
- * at the resources and is ITERATED as the analysis discovers new sets,
16
- * variables and constraints. The notation is not definitive — discovery is
17
- * the point.
12
+ * The SAME notation plays two roles across the lifecycle: it is the PLAN
13
+ * (status "plan": stated first, the materialization is built to realize it)
14
+ * and, once finalized, the RESULT (status "result": it describes exactly
15
+ * what was produced). It is iterated in between — every revision keeps the
16
+ * prior version in `history`, so the discovery trail stays visible.
17
+ *
18
+ * The definition is a logical proposition, possibly DERIVED (a syllogism),
19
+ * so it is NOT, in general, mechanically verifiable: a predicate may be
20
+ * semantic ("x es una frase divertida") and the set is still well-formed.
21
+ * We TRUST the formality and the produced dataset — there is no verdict.
18
22
  *
19
23
  * SOME predicates happen to be arithmetic (a row count, a field type, a
20
- * preserved total). For those, and only those, we can attach OPTIONAL
21
- * arithmetic evidence computed over the produced rows. That evidence is
22
- * advisory: a contradiction is a hint worth surfacing, never a verdict
23
- * that the dataset is invalid. Predicates with no arithmetic form are
24
- * "asserted" — formal claims we trust. Nothing here blocks or changes a
25
- * dataset build; the notation simply rides alongside on
26
- * dataset_datasets.notation.
24
+ * preserved total). For those, and only those, we attach OPTIONAL evidence
25
+ * computed over the rows. It is advisory: a contradiction is a hint, never
26
+ * a claim that the dataset is invalid. Predicates with no arithmetic form
27
+ * are "asserted" — trusted. Nothing here blocks or changes a build; the
28
+ * notation rides on dataset_datasets.notation.
27
29
  */
28
30
  /**
29
31
  * Iterate the notation: every revision bumps the version and appends to
@@ -40,7 +42,7 @@ export function reviseDatasetNotation(previous, input) {
40
42
  };
41
43
  return {
42
44
  version,
43
- status: input.final ? "final" : previous ? "refined" : "proposed",
45
+ status: input.final ? "result" : "plan",
44
46
  latex: input.latex,
45
47
  symbols: input.symbols ?? previous?.symbols ?? [],
46
48
  predicates: input.predicates ?? previous?.predicates ?? [],
@@ -102,9 +102,9 @@ function buildInstructions(context) {
102
102
  .ele("Action").txt(`Review ContextResources and any InputPreviews to understand current record structures, evidence, fields, shapes and edge cases. ${multipleInputsNote}`).up()
103
103
  .ele("Note").txt("ContextResources DescriptorJson may include inline text, metadata, previewRows, or other visible evidence. Treat that visible content as already available context. Do not use executeCommand only to reread it.").up()
104
104
  .up()
105
- .ele("Step", { number: "2", name: "Propose Formal Notation (PLAN FIRST)" })
106
- .ele("Action").txt("Call proposeNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates every output row satisfies.").up()
107
- .ele("Note").txt("The notation is the planning artifact and comes BEFORE the transformation: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved across the transformation). The definition is a formal proposition we trust — predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. ITERATE the notation whenever inspection of the inputs reveals new sets, variables or corrections, and call proposeNotation with final=true just before completing — any arithmetic predicates then get advisory evidence (never a verdict).").up()
105
+ .ele("Step", { number: "2", name: "Define the Output Dataset (PLAN FIRST)" })
106
+ .ele("Action").txt("Call defineNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates the output set satisfies.").up()
107
+ .ele("Note").txt("The definition and the materialization (the transform code + output rows) are TWO CO-EQUAL FACES of the dataset; author the definition FIRST as the PLAN: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved). The definition is a formal proposition we trust — predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. REFINE the definition whenever inspection of the inputs reveals new sets, variables or corrections, and call defineNotation with final=true just before completing — as the RESULT it describes the produced output; any arithmetic predicates then get advisory evidence (never a verdict).").up()
108
108
  .up()
109
109
  .ele("Step", { number: "3", name: "Plan Mapping" })
110
110
  .ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
@@ -125,12 +125,12 @@ function buildInstructions(context) {
125
125
  .ele("Requirement").txt("Do not install packages, download dependencies, or access the network from executeCommand. Use only the available runtime and standard library unless a dependency is already present.").up()
126
126
  .up()
127
127
  .ele("Step", { number: "5", name: "Validate and Complete" })
128
- .ele("Action").txt("Call proposeNotation with final=true (refined to match the produced output), then: when using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
128
+ .ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced output), then: when using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
129
129
  .ele("Behavior").txt("If any completion tool returns success:false, inspect validation details, repair the output, and call the appropriate completion tool again. Do not stop until a completion tool returns success:true.").up()
130
130
  .up()
131
131
  .up()
132
132
  .ele("Rules")
133
- .ele("Rule").txt("The formal notation (proposeNotation) is the planning artifact: propose it before transforming, iterate it on every discovery, finalize it before completing. The LaTeX explains the dataset; the code merely produces it.").up()
133
+ .ele("Rule").txt("The formal definition (defineNotation) and the materialization (transform code + output rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completing.").up()
134
134
  .ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
135
135
  .ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
136
136
  .ele("Rule").txt("Use the cheapest correct tool. completeObject and replaceRows are low-cost completion tools. executeCommand is a high-cost computation tool and requires an explicit commandDescription.").up()
@@ -4,7 +4,7 @@ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFa
4
4
  import { datasetUpdateSchemaStep } from "../dataset/steps.js";
5
5
  import { getDatasetOutputPath } from "../datasetFiles.js";
6
6
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
- import { createProposeNotationTool } from "../proposeNotation.tool.js";
7
+ import { createDefineNotationTool } from "../defineNotation.tool.js";
8
8
  import { createCompleteObjectTool, createReplaceRowsTool, } from "../writeDatasetRows.tool.js";
9
9
  import { buildTransformDatasetPromptStep, } from "./transform-dataset.steps.js";
10
10
  import { createDatasetId } from "../id.js";
@@ -137,7 +137,7 @@ function createTransformDatasetContextDefinition(params) {
137
137
  sandboxId,
138
138
  runtime,
139
139
  }),
140
- proposeNotation: createProposeNotationTool({
140
+ defineNotation: createDefineNotationTool({
141
141
  datasetId,
142
142
  runtime,
143
143
  }),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.97-beta.development.0",
3
+ "version": "1.22.98-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.97-beta.development.0",
69
- "@ekairos/events": "^1.22.97-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.97-beta.development.0",
68
+ "@ekairos/domain": "^1.22.98-beta.development.0",
69
+ "@ekairos/events": "^1.22.98-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.98-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",
@@ -1,42 +0,0 @@
1
- interface ProposeNotationToolParams {
2
- datasetId: string;
3
- runtime: any;
4
- }
5
- /**
6
- * proposeNotation — declare or ITERATE the formal notation of the dataset.
7
- *
8
- * The notation is the planning artifact: call it FIRST with the initial
9
- * set definition derived from the resources, and call it AGAIN whenever
10
- * the analysis discovers new sets, variables, constraints or corrections.
11
- * Every call appends a revision (the discovery trail is preserved). Mark
12
- * the last call with final=true so the notation describes the produced
13
- * dataset. Predicates may be formal/semantic (we trust them); the few that
14
- * are arithmetic get optional advisory evidence after completion.
15
- */
16
- export declare function createProposeNotationTool({ datasetId, runtime }: ProposeNotationToolParams): import("ai").Tool<{
17
- latex: string;
18
- symbols: {
19
- name: string;
20
- kind: "function" | "set" | "variable" | "constant" | "predicate";
21
- description: string;
22
- latex?: string | undefined;
23
- }[];
24
- predicates: {
25
- id: string;
26
- description: string;
27
- latex: string;
28
- checkJson?: string | undefined;
29
- }[];
30
- reason: string;
31
- final?: boolean | undefined;
32
- }, {
33
- success: boolean;
34
- error: string;
35
- } | {
36
- warning?: string | undefined;
37
- success: boolean;
38
- version: number;
39
- status: import("./notation.js").DatasetNotationStatus;
40
- error?: undefined;
41
- }>;
42
- export {};