npm - @ekairos/dataset - Versions diffs - 1.22.97-beta.development.0 → 1.22.98-beta.development.0 - Mend

@ekairos/dataset 1.22.97-beta.development.0 → 1.22.98-beta.development.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (13)

package/dist/builder/persistence.js +4 -0
package/dist/builder/types.d.ts +4 -0
package/dist/defineNotation.tool.d.ts +49 -0
package/dist/{proposeNotation.tool.js → defineNotation.tool.js} +33 -23
package/dist/file/file-dataset.agent.js +2 -2
package/dist/file/prompts.js +7 -7
package/dist/materializeDataset.tool.d.ts +1 -1
package/dist/notation.d.ts +30 -26
package/dist/notation.js +24 -22
package/dist/transform/prompts.js +5 -5
package/dist/transform/transform-dataset.agent.js +2 -2
package/package.json +4 -4
package/dist/proposeNotation.tool.d.ts +0 -42

package/dist/builder/persistence.js CHANGED Viewed

@@ -147,10 +147,12 @@ export async function finalizeBuildResult(runtime, datasetId, withFirst) {
             });
         },
     };
+    const notation = (datasetResult.data?.notation ?? null);
     if (!withFirst) {
         return {
             datasetId,
             dataset: datasetResult.data,
+            notation,
             previewRows: previewResult.rows,
             reader,
         };
@@ -159,6 +161,7 @@ export async function finalizeBuildResult(runtime, datasetId, withFirst) {
     return {
         datasetId,
         dataset: datasetResult.data,
+        notation,
         previewRows: previewResult.rows,
         reader,
         firstRow: firstResult.row,
@@ -181,6 +184,7 @@ export function createDatasetBuildResult(runtime, params) {
     return {
         datasetId: params.datasetId,
         dataset: params.dataset,
+        notation: (params.dataset?.notation ?? null),
         previewRows: params.previewRows,
         reader,
         ...(params.firstRow !== undefined ? { firstRow: params.firstRow } : {}),

package/dist/builder/types.d.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
 import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
 import type { ContextIdentifier, ContextReactor, StoredContextResource } from "@ekairos/events";
 import { datasetDomain } from "../schema.js";
+import type { DatasetNotation } from "../notation.js";
 export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
     query: InstaQLParams<DomainInstantSchema<D>>;
     title?: string;
@@ -72,6 +73,9 @@ export type DatasetReader = {
 export type DatasetBuildResult = {
     datasetId: string;
     dataset: any;
+    /** the formal definition (intensional face), co-equal with the rows */
+    notation: DatasetNotation | null;
+    /** preview of the materialization (extensional face) */
     previewRows: any[];
     reader: DatasetReader;
     object?: any | null;

package/dist/defineNotation.tool.d.ts ADDED Viewed

@@ -0,0 +1,49 @@
+interface DefineNotationToolParams {
+    datasetId: string;
+    runtime: any;
+}
+/**
+ * defineNotation — author or REFINE the formal DEFINITION of the dataset.
+ *
+ * A dataset has two co-equal faces: its formal definition (the notation —
+ * the proposition that defines the set, in LaTeX) and its materialization
+ * (the rows + the code that produces them). They sit at the SAME level: the
+ * definition is not a side note about the data, it IS the dataset stated
+ * intensionally. The same notation is the PLAN (you state it first and the
+ * materialization realizes it) and, finalized, the RESULT (it describes what
+ * you produced).
+ *
+ * Call it FIRST with the initial definition derived from the resources, and
+ * AGAIN whenever the analysis discovers new sets, variables, constraints or
+ * corrections — every call keeps the prior version in history. Mark the last
+ * call with final=true so the definition describes the produced dataset.
+ * Predicates may be formal/semantic (trusted); the few that are arithmetic
+ * MAY carry optional advisory evidence.
+ */
+export declare function createDefineNotationTool({ datasetId, runtime }: DefineNotationToolParams): import("ai").Tool<{
+    latex: string;
+    symbols: {
+        name: string;
+        kind: "function" | "set" | "variable" | "constant" | "predicate";
+        description: string;
+        latex?: string | undefined;
+    }[];
+    predicates: {
+        id: string;
+        description: string;
+        latex: string;
+        checkJson?: string | undefined;
+    }[];
+    reason: string;
+    final?: boolean | undefined;
+}, {
+    success: boolean;
+    error: string;
+} | {
+    warning?: string | undefined;
+    success: boolean;
+    version: number;
+    status: import("./notation.js").DatasetNotationStatus;
+    error?: undefined;
+}>;
+export {};

package/dist/{proposeNotation.tool.js → defineNotation.tool.js} RENAMED Viewed

@@ -43,44 +43,54 @@ async function getDatasetService(runtime) {
     return new DatasetService(scoped.db);
 }
 /**
- * proposeNotation — declare or ITERATE the formal notation of the dataset.
+ * defineNotation — author or REFINE the formal DEFINITION of the dataset.
  *
- * The notation is the planning artifact: call it FIRST with the initial
- * set definition derived from the resources, and call it AGAIN whenever
- * the analysis discovers new sets, variables, constraints or corrections.
- * Every call appends a revision (the discovery trail is preserved). Mark
- * the last call with final=true so the notation describes the produced
- * dataset. Predicates may be formal/semantic (we trust them); the few that
- * are arithmetic get optional advisory evidence after completion.
+ * A dataset has two co-equal faces: its formal definition (the notation —
+ * the proposition that defines the set, in LaTeX) and its materialization
+ * (the rows + the code that produces them). They sit at the SAME level: the
+ * definition is not a side note about the data, it IS the dataset stated
+ * intensionally. The same notation is the PLAN (you state it first and the
+ * materialization realizes it) and, finalized, the RESULT (it describes what
+ * you produced).
+ *
+ * Call it FIRST with the initial definition derived from the resources, and
+ * AGAIN whenever the analysis discovers new sets, variables, constraints or
+ * corrections — every call keeps the prior version in history. Mark the last
+ * call with final=true so the definition describes the produced dataset.
+ * Predicates may be formal/semantic (trusted); the few that are arithmetic
+ * MAY carry optional advisory evidence.
  */
-export function createProposeNotationTool({ datasetId, runtime }) {
+export function createDefineNotationTool({ datasetId, runtime }) {
     return tool({
         description: [
-            "Declare or refine the FORMAL NOTATION of the dataset: the dataset as a",
-            "set defined in LaTeX (set-builder, relational algebra, quantified or",
-            "even semantic predicates) plus the symbols it binds. The definition is",
-            "a logical proposition, possibly derived — it does not need to be",
-            "mechanically provable; we trust the formality. This is your PLANNING",
-            "artifact — propose it before writing any code, and revise it whenever",
-            "the analysis discovers new sets, variables or constraints. For the few",
-            "predicates that happen to be arithmetic you MAY attach a checkJson for",
-            "optional advisory evidence (non-blocking, never a verdict).",
+            "Author or refine the formal DEFINITION of the dataset: the dataset as a",
+            "set in LaTeX (set-builder, relational algebra, quantified or even",
+            "semantic predicates) plus the symbols it binds. This definition and the",
+            "materialization (rows + code) are TWO CO-EQUAL FACES of the dataset —",
+            "the definition is the dataset stated intensionally, not a comment on it.",
+            "It is your PLAN (state it before writing any code; the materialization",
+            "realizes it) and, once final, the RESULT (it describes what you",
+            "produced). The definition is a logical proposition, possibly derived —",
+            "it need not be mechanically provable; we trust the formality. State it",
+            "first, refine it on every discovery, and set final=true on the last",
+            "call. For the few predicates that are arithmetic you MAY attach a",
+            "checkJson for optional advisory evidence (non-blocking, never a verdict).",
         ].join(" "),
         inputSchema: z.object({
             latex: z
                 .string()
                 .describe("Main definition of the dataset as a set, in LaTeX. Example: 'D = \\\\{(w,r,t) \\\\mid t = \\\\sum_{o \\\\in Orders} o.amount,\\\\; o.status = paid\\\\}'"),
-            symbols: z.array(symbolSchema).describe("Symbols bound by the notation"),
+            symbols: z.array(symbolSchema).describe("Symbols bound by the definition"),
             predicates: z
                 .array(predicateSchema)
-                .describe("Claims about the dataset; include machine-checkable forms when possible"),
+                .describe("Claims the set satisfies; attach a checkJson only when arithmetic"),
             reason: z
                 .string()
-                .describe("What discovery triggered this revision (or 'initial proposal')"),
+                .describe("What this revision states or what discovery triggered it (or 'initial definition')"),
             final: z
                 .boolean()
                 .optional()
-                .describe("true when this notation describes the dataset you are about to complete"),
+                .describe("true when this definition describes the dataset you are about to complete (the RESULT)"),
         }),
         execute: async ({ latex, symbols, predicates, reason, final }) => {
             try {
@@ -121,7 +131,7 @@ export function createProposeNotationTool({ datasetId, runtime }) {
                 if (!update.ok) {
                     return { success: false, error: update.error };
                 }
-                console.log(`[Dataset ${datasetId}] notation v${notation.version} (${notation.status}): ${reason}`);
+                console.log(`[Dataset ${datasetId}] definition v${notation.version} (${notation.status}): ${reason}`);
                 return {
                     success: true,
                     version: notation.version,

package/dist/file/file-dataset.agent.js CHANGED Viewed

@@ -3,7 +3,7 @@ import { createClearDatasetTool } from "../clearDataset.tool.js";
 import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
 import { datasetGetByIdStep } from "../dataset/steps.js";
 import { createExecuteCommandTool } from "../executeCommand.tool.js";
-import { createProposeNotationTool } from "../proposeNotation.tool.js";
+import { createDefineNotationTool } from "../defineNotation.tool.js";
 import { createGenerateSchemaTool } from "./generateSchema.tool.js";
 import { buildFileDatasetPromptStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
 import { createDatasetId } from "../id.js";
@@ -152,7 +152,7 @@ function createFileParseContextDefinition(params) {
                 sandboxId,
                 runtime,
             }),
-            proposeNotation: createProposeNotationTool({
+            defineNotation: createDefineNotationTool({
                 datasetId,
                 runtime,
             }),

package/dist/file/prompts.js CHANGED Viewed

@@ -239,15 +239,15 @@ function buildInstructions(context) {
         .ele("Note").txt("FilePreview contains: TotalRows (total data rows), Metadata (file properties with JSON output), Head (first N raw file lines), Tail (last N lines if present), Mid (middle sample for large files). Each section shows Description, Script (full Python code), Command, Stdout (raw content), Stderr. This allows you to understand the exact file format.").up()
         .up();
     xml = xml
-        .ele("Step", { number: "2", name: "Propose Formal Notation (PLAN FIRST)" })
-        .ele("Action").txt("Call proposeNotation with the INITIAL formal definition of the dataset as a set, derived from the file preview: D = { r | r ∈ File ∧ <constraints> } in LaTeX, the symbols it binds (sets, variables, functions) and the predicates every row will satisfy").up()
+        .ele("Step", { number: "2", name: "Define the Dataset (PLAN FIRST)" })
+        .ele("Action").txt("Call defineNotation with the INITIAL formal definition of the dataset as a set, derived from the file preview: D = { r | r ∈ File ∧ <constraints> } in LaTeX, the symbols it binds (sets, variables, functions) and the predicates the set satisfies").up()
         .ele("Requirements")
-        .ele("Requirement").txt("The notation is your PLANNING artifact: it comes BEFORE the schema and BEFORE any parsing code. The LaTeX that explains the dataset matters more than the code that produces it").up()
+        .ele("Requirement").txt("The definition and the materialization (schema + parsing code + rows) are TWO CO-EQUAL FACES of the dataset. The definition is the dataset stated intensionally — author it FIRST; it is your PLAN and the code is built to realize it").up()
         .ele("Requirement").txt("Use set-builder notation, quantifiers and arithmetic in LaTeX (e.g. D = \\{(c, q, p) \\mid q \\in \\mathbb{Z}^{+},\\; p \\in \\mathbb{R}_{\\geq 0}\\})").up()
         .ele("Requirement").txt("Declare every discovered set and variable as a symbol with a one-line meaning").up()
         .ele("Requirement").txt("Predicates are formal claims we trust; they may be semantic (e.g. 'x es una frase relevante'). Only for the few that are purely arithmetic (row counts, field types, ranges, uniqueness, aggregates) you MAY add a checkJson for optional advisory evidence — leave every other claim without checkJson").up()
-        .ele("Requirement").txt("ITERATE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call proposeNotation again with the refined notation and the reason. The notation is not definitive — discovery is the point").up()
-        .ele("Requirement").txt("Before calling completeDataset, call proposeNotation one last time with final=true so the notation describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
+        .ele("Requirement").txt("REFINE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call defineNotation again with the updated definition and the reason. The definition is not fixed up front — discovery is the point").up()
+        .ele("Requirement").txt("Before calling completeDataset, call defineNotation one last time with final=true so the definition becomes the RESULT — it describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
         .up()
         .up();
     if (hasProvidedSchema) {
@@ -292,12 +292,12 @@ function buildInstructions(context) {
         .up()
         .up()
         .ele("Step", { number: "5", name: "Complete and Validate" })
-        .ele("Action").txt("Call proposeNotation with final=true (refined to match the produced rows), then call completeDataset to validate the dataset").up()
+        .ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced rows), then call completeDataset to validate the dataset").up()
         .ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
         .up()
         .up()
         .ele("Rules")
-        .ele("Rule").txt("The formal notation (proposeNotation) is the planning artifact: propose it first, iterate it on every discovery, finalize it before completion. The LaTeX explains the dataset; the code merely produces it").up()
+        .ele("Rule").txt("The formal definition (defineNotation) and the materialization (schema + code + rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completion").up()
         .ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
         .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the input language").up()
         .ele("Rule").txt("Original/input language applies to extracted values only, not to JSON object keys").up()

package/dist/materializeDataset.tool.d.ts CHANGED Viewed

@@ -18,8 +18,8 @@ declare const materializeDatasetToolInputSchema: z.ZodObject<{
     }, z.core.$strip>>>;
     texts: z.ZodOptional<z.ZodArray<z.ZodObject<{
         name: z.ZodOptional<z.ZodString>;
-        text: z.ZodString;
         description: z.ZodOptional<z.ZodString>;
+        text: z.ZodString;
         mimeType: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>>;
     datasets: z.ZodOptional<z.ZodArray<z.ZodObject<{

package/dist/notation.d.ts CHANGED Viewed

@@ -1,29 +1,31 @@
 /**
- * Formal notation for datasets.
+ * Formal notation for datasets — the dataset stated intensionally.
  *
- * A dataset is the materialization of a set defined by FORMAL NOTATION:
- * LaTeX (set-builder, relational algebra, quantified predicates) that
- * EXPLAINS the data — what sets it draws from, what variables it binds,
- * what every member satisfies. The definition is a logical proposition,
- * possibly DERIVED (a syllogism), so it is NOT, in general, mechanically
- * verifiable: a predicate may be semantic ("x es una frase divertida"),
- * and the set is still perfectly well-formed. We TRUST that the formality
- * and the produced dataset are valid — formal notation is the planning
- * and explanatory artifact, not a proof obligation.
+ * A dataset has TWO CO-EQUAL FACES at the same level:
+ * - its formal DEFINITION (this notation: the proposition that defines the
+ *   set, in LaTeX), and
+ * - its MATERIALIZATION (the rows + the code that produces them).
+ * The notation is not a comment about the data; it IS the dataset, written
+ * as a logical statement. The materialization is the same set written
+ * extensionally. Neither is subordinate to the other.
  *
- * It is the planning artifact: it starts as a proposal from the first look
- * at the resources and is ITERATED as the analysis discovers new sets,
- * variables and constraints. The notation is not definitive — discovery is
- * the point.
+ * The SAME notation plays two roles across the lifecycle: it is the PLAN
+ * (status "plan": stated first, the materialization is built to realize it)
+ * and, once finalized, the RESULT (status "result": it describes exactly
+ * what was produced). It is iterated in between — every revision keeps the
+ * prior version in `history`, so the discovery trail stays visible.
+ *
+ * The definition is a logical proposition, possibly DERIVED (a syllogism),
+ * so it is NOT, in general, mechanically verifiable: a predicate may be
+ * semantic ("x es una frase divertida") and the set is still well-formed.
+ * We TRUST the formality and the produced dataset — there is no verdict.
  *
  * SOME predicates happen to be arithmetic (a row count, a field type, a
- * preserved total). For those, and only those, we can attach OPTIONAL
- * arithmetic evidence computed over the produced rows. That evidence is
- * advisory: a contradiction is a hint worth surfacing, never a verdict
- * that the dataset is invalid. Predicates with no arithmetic form are
- * "asserted" — formal claims we trust. Nothing here blocks or changes a
- * dataset build; the notation simply rides alongside on
- * dataset_datasets.notation.
+ * preserved total). For those, and only those, we attach OPTIONAL evidence
+ * computed over the rows. It is advisory: a contradiction is a hint, never
+ * a claim that the dataset is invalid. Predicates with no arithmetic form
+ * are "asserted" — trusted. Nothing here blocks or changes a build; the
+ * notation rides on dataset_datasets.notation.
  */
 export type DatasetNotationSymbolKind = "set" | "variable" | "function" | "constant" | "predicate";
 export type DatasetNotationSymbol = {
@@ -125,12 +127,14 @@ export type DatasetNotationRevision = {
     at: number;
 };
 /**
- * Lifecycle of the formal notation. There is intentionally NO
- * "verified"/"violated" verdict — the dataset's validity is trusted, not
- * proven. Advisory arithmetic evidence (when any predicate has it) lives
- * in `checks`, separate from this status.
+ * The role the notation currently plays — the two ends of its life:
+ * - "plan":   stated before/while building; the materialization realizes it
+ * - "result": finalized; it describes the dataset that was produced
+ * There is intentionally NO "verified"/"violated" verdict — validity is
+ * trusted, not proven. Iteration is tracked by `version`/`history`; advisory
+ * arithmetic evidence lives in `checks`, separate from this role.
  */
-export type DatasetNotationStatus = "proposed" | "refined" | "final";
+export type DatasetNotationStatus = "plan" | "result";
 export type DatasetNotation = {
     version: number;
     status: DatasetNotationStatus;

package/dist/notation.js CHANGED Viewed

@@ -1,29 +1,31 @@
 /**
- * Formal notation for datasets.
+ * Formal notation for datasets — the dataset stated intensionally.
  *
- * A dataset is the materialization of a set defined by FORMAL NOTATION:
- * LaTeX (set-builder, relational algebra, quantified predicates) that
- * EXPLAINS the data — what sets it draws from, what variables it binds,
- * what every member satisfies. The definition is a logical proposition,
- * possibly DERIVED (a syllogism), so it is NOT, in general, mechanically
- * verifiable: a predicate may be semantic ("x es una frase divertida"),
- * and the set is still perfectly well-formed. We TRUST that the formality
- * and the produced dataset are valid — formal notation is the planning
- * and explanatory artifact, not a proof obligation.
+ * A dataset has TWO CO-EQUAL FACES at the same level:
+ * - its formal DEFINITION (this notation: the proposition that defines the
+ *   set, in LaTeX), and
+ * - its MATERIALIZATION (the rows + the code that produces them).
+ * The notation is not a comment about the data; it IS the dataset, written
+ * as a logical statement. The materialization is the same set written
+ * extensionally. Neither is subordinate to the other.
  *
- * It is the planning artifact: it starts as a proposal from the first look
- * at the resources and is ITERATED as the analysis discovers new sets,
- * variables and constraints. The notation is not definitive — discovery is
- * the point.
+ * The SAME notation plays two roles across the lifecycle: it is the PLAN
+ * (status "plan": stated first, the materialization is built to realize it)
+ * and, once finalized, the RESULT (status "result": it describes exactly
+ * what was produced). It is iterated in between — every revision keeps the
+ * prior version in `history`, so the discovery trail stays visible.
+ *
+ * The definition is a logical proposition, possibly DERIVED (a syllogism),
+ * so it is NOT, in general, mechanically verifiable: a predicate may be
+ * semantic ("x es una frase divertida") and the set is still well-formed.
+ * We TRUST the formality and the produced dataset — there is no verdict.
  *
  * SOME predicates happen to be arithmetic (a row count, a field type, a
- * preserved total). For those, and only those, we can attach OPTIONAL
- * arithmetic evidence computed over the produced rows. That evidence is
- * advisory: a contradiction is a hint worth surfacing, never a verdict
- * that the dataset is invalid. Predicates with no arithmetic form are
- * "asserted" — formal claims we trust. Nothing here blocks or changes a
- * dataset build; the notation simply rides alongside on
- * dataset_datasets.notation.
+ * preserved total). For those, and only those, we attach OPTIONAL evidence
+ * computed over the rows. It is advisory: a contradiction is a hint, never
+ * a claim that the dataset is invalid. Predicates with no arithmetic form
+ * are "asserted" — trusted. Nothing here blocks or changes a build; the
+ * notation rides on dataset_datasets.notation.
  */
 /**
  * Iterate the notation: every revision bumps the version and appends to
@@ -40,7 +42,7 @@ export function reviseDatasetNotation(previous, input) {
     };
     return {
         version,
-        status: input.final ? "final" : previous ? "refined" : "proposed",
+        status: input.final ? "result" : "plan",
         latex: input.latex,
         symbols: input.symbols ?? previous?.symbols ?? [],
         predicates: input.predicates ?? previous?.predicates ?? [],

package/dist/transform/prompts.js CHANGED Viewed

@@ -102,9 +102,9 @@ function buildInstructions(context) {
         .ele("Action").txt(`Review ContextResources and any InputPreviews to understand current record structures, evidence, fields, shapes and edge cases. ${multipleInputsNote}`).up()
         .ele("Note").txt("ContextResources DescriptorJson may include inline text, metadata, previewRows, or other visible evidence. Treat that visible content as already available context. Do not use executeCommand only to reread it.").up()
         .up()
-        .ele("Step", { number: "2", name: "Propose Formal Notation (PLAN FIRST)" })
-        .ele("Action").txt("Call proposeNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates every output row satisfies.").up()
-        .ele("Note").txt("The notation is the planning artifact and comes BEFORE the transformation: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved across the transformation). The definition is a formal proposition we trust — predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. ITERATE the notation whenever inspection of the inputs reveals new sets, variables or corrections, and call proposeNotation with final=true just before completing — any arithmetic predicates then get advisory evidence (never a verdict).").up()
+        .ele("Step", { number: "2", name: "Define the Output Dataset (PLAN FIRST)" })
+        .ele("Action").txt("Call defineNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates the output set satisfies.").up()
+        .ele("Note").txt("The definition and the materialization (the transform code + output rows) are TWO CO-EQUAL FACES of the dataset; author the definition FIRST as the PLAN: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved). The definition is a formal proposition we trust — predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. REFINE the definition whenever inspection of the inputs reveals new sets, variables or corrections, and call defineNotation with final=true just before completing — as the RESULT it describes the produced output; any arithmetic predicates then get advisory evidence (never a verdict).").up()
         .up()
         .ele("Step", { number: "3", name: "Plan Mapping" })
         .ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
@@ -125,12 +125,12 @@ function buildInstructions(context) {
         .ele("Requirement").txt("Do not install packages, download dependencies, or access the network from executeCommand. Use only the available runtime and standard library unless a dependency is already present.").up()
         .up()
         .ele("Step", { number: "5", name: "Validate and Complete" })
-        .ele("Action").txt("Call proposeNotation with final=true (refined to match the produced output), then: when using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
+        .ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced output), then: when using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
         .ele("Behavior").txt("If any completion tool returns success:false, inspect validation details, repair the output, and call the appropriate completion tool again. Do not stop until a completion tool returns success:true.").up()
         .up()
         .up()
         .ele("Rules")
-        .ele("Rule").txt("The formal notation (proposeNotation) is the planning artifact: propose it before transforming, iterate it on every discovery, finalize it before completing. The LaTeX explains the dataset; the code merely produces it.").up()
+        .ele("Rule").txt("The formal definition (defineNotation) and the materialization (transform code + output rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completing.").up()
         .ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
         .ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
         .ele("Rule").txt("Use the cheapest correct tool. completeObject and replaceRows are low-cost completion tools. executeCommand is a high-cost computation tool and requires an explicit commandDescription.").up()

package/dist/transform/transform-dataset.agent.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFa
 import { datasetUpdateSchemaStep } from "../dataset/steps.js";
 import { getDatasetOutputPath } from "../datasetFiles.js";
 import { createExecuteCommandTool } from "../executeCommand.tool.js";
-import { createProposeNotationTool } from "../proposeNotation.tool.js";
+import { createDefineNotationTool } from "../defineNotation.tool.js";
 import { createCompleteObjectTool, createReplaceRowsTool, } from "../writeDatasetRows.tool.js";
 import { buildTransformDatasetPromptStep, } from "./transform-dataset.steps.js";
 import { createDatasetId } from "../id.js";
@@ -137,7 +137,7 @@ function createTransformDatasetContextDefinition(params) {
                 sandboxId,
                 runtime,
             }),
-            proposeNotation: createProposeNotationTool({
+            defineNotation: createDefineNotationTool({
                 datasetId,
                 runtime,
             }),

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ekairos/dataset",
-  "version": "1.22.97-beta.development.0",
+  "version": "1.22.98-beta.development.0",
   "description": "Pulzar Dataset Tools",
   "type": "module",
   "main": "dist/index.js",
@@ -65,9 +65,9 @@
     "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
   },
   "dependencies": {
-    "@ekairos/domain": "^1.22.97-beta.development.0",
-    "@ekairos/events": "^1.22.97-beta.development.0",
-    "@ekairos/sandbox": "^1.22.97-beta.development.0",
+    "@ekairos/domain": "^1.22.98-beta.development.0",
+    "@ekairos/events": "^1.22.98-beta.development.0",
+    "@ekairos/sandbox": "^1.22.98-beta.development.0",
     "@instantdb/admin": "0.22.158",
     "@instantdb/core": "0.22.142",
     "ai": "^5.0.44",

package/dist/proposeNotation.tool.d.ts DELETED Viewed

@@ -1,42 +0,0 @@
-interface ProposeNotationToolParams {
-    datasetId: string;
-    runtime: any;
-}
-/**
- * proposeNotation — declare or ITERATE the formal notation of the dataset.
- *
- * The notation is the planning artifact: call it FIRST with the initial
- * set definition derived from the resources, and call it AGAIN whenever
- * the analysis discovers new sets, variables, constraints or corrections.
- * Every call appends a revision (the discovery trail is preserved). Mark
- * the last call with final=true so the notation describes the produced
- * dataset. Predicates may be formal/semantic (we trust them); the few that
- * are arithmetic get optional advisory evidence after completion.
- */
-export declare function createProposeNotationTool({ datasetId, runtime }: ProposeNotationToolParams): import("ai").Tool<{
-    latex: string;
-    symbols: {
-        name: string;
-        kind: "function" | "set" | "variable" | "constant" | "predicate";
-        description: string;
-        latex?: string | undefined;
-    }[];
-    predicates: {
-        id: string;
-        description: string;
-        latex: string;
-        checkJson?: string | undefined;
-    }[];
-    reason: string;
-    final?: boolean | undefined;
-}, {
-    success: boolean;
-    error: string;
-} | {
-    warning?: string | undefined;
-    success: boolean;
-    version: number;
-    status: import("./notation.js").DatasetNotationStatus;
-    error?: undefined;
-}>;
-export {};