@ekairos/dataset 1.22.95-beta.development.0 → 1.22.96-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import { DatasetService } from "../service.js";
2
2
  import { datasetDomain } from "../schema.js";
3
- import { inferQueryNotation, verifyDatasetNotation, } from "../notation.js";
3
+ import { annotateNotationEvidence, inferQueryNotation, } from "../notation.js";
4
4
  import { datasetGetByIdStep, datasetPreviewRowsStep, datasetReadOneStep, datasetReadRowsStep, } from "../dataset/steps.js";
5
5
  import { inferDatasetSchema, validateRows } from "./schemaInference.js";
6
6
  import { rowsToJsonl } from "./rows.js";
@@ -79,9 +79,9 @@ export async function materializeRowsToDataset(runtime, params) {
79
79
  throw new Error(statusResult.error);
80
80
  }
81
81
  // Formal notation, informative only (never blocks the build): a notation
82
- // proposed during the build (agent iterations) is verified against the
83
- // materialized rows; query-backed builds with no proposed notation get
84
- // the deterministic one derived from query + schema + rows.
82
+ // proposed during the build (agent iterations) gets advisory evidence
83
+ // against the materialized rows; query-backed builds with no proposed
84
+ // notation get the deterministic one derived from query + schema + rows.
85
85
  try {
86
86
  const existing = await service.getDatasetById(params.datasetId);
87
87
  const previous = (existing.ok ? existing.data?.notation : null);
@@ -100,7 +100,7 @@ export async function materializeRowsToDataset(runtime, params) {
100
100
  if (candidate) {
101
101
  await service.updateDatasetNotation({
102
102
  datasetId: params.datasetId,
103
- notation: verifyDatasetNotation(candidate, params.rows),
103
+ notation: annotateNotationEvidence(candidate, params.rows),
104
104
  });
105
105
  }
106
106
  }
@@ -1,6 +1,6 @@
1
1
  import Ajv from "ajv";
2
2
  import { getDatasetOutputPath } from "./datasetFiles.js";
3
- import { verifyDatasetNotation } from "./notation.js";
3
+ import { annotateNotationEvidence } from "./notation.js";
4
4
  import { DatasetService } from "./service.js";
5
5
  import { getDatasetRuntimeDb } from "./dataset/steps.js";
6
6
  import { readDatasetSandboxFileStep, readDatasetSandboxTextFileStep, runDatasetSandboxCommandStep, } from "./sandbox/steps.js";
@@ -177,18 +177,18 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
177
177
  }
178
178
  console.log(`[Dataset ${datasetId}] Dataset marked as COMPLETED (${totalValidRows} valid rows)`);
179
179
  console.log(`[Dataset ${datasetId}] ========================================`);
180
- // Formal-notation verification: arithmetic checks of the latest notation
181
- // against the produced rows. Informative only — a failure here never
182
- // affects the dataset completion result.
180
+ // Formal-notation evidence: advisory arithmetic annotation of the latest
181
+ // notation against the produced rows. Informative only — it never
182
+ // affects the dataset completion result or the dataset's validity.
183
183
  try {
184
- await verifyNotationAgainstJsonl({
184
+ await annotateNotationFromJsonl({
185
185
  service,
186
186
  datasetId,
187
187
  jsonlBase64: fileRead.contentBase64,
188
188
  });
189
189
  }
190
190
  catch (error) {
191
- console.error(`[Dataset ${datasetId}] notation verification skipped:`, error instanceof Error ? error.message : String(error));
191
+ console.error(`[Dataset ${datasetId}] notation annotation skipped:`, error instanceof Error ? error.message : String(error));
192
192
  }
193
193
  return {
194
194
  success: true,
@@ -201,8 +201,8 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
201
201
  dataFileId: uploadResult.data.fileId,
202
202
  };
203
203
  }
204
- const NOTATION_VERIFY_MAX_ROWS = 50000;
205
- async function verifyNotationAgainstJsonl(params) {
204
+ const NOTATION_EVIDENCE_MAX_ROWS = 50000;
205
+ async function annotateNotationFromJsonl(params) {
206
206
  const existing = await params.service.getDatasetById(params.datasetId);
207
207
  const notation = (existing.ok ? existing.data?.notation : null);
208
208
  if (!notation || !Array.isArray(notation.predicates) || notation.predicates.length === 0) {
@@ -223,17 +223,19 @@ async function verifyNotationAgainstJsonl(params) {
223
223
  catch {
224
224
  // malformed lines were already handled by schema validation
225
225
  }
226
- if (rows.length >= NOTATION_VERIFY_MAX_ROWS)
226
+ if (rows.length >= NOTATION_EVIDENCE_MAX_ROWS)
227
227
  break;
228
228
  }
229
- const verified = verifyDatasetNotation(notation, rows);
229
+ const annotated = annotateNotationEvidence(notation, rows);
230
230
  await params.service.updateDatasetNotation({
231
231
  datasetId: params.datasetId,
232
- notation: verified,
232
+ notation: annotated,
233
233
  });
234
- const failed = (verified.checks ?? []).filter((check) => check.status === "failed");
235
- console.log(`[Dataset ${params.datasetId}] notation v${verified.version} ${verified.status}` +
236
- (failed.length ? ` (${failed.length} predicados violados)` : ""));
234
+ const contradicted = (annotated.checks ?? []).filter((check) => check.status === "contradicted");
235
+ console.log(`[Dataset ${params.datasetId}] notation v${annotated.version} (${annotated.status})` +
236
+ (contradicted.length
237
+ ? ` — ${contradicted.length} predicado(s) con evidencia contraria (advisory)`
238
+ : ""));
237
239
  }
238
240
  function resolveExecutionStoragePath(outputPath, datasetId) {
239
241
  const normalized = String(outputPath ?? "").replace(/\\/g, "/");
@@ -245,9 +245,9 @@ function buildInstructions(context) {
245
245
  .ele("Requirement").txt("The notation is your PLANNING artifact: it comes BEFORE the schema and BEFORE any parsing code. The LaTeX that explains the dataset matters more than the code that produces it").up()
246
246
  .ele("Requirement").txt("Use set-builder notation, quantifiers and arithmetic in LaTeX (e.g. D = \\{(c, q, p) \\mid q \\in \\mathbb{Z}^{+},\\; p \\in \\mathbb{R}_{\\geq 0}\\})").up()
247
247
  .ele("Requirement").txt("Declare every discovered set and variable as a symbol with a one-line meaning").up()
248
- .ele("Requirement").txt("Give predicates a machine-checkable checkJson whenever the claim is arithmetic (row counts, field types, ranges, uniqueness, aggregates); leave semantic-only claims without checkJson").up()
248
+ .ele("Requirement").txt("Predicates are formal claims we trust; they may be semantic (e.g. 'x es una frase relevante'). Only for the few that are purely arithmetic (row counts, field types, ranges, uniqueness, aggregates) you MAY add a checkJson for optional advisory evidence — leave every other claim without checkJson").up()
249
249
  .ele("Requirement").txt("ITERATE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call proposeNotation again with the refined notation and the reason. The notation is not definitive — discovery is the point").up()
250
- .ele("Requirement").txt("Before calling completeDataset, call proposeNotation one last time with final=true so the notation describes EXACTLY the dataset you produced; its checkable predicates will be verified arithmetically against the rows").up()
250
+ .ele("Requirement").txt("Before calling completeDataset, call proposeNotation one last time with final=true so the notation describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
251
251
  .up()
252
252
  .up();
253
253
  if (hasProvidedSchema) {
@@ -1,19 +1,29 @@
1
1
  /**
2
2
  * Formal notation for datasets.
3
3
  *
4
- * A dataset is the materialization of a set defined by formal notation:
4
+ * A dataset is the materialization of a set defined by FORMAL NOTATION:
5
5
  * LaTeX (set-builder, relational algebra, quantified predicates) that
6
6
  * EXPLAINS the data — what sets it draws from, what variables it binds,
7
- * what constraints every row satisfies. The notation is the planning
8
- * artifact: it starts as a proposal from the first look at the resources
9
- * and is ITERATED as the analysis discovers new sets, variables and
10
- * invariants. The final notation describes the produced dataset and its
11
- * machine-checkable predicates are verified with plain arithmetic over
12
- * the actual rows (propositional combinations supported).
7
+ * what every member satisfies. The definition is a logical proposition,
8
+ * possibly DERIVED (a syllogism), so it is NOT, in general, mechanically
9
+ * verifiable: a predicate may be semantic ("x es una frase divertida"),
10
+ * and the set is still perfectly well-formed. We TRUST that the formality
11
+ * and the produced dataset are valid formal notation is the planning
12
+ * and explanatory artifact, not a proof obligation.
13
13
  *
14
- * Verification is informative, never blocking: a dataset completes the
15
- * same way it always did; the notation carries its own verified/violated
16
- * state alongside.
14
+ * It is the planning artifact: it starts as a proposal from the first look
15
+ * at the resources and is ITERATED as the analysis discovers new sets,
16
+ * variables and constraints. The notation is not definitive — discovery is
17
+ * the point.
18
+ *
19
+ * SOME predicates happen to be arithmetic (a row count, a field type, a
20
+ * preserved total). For those, and only those, we can attach OPTIONAL
21
+ * arithmetic evidence computed over the produced rows. That evidence is
22
+ * advisory: a contradiction is a hint worth surfacing, never a verdict
23
+ * that the dataset is invalid. Predicates with no arithmetic form are
24
+ * "asserted" — formal claims we trust. Nothing here blocks or changes a
25
+ * dataset build; the notation simply rides alongside on
26
+ * dataset_datasets.notation.
17
27
  */
18
28
  export type DatasetNotationSymbolKind = "set" | "variable" | "function" | "constant" | "predicate";
19
29
  export type DatasetNotationSymbol = {
@@ -26,10 +36,12 @@ export type DatasetNotationSymbol = {
26
36
  };
27
37
  export type NotationCmpOp = "=" | "!=" | "<" | "<=" | ">" | ">=";
28
38
  /**
29
- * Machine-checkable claims about the dataset, evaluated with plain
30
- * arithmetic over the rows. Field access supports dot-paths into nested
31
- * records ("company.taxId"). Leaf checks are dataset-level propositions;
32
- * and/or/not/implies compose them propositionally.
39
+ * OPTIONAL arithmetic evidence for the subset of predicates that happen to
40
+ * be mechanical (counts, types, ranges, totals). Evaluated over the rows;
41
+ * field access supports dot-paths into nested records ("company.taxId").
42
+ * Leaf checks are dataset-level propositions; and/or/not/implies compose
43
+ * them propositionally. A predicate WITHOUT a check is a formal/semantic
44
+ * claim we trust — that is the normal case, not an exception.
33
45
  */
34
46
  export type NotationCheck = {
35
47
  kind: "row_count";
@@ -88,12 +100,21 @@ export type DatasetNotationPredicate = {
88
100
  description: string;
89
101
  /** the claim in LaTeX, e.g. "\\forall r \\in D:\\; r.amount > 0" */
90
102
  latex: string;
91
- /** machine-checkable form; absent = semantic-only claim (not verified) */
103
+ /**
104
+ * OPTIONAL arithmetic form. Absent (the common case) = a formal/semantic
105
+ * claim we trust without mechanical checking.
106
+ */
92
107
  check?: NotationCheck;
93
108
  };
109
+ /**
110
+ * Advisory evidence for one predicate. Never a verdict on the dataset:
111
+ * - "asserted" formal/semantic claim, trusted, no mechanical check
112
+ * - "supported" arithmetic evidence agrees with the stated claim
113
+ * - "contradicted" arithmetic evidence disagrees — a hint, not a failure
114
+ */
94
115
  export type DatasetNotationCheckResult = {
95
116
  predicateId: string;
96
- status: "passed" | "failed" | "skipped";
117
+ status: "asserted" | "supported" | "contradicted";
97
118
  detail?: string;
98
119
  };
99
120
  export type DatasetNotationRevision = {
@@ -103,7 +124,13 @@ export type DatasetNotationRevision = {
103
124
  reason: string;
104
125
  at: number;
105
126
  };
106
- export type DatasetNotationStatus = "proposed" | "refined" | "final" | "verified" | "violated";
127
+ /**
128
+ * Lifecycle of the formal notation. There is intentionally NO
129
+ * "verified"/"violated" verdict — the dataset's validity is trusted, not
130
+ * proven. Advisory arithmetic evidence (when any predicate has it) lives
131
+ * in `checks`, separate from this status.
132
+ */
133
+ export type DatasetNotationStatus = "proposed" | "refined" | "final";
107
134
  export type DatasetNotation = {
108
135
  version: number;
109
136
  status: DatasetNotationStatus;
@@ -111,8 +138,10 @@ export type DatasetNotation = {
111
138
  latex: string;
112
139
  symbols: DatasetNotationSymbol[];
113
140
  predicates: DatasetNotationPredicate[];
141
+ /** advisory per-predicate evidence (asserted/supported/contradicted) */
114
142
  checks?: DatasetNotationCheckResult[];
115
- verifiedAt?: number;
143
+ /** when the advisory evidence was last computed */
144
+ evidenceAt?: number;
116
145
  history: DatasetNotationRevision[];
117
146
  };
118
147
  export type NotationRevisionInput = {
@@ -139,8 +168,10 @@ type JsonSchemaLike = {
139
168
  /**
140
169
  * A query-backed dataset has a complete deterministic description: the
141
170
  * dataset is the image of a known query over a known domain. No model is
142
- * involved the notation and its checkable predicates derive mechanically
143
- * from the query, the inferred schema and the produced row count.
171
+ * involved, so here the formal definition and its predicates derive
172
+ * mechanically from the query, the inferred schema and the row count — and
173
+ * those predicates DO carry arithmetic evidence (the special case where the
174
+ * formal claims happen to be fully mechanical).
144
175
  */
145
176
  export declare function inferQueryNotation(params: {
146
177
  entityNames: string[];
@@ -154,10 +185,17 @@ type CheckOutcome = {
154
185
  };
155
186
  export declare function evaluateNotationCheck(rows: any[], check: NotationCheck): CheckOutcome;
156
187
  /**
157
- * Verify a notation against produced rows. Pure arithmetic never throws.
158
- * Predicates without a machine-checkable form are reported as "skipped"
159
- * (they remain semantic claims). Returns the notation with check results
160
- * and a verified/violated status.
188
+ * Annotate a notation with ADVISORY arithmetic evidence over the produced
189
+ * rows. Never throws, never blocks, and never changes the notation's
190
+ * lifecycle status — the dataset's validity is trusted, not proven here.
191
+ *
192
+ * Each predicate is reported as:
193
+ * - "asserted" no arithmetic form (formal/semantic claim, trusted)
194
+ * - "supported" arithmetic evidence agrees
195
+ * - "contradicted" arithmetic evidence disagrees (a hint to look, not a
196
+ * verdict that the dataset is wrong)
197
+ * A check that can't be evaluated stays "asserted" — we don't downgrade a
198
+ * trusted claim because of a malformed mechanical form.
161
199
  */
162
- export declare function verifyDatasetNotation(notation: DatasetNotation, rows: any[]): DatasetNotation;
200
+ export declare function annotateNotationEvidence(notation: DatasetNotation, rows: any[]): DatasetNotation;
163
201
  export {};
package/dist/notation.js CHANGED
@@ -1,19 +1,29 @@
1
1
  /**
2
2
  * Formal notation for datasets.
3
3
  *
4
- * A dataset is the materialization of a set defined by formal notation:
4
+ * A dataset is the materialization of a set defined by FORMAL NOTATION:
5
5
  * LaTeX (set-builder, relational algebra, quantified predicates) that
6
6
  * EXPLAINS the data — what sets it draws from, what variables it binds,
7
- * what constraints every row satisfies. The notation is the planning
8
- * artifact: it starts as a proposal from the first look at the resources
9
- * and is ITERATED as the analysis discovers new sets, variables and
10
- * invariants. The final notation describes the produced dataset and its
11
- * machine-checkable predicates are verified with plain arithmetic over
12
- * the actual rows (propositional combinations supported).
7
+ * what every member satisfies. The definition is a logical proposition,
8
+ * possibly DERIVED (a syllogism), so it is NOT, in general, mechanically
9
+ * verifiable: a predicate may be semantic ("x es una frase divertida"),
10
+ * and the set is still perfectly well-formed. We TRUST that the formality
11
+ * and the produced dataset are valid formal notation is the planning
12
+ * and explanatory artifact, not a proof obligation.
13
13
  *
14
- * Verification is informative, never blocking: a dataset completes the
15
- * same way it always did; the notation carries its own verified/violated
16
- * state alongside.
14
+ * It is the planning artifact: it starts as a proposal from the first look
15
+ * at the resources and is ITERATED as the analysis discovers new sets,
16
+ * variables and constraints. The notation is not definitive — discovery is
17
+ * the point.
18
+ *
19
+ * SOME predicates happen to be arithmetic (a row count, a field type, a
20
+ * preserved total). For those, and only those, we can attach OPTIONAL
21
+ * arithmetic evidence computed over the produced rows. That evidence is
22
+ * advisory: a contradiction is a hint worth surfacing, never a verdict
23
+ * that the dataset is invalid. Predicates with no arithmetic form are
24
+ * "asserted" — formal claims we trust. Nothing here blocks or changes a
25
+ * dataset build; the notation simply rides alongside on
26
+ * dataset_datasets.notation.
17
27
  */
18
28
  /**
19
29
  * Iterate the notation: every revision bumps the version and appends to
@@ -80,8 +90,10 @@ function schemaProperties(schema) {
80
90
  /**
81
91
  * A query-backed dataset has a complete deterministic description: the
82
92
  * dataset is the image of a known query over a known domain. No model is
83
- * involved the notation and its checkable predicates derive mechanically
84
- * from the query, the inferred schema and the produced row count.
93
+ * involved, so here the formal definition and its predicates derive
94
+ * mechanically from the query, the inferred schema and the row count — and
95
+ * those predicates DO carry arithmetic evidence (the special case where the
96
+ * formal claims happen to be fully mechanical).
85
97
  */
86
98
  export function inferQueryNotation(params) {
87
99
  const sources = params.entityNames.length ? params.entityNames : ["Domain"];
@@ -367,42 +379,44 @@ export function evaluateNotationCheck(rows, check) {
367
379
  }
368
380
  }
369
381
  /**
370
- * Verify a notation against produced rows. Pure arithmetic never throws.
371
- * Predicates without a machine-checkable form are reported as "skipped"
372
- * (they remain semantic claims). Returns the notation with check results
373
- * and a verified/violated status.
382
+ * Annotate a notation with ADVISORY arithmetic evidence over the produced
383
+ * rows. Never throws, never blocks, and never changes the notation's
384
+ * lifecycle status — the dataset's validity is trusted, not proven here.
385
+ *
386
+ * Each predicate is reported as:
387
+ * - "asserted" no arithmetic form (formal/semantic claim, trusted)
388
+ * - "supported" arithmetic evidence agrees
389
+ * - "contradicted" arithmetic evidence disagrees (a hint to look, not a
390
+ * verdict that the dataset is wrong)
391
+ * A check that can't be evaluated stays "asserted" — we don't downgrade a
392
+ * trusted claim because of a malformed mechanical form.
374
393
  */
375
- export function verifyDatasetNotation(notation, rows) {
394
+ export function annotateNotationEvidence(notation, rows) {
376
395
  const checks = [];
377
- let failed = 0;
378
396
  for (const predicate of notation.predicates ?? []) {
379
397
  if (!predicate.check) {
380
- checks.push({ predicateId: predicate.id, status: "skipped" });
398
+ checks.push({ predicateId: predicate.id, status: "asserted" });
381
399
  continue;
382
400
  }
383
401
  try {
384
402
  const outcome = evaluateNotationCheck(rows, predicate.check);
385
403
  checks.push({
386
404
  predicateId: predicate.id,
387
- status: outcome.ok ? "passed" : "failed",
405
+ status: outcome.ok ? "supported" : "contradicted",
388
406
  detail: outcome.detail,
389
407
  });
390
- if (!outcome.ok)
391
- failed += 1;
392
408
  }
393
409
  catch (error) {
394
410
  checks.push({
395
411
  predicateId: predicate.id,
396
- status: "failed",
397
- detail: `error de evaluación: ${String(error).slice(0, 120)}`,
412
+ status: "asserted",
413
+ detail: `no evaluable: ${String(error).slice(0, 120)}`,
398
414
  });
399
- failed += 1;
400
415
  }
401
416
  }
402
417
  return {
403
418
  ...notation,
404
419
  checks,
405
- status: failed === 0 ? "verified" : "violated",
406
- verifiedAt: Date.now(),
420
+ evidenceAt: Date.now(),
407
421
  };
408
422
  }
@@ -10,8 +10,8 @@ interface ProposeNotationToolParams {
10
10
  * the analysis discovers new sets, variables, constraints or corrections.
11
11
  * Every call appends a revision (the discovery trail is preserved). Mark
12
12
  * the last call with final=true so the notation describes the produced
13
- * dataset; its checkable predicates get verified arithmetically after
14
- * completion.
13
+ * dataset. Predicates may be formal/semantic (we trust them); the few that
14
+ * are arithmetic get optional advisory evidence after completion.
15
15
  */
16
16
  export declare function createProposeNotationTool({ datasetId, runtime }: ProposeNotationToolParams): import("ai").Tool<{
17
17
  latex: string;
@@ -22,8 +22,8 @@ const predicateSchema = z.object({
22
22
  .string()
23
23
  .optional()
24
24
  .describe([
25
- "OPTIONAL machine-checkable form of the claim as a JSON string, verified",
26
- "with plain arithmetic over the produced rows. Shapes:",
25
+ "OPTIONAL arithmetic form of the claim as a JSON string, used only for",
26
+ "advisory evidence over the produced rows (not a verdict). Shapes:",
27
27
  '{"kind":"row_count","op":"=","value":124}',
28
28
  '{"kind":"field_type","field":"amount","type":"number","allowNull":true}',
29
29
  '{"kind":"field_range","field":"amount","min":0}',
@@ -35,7 +35,7 @@ const predicateSchema = z.object({
35
35
  'Propositional composition: {"kind":"and"|"or","checks":[...]},',
36
36
  '{"kind":"not","check":...}, {"kind":"implies","if":...,"then":...}.',
37
37
  "Fields support dot-paths into nested records (company.taxId).",
38
- "Omit for claims that are semantic only.",
38
+ "Omit for formal/semantic claims (the normal case) — they are trusted.",
39
39
  ].join(" ")),
40
40
  });
41
41
  async function getDatasetService(runtime) {
@@ -50,19 +50,21 @@ async function getDatasetService(runtime) {
50
50
  * the analysis discovers new sets, variables, constraints or corrections.
51
51
  * Every call appends a revision (the discovery trail is preserved). Mark
52
52
  * the last call with final=true so the notation describes the produced
53
- * dataset; its checkable predicates get verified arithmetically after
54
- * completion.
53
+ * dataset. Predicates may be formal/semantic (we trust them); the few that
54
+ * are arithmetic get optional advisory evidence after completion.
55
55
  */
56
56
  export function createProposeNotationTool({ datasetId, runtime }) {
57
57
  return tool({
58
58
  description: [
59
59
  "Declare or refine the FORMAL NOTATION of the dataset: the dataset as a",
60
- "set defined in LaTeX (set-builder, relational algebra, quantified",
61
- "predicates) plus the symbols it binds and the predicates every row",
62
- "satisfies. This is your PLANNING artifact propose it before writing",
63
- "any code, and revise it whenever the analysis discovers new sets,",
64
- "variables or invariants. The latest final notation is verified",
65
- "arithmetically against the produced rows (non-blocking).",
60
+ "set defined in LaTeX (set-builder, relational algebra, quantified or",
61
+ "even semantic predicates) plus the symbols it binds. The definition is",
62
+ "a logical proposition, possibly derived — it does not need to be",
63
+ "mechanically provable; we trust the formality. This is your PLANNING",
64
+ "artifact propose it before writing any code, and revise it whenever",
65
+ "the analysis discovers new sets, variables or constraints. For the few",
66
+ "predicates that happen to be arithmetic you MAY attach a checkJson for",
67
+ "optional advisory evidence (non-blocking, never a verdict).",
66
68
  ].join(" "),
67
69
  inputSchema: z.object({
68
70
  latex: z
@@ -1,6 +1,6 @@
1
1
  import { DatasetService } from "../service.js";
2
2
  import { createDatasetId } from "../id.js";
3
- import { inferQueryNotation, verifyDatasetNotation } from "../notation.js";
3
+ import { annotateNotationEvidence, inferQueryNotation } from "../notation.js";
4
4
  function normalizeRows(result) {
5
5
  if (!result || typeof result !== "object")
6
6
  return [];
@@ -63,9 +63,9 @@ export async function queryDomainStep(params) {
63
63
  const previewRows = rows.slice(0, 20);
64
64
  const schema = inferSchema(rows);
65
65
  // query-backed datasets carry a fully deterministic formal notation:
66
- // the set definition, its symbols and its checkable predicates derive
67
- // mechanically from the query + rows; verification is immediate
68
- const notation = verifyDatasetNotation(inferQueryNotation({
66
+ // the set definition, its symbols and its predicates derive mechanically
67
+ // from the query + rows, so their arithmetic evidence is immediate
68
+ const notation = annotateNotationEvidence(inferQueryNotation({
69
69
  entityNames: Object.keys(params.query ?? {}),
70
70
  rowCount: rows.length,
71
71
  schema,
@@ -104,7 +104,7 @@ function buildInstructions(context) {
104
104
  .up()
105
105
  .ele("Step", { number: "2", name: "Propose Formal Notation (PLAN FIRST)" })
106
106
  .ele("Action").txt("Call proposeNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates every output row satisfies.").up()
107
- .ele("Note").txt("The notation is the planning artifact and comes BEFORE the transformation: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which arithmetic invariants the output keeps (e.g. totals preserved across the transformation). Give predicates a machine-checkable checkJson whenever the claim is arithmetic (row counts, ranges, uniqueness, aggregates). ITERATE the notation whenever inspection of the inputs reveals new sets, variables or corrections, and call proposeNotation with final=true just before completing — it will be verified arithmetically against the produced rows.").up()
107
+ .ele("Note").txt("The notation is the planning artifact and comes BEFORE the transformation: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved across the transformation). The definition is a formal proposition we trust predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. ITERATE the notation whenever inspection of the inputs reveals new sets, variables or corrections, and call proposeNotation with final=true just before completing — any arithmetic predicates then get advisory evidence (never a verdict).").up()
108
108
  .up()
109
109
  .ele("Step", { number: "3", name: "Plan Mapping" })
110
110
  .ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.95-beta.development.0",
3
+ "version": "1.22.96-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.95-beta.development.0",
69
- "@ekairos/events": "^1.22.95-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.95-beta.development.0",
68
+ "@ekairos/domain": "^1.22.96-beta.development.0",
69
+ "@ekairos/events": "^1.22.96-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.96-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",