@ekairos/dataset 1.22.95-beta.development.0 → 1.22.97-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/persistence.js +13 -8
- package/dist/completeDataset.steps.js +16 -14
- package/dist/file/prompts.js +2 -2
- package/dist/notation.d.ts +63 -25
- package/dist/notation.js +41 -27
- package/dist/proposeNotation.tool.d.ts +2 -2
- package/dist/proposeNotation.tool.js +13 -11
- package/dist/query/queryDomain.step.js +4 -4
- package/dist/transform/prompts.js +1 -1
- package/package.json +4 -4
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { DatasetService } from "../service.js";
|
|
2
2
|
import { datasetDomain } from "../schema.js";
|
|
3
|
-
import {
|
|
3
|
+
import { annotateNotationEvidence, inferQueryNotation, } from "../notation.js";
|
|
4
4
|
import { datasetGetByIdStep, datasetPreviewRowsStep, datasetReadOneStep, datasetReadRowsStep, } from "../dataset/steps.js";
|
|
5
5
|
import { inferDatasetSchema, validateRows } from "./schemaInference.js";
|
|
6
6
|
import { rowsToJsonl } from "./rows.js";
|
|
@@ -79,9 +79,9 @@ export async function materializeRowsToDataset(runtime, params) {
|
|
|
79
79
|
throw new Error(statusResult.error);
|
|
80
80
|
}
|
|
81
81
|
// Formal notation, informative only (never blocks the build): a notation
|
|
82
|
-
// proposed during the build (agent iterations)
|
|
83
|
-
// materialized rows; query-backed builds with no proposed
|
|
84
|
-
// the deterministic one derived from query + schema + rows.
|
|
82
|
+
// proposed during the build (agent iterations) gets advisory evidence
|
|
83
|
+
// against the materialized rows; query-backed builds with no proposed
|
|
84
|
+
// notation get the deterministic one derived from query + schema + rows.
|
|
85
85
|
try {
|
|
86
86
|
const existing = await service.getDatasetById(params.datasetId);
|
|
87
87
|
const previous = (existing.ok ? existing.data?.notation : null);
|
|
@@ -94,13 +94,18 @@ export async function materializeRowsToDataset(runtime, params) {
|
|
|
94
94
|
explanation: typeof analysis.explanation === "string" ? analysis.explanation : undefined,
|
|
95
95
|
})
|
|
96
96
|
: null;
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
97
|
+
// Query-backed builds are deterministic, so a freshly inferred notation
|
|
98
|
+
// always wins (a prior run's notation would be stale). Only agent-built
|
|
99
|
+
// datasets (no query) keep the notation the agent proposed during the
|
|
100
|
+
// build, which by now is the latest `previous`.
|
|
101
|
+
const candidate = queryNotation ??
|
|
102
|
+
(previous && Array.isArray(previous.predicates) && previous.predicates.length > 0
|
|
103
|
+
? previous
|
|
104
|
+
: null);
|
|
100
105
|
if (candidate) {
|
|
101
106
|
await service.updateDatasetNotation({
|
|
102
107
|
datasetId: params.datasetId,
|
|
103
|
-
notation:
|
|
108
|
+
notation: annotateNotationEvidence(candidate, params.rows),
|
|
104
109
|
});
|
|
105
110
|
}
|
|
106
111
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Ajv from "ajv";
|
|
2
2
|
import { getDatasetOutputPath } from "./datasetFiles.js";
|
|
3
|
-
import {
|
|
3
|
+
import { annotateNotationEvidence } from "./notation.js";
|
|
4
4
|
import { DatasetService } from "./service.js";
|
|
5
5
|
import { getDatasetRuntimeDb } from "./dataset/steps.js";
|
|
6
6
|
import { readDatasetSandboxFileStep, readDatasetSandboxTextFileStep, runDatasetSandboxCommandStep, } from "./sandbox/steps.js";
|
|
@@ -177,18 +177,18 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
177
177
|
}
|
|
178
178
|
console.log(`[Dataset ${datasetId}] Dataset marked as COMPLETED (${totalValidRows} valid rows)`);
|
|
179
179
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
180
|
-
// Formal-notation
|
|
181
|
-
// against the produced rows. Informative only —
|
|
182
|
-
// affects the dataset completion result.
|
|
180
|
+
// Formal-notation evidence: advisory arithmetic annotation of the latest
|
|
181
|
+
// notation against the produced rows. Informative only — it never
|
|
182
|
+
// affects the dataset completion result or the dataset's validity.
|
|
183
183
|
try {
|
|
184
|
-
await
|
|
184
|
+
await annotateNotationFromJsonl({
|
|
185
185
|
service,
|
|
186
186
|
datasetId,
|
|
187
187
|
jsonlBase64: fileRead.contentBase64,
|
|
188
188
|
});
|
|
189
189
|
}
|
|
190
190
|
catch (error) {
|
|
191
|
-
console.error(`[Dataset ${datasetId}] notation
|
|
191
|
+
console.error(`[Dataset ${datasetId}] notation annotation skipped:`, error instanceof Error ? error.message : String(error));
|
|
192
192
|
}
|
|
193
193
|
return {
|
|
194
194
|
success: true,
|
|
@@ -201,8 +201,8 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
201
201
|
dataFileId: uploadResult.data.fileId,
|
|
202
202
|
};
|
|
203
203
|
}
|
|
204
|
-
const
|
|
205
|
-
async function
|
|
204
|
+
const NOTATION_EVIDENCE_MAX_ROWS = 50000;
|
|
205
|
+
async function annotateNotationFromJsonl(params) {
|
|
206
206
|
const existing = await params.service.getDatasetById(params.datasetId);
|
|
207
207
|
const notation = (existing.ok ? existing.data?.notation : null);
|
|
208
208
|
if (!notation || !Array.isArray(notation.predicates) || notation.predicates.length === 0) {
|
|
@@ -223,17 +223,19 @@ async function verifyNotationAgainstJsonl(params) {
|
|
|
223
223
|
catch {
|
|
224
224
|
// malformed lines were already handled by schema validation
|
|
225
225
|
}
|
|
226
|
-
if (rows.length >=
|
|
226
|
+
if (rows.length >= NOTATION_EVIDENCE_MAX_ROWS)
|
|
227
227
|
break;
|
|
228
228
|
}
|
|
229
|
-
const
|
|
229
|
+
const annotated = annotateNotationEvidence(notation, rows);
|
|
230
230
|
await params.service.updateDatasetNotation({
|
|
231
231
|
datasetId: params.datasetId,
|
|
232
|
-
notation:
|
|
232
|
+
notation: annotated,
|
|
233
233
|
});
|
|
234
|
-
const
|
|
235
|
-
console.log(`[Dataset ${params.datasetId}] notation v${
|
|
236
|
-
(
|
|
234
|
+
const contradicted = (annotated.checks ?? []).filter((check) => check.status === "contradicted");
|
|
235
|
+
console.log(`[Dataset ${params.datasetId}] notation v${annotated.version} (${annotated.status})` +
|
|
236
|
+
(contradicted.length
|
|
237
|
+
? ` — ${contradicted.length} predicado(s) con evidencia contraria (advisory)`
|
|
238
|
+
: ""));
|
|
237
239
|
}
|
|
238
240
|
function resolveExecutionStoragePath(outputPath, datasetId) {
|
|
239
241
|
const normalized = String(outputPath ?? "").replace(/\\/g, "/");
|
package/dist/file/prompts.js
CHANGED
|
@@ -245,9 +245,9 @@ function buildInstructions(context) {
|
|
|
245
245
|
.ele("Requirement").txt("The notation is your PLANNING artifact: it comes BEFORE the schema and BEFORE any parsing code. The LaTeX that explains the dataset matters more than the code that produces it").up()
|
|
246
246
|
.ele("Requirement").txt("Use set-builder notation, quantifiers and arithmetic in LaTeX (e.g. D = \\{(c, q, p) \\mid q \\in \\mathbb{Z}^{+},\\; p \\in \\mathbb{R}_{\\geq 0}\\})").up()
|
|
247
247
|
.ele("Requirement").txt("Declare every discovered set and variable as a symbol with a one-line meaning").up()
|
|
248
|
-
.ele("Requirement").txt("
|
|
248
|
+
.ele("Requirement").txt("Predicates are formal claims we trust; they may be semantic (e.g. 'x es una frase relevante'). Only for the few that are purely arithmetic (row counts, field types, ranges, uniqueness, aggregates) you MAY add a checkJson for optional advisory evidence — leave every other claim without checkJson").up()
|
|
249
249
|
.ele("Requirement").txt("ITERATE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call proposeNotation again with the refined notation and the reason. The notation is not definitive — discovery is the point").up()
|
|
250
|
-
.ele("Requirement").txt("Before calling completeDataset, call proposeNotation one last time with final=true so the notation describes EXACTLY the dataset you produced;
|
|
250
|
+
.ele("Requirement").txt("Before calling completeDataset, call proposeNotation one last time with final=true so the notation describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
|
|
251
251
|
.up()
|
|
252
252
|
.up();
|
|
253
253
|
if (hasProvidedSchema) {
|
package/dist/notation.d.ts
CHANGED
|
@@ -1,19 +1,29 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Formal notation for datasets.
|
|
3
3
|
*
|
|
4
|
-
* A dataset is the materialization of a set defined by
|
|
4
|
+
* A dataset is the materialization of a set defined by FORMAL NOTATION:
|
|
5
5
|
* LaTeX (set-builder, relational algebra, quantified predicates) that
|
|
6
6
|
* EXPLAINS the data — what sets it draws from, what variables it binds,
|
|
7
|
-
* what
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
7
|
+
* what every member satisfies. The definition is a logical proposition,
|
|
8
|
+
* possibly DERIVED (a syllogism), so it is NOT, in general, mechanically
|
|
9
|
+
* verifiable: a predicate may be semantic ("x es una frase divertida"),
|
|
10
|
+
* and the set is still perfectly well-formed. We TRUST that the formality
|
|
11
|
+
* and the produced dataset are valid — formal notation is the planning
|
|
12
|
+
* and explanatory artifact, not a proof obligation.
|
|
13
13
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
14
|
+
* It is the planning artifact: it starts as a proposal from the first look
|
|
15
|
+
* at the resources and is ITERATED as the analysis discovers new sets,
|
|
16
|
+
* variables and constraints. The notation is not definitive — discovery is
|
|
17
|
+
* the point.
|
|
18
|
+
*
|
|
19
|
+
* SOME predicates happen to be arithmetic (a row count, a field type, a
|
|
20
|
+
* preserved total). For those, and only those, we can attach OPTIONAL
|
|
21
|
+
* arithmetic evidence computed over the produced rows. That evidence is
|
|
22
|
+
* advisory: a contradiction is a hint worth surfacing, never a verdict
|
|
23
|
+
* that the dataset is invalid. Predicates with no arithmetic form are
|
|
24
|
+
* "asserted" — formal claims we trust. Nothing here blocks or changes a
|
|
25
|
+
* dataset build; the notation simply rides alongside on
|
|
26
|
+
* dataset_datasets.notation.
|
|
17
27
|
*/
|
|
18
28
|
export type DatasetNotationSymbolKind = "set" | "variable" | "function" | "constant" | "predicate";
|
|
19
29
|
export type DatasetNotationSymbol = {
|
|
@@ -26,10 +36,12 @@ export type DatasetNotationSymbol = {
|
|
|
26
36
|
};
|
|
27
37
|
export type NotationCmpOp = "=" | "!=" | "<" | "<=" | ">" | ">=";
|
|
28
38
|
/**
|
|
29
|
-
*
|
|
30
|
-
*
|
|
31
|
-
* records ("company.taxId").
|
|
32
|
-
* and/or/not/implies compose
|
|
39
|
+
* OPTIONAL arithmetic evidence for the subset of predicates that happen to
|
|
40
|
+
* be mechanical (counts, types, ranges, totals). Evaluated over the rows;
|
|
41
|
+
* field access supports dot-paths into nested records ("company.taxId").
|
|
42
|
+
* Leaf checks are dataset-level propositions; and/or/not/implies compose
|
|
43
|
+
* them propositionally. A predicate WITHOUT a check is a formal/semantic
|
|
44
|
+
* claim we trust — that is the normal case, not an exception.
|
|
33
45
|
*/
|
|
34
46
|
export type NotationCheck = {
|
|
35
47
|
kind: "row_count";
|
|
@@ -88,12 +100,21 @@ export type DatasetNotationPredicate = {
|
|
|
88
100
|
description: string;
|
|
89
101
|
/** the claim in LaTeX, e.g. "\\forall r \\in D:\\; r.amount > 0" */
|
|
90
102
|
latex: string;
|
|
91
|
-
/**
|
|
103
|
+
/**
|
|
104
|
+
* OPTIONAL arithmetic form. Absent (the common case) = a formal/semantic
|
|
105
|
+
* claim we trust without mechanical checking.
|
|
106
|
+
*/
|
|
92
107
|
check?: NotationCheck;
|
|
93
108
|
};
|
|
109
|
+
/**
|
|
110
|
+
* Advisory evidence for one predicate. Never a verdict on the dataset:
|
|
111
|
+
* - "asserted" formal/semantic claim, trusted, no mechanical check
|
|
112
|
+
* - "supported" arithmetic evidence agrees with the stated claim
|
|
113
|
+
* - "contradicted" arithmetic evidence disagrees — a hint, not a failure
|
|
114
|
+
*/
|
|
94
115
|
export type DatasetNotationCheckResult = {
|
|
95
116
|
predicateId: string;
|
|
96
|
-
status: "
|
|
117
|
+
status: "asserted" | "supported" | "contradicted";
|
|
97
118
|
detail?: string;
|
|
98
119
|
};
|
|
99
120
|
export type DatasetNotationRevision = {
|
|
@@ -103,7 +124,13 @@ export type DatasetNotationRevision = {
|
|
|
103
124
|
reason: string;
|
|
104
125
|
at: number;
|
|
105
126
|
};
|
|
106
|
-
|
|
127
|
+
/**
|
|
128
|
+
* Lifecycle of the formal notation. There is intentionally NO
|
|
129
|
+
* "verified"/"violated" verdict — the dataset's validity is trusted, not
|
|
130
|
+
* proven. Advisory arithmetic evidence (when any predicate has it) lives
|
|
131
|
+
* in `checks`, separate from this status.
|
|
132
|
+
*/
|
|
133
|
+
export type DatasetNotationStatus = "proposed" | "refined" | "final";
|
|
107
134
|
export type DatasetNotation = {
|
|
108
135
|
version: number;
|
|
109
136
|
status: DatasetNotationStatus;
|
|
@@ -111,8 +138,10 @@ export type DatasetNotation = {
|
|
|
111
138
|
latex: string;
|
|
112
139
|
symbols: DatasetNotationSymbol[];
|
|
113
140
|
predicates: DatasetNotationPredicate[];
|
|
141
|
+
/** advisory per-predicate evidence (asserted/supported/contradicted) */
|
|
114
142
|
checks?: DatasetNotationCheckResult[];
|
|
115
|
-
|
|
143
|
+
/** when the advisory evidence was last computed */
|
|
144
|
+
evidenceAt?: number;
|
|
116
145
|
history: DatasetNotationRevision[];
|
|
117
146
|
};
|
|
118
147
|
export type NotationRevisionInput = {
|
|
@@ -139,8 +168,10 @@ type JsonSchemaLike = {
|
|
|
139
168
|
/**
|
|
140
169
|
* A query-backed dataset has a complete deterministic description: the
|
|
141
170
|
* dataset is the image of a known query over a known domain. No model is
|
|
142
|
-
* involved
|
|
143
|
-
* from the query, the inferred schema and the
|
|
171
|
+
* involved, so here the formal definition and its predicates derive
|
|
172
|
+
* mechanically from the query, the inferred schema and the row count — and
|
|
173
|
+
* those predicates DO carry arithmetic evidence (the special case where the
|
|
174
|
+
* formal claims happen to be fully mechanical).
|
|
144
175
|
*/
|
|
145
176
|
export declare function inferQueryNotation(params: {
|
|
146
177
|
entityNames: string[];
|
|
@@ -154,10 +185,17 @@ type CheckOutcome = {
|
|
|
154
185
|
};
|
|
155
186
|
export declare function evaluateNotationCheck(rows: any[], check: NotationCheck): CheckOutcome;
|
|
156
187
|
/**
|
|
157
|
-
*
|
|
158
|
-
*
|
|
159
|
-
*
|
|
160
|
-
*
|
|
188
|
+
* Annotate a notation with ADVISORY arithmetic evidence over the produced
|
|
189
|
+
* rows. Never throws, never blocks, and never changes the notation's
|
|
190
|
+
* lifecycle status — the dataset's validity is trusted, not proven here.
|
|
191
|
+
*
|
|
192
|
+
* Each predicate is reported as:
|
|
193
|
+
* - "asserted" no arithmetic form (formal/semantic claim, trusted)
|
|
194
|
+
* - "supported" arithmetic evidence agrees
|
|
195
|
+
* - "contradicted" arithmetic evidence disagrees (a hint to look, not a
|
|
196
|
+
* verdict that the dataset is wrong)
|
|
197
|
+
* A check that can't be evaluated stays "asserted" — we don't downgrade a
|
|
198
|
+
* trusted claim because of a malformed mechanical form.
|
|
161
199
|
*/
|
|
162
|
-
export declare function
|
|
200
|
+
export declare function annotateNotationEvidence(notation: DatasetNotation, rows: any[]): DatasetNotation;
|
|
163
201
|
export {};
|
package/dist/notation.js
CHANGED
|
@@ -1,19 +1,29 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Formal notation for datasets.
|
|
3
3
|
*
|
|
4
|
-
* A dataset is the materialization of a set defined by
|
|
4
|
+
* A dataset is the materialization of a set defined by FORMAL NOTATION:
|
|
5
5
|
* LaTeX (set-builder, relational algebra, quantified predicates) that
|
|
6
6
|
* EXPLAINS the data — what sets it draws from, what variables it binds,
|
|
7
|
-
* what
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
7
|
+
* what every member satisfies. The definition is a logical proposition,
|
|
8
|
+
* possibly DERIVED (a syllogism), so it is NOT, in general, mechanically
|
|
9
|
+
* verifiable: a predicate may be semantic ("x es una frase divertida"),
|
|
10
|
+
* and the set is still perfectly well-formed. We TRUST that the formality
|
|
11
|
+
* and the produced dataset are valid — formal notation is the planning
|
|
12
|
+
* and explanatory artifact, not a proof obligation.
|
|
13
13
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
14
|
+
* It is the planning artifact: it starts as a proposal from the first look
|
|
15
|
+
* at the resources and is ITERATED as the analysis discovers new sets,
|
|
16
|
+
* variables and constraints. The notation is not definitive — discovery is
|
|
17
|
+
* the point.
|
|
18
|
+
*
|
|
19
|
+
* SOME predicates happen to be arithmetic (a row count, a field type, a
|
|
20
|
+
* preserved total). For those, and only those, we can attach OPTIONAL
|
|
21
|
+
* arithmetic evidence computed over the produced rows. That evidence is
|
|
22
|
+
* advisory: a contradiction is a hint worth surfacing, never a verdict
|
|
23
|
+
* that the dataset is invalid. Predicates with no arithmetic form are
|
|
24
|
+
* "asserted" — formal claims we trust. Nothing here blocks or changes a
|
|
25
|
+
* dataset build; the notation simply rides alongside on
|
|
26
|
+
* dataset_datasets.notation.
|
|
17
27
|
*/
|
|
18
28
|
/**
|
|
19
29
|
* Iterate the notation: every revision bumps the version and appends to
|
|
@@ -80,8 +90,10 @@ function schemaProperties(schema) {
|
|
|
80
90
|
/**
|
|
81
91
|
* A query-backed dataset has a complete deterministic description: the
|
|
82
92
|
* dataset is the image of a known query over a known domain. No model is
|
|
83
|
-
* involved
|
|
84
|
-
* from the query, the inferred schema and the
|
|
93
|
+
* involved, so here the formal definition and its predicates derive
|
|
94
|
+
* mechanically from the query, the inferred schema and the row count — and
|
|
95
|
+
* those predicates DO carry arithmetic evidence (the special case where the
|
|
96
|
+
* formal claims happen to be fully mechanical).
|
|
85
97
|
*/
|
|
86
98
|
export function inferQueryNotation(params) {
|
|
87
99
|
const sources = params.entityNames.length ? params.entityNames : ["Domain"];
|
|
@@ -367,42 +379,44 @@ export function evaluateNotationCheck(rows, check) {
|
|
|
367
379
|
}
|
|
368
380
|
}
|
|
369
381
|
/**
|
|
370
|
-
*
|
|
371
|
-
*
|
|
372
|
-
*
|
|
373
|
-
*
|
|
382
|
+
* Annotate a notation with ADVISORY arithmetic evidence over the produced
|
|
383
|
+
* rows. Never throws, never blocks, and never changes the notation's
|
|
384
|
+
* lifecycle status — the dataset's validity is trusted, not proven here.
|
|
385
|
+
*
|
|
386
|
+
* Each predicate is reported as:
|
|
387
|
+
* - "asserted" no arithmetic form (formal/semantic claim, trusted)
|
|
388
|
+
* - "supported" arithmetic evidence agrees
|
|
389
|
+
* - "contradicted" arithmetic evidence disagrees (a hint to look, not a
|
|
390
|
+
* verdict that the dataset is wrong)
|
|
391
|
+
* A check that can't be evaluated stays "asserted" — we don't downgrade a
|
|
392
|
+
* trusted claim because of a malformed mechanical form.
|
|
374
393
|
*/
|
|
375
|
-
export function
|
|
394
|
+
export function annotateNotationEvidence(notation, rows) {
|
|
376
395
|
const checks = [];
|
|
377
|
-
let failed = 0;
|
|
378
396
|
for (const predicate of notation.predicates ?? []) {
|
|
379
397
|
if (!predicate.check) {
|
|
380
|
-
checks.push({ predicateId: predicate.id, status: "
|
|
398
|
+
checks.push({ predicateId: predicate.id, status: "asserted" });
|
|
381
399
|
continue;
|
|
382
400
|
}
|
|
383
401
|
try {
|
|
384
402
|
const outcome = evaluateNotationCheck(rows, predicate.check);
|
|
385
403
|
checks.push({
|
|
386
404
|
predicateId: predicate.id,
|
|
387
|
-
status: outcome.ok ? "
|
|
405
|
+
status: outcome.ok ? "supported" : "contradicted",
|
|
388
406
|
detail: outcome.detail,
|
|
389
407
|
});
|
|
390
|
-
if (!outcome.ok)
|
|
391
|
-
failed += 1;
|
|
392
408
|
}
|
|
393
409
|
catch (error) {
|
|
394
410
|
checks.push({
|
|
395
411
|
predicateId: predicate.id,
|
|
396
|
-
status: "
|
|
397
|
-
detail: `
|
|
412
|
+
status: "asserted",
|
|
413
|
+
detail: `no evaluable: ${String(error).slice(0, 120)}`,
|
|
398
414
|
});
|
|
399
|
-
failed += 1;
|
|
400
415
|
}
|
|
401
416
|
}
|
|
402
417
|
return {
|
|
403
418
|
...notation,
|
|
404
419
|
checks,
|
|
405
|
-
|
|
406
|
-
verifiedAt: Date.now(),
|
|
420
|
+
evidenceAt: Date.now(),
|
|
407
421
|
};
|
|
408
422
|
}
|
|
@@ -10,8 +10,8 @@ interface ProposeNotationToolParams {
|
|
|
10
10
|
* the analysis discovers new sets, variables, constraints or corrections.
|
|
11
11
|
* Every call appends a revision (the discovery trail is preserved). Mark
|
|
12
12
|
* the last call with final=true so the notation describes the produced
|
|
13
|
-
* dataset
|
|
14
|
-
* completion.
|
|
13
|
+
* dataset. Predicates may be formal/semantic (we trust them); the few that
|
|
14
|
+
* are arithmetic get optional advisory evidence after completion.
|
|
15
15
|
*/
|
|
16
16
|
export declare function createProposeNotationTool({ datasetId, runtime }: ProposeNotationToolParams): import("ai").Tool<{
|
|
17
17
|
latex: string;
|
|
@@ -22,8 +22,8 @@ const predicateSchema = z.object({
|
|
|
22
22
|
.string()
|
|
23
23
|
.optional()
|
|
24
24
|
.describe([
|
|
25
|
-
"OPTIONAL
|
|
26
|
-
"
|
|
25
|
+
"OPTIONAL arithmetic form of the claim as a JSON string, used only for",
|
|
26
|
+
"advisory evidence over the produced rows (not a verdict). Shapes:",
|
|
27
27
|
'{"kind":"row_count","op":"=","value":124}',
|
|
28
28
|
'{"kind":"field_type","field":"amount","type":"number","allowNull":true}',
|
|
29
29
|
'{"kind":"field_range","field":"amount","min":0}',
|
|
@@ -35,7 +35,7 @@ const predicateSchema = z.object({
|
|
|
35
35
|
'Propositional composition: {"kind":"and"|"or","checks":[...]},',
|
|
36
36
|
'{"kind":"not","check":...}, {"kind":"implies","if":...,"then":...}.',
|
|
37
37
|
"Fields support dot-paths into nested records (company.taxId).",
|
|
38
|
-
"Omit for claims
|
|
38
|
+
"Omit for formal/semantic claims (the normal case) — they are trusted.",
|
|
39
39
|
].join(" ")),
|
|
40
40
|
});
|
|
41
41
|
async function getDatasetService(runtime) {
|
|
@@ -50,19 +50,21 @@ async function getDatasetService(runtime) {
|
|
|
50
50
|
* the analysis discovers new sets, variables, constraints or corrections.
|
|
51
51
|
* Every call appends a revision (the discovery trail is preserved). Mark
|
|
52
52
|
* the last call with final=true so the notation describes the produced
|
|
53
|
-
* dataset
|
|
54
|
-
* completion.
|
|
53
|
+
* dataset. Predicates may be formal/semantic (we trust them); the few that
|
|
54
|
+
* are arithmetic get optional advisory evidence after completion.
|
|
55
55
|
*/
|
|
56
56
|
export function createProposeNotationTool({ datasetId, runtime }) {
|
|
57
57
|
return tool({
|
|
58
58
|
description: [
|
|
59
59
|
"Declare or refine the FORMAL NOTATION of the dataset: the dataset as a",
|
|
60
|
-
"set defined in LaTeX (set-builder, relational algebra, quantified",
|
|
61
|
-
"predicates) plus the symbols it binds
|
|
62
|
-
"
|
|
63
|
-
"
|
|
64
|
-
"
|
|
65
|
-
"
|
|
60
|
+
"set defined in LaTeX (set-builder, relational algebra, quantified or",
|
|
61
|
+
"even semantic predicates) plus the symbols it binds. The definition is",
|
|
62
|
+
"a logical proposition, possibly derived — it does not need to be",
|
|
63
|
+
"mechanically provable; we trust the formality. This is your PLANNING",
|
|
64
|
+
"artifact — propose it before writing any code, and revise it whenever",
|
|
65
|
+
"the analysis discovers new sets, variables or constraints. For the few",
|
|
66
|
+
"predicates that happen to be arithmetic you MAY attach a checkJson for",
|
|
67
|
+
"optional advisory evidence (non-blocking, never a verdict).",
|
|
66
68
|
].join(" "),
|
|
67
69
|
inputSchema: z.object({
|
|
68
70
|
latex: z
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { DatasetService } from "../service.js";
|
|
2
2
|
import { createDatasetId } from "../id.js";
|
|
3
|
-
import {
|
|
3
|
+
import { annotateNotationEvidence, inferQueryNotation } from "../notation.js";
|
|
4
4
|
function normalizeRows(result) {
|
|
5
5
|
if (!result || typeof result !== "object")
|
|
6
6
|
return [];
|
|
@@ -63,9 +63,9 @@ export async function queryDomainStep(params) {
|
|
|
63
63
|
const previewRows = rows.slice(0, 20);
|
|
64
64
|
const schema = inferSchema(rows);
|
|
65
65
|
// query-backed datasets carry a fully deterministic formal notation:
|
|
66
|
-
// the set definition, its symbols and its
|
|
67
|
-
//
|
|
68
|
-
const notation =
|
|
66
|
+
// the set definition, its symbols and its predicates derive mechanically
|
|
67
|
+
// from the query + rows, so their arithmetic evidence is immediate
|
|
68
|
+
const notation = annotateNotationEvidence(inferQueryNotation({
|
|
69
69
|
entityNames: Object.keys(params.query ?? {}),
|
|
70
70
|
rowCount: rows.length,
|
|
71
71
|
schema,
|
|
@@ -104,7 +104,7 @@ function buildInstructions(context) {
|
|
|
104
104
|
.up()
|
|
105
105
|
.ele("Step", { number: "2", name: "Propose Formal Notation (PLAN FIRST)" })
|
|
106
106
|
.ele("Action").txt("Call proposeNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates every output row satisfies.").up()
|
|
107
|
-
.ele("Note").txt("The notation is the planning artifact and comes BEFORE the transformation: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which
|
|
107
|
+
.ele("Note").txt("The notation is the planning artifact and comes BEFORE the transformation: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved across the transformation). The definition is a formal proposition we trust — predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. ITERATE the notation whenever inspection of the inputs reveals new sets, variables or corrections, and call proposeNotation with final=true just before completing — any arithmetic predicates then get advisory evidence (never a verdict).").up()
|
|
108
108
|
.up()
|
|
109
109
|
.ele("Step", { number: "3", name: "Plan Mapping" })
|
|
110
110
|
.ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.
|
|
3
|
+
"version": "1.22.97-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.97-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.97-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.97-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|