@ekairos/dataset 1.22.97-beta.development.0 → 1.22.98-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/persistence.js +4 -0
- package/dist/builder/types.d.ts +4 -0
- package/dist/defineNotation.tool.d.ts +49 -0
- package/dist/{proposeNotation.tool.js → defineNotation.tool.js} +33 -23
- package/dist/file/file-dataset.agent.js +2 -2
- package/dist/file/prompts.js +7 -7
- package/dist/materializeDataset.tool.d.ts +1 -1
- package/dist/notation.d.ts +30 -26
- package/dist/notation.js +24 -22
- package/dist/transform/prompts.js +5 -5
- package/dist/transform/transform-dataset.agent.js +2 -2
- package/package.json +4 -4
- package/dist/proposeNotation.tool.d.ts +0 -42
|
@@ -147,10 +147,12 @@ export async function finalizeBuildResult(runtime, datasetId, withFirst) {
|
|
|
147
147
|
});
|
|
148
148
|
},
|
|
149
149
|
};
|
|
150
|
+
const notation = (datasetResult.data?.notation ?? null);
|
|
150
151
|
if (!withFirst) {
|
|
151
152
|
return {
|
|
152
153
|
datasetId,
|
|
153
154
|
dataset: datasetResult.data,
|
|
155
|
+
notation,
|
|
154
156
|
previewRows: previewResult.rows,
|
|
155
157
|
reader,
|
|
156
158
|
};
|
|
@@ -159,6 +161,7 @@ export async function finalizeBuildResult(runtime, datasetId, withFirst) {
|
|
|
159
161
|
return {
|
|
160
162
|
datasetId,
|
|
161
163
|
dataset: datasetResult.data,
|
|
164
|
+
notation,
|
|
162
165
|
previewRows: previewResult.rows,
|
|
163
166
|
reader,
|
|
164
167
|
firstRow: firstResult.row,
|
|
@@ -181,6 +184,7 @@ export function createDatasetBuildResult(runtime, params) {
|
|
|
181
184
|
return {
|
|
182
185
|
datasetId: params.datasetId,
|
|
183
186
|
dataset: params.dataset,
|
|
187
|
+
notation: (params.dataset?.notation ?? null),
|
|
184
188
|
previewRows: params.previewRows,
|
|
185
189
|
reader,
|
|
186
190
|
...(params.firstRow !== undefined ? { firstRow: params.firstRow } : {}),
|
package/dist/builder/types.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
|
|
|
3
3
|
import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
|
|
4
4
|
import type { ContextIdentifier, ContextReactor, StoredContextResource } from "@ekairos/events";
|
|
5
5
|
import { datasetDomain } from "../schema.js";
|
|
6
|
+
import type { DatasetNotation } from "../notation.js";
|
|
6
7
|
export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
|
|
7
8
|
query: InstaQLParams<DomainInstantSchema<D>>;
|
|
8
9
|
title?: string;
|
|
@@ -72,6 +73,9 @@ export type DatasetReader = {
|
|
|
72
73
|
export type DatasetBuildResult = {
|
|
73
74
|
datasetId: string;
|
|
74
75
|
dataset: any;
|
|
76
|
+
/** the formal definition (intensional face), co-equal with the rows */
|
|
77
|
+
notation: DatasetNotation | null;
|
|
78
|
+
/** preview of the materialization (extensional face) */
|
|
75
79
|
previewRows: any[];
|
|
76
80
|
reader: DatasetReader;
|
|
77
81
|
object?: any | null;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
interface DefineNotationToolParams {
|
|
2
|
+
datasetId: string;
|
|
3
|
+
runtime: any;
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* defineNotation — author or REFINE the formal DEFINITION of the dataset.
|
|
7
|
+
*
|
|
8
|
+
* A dataset has two co-equal faces: its formal definition (the notation —
|
|
9
|
+
* the proposition that defines the set, in LaTeX) and its materialization
|
|
10
|
+
* (the rows + the code that produces them). They sit at the SAME level: the
|
|
11
|
+
* definition is not a side note about the data, it IS the dataset stated
|
|
12
|
+
* intensionally. The same notation is the PLAN (you state it first and the
|
|
13
|
+
* materialization realizes it) and, finalized, the RESULT (it describes what
|
|
14
|
+
* you produced).
|
|
15
|
+
*
|
|
16
|
+
* Call it FIRST with the initial definition derived from the resources, and
|
|
17
|
+
* AGAIN whenever the analysis discovers new sets, variables, constraints or
|
|
18
|
+
* corrections — every call keeps the prior version in history. Mark the last
|
|
19
|
+
* call with final=true so the definition describes the produced dataset.
|
|
20
|
+
* Predicates may be formal/semantic (trusted); the few that are arithmetic
|
|
21
|
+
* MAY carry optional advisory evidence.
|
|
22
|
+
*/
|
|
23
|
+
export declare function createDefineNotationTool({ datasetId, runtime }: DefineNotationToolParams): import("ai").Tool<{
|
|
24
|
+
latex: string;
|
|
25
|
+
symbols: {
|
|
26
|
+
name: string;
|
|
27
|
+
kind: "function" | "set" | "variable" | "constant" | "predicate";
|
|
28
|
+
description: string;
|
|
29
|
+
latex?: string | undefined;
|
|
30
|
+
}[];
|
|
31
|
+
predicates: {
|
|
32
|
+
id: string;
|
|
33
|
+
description: string;
|
|
34
|
+
latex: string;
|
|
35
|
+
checkJson?: string | undefined;
|
|
36
|
+
}[];
|
|
37
|
+
reason: string;
|
|
38
|
+
final?: boolean | undefined;
|
|
39
|
+
}, {
|
|
40
|
+
success: boolean;
|
|
41
|
+
error: string;
|
|
42
|
+
} | {
|
|
43
|
+
warning?: string | undefined;
|
|
44
|
+
success: boolean;
|
|
45
|
+
version: number;
|
|
46
|
+
status: import("./notation.js").DatasetNotationStatus;
|
|
47
|
+
error?: undefined;
|
|
48
|
+
}>;
|
|
49
|
+
export {};
|
|
@@ -43,44 +43,54 @@ async function getDatasetService(runtime) {
|
|
|
43
43
|
return new DatasetService(scoped.db);
|
|
44
44
|
}
|
|
45
45
|
/**
|
|
46
|
-
* proposeNotation — declare or ITERATE the formal notation of the dataset.
|
|
46
|
+
* defineNotation — author or REFINE the formal DEFINITION of the dataset.
|
|
47
47
|
*
|
|
48
|
-
* The notation is the planning artifact: call it FIRST with the initial
|
|
49
|
-
* set definition derived from the resources, and call it AGAIN whenever
|
|
50
|
-
* the analysis discovers new sets, variables, constraints or corrections.
|
|
51
|
-
* Every call appends a revision (the discovery trail is preserved). Mark
|
|
52
|
-
* the last call with final=true so the notation describes the produced
|
|
53
|
-
* dataset. Predicates may be formal/semantic (we trust them); the few that
|
|
54
|
-
* are arithmetic get optional advisory evidence after completion.
|
|
48
|
+
* A dataset has two co-equal faces: its formal definition (the notation —
|
|
49
|
+
* the proposition that defines the set, in LaTeX) and its materialization
|
|
50
|
+
* (the rows + the code that produces them). They sit at the SAME level: the
|
|
51
|
+
* definition is not a side note about the data, it IS the dataset stated
|
|
52
|
+
* intensionally. The same notation is the PLAN (you state it first and the
|
|
53
|
+
* materialization realizes it) and, finalized, the RESULT (it describes what
|
|
54
|
+
* you produced).
|
|
55
|
+
*
|
|
56
|
+
* Call it FIRST with the initial definition derived from the resources, and
|
|
57
|
+
* AGAIN whenever the analysis discovers new sets, variables, constraints or
|
|
58
|
+
* corrections — every call keeps the prior version in history. Mark the last
|
|
59
|
+
* call with final=true so the definition describes the produced dataset.
|
|
60
|
+
* Predicates may be formal/semantic (trusted); the few that are arithmetic
|
|
61
|
+
* MAY carry optional advisory evidence.
|
|
55
62
|
*/
|
|
56
|
-
export function createProposeNotationTool({ datasetId, runtime }) {
|
|
63
|
+
export function createDefineNotationTool({ datasetId, runtime }) {
|
|
57
64
|
return tool({
|
|
58
65
|
description: [
|
|
59
|
-
"
|
|
60
|
-
"set
|
|
61
|
-
"
|
|
62
|
-
"
|
|
63
|
-
"
|
|
64
|
-
"
|
|
65
|
-
"
|
|
66
|
-
"
|
|
67
|
-
"
|
|
66
|
+
"Author or refine the formal DEFINITION of the dataset: the dataset as a",
|
|
67
|
+
"set in LaTeX (set-builder, relational algebra, quantified or even",
|
|
68
|
+
"semantic predicates) plus the symbols it binds. This definition and the",
|
|
69
|
+
"materialization (rows + code) are TWO CO-EQUAL FACES of the dataset —",
|
|
70
|
+
"the definition is the dataset stated intensionally, not a comment on it.",
|
|
71
|
+
"It is your PLAN (state it before writing any code; the materialization",
|
|
72
|
+
"realizes it) and, once final, the RESULT (it describes what you",
|
|
73
|
+
"produced). The definition is a logical proposition, possibly derived —",
|
|
74
|
+
"it need not be mechanically provable; we trust the formality. State it",
|
|
75
|
+
"first, refine it on every discovery, and set final=true on the last",
|
|
76
|
+
"call. For the few predicates that are arithmetic you MAY attach a",
|
|
77
|
+
"checkJson for optional advisory evidence (non-blocking, never a verdict).",
|
|
68
78
|
].join(" "),
|
|
69
79
|
inputSchema: z.object({
|
|
70
80
|
latex: z
|
|
71
81
|
.string()
|
|
72
82
|
.describe("Main definition of the dataset as a set, in LaTeX. Example: 'D = \\\\{(w,r,t) \\\\mid t = \\\\sum_{o \\\\in Orders} o.amount,\\\\; o.status = paid\\\\}'"),
|
|
73
|
-
symbols: z.array(symbolSchema).describe("Symbols bound by the
|
|
83
|
+
symbols: z.array(symbolSchema).describe("Symbols bound by the definition"),
|
|
74
84
|
predicates: z
|
|
75
85
|
.array(predicateSchema)
|
|
76
|
-
.describe("Claims
|
|
86
|
+
.describe("Claims the set satisfies; attach a checkJson only when arithmetic"),
|
|
77
87
|
reason: z
|
|
78
88
|
.string()
|
|
79
|
-
.describe("What discovery triggered
|
|
89
|
+
.describe("What this revision states or what discovery triggered it (or 'initial definition')"),
|
|
80
90
|
final: z
|
|
81
91
|
.boolean()
|
|
82
92
|
.optional()
|
|
83
|
-
.describe("true when this
|
|
93
|
+
.describe("true when this definition describes the dataset you are about to complete (the RESULT)"),
|
|
84
94
|
}),
|
|
85
95
|
execute: async ({ latex, symbols, predicates, reason, final }) => {
|
|
86
96
|
try {
|
|
@@ -121,7 +131,7 @@ export function createProposeNotationTool({ datasetId, runtime }) {
|
|
|
121
131
|
if (!update.ok) {
|
|
122
132
|
return { success: false, error: update.error };
|
|
123
133
|
}
|
|
124
|
-
console.log(`[Dataset ${datasetId}]
|
|
134
|
+
console.log(`[Dataset ${datasetId}] definition v${notation.version} (${notation.status}): ${reason}`);
|
|
125
135
|
return {
|
|
126
136
|
success: true,
|
|
127
137
|
version: notation.version,
|
|
@@ -3,7 +3,7 @@ import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
|
3
3
|
import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
|
|
4
4
|
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
5
5
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
6
|
-
import { createProposeNotationTool } from "../proposeNotation.tool.js";
|
|
6
|
+
import { createDefineNotationTool } from "../defineNotation.tool.js";
|
|
7
7
|
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
8
8
|
import { buildFileDatasetPromptStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
|
|
9
9
|
import { createDatasetId } from "../id.js";
|
|
@@ -152,7 +152,7 @@ function createFileParseContextDefinition(params) {
|
|
|
152
152
|
sandboxId,
|
|
153
153
|
runtime,
|
|
154
154
|
}),
|
|
155
|
-
|
|
155
|
+
defineNotation: createDefineNotationTool({
|
|
156
156
|
datasetId,
|
|
157
157
|
runtime,
|
|
158
158
|
}),
|
package/dist/file/prompts.js
CHANGED
|
@@ -239,15 +239,15 @@ function buildInstructions(context) {
|
|
|
239
239
|
.ele("Note").txt("FilePreview contains: TotalRows (total data rows), Metadata (file properties with JSON output), Head (first N raw file lines), Tail (last N lines if present), Mid (middle sample for large files). Each section shows Description, Script (full Python code), Command, Stdout (raw content), Stderr. This allows you to understand the exact file format.").up()
|
|
240
240
|
.up();
|
|
241
241
|
xml = xml
|
|
242
|
-
.ele("Step", { number: "2", name: "
|
|
243
|
-
.ele("Action").txt("Call
|
|
242
|
+
.ele("Step", { number: "2", name: "Define the Dataset (PLAN FIRST)" })
|
|
243
|
+
.ele("Action").txt("Call defineNotation with the INITIAL formal definition of the dataset as a set, derived from the file preview: D = { r | r ∈ File ∧ <constraints> } in LaTeX, the symbols it binds (sets, variables, functions) and the predicates the set satisfies").up()
|
|
244
244
|
.ele("Requirements")
|
|
245
|
-
.ele("Requirement").txt("The
|
|
245
|
+
.ele("Requirement").txt("The definition and the materialization (schema + parsing code + rows) are TWO CO-EQUAL FACES of the dataset. The definition is the dataset stated intensionally — author it FIRST; it is your PLAN and the code is built to realize it").up()
|
|
246
246
|
.ele("Requirement").txt("Use set-builder notation, quantifiers and arithmetic in LaTeX (e.g. D = \\{(c, q, p) \\mid q \\in \\mathbb{Z}^{+},\\; p \\in \\mathbb{R}_{\\geq 0}\\})").up()
|
|
247
247
|
.ele("Requirement").txt("Declare every discovered set and variable as a symbol with a one-line meaning").up()
|
|
248
248
|
.ele("Requirement").txt("Predicates are formal claims we trust; they may be semantic (e.g. 'x es una frase relevante'). Only for the few that are purely arithmetic (row counts, field types, ranges, uniqueness, aggregates) you MAY add a checkJson for optional advisory evidence — leave every other claim without checkJson").up()
|
|
249
|
-
.ele("Requirement").txt("
|
|
250
|
-
.ele("Requirement").txt("Before calling completeDataset, call
|
|
249
|
+
.ele("Requirement").txt("REFINE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call defineNotation again with the updated definition and the reason. The definition is not fixed up front — discovery is the point").up()
|
|
250
|
+
.ele("Requirement").txt("Before calling completeDataset, call defineNotation one last time with final=true so the definition becomes the RESULT — it describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
|
|
251
251
|
.up()
|
|
252
252
|
.up();
|
|
253
253
|
if (hasProvidedSchema) {
|
|
@@ -292,12 +292,12 @@ function buildInstructions(context) {
|
|
|
292
292
|
.up()
|
|
293
293
|
.up()
|
|
294
294
|
.ele("Step", { number: "5", name: "Complete and Validate" })
|
|
295
|
-
.ele("Action").txt("Call
|
|
295
|
+
.ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced rows), then call completeDataset to validate the dataset").up()
|
|
296
296
|
.ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
|
|
297
297
|
.up()
|
|
298
298
|
.up()
|
|
299
299
|
.ele("Rules")
|
|
300
|
-
.ele("Rule").txt("The formal
|
|
300
|
+
.ele("Rule").txt("The formal definition (defineNotation) and the materialization (schema + code + rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completion").up()
|
|
301
301
|
.ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
|
|
302
302
|
.ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the input language").up()
|
|
303
303
|
.ele("Rule").txt("Original/input language applies to extracted values only, not to JSON object keys").up()
|
|
@@ -18,8 +18,8 @@ declare const materializeDatasetToolInputSchema: z.ZodObject<{
|
|
|
18
18
|
}, z.core.$strip>>>;
|
|
19
19
|
texts: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
20
20
|
name: z.ZodOptional<z.ZodString>;
|
|
21
|
-
text: z.ZodString;
|
|
22
21
|
description: z.ZodOptional<z.ZodString>;
|
|
22
|
+
text: z.ZodString;
|
|
23
23
|
mimeType: z.ZodOptional<z.ZodString>;
|
|
24
24
|
}, z.core.$strip>>>;
|
|
25
25
|
datasets: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
package/dist/notation.d.ts
CHANGED
|
@@ -1,29 +1,31 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Formal notation for datasets.
|
|
2
|
+
* Formal notation for datasets — the dataset stated intensionally.
|
|
3
3
|
*
|
|
4
|
-
* A dataset
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
* and the produced dataset are valid — formal notation is the planning
|
|
12
|
-
* and explanatory artifact, not a proof obligation.
|
|
4
|
+
* A dataset has TWO CO-EQUAL FACES at the same level:
|
|
5
|
+
* - its formal DEFINITION (this notation: the proposition that defines the
|
|
6
|
+
* set, in LaTeX), and
|
|
7
|
+
* - its MATERIALIZATION (the rows + the code that produces them).
|
|
8
|
+
* The notation is not a comment about the data; it IS the dataset, written
|
|
9
|
+
* as a logical statement. The materialization is the same set written
|
|
10
|
+
* extensionally. Neither is subordinate to the other.
|
|
13
11
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
12
|
+
* The SAME notation plays two roles across the lifecycle: it is the PLAN
|
|
13
|
+
* (status "plan": stated first, the materialization is built to realize it)
|
|
14
|
+
* and, once finalized, the RESULT (status "result": it describes exactly
|
|
15
|
+
* what was produced). It is iterated in between — every revision keeps the
|
|
16
|
+
* prior version in `history`, so the discovery trail stays visible.
|
|
17
|
+
*
|
|
18
|
+
* The definition is a logical proposition, possibly DERIVED (a syllogism),
|
|
19
|
+
* so it is NOT, in general, mechanically verifiable: a predicate may be
|
|
20
|
+
* semantic ("x es una frase divertida") and the set is still well-formed.
|
|
21
|
+
* We TRUST the formality and the produced dataset — there is no verdict.
|
|
18
22
|
*
|
|
19
23
|
* SOME predicates happen to be arithmetic (a row count, a field type, a
|
|
20
|
-
* preserved total). For those, and only those, we
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
* dataset build; the notation simply rides alongside on
|
|
26
|
-
* dataset_datasets.notation.
|
|
24
|
+
* preserved total). For those, and only those, we attach OPTIONAL evidence
|
|
25
|
+
* computed over the rows. It is advisory: a contradiction is a hint, never
|
|
26
|
+
* a claim that the dataset is invalid. Predicates with no arithmetic form
|
|
27
|
+
* are "asserted" — trusted. Nothing here blocks or changes a build; the
|
|
28
|
+
* notation rides on dataset_datasets.notation.
|
|
27
29
|
*/
|
|
28
30
|
export type DatasetNotationSymbolKind = "set" | "variable" | "function" | "constant" | "predicate";
|
|
29
31
|
export type DatasetNotationSymbol = {
|
|
@@ -125,12 +127,14 @@ export type DatasetNotationRevision = {
|
|
|
125
127
|
at: number;
|
|
126
128
|
};
|
|
127
129
|
/**
|
|
128
|
-
*
|
|
129
|
-
* "
|
|
130
|
-
*
|
|
131
|
-
*
|
|
130
|
+
* The role the notation currently plays — the two ends of its life:
|
|
131
|
+
* - "plan": stated before/while building; the materialization realizes it
|
|
132
|
+
* - "result": finalized; it describes the dataset that was produced
|
|
133
|
+
* There is intentionally NO "verified"/"violated" verdict — validity is
|
|
134
|
+
* trusted, not proven. Iteration is tracked by `version`/`history`; advisory
|
|
135
|
+
* arithmetic evidence lives in `checks`, separate from this role.
|
|
132
136
|
*/
|
|
133
|
-
export type DatasetNotationStatus = "
|
|
137
|
+
export type DatasetNotationStatus = "plan" | "result";
|
|
134
138
|
export type DatasetNotation = {
|
|
135
139
|
version: number;
|
|
136
140
|
status: DatasetNotationStatus;
|
package/dist/notation.js
CHANGED
|
@@ -1,29 +1,31 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Formal notation for datasets.
|
|
2
|
+
* Formal notation for datasets — the dataset stated intensionally.
|
|
3
3
|
*
|
|
4
|
-
* A dataset
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
* and the produced dataset are valid — formal notation is the planning
|
|
12
|
-
* and explanatory artifact, not a proof obligation.
|
|
4
|
+
* A dataset has TWO CO-EQUAL FACES at the same level:
|
|
5
|
+
* - its formal DEFINITION (this notation: the proposition that defines the
|
|
6
|
+
* set, in LaTeX), and
|
|
7
|
+
* - its MATERIALIZATION (the rows + the code that produces them).
|
|
8
|
+
* The notation is not a comment about the data; it IS the dataset, written
|
|
9
|
+
* as a logical statement. The materialization is the same set written
|
|
10
|
+
* extensionally. Neither is subordinate to the other.
|
|
13
11
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
12
|
+
* The SAME notation plays two roles across the lifecycle: it is the PLAN
|
|
13
|
+
* (status "plan": stated first, the materialization is built to realize it)
|
|
14
|
+
* and, once finalized, the RESULT (status "result": it describes exactly
|
|
15
|
+
* what was produced). It is iterated in between — every revision keeps the
|
|
16
|
+
* prior version in `history`, so the discovery trail stays visible.
|
|
17
|
+
*
|
|
18
|
+
* The definition is a logical proposition, possibly DERIVED (a syllogism),
|
|
19
|
+
* so it is NOT, in general, mechanically verifiable: a predicate may be
|
|
20
|
+
* semantic ("x es una frase divertida") and the set is still well-formed.
|
|
21
|
+
* We TRUST the formality and the produced dataset — there is no verdict.
|
|
18
22
|
*
|
|
19
23
|
* SOME predicates happen to be arithmetic (a row count, a field type, a
|
|
20
|
-
* preserved total). For those, and only those, we
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
* dataset build; the notation simply rides alongside on
|
|
26
|
-
* dataset_datasets.notation.
|
|
24
|
+
* preserved total). For those, and only those, we attach OPTIONAL evidence
|
|
25
|
+
* computed over the rows. It is advisory: a contradiction is a hint, never
|
|
26
|
+
* a claim that the dataset is invalid. Predicates with no arithmetic form
|
|
27
|
+
* are "asserted" — trusted. Nothing here blocks or changes a build; the
|
|
28
|
+
* notation rides on dataset_datasets.notation.
|
|
27
29
|
*/
|
|
28
30
|
/**
|
|
29
31
|
* Iterate the notation: every revision bumps the version and appends to
|
|
@@ -40,7 +42,7 @@ export function reviseDatasetNotation(previous, input) {
|
|
|
40
42
|
};
|
|
41
43
|
return {
|
|
42
44
|
version,
|
|
43
|
-
status: input.final ? "
|
|
45
|
+
status: input.final ? "result" : "plan",
|
|
44
46
|
latex: input.latex,
|
|
45
47
|
symbols: input.symbols ?? previous?.symbols ?? [],
|
|
46
48
|
predicates: input.predicates ?? previous?.predicates ?? [],
|
|
@@ -102,9 +102,9 @@ function buildInstructions(context) {
|
|
|
102
102
|
.ele("Action").txt(`Review ContextResources and any InputPreviews to understand current record structures, evidence, fields, shapes and edge cases. ${multipleInputsNote}`).up()
|
|
103
103
|
.ele("Note").txt("ContextResources DescriptorJson may include inline text, metadata, previewRows, or other visible evidence. Treat that visible content as already available context. Do not use executeCommand only to reread it.").up()
|
|
104
104
|
.up()
|
|
105
|
-
.ele("Step", { number: "2", name: "
|
|
106
|
-
.ele("Action").txt("Call
|
|
107
|
-
.ele("Note").txt("The
|
|
105
|
+
.ele("Step", { number: "2", name: "Define the Output Dataset (PLAN FIRST)" })
|
|
106
|
+
.ele("Action").txt("Call defineNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates the output set satisfies.").up()
|
|
107
|
+
.ele("Note").txt("The definition and the materialization (the transform code + output rows) are TWO CO-EQUAL FACES of the dataset; author the definition FIRST as the PLAN: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved). The definition is a formal proposition we trust — predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. REFINE the definition whenever inspection of the inputs reveals new sets, variables or corrections, and call defineNotation with final=true just before completing — as the RESULT it describes the produced output; any arithmetic predicates then get advisory evidence (never a verdict).").up()
|
|
108
108
|
.up()
|
|
109
109
|
.ele("Step", { number: "3", name: "Plan Mapping" })
|
|
110
110
|
.ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
|
|
@@ -125,12 +125,12 @@ function buildInstructions(context) {
|
|
|
125
125
|
.ele("Requirement").txt("Do not install packages, download dependencies, or access the network from executeCommand. Use only the available runtime and standard library unless a dependency is already present.").up()
|
|
126
126
|
.up()
|
|
127
127
|
.ele("Step", { number: "5", name: "Validate and Complete" })
|
|
128
|
-
.ele("Action").txt("Call
|
|
128
|
+
.ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced output), then: when using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
|
|
129
129
|
.ele("Behavior").txt("If any completion tool returns success:false, inspect validation details, repair the output, and call the appropriate completion tool again. Do not stop until a completion tool returns success:true.").up()
|
|
130
130
|
.up()
|
|
131
131
|
.up()
|
|
132
132
|
.ele("Rules")
|
|
133
|
-
.ele("Rule").txt("The formal
|
|
133
|
+
.ele("Rule").txt("The formal definition (defineNotation) and the materialization (transform code + output rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completing.").up()
|
|
134
134
|
.ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
|
|
135
135
|
.ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
|
|
136
136
|
.ele("Rule").txt("Use the cheapest correct tool. completeObject and replaceRows are low-cost completion tools. executeCommand is a high-cost computation tool and requires an explicit commandDescription.").up()
|
|
@@ -4,7 +4,7 @@ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFa
|
|
|
4
4
|
import { datasetUpdateSchemaStep } from "../dataset/steps.js";
|
|
5
5
|
import { getDatasetOutputPath } from "../datasetFiles.js";
|
|
6
6
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
7
|
-
import { createProposeNotationTool } from "../proposeNotation.tool.js";
|
|
7
|
+
import { createDefineNotationTool } from "../defineNotation.tool.js";
|
|
8
8
|
import { createCompleteObjectTool, createReplaceRowsTool, } from "../writeDatasetRows.tool.js";
|
|
9
9
|
import { buildTransformDatasetPromptStep, } from "./transform-dataset.steps.js";
|
|
10
10
|
import { createDatasetId } from "../id.js";
|
|
@@ -137,7 +137,7 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
137
137
|
sandboxId,
|
|
138
138
|
runtime,
|
|
139
139
|
}),
|
|
140
|
-
|
|
140
|
+
defineNotation: createDefineNotationTool({
|
|
141
141
|
datasetId,
|
|
142
142
|
runtime,
|
|
143
143
|
}),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.97-beta.development.0",
|
|
3
|
+
"version": "1.22.98-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.98-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.98-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.98-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
interface ProposeNotationToolParams {
|
|
2
|
-
datasetId: string;
|
|
3
|
-
runtime: any;
|
|
4
|
-
}
|
|
5
|
-
/**
|
|
6
|
-
* proposeNotation — declare or ITERATE the formal notation of the dataset.
|
|
7
|
-
*
|
|
8
|
-
* The notation is the planning artifact: call it FIRST with the initial
|
|
9
|
-
* set definition derived from the resources, and call it AGAIN whenever
|
|
10
|
-
* the analysis discovers new sets, variables, constraints or corrections.
|
|
11
|
-
* Every call appends a revision (the discovery trail is preserved). Mark
|
|
12
|
-
* the last call with final=true so the notation describes the produced
|
|
13
|
-
* dataset. Predicates may be formal/semantic (we trust them); the few that
|
|
14
|
-
* are arithmetic get optional advisory evidence after completion.
|
|
15
|
-
*/
|
|
16
|
-
export declare function createProposeNotationTool({ datasetId, runtime }: ProposeNotationToolParams): import("ai").Tool<{
|
|
17
|
-
latex: string;
|
|
18
|
-
symbols: {
|
|
19
|
-
name: string;
|
|
20
|
-
kind: "function" | "set" | "variable" | "constant" | "predicate";
|
|
21
|
-
description: string;
|
|
22
|
-
latex?: string | undefined;
|
|
23
|
-
}[];
|
|
24
|
-
predicates: {
|
|
25
|
-
id: string;
|
|
26
|
-
description: string;
|
|
27
|
-
latex: string;
|
|
28
|
-
checkJson?: string | undefined;
|
|
29
|
-
}[];
|
|
30
|
-
reason: string;
|
|
31
|
-
final?: boolean | undefined;
|
|
32
|
-
}, {
|
|
33
|
-
success: boolean;
|
|
34
|
-
error: string;
|
|
35
|
-
} | {
|
|
36
|
-
warning?: string | undefined;
|
|
37
|
-
success: boolean;
|
|
38
|
-
version: number;
|
|
39
|
-
status: import("./notation.js").DatasetNotationStatus;
|
|
40
|
-
error?: undefined;
|
|
41
|
-
}>;
|
|
42
|
-
export {};
|