@xdarkicex/openclaw-memory-libravdb 1.4.3 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -16
- package/docs/README.md +3 -12
- package/docs/architecture.md +68 -153
- package/docs/contributing.md +1 -2
- package/openclaw.plugin.json +64 -1
- package/package.json +2 -2
- package/src/cli.ts +34 -0
- package/src/comparison-experiments.ts +128 -0
- package/src/context-engine.ts +276 -62
- package/src/dream-promotion.ts +492 -0
- package/src/dream-routing.ts +40 -0
- package/src/index.ts +16 -1
- package/src/markdown-hash.ts +104 -0
- package/src/markdown-ingest.ts +627 -0
- package/src/memory-runtime.ts +32 -9
- package/src/scoring.ts +6 -3
- package/src/temporal.ts +657 -80
- package/src/types.ts +48 -0
- package/docs/ast-v2.md +0 -167
- package/docs/ast.md +0 -70
- package/docs/compaction-evaluation.md +0 -182
- package/docs/continuity.md +0 -708
- package/docs/elevated-guidance.md +0 -258
- package/docs/gating.md +0 -134
- package/docs/implementation.md +0 -447
- package/docs/mathematics-v2.md +0 -1879
- package/docs/mathematics.md +0 -695
package/src/types.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import type { ComparisonProfileSummary } from "./comparison-experiments.js";
|
|
2
|
+
|
|
1
3
|
export interface PluginConfig {
|
|
2
4
|
dbPath?: string;
|
|
3
5
|
sidecarPath?: string;
|
|
@@ -24,6 +26,21 @@ export interface PluginConfig {
|
|
|
24
26
|
beta?: number;
|
|
25
27
|
gamma?: number;
|
|
26
28
|
ingestionGateThreshold?: number;
|
|
29
|
+
markdownIngestionEnabled?: boolean;
|
|
30
|
+
markdownIngestionRoots?: string[];
|
|
31
|
+
markdownIngestionObsidianEnabled?: boolean;
|
|
32
|
+
markdownIngestionObsidianRoots?: string[];
|
|
33
|
+
markdownIngestionObsidianInclude?: string[];
|
|
34
|
+
markdownIngestionObsidianExclude?: string[];
|
|
35
|
+
markdownIngestionObsidianDebounceMs?: number;
|
|
36
|
+
markdownIngestionInclude?: string[];
|
|
37
|
+
markdownIngestionExclude?: string[];
|
|
38
|
+
markdownIngestionCollection?: string;
|
|
39
|
+
markdownIngestionDebounceMs?: number;
|
|
40
|
+
dreamPromotionEnabled?: boolean;
|
|
41
|
+
dreamPromotionDiaryPath?: string;
|
|
42
|
+
dreamPromotionUserId?: string;
|
|
43
|
+
dreamPromotionDebounceMs?: number;
|
|
27
44
|
gatingWeights?: {
|
|
28
45
|
w1c?: number;
|
|
29
46
|
w2c?: number;
|
|
@@ -48,6 +65,7 @@ export interface PluginConfig {
|
|
|
48
65
|
continuityTailBudgetTokens?: number;
|
|
49
66
|
continuityPriorContextTokens?: number;
|
|
50
67
|
compactThreshold?: number;
|
|
68
|
+
compactSessionTokenBudget?: number;
|
|
51
69
|
section7CoarseTopK?: number;
|
|
52
70
|
section7SecondPassTopK?: number;
|
|
53
71
|
section7Theta1?: number;
|
|
@@ -211,6 +229,31 @@ export interface ContextAssembleResult {
|
|
|
211
229
|
recencyScore: number;
|
|
212
230
|
finalScore: number;
|
|
213
231
|
rationale: string;
|
|
232
|
+
comparisonSide?: 0 | 1 | null;
|
|
233
|
+
comparisonSlot?: string;
|
|
234
|
+
comparisonSlotRecall?: number;
|
|
235
|
+
comparisonSlotPrecision?: number;
|
|
236
|
+
comparisonSlotSpecificity?: number;
|
|
237
|
+
comparisonSlotPositionWeightedRecall?: number;
|
|
238
|
+
comparisonSlotPositionWeightedPrecision?: number;
|
|
239
|
+
comparisonSlotPositionWeightedSpecificity?: number;
|
|
240
|
+
comparisonFirstPersonClauseCount?: number;
|
|
241
|
+
comparisonProspectivePersonalVerbCount?: number;
|
|
242
|
+
comparisonPlanningDensity?: number;
|
|
243
|
+
comparisonPastness?: number;
|
|
244
|
+
comparisonSideWitnessScore?: number;
|
|
245
|
+
}>;
|
|
246
|
+
recoveryDedupedOrder?: Array<{
|
|
247
|
+
id: string;
|
|
248
|
+
recoveryScope: string;
|
|
249
|
+
finalScore: number;
|
|
250
|
+
tokenEstimate: number;
|
|
251
|
+
}>;
|
|
252
|
+
recoveryFittedOrder?: Array<{
|
|
253
|
+
id: string;
|
|
254
|
+
recoveryScope: string;
|
|
255
|
+
finalScore: number;
|
|
256
|
+
tokenEstimate: number;
|
|
214
257
|
}>;
|
|
215
258
|
recoveryReserveTokens?: number;
|
|
216
259
|
temporalQueryIndicator?: number;
|
|
@@ -219,6 +262,11 @@ export interface ContextAssembleResult {
|
|
|
219
262
|
temporalSelectorApplied?: boolean;
|
|
220
263
|
temporalSelectorReason?: string;
|
|
221
264
|
temporalRecoverySlots?: string[];
|
|
265
|
+
temporalComparisonCoverageApplied?: boolean;
|
|
266
|
+
temporalComparisonCoverageSlots?: string[];
|
|
267
|
+
temporalComparisonCoverageMinTokens?: number;
|
|
268
|
+
temporalComparisonWitnessIds?: string[];
|
|
269
|
+
comparisonProfile?: ComparisonProfileSummary;
|
|
222
270
|
};
|
|
223
271
|
}
|
|
224
272
|
|
package/docs/ast-v2.md
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
# TITLE: Mathematical Reference - Abstract Syntax Tree (AST) Partitioning
|
|
2
|
-
|
|
3
|
-
This document formalizes the heuristic mapping of user-authored Markdown documents (such as `agents.md` and `souls.md`) into the partitioned sets required by the two-pass retrieval system. It serves as the bridge between raw text ingestion and the rigorous corpus decomposition defined in `mathematics-v2.md`.
|
|
4
|
-
|
|
5
|
-
The design goal is to extract rigid behavioral rules (the invariant sets) from contextual lore (the variant set) automatically. This is achieved using a three-tier structural and semantic proxy, eliminating monolithic injection while protecting user constraints from token-budget starvation.
|
|
6
|
-
|
|
7
|
-
## 1. The Document AST and Node Extraction
|
|
8
|
-
|
|
9
|
-
Let a raw Markdown document \(d_{\mathrm{raw}}\) be parsed into an Abstract Syntax Tree \(\mathcal{T}\).
|
|
10
|
-
Let \(E: \mathcal{T} \to N_d\) be an extraction function that flattens the tree into an ordered sequence of semantic leaf nodes \(N_d = \langle n_1, n_2, \dots, n_k \rangle\).
|
|
11
|
-
|
|
12
|
-
Each node \(n_i \in N_d\) has an associated structural kind assigned by the parser (e.g., `yuin/goldmark`), mapped by the function \(\kappa: N_d \to K\), where \(K\) is the set of supported Markdown node types:
|
|
13
|
-
\[ K = \{ \text{Paragraph}, \text{List}, \text{Blockquote}, \text{YAMLFrontmatter}, \text{Heading}, \dots \} \]
|
|
14
|
-
|
|
15
|
-
*Implemented in `sidecaragentparser.go`.*
|
|
16
|
-
|
|
17
|
-
## 2. Formal Deontic Logic (\(\sigma\)) and the Kripke Frame
|
|
18
|
-
|
|
19
|
-
Structural types alone are insufficient proxies for intent. Narrative lore often resides in paragraphs, but authors frequently place critical instructions there as well (e.g., "You must always answer in JSON").
|
|
20
|
-
|
|
21
|
-
To detect these rules without deep NLP allocations, the parser evaluates raw node bytes against a Kripke Frame \((W, R)\) grounded in Standard Deontic Logic (SDL).
|
|
22
|
-
|
|
23
|
-
Let \(\mathcal{B}\) be the set of valid second-person imperative bigrams (e.g., "you must", "never"). A zero-allocation lexer scans the bytes for patterns in \(\mathcal{B}\), mapping them to Modalities (Obligatory, Forbidden, Permitted).
|
|
24
|
-
|
|
25
|
-
To guarantee logical consistency, the engine enforces Seriality (Axiom D). No world reachable from a state in which an action is Obligatory may mark that same action as Forbidden:
|
|
26
|
-
\[ O(\phi) \implies \neg F(\text{next}(\phi)) \]
|
|
27
|
-
|
|
28
|
-
We formalize this as a binary promotion scalar \(\sigma: N_d \to \{0,1\}\). This function is specifically targeted at Paragraph nodes, as structural invariants bypass it:
|
|
29
|
-
\[
|
|
30
|
-
\sigma(n) = \begin{cases}
|
|
31
|
-
1 & \text{if } \kappa(n) = \text{Paragraph} \land \text{SDL}(\mathcal{B}) \text{ detects a valid imperative} \\
|
|
32
|
-
0 & \text{otherwise}
|
|
33
|
-
\end{cases}
|
|
34
|
-
\]
|
|
35
|
-
|
|
36
|
-
To reason about tuning noise in the bigram set \(W_{\mathrm{deontic}}\) (the set \(\mathcal{B}\) defined above), we
|
|
37
|
-
also define the paragraph classifier error rates:
|
|
38
|
-
\[
|
|
39
|
-
P_{\mathrm{fp}} = P(\sigma(n) = 1 \mid n \text{ is narrative lore})
|
|
40
|
-
\]
|
|
41
|
-
\[
|
|
42
|
-
P_{\mathrm{fn}} = P(\sigma(n) = 0 \mid n \text{ is behavioral rule})
|
|
43
|
-
\]
|
|
44
|
-
|
|
45
|
-
For authored documents whose lore paragraphs would otherwise remain in
|
|
46
|
-
\(\mathcal{V}_d\), the expected Tier-2 waste introduced by false positives is:
|
|
47
|
-
\[
|
|
48
|
-
\mathbb{E}[\mathrm{wasted\ toks\ in\ }\mathcal{I}_2]
|
|
49
|
-
=
|
|
50
|
-
P_{\mathrm{fp}} \cdot |\mathcal{V}_{d,\mathrm{paragraphs}}| \cdot \mathbb{E}[\mathrm{toks}(n)]
|
|
51
|
-
\]
|
|
52
|
-
|
|
53
|
-
This gives the parser a concrete quantity to minimize when adjusting
|
|
54
|
-
\(W_{\mathrm{deontic}}\), while \(P_{\mathrm{fn}}\) measures the risk of leaving
|
|
55
|
-
true behavioral rules behind in \(\mathcal{V}_d\).
|
|
56
|
-
|
|
57
|
-
*Implemented via `NewDeonticFrame` and `EvaluateText` in the zero-allocation byte lexer.*
|
|
58
|
-
|
|
59
|
-
## 3. The Three-Tier Structural Indicator Function \(\iota\)
|
|
60
|
-
|
|
61
|
-
To avoid the brittleness of a binary pass/fail budget, we distribute nodes across a three-tier priority hierarchy.
|
|
62
|
-
|
|
63
|
-
Let \(K_{\mathcal{I}1} \subset K\) be the subset of node kinds that represent hard authorial constraints:
|
|
64
|
-
\[ K_{\mathcal{I}1} = \{ \text{List}, \text{YAMLFrontmatter} \} \]
|
|
65
|
-
|
|
66
|
-
Let \(K_{\mathcal{I}2} \subset K\) be the subset of node kinds that represent soft constraints or stylistic guidelines:
|
|
67
|
-
\[ K_{\mathcal{I}2} = \{ \text{Blockquote} \} \]
|
|
68
|
-
|
|
69
|
-
We define the structural indicator function \(\iota: N_d \to \{0,1,2\}\) mapping each node to a specific tier:
|
|
70
|
-
\[
|
|
71
|
-
\iota(n) = \begin{cases}
|
|
72
|
-
1 & \text{if } \kappa(n) \in K_{\mathcal{I}1} \quad \text{(Hard Invariant)} \\
|
|
73
|
-
2 & \text{if } \kappa(n) \in K_{\mathcal{I}2} \lor \sigma(n) = 1 \quad \text{(Soft Invariant)} \\
|
|
74
|
-
0 & \text{otherwise} \quad \text{(Variant Lore)}
|
|
75
|
-
\end{cases}
|
|
76
|
-
\]
|
|
77
|
-
|
|
78
|
-
*Proof of Reachability:* If a node is a Paragraph, \(\kappa(n) \notin K_{\mathcal{I}1}\) and \(\kappa(n) \notin K_{\mathcal{I}2}\). However, if the deontic lexer detects a rule, \(\sigma(n) = 1\), causing the logical OR condition for \(\iota(n) = 2\) to evaluate to true, successfully promoting the paragraph to a Soft Invariant.
|
|
79
|
-
|
|
80
|
-
## 4. Corpus Decomposition and Set Integration
|
|
81
|
-
|
|
82
|
-
For any document \(d \in \mathbf{D}_{\text{agents}} \cup \mathbf{D}_{\text{souls}}\), the node set \(N_d\) is partitioned cleanly into three sets:
|
|
83
|
-
- **Hard Directives:** \(\mathcal{I}_{1d} = \langle n \in N_d \mid \iota(n) = 1 \rangle\), ordered by \(\mathrm{position}(n)\) ascending, where \(\mathrm{position}(n)\) is the byte offset of node \(n\) in \(d_{\mathrm{raw}}\)
|
|
84
|
-
- **Soft Directives:** \(\mathcal{I}_{2d} = \{ n \in N_d \mid \iota(n) = 2 \}\)
|
|
85
|
-
- **Contextual Lore:** \(\mathcal{V}_d = \{ n \in N_d \mid \iota(n) = 0 \}\)
|
|
86
|
-
|
|
87
|
-
*Partition Completeness:* Because \(\iota(n)\) maps every node to exactly one integer in \(\{0, 1, 2\}\), the resulting sets are mutually exclusive and collectively exhaustive:
|
|
88
|
-
\[
|
|
89
|
-
\mathcal{I}_{1d} \cup \mathcal{I}_{2d} \cup \mathcal{V}_d = N_d
|
|
90
|
-
\]
|
|
91
|
-
\[
|
|
92
|
-
\mathcal{I}_{1d} \cap \mathcal{I}_{2d} = \emptyset
|
|
93
|
-
\]
|
|
94
|
-
\[
|
|
95
|
-
\mathcal{I}_{1d} \cap \mathcal{V}_d = \emptyset
|
|
96
|
-
\]
|
|
97
|
-
\[
|
|
98
|
-
\mathcal{I}_{2d} \cap \mathcal{V}_d = \emptyset
|
|
99
|
-
\]
|
|
100
|
-
|
|
101
|
-
These pairwise disjointness statements follow directly from \(\iota\) being a
|
|
102
|
-
single-valued total function into \(\{0,1,2\}\): no node can be assigned to
|
|
103
|
-
more than one tier simultaneously.
|
|
104
|
-
|
|
105
|
-
These sets integrate into the global corpus. Let \(\mathbf{D}_{\text{standard}}\) be the set of standard memory documents (non-core files). We formally define the standard variant node set as \(\mathcal{V}_{\text{standard}} = \bigcup_{d \in \mathbf{D}_{\text{standard}}} E(d)\). The global corpus is then:
|
|
106
|
-
\[ \mathcal{I}_1 = \bigcup_{d} \mathcal{I}_{1d} \qquad \mathcal{I}_2 = \bigcup_{d} \mathcal{I}_{2d} \qquad \mathcal{V} = \mathcal{V}_{\text{standard}} \cup \left( \bigcup_{d} \mathcal{V}_d \right) \]
|
|
107
|
-
|
|
108
|
-
By definition, any chunk \(n \in \mathcal{I}_{1d}\) inherits the hard startup injection guarantee from `mathematics-v2.md`. To clarify, \(G(q,n)\) represents the runtime *gating admission scalar*, not semantic relevance.
|
|
109
|
-
\[ \iota(n)=1 \implies G(q,n)=1 \quad \forall q \in \mathbf{Q} \]
|
|
110
|
-
|
|
111
|
-
## 5. Authored Authority Boost for Variant Lore
|
|
112
|
-
|
|
113
|
-
Chunks in \(\mathcal{V}_d\) lose their invariant injection guarantee and must survive semantic vector retrieval. To ensure that agent-specific lore outcompetes general conversational memory, we enforce a strict authority override. For all \(n \in \mathcal{V}_d\) extracted from a core identity document:
|
|
114
|
-
\[ a_n = 1.0 \]
|
|
115
|
-
This guarantees that variant chunks of core files receive the maximum possible authored weight when scoring against the remaining token budget \(\tau_{\mathcal{V}}\).
|
|
116
|
-
|
|
117
|
-
## 6. Token Budget Safety Bounds
|
|
118
|
-
|
|
119
|
-
Adversarial or malformed files containing excessively large constraint blocks could violate the strict prompt limits defined by the host. The system enforces split load-time bounds:
|
|
120
|
-
|
|
121
|
-
For Hard Invariants (\(\alpha_1\)):
|
|
122
|
-
\[ \sum_{n \in \mathcal{I}_{1d}} \mathrm{toks}(n) \le \alpha_1 \tau \quad \text{(if exceeded: fast-fail and reject agent load)} \]
|
|
123
|
-
|
|
124
|
-
For Soft Invariants (\(\alpha_2\)):
|
|
125
|
-
\[ \sum_{n \in \mathcal{I}_{2d}} \mathrm{toks}(n) \le \alpha_2 \tau \quad \text{(if exceeded: truncate by source position)} \]
|
|
126
|
-
|
|
127
|
-
*Cumulative Verification Proof:* Let the total reserved invariant budget fraction be \(\alpha\), where \(\alpha_1 + \alpha_2 \le \alpha\). If both independent enforcement bounds are satisfied, then:
|
|
128
|
-
\[ \sum_{n \in \mathcal{I}_{1d}} \mathrm{toks}(n) + \sum_{n \in \mathcal{I}_{2d}} \mathrm{toks}(n) \le \alpha_1 \tau + \alpha_2 \tau = (\alpha_1 + \alpha_2)\tau \le \alpha \tau \]
|
|
129
|
-
This mathematically guarantees that the combined invariant sets never exceed the reserved invariant budget \(\alpha\tau\), and therefore never breach the overall token budget \(\tau\) whenever \(\alpha \le 1\).
|
|
130
|
-
|
|
131
|
-
Under the unified assembly contract in [`mathematics-v2.md`](./mathematics-v2.md)
|
|
132
|
-
section 7.8 and [`continuity.md`](./continuity.md), these authored bounds are
|
|
133
|
-
combined with a separate recent-tail target fraction \(\beta\). The runtime
|
|
134
|
-
therefore treats the tiers with the following precedence:
|
|
135
|
-
|
|
136
|
-
1. **Tier 1 / Hard invariants** must fit their startup reservation \(\alpha_1\tau\).
|
|
137
|
-
2. **Mandatory recent tail** must preserve at least the minimum raw suffix \(T_{\mathrm{base}}\).
|
|
138
|
-
3. **Tier 2 / Soft invariants** are injected by longest-prefix truncation under the effective budget
|
|
139
|
-
\[
|
|
140
|
-
\tau_{\mathcal{I}_2}^{\mathrm{eff}}=
|
|
141
|
-
\max\!\left(0,\,
|
|
142
|
-
\min\!\left(\alpha_2\tau,\,
|
|
143
|
-
\tau-\tau_{\mathcal{I}_1}-\mathrm{toks}(T_{\mathrm{base}})\right)\right)
|
|
144
|
-
\]
|
|
145
|
-
4. **Variant lore** competes only for the final residual budget after Tier 1,
|
|
146
|
-
the admitted Tier 2 prefix, and the exact recent tail are accounted for.
|
|
147
|
-
|
|
148
|
-
This makes \(\mathcal{I}_1\) and the minimum continuity suffix hard
|
|
149
|
-
constraints, while keeping \(\mathcal{I}_2\) order-preserving but elastic.
|
|
150
|
-
Equivalently, the runtime safety invariant is:
|
|
151
|
-
\[
|
|
152
|
-
\tau_{\mathcal{I}_1} + \mathrm{toks}(T_{\mathrm{base}}) \le \tau
|
|
153
|
-
\quad \text{must hold at runtime or Tier 2 is fully evicted}
|
|
154
|
-
\]
|
|
155
|
-
|
|
156
|
-
## 7. The Document-Addressed Cache (\(\Psi\)) and Runtime Implications
|
|
157
|
-
|
|
158
|
-
The AST extraction, Deontic bigram evaluation, and partition logic are purely deterministic functions of \(d_{\mathrm{raw}}\). To prevent \(O(N)\) recomputation on every conversational turn, the system maintains a document-addressed cache:
|
|
159
|
-
|
|
160
|
-
\[ \Psi: \text{hash}(d_{\mathrm{raw}}, \text{tokenizer\_id}) \to \{\mathcal{I}_{1d}, \mathcal{I}_{2d}, \mathcal{V}_d, \text{budget}\} \]
|
|
161
|
-
|
|
162
|
-
Because the token estimator function \(\lceil \frac{|t|}{\chi(t)} \rceil\) depends on the active model tokenizer, \(\text{tokenizer\_id}\) is embedded in the hash key.
|
|
163
|
-
|
|
164
|
-
At runtime:
|
|
165
|
-
1. **Tier 1 (\(\mathcal{I}_{1d}\))** is injected via an \(O(1)\) memory copy.
|
|
166
|
-
2. **Tier 2 (\(\mathcal{I}_{2d}\))** is evaluated via an \(O(|\mathcal{I}_{2d}|)\) prefix sum to enforce source-order truncation under \(\tau_{\mathcal{I}_2}^{\mathrm{eff}}\).
|
|
167
|
-
3. **Tier 0 (\(\mathcal{V}_d\))** bypasses re-parsing and feeds into the semantic Pass 1 vector retrieval only after the continuity layer removes the exact recent tail into \(T_{\mathrm{recent}}\), leaving \(\mathcal{V}_{\mathrm{rest}}\).
|
package/docs/ast.md
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
# TITLE: Mathematical Reference - Abstract Syntax Tree (AST) Partitioning
|
|
2
|
-
|
|
3
|
-
Historical note: this document is preserved to show the project's design
|
|
4
|
-
evolution. The reviewed authoritative AST reference is
|
|
5
|
-
[`ast-v2.md`](./ast-v2.md).
|
|
6
|
-
|
|
7
|
-
This document formalizes the heuristic mapping of user-authored Markdown documents (such as `agents.md` and `souls.md`) into the partitioned sets required by the two-pass retrieval system. It serves as the bridge between raw text ingestion and the rigorous corpus decomposition defined in `mathematics-v2.md` Section 7.2.
|
|
8
|
-
|
|
9
|
-
The design goal is to extract rigid behavioral rules (the invariant set) from contextual lore (the variant set) automatically, using structural types as a mathematically stable proxy for user intent.
|
|
10
|
-
|
|
11
|
-
## 1. The Document AST and Node Extraction
|
|
12
|
-
|
|
13
|
-
Let a raw Markdown document $d_{\mathrm{raw}}$ be parsed into an Abstract Syntax Tree $\mathcal{T}$.
|
|
14
|
-
Let $E: \mathcal{T} \to N_d$ be an extraction function that flattens the tree into an ordered sequence of semantic leaf nodes $N_d = \langle n_1, n_2, \dots, n_k \rangle$.
|
|
15
|
-
|
|
16
|
-
Each node $n_i \in N_d$ has an associated structural kind assigned by the parser (e.g., `yuin/goldmark`), mapped by the function $\kappa: N_d \to K$, where $K$ is the set of supported Markdown node types:
|
|
17
|
-
\[ K = \{ \text{Paragraph}, \text{List}, \text{Blockquote}, \text{YAMLFrontmatter}, \text{Heading}, \dots \} \]
|
|
18
|
-
|
|
19
|
-
*Implemented in `sidecaragentparser.go`.*
|
|
20
|
-
|
|
21
|
-
## 2. The Structural Indicator Function $\iota$
|
|
22
|
-
|
|
23
|
-
To avoid document-level monolithic injection, we redefine the invariant membership predicate from `mathematics-v2.md` Section 7.2 at the node level.
|
|
24
|
-
|
|
25
|
-
Let $K_{\mathcal{I}} \subset K$ be the subset of node kinds structurally correlated with hard constraints, core directives, and programmatic definitions:
|
|
26
|
-
\[ K_{\mathcal{I}} = \{ \text{List}, \text{Blockquote}, \text{YAMLFrontmatter} \} \]
|
|
27
|
-
|
|
28
|
-
We define the structural indicator function $\iota: N_d \to \{0,1\}$ as:
|
|
29
|
-
\[
|
|
30
|
-
\iota(n) = \begin{cases}
|
|
31
|
-
1 & \text{if } \kappa(n) \in K_{\mathcal{I}} \\
|
|
32
|
-
0 & \text{otherwise}
|
|
33
|
-
\end{cases}
|
|
34
|
-
\]
|
|
35
|
-
|
|
36
|
-
**Note on structural proxy limits:** This heuristic relies entirely on the probability that human authors place absolute rules in lists/frontmatter and narrative lore in standard paragraphs. It is mathematically blind to the semantic meaning of the text.
|
|
37
|
-
|
|
38
|
-
## 3. Corpus Decomposition and Set Integration
|
|
39
|
-
|
|
40
|
-
For any document $d \in \mathbf{D}_{\text{agents}} \cup \mathbf{D}_{\text{souls}}$, the node set $N_d$ is partitioned cleanly:
|
|
41
|
-
- **The Core Directives (Invariant):** $\mathcal{I}_d = \{ n \in N_d \mid \iota(n) = 1 \}$
|
|
42
|
-
- **The Contextual Lore (Variant):** $\mathcal{V}_d = \{ n \in N_d \mid \iota(n) = 0 \}$
|
|
43
|
-
|
|
44
|
-
This guarantees partition integrity:
|
|
45
|
-
\[ \mathcal{I}_d \cup \mathcal{V}_d = N_d \quad \text{and} \quad \mathcal{I}_d \cap \mathcal{V}_d = \emptyset \]
|
|
46
|
-
|
|
47
|
-
These sets feed directly into the global corpus partitioning:
|
|
48
|
-
\[ \mathcal{I} = \bigcup_{d} \mathcal{I}_d \qquad \mathcal{V} = \mathbf{D}_{\text{standard}} \cup \left( \bigcup_{d} \mathcal{V}_d \right) \]
|
|
49
|
-
|
|
50
|
-
By definition, any chunk $n \in \mathcal{I}_d$ inherits the hard startup guarantee from `mathematics-v2.md` Section 7.1:
|
|
51
|
-
\[ \iota(n)=1 \implies G(q,n)=1 \quad \forall q \in \mathbf{Q} \]
|
|
52
|
-
|
|
53
|
-
## 4. Authored Authority Boost for Variant Lore
|
|
54
|
-
|
|
55
|
-
Chunks in $\mathcal{V}_d$ (such as standard paragraph nodes) lose their invariant guarantee and must survive the Pass 1 coarse semantic filter defined in `mathematics-v2.md` Section 7.4.
|
|
56
|
-
|
|
57
|
-
To ensure that agent-specific lore outcompetes general conversational memory during Pass 2, we enforce a strict authority override. For all $n \in \mathcal{V}_d$ extracted from a core identity document:
|
|
58
|
-
\[ a_n = 1.0 \]
|
|
59
|
-
|
|
60
|
-
Following the authority weight convex combination $d_{\omega}$ from `mathematics-v2.md` Section 7.3, this guarantees that variant chunks of core files receive the maximum possible authored weight when scoring against the remaining token budget $\tau_{\mathcal{V}}$.
|
|
61
|
-
|
|
62
|
-
## 5. Token Budget Safety Bounds
|
|
63
|
-
|
|
64
|
-
Because invariants bypass all truncation (`mathematics-v2.md` Section 7.8), an adversarial or malformed file containing an excessively large list block could violate the token budget:
|
|
65
|
-
\[ \sum_{n \in \mathcal{I}_d} \mathrm{toks}(n) > \tau \]
|
|
66
|
-
|
|
67
|
-
Therefore, the system must enforce a load-time safety bound on the extracted AST invariants:
|
|
68
|
-
\[ \sum_{n \in \mathcal{I}_d} \mathrm{toks}(n) \le \tau_{\text{max\_invariant}} \le \alpha \tau \quad \text{where } \alpha \in (0, 1) \]
|
|
69
|
-
|
|
70
|
-
If parsing yields an $\mathcal{I}_d$ that exceeds $\alpha \tau$ (e.g., $\alpha = 0.4$, reserving 60% of context for variant history and tools), the parser must fast-fail and reject the agent load. This protects the runtime invariants dictated in `mathematics-v2.md` Section 7.10 from mathematically impossible token fits.
|
package/docs/compaction-evaluation.md
DELETED
|
@@ -1,182 +0,0 @@
|
|
|
1
|
-
# Compaction Evaluation
|
|
2
|
-
|
|
3
|
-
This document records the first local evaluation pass for the Nomic-first
|
|
4
|
-
compaction confidence design.
|
|
5
|
-
|
|
6
|
-
The goal of the experiment was to compare:
|
|
7
|
-
|
|
8
|
-
- raw ONNX T5 decoder confidence
|
|
9
|
-
- Nomic-space preservation metrics
|
|
10
|
-
- the planned hybrid confidence model with a hard preservation gate
|
|
11
|
-
|
|
12
|
-
The evaluation harness lives in:
|
|
13
|
-
|
|
14
|
-
- `sidecar/cmd/eval_compaction`
|
|
15
|
-
|
|
16
|
-
It runs real local models:
|
|
17
|
-
|
|
18
|
-
- Nomic `nomic-embed-text-v1.5` for embedding-space evaluation
|
|
19
|
-
- ONNX T5-small for optional abstractive summarization
|
|
20
|
-
|
|
21
|
-
## Why This Exists
|
|
22
|
-
|
|
23
|
-
The compaction system previously trusted T5 decoder confidence alone:
|
|
24
|
-
|
|
25
|
-
```text
|
|
26
|
-
conf_t5(s, C) = exp(mean log p(token_i | token_<i, C))
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
That quantity measures decoder self-consistency, not semantic preservation in
|
|
30
|
-
the retrieval geometry used by the vector store.
|
|
31
|
-
|
|
32
|
-
The new design evaluates every summary back in Nomic space:
|
|
33
|
-
|
|
34
|
-
```text
|
|
35
|
-
Q_align(s, C) = cos(E(s), mu_C)
|
|
36
|
-
Q_cover(s, C) = mean_i max(0, cos(E(s), E(t_i)))
|
|
37
|
-
conf_nomic(s, C) = clamp01((Q_align + Q_cover) / 2)
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
And then applies:
|
|
41
|
-
|
|
42
|
-
```text
|
|
43
|
-
if Q_align < tau_preserve:
|
|
44
|
-
reject abstractive summary and fall back to extractive
|
|
45
|
-
|
|
46
|
-
confidence =
|
|
47
|
-
conf_nomic for extractive
|
|
48
|
-
lambda * conf_nomic + (1 - lambda) * conf_t5 for T5 summaries
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
with the current implementation constants:
|
|
52
|
-
|
|
53
|
-
- `tau_preserve = 0.65`
|
|
54
|
-
- `lambda = 0.8`
|
|
55
|
-
|
|
56
|
-
## Baseline Corpus
|
|
57
|
-
|
|
58
|
-
The current real-model pass uses 17 fixed synthetic clusters:
|
|
59
|
-
|
|
60
|
-
- 5 normal engineering-memory clusters
|
|
61
|
-
- 12 adversarial clusters designed to stress abstractive faithfulness
|
|
62
|
-
|
|
63
|
-
The adversarial set includes:
|
|
64
|
-
|
|
65
|
-
- conflicting subsystem failures
|
|
66
|
-
- dense Go code and test logic
|
|
67
|
-
- four-way architectural decision bundles
|
|
68
|
-
- many-number and threshold-heavy cases
|
|
69
|
-
- continuity vs progress tension
|
|
70
|
-
- cross-domain product/math/infra mixtures
|
|
71
|
-
- token-budget contract distinctions
|
|
72
|
-
- conflicting proposed resolutions vs the actual root cause
|
|
73
|
-
- long noisy code-trace clusters with one decisive invariant
|
|
74
|
-
- topic-shift clusters that tempt generic summaries
|
|
75
|
-
- near-duplicate threshold statements from different subsystems
|
|
76
|
-
|
|
77
|
-
## Results
|
|
78
|
-
|
|
79
|
-
### Core Cases
|
|
80
|
-
|
|
81
|
-
| case | raw_conf | align | cover | final_conf | delta_conf |
|
|
82
|
-
|---|---:|---:|---:|---:|---:|
|
|
83
|
-
| auth_migration | 0.8501 | 0.9183 | 0.8342 | 0.8710 | +0.0209 |
|
|
84
|
-
| compaction_boundary | 0.6894 | 0.7983 | 0.7216 | 0.7458 | +0.0564 |
|
|
85
|
-
| gating_math | 0.7790 | 0.9167 | 0.8285 | 0.8539 | +0.0748 |
|
|
86
|
-
| release_pipeline | 0.8859 | 0.9697 | 0.8729 | 0.9142 | +0.0283 |
|
|
87
|
-
| adversarial_multi_fact | 0.8545 | 0.9052 | 0.7893 | 0.8487 | -0.0058 |
|
|
88
|
-
|
|
89
|
-
### Adversarial Cases
|
|
90
|
-
|
|
91
|
-
| case | raw_conf | align | cover | final_conf | delta_conf |
|
|
92
|
-
|---|---:|---:|---:|---:|---:|
|
|
93
|
-
| adversarial_conflicting_errors | 0.8540 | 0.8579 | 0.7440 | 0.8116 | -0.0424 |
|
|
94
|
-
| adversarial_dense_go_code | 0.8945 | 0.9167 | 0.8212 | 0.8741 | -0.0205 |
|
|
95
|
-
| adversarial_four_way_decision_bundle | 0.8451 | 0.8651 | 0.7598 | 0.8190 | -0.0261 |
|
|
96
|
-
| adversarial_many_numbers | 0.6915 | 0.8854 | 0.7900 | 0.8084 | +0.1170 |
|
|
97
|
-
| adversarial_boundary_vs_progress | 0.7824 | 0.8993 | 0.8109 | 0.8406 | +0.0581 |
|
|
98
|
-
| adversarial_cross_domain_mix | 0.5240 | 0.8099 | 0.7327 | 0.7218 | +0.1978 |
|
|
99
|
-
| adversarial_token_budget_rules | 0.7938 | 0.9060 | 0.8249 | 0.8511 | +0.0573 |
|
|
100
|
-
| adversarial_conflicting_resolutions | 0.8600 | 0.9284 | 0.8560 | 0.8858 | +0.0258 |
|
|
101
|
-
| adversarial_long_noisy_code_trace | 0.8144 | 0.8565 | 0.7893 | 0.8212 | +0.0068 |
|
|
102
|
-
| adversarial_topic_shift_generic_bait | 0.8860 | 0.9166 | 0.8209 | 0.8722 | -0.0138 |
|
|
103
|
-
| adversarial_near_duplicate_thresholds | 0.8731 | 0.9123 | 0.8266 | 0.8702 | -0.0029 |
|
|
104
|
-
|
|
105
|
-
## What We Learned
|
|
106
|
-
|
|
107
|
-
### 1. T5 and Nomic are locally compatible
|
|
108
|
-
|
|
109
|
-
Every evaluated case produced:
|
|
110
|
-
|
|
111
|
-
```text
|
|
112
|
-
Q_align > 0.65
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
So the hard preservation gate did not trigger on the initial corpus. This is
|
|
116
|
-
useful evidence that the local T5 summaries are generally pointing in the same
|
|
117
|
-
semantic direction as the source cluster in Nomic space.
|
|
118
|
-
|
|
119
|
-
### 2. The new math improves confidence grounding
|
|
120
|
-
|
|
121
|
-
The hybrid model changed confidence more often than it changed summary text.
|
|
122
|
-
|
|
123
|
-
This is still a meaningful result:
|
|
124
|
-
|
|
125
|
-
- positive deltas mean Nomic-space preservation validated summaries that T5
|
|
126
|
-
scored pessimistically
|
|
127
|
-
- negative deltas mean Nomic-space preservation penalized summaries that T5
|
|
128
|
-
scored too generously
|
|
129
|
-
|
|
130
|
-
The largest rescue was:
|
|
131
|
-
|
|
132
|
-
- `adversarial_cross_domain_mix`: `0.5240 -> 0.7218` (`+0.1978`)
|
|
133
|
-
|
|
134
|
-
The largest penalty was:
|
|
135
|
-
|
|
136
|
-
- `adversarial_conflicting_errors`: `0.8540 -> 0.8116` (`-0.0424`)
|
|
137
|
-
|
|
138
|
-
So even without fallback, the confidence signal is more retrieval-aware than the
|
|
139
|
-
old T5-only design.
|
|
140
|
-
|
|
141
|
-
### 3. Harsher corpus plus threshold sweep sharpened the evidence
|
|
142
|
-
|
|
143
|
-
Even after expanding the corpus to 17 cases, the shipped gate still did not
|
|
144
|
-
trip:
|
|
145
|
-
|
|
146
|
-
```text
|
|
147
|
-
tau_preserve = 0.65 -> 0 trips
|
|
148
|
-
tau_preserve = 0.75 -> 0 trips
|
|
149
|
-
tau_preserve = 0.85 -> 2 trips
|
|
150
|
-
```
|
|
151
|
-
|
|
152
|
-
The two cases that fall below `0.85` are:
|
|
153
|
-
|
|
154
|
-
- `compaction_boundary`
|
|
155
|
-
- `adversarial_cross_domain_mix`
|
|
156
|
-
|
|
157
|
-
So the section-5 preservation machinery is now evidenced in two ways:
|
|
158
|
-
|
|
159
|
-
- unit tests prove the hard fallback path when `Q_align < tau_preserve`
|
|
160
|
-
- real-model threshold sweeps show where the current corpus begins to stress
|
|
161
|
-
geometric drift, even though the shipped `0.65` threshold remains conservative
|
|
162
|
-
|
|
163
|
-
This means the earlier evidence gap has narrowed: the corpus is now harsh enough
|
|
164
|
-
to differentiate thresholds and expose weaker cases, even if it still does not
|
|
165
|
-
force fallback at the default gate.
|
|
166
|
-
|
|
167
|
-
Remaining interpretation questions are now about calibration, not about whether
|
|
168
|
-
the gate machinery exists or whether the evaluation corpus can separate stronger
|
|
169
|
-
and weaker summaries.
|
|
170
|
-
|
|
171
|
-
## Current Interpretation
|
|
172
|
-
|
|
173
|
-
The preservation gate is not decorative, but its first practical value is
|
|
174
|
-
confidence correction rather than frequent fallback.
|
|
175
|
-
|
|
176
|
-
That is still a win:
|
|
177
|
-
|
|
178
|
-
- T5 remains the lightweight local decoder
|
|
179
|
-
- Nomic remains the canonical retrieval geometry
|
|
180
|
-
- compaction confidence is now judged in the same space retrieval uses
|
|
181
|
-
|
|
182
|
-
This is the mathematically coherent compromise for a stable shippable plugin.
|