@xdarkicex/openclaw-memory-libravdb 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -0
- package/docs/README.md +14 -0
- package/docs/architecture-decisions/README.md +6 -0
- package/docs/architecture-decisions/adr-001-onnx-over-ollama.md +21 -0
- package/docs/architecture-decisions/adr-002-libravdb-over-lancedb.md +19 -0
- package/docs/architecture-decisions/adr-003-convex-gating-over-threshold.md +27 -0
- package/docs/architecture-decisions/adr-004-sidecar-over-native-ts.md +21 -0
- package/docs/architecture.md +188 -0
- package/docs/contributing.md +76 -0
- package/docs/dependencies.md +38 -0
- package/docs/embedding-profiles.md +42 -0
- package/docs/gating.md +329 -0
- package/docs/implementation.md +381 -0
- package/docs/installation.md +272 -0
- package/docs/mathematics.md +695 -0
- package/docs/models.md +63 -0
- package/docs/problem.md +64 -0
- package/docs/security.md +86 -0
- package/openclaw.plugin.json +84 -0
- package/package.json +41 -0
- package/scripts/build-sidecar.sh +30 -0
- package/scripts/postinstall.js +169 -0
- package/scripts/setup.sh +20 -0
- package/scripts/setup.ts +505 -0
- package/scripts/sidecar-release.d.ts +4 -0
- package/scripts/sidecar-release.js +17 -0
- package/sidecar/cmd/inspect_onnx/main.go +105 -0
- package/sidecar/compact/gate.go +273 -0
- package/sidecar/compact/gate_test.go +85 -0
- package/sidecar/compact/summarize.go +345 -0
- package/sidecar/compact/summarize_test.go +319 -0
- package/sidecar/compact/tokens.go +11 -0
- package/sidecar/config/config.go +119 -0
- package/sidecar/config/config_test.go +75 -0
- package/sidecar/embed/engine.go +696 -0
- package/sidecar/embed/engine_test.go +349 -0
- package/sidecar/embed/matryoshka.go +93 -0
- package/sidecar/embed/matryoshka_test.go +150 -0
- package/sidecar/embed/onnx_local.go +319 -0
- package/sidecar/embed/onnx_local_test.go +159 -0
- package/sidecar/embed/profile_contract_test.go +71 -0
- package/sidecar/embed/profile_eval_test.go +923 -0
- package/sidecar/embed/profiles.go +39 -0
- package/sidecar/go.mod +21 -0
- package/sidecar/go.sum +30 -0
- package/sidecar/health/check.go +33 -0
- package/sidecar/health/check_test.go +55 -0
- package/sidecar/main.go +151 -0
- package/sidecar/model/encoder.go +222 -0
- package/sidecar/model/registry.go +262 -0
- package/sidecar/model/registry_test.go +102 -0
- package/sidecar/model/seq2seq.go +133 -0
- package/sidecar/server/rpc.go +343 -0
- package/sidecar/server/rpc_test.go +350 -0
- package/sidecar/server/transport.go +160 -0
- package/sidecar/store/libravdb.go +676 -0
- package/sidecar/store/libravdb_test.go +472 -0
- package/sidecar/summarize/engine.go +360 -0
- package/sidecar/summarize/engine_test.go +148 -0
- package/sidecar/summarize/onnx_local.go +494 -0
- package/sidecar/summarize/onnx_local_test.go +48 -0
- package/sidecar/summarize/profiles.go +52 -0
- package/sidecar/summarize/tokenizer.go +13 -0
- package/sidecar/summarize/tokenizer_hf.go +76 -0
- package/sidecar/summarize/util.go +13 -0
- package/src/cli.ts +205 -0
- package/src/context-engine.ts +195 -0
- package/src/index.ts +27 -0
- package/src/memory-provider.ts +24 -0
- package/src/openclaw-plugin-sdk.d.ts +53 -0
- package/src/plugin-runtime.ts +67 -0
- package/src/recall-cache.ts +34 -0
- package/src/recall-utils.ts +22 -0
- package/src/rpc.ts +84 -0
- package/src/scoring.ts +58 -0
- package/src/sidecar.ts +506 -0
- package/src/tokens.ts +36 -0
- package/src/types.ts +146 -0
- package/tsconfig.json +20 -0
- package/tsconfig.tests.json +12 -0
package/docs/gating.md
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
# Domain-Adaptive Gating Scalar
|
|
2
|
+
|
|
3
|
+
This document describes the ingestion gate used to decide whether a user turn should be promoted into durable `user:` memory. It is the most novel scoring component in the repository.
|
|
4
|
+
|
|
5
|
+
Implemented in:
|
|
6
|
+
|
|
7
|
+
- [`sidecar/compact/gate.go`](../sidecar/compact/gate.go)
|
|
8
|
+
- [`sidecar/compact/tokens.go`](../sidecar/compact/tokens.go)
|
|
9
|
+
- [`sidecar/compact/summarize.go`](../sidecar/compact/summarize.go) for the
|
|
10
|
+
downstream abstractive-routing threshold
|
|
11
|
+
|
|
12
|
+
## 1. Why the Original Scalar Failed
|
|
13
|
+
|
|
14
|
+
The original scalar assumed conversational memory semantics:
|
|
15
|
+
|
|
16
|
+
- low novelty meant "already known"
|
|
17
|
+
- repetition meant "probably redundant"
|
|
18
|
+
- low natural-language structure meant "probably noise"
|
|
19
|
+
|
|
20
|
+
That logic breaks for technical sessions.
|
|
21
|
+
|
|
22
|
+
Repeated workflow context is often exactly what should be remembered:
|
|
23
|
+
|
|
24
|
+
- file paths
|
|
25
|
+
- APIs
|
|
26
|
+
- failure signatures
|
|
27
|
+
- configuration changes
|
|
28
|
+
- architectural decisions
|
|
29
|
+
|
|
30
|
+
In technical work, repetition can indicate persistent work context rather than low value.
|
|
31
|
+
|
|
32
|
+
## 2. The Convex Mixture
|
|
33
|
+
|
|
34
|
+
The corrected gate is:
|
|
35
|
+
|
|
36
|
+
$$
|
|
37
|
+
G(t) = (1 - T(t)) \cdot G_{\mathrm{conv}}(t) + T(t) \cdot G_{\mathrm{tech}}(t)
|
|
38
|
+
$$
|
|
39
|
+
|
|
40
|
+
where:
|
|
41
|
+
|
|
42
|
+
$$
|
|
43
|
+
G_{\mathrm{conv}}(t) = w_1^c H(t) + w_2^c R(t) + w_3^c D_{nl}(t)
|
|
44
|
+
$$
|
|
45
|
+
|
|
46
|
+
$$
|
|
47
|
+
G_{\mathrm{tech}}(t) = w_1^t P(t) + w_2^t A(t) + w_3^t D_{\mathrm{tech}}(t)
|
|
48
|
+
$$
|
|
49
|
+
|
|
50
|
+
and:
|
|
51
|
+
|
|
52
|
+
$$
|
|
53
|
+
T(t) \in [0,1]
|
|
54
|
+
$$
|
|
55
|
+
|
|
56
|
+
is the technical-density signal.
|
|
57
|
+
|
|
58
|
+
Current default weights from
|
|
59
|
+
[`DefaultGatingConfig()`](../sidecar/compact/gate.go):
|
|
60
|
+
|
|
61
|
+
- conversational branch: $w_1^c = 0.35$, $w_2^c = 0.40$, $w_3^c = 0.25$
|
|
62
|
+
- technical branch: $w_1^t = 0.40$, $w_2^t = 0.35$, $w_3^t = 0.25$
|
|
63
|
+
|
|
64
|
+
### Boundedness
|
|
65
|
+
|
|
66
|
+
If:
|
|
67
|
+
|
|
68
|
+
- $T(t) \in [0,1]$
|
|
69
|
+
- $G_{\mathrm{conv}}(t) \in [0,1]$
|
|
70
|
+
- $G_{\mathrm{tech}}(t) \in [0,1]$
|
|
71
|
+
|
|
72
|
+
then:
|
|
73
|
+
|
|
74
|
+
$$
|
|
75
|
+
G(t) \in [0,1]
|
|
76
|
+
$$
|
|
77
|
+
|
|
78
|
+
because $G$ is a convex combination of two values in $[0,1]$.
|
|
79
|
+
|
|
80
|
+
### Continuity
|
|
81
|
+
|
|
82
|
+
For fixed branch scores, the gate is affine (and therefore continuous) in $T$:
|
|
83
|
+
|
|
84
|
+
$$
|
|
85
|
+
\frac{\partial G}{\partial T} = G_{\mathrm{tech}} - G_{\mathrm{conv}}
|
|
86
|
+
$$
|
|
87
|
+
|
|
88
|
+
There is no discontinuous jump at a domain boundary. A mixed technical/conversational turn interpolates smoothly between the two sub-formulas.
|
|
89
|
+
|
|
90
|
+
## 3. Domain Detection $T(t)$
|
|
91
|
+
|
|
92
|
+
Technical density is a weighted sum of technical patterns with saturation:
|
|
93
|
+
|
|
94
|
+
$$
|
|
95
|
+
T(t) = \min\left(\frac{\sum_i s_i \cdot \mathbf{1}[\mathrm{pattern}_i(t)]}{\theta_{\mathrm{norm}}}, 1\right)
|
|
96
|
+
$$
|
|
97
|
+
|
|
98
|
+
The shipped patterns include:
|
|
99
|
+
|
|
100
|
+
- code fences
|
|
101
|
+
- file paths
|
|
102
|
+
- function definitions
|
|
103
|
+
- shell commands
|
|
104
|
+
- URLs or endpoints
|
|
105
|
+
- stack traces
|
|
106
|
+
- hashes or hex identifiers
|
|
107
|
+
|
|
108
|
+
Default normalization:
|
|
109
|
+
|
|
110
|
+
$$
|
|
111
|
+
\theta_{\mathrm{norm}} = 1.5
|
|
112
|
+
$$
|
|
113
|
+
|
|
114
|
+
This means two strong technical signals are enough to saturate the branch weight.
|
|
115
|
+
|
|
116
|
+
Saturation at `1.0` is correct because the gate does not need to measure "how technical beyond fully technical" a turn is; it only needs the branch mixture weight.
|
|
117
|
+
|
|
118
|
+
## 4. Conversational Branch
|
|
119
|
+
|
|
120
|
+
### Novelty $H(t)$
|
|
121
|
+
|
|
122
|
+
For a non-empty neighbor set $K$, novelty is:
|
|
123
|
+
|
|
124
|
+
$$
|
|
125
|
+
H(t) = 1 - \frac{1}{|K|} \sum_{k \in K} \cos(\vec{v}_t, \vec{v}_k)
|
|
126
|
+
$$
|
|
127
|
+
|
|
128
|
+
where $K$ is the retrieved nearest-neighbor set from durable `user:` memory.
|
|
129
|
+
|
|
130
|
+
Properties:
|
|
131
|
+
|
|
132
|
+
- empty memory gives $H=1.0$
|
|
133
|
+
- highly similar existing memories drive $H$ toward `0`
|
|
134
|
+
|
|
135
|
+
The implementation deliberately uses top-k mean similarity rather than centroid distance because user memory is often multimodal.
|
|
136
|
+
|
|
137
|
+
### Repetition Gate $R(t)$
|
|
138
|
+
|
|
139
|
+
The repetition term is:
|
|
140
|
+
|
|
141
|
+
$$
|
|
142
|
+
R(t) = F(t) \cdot (1 - S(t))
|
|
143
|
+
$$
|
|
144
|
+
|
|
145
|
+
with:
|
|
146
|
+
|
|
147
|
+
$$
|
|
148
|
+
F(t) = \min\left(\frac{\mathrm{hitsAbove}(\mathrm{turns:userId}, 0.80, k=10)}{5}, 1\right)
|
|
149
|
+
$$
|
|
150
|
+
|
|
151
|
+
$$
|
|
152
|
+
S(t) = \min\left(\frac{\mathrm{hitsAbove}(\mathrm{user:userId}, 0.85, k=5)}{3}, 1\right)
|
|
153
|
+
$$
|
|
154
|
+
|
|
155
|
+
This is intentionally a product, not a sum.
|
|
156
|
+
|
|
157
|
+
Why:
|
|
158
|
+
|
|
159
|
+
- high input frequency should help only if durable memory is not already saturated
|
|
160
|
+
- high saturation should veto the repetition term regardless of frequency
|
|
161
|
+
|
|
162
|
+
The veto property is structural:
|
|
163
|
+
|
|
164
|
+
$$
|
|
165
|
+
S(t) = 1 \Rightarrow R(t) = 0
|
|
166
|
+
$$
|
|
167
|
+
|
|
168
|
+
### Natural-Language Structural Load $D_{nl}(t)$
|
|
169
|
+
|
|
170
|
+
The conversational branch adds a heuristic structure signal for turns that contain:
|
|
171
|
+
|
|
172
|
+
- preferences
|
|
173
|
+
- human-name references
|
|
174
|
+
- dates
|
|
175
|
+
- quantities
|
|
176
|
+
- fact assertions
|
|
177
|
+
|
|
178
|
+
This is intentionally narrow. It excludes general proper-noun detection so technical identifiers do not inflate the conversational signal.
|
|
179
|
+
|
|
180
|
+
## 5. Technical Branch
|
|
181
|
+
|
|
182
|
+
### Specificity $P(t)$
|
|
183
|
+
|
|
184
|
+
Specificity measures concrete artifact density:
|
|
185
|
+
|
|
186
|
+
$$
|
|
187
|
+
P(t) = \min\left(
|
|
188
|
+
\frac{
|
|
189
|
+
\sum_j p_j \cdot \mathrm{count}_j(t)
|
|
190
|
+
}{
|
|
191
|
+
\max(\mathrm{EstimateTokens}(t)/100, 1)
|
|
192
|
+
},
|
|
193
|
+
1
|
|
194
|
+
\right)
|
|
195
|
+
$$
|
|
196
|
+
|
|
197
|
+
The numerator counts things like:
|
|
198
|
+
|
|
199
|
+
- file paths
|
|
200
|
+
- function references
|
|
201
|
+
- error codes
|
|
202
|
+
- git references
|
|
203
|
+
- API endpoints
|
|
204
|
+
|
|
205
|
+
The token estimate inside the normalization denominator is implemented in
|
|
206
|
+
[`sidecar/compact/tokens.go`](../sidecar/compact/tokens.go):
|
|
207
|
+
|
|
208
|
+
$$
|
|
209
|
+
\mathrm{EstimateTokens}(t)=\max\left(\left\lfloor \frac{\mathrm{len}(t)}{4} \right\rfloor, 1\right)
|
|
210
|
+
$$
|
|
211
|
+
|
|
212
|
+
This bytes-per-token heuristic is the token estimator used by the gating
|
|
213
|
+
subsystem. It is intentionally cheap and deterministic. It is not the same as
|
|
214
|
+
the separate host-side prompt-budget estimator in [`src/tokens.ts`](../src/tokens.ts).
|
|
215
|
+
|
|
216
|
+
Length normalization matters. Without it, any long technical turn would score
|
|
217
|
+
high simply because it contains more surface area, not because it is more
|
|
218
|
+
memory-worthy.
|
|
219
|
+
|
|
220
|
+
### Actionability $A(t)$
|
|
221
|
+
|
|
222
|
+
Actionability captures decision and outcome content:
|
|
223
|
+
|
|
224
|
+
- architectural decisions
|
|
225
|
+
- fixes or resolutions
|
|
226
|
+
- deployment or merge milestones
|
|
227
|
+
- configuration changes
|
|
228
|
+
|
|
229
|
+
These are the kinds of technical turns that are expensive to reconstruct later and therefore worth persisting.
|
|
230
|
+
|
|
231
|
+
### Technical Structural Load $D_{\mathrm{tech}}(t)$
|
|
232
|
+
|
|
233
|
+
This term detects structural technical content such as:
|
|
234
|
+
|
|
235
|
+
- function definitions
|
|
236
|
+
- data structures
|
|
237
|
+
- dependencies
|
|
238
|
+
- tests
|
|
239
|
+
- documentation comments
|
|
240
|
+
|
|
241
|
+
It is the technical analogue to $D_{nl}$, not a replacement for it.
|
|
242
|
+
|
|
243
|
+
## 6. Calibration
|
|
244
|
+
|
|
245
|
+
Stored metadata includes:
|
|
246
|
+
|
|
247
|
+
- `gating_score`
|
|
248
|
+
- `gating_t`
|
|
249
|
+
- `gating_h`
|
|
250
|
+
- `gating_r`
|
|
251
|
+
- `gating_d`
|
|
252
|
+
- `gating_p`
|
|
253
|
+
- `gating_a`
|
|
254
|
+
- `gating_dtech`
|
|
255
|
+
- `gating_gconv`
|
|
256
|
+
- `gating_gtech`
|
|
257
|
+
|
|
258
|
+
The first calibration pass should inspect the empirical score distribution after real traffic arrives.
|
|
259
|
+
|
|
260
|
+
What to look for:
|
|
261
|
+
|
|
262
|
+
- bimodality in `gating_score`
|
|
263
|
+
- sensible spread in `gating_t`
|
|
264
|
+
- non-degenerate contributions from both `gconv` and `gtech`
|
|
265
|
+
|
|
266
|
+
For threshold tuning, isotonic regression is the correct calibration method once usefulness labels exist:
|
|
267
|
+
|
|
268
|
+
$$
|
|
269
|
+
\Pr(\mathrm{useful} \mid G) = \mathrm{IsotonicRegression}(G, y)
|
|
270
|
+
$$
|
|
271
|
+
|
|
272
|
+
It preserves the monotonic design of the gate without assuming a sigmoid link function.
|
|
273
|
+
|
|
274
|
+
Current thresholds implemented in code:
|
|
275
|
+
|
|
276
|
+
- durable promotion threshold:
|
|
277
|
+
[`DefaultGatingConfig().Threshold = 0.35`](../sidecar/compact/gate.go)
|
|
278
|
+
- abstractive compaction routing threshold:
|
|
279
|
+
[`AbstractiveRoutingThreshold = 0.60`](../sidecar/compact/summarize.go)
|
|
280
|
+
|
|
281
|
+
## 7. Invariants
|
|
282
|
+
|
|
283
|
+
The gate has six invariants that are exercised in `gate_test.go`.
|
|
284
|
+
|
|
285
|
+
### 1. Empty memory implies full novelty
|
|
286
|
+
|
|
287
|
+
$$
|
|
288
|
+
\mathrm{memHits} = \emptyset \Rightarrow H = 1.0
|
|
289
|
+
$$
|
|
290
|
+
|
|
291
|
+
This prevents a cold start from suppressing every first durable insertion.
|
|
292
|
+
|
|
293
|
+
### 2. Saturation vetoes repetition
|
|
294
|
+
|
|
295
|
+
$$
|
|
296
|
+
\mathrm{MemSaturation} = 1 \Rightarrow R = 0
|
|
297
|
+
$$
|
|
298
|
+
|
|
299
|
+
This is what makes the repetition term a true gate instead of an accumulation bonus.
|
|
300
|
+
|
|
301
|
+
### 3. The convex blend stays in bounds
|
|
302
|
+
|
|
303
|
+
$$
|
|
304
|
+
G \in [0,1]
|
|
305
|
+
$$
|
|
306
|
+
|
|
307
|
+
and:
|
|
308
|
+
|
|
309
|
+
$$
|
|
310
|
+
G \in [\min(G_{\mathrm{conv}}, G_{\mathrm{tech}}), \max(G_{\mathrm{conv}}, G_{\mathrm{tech}})]
|
|
311
|
+
$$
|
|
312
|
+
|
|
313
|
+
### 4. Purely conversational turns collapse to the conversational branch
|
|
314
|
+
|
|
315
|
+
$$
|
|
316
|
+
T = 0 \Rightarrow G = G_{\mathrm{conv}}
|
|
317
|
+
$$
|
|
318
|
+
|
|
319
|
+
### 5. Purely technical turns collapse to the technical branch
|
|
320
|
+
|
|
321
|
+
$$
|
|
322
|
+
T = 1 \Rightarrow G = G_{\mathrm{tech}}
|
|
323
|
+
$$
|
|
324
|
+
|
|
325
|
+
### 6. Conversational structure should not overfire on pure code
|
|
326
|
+
|
|
327
|
+
This guards against a common failure mode where technical identifiers masquerade as conversational entities.
|
|
328
|
+
|
|
329
|
+
Together these invariants make the scalar interpretable, stable, and safe to tune later from real traffic rather than intuition.
|
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
# Implementation Notes and Interfaces
|
|
2
|
+
|
|
3
|
+
This document explains the implemented contracts that are easy to miss when
|
|
4
|
+
reading the code piecemeal.
|
|
5
|
+
|
|
6
|
+
## Memory Kind Plus Explicit Context Engine Registration
|
|
7
|
+
|
|
8
|
+
The plugin declares `kind: "memory"` in
|
|
9
|
+
[`openclaw.plugin.json`](../openclaw.plugin.json), but still registers both a
|
|
10
|
+
context engine and a memory prompt section in [`src/index.ts`](../src/index.ts).
|
|
11
|
+
|
|
12
|
+
Why:
|
|
13
|
+
|
|
14
|
+
- the exclusive slot takeover happens through the `memory` kind
|
|
15
|
+
- the runtime behavior still needs explicit lifecycle hooks for:
|
|
16
|
+
- `bootstrap`
|
|
17
|
+
- `ingest`
|
|
18
|
+
- `assemble`
|
|
19
|
+
- `compact`
|
|
20
|
+
- the lightweight memory prompt section remains useful as a separate early
|
|
21
|
+
durable-recall pass
|
|
22
|
+
|
|
23
|
+
This is why the code registers both `registerContextEngine("libravdb-memory", ...)`
|
|
24
|
+
and `registerMemoryPromptSection(...)` instead of relying on only one hook.
|
|
25
|
+
|
|
26
|
+
## Why Ingest Is Fire-and-Forget
|
|
27
|
+
|
|
28
|
+
Implemented in [`src/context-engine.ts`](../src/context-engine.ts).
|
|
29
|
+
|
|
30
|
+
Session insertion is intentionally fire-and-forget:
|
|
31
|
+
|
|
32
|
+
- the active conversation should not block on persistence
|
|
33
|
+
- session memory is useful immediately, but not allowed to become a hard
|
|
34
|
+
dependency for response generation
|
|
35
|
+
|
|
36
|
+
The current code writes to `session:<sessionId>` asynchronously and only then
|
|
37
|
+
attempts the more expensive durable-promotion path for user turns.
|
|
38
|
+
|
|
39
|
+
## Why Gating Uses Exactly Two Searches
|
|
40
|
+
|
|
41
|
+
Implemented in [`sidecar/server/rpc.go`](../sidecar/server/rpc.go).
|
|
42
|
+
|
|
43
|
+
`gating_scalar` performs exactly two store searches:
|
|
44
|
+
|
|
45
|
+
1. one search against `turns:<userId>`
|
|
46
|
+
2. one search against `user:<userId>`
|
|
47
|
+
|
|
48
|
+
Novelty and durable-memory saturation reuse the same `user:` hit set. This keeps
|
|
49
|
+
the RPC bounded and predictable. There is no third store query for novelty.
|
|
50
|
+
|
|
51
|
+
## Why the Token Estimator Uses Bytes/4 in the Gate
|
|
52
|
+
|
|
53
|
+
Implemented in [`sidecar/compact/tokens.go`](../sidecar/compact/tokens.go).
|
|
54
|
+
|
|
55
|
+
The gate's specificity term uses:
|
|
56
|
+
|
|
57
|
+
$$
|
|
58
|
+
\mathrm{EstimateTokens}(t)=\max(\lfloor \mathrm{len}(t)/4 \rfloor, 1)
|
|
59
|
+
$$
|
|
60
|
+
|
|
61
|
+
Why not word count:
|
|
62
|
+
|
|
63
|
+
- word count behaves badly on code
|
|
64
|
+
- file paths, stack traces, and identifiers are token-dense but word-sparse
|
|
65
|
+
- bytes/4 is cheap and stable across prose, code, and mixed technical content
|
|
66
|
+
|
|
67
|
+
Important boundary:
|
|
68
|
+
|
|
69
|
+
- this is the gating estimator
|
|
70
|
+
- prompt-budget fitting uses a separate host-side chars-per-token heuristic in
|
|
71
|
+
[`src/tokens.ts`](../src/tokens.ts)
|
|
72
|
+
|
|
73
|
+
## Why the Sidecar Emits Its Endpoint on stdout
|
|
74
|
+
|
|
75
|
+
Implemented in [`sidecar/main.go`](../sidecar/main.go) and
|
|
76
|
+
[`src/sidecar.ts`](../src/sidecar.ts).
|
|
77
|
+
|
|
78
|
+
The sidecar prints its runtime endpoint to stdout on startup instead of binding
|
|
79
|
+
to a fixed path known in advance.
|
|
80
|
+
|
|
81
|
+
Why:
|
|
82
|
+
|
|
83
|
+
- fixed Unix socket paths create collision risk across concurrent runs
|
|
84
|
+
- temporary per-process endpoints avoid stale socket cleanup problems
|
|
85
|
+
- Windows already requires a dynamic TCP fallback
|
|
86
|
+
|
|
87
|
+
The host watches stdout, captures the endpoint, and then establishes the
|
|
88
|
+
JSON-RPC transport.
|
|
89
|
+
|
|
90
|
+
## Why Degraded Mode Continues the Session
|
|
91
|
+
|
|
92
|
+
Implemented in [`src/sidecar.ts`](../src/sidecar.ts) and
|
|
93
|
+
[`src/context-engine.ts`](../src/context-engine.ts).
|
|
94
|
+
|
|
95
|
+
If the sidecar fails repeatedly, the plugin enters degraded mode instead of
|
|
96
|
+
failing the chat session.
|
|
97
|
+
|
|
98
|
+
Why:
|
|
99
|
+
|
|
100
|
+
- memory augmentation is valuable, but it is not allowed to become a hard
|
|
101
|
+
dependency for the core conversation path
|
|
102
|
+
- the safe fallback is "continue without memory augmentation" rather than
|
|
103
|
+
"reject the entire turn"
|
|
104
|
+
|
|
105
|
+
This is deliberate fault containment.
|
|
106
|
+
|
|
107
|
+
## `ownsCompaction: true`
|
|
108
|
+
|
|
109
|
+
Implemented in [`src/context-engine.ts`](../src/context-engine.ts).
|
|
110
|
+
|
|
111
|
+
The context engine factory returns `ownsCompaction: true`.
|
|
112
|
+
|
|
113
|
+
This tells the host that compaction belongs to the memory engine lifecycle
|
|
114
|
+
itself. In this plugin, compaction is not an optional helper or an external
|
|
115
|
+
maintenance job; it is part of the actual memory system contract.
|
|
116
|
+
|
|
117
|
+
## Interface: `GatingConfig`
|
|
118
|
+
|
|
119
|
+
Defined in [`sidecar/compact/gate.go`](../sidecar/compact/gate.go).
|
|
120
|
+
|
|
121
|
+
```go
|
|
122
|
+
type GatingConfig struct {
|
|
123
|
+
W1c float64
|
|
124
|
+
W2c float64
|
|
125
|
+
W3c float64
|
|
126
|
+
W1t float64
|
|
127
|
+
W2t float64
|
|
128
|
+
W3t float64
|
|
129
|
+
TechNorm float64
|
|
130
|
+
Threshold float64
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Field meanings:
|
|
135
|
+
|
|
136
|
+
- `W1c`, `W2c`, `W3c`: conversational-branch weights for novelty `H`,
|
|
137
|
+
repetition gate `R`, and conversational structure `D`
|
|
138
|
+
- `W1t`, `W2t`, `W3t`: technical-branch weights for specificity `P`,
|
|
139
|
+
actionability `A`, and technical structure `Dtech`
|
|
140
|
+
- `TechNorm`: normalization constant for technical-density saturation
|
|
141
|
+
- `Threshold`: durable-promotion cutoff used by the host
|
|
142
|
+
|
|
143
|
+
Contract:
|
|
144
|
+
|
|
145
|
+
- all weights are intended to be in `[0,1]`
|
|
146
|
+
- the weights within each branch should sum to `1.0` by convention
|
|
147
|
+
- `TechNorm <= 0` is normalized back to the default inside `computeT`
|
|
148
|
+
- zero values are not generally meaningful outside tests; callers should use
|
|
149
|
+
`DefaultGatingConfig()` or config-derived values from [`sidecar/main.go`](../sidecar/main.go)
|
|
150
|
+
|
|
151
|
+
## Interface: `GatingSignals`
|
|
152
|
+
|
|
153
|
+
Defined in [`sidecar/compact/gate.go`](../sidecar/compact/gate.go).
|
|
154
|
+
|
|
155
|
+
```go
|
|
156
|
+
type GatingSignals struct {
|
|
157
|
+
G float64
|
|
158
|
+
T float64
|
|
159
|
+
H float64
|
|
160
|
+
R float64
|
|
161
|
+
D float64
|
|
162
|
+
InputFreq float64
|
|
163
|
+
MemSaturation float64
|
|
164
|
+
P float64
|
|
165
|
+
A float64
|
|
166
|
+
Dtech float64
|
|
167
|
+
Gconv float64
|
|
168
|
+
Gtech float64
|
|
169
|
+
}
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Field meanings:
|
|
173
|
+
|
|
174
|
+
- `G`: final gate score in `[0,1]`
|
|
175
|
+
- `T`: technical-density branch weight in `[0,1]`
|
|
176
|
+
- `H`: novelty score in `[0,1]`
|
|
177
|
+
- `R`: repetition product gate in `[0,1]`
|
|
178
|
+
- `D`: conversational structural load in `[0,1]`
|
|
179
|
+
- `InputFreq`: normalized repeated-mention signal in `[0,1]`
|
|
180
|
+
- `MemSaturation`: normalized durable-memory saturation signal in `[0,1]`
|
|
181
|
+
- `P`: technical specificity score in `[0,1]`
|
|
182
|
+
- `A`: technical actionability score in `[0,1]`
|
|
183
|
+
- `Dtech`: technical structural load in `[0,1]`
|
|
184
|
+
- `Gconv`: weighted conversational branch score
|
|
185
|
+
- `Gtech`: weighted technical branch score
|
|
186
|
+
|
|
187
|
+
Zero-value behavior:
|
|
188
|
+
|
|
189
|
+
- an all-zero `GatingSignals` struct does not mean "valid low-confidence turn";
|
|
190
|
+
it usually means "not computed yet"
|
|
191
|
+
- metadata readers should treat absent values as `0.0` and must not panic
|
|
192
|
+
|
|
193
|
+
Inputs vs outputs:
|
|
194
|
+
|
|
195
|
+
- `GatingConfig` is input
|
|
196
|
+
- `turnHits`, `memHits`, and `text` are inputs to `ComputeGating`
|
|
197
|
+
- `GatingSignals` is output and is intended to be persisted in metadata
|
|
198
|
+
|
|
199
|
+
## Interface: JSON-RPC Surface
|
|
200
|
+
|
|
201
|
+
Implemented in [`sidecar/server/rpc.go`](../sidecar/server/rpc.go).
|
|
202
|
+
|
|
203
|
+
Method names are snake_case in the actual protocol.
|
|
204
|
+
|
|
205
|
+
### `health`
|
|
206
|
+
|
|
207
|
+
- request: `{}`
|
|
208
|
+
- response: `{ ok: boolean, message: string }`
|
|
209
|
+
- errors: none expected unless transport fails
|
|
210
|
+
|
|
211
|
+
### `status`
|
|
212
|
+
|
|
213
|
+
- request: `{}`
|
|
214
|
+
- response:
|
|
215
|
+
`{ ok, message, turnCount, memoryCount, gatingThreshold, abstractiveReady, embeddingProfile }`
|
|
216
|
+
- errors: none expected unless transport fails
|
|
217
|
+
|
|
218
|
+
### `ensure_collections`
|
|
219
|
+
|
|
220
|
+
- request: `{ collections: string[] }`
|
|
221
|
+
- response: `{ ok: true }`
|
|
222
|
+
- errors:
|
|
223
|
+
- collection creation failure in the Go store
|
|
224
|
+
|
|
225
|
+
### `insert_text`
|
|
226
|
+
|
|
227
|
+
- request:
|
|
228
|
+
`{ collection: string, id: string, text: string, metadata: object }`
|
|
229
|
+
- response: `{ ok: true }`
|
|
230
|
+
- errors:
|
|
231
|
+
- embedding failure
|
|
232
|
+
- record validation failure
|
|
233
|
+
- store insertion failure
|
|
234
|
+
|
|
235
|
+
### `gating_scalar`
|
|
236
|
+
|
|
237
|
+
- request: `{ userId: string, text: string }`
|
|
238
|
+
- response: full `GatingSignals` JSON payload
|
|
239
|
+
- errors:
|
|
240
|
+
- search failure on `turns:<userId>` or `user:<userId>`
|
|
241
|
+
|
|
242
|
+
### `search_text`
|
|
243
|
+
|
|
244
|
+
- request:
|
|
245
|
+
`{ collection: string, text: string, k: number, excludeIds?: string[] }`
|
|
246
|
+
- response: `{ results: SearchResult[] }`
|
|
247
|
+
- errors:
|
|
248
|
+
- query embedding failure
|
|
249
|
+
- store search failure
|
|
250
|
+
|
|
251
|
+
### `list_by_meta`
|
|
252
|
+
|
|
253
|
+
- request: `{ collection: string, key: string, value: string }`
|
|
254
|
+
- response: `{ results: SearchResult[] }`
|
|
255
|
+
- errors:
|
|
256
|
+
- store listing failure
|
|
257
|
+
|
|
258
|
+
### `export_memory`
|
|
259
|
+
|
|
260
|
+
- request: `{ userId?: string }`
|
|
261
|
+
- response:
|
|
262
|
+
`{ records: Array<{ collection, id, text, metadata }> }`
|
|
263
|
+
- errors:
|
|
264
|
+
- collection listing failure
|
|
265
|
+
|
|
266
|
+
### `flush_namespace`
|
|
267
|
+
|
|
268
|
+
- request: `{ userId: string }`
|
|
269
|
+
- response: `{ ok: true }`
|
|
270
|
+
- errors:
|
|
271
|
+
- missing `userId`
|
|
272
|
+
- delete-by-prefix failure
|
|
273
|
+
|
|
274
|
+
### `delete`
|
|
275
|
+
|
|
276
|
+
- request: `{ collection: string, id: string }`
|
|
277
|
+
- response: `{ ok: true }`
|
|
278
|
+
- errors:
|
|
279
|
+
- delete failure
|
|
280
|
+
|
|
281
|
+
### `delete_batch`
|
|
282
|
+
|
|
283
|
+
- request: `{ collection: string, ids: string[] }`
|
|
284
|
+
- response: `{ ok: true }`
|
|
285
|
+
- errors:
|
|
286
|
+
- batch delete failure
|
|
287
|
+
|
|
288
|
+
### `compact_session`
|
|
289
|
+
|
|
290
|
+
- request: `{ sessionId: string, force: boolean, targetSize?: number }`
|
|
291
|
+
- response:
|
|
292
|
+
`{ didCompact, clustersFormed, turnsRemoved, summaryMethod, meanConfidence }`
|
|
293
|
+
- errors:
|
|
294
|
+
- missing session id
|
|
295
|
+
- extractive summarizer unavailable
|
|
296
|
+
- summary insertion failure
|
|
297
|
+
- summarization failure
|
|
298
|
+
|
|
299
|
+
### `flush`
|
|
300
|
+
|
|
301
|
+
- request: `{}`
|
|
302
|
+
- response: `{ ok: true }`
|
|
303
|
+
- errors:
|
|
304
|
+
- store flush failure
|
|
305
|
+
|
|
306
|
+
## Interface: Context Engine Lifecycle
|
|
307
|
+
|
|
308
|
+
Implemented in [`src/context-engine.ts`](../src/context-engine.ts).
|
|
309
|
+
|
|
310
|
+
The factory returns an object with this effective shape:
|
|
311
|
+
|
|
312
|
+
```ts
|
|
313
|
+
{
|
|
314
|
+
ownsCompaction: true,
|
|
315
|
+
bootstrap(args: ContextBootstrapArgs): Promise<{ ok: true }>,
|
|
316
|
+
ingest(args: ContextIngestArgs): Promise<{ ingested: boolean }>,
|
|
317
|
+
assemble(args: ContextAssembleArgs): Promise<{
|
|
318
|
+
messages: MemoryMessage[],
|
|
319
|
+
estimatedTokens: number,
|
|
320
|
+
systemPromptAddition: string,
|
|
321
|
+
}>,
|
|
322
|
+
compact(args: ContextCompactArgs): Promise<{ ok: true, compacted: boolean }>,
|
|
323
|
+
}
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### `bootstrap`
|
|
327
|
+
|
|
328
|
+
Input:
|
|
329
|
+
|
|
330
|
+
- `sessionId`
|
|
331
|
+
- `userId`
|
|
332
|
+
|
|
333
|
+
Behavior:
|
|
334
|
+
|
|
335
|
+
- ensures `session:`, `turns:`, `user:`, and `global` collections exist
|
|
336
|
+
|
|
337
|
+
### `ingest`
|
|
338
|
+
|
|
339
|
+
Input:
|
|
340
|
+
|
|
341
|
+
- `sessionId`
|
|
342
|
+
- `userId`
|
|
343
|
+
- `message`
|
|
344
|
+
- `isHeartbeat?`
|
|
345
|
+
|
|
346
|
+
Behavior:
|
|
347
|
+
|
|
348
|
+
- must not block the session on best-effort persistence
|
|
349
|
+
- writes all non-heartbeat messages to session memory
|
|
350
|
+
- only user turns go through the durable gating path
|
|
351
|
+
|
|
352
|
+
### `assemble`
|
|
353
|
+
|
|
354
|
+
Input:
|
|
355
|
+
|
|
356
|
+
- `sessionId`
|
|
357
|
+
- `userId`
|
|
358
|
+
- `messages`
|
|
359
|
+
- `tokenBudget`
|
|
360
|
+
|
|
361
|
+
Behavior:
|
|
362
|
+
|
|
363
|
+
- must not mutate the incoming `messages` array in place
|
|
364
|
+
- searches three scopes in parallel
|
|
365
|
+
- hybrid-ranks and budget-fits the result set
|
|
366
|
+
- prepends selected memories as synthetic system messages
|
|
367
|
+
- falls back cleanly to the original message list on failure
|
|
368
|
+
|
|
369
|
+
### `compact`
|
|
370
|
+
|
|
371
|
+
Input:
|
|
372
|
+
|
|
373
|
+
- `sessionId`
|
|
374
|
+
- `force?`
|
|
375
|
+
- `targetSize?`
|
|
376
|
+
|
|
377
|
+
Behavior:
|
|
378
|
+
|
|
379
|
+
- delegates to the sidecar `compact_session` RPC
|
|
380
|
+
- returns `{ ok: true, compacted }`
|
|
381
|
+
- treats compaction failure as non-fatal to the active session
|