@xdarkicex/openclaw-memory-libravdb 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -0
- package/docs/README.md +14 -0
- package/docs/architecture-decisions/README.md +6 -0
- package/docs/architecture-decisions/adr-001-onnx-over-ollama.md +21 -0
- package/docs/architecture-decisions/adr-002-libravdb-over-lancedb.md +19 -0
- package/docs/architecture-decisions/adr-003-convex-gating-over-threshold.md +27 -0
- package/docs/architecture-decisions/adr-004-sidecar-over-native-ts.md +21 -0
- package/docs/architecture.md +188 -0
- package/docs/contributing.md +76 -0
- package/docs/dependencies.md +38 -0
- package/docs/embedding-profiles.md +42 -0
- package/docs/gating.md +329 -0
- package/docs/implementation.md +381 -0
- package/docs/installation.md +272 -0
- package/docs/mathematics.md +695 -0
- package/docs/models.md +63 -0
- package/docs/problem.md +64 -0
- package/docs/security.md +86 -0
- package/openclaw.plugin.json +84 -0
- package/package.json +41 -0
- package/scripts/build-sidecar.sh +30 -0
- package/scripts/postinstall.js +169 -0
- package/scripts/setup.sh +20 -0
- package/scripts/setup.ts +505 -0
- package/scripts/sidecar-release.d.ts +4 -0
- package/scripts/sidecar-release.js +17 -0
- package/sidecar/cmd/inspect_onnx/main.go +105 -0
- package/sidecar/compact/gate.go +273 -0
- package/sidecar/compact/gate_test.go +85 -0
- package/sidecar/compact/summarize.go +345 -0
- package/sidecar/compact/summarize_test.go +319 -0
- package/sidecar/compact/tokens.go +11 -0
- package/sidecar/config/config.go +119 -0
- package/sidecar/config/config_test.go +75 -0
- package/sidecar/embed/engine.go +696 -0
- package/sidecar/embed/engine_test.go +349 -0
- package/sidecar/embed/matryoshka.go +93 -0
- package/sidecar/embed/matryoshka_test.go +150 -0
- package/sidecar/embed/onnx_local.go +319 -0
- package/sidecar/embed/onnx_local_test.go +159 -0
- package/sidecar/embed/profile_contract_test.go +71 -0
- package/sidecar/embed/profile_eval_test.go +923 -0
- package/sidecar/embed/profiles.go +39 -0
- package/sidecar/go.mod +21 -0
- package/sidecar/go.sum +30 -0
- package/sidecar/health/check.go +33 -0
- package/sidecar/health/check_test.go +55 -0
- package/sidecar/main.go +151 -0
- package/sidecar/model/encoder.go +222 -0
- package/sidecar/model/registry.go +262 -0
- package/sidecar/model/registry_test.go +102 -0
- package/sidecar/model/seq2seq.go +133 -0
- package/sidecar/server/rpc.go +343 -0
- package/sidecar/server/rpc_test.go +350 -0
- package/sidecar/server/transport.go +160 -0
- package/sidecar/store/libravdb.go +676 -0
- package/sidecar/store/libravdb_test.go +472 -0
- package/sidecar/summarize/engine.go +360 -0
- package/sidecar/summarize/engine_test.go +148 -0
- package/sidecar/summarize/onnx_local.go +494 -0
- package/sidecar/summarize/onnx_local_test.go +48 -0
- package/sidecar/summarize/profiles.go +52 -0
- package/sidecar/summarize/tokenizer.go +13 -0
- package/sidecar/summarize/tokenizer_hf.go +76 -0
- package/sidecar/summarize/util.go +13 -0
- package/src/cli.ts +205 -0
- package/src/context-engine.ts +195 -0
- package/src/index.ts +27 -0
- package/src/memory-provider.ts +24 -0
- package/src/openclaw-plugin-sdk.d.ts +53 -0
- package/src/plugin-runtime.ts +67 -0
- package/src/recall-cache.ts +34 -0
- package/src/recall-utils.ts +22 -0
- package/src/rpc.ts +84 -0
- package/src/scoring.ts +58 -0
- package/src/sidecar.ts +506 -0
- package/src/tokens.ts +36 -0
- package/src/types.ts +146 -0
- package/tsconfig.json +20 -0
- package/tsconfig.tests.json +12 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
package compact
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"math"
|
|
5
|
+
"regexp"
|
|
6
|
+
"strings"
|
|
7
|
+
|
|
8
|
+
"github.com/xDarkicex/openclaw-memory-libravdb/sidecar/store"
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
// Normalization constants for the repetition signals computed in ComputeGating.
const (
	// inputFrequencyThreshold is the minimum score a turn hit needs in order
	// to be counted toward InputFreq.
	inputFrequencyThreshold = 0.80
	// inputFrequencyCap is the number of qualifying turn hits at which
	// InputFreq saturates at 1.0.
	inputFrequencyCap = 5.0

	// memSaturationThreshold is the minimum score a memory hit needs in order
	// to be counted toward MemSaturation.
	memSaturationThreshold = 0.85
	// memSaturationCap is the number of qualifying memory hits at which
	// MemSaturation saturates at 1.0.
	memSaturationCap = 3.0
)
|
|
17
|
+
|
|
18
|
+
// GatingConfig carries every weight and normalization constant consumed by
// ComputeGating and GatingSignals.Passes.
// By convention the three weights of each branch sum to 1.0.
type GatingConfig struct {
	// Conversational-branch weights, applied to H, R and D respectively.
	W1c, W2c, W3c float64

	// Technical-branch weights, applied to P, A and Dtech respectively.
	W1t, W2t, W3t float64

	// TechNorm divides the raw technical-density score when T is computed.
	TechNorm float64
	// Threshold is the minimum G for which Passes reports true.
	Threshold float64
}
|
|
32
|
+
|
|
33
|
+
func DefaultGatingConfig() GatingConfig {
|
|
34
|
+
return GatingConfig{
|
|
35
|
+
W1c: 0.35, W2c: 0.40, W3c: 0.25,
|
|
36
|
+
W1t: 0.40, W2t: 0.35, W3t: 0.25,
|
|
37
|
+
TechNorm: 1.5,
|
|
38
|
+
Threshold: 0.35,
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// GatingSignals is the fully decomposed output of ComputeGating, kept for
// observability. Every field is stored in record metadata.
type GatingSignals struct {
	// G is the final blended score: (1-T)*Gconv + T*Gtech.
	G float64 `json:"g"`
	// T is the technical-density weight used to blend the two branches.
	T float64 `json:"t"`

	// H is one minus the mean score of the memory hits (1.0 with no hits).
	H float64 `json:"h"`
	// R is InputFreq * (1 - MemSaturation).
	R float64 `json:"r"`
	// D is the natural-language signal score of the text.
	D float64 `json:"d"`

	// InputFreq is the capped fraction of turn hits above the input threshold.
	InputFreq float64 `json:"inputFreq"`
	// MemSaturation is the capped fraction of memory hits above the memory threshold.
	MemSaturation float64 `json:"memSaturation"`

	// P, A and Dtech are the three technical-branch component scores.
	P     float64 `json:"p"`
	A     float64 `json:"a"`
	Dtech float64 `json:"dtech"`

	// Gconv and Gtech are the weighted branch scores before blending.
	Gconv float64 `json:"gconv"`
	Gtech float64 `json:"gtech"`
}
|
|
62
|
+
|
|
63
|
+
var (
|
|
64
|
+
datePattern = regexp.MustCompile(`\b(?:\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{2,4})\b`)
|
|
65
|
+
quantityPattern = regexp.MustCompile(`\b\d+(?:\.\d+)?\b`)
|
|
66
|
+
humanNamePattern = regexp.MustCompile(`(?i)\b(?:mr|mrs|ms|dr|prof)\.?\s+[A-Z][a-z]+\b|\b(?:my (?:manager|teammate|friend|colleague))\s+[A-Z][a-z]+\b`)
|
|
67
|
+
preferencePattern = regexp.MustCompile(`(?i)\b(?:i prefer|i like|i love|my favorite|i enjoy)\b`)
|
|
68
|
+
factAssertionPattern = regexp.MustCompile(`(?i)\b(?:i work at|my [a-z]+ is|i live in|i am|i have)\b`)
|
|
69
|
+
|
|
70
|
+
codeFencePattern = regexp.MustCompile("(?s)```.+?```")
|
|
71
|
+
filePathPattern = regexp.MustCompile(`(?:\./|\.\./|/)?[\w.-]+(?:/[\w.-]+)+(?:\.[A-Za-z0-9]+)?`)
|
|
72
|
+
functionDefPattern = regexp.MustCompile(`(?m)\b(?:func|def|class)\b|\b[A-Za-z_][A-Za-z0-9_]*\s*\(`)
|
|
73
|
+
shellCommandPattern = regexp.MustCompile(`(?m)(?:^\s*\$|\b(?:git|npm|pnpm|yarn|go|docker|make|kubectl)\b)`)
|
|
74
|
+
urlOrEndpointPattern = regexp.MustCompile(`https?://\S+|/[A-Za-z0-9._~/-]+`)
|
|
75
|
+
stackTracePattern = regexp.MustCompile(`(?i)\b(?:panic:|goroutine \d+|stack trace|traceback|exception:|at [^:\s]+:\d+)\b`)
|
|
76
|
+
hexOrHashPattern = regexp.MustCompile(`\b(?:[a-f0-9]{7,40}|0x[a-fA-F0-9]+)\b`)
|
|
77
|
+
functionRefPattern = regexp.MustCompile(`\b[A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?\s*\(`)
|
|
78
|
+
errorCodePattern = regexp.MustCompile(`(?i)\b(?:ERR_[A-Z0-9_]+|E[A-Z0-9_]{2,}|HTTP\s+[45]\d{2}|[45]\d{2})\b`)
|
|
79
|
+
gitRefPattern = regexp.MustCompile(`\b(?:[a-f0-9]{7,40}|HEAD|main|master|origin/[A-Za-z0-9._/-]+)\b`)
|
|
80
|
+
archDecisionPattern = regexp.MustCompile(`(?i)\b(?:switched to|use .+ instead|migrated to|replaced with|decided to|standardize on)\b`)
|
|
81
|
+
resolutionPattern = regexp.MustCompile(`(?i)\b(?:fixed|resolved|patched|corrected|unblocked)\b`)
|
|
82
|
+
milestonePattern = regexp.MustCompile(`(?i)\b(?:deployed|merged|released|shipped|rolled out)\b`)
|
|
83
|
+
configChangePattern = regexp.MustCompile(`(?i)\b(?:set [A-Za-z0-9_]+ to|configured|configuration changed|env(?:ironment)? var)\b`)
|
|
84
|
+
dataStructurePattern = regexp.MustCompile(`(?i)\b(?:struct|interface|type|enum|map\[)\b`)
|
|
85
|
+
dependencyPattern = regexp.MustCompile(`(?i)\b(?:import|require|go\.mod|package\.json|dependency|depends on)\b`)
|
|
86
|
+
testCasePattern = regexp.MustCompile(`(?i)\b(?:func Test\w+|it\(|describe\(|test\()\b`)
|
|
87
|
+
docCommentPattern = regexp.MustCompile(`(?m)(?:^\s*//|/\*\*|///)`)
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
// ComputeGating is a pure function.
|
|
91
|
+
// turnHits: SearchText(turns:userId, text, 10)
|
|
92
|
+
// memHits: SearchText(user:userId, text, 5)
|
|
93
|
+
func ComputeGating(turnHits, memHits []store.SearchResult, text string, cfg GatingConfig) GatingSignals {
|
|
94
|
+
techT := computeT(text, cfg.TechNorm)
|
|
95
|
+
|
|
96
|
+
h := computeH(memHits)
|
|
97
|
+
inputFreq := math.Min(float64(countAbove(turnHits, inputFrequencyThreshold))/inputFrequencyCap, 1.0)
|
|
98
|
+
memSaturation := math.Min(float64(countAbove(memHits, memSaturationThreshold))/memSaturationCap, 1.0)
|
|
99
|
+
r := inputFreq * (1.0 - memSaturation)
|
|
100
|
+
dnl := computeDNL(text)
|
|
101
|
+
|
|
102
|
+
p := computeP(text)
|
|
103
|
+
a := computeA(text)
|
|
104
|
+
dtech := computeDTech(text)
|
|
105
|
+
|
|
106
|
+
gConv := cfg.W1c*h + cfg.W2c*r + cfg.W3c*dnl
|
|
107
|
+
gTech := cfg.W1t*p + cfg.W2t*a + cfg.W3t*dtech
|
|
108
|
+
g := (1.0-techT)*gConv + techT*gTech
|
|
109
|
+
|
|
110
|
+
return GatingSignals{
|
|
111
|
+
G: g, T: techT,
|
|
112
|
+
H: h, R: r, D: dnl,
|
|
113
|
+
InputFreq: inputFreq,
|
|
114
|
+
MemSaturation: memSaturation,
|
|
115
|
+
P: p,
|
|
116
|
+
A: a,
|
|
117
|
+
Dtech: dtech,
|
|
118
|
+
Gconv: gConv,
|
|
119
|
+
Gtech: gTech,
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
func (s GatingSignals) Passes(cfg GatingConfig) bool {
|
|
124
|
+
return s.G >= cfg.Threshold
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
func countAbove(hits []store.SearchResult, threshold float64) int {
|
|
128
|
+
count := 0
|
|
129
|
+
for _, hit := range hits {
|
|
130
|
+
if hit.Score >= threshold {
|
|
131
|
+
count++
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
return count
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
func computeH(hits []store.SearchResult) float64 {
|
|
138
|
+
if len(hits) == 0 {
|
|
139
|
+
return 1.0
|
|
140
|
+
}
|
|
141
|
+
var sum float64
|
|
142
|
+
for _, hit := range hits {
|
|
143
|
+
sum += hit.Score
|
|
144
|
+
}
|
|
145
|
+
return 1.0 - (sum / float64(len(hits)))
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
func computeT(text string, norm float64) float64 {
|
|
149
|
+
text = strings.TrimSpace(text)
|
|
150
|
+
if text == "" {
|
|
151
|
+
return 0.0
|
|
152
|
+
}
|
|
153
|
+
if norm <= 0 {
|
|
154
|
+
norm = 1.5
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
score := 0.0
|
|
158
|
+
if codeFencePattern.MatchString(text) {
|
|
159
|
+
score += 0.5
|
|
160
|
+
}
|
|
161
|
+
if filePathPattern.MatchString(text) {
|
|
162
|
+
score += 0.4
|
|
163
|
+
}
|
|
164
|
+
if functionDefPattern.MatchString(text) {
|
|
165
|
+
score += 0.4
|
|
166
|
+
}
|
|
167
|
+
if shellCommandPattern.MatchString(text) {
|
|
168
|
+
score += 0.4
|
|
169
|
+
}
|
|
170
|
+
if urlOrEndpointPattern.MatchString(text) {
|
|
171
|
+
score += 0.3
|
|
172
|
+
}
|
|
173
|
+
if stackTracePattern.MatchString(text) {
|
|
174
|
+
score += 0.5
|
|
175
|
+
}
|
|
176
|
+
if hexOrHashPattern.MatchString(text) {
|
|
177
|
+
score += 0.3
|
|
178
|
+
}
|
|
179
|
+
return math.Min(score/norm, 1.0)
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
func computeDNL(text string) float64 {
|
|
183
|
+
text = strings.TrimSpace(text)
|
|
184
|
+
if text == "" {
|
|
185
|
+
return 0.0
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
score := 0.0
|
|
189
|
+
if datePattern.MatchString(text) {
|
|
190
|
+
score += 0.3
|
|
191
|
+
}
|
|
192
|
+
if quantityPattern.MatchString(text) {
|
|
193
|
+
score += 0.2
|
|
194
|
+
}
|
|
195
|
+
if humanNamePattern.MatchString(text) {
|
|
196
|
+
score += 0.3
|
|
197
|
+
}
|
|
198
|
+
if preferencePattern.MatchString(text) {
|
|
199
|
+
score += 0.4
|
|
200
|
+
}
|
|
201
|
+
if factAssertionPattern.MatchString(text) {
|
|
202
|
+
score += 0.3
|
|
203
|
+
}
|
|
204
|
+
return math.Min(score, 1.0)
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
func computeP(text string) float64 {
|
|
208
|
+
text = strings.TrimSpace(text)
|
|
209
|
+
if text == "" {
|
|
210
|
+
return 0.0
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
norm := math.Max(float64(EstimateTokens(text))/100.0, 1.0)
|
|
214
|
+
score := 0.0
|
|
215
|
+
score += float64(countMatches(filePathPattern, text)) * 0.4
|
|
216
|
+
score += float64(countMatches(functionRefPattern, text)) * 0.3
|
|
217
|
+
score += float64(countMatches(errorCodePattern, text)) * 0.5
|
|
218
|
+
score += float64(countMatches(gitRefPattern, text)) * 0.4
|
|
219
|
+
score += float64(countMatches(urlOrEndpointPattern, text)) * 0.3
|
|
220
|
+
|
|
221
|
+
return math.Min(score/norm, 1.0)
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
func computeA(text string) float64 {
|
|
225
|
+
text = strings.TrimSpace(text)
|
|
226
|
+
if text == "" {
|
|
227
|
+
return 0.0
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
score := 0.0
|
|
231
|
+
if archDecisionPattern.MatchString(text) {
|
|
232
|
+
score += 0.5
|
|
233
|
+
}
|
|
234
|
+
if resolutionPattern.MatchString(text) {
|
|
235
|
+
score += 0.4
|
|
236
|
+
}
|
|
237
|
+
if milestonePattern.MatchString(text) {
|
|
238
|
+
score += 0.3
|
|
239
|
+
}
|
|
240
|
+
if configChangePattern.MatchString(text) {
|
|
241
|
+
score += 0.4
|
|
242
|
+
}
|
|
243
|
+
return math.Min(score, 1.0)
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
func computeDTech(text string) float64 {
|
|
247
|
+
text = strings.TrimSpace(text)
|
|
248
|
+
if text == "" {
|
|
249
|
+
return 0.0
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
score := 0.0
|
|
253
|
+
if functionDefPattern.MatchString(text) {
|
|
254
|
+
score += 0.4
|
|
255
|
+
}
|
|
256
|
+
if dataStructurePattern.MatchString(text) {
|
|
257
|
+
score += 0.3
|
|
258
|
+
}
|
|
259
|
+
if dependencyPattern.MatchString(text) {
|
|
260
|
+
score += 0.3
|
|
261
|
+
}
|
|
262
|
+
if testCasePattern.MatchString(text) {
|
|
263
|
+
score += 0.2
|
|
264
|
+
}
|
|
265
|
+
if docCommentPattern.MatchString(text) {
|
|
266
|
+
score += 0.2
|
|
267
|
+
}
|
|
268
|
+
return math.Min(score, 1.0)
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// countMatches returns the number of non-overlapping matches of pattern in text.
func countMatches(pattern *regexp.Regexp, text string) int {
	spans := pattern.FindAllStringIndex(text, -1)
	return len(spans)
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
package compact
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"math"
|
|
5
|
+
"testing"
|
|
6
|
+
|
|
7
|
+
"github.com/xDarkicex/openclaw-memory-libravdb/sidecar/store"
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
func TestHEmptyMemory(t *testing.T) {
|
|
11
|
+
sig := ComputeGating(nil, nil, "any text", DefaultGatingConfig())
|
|
12
|
+
if sig.H != 1.0 {
|
|
13
|
+
t.Errorf("H = %.6f, want 1.0 for empty memory", sig.H)
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
func TestRSaturationVetoes(t *testing.T) {
|
|
18
|
+
turnHits := hitsWithScore(10, 0.95)
|
|
19
|
+
memHits := hitsWithScore(5, 0.95)
|
|
20
|
+
sig := ComputeGating(turnHits, memHits, "any text", DefaultGatingConfig())
|
|
21
|
+
if sig.R != 0.0 {
|
|
22
|
+
t.Errorf("R = %.6f, want 0.0 when memory saturated", sig.R)
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
func TestGConvexBound(t *testing.T) {
|
|
27
|
+
for _, text := range []string{"hello", "func main() {}", "```go\nfmt.Println()\n```"} {
|
|
28
|
+
sig := ComputeGating(nil, nil, text, DefaultGatingConfig())
|
|
29
|
+
if sig.G < 0.0 || sig.G > 1.0 {
|
|
30
|
+
t.Errorf("G = %.6f out of [0,1] for text %q", sig.G, text)
|
|
31
|
+
}
|
|
32
|
+
lo := math.Min(sig.Gconv, sig.Gtech)
|
|
33
|
+
hi := math.Max(sig.Gconv, sig.Gtech)
|
|
34
|
+
if sig.G < lo-1e-9 || sig.G > hi+1e-9 {
|
|
35
|
+
t.Errorf("G = %.6f not in [Gconv=%.6f, Gtech=%.6f]", sig.G, sig.Gconv, sig.Gtech)
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
func TestPurelyConversationalUsesConvFormula(t *testing.T) {
|
|
41
|
+
text := "I prefer dark mode and work best in the mornings"
|
|
42
|
+
sig := ComputeGating(nil, nil, text, DefaultGatingConfig())
|
|
43
|
+
if sig.T > 0.05 {
|
|
44
|
+
t.Skipf("text triggered T=%.2f, not purely conversational", sig.T)
|
|
45
|
+
}
|
|
46
|
+
if math.Abs(sig.G-sig.Gconv) > 1e-9 {
|
|
47
|
+
t.Errorf("G=%.9f ≠ Gconv=%.9f when T=0", sig.G, sig.Gconv)
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
func TestPurelyTechnicalUsesTechFormula(t *testing.T) {
|
|
52
|
+
text := "```go\nfunc (s *Store) InsertRecord(ctx context.Context) error {\n}\n```"
|
|
53
|
+
sig := ComputeGating(nil, nil, text, DefaultGatingConfig())
|
|
54
|
+
if sig.T < 0.95 {
|
|
55
|
+
t.Skipf("text triggered T=%.2f, not purely technical", sig.T)
|
|
56
|
+
}
|
|
57
|
+
if math.Abs(sig.G-sig.Gtech) > 1e-9 {
|
|
58
|
+
t.Errorf("G=%.9f ≠ Gtech=%.9f when T=1", sig.G, sig.Gtech)
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
func TestDNLDoesNotFireOnCode(t *testing.T) {
|
|
63
|
+
code := "func NewMatryoshkaVec(full []float32) (MatryoshkaVec, error) {"
|
|
64
|
+
sig := ComputeGating(nil, nil, code, DefaultGatingConfig())
|
|
65
|
+
if sig.D > 0.3 {
|
|
66
|
+
t.Errorf("D_nl = %.4f overfiring on code text", sig.D)
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
func TestEstimateTokensUsesStableByteHeuristic(t *testing.T) {
|
|
71
|
+
if got := EstimateTokens("abcd"); got != 1 {
|
|
72
|
+
t.Fatalf("EstimateTokens(\"abcd\") = %d, want 1", got)
|
|
73
|
+
}
|
|
74
|
+
if got := EstimateTokens("abcdefgh"); got != 2 {
|
|
75
|
+
t.Fatalf("EstimateTokens(\"abcdefgh\") = %d, want 2", got)
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
func hitsWithScore(n int, score float64) []store.SearchResult {
|
|
80
|
+
hits := make([]store.SearchResult, n)
|
|
81
|
+
for i := range hits {
|
|
82
|
+
hits[i] = store.SearchResult{ID: string(rune('a' + i)), Score: score}
|
|
83
|
+
}
|
|
84
|
+
return hits
|
|
85
|
+
}
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
package compact
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"context"
|
|
5
|
+
"crypto/sha256"
|
|
6
|
+
"encoding/hex"
|
|
7
|
+
"fmt"
|
|
8
|
+
"log"
|
|
9
|
+
"math"
|
|
10
|
+
"sort"
|
|
11
|
+
"strconv"
|
|
12
|
+
"strings"
|
|
13
|
+
"time"
|
|
14
|
+
|
|
15
|
+
"github.com/xDarkicex/openclaw-memory-libravdb/sidecar/store"
|
|
16
|
+
"github.com/xDarkicex/openclaw-memory-libravdb/sidecar/summarize"
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
// Compaction defaults.
const (
	// DefaultTargetSize is the cluster target size used when the caller passes
	// a non-positive targetSize.
	DefaultTargetSize = 20
	// DefaultMaxOutputTokens is the MaxOutputTokens passed to the summarizer
	// for every cluster.
	DefaultMaxOutputTokens = 64
	// AbstractiveRoutingThreshold is the mean gating score at or above which a
	// cluster is routed to the abstractive summarizer (when one is ready).
	AbstractiveRoutingThreshold = 0.60
)
|
|
24
|
+
|
|
25
|
+
type Store interface {
|
|
26
|
+
ListByMeta(ctx context.Context, collection, key, value string) ([]store.SearchResult, error)
|
|
27
|
+
InsertText(ctx context.Context, collection, id, text string, meta map[string]any) error
|
|
28
|
+
DeleteBatch(ctx context.Context, collection string, ids []string) error
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// Result summarizes the outcome of one CompactSession call.
type Result struct {
	// DidCompact is false when the session was left untouched.
	DidCompact bool `json:"didCompact"`
	// ClustersFormed is the number of chronological clusters produced.
	ClustersFormed int `json:"clustersFormed"`
	// TurnsRemoved counts source turns whose batch delete succeeded.
	TurnsRemoved int `json:"turnsRemoved"`
	// SummaryMethod is the method shared by all summaries, or "mixed" when
	// clusters used different methods.
	SummaryMethod string `json:"summaryMethod,omitempty"`
	// MeanConfidence is the summary confidence averaged over ClustersFormed,
	// clamped to [0, 1].
	MeanConfidence float64 `json:"meanConfidence"`
}
|
|
38
|
+
|
|
39
|
+
type cluster struct {
|
|
40
|
+
turns []turnRecord
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// turnRecord is the in-memory view of one stored turn during compaction.
type turnRecord struct {
	// id and text are copied from the stored record.
	id   string
	text string
	// metadata is a shallow copy of the stored record's metadata.
	metadata map[string]any
	// ts is the timestamp parsed from metadata["ts"]; 0 when absent or unparseable.
	ts int64
}
|
|
49
|
+
|
|
50
|
+
func CompactSession(
|
|
51
|
+
ctx context.Context,
|
|
52
|
+
st Store,
|
|
53
|
+
extractive summarize.Summarizer,
|
|
54
|
+
abstractive summarize.Summarizer,
|
|
55
|
+
sessionID string,
|
|
56
|
+
force bool,
|
|
57
|
+
targetSize int,
|
|
58
|
+
) (Result, error) {
|
|
59
|
+
if strings.TrimSpace(sessionID) == "" {
|
|
60
|
+
return Result{}, fmt.Errorf("session ID is required")
|
|
61
|
+
}
|
|
62
|
+
if st == nil {
|
|
63
|
+
return Result{}, fmt.Errorf("store is required")
|
|
64
|
+
}
|
|
65
|
+
if extractive == nil {
|
|
66
|
+
return Result{}, fmt.Errorf("extractive summarizer is required")
|
|
67
|
+
}
|
|
68
|
+
if !extractive.Ready() {
|
|
69
|
+
return Result{}, fmt.Errorf("extractive summarizer not ready: %s", extractive.Reason())
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
targetSize = normalizedTargetSize(targetSize)
|
|
73
|
+
collection := "session:" + sessionID
|
|
74
|
+
results, err := st.ListByMeta(ctx, collection, "sessionId", sessionID)
|
|
75
|
+
if err != nil {
|
|
76
|
+
return Result{}, err
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
turns := eligibleTurns(results)
|
|
80
|
+
if len(turns) < 2 {
|
|
81
|
+
return Result{DidCompact: false}, nil
|
|
82
|
+
}
|
|
83
|
+
if !force && len(turns) < targetSize {
|
|
84
|
+
return Result{DidCompact: false}, nil
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
clusters := partitionChronological(turns, targetSize)
|
|
88
|
+
if len(clusters) == 0 {
|
|
89
|
+
return Result{DidCompact: false}, nil
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
now := time.Now().UnixMilli()
|
|
93
|
+
out := Result{
|
|
94
|
+
DidCompact: true,
|
|
95
|
+
ClustersFormed: len(clusters),
|
|
96
|
+
}
|
|
97
|
+
var totalConfidence float64
|
|
98
|
+
|
|
99
|
+
for idx, group := range clusters {
|
|
100
|
+
if len(group.turns) == 0 {
|
|
101
|
+
continue
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
summaryTurns := make([]summarize.Turn, 0, len(group.turns))
|
|
105
|
+
sourceIDs := make([]string, 0, len(group.turns))
|
|
106
|
+
for _, turn := range group.turns {
|
|
107
|
+
summaryTurns = append(summaryTurns, summarize.Turn{
|
|
108
|
+
ID: turn.id,
|
|
109
|
+
Text: turn.text,
|
|
110
|
+
})
|
|
111
|
+
sourceIDs = append(sourceIDs, turn.id)
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
summarizer, meanGating := routeSummarizer(group.turns, extractive, abstractive)
|
|
115
|
+
log.Printf("compaction: cluster_id=%d mean_gating_score=%.3f summarizer_used=%s", idx, meanGating, summarizer.Mode())
|
|
116
|
+
|
|
117
|
+
summary, err := summarizer.Summarize(ctx, summaryTurns, summarize.SummaryOpts{
|
|
118
|
+
MinInputTurns: 1,
|
|
119
|
+
MaxOutputTokens: DefaultMaxOutputTokens,
|
|
120
|
+
})
|
|
121
|
+
if err != nil {
|
|
122
|
+
return Result{}, fmt.Errorf("cluster %d summarize failed: %w", idx, err)
|
|
123
|
+
}
|
|
124
|
+
if strings.TrimSpace(summary.Text) == "" {
|
|
125
|
+
return Result{}, fmt.Errorf("cluster %d summarize produced empty text", idx)
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
summary.SourceIDs = append([]string(nil), sourceIDs...)
|
|
129
|
+
|
|
130
|
+
metadata := summaryMetadata(sessionID, now, summary, group.turns)
|
|
131
|
+
summaryID := summaryRecordID(sessionID, summary.SourceIDs)
|
|
132
|
+
if err := st.InsertText(ctx, collection, summaryID, summary.Text, metadata); err != nil {
|
|
133
|
+
return Result{}, fmt.Errorf("summary insert failed, source turns preserved: %w", err)
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
if err := st.DeleteBatch(ctx, collection, summary.SourceIDs); err != nil {
|
|
137
|
+
log.Printf("compaction: summary %s inserted but source delete failed: %v", summaryID, err)
|
|
138
|
+
} else {
|
|
139
|
+
out.TurnsRemoved += len(summary.SourceIDs)
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
totalConfidence += summary.Confidence
|
|
143
|
+
out.SummaryMethod = mergeMethod(out.SummaryMethod, summary.Method)
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
if out.ClustersFormed > 0 {
|
|
147
|
+
out.MeanConfidence = clamp01(totalConfidence / float64(out.ClustersFormed))
|
|
148
|
+
}
|
|
149
|
+
return out, nil
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
func routeSummarizer(turns []turnRecord, extractive summarize.Summarizer, abstractive summarize.Summarizer) (summarize.Summarizer, float64) {
|
|
153
|
+
meanGating := meanGatingScore(turns)
|
|
154
|
+
if abstractive == nil || !abstractive.Ready() {
|
|
155
|
+
return extractive, meanGating
|
|
156
|
+
}
|
|
157
|
+
if meanGating >= AbstractiveRoutingThreshold {
|
|
158
|
+
return abstractive, meanGating
|
|
159
|
+
}
|
|
160
|
+
return extractive, meanGating
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
func meanGatingScore(turns []turnRecord) float64 {
|
|
164
|
+
if len(turns) == 0 {
|
|
165
|
+
return 0.0
|
|
166
|
+
}
|
|
167
|
+
var sum float64
|
|
168
|
+
for _, turn := range turns {
|
|
169
|
+
sum += metaFloat(turn.metadata, "gating_score")
|
|
170
|
+
}
|
|
171
|
+
return clamp01(sum / float64(len(turns)))
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
func normalizedTargetSize(targetSize int) int {
|
|
175
|
+
if targetSize <= 0 {
|
|
176
|
+
return DefaultTargetSize
|
|
177
|
+
}
|
|
178
|
+
return targetSize
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
func eligibleTurns(results []store.SearchResult) []turnRecord {
|
|
182
|
+
turns := make([]turnRecord, 0, len(results))
|
|
183
|
+
for _, result := range results {
|
|
184
|
+
typed, ok := result.Metadata["type"].(string)
|
|
185
|
+
if ok && typed == "summary" {
|
|
186
|
+
continue
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
turns = append(turns, turnRecord{
|
|
190
|
+
id: result.ID,
|
|
191
|
+
text: result.Text,
|
|
192
|
+
metadata: cloneMeta(result.Metadata),
|
|
193
|
+
ts: metadataTimestamp(result.Metadata),
|
|
194
|
+
})
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
sort.Slice(turns, func(i, j int) bool {
|
|
198
|
+
if turns[i].ts == turns[j].ts {
|
|
199
|
+
return turns[i].id < turns[j].id
|
|
200
|
+
}
|
|
201
|
+
return turns[i].ts < turns[j].ts
|
|
202
|
+
})
|
|
203
|
+
return turns
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
func partitionChronological(turns []turnRecord, targetSize int) []cluster {
|
|
207
|
+
if len(turns) == 0 {
|
|
208
|
+
return nil
|
|
209
|
+
}
|
|
210
|
+
targetSize = normalizedTargetSize(targetSize)
|
|
211
|
+
clusterCount := int(math.Ceil(float64(len(turns)) / float64(targetSize)))
|
|
212
|
+
if clusterCount < 1 {
|
|
213
|
+
clusterCount = 1
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
clusters := make([]cluster, clusterCount)
|
|
217
|
+
for i, turn := range turns {
|
|
218
|
+
index := i * clusterCount / len(turns)
|
|
219
|
+
clusters[index].turns = append(clusters[index].turns, turn)
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
out := make([]cluster, 0, len(clusters))
|
|
223
|
+
for _, group := range clusters {
|
|
224
|
+
if len(group.turns) > 0 {
|
|
225
|
+
out = append(out, group)
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
return out
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
func summaryMetadata(sessionID string, compactedAt int64, summary summarize.Summary, turns []turnRecord) map[string]any {
|
|
232
|
+
meta := map[string]any{
|
|
233
|
+
"type": "summary",
|
|
234
|
+
"ts": compactedAt,
|
|
235
|
+
"sessionId": sessionID,
|
|
236
|
+
"source_ids": append([]string(nil), summary.SourceIDs...),
|
|
237
|
+
"method": summary.Method,
|
|
238
|
+
"token_count": summary.TokenCount,
|
|
239
|
+
"confidence": clamp01(summary.Confidence),
|
|
240
|
+
"compacted_at": compactedAt,
|
|
241
|
+
"decay_rate": clamp01(1.0 - summary.Confidence),
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
for _, turn := range turns {
|
|
245
|
+
if userID, ok := turn.metadata["userId"]; ok && userID != nil {
|
|
246
|
+
meta["userId"] = userID
|
|
247
|
+
break
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
return meta
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// summaryRecordID derives a deterministic record ID for a summary: the prefix
// "summary:" followed by the hex encoding of the first 8 bytes of
// SHA-256("<sessionID>:<sourceIDs joined by commas>").
func summaryRecordID(sessionID string, sourceIDs []string) string {
	hasher := sha256.New()
	hasher.Write([]byte(sessionID + ":"))
	hasher.Write([]byte(strings.Join(sourceIDs, ",")))
	digest := hasher.Sum(nil)

	return "summary:" + hex.EncodeToString(digest[:8])
}
|
|
257
|
+
|
|
258
|
+
func metadataTimestamp(meta map[string]any) int64 {
|
|
259
|
+
value, ok := meta["ts"]
|
|
260
|
+
if !ok {
|
|
261
|
+
return 0
|
|
262
|
+
}
|
|
263
|
+
switch typed := value.(type) {
|
|
264
|
+
case int:
|
|
265
|
+
return int64(typed)
|
|
266
|
+
case int64:
|
|
267
|
+
return typed
|
|
268
|
+
case int32:
|
|
269
|
+
return int64(typed)
|
|
270
|
+
case float64:
|
|
271
|
+
return int64(typed)
|
|
272
|
+
case float32:
|
|
273
|
+
return int64(typed)
|
|
274
|
+
case jsonNumber:
|
|
275
|
+
n, _ := typed.Int64()
|
|
276
|
+
return n
|
|
277
|
+
case string:
|
|
278
|
+
n, err := strconv.ParseInt(typed, 10, 64)
|
|
279
|
+
if err == nil {
|
|
280
|
+
return n
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
return 0
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
// metaFloat extracts meta[key] as a float64.
//
// Accepted representations are the Go float and integer kinds handled below,
// numeric strings (surrounding whitespace is ignored), and any value exposing
// Float64() such as json.Number, so that numeric metadata decoded with
// json.Decoder.UseNumber is read the same way metadataTimestamp reads it.
// A missing key, an unsupported type, or an unparseable value yields 0.
func metaFloat(meta map[string]any, key string) float64 {
	value, ok := meta[key]
	if !ok {
		return 0.0
	}
	switch typed := value.(type) {
	case float64:
		return typed
	case float32:
		return float64(typed)
	case int:
		return float64(typed)
	case int64:
		return float64(typed)
	case int32:
		return float64(typed)
	case string:
		if n, err := strconv.ParseFloat(strings.TrimSpace(typed), 64); err == nil {
			return n
		}
	case interface{ Float64() (float64, error) }:
		if n, err := typed.Float64(); err == nil {
			return n
		}
	}
	return 0.0
}
|
|
310
|
+
|
|
311
|
+
// jsonNumber is the minimal integer-conversion behavior metadataTimestamp
// needs from number-like metadata values.
type jsonNumber interface {
	// Int64 returns the value as an int64, or an error if it cannot be
	// represented as one.
	Int64() (int64, error)
}
|
|
314
|
+
|
|
315
|
+
// cloneMeta returns a shallow copy of src. The result is never nil: a nil or
// empty src yields a fresh empty map. Values are not deep-copied.
func cloneMeta(src map[string]any) map[string]any {
	// len and range are both safe on a nil map, so no separate nil branch is needed.
	dst := make(map[string]any, len(src))
	for key := range src {
		dst[key] = src[key]
	}
	return dst
}
|
|
325
|
+
|
|
326
|
+
// mergeMethod folds the method of one more summary into the running value:
// an empty current adopts next, equal values are kept, and any disagreement
// yields "mixed".
func mergeMethod(current, next string) string {
	if current == "" {
		return next
	}
	if current != next {
		return "mixed"
	}
	return current
}
|
|
336
|
+
|
|
337
|
+
// clamp01 limits v to the closed interval [0, 1]. Values already inside the
// interval, and NaN, are returned unchanged.
func clamp01(v float64) float64 {
	switch {
	case v < 0:
		return 0
	case v > 1:
		return 1
	default:
		return v
	}
}
|