@xdarkicex/openclaw-memory-libravdb 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -0
- package/docs/README.md +14 -0
- package/docs/architecture-decisions/README.md +6 -0
- package/docs/architecture-decisions/adr-001-onnx-over-ollama.md +21 -0
- package/docs/architecture-decisions/adr-002-libravdb-over-lancedb.md +19 -0
- package/docs/architecture-decisions/adr-003-convex-gating-over-threshold.md +27 -0
- package/docs/architecture-decisions/adr-004-sidecar-over-native-ts.md +21 -0
- package/docs/architecture.md +188 -0
- package/docs/contributing.md +76 -0
- package/docs/dependencies.md +38 -0
- package/docs/embedding-profiles.md +42 -0
- package/docs/gating.md +329 -0
- package/docs/implementation.md +381 -0
- package/docs/installation.md +272 -0
- package/docs/mathematics.md +695 -0
- package/docs/models.md +63 -0
- package/docs/problem.md +64 -0
- package/docs/security.md +86 -0
- package/openclaw.plugin.json +84 -0
- package/package.json +41 -0
- package/scripts/build-sidecar.sh +30 -0
- package/scripts/postinstall.js +169 -0
- package/scripts/setup.sh +20 -0
- package/scripts/setup.ts +505 -0
- package/scripts/sidecar-release.d.ts +4 -0
- package/scripts/sidecar-release.js +17 -0
- package/sidecar/cmd/inspect_onnx/main.go +105 -0
- package/sidecar/compact/gate.go +273 -0
- package/sidecar/compact/gate_test.go +85 -0
- package/sidecar/compact/summarize.go +345 -0
- package/sidecar/compact/summarize_test.go +319 -0
- package/sidecar/compact/tokens.go +11 -0
- package/sidecar/config/config.go +119 -0
- package/sidecar/config/config_test.go +75 -0
- package/sidecar/embed/engine.go +696 -0
- package/sidecar/embed/engine_test.go +349 -0
- package/sidecar/embed/matryoshka.go +93 -0
- package/sidecar/embed/matryoshka_test.go +150 -0
- package/sidecar/embed/onnx_local.go +319 -0
- package/sidecar/embed/onnx_local_test.go +159 -0
- package/sidecar/embed/profile_contract_test.go +71 -0
- package/sidecar/embed/profile_eval_test.go +923 -0
- package/sidecar/embed/profiles.go +39 -0
- package/sidecar/go.mod +21 -0
- package/sidecar/go.sum +30 -0
- package/sidecar/health/check.go +33 -0
- package/sidecar/health/check_test.go +55 -0
- package/sidecar/main.go +151 -0
- package/sidecar/model/encoder.go +222 -0
- package/sidecar/model/registry.go +262 -0
- package/sidecar/model/registry_test.go +102 -0
- package/sidecar/model/seq2seq.go +133 -0
- package/sidecar/server/rpc.go +343 -0
- package/sidecar/server/rpc_test.go +350 -0
- package/sidecar/server/transport.go +160 -0
- package/sidecar/store/libravdb.go +676 -0
- package/sidecar/store/libravdb_test.go +472 -0
- package/sidecar/summarize/engine.go +360 -0
- package/sidecar/summarize/engine_test.go +148 -0
- package/sidecar/summarize/onnx_local.go +494 -0
- package/sidecar/summarize/onnx_local_test.go +48 -0
- package/sidecar/summarize/profiles.go +52 -0
- package/sidecar/summarize/tokenizer.go +13 -0
- package/sidecar/summarize/tokenizer_hf.go +76 -0
- package/sidecar/summarize/util.go +13 -0
- package/src/cli.ts +205 -0
- package/src/context-engine.ts +195 -0
- package/src/index.ts +27 -0
- package/src/memory-provider.ts +24 -0
- package/src/openclaw-plugin-sdk.d.ts +53 -0
- package/src/plugin-runtime.ts +67 -0
- package/src/recall-cache.ts +34 -0
- package/src/recall-utils.ts +22 -0
- package/src/rpc.ts +84 -0
- package/src/scoring.ts +58 -0
- package/src/sidecar.ts +506 -0
- package/src/tokens.ts +36 -0
- package/src/types.ts +146 -0
- package/tsconfig.json +20 -0
- package/tsconfig.tests.json +12 -0
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
package store
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"context"
|
|
5
|
+
"errors"
|
|
6
|
+
"path/filepath"
|
|
7
|
+
"testing"
|
|
8
|
+
|
|
9
|
+
"github.com/xDarkicex/openclaw-memory-libravdb/sidecar/embed"
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
// fakeEmbedder is a deterministic stand-in for an embedding engine. It maps a
// few known strings onto fixed 3-element vectors so tests can predict which
// stored record a query will match.
type fakeEmbedder struct{}

// EmbedDocument returns a fixed vector selected by text: "alpha" and
// "query-alpha" both yield {1, 0, 0}, "beta" yields {0, 1, 0}, and every other
// input yields {0, 0, 1}. The context is unused and the error is always nil.
func (fakeEmbedder) EmbedDocument(_ context.Context, text string) ([]float32, error) {
	switch text {
	case "alpha", "query-alpha":
		return []float32{1, 0, 0}, nil
	case "beta":
		return []float32{0, 1, 0}, nil
	default:
		return []float32{0, 0, 1}, nil
	}
}

// EmbedQuery embeds a query exactly as EmbedDocument embeds a document. The
// caller's context is forwarded unchanged instead of being replaced, matching
// how fakeProfiledEmbedder delegates in this file.
func (e fakeEmbedder) EmbedQuery(ctx context.Context, text string) ([]float32, error) {
	return e.EmbedDocument(ctx, text)
}
|
|
30
|
+
|
|
31
|
+
func (fakeEmbedder) Dimensions() int { return 3 }
|
|
32
|
+
func (fakeEmbedder) Profile() embed.Profile {
|
|
33
|
+
return embed.Profile{
|
|
34
|
+
Backend: "test",
|
|
35
|
+
Family: "test",
|
|
36
|
+
Dimensions: 3,
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
func (fakeEmbedder) Ready() bool { return true }
|
|
40
|
+
func (fakeEmbedder) Reason() string { return "" }
|
|
41
|
+
func (fakeEmbedder) Mode() string { return "primary" }
|
|
42
|
+
|
|
43
|
+
type fakeProfiledEmbedder struct {
|
|
44
|
+
fingerprint string
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
func (e fakeProfiledEmbedder) EmbedDocument(ctx context.Context, text string) ([]float32, error) {
|
|
48
|
+
return fakeEmbedder{}.EmbedDocument(ctx, text)
|
|
49
|
+
}
|
|
50
|
+
func (e fakeProfiledEmbedder) EmbedQuery(ctx context.Context, text string) ([]float32, error) {
|
|
51
|
+
return fakeEmbedder{}.EmbedQuery(ctx, text)
|
|
52
|
+
}
|
|
53
|
+
func (e fakeProfiledEmbedder) Dimensions() int { return 3 }
|
|
54
|
+
func (e fakeProfiledEmbedder) Ready() bool { return true }
|
|
55
|
+
func (e fakeProfiledEmbedder) Reason() string { return "" }
|
|
56
|
+
func (e fakeProfiledEmbedder) Mode() string { return "primary" }
|
|
57
|
+
func (e fakeProfiledEmbedder) Profile() embed.Profile {
|
|
58
|
+
return embed.Profile{
|
|
59
|
+
Backend: "onnx-local",
|
|
60
|
+
Family: "test",
|
|
61
|
+
Dimensions: 3,
|
|
62
|
+
Normalize: true,
|
|
63
|
+
Fingerprint: e.fingerprint,
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
func TestInsertSearchAndDelete(t *testing.T) {
|
|
68
|
+
ctx := context.Background()
|
|
69
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeEmbedder{})
|
|
70
|
+
if err != nil {
|
|
71
|
+
t.Fatalf("Open() error = %v", err)
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if err := s.InsertText(ctx, "session:test", "a", "alpha", map[string]any{"type": "turn"}); err != nil {
|
|
75
|
+
t.Fatalf("InsertText(alpha) error = %v", err)
|
|
76
|
+
}
|
|
77
|
+
if err := s.InsertText(ctx, "session:test", "b", "beta", map[string]any{"type": "turn"}); err != nil {
|
|
78
|
+
t.Fatalf("InsertText(beta) error = %v", err)
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
results, err := s.SearchText(ctx, "session:test", "query-alpha", 5, nil)
|
|
82
|
+
if err != nil {
|
|
83
|
+
t.Fatalf("SearchText() error = %v", err)
|
|
84
|
+
}
|
|
85
|
+
if len(results) != 2 {
|
|
86
|
+
t.Fatalf("expected 2 results, got %d", len(results))
|
|
87
|
+
}
|
|
88
|
+
if results[0].ID != "a" {
|
|
89
|
+
t.Fatalf("expected alpha hit first, got %s", results[0].ID)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
if err := s.Delete(ctx, "session:test", "a"); err != nil {
|
|
93
|
+
t.Fatalf("Delete() error = %v", err)
|
|
94
|
+
}
|
|
95
|
+
results, err = s.SearchText(ctx, "session:test", "query-alpha", 5, nil)
|
|
96
|
+
if err != nil {
|
|
97
|
+
t.Fatalf("SearchText() after delete error = %v", err)
|
|
98
|
+
}
|
|
99
|
+
if len(results) != 1 || results[0].ID != "b" {
|
|
100
|
+
t.Fatalf("expected only beta remaining, got %+v", results)
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
func TestListByMetaAndExclude(t *testing.T) {
|
|
105
|
+
ctx := context.Background()
|
|
106
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeEmbedder{})
|
|
107
|
+
if err != nil {
|
|
108
|
+
t.Fatalf("Open() error = %v", err)
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
_ = s.InsertText(ctx, "global", "g1", "alpha", map[string]any{"type": "doc", "source": "spec"})
|
|
112
|
+
_ = s.InsertText(ctx, "global", "g2", "beta", map[string]any{"type": "doc", "source": "notes"})
|
|
113
|
+
|
|
114
|
+
listed, err := s.ListByMeta(ctx, "global", "source", "spec")
|
|
115
|
+
if err != nil {
|
|
116
|
+
t.Fatalf("ListByMeta() error = %v", err)
|
|
117
|
+
}
|
|
118
|
+
if len(listed) != 1 || listed[0].ID != "g1" {
|
|
119
|
+
t.Fatalf("expected only g1 from metadata filter, got %+v", listed)
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
results, err := s.SearchText(ctx, "global", "query-alpha", 5, []string{"g1"})
|
|
123
|
+
if err != nil {
|
|
124
|
+
t.Fatalf("SearchText() with exclude error = %v", err)
|
|
125
|
+
}
|
|
126
|
+
if len(results) != 1 || results[0].ID != "g2" {
|
|
127
|
+
t.Fatalf("expected g2 after excluding g1, got %+v", results)
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
func TestEnsureCollectionIsIdempotent(t *testing.T) {
|
|
132
|
+
ctx := context.Background()
|
|
133
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeEmbedder{})
|
|
134
|
+
if err != nil {
|
|
135
|
+
t.Fatalf("Open() error = %v", err)
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
if err := s.EnsureCollection(ctx, "session:test"); err != nil {
|
|
139
|
+
t.Fatalf("first EnsureCollection() error = %v", err)
|
|
140
|
+
}
|
|
141
|
+
if err := s.EnsureCollection(ctx, "session:test"); err != nil {
|
|
142
|
+
t.Fatalf("second EnsureCollection() error = %v", err)
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
func TestFlushPersistsAndReloadsRecords(t *testing.T) {
|
|
147
|
+
ctx := context.Background()
|
|
148
|
+
storePath := filepath.Join(t.TempDir(), "store")
|
|
149
|
+
|
|
150
|
+
s, err := Open(storePath, fakeEmbedder{})
|
|
151
|
+
if err != nil {
|
|
152
|
+
t.Fatalf("Open() error = %v", err)
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
if err := s.InsertText(ctx, "global", "g1", "alpha", map[string]any{"source": "spec"}); err != nil {
|
|
156
|
+
t.Fatalf("InsertText() error = %v", err)
|
|
157
|
+
}
|
|
158
|
+
if err := s.Flush(ctx); err != nil {
|
|
159
|
+
t.Fatalf("Flush() error = %v", err)
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
reopened, err := Open(storePath, fakeEmbedder{})
|
|
163
|
+
if err != nil {
|
|
164
|
+
t.Fatalf("reopen Open() error = %v", err)
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
results, err := reopened.SearchText(ctx, "global", "query-alpha", 5, nil)
|
|
168
|
+
if err != nil {
|
|
169
|
+
t.Fatalf("SearchText() after reload error = %v", err)
|
|
170
|
+
}
|
|
171
|
+
if len(results) != 1 || results[0].ID != "g1" {
|
|
172
|
+
t.Fatalf("expected persisted g1 after reload, got %+v", results)
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
func TestOpenRejectsEmbeddingFingerprintMismatch(t *testing.T) {
|
|
177
|
+
ctx := context.Background()
|
|
178
|
+
storePath := filepath.Join(t.TempDir(), "store")
|
|
179
|
+
|
|
180
|
+
s, err := Open(storePath, fakeProfiledEmbedder{fingerprint: "first"})
|
|
181
|
+
if err != nil {
|
|
182
|
+
t.Fatalf("Open(first) error = %v", err)
|
|
183
|
+
}
|
|
184
|
+
if err := s.InsertText(ctx, "global", "g1", "alpha", map[string]any{"source": "spec"}); err != nil {
|
|
185
|
+
t.Fatalf("InsertText() error = %v", err)
|
|
186
|
+
}
|
|
187
|
+
if err := s.Flush(ctx); err != nil {
|
|
188
|
+
t.Fatalf("Flush() error = %v", err)
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
if _, err := Open(storePath, fakeProfiledEmbedder{fingerprint: "second"}); err == nil {
|
|
192
|
+
t.Fatalf("expected embedding fingerprint mismatch to error")
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
func TestInsertRecordAndListCollection(t *testing.T) {
|
|
197
|
+
ctx := context.Background()
|
|
198
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeEmbedder{})
|
|
199
|
+
if err != nil {
|
|
200
|
+
t.Fatalf("Open() error = %v", err)
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
if err := s.InsertRecord(ctx, "_tier_dirty", "session:test/doc1:64", make([]float32, 64), map[string]any{
|
|
204
|
+
"base_collection": "session:test",
|
|
205
|
+
"record_id": "doc1",
|
|
206
|
+
"dims": 64,
|
|
207
|
+
}); err != nil {
|
|
208
|
+
t.Fatalf("InsertRecord() error = %v", err)
|
|
209
|
+
}
|
|
210
|
+
if err := s.InsertRecord(ctx, "_tier_dirty", "session:test/doc1:256", make([]float32, 256), map[string]any{
|
|
211
|
+
"base_collection": "session:test",
|
|
212
|
+
"record_id": "doc1",
|
|
213
|
+
"dims": 256,
|
|
214
|
+
}); err != nil {
|
|
215
|
+
t.Fatalf("InsertRecord() second error = %v", err)
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
results, err := s.ListCollection(ctx, "_tier_dirty")
|
|
219
|
+
if err != nil {
|
|
220
|
+
t.Fatalf("ListCollection() error = %v", err)
|
|
221
|
+
}
|
|
222
|
+
if len(results) != 2 {
|
|
223
|
+
t.Fatalf("expected 2 dirty records, got %d", len(results))
|
|
224
|
+
}
|
|
225
|
+
if results[0].ID != "session:test/doc1:256" || results[1].ID != "session:test/doc1:64" {
|
|
226
|
+
t.Fatalf("expected stable id ordering, got %+v", results)
|
|
227
|
+
}
|
|
228
|
+
if results[0].Text != "" || results[1].Text != "" {
|
|
229
|
+
t.Fatalf("expected non-semantic records to preserve empty text, got %+v", results)
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
func TestInsertRecordRejectsDimensionMismatch(t *testing.T) {
|
|
234
|
+
ctx := context.Background()
|
|
235
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeEmbedder{})
|
|
236
|
+
if err != nil {
|
|
237
|
+
t.Fatalf("Open() error = %v", err)
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
err = s.InsertRecord(ctx, "_tier_dirty", "bad", []float32{0, 0}, map[string]any{"dims": 64})
|
|
241
|
+
if err == nil {
|
|
242
|
+
t.Fatalf("expected dimension mismatch error")
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
func TestInsertMatryoshkaL3IsSourceOfTruth(t *testing.T) {
|
|
247
|
+
ctx := context.Background()
|
|
248
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeMatryoshkaEmbedder{})
|
|
249
|
+
if err != nil {
|
|
250
|
+
t.Fatalf("Open() error = %v", err)
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
s.beforeInsertRecord = func(collection, id string, vec []float32, meta map[string]any) error {
|
|
254
|
+
if collection == tierCollection("test", embed.DimsL2) {
|
|
255
|
+
return errors.New("injected L2 failure")
|
|
256
|
+
}
|
|
257
|
+
return nil
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
vec, err := embed.NewMatryoshkaVec(testMatryoshkaVector())
|
|
261
|
+
if err != nil {
|
|
262
|
+
t.Fatalf("NewMatryoshkaVec() error = %v", err)
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
err = s.InsertMatryoshka(ctx, "test", "doc1", vec, map[string]any{"kind": "memory"})
|
|
266
|
+
if err == nil {
|
|
267
|
+
t.Fatal("expected error on L2 insert failure")
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
if !recordExists(s, "test", "doc1") {
|
|
271
|
+
t.Error("L3 record missing after L2 failure")
|
|
272
|
+
}
|
|
273
|
+
if recordExists(s, tierCollection("test", embed.DimsL1), "doc1") {
|
|
274
|
+
t.Error("L1 record should not exist when L2 insert failed first")
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
dirty, err := s.ListCollection(ctx, dirtyTierCollection)
|
|
278
|
+
if err != nil {
|
|
279
|
+
t.Fatalf("ListCollection(_tier_dirty) error = %v", err)
|
|
280
|
+
}
|
|
281
|
+
if !containsDirty(dirty, "test", "doc1", embed.DimsL2) {
|
|
282
|
+
t.Error("dirty marker missing for failed L2 tier")
|
|
283
|
+
}
|
|
284
|
+
if containsDirty(dirty, "test", "doc1", embed.DimsL1) {
|
|
285
|
+
t.Error("spurious dirty marker for L1 tier")
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
func TestBackfillDirtyTiersRestoresMissingTier(t *testing.T) {
|
|
290
|
+
ctx := context.Background()
|
|
291
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeMatryoshkaEmbedder{})
|
|
292
|
+
if err != nil {
|
|
293
|
+
t.Fatalf("Open() error = %v", err)
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
vec, err := embed.NewMatryoshkaVec(testMatryoshkaVector())
|
|
297
|
+
if err != nil {
|
|
298
|
+
t.Fatalf("NewMatryoshkaVec() error = %v", err)
|
|
299
|
+
}
|
|
300
|
+
if err := s.InsertRecord(ctx, "test", "doc1", vec.L3, map[string]any{"kind": "memory"}); err != nil {
|
|
301
|
+
t.Fatalf("InsertRecord(L3) error = %v", err)
|
|
302
|
+
}
|
|
303
|
+
if err := s.InsertRecord(ctx, dirtyTierCollection, dirtyID("test", "doc1", embed.DimsL2), make([]float32, embed.DimsL2), map[string]any{
|
|
304
|
+
"base_collection": "test",
|
|
305
|
+
"record_id": "doc1",
|
|
306
|
+
"dims": embed.DimsL2,
|
|
307
|
+
}); err != nil {
|
|
308
|
+
t.Fatalf("InsertRecord(dirty) error = %v", err)
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
if err := s.BackfillDirtyTiers(ctx); err != nil {
|
|
312
|
+
t.Fatalf("BackfillDirtyTiers() error = %v", err)
|
|
313
|
+
}
|
|
314
|
+
if !recordExists(s, tierCollection("test", embed.DimsL2), "doc1") {
|
|
315
|
+
t.Fatalf("expected L2 tier record to be restored")
|
|
316
|
+
}
|
|
317
|
+
dirty, err := s.ListCollection(ctx, dirtyTierCollection)
|
|
318
|
+
if err != nil {
|
|
319
|
+
t.Fatalf("ListCollection(_tier_dirty) error = %v", err)
|
|
320
|
+
}
|
|
321
|
+
if len(dirty) != 0 {
|
|
322
|
+
t.Fatalf("expected dirty collection to be empty after restore, got %+v", dirty)
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
func TestCascadeExitsAtCorrectTier(t *testing.T) {
|
|
327
|
+
ctx := context.Background()
|
|
328
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeMatryoshkaEmbedder{})
|
|
329
|
+
if err != nil {
|
|
330
|
+
t.Fatalf("Open() error = %v", err)
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
cfg := CascadeConfig{
|
|
334
|
+
ExitThresholdL1: 0.92,
|
|
335
|
+
ExitThresholdL2: 0.80,
|
|
336
|
+
BudgetMs: 50,
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
vec, err := embed.NewMatryoshkaVec(testMatryoshkaVector())
|
|
340
|
+
if err != nil {
|
|
341
|
+
t.Fatalf("NewMatryoshkaVec() error = %v", err)
|
|
342
|
+
}
|
|
343
|
+
if err := s.InsertMatryoshka(ctx, "test", "doc1", vec, nil); err != nil {
|
|
344
|
+
t.Fatalf("InsertMatryoshka() error = %v", err)
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
result := s.CascadeSearch(ctx, "test", vec, 1, nil, cfg)
|
|
348
|
+
if result.TierUsed != 1 {
|
|
349
|
+
t.Errorf("expected L1 exit for identical vector, got tier %d", result.TierUsed)
|
|
350
|
+
}
|
|
351
|
+
if len(result.Exits) == 0 || result.Exits[0].BestScore < 0.92 {
|
|
352
|
+
t.Errorf("L1 best score %.4f below exit threshold", best(result.Hits))
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
func TestCascadeFallsThroughOnLowScore(t *testing.T) {
|
|
357
|
+
ctx := context.Background()
|
|
358
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeMatryoshkaEmbedder{})
|
|
359
|
+
if err != nil {
|
|
360
|
+
t.Fatalf("Open() error = %v", err)
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
cfg := CascadeConfig{
|
|
364
|
+
ExitThresholdL1: 0.92,
|
|
365
|
+
ExitThresholdL2: 0.80,
|
|
366
|
+
BudgetMs: 50,
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
vec, err := embed.NewMatryoshkaVec(testMatryoshkaVector())
|
|
370
|
+
if err != nil {
|
|
371
|
+
t.Fatalf("NewMatryoshkaVec() error = %v", err)
|
|
372
|
+
}
|
|
373
|
+
if err := s.InsertMatryoshka(ctx, "test", "doc1", vec, nil); err != nil {
|
|
374
|
+
t.Fatalf("InsertMatryoshka() error = %v", err)
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
orthogonal, err := embed.NewMatryoshkaVec(orthogonalMatryoshkaVector())
|
|
378
|
+
if err != nil {
|
|
379
|
+
t.Fatalf("NewMatryoshkaVec(orthogonal) error = %v", err)
|
|
380
|
+
}
|
|
381
|
+
result := s.CascadeSearch(ctx, "test", orthogonal, 1, nil, cfg)
|
|
382
|
+
if result.TierUsed != 3 {
|
|
383
|
+
t.Errorf("expected L3 fallthrough for orthogonal query, got tier %d", result.TierUsed)
|
|
384
|
+
}
|
|
385
|
+
if len(result.Exits) != 3 {
|
|
386
|
+
t.Errorf("expected 3 tier exit records, got %d", len(result.Exits))
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
func TestCascadeDegradesWhenTierEmpty(t *testing.T) {
|
|
391
|
+
ctx := context.Background()
|
|
392
|
+
s, err := Open(filepath.Join(t.TempDir(), "store"), fakeMatryoshkaEmbedder{})
|
|
393
|
+
if err != nil {
|
|
394
|
+
t.Fatalf("Open() error = %v", err)
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
cfg := CascadeConfig{
|
|
398
|
+
ExitThresholdL1: 0.92,
|
|
399
|
+
ExitThresholdL2: 0.80,
|
|
400
|
+
BudgetMs: 50,
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
vec, err := embed.NewMatryoshkaVec(testMatryoshkaVector())
|
|
404
|
+
if err != nil {
|
|
405
|
+
t.Fatalf("NewMatryoshkaVec() error = %v", err)
|
|
406
|
+
}
|
|
407
|
+
if err := s.InsertRecord(ctx, "test", "doc1", vec.L3, nil); err != nil {
|
|
408
|
+
t.Fatalf("InsertRecord(L3) error = %v", err)
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
result := s.CascadeSearch(ctx, "test", vec, 1, nil, cfg)
|
|
412
|
+
if result.TierUsed != 3 {
|
|
413
|
+
t.Errorf("expected L3 fallthrough when lower tiers are empty, got tier %d", result.TierUsed)
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
type fakeMatryoshkaEmbedder struct{}
|
|
418
|
+
|
|
419
|
+
func (fakeMatryoshkaEmbedder) EmbedDocument(_ context.Context, _ string) ([]float32, error) {
|
|
420
|
+
return testMatryoshkaVector(), nil
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
func (fakeMatryoshkaEmbedder) EmbedQuery(_ context.Context, _ string) ([]float32, error) {
|
|
424
|
+
return testMatryoshkaVector(), nil
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
func (fakeMatryoshkaEmbedder) Dimensions() int { return embed.DimsL3 }
|
|
428
|
+
func (fakeMatryoshkaEmbedder) Ready() bool { return true }
|
|
429
|
+
func (fakeMatryoshkaEmbedder) Reason() string { return "" }
|
|
430
|
+
func (fakeMatryoshkaEmbedder) Mode() string { return "primary" }
|
|
431
|
+
func (fakeMatryoshkaEmbedder) Profile() embed.Profile {
|
|
432
|
+
return embed.Profile{
|
|
433
|
+
Backend: "onnx-local",
|
|
434
|
+
Family: "nomic-embed-text-v1.5",
|
|
435
|
+
Dimensions: embed.DimsL3,
|
|
436
|
+
Normalize: true,
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
func testMatryoshkaVector() []float32 {
|
|
441
|
+
full := make([]float32, embed.DimsL3)
|
|
442
|
+
full[0] = 1
|
|
443
|
+
full[1] = 0.5
|
|
444
|
+
full[2] = 0.25
|
|
445
|
+
return full
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
func orthogonalMatryoshkaVector() []float32 {
|
|
449
|
+
full := make([]float32, embed.DimsL3)
|
|
450
|
+
full[10] = 1
|
|
451
|
+
full[11] = -0.5
|
|
452
|
+
full[12] = 0.25
|
|
453
|
+
return full
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
func recordExists(s *Store, collection, id string) bool {
|
|
457
|
+
s.mu.RLock()
|
|
458
|
+
defer s.mu.RUnlock()
|
|
459
|
+
col := s.collections[collection]
|
|
460
|
+
_, ok := col[id]
|
|
461
|
+
return ok
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
func containsDirty(records []SearchResult, base, id string, dims int) bool {
|
|
465
|
+
want := dirtyID(base, id, dims)
|
|
466
|
+
for _, rec := range records {
|
|
467
|
+
if rec.ID == want {
|
|
468
|
+
return true
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
return false
|
|
472
|
+
}
|