@xdarkicex/openclaw-memory-libravdb 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +46 -0
  2. package/docs/README.md +14 -0
  3. package/docs/architecture-decisions/README.md +6 -0
  4. package/docs/architecture-decisions/adr-001-onnx-over-ollama.md +21 -0
  5. package/docs/architecture-decisions/adr-002-libravdb-over-lancedb.md +19 -0
  6. package/docs/architecture-decisions/adr-003-convex-gating-over-threshold.md +27 -0
  7. package/docs/architecture-decisions/adr-004-sidecar-over-native-ts.md +21 -0
  8. package/docs/architecture.md +188 -0
  9. package/docs/contributing.md +76 -0
  10. package/docs/dependencies.md +38 -0
  11. package/docs/embedding-profiles.md +42 -0
  12. package/docs/gating.md +329 -0
  13. package/docs/implementation.md +381 -0
  14. package/docs/installation.md +272 -0
  15. package/docs/mathematics.md +695 -0
  16. package/docs/models.md +63 -0
  17. package/docs/problem.md +64 -0
  18. package/docs/security.md +86 -0
  19. package/openclaw.plugin.json +84 -0
  20. package/package.json +41 -0
  21. package/scripts/build-sidecar.sh +30 -0
  22. package/scripts/postinstall.js +169 -0
  23. package/scripts/setup.sh +20 -0
  24. package/scripts/setup.ts +505 -0
  25. package/scripts/sidecar-release.d.ts +4 -0
  26. package/scripts/sidecar-release.js +17 -0
  27. package/sidecar/cmd/inspect_onnx/main.go +105 -0
  28. package/sidecar/compact/gate.go +273 -0
  29. package/sidecar/compact/gate_test.go +85 -0
  30. package/sidecar/compact/summarize.go +345 -0
  31. package/sidecar/compact/summarize_test.go +319 -0
  32. package/sidecar/compact/tokens.go +11 -0
  33. package/sidecar/config/config.go +119 -0
  34. package/sidecar/config/config_test.go +75 -0
  35. package/sidecar/embed/engine.go +696 -0
  36. package/sidecar/embed/engine_test.go +349 -0
  37. package/sidecar/embed/matryoshka.go +93 -0
  38. package/sidecar/embed/matryoshka_test.go +150 -0
  39. package/sidecar/embed/onnx_local.go +319 -0
  40. package/sidecar/embed/onnx_local_test.go +159 -0
  41. package/sidecar/embed/profile_contract_test.go +71 -0
  42. package/sidecar/embed/profile_eval_test.go +923 -0
  43. package/sidecar/embed/profiles.go +39 -0
  44. package/sidecar/go.mod +21 -0
  45. package/sidecar/go.sum +30 -0
  46. package/sidecar/health/check.go +33 -0
  47. package/sidecar/health/check_test.go +55 -0
  48. package/sidecar/main.go +151 -0
  49. package/sidecar/model/encoder.go +222 -0
  50. package/sidecar/model/registry.go +262 -0
  51. package/sidecar/model/registry_test.go +102 -0
  52. package/sidecar/model/seq2seq.go +133 -0
  53. package/sidecar/server/rpc.go +343 -0
  54. package/sidecar/server/rpc_test.go +350 -0
  55. package/sidecar/server/transport.go +160 -0
  56. package/sidecar/store/libravdb.go +676 -0
  57. package/sidecar/store/libravdb_test.go +472 -0
  58. package/sidecar/summarize/engine.go +360 -0
  59. package/sidecar/summarize/engine_test.go +148 -0
  60. package/sidecar/summarize/onnx_local.go +494 -0
  61. package/sidecar/summarize/onnx_local_test.go +48 -0
  62. package/sidecar/summarize/profiles.go +52 -0
  63. package/sidecar/summarize/tokenizer.go +13 -0
  64. package/sidecar/summarize/tokenizer_hf.go +76 -0
  65. package/sidecar/summarize/util.go +13 -0
  66. package/src/cli.ts +205 -0
  67. package/src/context-engine.ts +195 -0
  68. package/src/index.ts +27 -0
  69. package/src/memory-provider.ts +24 -0
  70. package/src/openclaw-plugin-sdk.d.ts +53 -0
  71. package/src/plugin-runtime.ts +67 -0
  72. package/src/recall-cache.ts +34 -0
  73. package/src/recall-utils.ts +22 -0
  74. package/src/rpc.ts +84 -0
  75. package/src/scoring.ts +58 -0
  76. package/src/sidecar.ts +506 -0
  77. package/src/tokens.ts +36 -0
  78. package/src/types.ts +146 -0
  79. package/tsconfig.json +20 -0
  80. package/tsconfig.tests.json +12 -0
@@ -0,0 +1,676 @@
1
+ package store
2
+
3
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"math"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/xDarkicex/openclaw-memory-libravdb/sidecar/embed"
)
18
+
19
// SearchResult is one record returned by the store's search and listing
// methods.
type SearchResult struct {
	// ID is the record's identifier within its collection.
	ID string `json:"id"`
	// Score is the cosine similarity for vector searches; the listing
	// methods always report 0.
	Score float64 `json:"score"`
	// Text is the stored source text (empty for vector-only inserts).
	Text string `json:"text"`
	// Metadata is a shallow copy of the record's metadata.
	Metadata map[string]any `json:"metadata"`
}
25
+
26
// TierExit records what happened at one tier of a cascade search.
type TierExit struct {
	// Tier is the 1-based tier number.
	Tier int `json:"tier"`
	// Dims is the vector dimensionality searched at this tier.
	Dims int `json:"dims"`
	// BestScore is the score of the top hit, or 0 when there were no hits.
	BestScore float64 `json:"bestScore"`
	// Latency is the time spent searching this tier.
	Latency time.Duration `json:"latency"`
	// Exited reports whether the cascade returned this tier's hits.
	Exited bool `json:"exited"`
}
33
+
34
+ type CascadeResult struct {
35
+ Hits []SearchResult `json:"hits"`
36
+ TierUsed int `json:"tierUsed"`
37
+ Dims int `json:"dims"`
38
+ Latency time.Duration `json:"latency"`
39
+ Exits []TierExit `json:"exits"`
40
+ }
41
+
42
// CascadeConfig tunes the early-exit behaviour of a cascade search.
type CascadeConfig struct {
	// ExitThresholdL1 is the minimum top score at which tier 1 hits are
	// returned without consulting later tiers.
	ExitThresholdL1 float64
	// ExitThresholdL2 is the equivalent threshold for tier 2.
	ExitThresholdL2 float64
	// BudgetMs is the time budget in milliseconds; once it has elapsed the
	// remaining early tiers are skipped and the final tier is searched.
	BudgetMs int
}
47
+
48
+ var DefaultCascadeConfig = CascadeConfig{
49
+ ExitThresholdL1: 0.92,
50
+ ExitThresholdL2: 0.80,
51
+ BudgetMs: 50,
52
+ }
53
+
54
// record is the in-memory form of a stored entry.
type record struct {
	ID       string         // identifier, also the key in the collection map
	Text     string         // source text; empty for vector-only inserts
	Vector   []float32      // embedding, copied on insert
	Metadata map[string]any // shallow copy of the caller's metadata
}
60
+
61
+ type Store struct {
62
+ path string
63
+ embedder embed.Embedder
64
+ profile embed.Profile
65
+ mu sync.RWMutex
66
+ collections map[string]map[string]record
67
+ beforeInsertRecord func(collection, id string, vec []float32, meta map[string]any) error
68
+ }
69
+
70
// persistedRecord is the JSON form of a record inside store.json.
type persistedRecord struct {
	ID       string         `json:"id"`
	Text     string         `json:"text"`
	Vector   []float32      `json:"vector"`
	Metadata map[string]any `json:"metadata"`
}
76
+
77
+ type persistedStore struct {
78
+ Embedding *embed.Profile `json:"embedding,omitempty"`
79
+ Collections map[string][]persistedRecord `json:"collections"`
80
+ }
81
+
82
// dirtyTierCollection is the collection that holds markers for tier vectors
// whose insert failed and that BackfillDirtyTiers should rebuild.
const dirtyTierCollection = "_tier_dirty"
83
+
84
+ func Open(path string, embedder embed.Embedder) (*Store, error) {
85
+ if path == "" {
86
+ return nil, errors.New("store path is required")
87
+ }
88
+ if embedder == nil {
89
+ return nil, errors.New("embedder is required")
90
+ }
91
+ s := &Store{
92
+ path: path,
93
+ embedder: embedder,
94
+ collections: make(map[string]map[string]record),
95
+ }
96
+ s.profile = embedder.Profile()
97
+ if err := s.load(); err != nil {
98
+ return nil, err
99
+ }
100
+ return s, nil
101
+ }
102
+
103
+ func (s *Store) Path() string {
104
+ return s.path
105
+ }
106
+
107
+ func (s *Store) EnsureCollection(_ context.Context, collection string) error {
108
+ s.mu.Lock()
109
+ defer s.mu.Unlock()
110
+ s.ensureCollectionLocked(collection)
111
+ return nil
112
+ }
113
+
114
+ func (s *Store) InsertText(ctx context.Context, collection, id, text string, meta map[string]any) error {
115
+ vec, err := s.embedder.EmbedDocument(ctx, text)
116
+ if err != nil {
117
+ return fmt.Errorf("embed document: %w", err)
118
+ }
119
+ return s.insertRecord(ctx, collection, id, text, vec, meta)
120
+ }
121
+
122
+ func (s *Store) InsertRecord(_ context.Context, collection, id string, vec []float32, meta map[string]any) error {
123
+ return s.insertRecord(context.Background(), collection, id, "", vec, meta)
124
+ }
125
+
126
+ func (s *Store) insertRecord(_ context.Context, collection, id, text string, vec []float32, meta map[string]any) error {
127
+ if id == "" {
128
+ return errors.New("record id is required")
129
+ }
130
+ expected := s.collectionDimensions(collection, vec, meta)
131
+ if len(vec) != expected {
132
+ return fmt.Errorf("record vector dimensions %d do not match collection %s dimensions %d", len(vec), collection, expected)
133
+ }
134
+ if s.beforeInsertRecord != nil {
135
+ if err := s.beforeInsertRecord(collection, id, vec, meta); err != nil {
136
+ return err
137
+ }
138
+ }
139
+
140
+ s.mu.Lock()
141
+ defer s.mu.Unlock()
142
+
143
+ col := s.ensureCollectionLocked(collection)
144
+ col[id] = record{
145
+ ID: id,
146
+ Text: text,
147
+ Vector: append([]float32(nil), vec...),
148
+ Metadata: cloneMeta(meta),
149
+ }
150
+ return nil
151
+ }
152
+
153
+ func (s *Store) InsertMatryoshka(ctx context.Context, collection, id string, vec embed.MatryoshkaVec, meta map[string]any) error {
154
+ if err := s.InsertRecord(ctx, collection, id, vec.L3, meta); err != nil {
155
+ return fmt.Errorf("L3 insert: %w", err)
156
+ }
157
+ if err := s.InsertRecord(ctx, tierCollection(collection, embed.DimsL2), id, vec.L2, meta); err != nil {
158
+ s.markTierDirty(ctx, collection, id, embed.DimsL2)
159
+ return fmt.Errorf("L2 insert: %w", err)
160
+ }
161
+ if err := s.InsertRecord(ctx, tierCollection(collection, embed.DimsL1), id, vec.L1, meta); err != nil {
162
+ s.markTierDirty(ctx, collection, id, embed.DimsL1)
163
+ return fmt.Errorf("L1 insert: %w", err)
164
+ }
165
+ return nil
166
+ }
167
+
168
+ func (s *Store) SearchText(ctx context.Context, collection, query string, k int, exclude []string) ([]SearchResult, error) {
169
+ if me, ok := s.embedder.(embed.MatryoshkaEmbedder); ok && embed.SupportsMatryoshka(s.embedder) {
170
+ queryVec, err := me.EmbedQueryM(ctx, query)
171
+ if err == nil {
172
+ result := s.CascadeSearch(ctx, collection, queryVec, k, exclude, DefaultCascadeConfig)
173
+ return result.Hits, nil
174
+ }
175
+ }
176
+
177
+ vec, err := s.embedder.EmbedQuery(ctx, query)
178
+ if err != nil {
179
+ return nil, fmt.Errorf("embed query: %w", err)
180
+ }
181
+ return s.searchVec(ctx, collection, vec, k, exclude), nil
182
+ }
183
+
184
+ func (s *Store) ListByMeta(_ context.Context, collection, key, value string) ([]SearchResult, error) {
185
+ s.mu.RLock()
186
+ defer s.mu.RUnlock()
187
+
188
+ col := s.collections[collection]
189
+ results := make([]SearchResult, 0, len(col))
190
+ for _, rec := range col {
191
+ metaValue, ok := rec.Metadata[key]
192
+ if !ok || !matchesMeta(metaValue, value) {
193
+ continue
194
+ }
195
+ results = append(results, SearchResult{
196
+ ID: rec.ID,
197
+ Score: 0,
198
+ Text: rec.Text,
199
+ Metadata: cloneMeta(rec.Metadata),
200
+ })
201
+ }
202
+
203
+ sort.Slice(results, func(i, j int) bool { return results[i].ID < results[j].ID })
204
+ return results, nil
205
+ }
206
+
207
+ func (s *Store) ListCollection(_ context.Context, collection string) ([]SearchResult, error) {
208
+ s.mu.RLock()
209
+ defer s.mu.RUnlock()
210
+
211
+ col := s.collections[collection]
212
+ results := make([]SearchResult, 0, len(col))
213
+ for _, rec := range col {
214
+ results = append(results, SearchResult{
215
+ ID: rec.ID,
216
+ Score: 0,
217
+ Text: rec.Text,
218
+ Metadata: cloneMeta(rec.Metadata),
219
+ })
220
+ }
221
+
222
+ sort.Slice(results, func(i, j int) bool { return results[i].ID < results[j].ID })
223
+ return results, nil
224
+ }
225
+
226
+ func (s *Store) CollectionNames() []string {
227
+ s.mu.RLock()
228
+ defer s.mu.RUnlock()
229
+
230
+ names := make([]string, 0, len(s.collections))
231
+ for name := range s.collections {
232
+ names = append(names, name)
233
+ }
234
+ sort.Strings(names)
235
+ return names
236
+ }
237
+
238
+ func (s *Store) CountByPrefix(prefix string) int {
239
+ s.mu.RLock()
240
+ defer s.mu.RUnlock()
241
+
242
+ total := 0
243
+ for collection, records := range s.collections {
244
+ if !strings.HasPrefix(collection, prefix) {
245
+ continue
246
+ }
247
+ total += len(records)
248
+ }
249
+ return total
250
+ }
251
+
252
+ func (s *Store) DeleteCollectionsByPrefix(_ context.Context, prefix string) error {
253
+ s.mu.Lock()
254
+ defer s.mu.Unlock()
255
+
256
+ for collection := range s.collections {
257
+ if strings.HasPrefix(collection, prefix) {
258
+ delete(s.collections, collection)
259
+ }
260
+ }
261
+ return nil
262
+ }
263
+
264
+ func (s *Store) loadVec(_ context.Context, collection, id string) ([]float32, error) {
265
+ s.mu.RLock()
266
+ defer s.mu.RUnlock()
267
+
268
+ col := s.collections[collection]
269
+ rec, ok := col[id]
270
+ if !ok {
271
+ return nil, fmt.Errorf("record %s/%s not found", collection, id)
272
+ }
273
+ return append([]float32(nil), rec.Vector...), nil
274
+ }
275
+
276
+ func (s *Store) loadMeta(_ context.Context, collection, id string) (map[string]any, error) {
277
+ s.mu.RLock()
278
+ defer s.mu.RUnlock()
279
+
280
+ col := s.collections[collection]
281
+ rec, ok := col[id]
282
+ if !ok {
283
+ return nil, fmt.Errorf("record %s/%s not found", collection, id)
284
+ }
285
+ return cloneMeta(rec.Metadata), nil
286
+ }
287
+
288
+ func (s *Store) BackfillDirtyTiers(ctx context.Context) error {
289
+ markers, err := s.ListCollection(ctx, dirtyTierCollection)
290
+ if err != nil {
291
+ return fmt.Errorf("backfill: list dirty tiers: %w", err)
292
+ }
293
+ if len(markers) == 0 {
294
+ return nil
295
+ }
296
+
297
+ log.Printf("backfill: %d dirty tier(s) to restore", len(markers))
298
+ var restored, dropped int
299
+ for _, marker := range markers {
300
+ base := metaString(marker.Metadata, "base_collection")
301
+ id := metaString(marker.Metadata, "record_id")
302
+ dims := metaInt(marker.Metadata, "dims")
303
+ if base == "" || id == "" || dims == 0 {
304
+ log.Printf("backfill: malformed dirty marker %s - dropping", marker.ID)
305
+ _ = s.Delete(ctx, dirtyTierCollection, marker.ID)
306
+ dropped++
307
+ continue
308
+ }
309
+
310
+ l3, err := s.loadVec(ctx, base, id)
311
+ if err != nil || len(l3) < embed.DimsL3 {
312
+ log.Printf("backfill: L3 missing for %s/%s - dropping dirty marker", base, id)
313
+ _ = s.Delete(ctx, dirtyTierCollection, marker.ID)
314
+ dropped++
315
+ continue
316
+ }
317
+
318
+ mv, err := embed.NewMatryoshkaVec(l3)
319
+ if err != nil {
320
+ log.Printf("backfill: cannot derive matryoshka for %s/%s: %v", base, id, err)
321
+ continue
322
+ }
323
+
324
+ var tierVec []float32
325
+ switch dims {
326
+ case embed.DimsL2:
327
+ tierVec = mv.L2
328
+ case embed.DimsL1:
329
+ tierVec = mv.L1
330
+ default:
331
+ log.Printf("backfill: unexpected dims %d for %s/%s - dropping", dims, base, id)
332
+ _ = s.Delete(ctx, dirtyTierCollection, marker.ID)
333
+ dropped++
334
+ continue
335
+ }
336
+
337
+ meta, err := s.loadMeta(ctx, base, id)
338
+ if err != nil {
339
+ log.Printf("backfill: metadata missing for %s/%s - dropping dirty marker", base, id)
340
+ _ = s.Delete(ctx, dirtyTierCollection, marker.ID)
341
+ dropped++
342
+ continue
343
+ }
344
+ if err := s.InsertRecord(ctx, tierCollection(base, dims), id, tierVec, meta); err != nil {
345
+ log.Printf("backfill: re-insert failed for %s/%s dims=%d: %v", base, id, dims, err)
346
+ continue
347
+ }
348
+
349
+ _ = s.Delete(ctx, dirtyTierCollection, marker.ID)
350
+ restored++
351
+ }
352
+
353
+ log.Printf("backfill: restored=%d dropped=%d remaining=%d", restored, dropped, len(markers)-restored-dropped)
354
+ return nil
355
+ }
356
+
357
+ func (s *Store) CascadeSearch(ctx context.Context, base string, queryVec embed.MatryoshkaVec, k int, exclude []string, cfg CascadeConfig) CascadeResult {
358
+ start := time.Now()
359
+ deadline := start.Add(time.Duration(cfg.BudgetMs) * time.Millisecond)
360
+ exits := make([]TierExit, 0, 3)
361
+
362
+ if time.Now().Before(deadline) {
363
+ t0 := time.Now()
364
+ hits := s.searchVec(ctx, tierCollection(base, embed.DimsL1), queryVec.L1, k, exclude)
365
+ lat := time.Since(t0)
366
+ top := best(hits)
367
+ exits = append(exits, TierExit{Tier: 1, Dims: embed.DimsL1, BestScore: top, Latency: lat})
368
+ if top >= cfg.ExitThresholdL1 {
369
+ exits[len(exits)-1].Exited = true
370
+ return CascadeResult{Hits: hits, TierUsed: 1, Dims: embed.DimsL1, Latency: time.Since(start), Exits: exits}
371
+ }
372
+ }
373
+
374
+ if time.Now().Before(deadline) {
375
+ t0 := time.Now()
376
+ hits := s.searchVec(ctx, tierCollection(base, embed.DimsL2), queryVec.L2, k, exclude)
377
+ lat := time.Since(t0)
378
+ top := best(hits)
379
+ exits = append(exits, TierExit{Tier: 2, Dims: embed.DimsL2, BestScore: top, Latency: lat})
380
+ if top >= cfg.ExitThresholdL2 {
381
+ exits[len(exits)-1].Exited = true
382
+ return CascadeResult{Hits: hits, TierUsed: 2, Dims: embed.DimsL2, Latency: time.Since(start), Exits: exits}
383
+ }
384
+ }
385
+
386
+ t0 := time.Now()
387
+ hits := s.searchVec(ctx, base, queryVec.L3, k, exclude)
388
+ lat := time.Since(t0)
389
+ exits = append(exits, TierExit{Tier: 3, Dims: embed.DimsL3, BestScore: best(hits), Latency: lat, Exited: true})
390
+ return CascadeResult{Hits: hits, TierUsed: 3, Dims: embed.DimsL3, Latency: time.Since(start), Exits: exits}
391
+ }
392
+
393
// dirtyID builds the marker id for a dirty tier in the form
// "<baseCollection>/<id>:<dims>".
func dirtyID(baseCollection, id string, dims int) string {
	return baseCollection + "/" + id + ":" + fmt.Sprint(dims)
}
396
+
397
+ func tierCollection(base string, dims int) string {
398
+ switch dims {
399
+ case embed.DimsL1:
400
+ return base + ":64d"
401
+ case embed.DimsL2:
402
+ return base + ":256d"
403
+ default:
404
+ return base
405
+ }
406
+ }
407
+
408
+ func (s *Store) markTierDirty(ctx context.Context, base, id string, dims int) {
409
+ zero := make([]float32, dims)
410
+ meta := map[string]any{
411
+ "base_collection": base,
412
+ "record_id": id,
413
+ "dims": dims,
414
+ "created_at": time.Now().UnixMilli(),
415
+ }
416
+ if err := s.InsertRecord(ctx, dirtyTierCollection, dirtyID(base, id, dims), zero, meta); err != nil {
417
+ log.Printf("markTierDirty: failed to mark %s/%s dims=%d: %v", base, id, dims, err)
418
+ }
419
+ }
420
+
421
+ func (s *Store) Delete(_ context.Context, collection, id string) error {
422
+ s.mu.Lock()
423
+ defer s.mu.Unlock()
424
+ if col, ok := s.collections[collection]; ok {
425
+ delete(col, id)
426
+ }
427
+ return nil
428
+ }
429
+
430
+ func (s *Store) DeleteBatch(ctx context.Context, collection string, ids []string) error {
431
+ for _, id := range ids {
432
+ if err := s.Delete(ctx, collection, id); err != nil {
433
+ return err
434
+ }
435
+ }
436
+ return nil
437
+ }
438
+
439
+ func (s *Store) Flush(_ context.Context) error {
440
+ s.mu.RLock()
441
+ snapshot := persistedStore{
442
+ Embedding: profilePtr(s.profile),
443
+ Collections: make(map[string][]persistedRecord, len(s.collections)),
444
+ }
445
+ for collection, records := range s.collections {
446
+ items := make([]persistedRecord, 0, len(records))
447
+ for _, rec := range records {
448
+ items = append(items, persistedRecord{
449
+ ID: rec.ID,
450
+ Text: rec.Text,
451
+ Vector: append([]float32(nil), rec.Vector...),
452
+ Metadata: cloneMeta(rec.Metadata),
453
+ })
454
+ }
455
+ sort.Slice(items, func(i, j int) bool { return items[i].ID < items[j].ID })
456
+ snapshot.Collections[collection] = items
457
+ }
458
+ s.mu.RUnlock()
459
+
460
+ if err := os.MkdirAll(s.path, 0o755); err != nil {
461
+ return err
462
+ }
463
+
464
+ data, err := json.MarshalIndent(snapshot, "", " ")
465
+ if err != nil {
466
+ return err
467
+ }
468
+
469
+ tmpPath := filepath.Join(s.path, "store.json.tmp")
470
+ finalPath := filepath.Join(s.path, "store.json")
471
+ if err := os.WriteFile(tmpPath, data, 0o644); err != nil {
472
+ return err
473
+ }
474
+ return os.Rename(tmpPath, finalPath)
475
+ }
476
+
477
+ func (s *Store) ensureCollectionLocked(collection string) map[string]record {
478
+ col, ok := s.collections[collection]
479
+ if !ok {
480
+ col = make(map[string]record)
481
+ s.collections[collection] = col
482
+ }
483
+ return col
484
+ }
485
+
486
// cloneMeta returns a shallow copy of src. The result is never nil: a nil
// src yields an empty map. Values are copied by assignment, so nested maps
// and slices are still shared with src.
func cloneMeta(src map[string]any) map[string]any {
	// len and range are both well defined on a nil map, so nil needs no
	// special case.
	out := make(map[string]any, len(src))
	for key, value := range src {
		out[key] = value
	}
	return out
}
496
+
497
// matchesMeta reports whether the metadata value v equals want. Strings are
// compared directly; any other value is compared by its fmt.Sprint form.
func matchesMeta(v any, want string) bool {
	if text, isString := v.(string); isString {
		return text == want
	}
	return fmt.Sprint(v) == want
}
505
+
506
// metaString returns meta[key] as a string. A missing key (or nil map)
// yields ""; a string value is returned as is; any other value is rendered
// with fmt.Sprint.
func metaString(meta map[string]any, key string) string {
	// Reading from a nil map is safe and reports the key as absent.
	raw, present := meta[key]
	if !present {
		return ""
	}
	if text, isString := raw.(string); isString {
		return text
	}
	return fmt.Sprint(raw)
}
519
+
520
// metaInt returns meta[key] as an int. int, int64 and float64 values are
// converted (float64 by truncation); a missing key, a nil map or a value of
// any other type yields 0.
func metaInt(meta map[string]any, key string) int {
	// A nil map or missing key reads as a nil interface, which matches no
	// case below and falls through to 0.
	switch number := meta[key].(type) {
	case int:
		return number
	case int64:
		return int(number)
	case float64:
		return int(number)
	}
	return 0
}
539
+
540
+ func (s *Store) collectionDimensions(collection string, vec []float32, meta map[string]any) int {
541
+ switch {
542
+ case strings.HasSuffix(collection, ":64d"):
543
+ return embed.DimsL1
544
+ case strings.HasSuffix(collection, ":256d"):
545
+ return embed.DimsL2
546
+ case collection == dirtyTierCollection:
547
+ if dims, ok := meta["dims"].(int); ok && dims > 0 {
548
+ return dims
549
+ }
550
+ return len(vec)
551
+ default:
552
+ return s.profile.Dimensions
553
+ }
554
+ }
555
+
556
+ func (s *Store) searchVec(_ context.Context, collection string, vec []float32, k int, exclude []string) []SearchResult {
557
+ s.mu.RLock()
558
+ defer s.mu.RUnlock()
559
+
560
+ col := s.collections[collection]
561
+ if len(col) == 0 {
562
+ return []SearchResult{}
563
+ }
564
+
565
+ excluded := make(map[string]struct{}, len(exclude))
566
+ for _, id := range exclude {
567
+ excluded[id] = struct{}{}
568
+ }
569
+
570
+ results := make([]SearchResult, 0, len(col))
571
+ for _, rec := range col {
572
+ if _, skip := excluded[rec.ID]; skip {
573
+ continue
574
+ }
575
+ score := cosine(rec.Vector, vec)
576
+ results = append(results, SearchResult{
577
+ ID: rec.ID,
578
+ Score: score,
579
+ Text: rec.Text,
580
+ Metadata: cloneMeta(rec.Metadata),
581
+ })
582
+ }
583
+
584
+ sort.Slice(results, func(i, j int) bool {
585
+ if results[i].Score == results[j].Score {
586
+ return results[i].ID < results[j].ID
587
+ }
588
+ return results[i].Score > results[j].Score
589
+ })
590
+ if k > 0 && len(results) > k {
591
+ results = results[:k]
592
+ }
593
+ return results
594
+ }
595
+
596
+ func best(hits []SearchResult) float64 {
597
+ if len(hits) == 0 {
598
+ return 0
599
+ }
600
+ return hits[0].Score
601
+ }
602
+
603
+ func cosine(a, b []float32) float64 {
604
+ if len(a) == 0 || len(a) != len(b) {
605
+ return 0
606
+ }
607
+
608
+ var dot, normA, normB float64
609
+ for i := range a {
610
+ av := float64(a[i])
611
+ bv := float64(b[i])
612
+ dot += av * bv
613
+ normA += av * av
614
+ normB += bv * bv
615
+ }
616
+ if normA == 0 || normB == 0 {
617
+ return 0
618
+ }
619
+ return dot / (sqrt(normA) * sqrt(normB))
620
+ }
621
+
622
// sqrt returns the square root of v, or 0 for v <= 0.
//
// It delegates to math.Sqrt. The previous fixed 8-step Newton iteration
// seeded at v only converged for inputs close to 1 and was badly wrong for
// large or very small values (for example it gave roughly 3990 for 1e6),
// which skewed cosine scores for vectors that are not unit length.
func sqrt(v float64) float64 {
	if v <= 0 {
		return 0
	}
	return math.Sqrt(v)
}
633
+
634
+ func (s *Store) load() error {
635
+ finalPath := filepath.Join(s.path, "store.json")
636
+ data, err := os.ReadFile(finalPath)
637
+ if err != nil {
638
+ if errors.Is(err, os.ErrNotExist) {
639
+ return nil
640
+ }
641
+ return err
642
+ }
643
+
644
+ var snapshot persistedStore
645
+ if err := json.Unmarshal(data, &snapshot); err != nil {
646
+ return err
647
+ }
648
+ if snapshot.Embedding != nil && s.profile.Fingerprint != "" && snapshot.Embedding.Fingerprint != s.profile.Fingerprint {
649
+ return fmt.Errorf("embedding profile mismatch: store fingerprint %s does not match current fingerprint %s", snapshot.Embedding.Fingerprint, s.profile.Fingerprint)
650
+ }
651
+ if snapshot.Embedding != nil {
652
+ s.profile = *snapshot.Embedding
653
+ }
654
+
655
+ for collection, items := range snapshot.Collections {
656
+ col := make(map[string]record, len(items))
657
+ for _, item := range items {
658
+ col[item.ID] = record{
659
+ ID: item.ID,
660
+ Text: item.Text,
661
+ Vector: append([]float32(nil), item.Vector...),
662
+ Metadata: cloneMeta(item.Metadata),
663
+ }
664
+ }
665
+ s.collections[collection] = col
666
+ }
667
+ return nil
668
+ }
669
+
670
+ func profilePtr(profile embed.Profile) *embed.Profile {
671
+ if profile.Fingerprint == "" {
672
+ return nil
673
+ }
674
+ copyProfile := profile
675
+ return &copyProfile
676
+ }