lancedb-opencode-pro 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -31
- package/dist/config.js +13 -0
- package/dist/index.js +68 -1
- package/dist/store.d.ts +7 -0
- package/dist/store.js +138 -2
- package/dist/types.d.ts +11 -1
- package/package.json +1 -1
- package/LICENSE +0 -21
package/README.md
CHANGED
|
@@ -383,14 +383,14 @@ Add an `injection` block to your sidecar config:
|
|
|
383
383
|
"mode": "fixed",
|
|
384
384
|
"maxMemories": 3,
|
|
385
385
|
"minMemories": 1,
|
|
386
|
-
"budgetTokens":
|
|
386
|
+
"budgetTokens": 4096,
|
|
387
387
|
"maxCharsPerMemory": 1200,
|
|
388
388
|
"summarization": "none",
|
|
389
|
-
"summaryTargetChars":
|
|
389
|
+
"summaryTargetChars": 300,
|
|
390
390
|
"scoreDropTolerance": 0.15,
|
|
391
|
-
"injectionFloor": 0.
|
|
391
|
+
"injectionFloor": 0.2,
|
|
392
392
|
"codeSummarization": {
|
|
393
|
-
"mode": "
|
|
393
|
+
"mode": "smart",
|
|
394
394
|
"preserveStructure": true
|
|
395
395
|
}
|
|
396
396
|
}
|
|
@@ -416,7 +416,7 @@ When `summarization` is set to `truncate` or `extract`, memories are summarized
|
|
|
416
416
|
|
|
417
417
|
The `codeSummarization` config controls how code snippets are processed:
|
|
418
418
|
|
|
419
|
-
- **`mode`**: `"
|
|
419
|
+
- **`mode`**: `"smart"` | `"truncate"` | `"preserve"` (default: `"smart"`)
|
|
420
420
|
- **`preserveStructure`**: When `true`, code truncation attempts to balance brackets and preserve syntactic validity.
|
|
421
421
|
|
|
422
422
|
### Environment Variables
|
|
@@ -474,9 +474,9 @@ For quality-sensitive scenarios where you want to avoid low-relevance memories:
|
|
|
474
474
|
"injection": {
|
|
475
475
|
"mode": "adaptive",
|
|
476
476
|
"maxMemories": 5,
|
|
477
|
-
"minMemories":
|
|
477
|
+
"minMemories": 2,
|
|
478
478
|
"scoreDropTolerance": 0.15,
|
|
479
|
-
"injectionFloor": 0.
|
|
479
|
+
"injectionFloor": 0.2
|
|
480
480
|
}
|
|
481
481
|
}
|
|
482
482
|
```
|
|
@@ -485,7 +485,38 @@ This configuration:
|
|
|
485
485
|
1. Starts with up to 5 candidate memories
|
|
486
486
|
2. Stops adding memories when score drops >15% from the top
|
|
487
487
|
3. Ensures minimum score threshold (floor) prevents low-quality injection
|
|
488
|
-
4. Always includes at least
|
|
488
|
+
4. Always includes at least 2 memories
|
|
489
|
+
|
|
490
|
+
### Example: Adaptive Mode with Auto Summarization
|
|
491
|
+
|
|
492
|
+
Recommended for users who want intelligent memory injection with content-aware summarization:
|
|
493
|
+
|
|
494
|
+
```json
|
|
495
|
+
{
|
|
496
|
+
"injection": {
|
|
497
|
+
"mode": "adaptive",
|
|
498
|
+
"maxMemories": 5,
|
|
499
|
+
"minMemories": 2,
|
|
500
|
+
"budgetTokens": 4096,
|
|
501
|
+
"maxCharsPerMemory": 1200,
|
|
502
|
+
"summarization": "auto",
|
|
503
|
+
"summaryTargetChars": 400,
|
|
504
|
+
"scoreDropTolerance": 0.15,
|
|
505
|
+
"injectionFloor": 0.2,
|
|
506
|
+
"codeSummarization": {
|
|
507
|
+
"mode": "smart",
|
|
508
|
+
"preserveStructure": true
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
This configuration:
|
|
515
|
+
1. Dynamically adjusts injection count based on relevance scores
|
|
516
|
+
2. Uses content-aware summarization (key sentences for text, smart truncation for code)
|
|
517
|
+
3. Guarantees at least 2 memories are injected
|
|
518
|
+
4. Preserves code structure when truncating
|
|
519
|
+
5. Prevents injection of memories below 0.2 score threshold
|
|
489
520
|
|
|
490
521
|
---
|
|
491
522
|
|
|
@@ -632,7 +663,7 @@ The project provides layered validation workflows that can run locally or inside
|
|
|
632
663
|
| `npm run verify` | Typecheck + build + effectiveness workflow + retrieval (quick release check) |
|
|
633
664
|
| `npm run verify:full` | All of the above + benchmark + `npm pack` (full release gate) |
|
|
634
665
|
|
|
635
|
-
Threshold policy and benchmark profiles are documented in `docs/
|
|
666
|
+
Threshold policy and benchmark profiles are documented in `docs/memory-validation-checklist.md` (Phase 4.4).
|
|
636
667
|
Acceptance evidence mapping and archive/ship gate policy are documented in `docs/release-readiness.md`.
|
|
637
668
|
|
|
638
669
|
## Maintainer Release SOP
|
|
@@ -644,7 +675,7 @@ Use this flow when publishing a new version to npm.
|
|
|
644
675
|
|
|
645
676
|
```bash
|
|
646
677
|
docker compose build --no-cache && docker compose up -d
|
|
647
|
-
docker compose exec
|
|
678
|
+
docker compose exec opencode-dev npm run release:check
|
|
648
679
|
```
|
|
649
680
|
|
|
650
681
|
3. Confirm npm authentication:
|
|
@@ -700,8 +731,8 @@ ls -l dist dist-test/src 2>/dev/null
|
|
|
700
731
|
|
|
701
732
|
```bash
|
|
702
733
|
docker compose build --no-cache && docker compose up -d
|
|
703
|
-
docker compose exec
|
|
704
|
-
docker compose exec
|
|
734
|
+
docker compose exec opencode-dev npm run typecheck
|
|
735
|
+
docker compose exec opencode-dev npm run build
|
|
705
736
|
```
|
|
706
737
|
|
|
707
738
|
### Running validation inside Docker
|
|
@@ -710,16 +741,16 @@ docker compose exec app npm run build
|
|
|
710
741
|
docker compose build --no-cache && docker compose up -d
|
|
711
742
|
|
|
712
743
|
# Quick release check
|
|
713
|
-
docker compose exec
|
|
744
|
+
docker compose exec opencode-dev npm run verify
|
|
714
745
|
|
|
715
746
|
# Full release gate (includes benchmark + pack)
|
|
716
|
-
docker compose exec
|
|
747
|
+
docker compose exec opencode-dev npm run verify:full
|
|
717
748
|
|
|
718
749
|
# Individual workflows
|
|
719
|
-
docker compose exec
|
|
720
|
-
docker compose exec
|
|
721
|
-
docker compose exec
|
|
722
|
-
docker compose exec
|
|
750
|
+
docker compose exec opencode-dev npm run test:foundation
|
|
751
|
+
docker compose exec opencode-dev npm run test:regression
|
|
752
|
+
docker compose exec opencode-dev npm run test:retrieval
|
|
753
|
+
docker compose exec opencode-dev npm run benchmark:latency
|
|
723
754
|
```
|
|
724
755
|
|
|
725
756
|
### Operator verification
|
|
@@ -728,15 +759,15 @@ After running `npm run verify:full`, operators can inspect the following:
|
|
|
728
759
|
|
|
729
760
|
```bash
|
|
730
761
|
# Confirm the packaged build is installable
|
|
731
|
-
docker compose exec
|
|
762
|
+
docker compose exec opencode-dev ls -la lancedb-opencode-pro-*.tgz
|
|
732
763
|
|
|
733
764
|
# Confirm typecheck and build succeeded
|
|
734
|
-
docker compose exec
|
|
735
|
-
docker compose exec
|
|
765
|
+
docker compose exec opencode-dev npm run typecheck
|
|
766
|
+
docker compose exec opencode-dev npm run build
|
|
736
767
|
|
|
737
768
|
# Check resolved default storage path
|
|
738
|
-
docker compose exec
|
|
739
|
-
docker compose exec
|
|
769
|
+
docker compose exec opencode-dev node -e "import('./dist/index.js').then(() => console.log('plugin loaded'))"
|
|
770
|
+
docker compose exec opencode-dev sh -lc 'ls -la ~/.opencode/memory/lancedb 2>/dev/null || echo "No data yet (expected before first use)"'
|
|
740
771
|
```
|
|
741
772
|
|
|
742
773
|
## Long Memory Verification
|
|
@@ -754,14 +785,14 @@ docker compose build --no-cache && docker compose up -d
|
|
|
754
785
|
The E2E script loads `dist/index.js`, so build artifacts must exist first.
|
|
755
786
|
|
|
756
787
|
```bash
|
|
757
|
-
docker compose exec
|
|
758
|
-
docker compose exec
|
|
788
|
+
docker compose exec opencode-dev npm install
|
|
789
|
+
docker compose exec opencode-dev npm run build
|
|
759
790
|
```
|
|
760
791
|
|
|
761
792
|
### 3. Run the built-in end-to-end memory test
|
|
762
793
|
|
|
763
794
|
```bash
|
|
764
|
-
docker compose exec
|
|
795
|
+
docker compose exec opencode-dev npm run test:e2e
|
|
765
796
|
```
|
|
766
797
|
|
|
767
798
|
Expected success output:
|
|
@@ -783,7 +814,7 @@ This verifies all of the following in one run:
|
|
|
783
814
|
The E2E script uses `/tmp/opencode-memory-e2e` as its test database path.
|
|
784
815
|
|
|
785
816
|
```bash
|
|
786
|
-
docker compose exec
|
|
817
|
+
docker compose exec opencode-dev ls -la /tmp/opencode-memory-e2e
|
|
787
818
|
```
|
|
788
819
|
|
|
789
820
|
If files appear in that directory after the E2E run, memory was written to disk instead of only being kept in process memory.
|
|
@@ -799,7 +830,7 @@ When running through the normal plugin config, the default durable storage path
|
|
|
799
830
|
Check it inside the container with:
|
|
800
831
|
|
|
801
832
|
```bash
|
|
802
|
-
docker compose exec
|
|
833
|
+
docker compose exec opencode-dev sh -lc 'ls -la ~/.opencode/memory/lancedb'
|
|
803
834
|
```
|
|
804
835
|
|
|
805
836
|
### 6. Stronger proof: verify retrieval still works after restart
|
|
@@ -808,8 +839,8 @@ Long memory is only convincing if retrieval still works after the runtime is res
|
|
|
808
839
|
|
|
809
840
|
```bash
|
|
810
841
|
docker compose restart app
|
|
811
|
-
docker compose exec
|
|
812
|
-
docker compose exec
|
|
842
|
+
docker compose exec opencode-dev npm run test:e2e
|
|
843
|
+
docker compose exec opencode-dev ls -la /tmp/opencode-memory-e2e
|
|
813
844
|
```
|
|
814
845
|
|
|
815
846
|
If the search step still succeeds after restart and the database files remain present, that is strong evidence that the memory is durable.
|
|
@@ -818,7 +849,7 @@ If the search step still succeeds after restart and the database files remain pr
|
|
|
818
849
|
|
|
819
850
|
Treat the feature as verified only when all of these are true:
|
|
820
851
|
|
|
821
|
-
- `docker compose exec
|
|
852
|
+
- `docker compose exec opencode-dev npm run test:e2e` passes
|
|
822
853
|
- `/tmp/opencode-memory-e2e` contains LanceDB files after the run
|
|
823
854
|
- the memory retrieval step still succeeds after container restart
|
|
824
855
|
- the configured OpenCode storage path exists when running real plugin integration
|
package/dist/config.js
CHANGED
|
@@ -39,6 +39,7 @@ export function resolveMemoryConfig(config, worktree) {
|
|
|
39
39
|
: process.env.LANCEDB_OPENCODE_PRO_EMBEDDING_TIMEOUT_MS;
|
|
40
40
|
const timeoutRaw = timeoutEnv ?? embeddingRaw.timeoutMs;
|
|
41
41
|
const injection = resolveInjectionConfig(raw, process.env);
|
|
42
|
+
const dedup = resolveDedupConfig(raw, process.env);
|
|
42
43
|
const resolvedConfig = {
|
|
43
44
|
provider,
|
|
44
45
|
dbPath,
|
|
@@ -60,6 +61,7 @@ export function resolveMemoryConfig(config, worktree) {
|
|
|
60
61
|
importanceWeight,
|
|
61
62
|
},
|
|
62
63
|
injection,
|
|
64
|
+
dedup,
|
|
63
65
|
includeGlobalScope: toBoolean(process.env.LANCEDB_OPENCODE_PRO_INCLUDE_GLOBAL_SCOPE ?? raw.includeGlobalScope, true),
|
|
64
66
|
globalDetectionThreshold: Math.max(1, Math.floor(toNumber(process.env.LANCEDB_OPENCODE_PRO_GLOBAL_DETECTION_THRESHOLD ?? raw.globalDetectionThreshold, 2))),
|
|
65
67
|
globalDiscountFactor: clamp(toNumber(process.env.LANCEDB_OPENCODE_PRO_GLOBAL_DISCOUNT_FACTOR ?? raw.globalDiscountFactor, 0.7), 0, 1),
|
|
@@ -92,6 +94,13 @@ function resolveCodeTruncationMode(raw) {
|
|
|
92
94
|
return raw;
|
|
93
95
|
return "smart";
|
|
94
96
|
}
|
|
97
|
+
function resolveDedupConfig(raw, env) {
|
|
98
|
+
const dedupRaw = (raw.dedup ?? {});
|
|
99
|
+
const enabled = toBoolean(env.LANCEDB_OPENCODE_PRO_DEDUP_ENABLED ?? dedupRaw.enabled, true);
|
|
100
|
+
const writeThreshold = clamp(toNumber(env.LANCEDB_OPENCODE_PRO_DEDUP_WRITE_THRESHOLD ?? dedupRaw.writeThreshold, 0.92), 0.0, 1.0);
|
|
101
|
+
const consolidateThreshold = clamp(toNumber(env.LANCEDB_OPENCODE_PRO_DEDUP_CONSOLIDATE_THRESHOLD ?? dedupRaw.consolidateThreshold, 0.95), 0.0, 1.0);
|
|
102
|
+
return { enabled, writeThreshold, consolidateThreshold };
|
|
103
|
+
}
|
|
95
104
|
function resolveInjectionConfig(raw, env) {
|
|
96
105
|
const injectionRaw = (raw.injection ?? {});
|
|
97
106
|
const codeSummarizationRaw = (injectionRaw.codeSummarization ?? {});
|
|
@@ -178,6 +187,10 @@ function mergeMemoryConfig(base, override) {
|
|
|
178
187
|
...((override.injection ?? {}).codeSummarization ?? {}),
|
|
179
188
|
},
|
|
180
189
|
},
|
|
190
|
+
dedup: {
|
|
191
|
+
...(base.dedup ?? {}),
|
|
192
|
+
...(override.dedup ?? {}),
|
|
193
|
+
},
|
|
181
194
|
};
|
|
182
195
|
}
|
|
183
196
|
function firstString(...values) {
|
package/dist/index.js
CHANGED
|
@@ -23,6 +23,10 @@ const plugin = async (input) => {
|
|
|
23
23
|
if (event.type === "session.idle" || event.type === "session.compacted") {
|
|
24
24
|
const sessionID = event.properties.sessionID;
|
|
25
25
|
await flushAutoCapture(sessionID, state, input.client);
|
|
26
|
+
if (event.type === "session.compacted" && state.config.dedup.enabled) {
|
|
27
|
+
const activeScope = deriveProjectScope(input.worktree);
|
|
28
|
+
state.store.consolidateDuplicates(activeScope, state.config.dedup.consolidateThreshold).catch(() => { });
|
|
29
|
+
}
|
|
26
30
|
}
|
|
27
31
|
},
|
|
28
32
|
"experimental.text.complete": async (eventInput, eventOutput) => {
|
|
@@ -157,7 +161,9 @@ const plugin = async (input) => {
|
|
|
157
161
|
return results
|
|
158
162
|
.map((item, idx) => {
|
|
159
163
|
const percent = Math.round(item.score * 100);
|
|
160
|
-
|
|
164
|
+
const meta = JSON.parse(item.record.metadataJson || "{}");
|
|
165
|
+
const duplicateMarker = meta.isPotentialDuplicate ? " (duplicate)" : "";
|
|
166
|
+
return `${idx + 1}. [${item.record.id}]${duplicateMarker} (${item.record.scope}) ${item.record.text} [${percent}%]`;
|
|
161
167
|
})
|
|
162
168
|
.join("\n");
|
|
163
169
|
},
|
|
@@ -429,6 +435,45 @@ const plugin = async (input) => {
|
|
|
429
435
|
.join("\n");
|
|
430
436
|
},
|
|
431
437
|
}),
|
|
438
|
+
memory_consolidate: tool({
|
|
439
|
+
description: "Scope-internally merge near-duplicate memories. Use to clean up accumulated duplicates.",
|
|
440
|
+
args: {
|
|
441
|
+
scope: tool.schema.string().optional(),
|
|
442
|
+
confirm: tool.schema.boolean().default(false),
|
|
443
|
+
},
|
|
444
|
+
execute: async (args, context) => {
|
|
445
|
+
await state.ensureInitialized();
|
|
446
|
+
if (!state.initialized)
|
|
447
|
+
return unavailableMessage(state.config.embedding.provider);
|
|
448
|
+
if (!args.confirm) {
|
|
449
|
+
return "Rejected: memory_consolidate requires confirm=true.";
|
|
450
|
+
}
|
|
451
|
+
const targetScope = args.scope ?? deriveProjectScope(context.worktree);
|
|
452
|
+
const result = await state.store.consolidateDuplicates(targetScope, state.config.dedup.consolidateThreshold);
|
|
453
|
+
return JSON.stringify({ scope: targetScope, ...result }, null, 2);
|
|
454
|
+
},
|
|
455
|
+
}),
|
|
456
|
+
memory_consolidate_all: tool({
|
|
457
|
+
description: "Consolidate duplicates across global scope and current project scope. Used by external cron jobs for daily cleanup.",
|
|
458
|
+
args: {
|
|
459
|
+
confirm: tool.schema.boolean().default(false),
|
|
460
|
+
},
|
|
461
|
+
execute: async (args, context) => {
|
|
462
|
+
await state.ensureInitialized();
|
|
463
|
+
if (!state.initialized)
|
|
464
|
+
return unavailableMessage(state.config.embedding.provider);
|
|
465
|
+
if (!args.confirm) {
|
|
466
|
+
return "Rejected: memory_consolidate_all requires confirm=true.";
|
|
467
|
+
}
|
|
468
|
+
const projectScope = deriveProjectScope(context.worktree);
|
|
469
|
+
const globalResult = await state.store.consolidateDuplicates("global", state.config.dedup.consolidateThreshold);
|
|
470
|
+
const projectResult = await state.store.consolidateDuplicates(projectScope, state.config.dedup.consolidateThreshold);
|
|
471
|
+
return JSON.stringify({
|
|
472
|
+
global: { scope: "global", ...globalResult },
|
|
473
|
+
project: { scope: projectScope, ...projectResult },
|
|
474
|
+
}, null, 2);
|
|
475
|
+
},
|
|
476
|
+
}),
|
|
432
477
|
memory_port_plan: tool({
|
|
433
478
|
description: "Plan non-conflicting host ports for compose services and optionally persist reservations",
|
|
434
479
|
args: {
|
|
@@ -638,6 +683,26 @@ async function flushAutoCapture(sessionID, state, client) {
|
|
|
638
683
|
});
|
|
639
684
|
return;
|
|
640
685
|
}
|
|
686
|
+
let isPotentialDuplicate = false;
|
|
687
|
+
let duplicateOf = null;
|
|
688
|
+
if (state.config.dedup.enabled) {
|
|
689
|
+
const similar = await state.store.search({
|
|
690
|
+
query: result.candidate.text,
|
|
691
|
+
queryVector: vector,
|
|
692
|
+
scopes: [activeScope],
|
|
693
|
+
limit: 1,
|
|
694
|
+
vectorWeight: 1.0,
|
|
695
|
+
bm25Weight: 0.0,
|
|
696
|
+
minScore: 0.0,
|
|
697
|
+
rrfK: 60,
|
|
698
|
+
recencyBoost: false,
|
|
699
|
+
globalDiscountFactor: 1.0,
|
|
700
|
+
});
|
|
701
|
+
if (similar.length > 0 && similar[0].score >= state.config.dedup.writeThreshold) {
|
|
702
|
+
isPotentialDuplicate = true;
|
|
703
|
+
duplicateOf = similar[0].record.id;
|
|
704
|
+
}
|
|
705
|
+
}
|
|
641
706
|
const memoryId = generateId();
|
|
642
707
|
await state.store.put({
|
|
643
708
|
id: memoryId,
|
|
@@ -656,6 +721,8 @@ async function flushAutoCapture(sessionID, state, client) {
|
|
|
656
721
|
metadataJson: JSON.stringify({
|
|
657
722
|
source: "auto-capture",
|
|
658
723
|
sessionID,
|
|
724
|
+
isPotentialDuplicate,
|
|
725
|
+
duplicateOf,
|
|
659
726
|
}),
|
|
660
727
|
});
|
|
661
728
|
await recordCaptureEvent(state, {
|
package/dist/store.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { EffectivenessSummary, MemoryEffectivenessEvent, MemoryRecord, SearchResult } from "./types.js";
|
|
2
|
+
export declare function storeFastCosine(a: number[], b: number[], normA: number, normB: number): number;
|
|
2
3
|
export declare class MemoryStore {
|
|
3
4
|
private readonly dbPath;
|
|
4
5
|
private lancedb;
|
|
@@ -32,6 +33,11 @@ export declare class MemoryStore {
|
|
|
32
33
|
clearScope(scope: string): Promise<number>;
|
|
33
34
|
list(scope: string, limit: number): Promise<MemoryRecord[]>;
|
|
34
35
|
pruneScope(scope: string, maxEntries: number): Promise<number>;
|
|
36
|
+
consolidateDuplicates(scope: string, threshold: number): Promise<{
|
|
37
|
+
mergedPairs: number;
|
|
38
|
+
updatedRecords: number;
|
|
39
|
+
skippedRecords: number;
|
|
40
|
+
}>;
|
|
35
41
|
countIncompatibleVectors(scopes: string[], expectedDim: number): Promise<number>;
|
|
36
42
|
private matchesId;
|
|
37
43
|
hasMemory(id: string, scopes: string[]): Promise<boolean>;
|
|
@@ -48,6 +54,7 @@ export declare class MemoryStore {
|
|
|
48
54
|
private requireTable;
|
|
49
55
|
private requireEventTable;
|
|
50
56
|
private readEventsByScopes;
|
|
57
|
+
private readByScopesIncludingMerged;
|
|
51
58
|
private readByScopes;
|
|
52
59
|
private ensureIndexes;
|
|
53
60
|
private ensureMemoriesTableCompatibility;
|
package/dist/store.js
CHANGED
|
@@ -4,6 +4,19 @@ import { tokenize } from "./utils.js";
|
|
|
4
4
|
const TABLE_NAME = "memories";
|
|
5
5
|
const EVENTS_TABLE_NAME = "effectiveness_events";
|
|
6
6
|
const EVENTS_SOURCE_COLUMN = "source";
|
|
7
|
+
// Exported for use by consolidateDuplicates
|
|
8
|
+
export function storeFastCosine(a, b, normA, normB) {
|
|
9
|
+
if (a.length === 0 || b.length === 0 || a.length !== b.length)
|
|
10
|
+
return 0;
|
|
11
|
+
const denom = normA * normB;
|
|
12
|
+
if (denom === 0)
|
|
13
|
+
return 0;
|
|
14
|
+
let dot = 0;
|
|
15
|
+
for (let i = 0; i < a.length; i += 1) {
|
|
16
|
+
dot += a[i] * b[i];
|
|
17
|
+
}
|
|
18
|
+
return dot / denom;
|
|
19
|
+
}
|
|
7
20
|
export class MemoryStore {
|
|
8
21
|
dbPath;
|
|
9
22
|
lancedb = null;
|
|
@@ -209,13 +222,83 @@ export class MemoryStore {
|
|
|
209
222
|
const rows = await this.list(scope, 100000);
|
|
210
223
|
if (rows.length <= maxEntries)
|
|
211
224
|
return 0;
|
|
212
|
-
const
|
|
225
|
+
const flagged = rows.filter((r) => {
|
|
226
|
+
const meta = parseMetadata(r.metadataJson);
|
|
227
|
+
return meta.isPotentialDuplicate === true;
|
|
228
|
+
});
|
|
229
|
+
const unflagged = rows.filter((r) => {
|
|
230
|
+
const meta = parseMetadata(r.metadataJson);
|
|
231
|
+
return meta.isPotentialDuplicate !== true;
|
|
232
|
+
});
|
|
233
|
+
const sortedFlagged = flagged.sort((a, b) => a.timestamp - b.timestamp);
|
|
234
|
+
const sortedUnflagged = unflagged.sort((a, b) => a.timestamp - b.timestamp);
|
|
235
|
+
const toDeleteCount = rows.length - maxEntries;
|
|
236
|
+
const deleteFromFlagged = Math.min(sortedFlagged.length, toDeleteCount);
|
|
237
|
+
const toDelete = [
|
|
238
|
+
...sortedFlagged.slice(0, deleteFromFlagged),
|
|
239
|
+
...sortedUnflagged.slice(0, toDeleteCount - deleteFromFlagged),
|
|
240
|
+
];
|
|
213
241
|
for (const row of toDelete) {
|
|
214
242
|
await this.requireTable().delete(`id = '${escapeSql(row.id)}'`);
|
|
215
243
|
}
|
|
216
244
|
this.invalidateScope(scope);
|
|
217
245
|
return toDelete.length;
|
|
218
246
|
}
|
|
247
|
+
async consolidateDuplicates(scope, threshold) {
|
|
248
|
+
const rows = await this.readByScopesIncludingMerged([scope]);
|
|
249
|
+
if (rows.length === 0) {
|
|
250
|
+
return { mergedPairs: 0, updatedRecords: 0, skippedRecords: 0 };
|
|
251
|
+
}
|
|
252
|
+
let mergedPairs = 0;
|
|
253
|
+
let updatedRecords = 0;
|
|
254
|
+
let skippedRecords = 0;
|
|
255
|
+
const now = Date.now();
|
|
256
|
+
const FIVE_MINUTES_MS = 5 * 60 * 1000;
|
|
257
|
+
const rowsWithNorms = rows.map((row) => ({
|
|
258
|
+
row,
|
|
259
|
+
norm: this.scopeCache.get(scope)?.norms.get(row.id) ?? vecNorm(row.vector),
|
|
260
|
+
}));
|
|
261
|
+
for (let i = 0; i < rowsWithNorms.length; i += 1) {
|
|
262
|
+
const a = rowsWithNorms[i];
|
|
263
|
+
for (let j = i + 1; j < rowsWithNorms.length; j += 1) {
|
|
264
|
+
const b = rowsWithNorms[j];
|
|
265
|
+
const sim = storeFastCosine(a.row.vector, b.row.vector, a.norm, b.norm);
|
|
266
|
+
if (sim < threshold)
|
|
267
|
+
continue;
|
|
268
|
+
const aMeta = parseMetadata(a.row.metadataJson);
|
|
269
|
+
if (aMeta.status === "merged") {
|
|
270
|
+
skippedRecords += 1;
|
|
271
|
+
continue;
|
|
272
|
+
}
|
|
273
|
+
if (a.row.lastRecalled > 0 && now - a.row.lastRecalled < FIVE_MINUTES_MS) {
|
|
274
|
+
skippedRecords += 1;
|
|
275
|
+
continue;
|
|
276
|
+
}
|
|
277
|
+
const older = a.row.timestamp <= b.row.timestamp ? a.row : b.row;
|
|
278
|
+
const newer = a.row.timestamp <= b.row.timestamp ? b.row : a.row;
|
|
279
|
+
const newerMeta = parseMetadata(newer.metadataJson);
|
|
280
|
+
const mergedIntoId = newer.id;
|
|
281
|
+
const updatedOlderMeta = { status: "merged", mergedInto: mergedIntoId };
|
|
282
|
+
await this.requireTable().delete(`id = '${escapeSql(older.id)}'`);
|
|
283
|
+
await this.requireTable().add([{
|
|
284
|
+
...older,
|
|
285
|
+
metadataJson: JSON.stringify({ ...parseMetadata(older.metadataJson), ...updatedOlderMeta }),
|
|
286
|
+
}]);
|
|
287
|
+
const updatedNewerMeta = { ...newerMeta, mergedFrom: older.id };
|
|
288
|
+
await this.requireTable().delete(`id = '${escapeSql(newer.id)}'`);
|
|
289
|
+
await this.requireTable().add([{
|
|
290
|
+
...newer,
|
|
291
|
+
metadataJson: JSON.stringify(updatedNewerMeta),
|
|
292
|
+
}]);
|
|
293
|
+
mergedPairs += 1;
|
|
294
|
+
updatedRecords += 2;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
if (mergedPairs > 0) {
|
|
298
|
+
this.invalidateScope(scope);
|
|
299
|
+
}
|
|
300
|
+
return { mergedPairs, updatedRecords, skippedRecords };
|
|
301
|
+
}
|
|
219
302
|
async countIncompatibleVectors(scopes, expectedDim) {
|
|
220
303
|
const rows = await this.readByScopes(scopes);
|
|
221
304
|
return rows.filter((row) => row.vectorDim !== expectedDim).length;
|
|
@@ -279,6 +362,8 @@ export class MemoryStore {
|
|
|
279
362
|
async summarizeEvents(scope, includeGlobalScope) {
|
|
280
363
|
const scopes = includeGlobalScope && scope !== "global" ? [scope, "global"] : [scope];
|
|
281
364
|
const events = await this.readEventsByScopes(scopes);
|
|
365
|
+
// Read all memories including merged for duplicate counts
|
|
366
|
+
const memories = await this.readByScopesIncludingMerged(scopes);
|
|
282
367
|
const captureSkipReasons = {};
|
|
283
368
|
let captureConsidered = 0;
|
|
284
369
|
let captureStored = 0;
|
|
@@ -343,6 +428,15 @@ export class MemoryStore {
|
|
|
343
428
|
}
|
|
344
429
|
const totalCaptureAttempts = captureStored + captureSkipped;
|
|
345
430
|
const totalUsefulFeedback = feedbackUsefulPositive + feedbackUsefulNegative;
|
|
431
|
+
// Count flagged (isPotentialDuplicate) and consolidated (status=merged) from memories table
|
|
432
|
+
const flaggedCount = memories.filter((r) => {
|
|
433
|
+
const meta = parseMetadata(r.metadataJson);
|
|
434
|
+
return meta.isPotentialDuplicate === true;
|
|
435
|
+
}).length;
|
|
436
|
+
const consolidatedCount = memories.filter((r) => {
|
|
437
|
+
const meta = parseMetadata(r.metadataJson);
|
|
438
|
+
return meta.status === "merged";
|
|
439
|
+
}).length;
|
|
346
440
|
return {
|
|
347
441
|
scope,
|
|
348
442
|
totalEvents: events.length,
|
|
@@ -384,6 +478,10 @@ export class MemoryStore {
|
|
|
384
478
|
falsePositiveRate: captureStored === 0 ? 0 : feedbackWrong / captureStored,
|
|
385
479
|
falseNegativeRate: totalCaptureAttempts === 0 ? 0 : feedbackMissing / totalCaptureAttempts,
|
|
386
480
|
},
|
|
481
|
+
duplicates: {
|
|
482
|
+
flaggedCount,
|
|
483
|
+
consolidatedCount,
|
|
484
|
+
},
|
|
387
485
|
};
|
|
388
486
|
}
|
|
389
487
|
getIndexHealth() {
|
|
@@ -469,7 +567,7 @@ export class MemoryStore {
|
|
|
469
567
|
.map((row) => normalizeEventRow(row))
|
|
470
568
|
.filter((row) => row !== null);
|
|
471
569
|
}
|
|
472
|
-
async
|
|
570
|
+
async readByScopesIncludingMerged(scopes) {
|
|
473
571
|
const table = this.requireTable();
|
|
474
572
|
if (scopes.length === 0)
|
|
475
573
|
return [];
|
|
@@ -499,6 +597,36 @@ export class MemoryStore {
|
|
|
499
597
|
.map((row) => normalizeRow(row))
|
|
500
598
|
.filter((row) => row !== null);
|
|
501
599
|
}
|
|
600
|
+
async readByScopes(scopes) {
|
|
601
|
+
const table = this.requireTable();
|
|
602
|
+
if (scopes.length === 0)
|
|
603
|
+
return [];
|
|
604
|
+
const whereExpr = scopes.map((scope) => `scope = '${escapeSql(scope)}'`).join(" OR ");
|
|
605
|
+
const rows = await table
|
|
606
|
+
.query()
|
|
607
|
+
.where(`(${whereExpr}) AND metadataJson NOT LIKE '%"status":"merged"%'`)
|
|
608
|
+
.select([
|
|
609
|
+
"id",
|
|
610
|
+
"text",
|
|
611
|
+
"vector",
|
|
612
|
+
"category",
|
|
613
|
+
"scope",
|
|
614
|
+
"importance",
|
|
615
|
+
"timestamp",
|
|
616
|
+
"lastRecalled",
|
|
617
|
+
"recallCount",
|
|
618
|
+
"projectCount",
|
|
619
|
+
"schemaVersion",
|
|
620
|
+
"embeddingModel",
|
|
621
|
+
"vectorDim",
|
|
622
|
+
"metadataJson",
|
|
623
|
+
])
|
|
624
|
+
.limit(100000)
|
|
625
|
+
.toArray();
|
|
626
|
+
return rows
|
|
627
|
+
.map((row) => normalizeRow(row))
|
|
628
|
+
.filter((row) => row !== null);
|
|
629
|
+
}
|
|
502
630
|
async ensureIndexes() {
|
|
503
631
|
const table = this.requireTable();
|
|
504
632
|
try {
|
|
@@ -747,3 +875,11 @@ function extractRecalledProjects(metadataJson) {
|
|
|
747
875
|
}
|
|
748
876
|
return new Set();
|
|
749
877
|
}
|
|
878
|
+
function parseMetadata(metadataJson) {
|
|
879
|
+
try {
|
|
880
|
+
return JSON.parse(metadataJson);
|
|
881
|
+
}
|
|
882
|
+
catch {
|
|
883
|
+
return {};
|
|
884
|
+
}
|
|
885
|
+
}
|
package/dist/types.d.ts
CHANGED
|
@@ -16,7 +16,7 @@ export interface SummarizedContent {
|
|
|
16
16
|
}
|
|
17
17
|
export type MemoryCategory = "preference" | "fact" | "decision" | "entity" | "other";
|
|
18
18
|
export type CaptureOutcome = "considered" | "skipped" | "stored";
|
|
19
|
-
export type CaptureSkipReason = "empty-buffer" | "below-min-chars" | "no-positive-signal" | "initialization-unavailable" | "embedding-unavailable" | "empty-embedding";
|
|
19
|
+
export type CaptureSkipReason = "empty-buffer" | "below-min-chars" | "no-positive-signal" | "initialization-unavailable" | "embedding-unavailable" | "empty-embedding" | "duplicate-similarity" | "duplicate-exact";
|
|
20
20
|
export type FeedbackType = "missing" | "wrong" | "useful";
|
|
21
21
|
export type RecallSource = "system-transform" | "manual-search";
|
|
22
22
|
export type MemoryScope = "project" | "global";
|
|
@@ -67,12 +67,18 @@ export interface SummarizationConfig {
|
|
|
67
67
|
preserveComments: boolean;
|
|
68
68
|
preserveImports: boolean;
|
|
69
69
|
}
|
|
70
|
+
export interface DedupConfig {
|
|
71
|
+
enabled: boolean;
|
|
72
|
+
writeThreshold: number;
|
|
73
|
+
consolidateThreshold: number;
|
|
74
|
+
}
|
|
70
75
|
export interface MemoryRuntimeConfig {
|
|
71
76
|
provider: string;
|
|
72
77
|
dbPath: string;
|
|
73
78
|
embedding: EmbeddingConfig;
|
|
74
79
|
retrieval: RetrievalConfig;
|
|
75
80
|
injection: InjectionConfig;
|
|
81
|
+
dedup: DedupConfig;
|
|
76
82
|
includeGlobalScope: boolean;
|
|
77
83
|
globalDetectionThreshold: number;
|
|
78
84
|
globalDiscountFactor: number;
|
|
@@ -180,5 +186,9 @@ export interface EffectivenessSummary {
|
|
|
180
186
|
falsePositiveRate: number;
|
|
181
187
|
falseNegativeRate: number;
|
|
182
188
|
};
|
|
189
|
+
duplicates: {
|
|
190
|
+
flaggedCount: number;
|
|
191
|
+
consolidatedCount: number;
|
|
192
|
+
};
|
|
183
193
|
}
|
|
184
194
|
export {};
|
package/package.json
CHANGED
package/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2026 Jonathan Tsai <tryweb@ichiayi.com>
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|