clawmem 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +12 -25
- package/CLAUDE.md +12 -25
- package/README.md +1 -1
- package/SKILL.md +9 -23
- package/package.json +1 -1
- package/src/entity.ts +179 -31
- package/src/indexer.ts +1 -1
- package/src/intent.ts +3 -3
- package/src/watcher.ts +91 -30
package/AGENTS.md
CHANGED
|
@@ -89,7 +89,7 @@ curl http://host:8090/v1/models
|
|
|
89
89
|
| `CLAWMEM_EMBED_MAX_CHARS` | `6000` | Max chars per embedding input. Default fits EmbeddingGemma (2048 tokens). Set to `1100` for granite-278m (512 tokens). Cloud providers skip truncation. |
|
|
90
90
|
| `CLAWMEM_EMBED_TPM_LIMIT` | `100000` | Tokens-per-minute limit for cloud embedding pacing. Match to your provider tier: Free 100000, Paid 2000000, Premium 50000000. |
|
|
91
91
|
| `CLAWMEM_EMBED_DIMENSIONS` | (none) | Output dimensions for OpenAI `text-embedding-3-*` Matryoshka models (e.g. `512`, `1024`). Only sent when URL contains `openai.com`. |
|
|
92
|
-
| `CLAWMEM_LLM_URL` | `http://localhost:8089` | LLM server for intent, expansion, A-MEM. Falls to `node-llama-cpp` if unset + `NO_LOCAL_MODELS=false`. |
|
|
92
|
+
| `CLAWMEM_LLM_URL` | `http://localhost:8089` | LLM server for intent, expansion, A-MEM, and entity extraction. Falls to `node-llama-cpp` if unset + `NO_LOCAL_MODELS=false`. For better entity extraction quality, point at a 7B+ model or cloud API during `reindex --enrich` (see `docs/internals/entity-resolution.md`). |
|
|
93
93
|
| `CLAWMEM_RERANK_URL` | `http://localhost:8090` | Reranker server. Falls to `node-llama-cpp` if unset + `NO_LOCAL_MODELS=false`. |
|
|
94
94
|
| `CLAWMEM_NO_LOCAL_MODELS` | `false` | Blocks `node-llama-cpp` from auto-downloading GGUF models. Set `true` for remote-only setups. |
|
|
95
95
|
| `CLAWMEM_VAULTS` | (none) | JSON map of vault name → SQLite path for multi-vault mode. E.g. `{"work":"~/.cache/clawmem/work.sqlite"}` |
|
|
@@ -485,7 +485,7 @@ The `memory_relations` table is populated by multiple independent sources:
|
|
|
485
485
|
| Beads `syncBeadsIssues()` | causal, supporting, semantic | `beads_sync` MCP tool or watcher (.beads/ change) | Queries `bd` CLI (Dolt backend). Maps beads deps: blocks→causal, discovered-from→supporting, relates-to→semantic, plus conditional-blocks→causal, caused-by→causal, supersedes→supporting. Metadata: `{origin: "beads"}`. |
|
|
486
486
|
| `buildTemporalBackbone()` | temporal | `build_graphs` MCP tool (manual) | Creation-order edges between all active docs. |
|
|
487
487
|
| `buildSemanticGraph()` | semantic | `build_graphs` MCP tool (manual) | Pure cosine similarity. PK collision: `INSERT OR IGNORE` means A-MEM semantic edges take precedence if they exist first. |
|
|
488
|
-
| Entity co-occurrence graph | entity | A-MEM enrichment (indexing) | LLM entity extraction → canonical
|
|
488
|
+
| Entity co-occurrence graph | entity | A-MEM enrichment (indexing) | LLM entity extraction → quality filters (title/length/blocklist/location validation) → type-agnostic canonical resolution within compatibility buckets (person, org, location, tech=project/service/tool/concept) → `entity_mentions` + `entity_cooccurrences` tables. Entity edges use IDF-based specificity scoring. Feeds ENTITY intent queries and MPFP `[entity, semantic]` patterns. |
|
|
489
489
|
| `consolidated_observations` | supporting | Consolidation worker (background) | 3-tier consolidation: facts → observations → mental models. Observations track `proof_count`, `trend` (STABLE/STRENGTHENING/WEAKENING/STALE), and source links. |
|
|
490
490
|
|
|
491
491
|
**Edge collision:** Both `generateMemoryLinks()` and `buildSemanticGraph()` insert `relation_type='semantic'`. PK is `(source_id, target_id, relation_type)` — first writer wins.
|
|
@@ -555,28 +555,6 @@ User Query → Intent Classification (WHY/WHEN/ENTITY/WHAT)
|
|
|
555
555
|
| `candidateLimit` | Yes (default 30) | No |
|
|
556
556
|
| Best for | Most queries, progressive disclosure | Causal chains spanning multiple docs |
|
|
557
557
|
|
|
558
|
-
## Operational Issue Tracking
|
|
559
|
-
|
|
560
|
-
When encountering tool failures, instruction contradictions, retrieval gaps, or workflow friction that would benefit from a fix:
|
|
561
|
-
|
|
562
|
-
Write to `docs/issues/YYYY-MM-DD-<slug>.md` with: category, severity, what happened, what was expected, context, suggested fix.
|
|
563
|
-
|
|
564
|
-
**File structure:**
|
|
565
|
-
```
|
|
566
|
-
# <title>
|
|
567
|
-
- Category: tool-failure | instruction-gap | workflow-friction | retrieval-gap | inconsistency
|
|
568
|
-
- Severity: critical | high | medium
|
|
569
|
-
- Status: open | resolved
|
|
570
|
-
|
|
571
|
-
## Observed
|
|
572
|
-
## Expected
|
|
573
|
-
## Context
|
|
574
|
-
## Suggested Fix
|
|
575
|
-
```
|
|
576
|
-
|
|
577
|
-
**Triggers:** repeated tool error, instruction that contradicts observed behavior, retrieval consistently missing known content, workflow requiring unnecessary steps.
|
|
578
|
-
|
|
579
|
-
**Do NOT log:** one-off transient errors, user-caused issues, issues already recorded.
|
|
580
558
|
|
|
581
559
|
## Troubleshooting
|
|
582
560
|
|
|
@@ -658,6 +636,15 @@ Symptom: "UserPromptSubmit hook error" on context-surfacing hook (intermittent)
|
|
|
658
636
|
→ Default hook timeout is 8s (since v0.1.1). If you have an older install, re-run
|
|
659
637
|
`clawmem setup hooks`. If persistent, restart the watcher: `systemctl --user restart
|
|
660
638
|
clawmem-watcher.service`. Healthy memory is under 100MB — if 400MB+, restart clears it.
|
|
639
|
+
|
|
640
|
+
Symptom: WSL hangs or becomes unresponsive during long sessions / watcher has 100K+ FDs
|
|
641
|
+
→ Pre-v0.2.3: fs.watch(recursive: true) registered inotify watches on EVERY subdirectory,
|
|
642
|
+
including excluded dirs (gits/, node_modules/, .git/). Broad collection paths like
|
|
643
|
+
~/Projects with 67K subdirs exhausted inotify limits.
|
|
644
|
+
→ v0.2.3 fix: watcher walks dir trees at startup, skips excluded subtrees, watches
|
|
645
|
+
non-excluded dirs individually. 500-dir cap per collection path.
|
|
646
|
+
→ Diagnosis: `ls /proc/$(pgrep -f "clawmem.*watch")/fd | wc -l` — healthy < 15K.
|
|
647
|
+
→ If still high: narrow broad collection paths. See docs/troubleshooting.md for details.
|
|
661
648
|
```
|
|
662
649
|
|
|
663
650
|
## CLI Reference
|
|
@@ -684,7 +671,7 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
|
|
|
684
671
|
## Integration Notes
|
|
685
672
|
|
|
686
673
|
- **Memory nudge (v0.2.0):** Every N prompts (default 15) without a lifecycle MCP tool call (`memory_pin`/`memory_forget`/`memory_snooze`), context-surfacing appends `<vault-nudge>` prompting proactive memory management. Counter resets on lifecycle tool use. Configure via `CLAWMEM_NUDGE_INTERVAL` (0 to disable).
|
|
687
|
-
- **Entity resolution (v0.2.0):** A-MEM enrichment
|
|
674
|
+
- **Entity resolution (v0.2.0+):** A-MEM enrichment extracts named entities via LLM, resolves to canonical forms using FTS5 + Levenshtein fuzzy matching with **type-agnostic compatibility buckets** (person, org, location stay separate; project/service/tool/concept merge freely as "tech" bucket). Quality filters reject title-as-entity, long names, template placeholders, and invalid locations. Entity edges use IDF-based specificity scoring (rare entities create edges; ubiquitous entities alone cannot). See `docs/internals/entity-resolution.md` for customization (extending type vocabulary and buckets).
|
|
688
675
|
- QMD retrieval (BM25, vector, RRF, rerank, query expansion) is forked into ClawMem. Do not call standalone QMD tools.
|
|
689
676
|
- SAME (composite scoring), MAGMA (intent + graph), A-MEM (self-evolving notes) layer on top of QMD substrate.
|
|
690
677
|
- Three `llama-server` instances (embedding, LLM, reranker) on local or remote GPU. Wrapper defaults to `localhost:8088/8089/8090`.
|
package/CLAUDE.md
CHANGED
|
@@ -89,7 +89,7 @@ curl http://host:8090/v1/models
|
|
|
89
89
|
| `CLAWMEM_EMBED_MAX_CHARS` | `6000` | Max chars per embedding input. Default fits EmbeddingGemma (2048 tokens). Set to `1100` for granite-278m (512 tokens). Cloud providers skip truncation. |
|
|
90
90
|
| `CLAWMEM_EMBED_TPM_LIMIT` | `100000` | Tokens-per-minute limit for cloud embedding pacing. Match to your provider tier: Free 100000, Paid 2000000, Premium 50000000. |
|
|
91
91
|
| `CLAWMEM_EMBED_DIMENSIONS` | (none) | Output dimensions for OpenAI `text-embedding-3-*` Matryoshka models (e.g. `512`, `1024`). Only sent when URL contains `openai.com`. |
|
|
92
|
-
| `CLAWMEM_LLM_URL` | `http://localhost:8089` | LLM server for intent, expansion, A-MEM. Falls to `node-llama-cpp` if unset + `NO_LOCAL_MODELS=false`. |
|
|
92
|
+
| `CLAWMEM_LLM_URL` | `http://localhost:8089` | LLM server for intent, expansion, A-MEM, and entity extraction. Falls to `node-llama-cpp` if unset + `NO_LOCAL_MODELS=false`. For better entity extraction quality, point at a 7B+ model or cloud API during `reindex --enrich` (see `docs/internals/entity-resolution.md`). |
|
|
93
93
|
| `CLAWMEM_RERANK_URL` | `http://localhost:8090` | Reranker server. Falls to `node-llama-cpp` if unset + `NO_LOCAL_MODELS=false`. |
|
|
94
94
|
| `CLAWMEM_NO_LOCAL_MODELS` | `false` | Blocks `node-llama-cpp` from auto-downloading GGUF models. Set `true` for remote-only setups. |
|
|
95
95
|
| `CLAWMEM_VAULTS` | (none) | JSON map of vault name → SQLite path for multi-vault mode. E.g. `{"work":"~/.cache/clawmem/work.sqlite"}` |
|
|
@@ -485,7 +485,7 @@ The `memory_relations` table is populated by multiple independent sources:
|
|
|
485
485
|
| Beads `syncBeadsIssues()` | causal, supporting, semantic | `beads_sync` MCP tool or watcher (.beads/ change) | Queries `bd` CLI (Dolt backend). Maps beads deps: blocks→causal, discovered-from→supporting, relates-to→semantic, plus conditional-blocks→causal, caused-by→causal, supersedes→supporting. Metadata: `{origin: "beads"}`. |
|
|
486
486
|
| `buildTemporalBackbone()` | temporal | `build_graphs` MCP tool (manual) | Creation-order edges between all active docs. |
|
|
487
487
|
| `buildSemanticGraph()` | semantic | `build_graphs` MCP tool (manual) | Pure cosine similarity. PK collision: `INSERT OR IGNORE` means A-MEM semantic edges take precedence if they exist first. |
|
|
488
|
-
| Entity co-occurrence graph | entity | A-MEM enrichment (indexing) | LLM entity extraction → canonical
|
|
488
|
+
| Entity co-occurrence graph | entity | A-MEM enrichment (indexing) | LLM entity extraction → quality filters (title/length/blocklist/location validation) → type-agnostic canonical resolution within compatibility buckets (person, org, location, tech=project/service/tool/concept) → `entity_mentions` + `entity_cooccurrences` tables. Entity edges use IDF-based specificity scoring. Feeds ENTITY intent queries and MPFP `[entity, semantic]` patterns. |
|
|
489
489
|
| `consolidated_observations` | supporting | Consolidation worker (background) | 3-tier consolidation: facts → observations → mental models. Observations track `proof_count`, `trend` (STABLE/STRENGTHENING/WEAKENING/STALE), and source links. |
|
|
490
490
|
|
|
491
491
|
**Edge collision:** Both `generateMemoryLinks()` and `buildSemanticGraph()` insert `relation_type='semantic'`. PK is `(source_id, target_id, relation_type)` — first writer wins.
|
|
@@ -555,28 +555,6 @@ User Query → Intent Classification (WHY/WHEN/ENTITY/WHAT)
|
|
|
555
555
|
| `candidateLimit` | Yes (default 30) | No |
|
|
556
556
|
| Best for | Most queries, progressive disclosure | Causal chains spanning multiple docs |
|
|
557
557
|
|
|
558
|
-
## Operational Issue Tracking
|
|
559
|
-
|
|
560
|
-
When encountering tool failures, instruction contradictions, retrieval gaps, or workflow friction that would benefit from a fix:
|
|
561
|
-
|
|
562
|
-
Write to `docs/issues/YYYY-MM-DD-<slug>.md` with: category, severity, what happened, what was expected, context, suggested fix.
|
|
563
|
-
|
|
564
|
-
**File structure:**
|
|
565
|
-
```
|
|
566
|
-
# <title>
|
|
567
|
-
- Category: tool-failure | instruction-gap | workflow-friction | retrieval-gap | inconsistency
|
|
568
|
-
- Severity: critical | high | medium
|
|
569
|
-
- Status: open | resolved
|
|
570
|
-
|
|
571
|
-
## Observed
|
|
572
|
-
## Expected
|
|
573
|
-
## Context
|
|
574
|
-
## Suggested Fix
|
|
575
|
-
```
|
|
576
|
-
|
|
577
|
-
**Triggers:** repeated tool error, instruction that contradicts observed behavior, retrieval consistently missing known content, workflow requiring unnecessary steps.
|
|
578
|
-
|
|
579
|
-
**Do NOT log:** one-off transient errors, user-caused issues, issues already recorded.
|
|
580
558
|
|
|
581
559
|
## Troubleshooting
|
|
582
560
|
|
|
@@ -658,6 +636,15 @@ Symptom: "UserPromptSubmit hook error" on context-surfacing hook (intermittent)
|
|
|
658
636
|
→ Default hook timeout is 8s (since v0.1.1). If you have an older install, re-run
|
|
659
637
|
`clawmem setup hooks`. If persistent, restart the watcher: `systemctl --user restart
|
|
660
638
|
clawmem-watcher.service`. Healthy memory is under 100MB — if 400MB+, restart clears it.
|
|
639
|
+
|
|
640
|
+
Symptom: WSL hangs or becomes unresponsive during long sessions / watcher has 100K+ FDs
|
|
641
|
+
→ Pre-v0.2.3: fs.watch(recursive: true) registered inotify watches on EVERY subdirectory,
|
|
642
|
+
including excluded dirs (gits/, node_modules/, .git/). Broad collection paths like
|
|
643
|
+
~/Projects with 67K subdirs exhausted inotify limits.
|
|
644
|
+
→ v0.2.3 fix: watcher walks dir trees at startup, skips excluded subtrees, watches
|
|
645
|
+
non-excluded dirs individually. 500-dir cap per collection path.
|
|
646
|
+
→ Diagnosis: `ls /proc/$(pgrep -f "clawmem.*watch")/fd | wc -l` — healthy < 15K.
|
|
647
|
+
→ If still high: narrow broad collection paths. See docs/troubleshooting.md for details.
|
|
661
648
|
```
|
|
662
649
|
|
|
663
650
|
## CLI Reference
|
|
@@ -684,7 +671,7 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
|
|
|
684
671
|
## Integration Notes
|
|
685
672
|
|
|
686
673
|
- **Memory nudge (v0.2.0):** Every N prompts (default 15) without a lifecycle MCP tool call (`memory_pin`/`memory_forget`/`memory_snooze`), context-surfacing appends `<vault-nudge>` prompting proactive memory management. Counter resets on lifecycle tool use. Configure via `CLAWMEM_NUDGE_INTERVAL` (0 to disable).
|
|
687
|
-
- **Entity resolution (v0.2.0):** A-MEM enrichment
|
|
674
|
+
- **Entity resolution (v0.2.0+):** A-MEM enrichment extracts named entities via LLM, resolves to canonical forms using FTS5 + Levenshtein fuzzy matching with **type-agnostic compatibility buckets** (person, org, location stay separate; project/service/tool/concept merge freely as "tech" bucket). Quality filters reject title-as-entity, long names, template placeholders, and invalid locations. Entity edges use IDF-based specificity scoring (rare entities create edges; ubiquitous entities alone cannot). See `docs/internals/entity-resolution.md` for customization (extending type vocabulary and buckets).
|
|
688
675
|
- QMD retrieval (BM25, vector, RRF, rerank, query expansion) is forked into ClawMem. Do not call standalone QMD tools.
|
|
689
676
|
- SAME (composite scoring), MAGMA (intent + graph), A-MEM (self-evolving notes) layer on top of QMD substrate.
|
|
690
677
|
- Three `llama-server` instances (embedding, LLM, reranker) on local or remote GPU. Wrapper defaults to `localhost:8088/8089/8090`.
|
package/README.md
CHANGED
|
@@ -44,7 +44,7 @@ Runs fully local with no API keys and no cloud services. Integrates via Claude C
|
|
|
44
44
|
|
|
45
45
|
### v0.2.0 Enhancements
|
|
46
46
|
|
|
47
|
-
- **Entity resolution + co-occurrence graph** — LLM entity extraction
|
|
47
|
+
- **Entity resolution + co-occurrence graph** — LLM entity extraction with quality filters, type-agnostic canonical resolution within [compatibility buckets](docs/internals/entity-resolution.md) (extensible type vocabulary), IDF-based entity edge scoring, co-occurrence tracking, entity graph traversal for ENTITY intent queries
|
|
48
48
|
- **MPFP graph retrieval** — Multi-Path Fact Propagation with meta-path patterns per intent, hop-synchronized edge cache, Forward Push with α=0.15 teleport probability. Replaces single-beam traversal for causal/entity/temporal queries.
|
|
49
49
|
- **Temporal query extraction** — regex-based date range extraction from natural language queries ("last week", "March 2026"), wired as WHERE filters into BM25 and vector search
|
|
50
50
|
- **4-way parallel retrieval** — temporal proximity and entity graph channels added as parallel RRF legs in `query` tool (Tier 3 only), alongside existing BM25 + vector channels
|
package/SKILL.md
CHANGED
|
@@ -642,6 +642,15 @@ Symptom: "UserPromptSubmit hook error" on context-surfacing hook (intermittent)
|
|
|
642
642
|
-> Default hook timeout is 8s (since v0.1.1). If you have an older install, re-run
|
|
643
643
|
`clawmem setup hooks`. If persistent, restart the watcher: `systemctl --user restart
|
|
644
644
|
clawmem-watcher.service`. Healthy memory is under 100MB — if 400MB+, restart clears it.
|
|
645
|
+
|
|
646
|
+
Symptom: WSL hangs or becomes unresponsive during long sessions / watcher has 100K+ FDs
|
|
647
|
+
-> Pre-v0.2.3: fs.watch(recursive: true) registered inotify watches on EVERY subdirectory,
|
|
648
|
+
including excluded dirs (gits/, node_modules/, .git/). Broad collection paths like
|
|
649
|
+
~/Projects with 67K subdirs exhausted inotify limits.
|
|
650
|
+
-> v0.2.3 fix: watcher walks dir trees at startup, skips excluded subtrees, watches
|
|
651
|
+
non-excluded dirs individually. 500-dir cap per collection path.
|
|
652
|
+
-> Diagnosis: `ls /proc/$(pgrep -f "clawmem.*watch")/fd | wc -l` — healthy < 15K.
|
|
653
|
+
-> If still high: narrow broad collection paths. See docs/troubleshooting.md.
|
|
645
654
|
```
|
|
646
655
|
|
|
647
656
|
---
|
|
@@ -677,29 +686,6 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
|
|
|
677
686
|
# Jaccard similarity within same collection
|
|
678
687
|
```
|
|
679
688
|
|
|
680
|
-
---
|
|
681
|
-
|
|
682
|
-
## Operational Issue Tracking
|
|
683
|
-
|
|
684
|
-
When encountering tool failures, instruction contradictions, retrieval gaps, or workflow friction:
|
|
685
|
-
|
|
686
|
-
Write to `docs/issues/YYYY-MM-DD-<slug>.md`:
|
|
687
|
-
|
|
688
|
-
```
|
|
689
|
-
# <title>
|
|
690
|
-
- Category: tool-failure | instruction-gap | workflow-friction | retrieval-gap | inconsistency
|
|
691
|
-
- Severity: critical | high | medium
|
|
692
|
-
- Status: open | resolved
|
|
693
|
-
|
|
694
|
-
## Observed
|
|
695
|
-
## Expected
|
|
696
|
-
## Context
|
|
697
|
-
## Suggested Fix
|
|
698
|
-
```
|
|
699
|
-
|
|
700
|
-
**Triggers:** repeated tool error, instruction contradicting observed behavior, retrieval consistently missing known content.
|
|
701
|
-
|
|
702
|
-
**Do NOT log:** one-off transient errors, user-caused issues, already recorded issues.
|
|
703
689
|
|
|
704
690
|
---
|
|
705
691
|
|
package/package.json
CHANGED
package/src/entity.ts
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import type { Database } from "bun:sqlite";
|
|
11
11
|
import { createHash } from "crypto";
|
|
12
|
-
import type {
|
|
12
|
+
import type { LLM } from "./llm.ts";
|
|
13
13
|
import { extractJsonFromLLM } from "./amem.ts";
|
|
14
14
|
|
|
15
15
|
// =============================================================================
|
|
@@ -77,6 +77,77 @@ function similarityRatio(a: string, b: string): number {
|
|
|
77
77
|
return 1 - levenshtein(a, b) / maxLen;
|
|
78
78
|
}
|
|
79
79
|
|
|
80
|
+
// =============================================================================
|
|
81
|
+
// Quality Filters
|
|
82
|
+
// =============================================================================
|
|
83
|
+
|
|
84
|
+
const ENTITY_BLOCKLIST = new Set([
|
|
85
|
+
'entity name', 'entity type', 'description', 'example',
|
|
86
|
+
'name', 'type', 'value', 'item',
|
|
87
|
+
'exampletool', 'jane smith', // prompt examples the LLM echoes
|
|
88
|
+
]);
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Check if an extracted entity is low quality and should be rejected.
|
|
92
|
+
* Catches: title-as-entity, long names, template placeholders, heading labels, and location-typed names that fail isValidLocation.
|
|
93
|
+
*/
|
|
94
|
+
function isLowQualityEntity(name: string, type: string, docTitle: string): boolean {
|
|
95
|
+
const normalized = name.toLowerCase().trim();
|
|
96
|
+
const normalizedTitle = docTitle.toLowerCase().trim();
|
|
97
|
+
|
|
98
|
+
// Exact or near-exact title match (Levenshtein-based similarity ratio > 0.85)
|
|
99
|
+
if (normalizedTitle.length > 0 && similarityRatio(normalized, normalizedTitle) > 0.85) return true;
|
|
100
|
+
|
|
101
|
+
// Too long — likely a title or sentence fragment
|
|
102
|
+
if (name.length > 60) return true;
|
|
103
|
+
|
|
104
|
+
// Template placeholders / generic words
|
|
105
|
+
if (ENTITY_BLOCKLIST.has(normalized)) return true;
|
|
106
|
+
|
|
107
|
+
// Heading labels (trailing colon)
|
|
108
|
+
if (name.endsWith(':')) return true;
|
|
109
|
+
|
|
110
|
+
// Location low-trust: if type is location, validate it
|
|
111
|
+
if (type === 'location' && !isValidLocation(name)) return true;
|
|
112
|
+
|
|
113
|
+
return false;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Validate that a location entity is actually geographic / infrastructure.
|
|
118
|
+
* Accepts only names containing an IPv4-style address or starting with "VM <number>"; any other name the LLM typed as location is rejected, regardless of length.
|
|
119
|
+
*/
|
|
120
|
+
function isValidLocation(name: string): boolean {
|
|
121
|
+
// IP addresses
|
|
122
|
+
if (/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/.test(name)) return true;
|
|
123
|
+
// VM identifiers (e.g., "VM 202", "VM 200")
|
|
124
|
+
if (/^VM\s+\d+/i.test(name)) return true;
|
|
125
|
+
// Positive-signal only — no length-based or FQDN fallback
|
|
126
|
+
return false;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// =============================================================================
|
|
130
|
+
// Compatibility Buckets (for type-agnostic canonical resolution)
|
|
131
|
+
// =============================================================================
|
|
132
|
+
|
|
133
|
+
// Each bucket contains types that are semantically interchangeable for merging.
|
|
134
|
+
// Types in the same bucket can merge; cross-bucket merges are rejected.
|
|
135
|
+
// The 'tech' bucket captures the common LLM confusion between project/service/tool/concept.
|
|
136
|
+
// Unknown types default to their own isolated bucket (no false merges).
|
|
137
|
+
const ENTITY_BUCKETS: Record<string, string> = {
|
|
138
|
+
person: 'person',
|
|
139
|
+
org: 'org',
|
|
140
|
+
location: 'location',
|
|
141
|
+
project: 'tech',
|
|
142
|
+
service: 'tech',
|
|
143
|
+
tool: 'tech',
|
|
144
|
+
concept: 'tech',
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
function getEntityBucket(type: string): string {
|
|
148
|
+
return ENTITY_BUCKETS[type] ?? type; // unknown types form their own bucket
|
|
149
|
+
}
|
|
150
|
+
|
|
80
151
|
// =============================================================================
|
|
81
152
|
// Entity ID Generation
|
|
82
153
|
// =============================================================================
|
|
@@ -99,7 +170,7 @@ function makeEntityId(name: string, type: string, vault: string = 'default'): st
|
|
|
99
170
|
* Returns a list of (name, type) pairs.
|
|
100
171
|
*/
|
|
101
172
|
export async function extractEntities(
|
|
102
|
-
llm:
|
|
173
|
+
llm: LLM,
|
|
103
174
|
title: string,
|
|
104
175
|
content: string
|
|
105
176
|
): Promise<ExtractedEntity[]> {
|
|
@@ -113,15 +184,16 @@ Content:
|
|
|
113
184
|
${truncated}
|
|
114
185
|
|
|
115
186
|
Return ONLY valid JSON array:
|
|
116
|
-
[
|
|
117
|
-
{"name": "Entity Name", "type": "person|project|service|tool|concept|org|location"}
|
|
118
|
-
]
|
|
187
|
+
[{"name": "...", "type": "person|project|service|tool|concept|org|location"}]
|
|
119
188
|
|
|
120
189
|
Rules:
|
|
121
190
|
- Only include specific, named entities (not generic concepts like "database" or "testing")
|
|
122
191
|
- Normalize names: "VM 202" not "vm202", "ClawMem" not "clawmem"
|
|
123
|
-
-
|
|
192
|
+
- 0-10 entities. Return empty array [] if no specific entities found
|
|
124
193
|
- Include the most specific type for each entity
|
|
194
|
+
- Do NOT extract the document's title as an entity
|
|
195
|
+
- Do NOT extract heading labels, section names, or sentence fragments
|
|
196
|
+
- Only extract entities that could meaningfully appear in OTHER documents
|
|
125
197
|
Return ONLY the JSON array. /no_think`;
|
|
126
198
|
|
|
127
199
|
try {
|
|
@@ -135,7 +207,7 @@ Return ONLY the JSON array. /no_think`;
|
|
|
135
207
|
const parsed = extractJsonFromLLM(result.text) as ExtractedEntity[] | null;
|
|
136
208
|
if (!Array.isArray(parsed)) return [];
|
|
137
209
|
|
|
138
|
-
// Validate and
|
|
210
|
+
// Validate, filter, and quality-check
|
|
139
211
|
return parsed
|
|
140
212
|
.filter(e =>
|
|
141
213
|
typeof e.name === 'string' &&
|
|
@@ -144,7 +216,8 @@ Return ONLY the JSON array. /no_think`;
|
|
|
144
216
|
e.name.length <= 100 &&
|
|
145
217
|
['person', 'project', 'service', 'tool', 'concept', 'org', 'location'].includes(e.type)
|
|
146
218
|
)
|
|
147
|
-
.
|
|
219
|
+
.filter(e => !isLowQualityEntity(e.name, e.type, title))
|
|
220
|
+
.slice(0, 10);
|
|
148
221
|
} catch (err) {
|
|
149
222
|
console.log(`[entity] LLM extraction failed:`, err);
|
|
150
223
|
return [];
|
|
@@ -159,7 +232,13 @@ Return ONLY the JSON array. /no_think`;
|
|
|
159
232
|
* Resolve an entity name to its canonical form.
|
|
160
233
|
* Uses FTS5 candidate lookup + Levenshtein fuzzy matching.
|
|
161
234
|
*
|
|
162
|
-
*
|
|
235
|
+
* Type-agnostic within compatibility buckets:
|
|
236
|
+
* - person: only merges with person
|
|
237
|
+
* - org: only merges with org
|
|
238
|
+
* - location: only merges with location
|
|
239
|
+
* - tech (project/service/tool/concept): merges freely within bucket
|
|
240
|
+
*
|
|
241
|
+
* Scoped per vault to prevent cross-vault false merges.
|
|
163
242
|
*
|
|
164
243
|
* @returns entity_id of canonical match, or null if no match (new entity)
|
|
165
244
|
*/
|
|
@@ -171,34 +250,41 @@ export function resolveEntityCanonical(
|
|
|
171
250
|
threshold: number = 0.75
|
|
172
251
|
): string | null {
|
|
173
252
|
const normalizedName = name.toLowerCase().trim();
|
|
253
|
+
const inputBucket = getEntityBucket(type);
|
|
174
254
|
|
|
175
|
-
//
|
|
255
|
+
// Use lower threshold for person names (enables "Andre (Dre) Konrad" ↔ "Dre Konrad")
|
|
256
|
+
const effectiveThreshold = inputBucket === 'person' ? 0.65 : threshold;
|
|
257
|
+
|
|
258
|
+
// Step 1: FTS5 candidate lookup — type-agnostic, vault-scoped
|
|
176
259
|
let candidates: { entity_id: string; name: string; entity_type: string }[] = [];
|
|
177
260
|
try {
|
|
178
261
|
candidates = db.prepare(`
|
|
179
262
|
SELECT f.entity_id, f.name, f.entity_type
|
|
180
263
|
FROM entities_fts f
|
|
181
264
|
JOIN entity_nodes e ON e.entity_id = f.entity_id
|
|
182
|
-
WHERE entities_fts MATCH ? AND
|
|
265
|
+
WHERE entities_fts MATCH ? AND e.vault = ?
|
|
183
266
|
LIMIT 20
|
|
184
|
-
`).all(normalizedName.split(/\s+/).map(w => `"${w}"`).join(' OR '),
|
|
267
|
+
`).all(normalizedName.split(/\s+/).map(w => `"${w}"`).join(' OR '), vault) as typeof candidates;
|
|
185
268
|
} catch {
|
|
186
269
|
// FTS5 match may fail on special chars — fall back to LIKE on entity_nodes directly
|
|
187
270
|
candidates = db.prepare(`
|
|
188
271
|
SELECT entity_id, name, entity_type
|
|
189
272
|
FROM entity_nodes
|
|
190
|
-
WHERE LOWER(name) LIKE ? AND
|
|
273
|
+
WHERE LOWER(name) LIKE ? AND vault = ?
|
|
191
274
|
LIMIT 20
|
|
192
|
-
`).all(`%${normalizedName}%`,
|
|
275
|
+
`).all(`%${normalizedName}%`, vault) as typeof candidates;
|
|
193
276
|
}
|
|
194
277
|
|
|
195
278
|
if (candidates.length === 0) return null;
|
|
196
279
|
|
|
197
|
-
// Step 2: Fuzzy rank candidates by
|
|
280
|
+
// Step 2: Fuzzy rank candidates, filtering by bucket compatibility
|
|
198
281
|
let bestMatch: { entity_id: string; score: number } | null = null;
|
|
199
282
|
for (const candidate of candidates) {
|
|
283
|
+
// Reject cross-bucket matches (e.g., don't merge "Andrea" person with "Andrea" project)
|
|
284
|
+
if (getEntityBucket(candidate.entity_type) !== inputBucket) continue;
|
|
285
|
+
|
|
200
286
|
const score = similarityRatio(normalizedName, candidate.name.toLowerCase());
|
|
201
|
-
if (score >=
|
|
287
|
+
if (score >= effectiveThreshold && (!bestMatch || score > bestMatch.score)) {
|
|
202
288
|
bestMatch = { entity_id: candidate.entity_id, score };
|
|
203
289
|
}
|
|
204
290
|
}
|
|
@@ -362,7 +448,7 @@ function clearDocEntityState(db: Database, docId: number): void {
|
|
|
362
448
|
*/
|
|
363
449
|
export async function enrichDocumentEntities(
|
|
364
450
|
db: Database,
|
|
365
|
-
llm:
|
|
451
|
+
llm: LLM,
|
|
366
452
|
docId: number,
|
|
367
453
|
vault: string = 'default'
|
|
368
454
|
): Promise<number> {
|
|
@@ -444,8 +530,11 @@ export async function enrichDocumentEntities(
|
|
|
444
530
|
return 0; // Another worker already committed this exact enrichment
|
|
445
531
|
}
|
|
446
532
|
|
|
447
|
-
// Clear old derived state if re-enriching (content changed)
|
|
448
|
-
|
|
533
|
+
// Clear old derived state if re-enriching (content changed or state was externally wiped)
|
|
534
|
+
const hasOldMentions = db.prepare(
|
|
535
|
+
`SELECT 1 FROM entity_mentions WHERE doc_id = ? LIMIT 1`
|
|
536
|
+
).get(docId);
|
|
537
|
+
if (txState || existingState || hasOldMentions) {
|
|
449
538
|
clearDocEntityState(db, docId);
|
|
450
539
|
}
|
|
451
540
|
|
|
@@ -459,33 +548,92 @@ export async function enrichDocumentEntities(
|
|
|
459
548
|
return 0;
|
|
460
549
|
}
|
|
461
550
|
|
|
462
|
-
//
|
|
551
|
+
// Mutate counters using precomputed canonical IDs (no redundant re-resolution)
|
|
463
552
|
const resolvedIds: string[] = [];
|
|
464
553
|
for (const { entity, canonicalId } of resolvedPairs) {
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
554
|
+
// Check if canonical entity already exists
|
|
555
|
+
const existing = db.prepare(
|
|
556
|
+
`SELECT entity_id FROM entity_nodes WHERE entity_id = ?`
|
|
557
|
+
).get(canonicalId) as { entity_id: string } | undefined;
|
|
558
|
+
|
|
559
|
+
if (existing) {
|
|
560
|
+
// Existing canonical — increment count
|
|
561
|
+
db.prepare(`
|
|
562
|
+
UPDATE entity_nodes SET mention_count = mention_count + 1, last_seen = datetime('now')
|
|
563
|
+
WHERE entity_id = ?
|
|
564
|
+
`).run(canonicalId);
|
|
565
|
+
} else {
|
|
566
|
+
// New entity — insert
|
|
567
|
+
db.prepare(`
|
|
568
|
+
INSERT OR IGNORE INTO entity_nodes (entity_id, entity_type, name, description, created_at, mention_count, last_seen, vault)
|
|
569
|
+
VALUES (?, ?, ?, NULL, datetime('now'), 1, datetime('now'), ?)
|
|
570
|
+
`).run(canonicalId, entity.type, entity.name, vault);
|
|
571
|
+
try {
|
|
572
|
+
db.prepare(`
|
|
573
|
+
INSERT OR IGNORE INTO entities_fts (entity_id, name, entity_type)
|
|
574
|
+
VALUES (?, ?, ?)
|
|
575
|
+
`).run(canonicalId, entity.name.toLowerCase(), entity.type);
|
|
576
|
+
} catch { /* FTS insert non-fatal */ }
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
resolvedIds.push(canonicalId);
|
|
580
|
+
recordEntityMention(db, canonicalId, docId, entity.name);
|
|
468
581
|
}
|
|
469
582
|
|
|
470
|
-
// Step 4: Track co-occurrences (
|
|
471
|
-
|
|
583
|
+
// Step 4: Track co-occurrences (deduplicate resolvedIds to prevent self-pairs)
|
|
584
|
+
const uniqueResolvedIds = [...new Set(resolvedIds)];
|
|
585
|
+
trackCoOccurrences(db, uniqueResolvedIds);
|
|
586
|
+
|
|
587
|
+
// Step 5: Create entity edges with IDF-based specificity scoring
|
|
588
|
+
// Rare entities justify edges; ubiquitous entities alone cannot
|
|
589
|
+
const totalDocs = (db.prepare(`SELECT COUNT(*) as cnt FROM documents WHERE active = 1`).get() as { cnt: number }).cnt;
|
|
472
590
|
|
|
473
|
-
//
|
|
591
|
+
// Collect candidate target docs and their shared entities
|
|
592
|
+
const targetEntityMap = new Map<number, string[]>(); // docId → [entityIds]
|
|
474
593
|
for (const entityId of resolvedIds) {
|
|
475
594
|
const otherDocs = db.prepare(`
|
|
476
595
|
SELECT doc_id FROM entity_mentions
|
|
477
596
|
WHERE entity_id = ? AND doc_id != ?
|
|
478
|
-
LIMIT
|
|
597
|
+
LIMIT 20
|
|
479
598
|
`).all(entityId, docId) as { doc_id: number }[];
|
|
480
599
|
|
|
481
600
|
for (const other of otherDocs) {
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
601
|
+
const existing = targetEntityMap.get(other.doc_id) || [];
|
|
602
|
+
existing.push(entityId);
|
|
603
|
+
targetEntityMap.set(other.doc_id, existing);
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
// Compute IDF per entity (cache for this enrichment)
|
|
608
|
+
const entityIdf = new Map<string, number>();
|
|
609
|
+
for (const entityId of resolvedIds) {
|
|
610
|
+
if (!entityIdf.has(entityId)) {
|
|
611
|
+
const docFreq = (db.prepare(
|
|
612
|
+
`SELECT COUNT(DISTINCT doc_id) as cnt FROM entity_mentions WHERE entity_id = ?`
|
|
613
|
+
).get(entityId) as { cnt: number }).cnt;
|
|
614
|
+
entityIdf.set(entityId, Math.log((totalDocs + 1) / (docFreq + 1)));
|
|
486
615
|
}
|
|
487
616
|
}
|
|
488
617
|
|
|
618
|
+
// Create edges only when max entity IDF exceeds threshold
|
|
619
|
+
const idfThreshold = 3.0; // ln-based: filters entities in >5% of docs (e.g., 13+ docs in 262-doc corpus)
|
|
620
|
+
for (const [targetDocId, sharedEntities] of targetEntityMap) {
|
|
621
|
+
const maxIdf = Math.max(...sharedEntities.map(eid => entityIdf.get(eid) || 0));
|
|
622
|
+
if (maxIdf < idfThreshold) continue; // Skip — only ubiquitous entities shared
|
|
623
|
+
|
|
624
|
+
// Weight: IDF specificity + shared-count bonus (multi-entity overlap outranks single)
|
|
625
|
+
const sharedBonus = Math.min(0.15, 0.05 * (sharedEntities.length - 1));
|
|
626
|
+
const weight = Math.min(1.0, 0.3 + 0.12 * maxIdf + sharedBonus);
|
|
627
|
+
const bestEntity = sharedEntities.reduce((best, eid) =>
|
|
628
|
+
(entityIdf.get(eid) || 0) > (entityIdf.get(best) || 0) ? eid : best
|
|
629
|
+
);
|
|
630
|
+
|
|
631
|
+
db.prepare(`
|
|
632
|
+
INSERT OR IGNORE INTO memory_relations (source_id, target_id, relation_type, weight, metadata, created_at)
|
|
633
|
+
VALUES (?, ?, 'entity', ?, ?, datetime('now'))
|
|
634
|
+
`).run(docId, targetDocId, weight, JSON.stringify({ entity: bestEntity, shared: sharedEntities.length }));
|
|
635
|
+
}
|
|
636
|
+
|
|
489
637
|
// Persist enrichment state LAST — only after all derived data written
|
|
490
638
|
db.prepare(`
|
|
491
639
|
INSERT OR REPLACE INTO entity_enrichment_state (doc_id, input_hash, enriched_at)
|
package/src/indexer.ts
CHANGED
package/src/intent.ts
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import type { Database } from "bun:sqlite";
|
|
12
12
|
import { createHash } from "crypto";
|
|
13
|
-
import type {
|
|
13
|
+
import type { LLM } from "./llm.ts";
|
|
14
14
|
|
|
15
15
|
export type IntentType = 'WHY' | 'WHEN' | 'ENTITY' | 'WHAT';
|
|
16
16
|
|
|
@@ -179,7 +179,7 @@ function classifyIntentHeuristic(query: string): IntentResult {
|
|
|
179
179
|
*/
|
|
180
180
|
export async function classifyIntent(
|
|
181
181
|
query: string,
|
|
182
|
-
llm:
|
|
182
|
+
llm: LLM,
|
|
183
183
|
db: Database
|
|
184
184
|
): Promise<IntentResult> {
|
|
185
185
|
// Check cache first (1 hour TTL)
|
|
@@ -268,7 +268,7 @@ export type QueryClause = {
|
|
|
268
268
|
*/
|
|
269
269
|
export async function decomposeQuery(
|
|
270
270
|
query: string,
|
|
271
|
-
llm:
|
|
271
|
+
llm: LLM,
|
|
272
272
|
db: Database,
|
|
273
273
|
sessionContext?: string
|
|
274
274
|
): Promise<QueryClause[]> {
|
package/src/watcher.ts
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* ClawMem File Watcher - fs.watch with debounce for incremental reindex
|
|
3
|
+
*
|
|
4
|
+
* Walks each directory tree at startup, skipping excluded dirs (gits/,
|
|
5
|
+
* node_modules/, .git/, etc.), and watches only non-excluded directories.
|
|
6
|
+
* This prevents inotify FD exhaustion on trees with large cloned repos.
|
|
3
7
|
*/
|
|
4
8
|
|
|
5
|
-
import { watch, type WatchEventType } from "fs";
|
|
6
|
-
import {
|
|
9
|
+
import { watch, readdirSync, statSync, type WatchEventType } from "fs";
|
|
10
|
+
import { join, relative } from "path";
|
|
11
|
+
import { shouldExclude, EXCLUDED_DIRS } from "./indexer.ts";
|
|
7
12
|
|
|
8
13
|
export type WatcherOptions = {
|
|
9
14
|
debounceMs?: number;
|
|
@@ -11,6 +16,42 @@ export type WatcherOptions = {
|
|
|
11
16
|
onError?: (error: Error) => void;
|
|
12
17
|
};
|
|
13
18
|
|
|
19
|
+
/**
 * Collect `root` plus every directory beneath it that is eligible for watching.
 *
 * A directory entry is skipped, and never descended into, when its name is
 * listed in EXCLUDED_DIRS or begins with a dot. Directories that cannot be
 * listed and entries that cannot be stat'ed are silently skipped.
 *
 * @param root Directory to start from; always the first element of the result.
 * @returns `root` followed by each accepted directory in discovery order.
 *          Traversal uses an explicit LIFO stack, so the most recently
 *          discovered directory is expanded next.
 */
function walkNonExcludedDirs(root: string): string[] {
  const found: string[] = [root];
  const stack: string[] = [root];

  // Name-only filter, applied before any stat call is made.
  const isSkippedName = (name: string): boolean =>
    EXCLUDED_DIRS.has(name) || (name.startsWith(".") && name !== ".");

  // Unreadable directories (permission denied, deleted mid-walk) yield no entries.
  const listNames = (dir: string): string[] => {
    try {
      return readdirSync(dir);
    } catch {
      return [];
    }
  };

  // A failed stat is treated the same as "not a directory".
  const isDirectory = (path: string): boolean => {
    try {
      return statSync(path).isDirectory();
    } catch {
      return false;
    }
  };

  while (stack.length !== 0) {
    const dir = stack.pop()!;

    for (const name of listNames(dir)) {
      if (isSkippedName(name)) continue;

      const child = join(dir, name);
      if (!isDirectory(child)) continue;

      found.push(child);
      stack.push(child);
    }
  }

  return found;
}
|
|
54
|
+
|
|
14
55
|
export function startWatcher(
|
|
15
56
|
directories: string[],
|
|
16
57
|
options: WatcherOptions
|
|
@@ -20,34 +61,54 @@ export function startWatcher(
|
|
|
20
61
|
const watchers: ReturnType<typeof watch>[] = [];
|
|
21
62
|
|
|
22
63
|
for (const dir of directories) {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
64
|
+
// Walk the tree, skipping excluded dirs — watch each non-excluded dir individually
|
|
65
|
+
const watchableDirs = walkNonExcludedDirs(dir);
|
|
66
|
+
|
|
67
|
+
// Safety: warn and cap if a single collection path produces too many dirs
|
|
68
|
+
const MAX_WATCH_DIRS = 500;
|
|
69
|
+
if (watchableDirs.length > MAX_WATCH_DIRS) {
|
|
70
|
+
console.log(`[watcher] WARNING: ${dir} has ${watchableDirs.length} dirs — capping at ${MAX_WATCH_DIRS} to prevent FD exhaustion. Consider narrowing the collection path.`);
|
|
71
|
+
watchableDirs.length = MAX_WATCH_DIRS;
|
|
72
|
+
} else {
|
|
73
|
+
console.log(`[watcher] ${dir}: watching ${watchableDirs.length} dirs`);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
for (const watchDir of watchableDirs) {
|
|
77
|
+
try {
|
|
78
|
+
// Non-recursive watch — each dir watched individually
|
|
79
|
+
const watcher = watch(watchDir, (event, filename) => {
|
|
80
|
+
if (!filename) return;
|
|
81
|
+
// Accept .md files (indexing) and .jsonl only within .beads/ (Dolt backend)
|
|
82
|
+
const isMd = filename.endsWith(".md");
|
|
83
|
+
const isBeadsJsonl = filename.endsWith(".jsonl") && filename.includes(".beads/");
|
|
84
|
+
if (!isMd && !isBeadsJsonl) return;
|
|
85
|
+
|
|
86
|
+
const relativeToDirRoot = relative(dir, join(watchDir, filename));
|
|
87
|
+
if (shouldExclude(relativeToDirRoot)) return;
|
|
88
|
+
|
|
89
|
+
const fullPath = join(watchDir, filename);
|
|
90
|
+
const existing = pending.get(fullPath);
|
|
91
|
+
if (existing) clearTimeout(existing);
|
|
92
|
+
|
|
93
|
+
pending.set(fullPath, setTimeout(async () => {
|
|
94
|
+
pending.delete(fullPath);
|
|
95
|
+
try {
|
|
96
|
+
await onChanged(fullPath, event);
|
|
97
|
+
} catch (err) {
|
|
98
|
+
onError?.(err instanceof Error ? err : new Error(String(err)));
|
|
99
|
+
}
|
|
100
|
+
}, debounceMs));
|
|
101
|
+
});
|
|
102
|
+
watcher.on("error", (err) => {
|
|
103
|
+
onError?.(err instanceof Error ? err : new Error(String(err)));
|
|
104
|
+
});
|
|
105
|
+
watchers.push(watcher);
|
|
106
|
+
} catch (err) {
|
|
107
|
+
// Individual dir watch failure is non-fatal — skip it
|
|
108
|
+
if (onError) {
|
|
109
|
+
onError(err instanceof Error ? err : new Error(`Failed to watch ${watchDir}: ${err}`));
|
|
110
|
+
}
|
|
111
|
+
}
|
|
51
112
|
}
|
|
52
113
|
}
|
|
53
114
|
|