clawmem 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +19 -2
- package/CLAUDE.md +19 -2
- package/README.md +15 -6
- package/SKILL.md +13 -2
- package/package.json +1 -1
- package/src/clawmem.ts +98 -0
- package/src/hooks/decision-extractor.ts +92 -0
- package/src/hooks/session-bootstrap.ts +84 -29
- package/src/llm.ts +120 -16
- package/src/mcp.ts +148 -0
- package/src/store.ts +120 -1
package/AGENTS.md
CHANGED
|
@@ -307,9 +307,15 @@ All other retrieval is handled by Tier 2 hooks. Do NOT call MCP tools speculativ
|
|
|
307
307
|
4. Chain tracing → find_causal_links(docid, direction="both", depth=5)
|
|
308
308
|
Traverses causal edges between _clawmem/agent/observations/ docs (from decision-extractor).
|
|
309
309
|
|
|
310
|
-
5.
|
|
310
|
+
5. Entity facts → kg_query(entity, as_of?, direction?)
|
|
311
|
+
Structured SPO triples with temporal validity. Different from intent_search:
|
|
312
|
+
- kg_query: "what does ClawMem relate to?" → returns structured facts (subject-predicate-object)
|
|
313
|
+
- intent_search: "why did we choose ClawMem?" → returns documents with causal reasoning
|
|
314
|
+
Use kg_query for entity lookup, intent_search for causal chains.
|
|
311
315
|
|
|
312
|
-
6.
|
|
316
|
+
6. Memory debugging → memory_evolution_status(docid)
|
|
317
|
+
|
|
318
|
+
7. Temporal context → timeline(docid, before=5, after=5, same_collection=false)
|
|
313
319
|
Shows what was created/modified before and after a document.
|
|
314
320
|
Use after search to understand chronological neighborhood.
|
|
315
321
|
```
|
|
@@ -327,6 +333,9 @@ All other retrieval is handled by Tier 2 hooks. Do NOT call MCP tools speculativ
|
|
|
327
333
|
- `timeline(docid, before=5, after=5, same_collection=false)` — temporal neighborhood around a document. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation.
|
|
328
334
|
- `list_vaults()` — show configured vault names and paths. Empty in single-vault mode (default).
|
|
329
335
|
- `vault_sync(vault, content_root, pattern?, collection_name?)` — index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories (`/etc/`, `/root/`, `.ssh`, `.env`, `credentials`, etc.).
|
|
336
|
+
- `kg_query(entity, as_of?, direction?)` — query the SPO knowledge graph for an entity's relationships. Returns temporal triples with validity windows. USE THIS for "what does X relate to?", "what was true about X in January?". Uses entity resolution for lookup.
|
|
337
|
+
- `diary_write(entry, topic?, agent?)` — write a diary entry. USE PROACTIVELY in non-hooked environments (Hermes, Gemini, plain MCP) for recording important events and decisions. Do NOT use in Claude Code (hooks handle this automatically).
|
|
338
|
+
- `diary_read(last_n?, agent?)` — read recent diary entries.
|
|
330
339
|
|
|
331
340
|
### Multi-Vault
|
|
332
341
|
|
|
@@ -355,6 +364,8 @@ Pin, snooze, and forget are **manual MCP tools** — not automated. The agent sh
|
|
|
355
364
|
- Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it naturally.
|
|
356
365
|
- Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically. Only after bulk ingestion or when `intent_search` returns weak graph results.
|
|
357
366
|
- Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command (same category as `update`/`reindex`). Suggest it to the user when they mention old conversation exports, but let them run it. Bulk import has disk/embedding cost implications that need user consent.
|
|
367
|
+
- Do NOT use `diary_write` in Claude Code — hooks (`decision-extractor`, `handoff-generator`) capture this automatically. Diary is for non-hooked environments only (Hermes, Gemini, plain MCP clients).
|
|
368
|
+
- Do NOT use `kg_query` for causal "why" questions — use `intent_search` or `memory_retrieve`. `kg_query` returns structured entity facts (SPO triples), not reasoning chains.
|
|
358
369
|
|
|
359
370
|
## Tool Selection (one-liner)
|
|
360
371
|
|
|
@@ -564,6 +575,12 @@ Symptom: "Local model download blocked" error
|
|
|
564
575
|
→ llama-server endpoint unreachable while CLAWMEM_NO_LOCAL_MODELS=true.
|
|
565
576
|
→ Fix: Start the llama-server instance. Or set CLAWMEM_NO_LOCAL_MODELS=false for in-process fallback.
|
|
566
577
|
|
|
578
|
+
Symptom: "[generate] Remote LLM in cooldown, falling back to in-process generation"
|
|
579
|
+
→ Remote LLM server had a transport failure (ECONNREFUSED/ETIMEDOUT). ClawMem set a 60s cooldown
|
|
580
|
+
and is using local node-llama-cpp. Remote will be retried after cooldown expires.
|
|
581
|
+
→ Not an error if you expect local fallback. If you want remote only: ensure llama-server is running,
|
|
582
|
+
or set CLAWMEM_NO_LOCAL_MODELS=true to get null instead of slow local inference.
|
|
583
|
+
|
|
567
584
|
Symptom: Query expansion always fails / returns garbage
|
|
568
585
|
→ On CPU-only systems, in-process inference is significantly slower and less reliable. Systems with GPU acceleration (Metal/Vulkan) handle these models well in-process.
|
|
569
586
|
→ Fix: Run llama-server on a GPU. Even a low-end NVIDIA card handles 1.7B models.
|
package/CLAUDE.md
CHANGED
|
@@ -307,9 +307,15 @@ All other retrieval is handled by Tier 2 hooks. Do NOT call MCP tools speculativ
|
|
|
307
307
|
4. Chain tracing → find_causal_links(docid, direction="both", depth=5)
|
|
308
308
|
Traverses causal edges between _clawmem/agent/observations/ docs (from decision-extractor).
|
|
309
309
|
|
|
310
|
-
5.
|
|
310
|
+
5. Entity facts → kg_query(entity, as_of?, direction?)
|
|
311
|
+
Structured SPO triples with temporal validity. Different from intent_search:
|
|
312
|
+
- kg_query: "what does ClawMem relate to?" → returns structured facts (subject-predicate-object)
|
|
313
|
+
- intent_search: "why did we choose ClawMem?" → returns documents with causal reasoning
|
|
314
|
+
Use kg_query for entity lookup, intent_search for causal chains.
|
|
311
315
|
|
|
312
|
-
6.
|
|
316
|
+
6. Memory debugging → memory_evolution_status(docid)
|
|
317
|
+
|
|
318
|
+
7. Temporal context → timeline(docid, before=5, after=5, same_collection=false)
|
|
313
319
|
Shows what was created/modified before and after a document.
|
|
314
320
|
Use after search to understand chronological neighborhood.
|
|
315
321
|
```
|
|
@@ -327,6 +333,9 @@ All other retrieval is handled by Tier 2 hooks. Do NOT call MCP tools speculativ
|
|
|
327
333
|
- `timeline(docid, before=5, after=5, same_collection=false)` — temporal neighborhood around a document. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation.
|
|
328
334
|
- `list_vaults()` — show configured vault names and paths. Empty in single-vault mode (default).
|
|
329
335
|
- `vault_sync(vault, content_root, pattern?, collection_name?)` — index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories (`/etc/`, `/root/`, `.ssh`, `.env`, `credentials`, etc.).
|
|
336
|
+
- `kg_query(entity, as_of?, direction?)` — query the SPO knowledge graph for an entity's relationships. Returns temporal triples with validity windows. USE THIS for "what does X relate to?", "what was true about X in January?". Uses entity resolution for lookup.
|
|
337
|
+
- `diary_write(entry, topic?, agent?)` — write a diary entry. USE PROACTIVELY in non-hooked environments (Hermes, Gemini, plain MCP) for recording important events and decisions. Do NOT use in Claude Code (hooks handle this automatically).
|
|
338
|
+
- `diary_read(last_n?, agent?)` — read recent diary entries.
|
|
330
339
|
|
|
331
340
|
### Multi-Vault
|
|
332
341
|
|
|
@@ -355,6 +364,8 @@ Pin, snooze, and forget are **manual MCP tools** — not automated. The agent sh
|
|
|
355
364
|
- Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it naturally.
|
|
356
365
|
- Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically. Only after bulk ingestion or when `intent_search` returns weak graph results.
|
|
357
366
|
- Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command (same category as `update`/`reindex`). Suggest it to the user when they mention old conversation exports, but let them run it. Bulk import has disk/embedding cost implications that need user consent.
|
|
367
|
+
- Do NOT use `diary_write` in Claude Code — hooks (`decision-extractor`, `handoff-generator`) capture this automatically. Diary is for non-hooked environments only (Hermes, Gemini, plain MCP clients).
|
|
368
|
+
- Do NOT use `kg_query` for causal "why" questions — use `intent_search` or `memory_retrieve`. `kg_query` returns structured entity facts (SPO triples), not reasoning chains.
|
|
358
369
|
|
|
359
370
|
## Tool Selection (one-liner)
|
|
360
371
|
|
|
@@ -564,6 +575,12 @@ Symptom: "Local model download blocked" error
|
|
|
564
575
|
→ llama-server endpoint unreachable while CLAWMEM_NO_LOCAL_MODELS=true.
|
|
565
576
|
→ Fix: Start the llama-server instance. Or set CLAWMEM_NO_LOCAL_MODELS=false for in-process fallback.
|
|
566
577
|
|
|
578
|
+
Symptom: "[generate] Remote LLM in cooldown, falling back to in-process generation"
|
|
579
|
+
→ Remote LLM server had a transport failure (ECONNREFUSED/ETIMEDOUT). ClawMem set a 60s cooldown
|
|
580
|
+
and is using local node-llama-cpp. Remote will be retried after cooldown expires.
|
|
581
|
+
→ Not an error if you expect local fallback. If you want remote only: ensure llama-server is running,
|
|
582
|
+
or set CLAWMEM_NO_LOCAL_MODELS=true to get null instead of slow local inference.
|
|
583
|
+
|
|
567
584
|
Symptom: Query expansion always fails / returns garbage
|
|
568
585
|
→ On CPU-only systems, in-process inference is significantly slower and less reliable. Systems with GPU acceleration (Metal/Vulkan) handle these models well in-process.
|
|
569
586
|
→ Fix: Run llama-server on a GPU. Even a low-end NVIDIA card handles 1.7B models.
|
package/README.md
CHANGED
|
@@ -176,7 +176,7 @@ ClawMem integrates via hooks (`settings.json`) and an MCP stdio server. Hooks ha
|
|
|
176
176
|
|
|
177
177
|
```bash
|
|
178
178
|
clawmem setup hooks # Install lifecycle hooks (SessionStart, UserPromptSubmit, Stop, PreCompact)
|
|
179
|
-
clawmem setup mcp # Register MCP server in ~/.claude.json (
|
|
179
|
+
clawmem setup mcp # Register MCP server in ~/.claude.json (31 tools)
|
|
180
180
|
```
|
|
181
181
|
|
|
182
182
|
**Automatic (90%):** `context-surfacing` injects relevant memory on every prompt. `postcompact-inject` re-injects state after compaction. `decision-extractor`, `handoff-generator`, `feedback-loop` capture session state on stop.
|
|
@@ -203,7 +203,7 @@ Disable OpenClaw's native memory and `memory-lancedb` auto-recall/capture to avo
|
|
|
203
203
|
openclaw config set agents.defaults.memorySearch.extraPaths "[]"
|
|
204
204
|
```
|
|
205
205
|
|
|
206
|
-
**Alternative:** OpenClaw agents can also use ClawMem's MCP server directly (`clawmem setup mcp`), with or without hooks. This gives full access to all
|
|
206
|
+
**Alternative:** OpenClaw agents can also use ClawMem's MCP server directly (`clawmem setup mcp`), with or without hooks. This gives full access to all 31 MCP tools but bypasses OpenClaw's ContextEngine lifecycle, so you lose token budget awareness, native compaction orchestration, and the `afterTurn()` message pipeline. The ContextEngine plugin is recommended for new OpenClaw setups; MCP is available as an additional or standalone integration.
|
|
207
207
|
|
|
208
208
|
#### Hermes Agent
|
|
209
209
|
|
|
@@ -310,9 +310,9 @@ ClawMem uses three `llama-server` (llama.cpp) instances for neural inference. Al
|
|
|
310
310
|
| LLM | 8089 | [qmd-query-expansion-1.7B-q4_k_m](https://huggingface.co/tobil/qmd-query-expansion-1.7B-gguf) | ~2.2GB | Intent classification, query expansion, A-MEM |
|
|
311
311
|
| Reranker | 8090 | [qwen3-reranker-0.6B-Q8_0](https://huggingface.co/ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF) | ~1.3GB | Cross-encoder reranking (query, intent_search) |
|
|
312
312
|
|
|
313
|
-
The `bin/clawmem` wrapper defaults to `localhost:8088/8089/8090`. If a server is unreachable, ClawMem
|
|
313
|
+
The `bin/clawmem` wrapper defaults to `localhost:8088/8089/8090`. If a server is unreachable (transport error like ECONNREFUSED/ETIMEDOUT), ClawMem sets a 60-second cooldown and falls back to in-process inference via `node-llama-cpp` (auto-downloads the QMD native models on first use, uses Metal/Vulkan/CPU depending on hardware). HTTP errors (400/500) and user-cancelled requests do not trigger cooldown — the remote server is retried normally on the next call. With GPU acceleration the fallback is fast; on CPU-only it is significantly slower. ClawMem always works either way, but **if you're running dedicated GPU servers, use [systemd services](docs/guides/systemd-services.md) to ensure they stay up**.
|
|
314
314
|
|
|
315
|
-
To prevent
|
|
315
|
+
To prevent fallback and fail fast instead, set `CLAWMEM_NO_LOCAL_MODELS=true`.
|
|
316
316
|
|
|
317
317
|
#### Remote GPU (optional)
|
|
318
318
|
|
|
@@ -473,7 +473,7 @@ llama-server -m Qwen3-Reranker-0.6B-Q8_0.gguf \
|
|
|
473
473
|
|
|
474
474
|
### MCP Server
|
|
475
475
|
|
|
476
|
-
ClawMem exposes
|
|
476
|
+
ClawMem exposes 31 MCP tools via the [Model Context Protocol](https://modelcontextprotocol.io) and an optional HTTP REST API. Any MCP-compatible client or HTTP client can use it.
|
|
477
477
|
|
|
478
478
|
**Claude Code (automatic):**
|
|
479
479
|
|
|
@@ -678,7 +678,7 @@ clawmem doctor Full health check
|
|
|
678
678
|
clawmem status Quick index status
|
|
679
679
|
```
|
|
680
680
|
|
|
681
|
-
## MCP Tools (
|
|
681
|
+
## MCP Tools (31)
|
|
682
682
|
|
|
683
683
|
Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
|
|
684
684
|
|
|
@@ -715,6 +715,7 @@ Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
|
|
|
715
715
|
|---|---|
|
|
716
716
|
| `build_graphs` | Build temporal and/or semantic graphs from document corpus |
|
|
717
717
|
| `find_causal_links` | Trace decision chains: "what led to X", "how we got from A to B". Follow up `intent_search` with this tool on a top result to walk the full causal chain. Traverses causes / caused_by / both up to N hops with depth-annotated reasoning. |
|
|
718
|
+
| `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Uses entity resolution for lookup. |
|
|
718
719
|
| `memory_evolution_status` | Show how a document's A-MEM metadata evolved over time |
|
|
719
720
|
| `timeline` | Show the temporal neighborhood around a document — what was created/modified before and after it. Progressive disclosure: search → timeline (context) → get (full content). Supports same-collection scoping and session correlation. |
|
|
720
721
|
|
|
@@ -731,6 +732,13 @@ Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
|
|
|
731
732
|
| `list_vaults` | Show configured vault names and paths. Empty in single-vault mode. |
|
|
732
733
|
| `vault_sync` | Index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories. |
|
|
733
734
|
|
|
735
|
+
### Agent Diary
|
|
736
|
+
|
|
737
|
+
| Tool | Description |
|
|
738
|
+
|---|---|
|
|
739
|
+
| `diary_write` | Write a diary entry. Use for recording important events, decisions, or observations in environments without hook support. Stored as searchable memories. |
|
|
740
|
+
| `diary_read` | Read recent diary entries. Filter by agent name. |
|
|
741
|
+
|
|
734
742
|
### Memory Management & Lifecycle
|
|
735
743
|
|
|
736
744
|
| Tool | Description |
|
|
@@ -1112,6 +1120,7 @@ Built on the shoulders of:
|
|
|
1112
1120
|
- [Hermes Agent](https://github.com/NousResearch/hermes-agent) — MemoryProvider plugin integration, memory nudge system (periodic lifecycle tool prompting)
|
|
1113
1121
|
- [Hindsight](https://github.com/vectorize-io/hindsight) — entity resolution, MPFP graph traversal, temporal extraction, 3-tier consolidation, observation invalidation, 4-way parallel retrieval
|
|
1114
1122
|
- [MAGMA](https://arxiv.org/abs/2501.13956) — multi-graph memory agent
|
|
1123
|
+
- [MemPalace](https://github.com/milla-jovovich/mempalace) — conversation import patterns, broadened observation taxonomy (preference/milestone/problem), session-bootstrap synthesis
|
|
1115
1124
|
- [memory-lancedb-pro](https://github.com/CortexReach/memory-lancedb-pro) — retrieval gate, length normalization, MMR diversity, access reinforcement algorithms
|
|
1116
1125
|
- [OpenViking](https://github.com/volcengine/OpenViking) — query decomposition patterns, collection-scoped retrieval, transaction-safe indexing
|
|
1117
1126
|
- [QMD](https://github.com/tobi/qmd) — search backend (BM25 + vectors + RRF + reranking)
|
package/SKILL.md
CHANGED
|
@@ -242,9 +242,15 @@ Once escalated, route by query type:
|
|
|
242
242
|
4. Chain tracing -> find_causal_links(docid, direction="both", depth=5)
|
|
243
243
|
Traverses causal edges between _clawmem/agent/observations/ docs.
|
|
244
244
|
|
|
245
|
-
5.
|
|
245
|
+
5. Entity facts -> kg_query(entity, as_of?, direction?)
|
|
246
|
+
Structured SPO triples with temporal validity. Different from intent_search:
|
|
247
|
+
- kg_query: "what does ClawMem relate to?" -> returns structured facts (subject-predicate-object)
|
|
248
|
+
- intent_search: "why did we choose ClawMem?" -> returns documents with causal reasoning
|
|
249
|
+
Use kg_query for entity lookup, intent_search for causal chains.
|
|
246
250
|
|
|
247
|
-
6.
|
|
251
|
+
6. Memory debugging -> memory_evolution_status(docid)
|
|
252
|
+
|
|
253
|
+
7. Temporal context -> timeline(docid, before=5, after=5, same_collection=false)
|
|
248
254
|
Shows what was created/modified before and after a document.
|
|
249
255
|
Use after search to understand chronological neighborhood.
|
|
250
256
|
```
|
|
@@ -277,6 +283,9 @@ Once escalated, route by query type:
|
|
|
277
283
|
| `timeline` | Temporal neighborhood around a document — what was modified before/after. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation. |
|
|
278
284
|
| `list_vaults` | Show configured vault names and paths. Empty in single-vault mode. |
|
|
279
285
|
| `vault_sync` | Index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories. |
|
|
286
|
+
| `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Uses entity resolution. |
|
|
287
|
+
| `diary_write` | Write diary entry. Use proactively in non-hooked environments. Do NOT use in Claude Code. |
|
|
288
|
+
| `diary_read` | Read recent diary entries. Filter by agent name. |
|
|
280
289
|
| `lifecycle_status` | Document lifecycle statistics: active, archived, forgotten, pinned, snoozed counts and policy summary. |
|
|
281
290
|
| `lifecycle_sweep` | Run lifecycle policies: archive stale docs. Defaults to dry_run (preview only). |
|
|
282
291
|
| `lifecycle_restore` | Restore auto-archived documents. Filter by query, collection, or all. Does NOT restore manually forgotten docs. |
|
|
@@ -567,6 +576,8 @@ When `decision-extractor` detects a new decision contradicting an old one, the o
|
|
|
567
576
|
- Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it.
|
|
568
577
|
- Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically.
|
|
569
578
|
- Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command. Suggest it to the user when they mention old conversation exports, but let them run it.
|
|
579
|
+
- Do NOT use `diary_write` in Claude Code — hooks capture this automatically. Diary is for non-hooked environments only (Hermes, Gemini, plain MCP).
|
|
580
|
+
- Do NOT use `kg_query` for causal "why" questions — use `intent_search` or `memory_retrieve`. `kg_query` returns structured entity facts (SPO triples), not reasoning chains.
|
|
570
581
|
|
|
571
582
|
---
|
|
572
583
|
|
package/package.json
CHANGED
package/src/clawmem.ts
CHANGED
|
@@ -1868,6 +1868,9 @@ async function main() {
|
|
|
1868
1868
|
case "curate":
|
|
1869
1869
|
await cmdCurate(subArgs);
|
|
1870
1870
|
break;
|
|
1871
|
+
case "diary":
|
|
1872
|
+
await cmdDiary(subArgs);
|
|
1873
|
+
break;
|
|
1871
1874
|
case "help":
|
|
1872
1875
|
case "--help":
|
|
1873
1876
|
case "-h":
|
|
@@ -2207,6 +2210,99 @@ interface CuratorReport {
|
|
|
2207
2210
|
actions: string[];
|
|
2208
2211
|
}
|
|
2209
2212
|
|
|
2213
|
+
async function cmdDiary(args: string[]) {
|
|
2214
|
+
const subCmd = args[0];
|
|
2215
|
+
const subArgs = args.slice(1);
|
|
2216
|
+
|
|
2217
|
+
switch (subCmd) {
|
|
2218
|
+
case "write": {
|
|
2219
|
+
const { values, positionals } = parseArgs({
|
|
2220
|
+
args: subArgs,
|
|
2221
|
+
options: {
|
|
2222
|
+
topic: { type: "string", short: "t", default: "general" },
|
|
2223
|
+
agent: { type: "string", short: "a", default: "user" },
|
|
2224
|
+
},
|
|
2225
|
+
allowPositionals: true,
|
|
2226
|
+
});
|
|
2227
|
+
|
|
2228
|
+
const entry = positionals.join(" ");
|
|
2229
|
+
if (!entry) die("Usage: clawmem diary write <entry text> [-t topic] [-a agent-name]");
|
|
2230
|
+
|
|
2231
|
+
const s = getStore();
|
|
2232
|
+
const now = new Date();
|
|
2233
|
+
const dateStr = now.toISOString().slice(0, 10);
|
|
2234
|
+
const timeStr = now.toISOString().slice(11, 19).replace(/:/g, "");
|
|
2235
|
+
const ms = String(now.getMilliseconds()).padStart(3, "0");
|
|
2236
|
+
const diaryPath = `diary/${dateStr}-${timeStr}${ms}-${values.topic}.md`;
|
|
2237
|
+
const body = [
|
|
2238
|
+
"---",
|
|
2239
|
+
`title: "${entry.slice(0, 80).replace(/"/g, '\\"')}"`,
|
|
2240
|
+
`content_type: note`,
|
|
2241
|
+
`tags: [diary, ${values.topic}]`,
|
|
2242
|
+
`domain: "${values.agent}"`,
|
|
2243
|
+
"---",
|
|
2244
|
+
"",
|
|
2245
|
+
entry,
|
|
2246
|
+
].join("\n");
|
|
2247
|
+
|
|
2248
|
+
const result = s.saveMemory({
|
|
2249
|
+
collection: "_clawmem",
|
|
2250
|
+
path: diaryPath,
|
|
2251
|
+
title: entry.slice(0, 80),
|
|
2252
|
+
body,
|
|
2253
|
+
contentType: "note",
|
|
2254
|
+
confidence: 0.7,
|
|
2255
|
+
semanticPayload: `${diaryPath}::${entry}`,
|
|
2256
|
+
});
|
|
2257
|
+
|
|
2258
|
+
console.log(`${c.green}✓${c.reset} Diary entry saved (${result.action}, doc #${result.docId})`);
|
|
2259
|
+
break;
|
|
2260
|
+
}
|
|
2261
|
+
|
|
2262
|
+
case "read": {
|
|
2263
|
+
const { values } = parseArgs({
|
|
2264
|
+
args: subArgs,
|
|
2265
|
+
options: {
|
|
2266
|
+
last: { type: "string", short: "n", default: "10" },
|
|
2267
|
+
agent: { type: "string", short: "a" },
|
|
2268
|
+
},
|
|
2269
|
+
allowPositionals: false,
|
|
2270
|
+
});
|
|
2271
|
+
|
|
2272
|
+
const limit = parseInt(values.last || "10", 10);
|
|
2273
|
+
const s = getStore();
|
|
2274
|
+
|
|
2275
|
+
const rows = s.db.prepare(`
|
|
2276
|
+
SELECT d.id, d.path, d.title, d.modified_at as modifiedAt, d.domain,
|
|
2277
|
+
c.doc as body
|
|
2278
|
+
FROM documents d
|
|
2279
|
+
JOIN content c ON c.hash = d.hash
|
|
2280
|
+
WHERE d.active = 1 AND d.collection = '_clawmem' AND d.path LIKE 'diary/%'
|
|
2281
|
+
${values.agent ? "AND d.domain = ?" : ""}
|
|
2282
|
+
ORDER BY d.modified_at DESC
|
|
2283
|
+
LIMIT ?
|
|
2284
|
+
`).all(...(values.agent ? [values.agent, limit] : [limit])) as any[];
|
|
2285
|
+
|
|
2286
|
+
if (rows.length === 0) {
|
|
2287
|
+
console.log("No diary entries found.");
|
|
2288
|
+
break;
|
|
2289
|
+
}
|
|
2290
|
+
|
|
2291
|
+
console.log(`${c.bold}Diary${c.reset} (${rows.length} entries)\n`);
|
|
2292
|
+
for (const row of rows) {
|
|
2293
|
+
const agent = row.domain ? ` [${row.domain}]` : "";
|
|
2294
|
+
console.log(`${c.dim}${row.modifiedAt.slice(0, 16)}${c.reset}${agent} ${row.title}`);
|
|
2295
|
+
}
|
|
2296
|
+
break;
|
|
2297
|
+
}
|
|
2298
|
+
|
|
2299
|
+
default:
|
|
2300
|
+
console.log(`Usage:
|
|
2301
|
+
clawmem diary write <entry> [-t topic] [-a agent] Write diary entry
|
|
2302
|
+
clawmem diary read [-n limit] [-a agent] Read recent entries`);
|
|
2303
|
+
}
|
|
2304
|
+
}
|
|
2305
|
+
|
|
2210
2306
|
async function cmdCurate(_args: string[]) {
|
|
2211
2307
|
const s = getStore();
|
|
2212
2308
|
const report: CuratorReport = {
|
|
@@ -2422,6 +2518,8 @@ ${c.bold}Intelligence:${c.reset}
|
|
|
2422
2518
|
clawmem reflect [days] Cross-session pattern analysis
|
|
2423
2519
|
clawmem consolidate [--dry-run] Merge duplicate low-confidence docs
|
|
2424
2520
|
clawmem curate Automated maintenance (health, sweep, dedup, hygiene)
|
|
2521
|
+
clawmem diary write <entry> [-t topic] Write a diary entry (for non-hooked environments)
|
|
2522
|
+
clawmem diary read [-n N] [-a agent] Read recent diary entries
|
|
2425
2523
|
|
|
2426
2524
|
${c.bold}Integration:${c.reset}
|
|
2427
2525
|
clawmem mcp Start stdio MCP server
|
|
@@ -374,6 +374,32 @@ export async function decisionExtractor(
|
|
|
374
374
|
console.log(`[decision-extractor] Error in causal inference:`, err);
|
|
375
375
|
}
|
|
376
376
|
}
|
|
377
|
+
|
|
378
|
+
// Extract SPO triples from observation facts (preference/decision types get priority)
|
|
379
|
+
for (const obs of observations) {
|
|
380
|
+
if (!obs.facts || obs.facts.length === 0) continue;
|
|
381
|
+
for (const fact of obs.facts) {
|
|
382
|
+
const triple = extractTripleFromFact(fact, obs.type);
|
|
383
|
+
if (triple) {
|
|
384
|
+
try {
|
|
385
|
+
store.db.prepare(
|
|
386
|
+
"INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
|
|
387
|
+
).run(triple.subjectId, triple.subject, "auto", new Date().toISOString());
|
|
388
|
+
if (triple.objectId) {
|
|
389
|
+
store.db.prepare(
|
|
390
|
+
"INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
|
|
391
|
+
).run(triple.objectId, triple.object, "auto", new Date().toISOString());
|
|
392
|
+
}
|
|
393
|
+
store.addTriple(triple.subjectId, triple.predicate, triple.objectId, triple.objectId ? null : triple.object, {
|
|
394
|
+
confidence: obs.type === "decision" || obs.type === "preference" ? 0.9 : 0.7,
|
|
395
|
+
sourceFact: fact,
|
|
396
|
+
});
|
|
397
|
+
} catch {
|
|
398
|
+
// Triple insertion errors are non-fatal
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
}
|
|
377
403
|
}
|
|
378
404
|
|
|
379
405
|
// Extract decisions (observer-first, regex fallback)
|
|
@@ -663,3 +689,69 @@ function formatObservation(obs: Observation, dateStr: string, sessionId: string)
|
|
|
663
689
|
|
|
664
690
|
return lines.join("\n");
|
|
665
691
|
}
|
|
692
|
+
|
|
693
|
+
// =============================================================================
|
|
694
|
+
// SPO Triple Extraction from Facts
|
|
695
|
+
// =============================================================================
|
|
696
|
+
|
|
697
|
+
type ExtractedTriple = {
|
|
698
|
+
subject: string;
|
|
699
|
+
subjectId: string;
|
|
700
|
+
predicate: string;
|
|
701
|
+
object: string;
|
|
702
|
+
objectId: string | null;
|
|
703
|
+
};
|
|
704
|
+
|
|
705
|
+
function toEntityId(name: string): string {
|
|
706
|
+
return name.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
function extractTripleFromFact(fact: string, obsType: string): ExtractedTriple | null {
|
|
710
|
+
// Only extract from decision/preference/milestone/problem types — skip noisy bugfix/feature/change facts
|
|
711
|
+
if (!["decision", "preference", "milestone", "problem"].includes(obsType)) return null;
|
|
712
|
+
|
|
713
|
+
// Conservative verb patterns — only clear relational predicates
|
|
714
|
+
const verbPatterns = [
|
|
715
|
+
/^(.+?)\s+(chose|selected|switched to|migrated to|adopted)\s+(.+?)\.?$/i,
|
|
716
|
+
/^(.+?)\s+(deployed to|runs on|hosted on|installed on)\s+(.+?)\.?$/i,
|
|
717
|
+
/^(.+?)\s+(replaced|superseded|deprecated)\s+(.+?)\.?$/i,
|
|
718
|
+
/^(.+?)\s+(depends on|integrates with|connects to)\s+(.+?)\.?$/i,
|
|
719
|
+
];
|
|
720
|
+
|
|
721
|
+
for (const pattern of verbPatterns) {
|
|
722
|
+
const match = fact.match(pattern);
|
|
723
|
+
if (match) {
|
|
724
|
+
const subject = match[1]!.trim();
|
|
725
|
+
const predicate = match[2]!.trim();
|
|
726
|
+
const object = match[3]!.trim();
|
|
727
|
+
|
|
728
|
+
// Reject subjects/objects that look like sentences rather than entity names
|
|
729
|
+
if (subject.length < 3 || object.length < 3 || subject.length > 60 || object.length > 60) continue;
|
|
730
|
+
if (subject.includes(",") || object.includes(",")) continue; // likely a clause, not an entity
|
|
731
|
+
|
|
732
|
+
return {
|
|
733
|
+
subject,
|
|
734
|
+
subjectId: toEntityId(subject),
|
|
735
|
+
predicate: predicate.toLowerCase().replace(/\s+/g, "_"),
|
|
736
|
+
object,
|
|
737
|
+
objectId: toEntityId(object),
|
|
738
|
+
};
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
// Preference facts only: "User prefers X" / "Prefers X"
|
|
743
|
+
if (obsType === "preference") {
|
|
744
|
+
const prefMatch = fact.match(/^(?:user\s+)?(?:prefers?|avoids?)\s+(.+?)\.?$/i);
|
|
745
|
+
if (prefMatch && prefMatch[1]!.trim().length > 2) {
|
|
746
|
+
return {
|
|
747
|
+
subject: "user",
|
|
748
|
+
subjectId: "user",
|
|
749
|
+
predicate: "prefers",
|
|
750
|
+
object: prefMatch[1]!.trim(),
|
|
751
|
+
objectId: null, // literal, not entity
|
|
752
|
+
};
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
return null;
|
|
757
|
+
}
|
|
@@ -78,13 +78,13 @@ export async function sessionBootstrap(
|
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
-
// 2.
|
|
82
|
-
const
|
|
83
|
-
if (
|
|
84
|
-
const tokens = estimateTokens(
|
|
81
|
+
// 2. Current focus (recent preferences + active problems)
|
|
82
|
+
const focusSection = getCurrentFocus(store, DECISION_TOKEN_BUDGET);
|
|
83
|
+
if (focusSection) {
|
|
84
|
+
const tokens = estimateTokens(focusSection.text);
|
|
85
85
|
if (totalTokens + tokens <= TOTAL_TOKEN_BUDGET) {
|
|
86
|
-
sections.push(
|
|
87
|
-
paths.push(...
|
|
86
|
+
sections.push(focusSection.text);
|
|
87
|
+
paths.push(...focusSection.paths);
|
|
88
88
|
totalTokens += tokens;
|
|
89
89
|
}
|
|
90
90
|
}
|
|
@@ -252,38 +252,90 @@ function extractSection(body: string, sectionName: string): string | null {
|
|
|
252
252
|
return text.length > 10 ? `**${sectionName}:**\n${text}` : null;
|
|
253
253
|
}
|
|
254
254
|
|
|
255
|
-
function
|
|
255
|
+
function getCurrentFocus(
|
|
256
256
|
store: Store,
|
|
257
257
|
maxTokens: number
|
|
258
258
|
): { text: string; paths: string[] } | null {
|
|
259
|
-
const decisions = store.getDocumentsByType("decision", 5);
|
|
260
|
-
if (decisions.length === 0) return null;
|
|
261
|
-
|
|
262
259
|
const cutoff = new Date();
|
|
263
260
|
cutoff.setDate(cutoff.getDate() - DECISION_LOOKBACK_DAYS);
|
|
264
261
|
const cutoffStr = cutoff.toISOString();
|
|
265
262
|
|
|
266
|
-
//
|
|
267
|
-
const
|
|
268
|
-
|
|
263
|
+
// Gather recent decisions, preferences, and active problems
|
|
264
|
+
const decisions = store.getDocumentsByType("decision", 10);
|
|
265
|
+
const preferences = store.getDocumentsByType("preference", 5);
|
|
266
|
+
const problems = store.getDocumentsByType("problem", 5);
|
|
267
|
+
|
|
268
|
+
// Rank by: pinned first, then recency, then access_count
|
|
269
|
+
const now = Date.now();
|
|
270
|
+
const rankDoc = (d: any) => {
|
|
271
|
+
const pinBoost = d.pinned ? 1000 : 0;
|
|
272
|
+
const daysSince = (now - new Date(d.modifiedAt).getTime()) / 86400000;
|
|
273
|
+
const recencyScore = Math.max(0, 100 - daysSince * 5); // 0-100, loses 5 per day
|
|
274
|
+
const accessScore = (d.accessCount ?? 0) * 2;
|
|
275
|
+
return pinBoost + recencyScore + accessScore;
|
|
276
|
+
};
|
|
277
|
+
|
|
278
|
+
const recentDecisions = decisions
|
|
279
|
+
.filter(d => d.modifiedAt >= cutoffStr)
|
|
280
|
+
.sort((a, b) => rankDoc(b) - rankDoc(a));
|
|
281
|
+
|
|
282
|
+
const activeProblems = problems
|
|
283
|
+
.filter(d => d.modifiedAt >= cutoffStr && (d.confidence ?? 0.5) > 0.2);
|
|
284
|
+
|
|
285
|
+
// Preferences are durable — no date filter, just rank
|
|
286
|
+
const rankedPrefs = [...preferences].sort((a, b) => rankDoc(b) - rankDoc(a));
|
|
287
|
+
|
|
288
|
+
if (recentDecisions.length === 0 && rankedPrefs.length === 0 && activeProblems.length === 0) {
|
|
289
|
+
return null;
|
|
290
|
+
}
|
|
269
291
|
|
|
270
292
|
const maxChars = maxTokens * 4;
|
|
271
|
-
const lines: string[] = ["###
|
|
293
|
+
const lines: string[] = ["### Current Focus"];
|
|
272
294
|
const paths: string[] = [];
|
|
273
|
-
let charCount =
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
295
|
+
let charCount = 20;
|
|
296
|
+
|
|
297
|
+
// Active problems first (high priority)
|
|
298
|
+
if (activeProblems.length > 0) {
|
|
299
|
+
lines.push("**Active Problems:**");
|
|
300
|
+
charCount += 22;
|
|
301
|
+
for (const d of activeProblems) {
|
|
302
|
+
if (charCount >= maxChars) break;
|
|
303
|
+
const entry = `- ${d.title} (${d.modifiedAt.slice(0, 10)})`;
|
|
304
|
+
lines.push(entry);
|
|
305
|
+
paths.push(`${d.collection}/${d.path}`);
|
|
306
|
+
charCount += entry.length + 2;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// Recent decisions
|
|
311
|
+
if (recentDecisions.length > 0) {
|
|
312
|
+
lines.push("**Recent Decisions:**");
|
|
313
|
+
charCount += 24;
|
|
314
|
+
for (const d of recentDecisions) {
|
|
315
|
+
if (charCount >= maxChars) break;
|
|
316
|
+
let body = store.getDocumentBody({ filepath: `${d.collection}/${d.path}`, displayPath: `${d.collection}/${d.path}` } as any);
|
|
317
|
+
if (body) body = sanitizeSnippet(body);
|
|
318
|
+
if (body === "[content filtered for security]") continue;
|
|
319
|
+
const snippet = body ? smartTruncate(body, 200) : d.title;
|
|
320
|
+
const entry = `- **${d.title}** (${d.modifiedAt.slice(0, 10)})\n ${snippet}`;
|
|
321
|
+
if (charCount + entry.length > maxChars && lines.length > 2) break;
|
|
322
|
+
lines.push(entry);
|
|
323
|
+
paths.push(`${d.collection}/${d.path}`);
|
|
324
|
+
charCount += entry.length;
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
// User preferences (compact — title only, they're durable context)
|
|
329
|
+
if (rankedPrefs.length > 0) {
|
|
330
|
+
lines.push("**Preferences:**");
|
|
331
|
+
charCount += 18;
|
|
332
|
+
for (const d of rankedPrefs) {
|
|
333
|
+
if (charCount >= maxChars) break;
|
|
334
|
+
const entry = `- ${d.title}`;
|
|
335
|
+
lines.push(entry);
|
|
336
|
+
paths.push(`${d.collection}/${d.path}`);
|
|
337
|
+
charCount += entry.length + 2;
|
|
338
|
+
}
|
|
287
339
|
}
|
|
288
340
|
|
|
289
341
|
return lines.length > 1 ? { text: lines.join("\n"), paths } : null;
|
|
@@ -299,12 +351,15 @@ function getStaleNotes(
|
|
|
299
351
|
|
|
300
352
|
if (stale.length === 0) return null;
|
|
301
353
|
|
|
354
|
+
// Rank by confidence descending — higher confidence notes are more important to review
|
|
355
|
+
const ranked = [...stale].sort((a, b) => (b.confidence ?? 0.5) - (a.confidence ?? 0.5));
|
|
356
|
+
|
|
302
357
|
const maxChars = maxTokens * 4;
|
|
303
358
|
const lines: string[] = ["### Notes to Review"];
|
|
304
359
|
const paths: string[] = [];
|
|
305
360
|
let charCount = 25;
|
|
306
361
|
|
|
307
|
-
for (const d of
|
|
362
|
+
for (const d of ranked.slice(0, 5)) {
|
|
308
363
|
const entry = `- ${d.title} (${d.collection}/${d.path}) — last modified ${d.modifiedAt.slice(0, 10)}`;
|
|
309
364
|
if (charCount + entry.length > maxChars && lines.length > 1) break;
|
|
310
365
|
lines.push(entry);
|
package/src/llm.ts
CHANGED
|
@@ -290,6 +290,12 @@ export class LlamaCpp implements LLM {
|
|
|
290
290
|
// Track disposal state to prevent double-dispose
|
|
291
291
|
private disposed = false;
|
|
292
292
|
|
|
293
|
+
// Cooldown-based down-cache for remote services.
|
|
294
|
+
// Timestamps (ms since epoch) until which we skip remote and use local fallback.
|
|
295
|
+
// Resets after cooldown expires — one network hiccup doesn't permanently disable GPU.
|
|
296
|
+
private remoteEmbedDownUntil = 0;
|
|
297
|
+
private remoteLlmDownUntil = 0;
|
|
298
|
+
private static readonly REMOTE_COOLDOWN_MS = 60_000; // 60s cooldown on transport failure
|
|
293
299
|
|
|
294
300
|
constructor(config: LlamaCppConfig = {}) {
|
|
295
301
|
this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
|
|
@@ -563,14 +569,19 @@ export class LlamaCpp implements LLM {
|
|
|
563
569
|
|
|
564
570
|
async embed(text: string, options: EmbedOptions = {}): Promise<EmbeddingResult | null> {
|
|
565
571
|
// Remote server or cloud API — preferred path
|
|
566
|
-
if (this.remoteEmbedUrl) {
|
|
572
|
+
if (this.remoteEmbedUrl && !this.isRemoteEmbedDown()) {
|
|
567
573
|
const extraParams = this.getCloudEmbedParams(!!options.isQuery);
|
|
568
574
|
const result = await this.embedRemote(text, extraParams);
|
|
569
575
|
if (result) return result;
|
|
570
576
|
// Cloud providers don't fall back — if API key is set, the user chose cloud
|
|
571
577
|
if (this.isCloudEmbedding()) return null;
|
|
572
|
-
//
|
|
573
|
-
|
|
578
|
+
// Transport failure already set cooldown in embedRemote — fall through
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
// Remote is in cooldown or was never configured — try local fallback
|
|
582
|
+
if (this.remoteEmbedUrl && this.isRemoteEmbedDown()) {
|
|
583
|
+
if (process.env.CLAWMEM_NO_LOCAL_MODELS === "true") return null;
|
|
584
|
+
console.error("[embed] Remote embed in cooldown, using in-process fallback");
|
|
574
585
|
}
|
|
575
586
|
|
|
576
587
|
// In-process fallback via node-llama-cpp (auto-downloads EmbeddingGemma on first use)
|
|
@@ -586,15 +597,20 @@ export class LlamaCpp implements LLM {
|
|
|
586
597
|
if (texts.length === 0) return [];
|
|
587
598
|
|
|
588
599
|
// Remote server or cloud API
|
|
589
|
-
if (this.remoteEmbedUrl) {
|
|
600
|
+
if (this.remoteEmbedUrl && !this.isRemoteEmbedDown()) {
|
|
590
601
|
const extraParams = this.getCloudEmbedParams(false);
|
|
591
602
|
const results = await this.embedRemoteBatch(texts, extraParams);
|
|
592
603
|
// If we got at least one result, remote is working
|
|
593
604
|
if (results.some(r => r !== null)) return results;
|
|
594
605
|
// Cloud providers don't fall back
|
|
595
606
|
if (this.isCloudEmbedding()) return results;
|
|
596
|
-
//
|
|
597
|
-
|
|
607
|
+
// Transport failure already set cooldown in embedRemoteBatch — fall through
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
// Remote is in cooldown or was never configured — try local fallback
|
|
611
|
+
if (this.remoteEmbedUrl && this.isRemoteEmbedDown()) {
|
|
612
|
+
if (process.env.CLAWMEM_NO_LOCAL_MODELS === "true") return texts.map(() => null);
|
|
613
|
+
console.error("[embed] Remote embed in cooldown, using in-process fallback");
|
|
598
614
|
}
|
|
599
615
|
|
|
600
616
|
// In-process fallback via node-llama-cpp
|
|
@@ -645,6 +661,46 @@ export class LlamaCpp implements LLM {
|
|
|
645
661
|
return text.slice(0, this.maxRemoteEmbedChars);
|
|
646
662
|
}
|
|
647
663
|
|
|
664
|
+
// ---------- Remote failure classification ----------
|
|
665
|
+
|
|
666
|
+
/**
|
|
667
|
+
* Classify whether an error is a transport failure (server unreachable)
|
|
668
|
+
* vs an HTTP error (server received request but rejected it) or abort.
|
|
669
|
+
* Only transport failures should trigger the down-cache cooldown.
|
|
670
|
+
*/
|
|
671
|
+
private isTransportError(error: unknown): boolean {
|
|
672
|
+
if (error instanceof TypeError && String(error.message).includes("fetch")) return true; // fetch network error
|
|
673
|
+
const code = (error as any)?.code || (error as any)?.cause?.code;
|
|
674
|
+
if (code === "ECONNREFUSED" || code === "ETIMEDOUT" || code === "ENOTFOUND" ||
|
|
675
|
+
code === "EHOSTUNREACH" || code === "ENETUNREACH" || code === "ECONNRESET" ||
|
|
676
|
+
code === "UND_ERR_CONNECT_TIMEOUT") return true;
|
|
677
|
+
const msg = String((error as any)?.message || "").toLowerCase();
|
|
678
|
+
if (msg.includes("econnrefused") || msg.includes("etimedout") || msg.includes("enotfound") ||
|
|
679
|
+
msg.includes("ehostunreach") || msg.includes("enetunreach")) return true;
|
|
680
|
+
return false;
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
private isAbortError(error: unknown): boolean {
|
|
684
|
+
return (error instanceof DOMException && error.name === "AbortError") ||
|
|
685
|
+
(error as any)?.name === "AbortError";
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
private isRemoteLlmDown(): boolean {
|
|
689
|
+
return Date.now() < this.remoteLlmDownUntil;
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
private isRemoteEmbedDown(): boolean {
|
|
693
|
+
return Date.now() < this.remoteEmbedDownUntil;
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
private markRemoteLlmDown(): void {
|
|
697
|
+
this.remoteLlmDownUntil = Date.now() + LlamaCpp.REMOTE_COOLDOWN_MS;
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
private markRemoteEmbedDown(): void {
|
|
701
|
+
this.remoteEmbedDownUntil = Date.now() + LlamaCpp.REMOTE_COOLDOWN_MS;
|
|
702
|
+
}
|
|
703
|
+
|
|
648
704
|
// ---------- Remote embedding (GPU server or cloud API via /v1/embeddings) ----------
|
|
649
705
|
|
|
650
706
|
// Default: 6000 chars for EmbeddingGemma-300M (2048-token context).
|
|
@@ -712,6 +768,7 @@ export class LlamaCpp implements LLM {
|
|
|
712
768
|
}
|
|
713
769
|
|
|
714
770
|
private async embedRemote(text: string, extraParams: Record<string, unknown> = {}, retries = 5): Promise<EmbeddingResult | null> {
|
|
771
|
+
if (this.isRemoteEmbedDown()) return null;
|
|
715
772
|
const input = this.truncateForEmbed(text);
|
|
716
773
|
for (let attempt = 0; attempt < retries; attempt++) {
|
|
717
774
|
try {
|
|
@@ -741,11 +798,16 @@ export class LlamaCpp implements LLM {
|
|
|
741
798
|
model: data.model || this.remoteEmbedUrl!,
|
|
742
799
|
};
|
|
743
800
|
} catch (error) {
|
|
744
|
-
|
|
801
|
+
if (this.isTransportError(error)) {
|
|
802
|
+
console.error("[embed] Remote embed server unreachable, cooldown 60s");
|
|
803
|
+
this.markRemoteEmbedDown();
|
|
804
|
+
} else {
|
|
805
|
+
console.error("[embed] Remote embed error:", error);
|
|
806
|
+
}
|
|
745
807
|
return null;
|
|
746
808
|
}
|
|
747
809
|
}
|
|
748
|
-
console.error("Remote embed: max retries exceeded (rate limit)");
|
|
810
|
+
console.error("[embed] Remote embed: max retries exceeded (rate limit)");
|
|
749
811
|
return null;
|
|
750
812
|
}
|
|
751
813
|
|
|
@@ -753,6 +815,7 @@ export class LlamaCpp implements LLM {
|
|
|
753
815
|
lastBatchTokens = 0;
|
|
754
816
|
|
|
755
817
|
private async embedRemoteBatch(texts: string[], extraParams: Record<string, unknown> = {}, retries = 3): Promise<(EmbeddingResult | null)[]> {
|
|
818
|
+
if (this.isRemoteEmbedDown()) return texts.map(() => null);
|
|
756
819
|
const truncated = texts.map(t => this.truncateForEmbed(t));
|
|
757
820
|
for (let attempt = 0; attempt < retries; attempt++) {
|
|
758
821
|
try {
|
|
@@ -787,11 +850,16 @@ export class LlamaCpp implements LLM {
|
|
|
787
850
|
}
|
|
788
851
|
return results;
|
|
789
852
|
} catch (error) {
|
|
790
|
-
|
|
853
|
+
if (this.isTransportError(error)) {
|
|
854
|
+
console.error("[embed] Remote batch embed server unreachable, cooldown 60s");
|
|
855
|
+
this.markRemoteEmbedDown();
|
|
856
|
+
} else {
|
|
857
|
+
console.error("[embed] Remote batch embed error:", error);
|
|
858
|
+
}
|
|
791
859
|
return texts.map(() => null);
|
|
792
860
|
}
|
|
793
861
|
}
|
|
794
|
-
console.error("Remote batch embed: max retries exceeded (rate limit)");
|
|
862
|
+
console.error("[embed] Remote batch embed: max retries exceeded (rate limit)");
|
|
795
863
|
return texts.map(() => null);
|
|
796
864
|
}
|
|
797
865
|
|
|
@@ -800,8 +868,18 @@ export class LlamaCpp implements LLM {
|
|
|
800
868
|
const temperature = options.temperature ?? 0;
|
|
801
869
|
|
|
802
870
|
// Remote LLM server (GPU) — preferred path
|
|
803
|
-
if (this.remoteLlmUrl) {
|
|
804
|
-
|
|
871
|
+
if (this.remoteLlmUrl && !this.isRemoteLlmDown()) {
|
|
872
|
+
const result = await this.generateRemote(prompt, maxTokens, temperature, options.signal);
|
|
873
|
+
if (result) return result;
|
|
874
|
+
// If remote failed but NOT transport error (HTTP 400/500, abort), don't fall through
|
|
875
|
+
if (!this.isRemoteLlmDown()) return null;
|
|
876
|
+
// Transport failure set cooldown — fall through to local
|
|
877
|
+
}
|
|
878
|
+
|
|
879
|
+
// Remote is in cooldown or was never configured — try local fallback
|
|
880
|
+
if (this.remoteLlmUrl && this.isRemoteLlmDown()) {
|
|
881
|
+
if (process.env.CLAWMEM_NO_LOCAL_MODELS === "true") return null;
|
|
882
|
+
console.error("[generate] Remote LLM in cooldown, falling back to in-process generation");
|
|
805
883
|
}
|
|
806
884
|
|
|
807
885
|
// Local fallback via node-llama-cpp (CPU)
|
|
@@ -840,6 +918,8 @@ export class LlamaCpp implements LLM {
|
|
|
840
918
|
temperature: number,
|
|
841
919
|
signal?: AbortSignal
|
|
842
920
|
): Promise<GenerateResult | null> {
|
|
921
|
+
// Re-check: concurrent call may have set cooldown while we were awaited
|
|
922
|
+
if (this.isRemoteLlmDown()) return null;
|
|
843
923
|
try {
|
|
844
924
|
const resp = await fetch(`${this.remoteLlmUrl}/v1/chat/completions`, {
|
|
845
925
|
method: "POST",
|
|
@@ -854,7 +934,8 @@ export class LlamaCpp implements LLM {
|
|
|
854
934
|
});
|
|
855
935
|
|
|
856
936
|
if (!resp.ok) {
|
|
857
|
-
console.error(`[generate] Remote LLM
|
|
937
|
+
console.error(`[generate] Remote LLM HTTP ${resp.status}: ${resp.statusText}`);
|
|
938
|
+
// HTTP errors mean the server IS reachable — don't trigger down-cache
|
|
858
939
|
return null;
|
|
859
940
|
}
|
|
860
941
|
|
|
@@ -869,7 +950,16 @@ export class LlamaCpp implements LLM {
|
|
|
869
950
|
done: true,
|
|
870
951
|
};
|
|
871
952
|
} catch (error) {
|
|
872
|
-
|
|
953
|
+
if (this.isAbortError(error)) {
|
|
954
|
+
// User/caller cancelled — don't cache as "down"
|
|
955
|
+
return null;
|
|
956
|
+
}
|
|
957
|
+
if (this.isTransportError(error)) {
|
|
958
|
+
console.error("[generate] Remote LLM server unreachable, cooldown 60s");
|
|
959
|
+
this.markRemoteLlmDown();
|
|
960
|
+
} else {
|
|
961
|
+
console.error("[generate] Remote LLM error:", error);
|
|
962
|
+
}
|
|
873
963
|
return null;
|
|
874
964
|
}
|
|
875
965
|
}
|
|
@@ -939,8 +1029,22 @@ Output:`;
|
|
|
939
1029
|
const intent = options.intent;
|
|
940
1030
|
|
|
941
1031
|
// Remote LLM path — no grammar constraint, parse output instead
|
|
942
|
-
if (this.remoteLlmUrl) {
|
|
943
|
-
|
|
1032
|
+
if (this.remoteLlmUrl && !this.isRemoteLlmDown()) {
|
|
1033
|
+
const result = await this.expandQueryRemote(query, includeLexical, context, intent);
|
|
1034
|
+
// Check if transport failure set cooldown during this call
|
|
1035
|
+
if (!this.isRemoteLlmDown()) return result;
|
|
1036
|
+
// Transport failure — fall through to local grammar path
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
// Remote is in cooldown (pre-existing or just set) — fall through to local
|
|
1040
|
+
if (this.remoteLlmUrl && this.isRemoteLlmDown()) {
|
|
1041
|
+
if (process.env.CLAWMEM_NO_LOCAL_MODELS === "true") {
|
|
1042
|
+
// Can't fall back — return passthrough
|
|
1043
|
+
const fallback: Queryable[] = [{ type: 'vec', text: query }];
|
|
1044
|
+
if (includeLexical) fallback.unshift({ type: 'lex', text: query });
|
|
1045
|
+
return fallback;
|
|
1046
|
+
}
|
|
1047
|
+
console.error("[expandQuery] Remote LLM in cooldown, falling back to in-process grammar expansion");
|
|
944
1048
|
}
|
|
945
1049
|
|
|
946
1050
|
const llama = await this.ensureLlama();
|
package/src/mcp.ts
CHANGED
|
@@ -1918,6 +1918,61 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
1918
1918
|
}
|
|
1919
1919
|
);
|
|
1920
1920
|
|
|
1921
|
+
// ---------------------------------------------------------------------------
|
|
1922
|
+
// Tool: kg_query (SPO Knowledge Graph)
|
|
1923
|
+
// ---------------------------------------------------------------------------
|
|
1924
|
+
|
|
1925
|
+
server.registerTool(
|
|
1926
|
+
"kg_query",
|
|
1927
|
+
{
|
|
1928
|
+
title: "Knowledge Graph Query",
|
|
1929
|
+
description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'.",
|
|
1930
|
+
inputSchema: {
|
|
1931
|
+
entity: z.string().describe("Entity name or ID to query"),
|
|
1932
|
+
as_of: z.string().optional().describe("Date filter (YYYY-MM-DD) — only facts valid at this date"),
|
|
1933
|
+
direction: z.enum(["outgoing", "incoming", "both"]).optional().default("both").describe("Relationship direction"),
|
|
1934
|
+
vault: z.string().optional().describe("Named vault (omit for default vault)"),
|
|
1935
|
+
},
|
|
1936
|
+
},
|
|
1937
|
+
async ({ entity, as_of, direction, vault }) => {
|
|
1938
|
+
const store = getStore(vault);
|
|
1939
|
+
|
|
1940
|
+
const entityResults = store.searchEntities(entity, 1);
|
|
1941
|
+
const entityId = entityResults.length > 0
|
|
1942
|
+
? entityResults[0]!.entity_id
|
|
1943
|
+
: entity.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
|
|
1944
|
+
|
|
1945
|
+
const triples = store.queryEntityTriples(entityId, { asOf: as_of, direction });
|
|
1946
|
+
const stats = store.getTripleStats();
|
|
1947
|
+
|
|
1948
|
+
if (triples.length === 0) {
|
|
1949
|
+
return {
|
|
1950
|
+
content: [{ type: "text", text: `No knowledge graph facts found for "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
|
|
1951
|
+
};
|
|
1952
|
+
}
|
|
1953
|
+
|
|
1954
|
+
const lines = [`Knowledge graph for "${entity}" (${triples.length} fact${triples.length === 1 ? '' : 's'}):\n`];
|
|
1955
|
+
|
|
1956
|
+
for (const t of triples) {
|
|
1957
|
+
const validity = t.current ? "current" : `ended ${t.validTo}`;
|
|
1958
|
+
const from = t.validFrom ? ` (since ${t.validFrom})` : "";
|
|
1959
|
+
const conf = Math.round(t.confidence * 100);
|
|
1960
|
+
lines.push(`[${t.direction}] ${t.subject} → ${t.predicate} → ${t.object}${from} [${validity}, ${conf}%]`);
|
|
1961
|
+
}
|
|
1962
|
+
|
|
1963
|
+
return {
|
|
1964
|
+
content: [{ type: "text", text: lines.join('\n') }],
|
|
1965
|
+
structuredContent: {
|
|
1966
|
+
entity,
|
|
1967
|
+
direction,
|
|
1968
|
+
as_of: as_of ?? null,
|
|
1969
|
+
facts: triples,
|
|
1970
|
+
stats,
|
|
1971
|
+
},
|
|
1972
|
+
};
|
|
1973
|
+
}
|
|
1974
|
+
);
|
|
1975
|
+
|
|
1921
1976
|
// ---------------------------------------------------------------------------
|
|
1922
1977
|
// Tool: memory_evolution_status (A-MEM)
|
|
1923
1978
|
// ---------------------------------------------------------------------------
|
|
@@ -2407,6 +2462,99 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
2407
2462
|
}
|
|
2408
2463
|
);
|
|
2409
2464
|
|
|
2465
|
+
// ---------------------------------------------------------------------------
|
|
2466
|
+
// Tool: diary_write
|
|
2467
|
+
// ---------------------------------------------------------------------------
|
|
2468
|
+
|
|
2469
|
+
server.registerTool(
|
|
2470
|
+
"diary_write",
|
|
2471
|
+
{
|
|
2472
|
+
title: "Write Diary Entry",
|
|
2473
|
+
description: "Write to the agent's diary. Use for recording important events, decisions, or observations in environments without hook support. Entries are stored as memories and are searchable.",
|
|
2474
|
+
inputSchema: {
|
|
2475
|
+
entry: z.string().describe("Diary entry text"),
|
|
2476
|
+
topic: z.string().optional().default("general").describe("Topic tag (e.g., 'technical', 'user_facts', 'session')"),
|
|
2477
|
+
agent: z.string().optional().default("agent").describe("Agent name writing the entry"),
|
|
2478
|
+
vault: z.string().optional().describe("Named vault (omit for default vault)"),
|
|
2479
|
+
},
|
|
2480
|
+
},
|
|
2481
|
+
async ({ entry, topic, agent, vault }) => {
|
|
2482
|
+
const store = getStore(vault);
|
|
2483
|
+
const now = new Date();
|
|
2484
|
+
const dateStr = now.toISOString().slice(0, 10);
|
|
2485
|
+
const timeStr = now.toISOString().slice(11, 19).replace(/:/g, "");
|
|
2486
|
+
const ms = String(now.getMilliseconds()).padStart(3, "0");
|
|
2487
|
+
const diaryPath = `diary/${dateStr}-${timeStr}${ms}-${topic}.md`;
|
|
2488
|
+
const body = `---\ntitle: "${entry.slice(0, 80).replace(/"/g, '\\"')}"\ncontent_type: note\ntags: [diary, ${topic}]\ndomain: "${agent}"\n---\n\n${entry}`;
|
|
2489
|
+
|
|
2490
|
+
const result = store.saveMemory({
|
|
2491
|
+
collection: "_clawmem",
|
|
2492
|
+
path: diaryPath,
|
|
2493
|
+
title: entry.slice(0, 80),
|
|
2494
|
+
body,
|
|
2495
|
+
contentType: "note",
|
|
2496
|
+
confidence: 0.7,
|
|
2497
|
+
semanticPayload: `${diaryPath}::${entry}`,
|
|
2498
|
+
});
|
|
2499
|
+
|
|
2500
|
+
return {
|
|
2501
|
+
content: [{ type: "text", text: `Diary entry saved (${result.action}, doc #${result.docId})` }],
|
|
2502
|
+
structuredContent: { action: result.action, docId: result.docId, path: diaryPath },
|
|
2503
|
+
};
|
|
2504
|
+
}
|
|
2505
|
+
);
|
|
2506
|
+
|
|
2507
|
+
// ---------------------------------------------------------------------------
|
|
2508
|
+
// Tool: diary_read
|
|
2509
|
+
// ---------------------------------------------------------------------------
|
|
2510
|
+
|
|
2511
|
+
server.registerTool(
|
|
2512
|
+
"diary_read",
|
|
2513
|
+
{
|
|
2514
|
+
title: "Read Diary Entries",
|
|
2515
|
+
description: "Read recent diary entries. Use to review past observations and events recorded by the agent.",
|
|
2516
|
+
inputSchema: {
|
|
2517
|
+
last_n: z.number().optional().default(10).describe("Number of recent entries to return"),
|
|
2518
|
+
agent: z.string().optional().describe("Filter by agent name"),
|
|
2519
|
+
vault: z.string().optional().describe("Named vault (omit for default vault)"),
|
|
2520
|
+
},
|
|
2521
|
+
},
|
|
2522
|
+
async ({ last_n, agent, vault }) => {
|
|
2523
|
+
const store = getStore(vault);
|
|
2524
|
+
const params: any[] = [];
|
|
2525
|
+
let agentFilter = "";
|
|
2526
|
+
if (agent) {
|
|
2527
|
+
agentFilter = "AND d.domain = ?";
|
|
2528
|
+
params.push(agent);
|
|
2529
|
+
}
|
|
2530
|
+
params.push(last_n);
|
|
2531
|
+
|
|
2532
|
+
const rows = store.db.prepare(`
|
|
2533
|
+
SELECT d.id, d.path, d.title, d.modified_at as modifiedAt, d.domain
|
|
2534
|
+
FROM documents d
|
|
2535
|
+
WHERE d.active = 1 AND d.collection = '_clawmem' AND d.path LIKE 'diary/%'
|
|
2536
|
+
${agentFilter}
|
|
2537
|
+
ORDER BY d.modified_at DESC
|
|
2538
|
+
LIMIT ?
|
|
2539
|
+
`).all(...params) as any[];
|
|
2540
|
+
|
|
2541
|
+
if (rows.length === 0) {
|
|
2542
|
+
return { content: [{ type: "text", text: "No diary entries found." }] };
|
|
2543
|
+
}
|
|
2544
|
+
|
|
2545
|
+
const lines = [`Diary (${rows.length} entries):\n`];
|
|
2546
|
+
for (const row of rows) {
|
|
2547
|
+
const agentLabel = row.domain ? ` [${row.domain}]` : "";
|
|
2548
|
+
lines.push(`${row.modifiedAt.slice(0, 16)}${agentLabel} ${row.title}`);
|
|
2549
|
+
}
|
|
2550
|
+
|
|
2551
|
+
return {
|
|
2552
|
+
content: [{ type: "text", text: lines.join('\n') }],
|
|
2553
|
+
structuredContent: { entries: rows },
|
|
2554
|
+
};
|
|
2555
|
+
}
|
|
2556
|
+
);
|
|
2557
|
+
|
|
2410
2558
|
// ---------------------------------------------------------------------------
|
|
2411
2559
|
// Connect
|
|
2412
2560
|
// ---------------------------------------------------------------------------
|
package/src/store.ts
CHANGED
|
@@ -708,6 +708,31 @@ function initializeDatabase(db: Database): void {
|
|
|
708
708
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_cooccurrences_a ON entity_cooccurrences(entity_a)`);
|
|
709
709
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_cooccurrences_b ON entity_cooccurrences(entity_b)`);
|
|
710
710
|
|
|
711
|
+
// SPO knowledge graph: temporal entity-relationship triples
|
|
712
|
+
db.exec(`
|
|
713
|
+
CREATE TABLE IF NOT EXISTS entity_triples (
|
|
714
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
715
|
+
subject_entity_id TEXT NOT NULL,
|
|
716
|
+
predicate TEXT NOT NULL,
|
|
717
|
+
object_entity_id TEXT,
|
|
718
|
+
object_literal TEXT,
|
|
719
|
+
valid_from TEXT,
|
|
720
|
+
valid_to TEXT,
|
|
721
|
+
confidence REAL DEFAULT 1.0,
|
|
722
|
+
source_doc_id INTEGER,
|
|
723
|
+
source_fact TEXT,
|
|
724
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
725
|
+
FOREIGN KEY (subject_entity_id) REFERENCES entity_nodes(entity_id),
|
|
726
|
+
FOREIGN KEY (object_entity_id) REFERENCES entity_nodes(entity_id),
|
|
727
|
+
FOREIGN KEY (source_doc_id) REFERENCES documents(id)
|
|
728
|
+
)
|
|
729
|
+
`);
|
|
730
|
+
|
|
731
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_subject ON entity_triples(subject_entity_id)`);
|
|
732
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_object ON entity_triples(object_entity_id)`);
|
|
733
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_predicate ON entity_triples(predicate)`);
|
|
734
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_valid ON entity_triples(valid_from, valid_to)`);
|
|
735
|
+
|
|
711
736
|
// Entity FTS5 for fuzzy name lookup
|
|
712
737
|
db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS entities_fts USING fts5(entity_id, name, entity_type)`);
|
|
713
738
|
|
|
@@ -904,6 +929,12 @@ export type Store = {
|
|
|
904
929
|
searchEntities: (query: string, limit?: number) => { entity_id: string; name: string; type: string; mention_count: number; cooccurrence_count: number }[];
|
|
905
930
|
getEntityGraphNeighbors: (seedDocIds: number[], limit?: number) => { docId: number; score: number; viaEntity: string }[];
|
|
906
931
|
|
|
932
|
+
// SPO knowledge graph
|
|
933
|
+
addTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, options?: { validFrom?: string; validTo?: string; confidence?: number; sourceDocId?: number; sourceFact?: string }) => number;
|
|
934
|
+
invalidateTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, endedDate?: string) => number;
|
|
935
|
+
queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[];
|
|
936
|
+
getTripleStats: () => { totalTriples: number; currentFacts: number; expiredFacts: number; predicateTypes: string[] };
|
|
937
|
+
|
|
907
938
|
// Co-activation tracking
|
|
908
939
|
recordCoActivation: (paths: string[]) => void;
|
|
909
940
|
getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];
|
|
@@ -1070,6 +1101,93 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
|
|
|
1070
1101
|
searchEntities: (query: string, limit?: number) => searchEntities(db, query, limit),
|
|
1071
1102
|
getEntityGraphNeighbors: (seedDocIds: number[], limit?: number) => getEntityGraphNeighbors(db, seedDocIds, limit),
|
|
1072
1103
|
|
|
1104
|
+
// SPO knowledge graph
|
|
1105
|
+
addTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, options?: { validFrom?: string; validTo?: string; confidence?: number; sourceDocId?: number; sourceFact?: string }) => {
|
|
1106
|
+
const pred = predicate.toLowerCase().replace(/\s+/g, "_");
|
|
1107
|
+
const now = new Date().toISOString();
|
|
1108
|
+
const objClause = objectEntityId
|
|
1109
|
+
? "object_entity_id = ? AND object_literal IS NULL"
|
|
1110
|
+
: "object_entity_id IS NULL AND object_literal = ?";
|
|
1111
|
+
const objParam = objectEntityId ?? objectLiteral;
|
|
1112
|
+
const existing = db.prepare(
|
|
1113
|
+
`SELECT id FROM entity_triples WHERE subject_entity_id = ? AND predicate = ? AND ${objClause} AND valid_to IS NULL`
|
|
1114
|
+
).get(subjectEntityId, pred, objParam) as { id: number } | null;
|
|
1115
|
+
if (existing) return existing.id;
|
|
1116
|
+
|
|
1117
|
+
const result = db.prepare(`
|
|
1118
|
+
INSERT INTO entity_triples (subject_entity_id, predicate, object_entity_id, object_literal, valid_from, valid_to, confidence, source_doc_id, source_fact, created_at)
|
|
1119
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1120
|
+
`).run(
|
|
1121
|
+
subjectEntityId, pred, objectEntityId, objectLiteral,
|
|
1122
|
+
options?.validFrom ?? null, options?.validTo ?? null,
|
|
1123
|
+
options?.confidence ?? 1.0, options?.sourceDocId ?? null,
|
|
1124
|
+
options?.sourceFact ?? null, now
|
|
1125
|
+
);
|
|
1126
|
+
return Number(result.lastInsertRowid);
|
|
1127
|
+
},
|
|
1128
|
+
|
|
1129
|
+
invalidateTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, endedDate?: string) => {
|
|
1130
|
+
const pred = predicate.toLowerCase().replace(/\s+/g, "_");
|
|
1131
|
+
const ended = endedDate || new Date().toISOString().slice(0, 10);
|
|
1132
|
+
const objClause = objectEntityId
|
|
1133
|
+
? "object_entity_id = ? AND object_literal IS NULL"
|
|
1134
|
+
: "object_entity_id IS NULL AND object_literal = ?";
|
|
1135
|
+
const objParam = objectEntityId ?? objectLiteral;
|
|
1136
|
+
const result = db.prepare(
|
|
1137
|
+
`UPDATE entity_triples SET valid_to = ? WHERE subject_entity_id = ? AND predicate = ? AND ${objClause} AND valid_to IS NULL`
|
|
1138
|
+
).run(ended, subjectEntityId, pred, objParam);
|
|
1139
|
+
return result.changes;
|
|
1140
|
+
},
|
|
1141
|
+
|
|
1142
|
+
/**
 * Look up knowledge-graph triples touching an entity, optionally as of a
 * point in time.
 *
 * @param entityId - entity whose facts to fetch (matched against
 *   subject_entity_id for "outgoing", object_entity_id for "incoming").
 * @param options.asOf - ISO timestamp; when given, only triples whose
 *   [valid_from, valid_to] window contains it are returned (NULL bounds are
 *   treated as open-ended).
 * @param options.direction - "outgoing" | "incoming" | "both" (default "both").
 * @returns flat list of triples with human-readable subject/object names
 *   (entity name when known, else the literal, else the raw entity id);
 *   `current` is true for facts whose valid_to is still NULL.
 *
 * NOTE(review): with direction "both", a self-referencing triple
 * (subject === object === entityId) is reported twice, once per direction —
 * preserved from the original implementation.
 */
queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => {
  const direction = options?.direction ?? "both";
  const asOf = options?.asOf;

  type TripleRecord = {
    id: number;
    direction: string;
    subject: string;
    predicate: string;
    object: string;
    validFrom: string | null;
    validTo: string | null;
    confidence: number;
    current: boolean;
  };
  type TripleRow = {
    id: number;
    predicate: string;
    valid_from: string | null;
    valid_to: string | null;
    confidence: number;
    sub_name: string;
    obj_name: string;
  };

  const results: TripleRecord[] = [];

  // Outgoing and incoming differ only in which side of the triple is matched;
  // everything else (projection, joins, temporal filter) is shared.
  const collect = (
    whereColumn: "subject_entity_id" | "object_entity_id",
    dir: "outgoing" | "incoming",
  ): void => {
    let query = `SELECT t.id, t.predicate, t.valid_from, t.valid_to, t.confidence,
      COALESCE(s.name, t.subject_entity_id) as sub_name, COALESCE(o.name, t.object_literal, t.object_entity_id) as obj_name
      FROM entity_triples t
      LEFT JOIN entity_nodes s ON t.subject_entity_id = s.entity_id
      LEFT JOIN entity_nodes o ON t.object_entity_id = o.entity_id
      WHERE t.${whereColumn} = ?`;
    const params: string[] = [entityId];
    if (asOf) {
      // Open-ended windows (NULL valid_from/valid_to) always match.
      query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)";
      params.push(asOf, asOf);
    }
    for (const row of db.prepare(query).all(...params) as TripleRow[]) {
      results.push({
        id: row.id,
        direction: dir,
        subject: row.sub_name,
        predicate: row.predicate,
        object: row.obj_name,
        validFrom: row.valid_from,
        validTo: row.valid_to,
        confidence: row.confidence,
        current: row.valid_to === null,
      });
    }
  };

  if (direction === "outgoing" || direction === "both") collect("subject_entity_id", "outgoing");
  if (direction === "incoming" || direction === "both") collect("object_entity_id", "incoming");

  return results;
},
|
|
1183
|
+
|
|
1184
|
+
/**
 * Summarize the triple store: total triples, how many are still current
 * (valid_to IS NULL), how many have been ended, and the distinct predicate
 * vocabulary in sorted order.
 *
 * @returns { totalTriples, currentFacts, expiredFacts, predicateTypes }
 */
getTripleStats: () => {
  // One table scan instead of two: count all rows and, in the same pass,
  // the subset whose valid_to is still open.
  const counts = db.prepare(
    "SELECT COUNT(*) as total, SUM(CASE WHEN valid_to IS NULL THEN 1 ELSE 0 END) as current FROM entity_triples"
  ).get() as { total: number; current: number | null };
  const total = counts.total;
  // SUM over an empty table yields NULL — normalize to 0.
  const current = counts.current ?? 0;
  const predicates = (
    db.prepare("SELECT DISTINCT predicate FROM entity_triples ORDER BY predicate").all() as { predicate: string }[]
  ).map((r) => r.predicate);
  return { totalTriples: total, currentFacts: current, expiredFacts: total - current, predicateTypes: predicates };
},
|
|
1190
|
+
|
|
1073
1191
|
// Co-activation tracking
|
|
1074
1192
|
recordCoActivation: (paths: string[]) => {
|
|
1075
1193
|
if (paths.length < 2) return;
|
|
@@ -1333,6 +1451,7 @@ export type DocumentRow = {
|
|
|
1333
1451
|
confidence: number;
|
|
1334
1452
|
accessCount: number;
|
|
1335
1453
|
bodyLength: number;
|
|
1454
|
+
pinned: number;
|
|
1336
1455
|
};
|
|
1337
1456
|
|
|
1338
1457
|
// =============================================================================
|
|
@@ -3560,7 +3679,7 @@ function getDocumentsByTypeFn(db: Database, contentType: string, limit: number =
|
|
|
3560
3679
|
SELECT d.id, d.collection, d.path, d.title, d.hash, d.modified_at as modifiedAt,
|
|
3561
3680
|
d.domain, d.workstream, d.tags, d.content_type as contentType,
|
|
3562
3681
|
d.review_by as reviewBy, d.confidence, d.access_count as accessCount,
|
|
3563
|
-
LENGTH(c.doc) as bodyLength
|
|
3682
|
+
LENGTH(c.doc) as bodyLength, d.pinned
|
|
3564
3683
|
FROM documents d
|
|
3565
3684
|
JOIN content c ON c.hash = d.hash
|
|
3566
3685
|
WHERE d.active = 1 AND d.content_type = ?
|