clawmem 0.3.4 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -620,6 +620,13 @@ Symptom: reindex --force after v0.2.0 upgrade shows no entity extraction
620
620
  → `--force` alone only refreshes A-MEM notes (keywords, tags, context). `--enrich`
621
621
  is needed after major upgrades that add new enrichment stages.
622
622
 
623
+ Symptom: `clawmem update` crashes with "Binding expected string, TypedArray, boolean, number, bigint or null"
624
+ → YAML frontmatter values like `title: 2023-09-27` or `title: true` are coerced by gray-matter
625
+ into Date objects or booleans. Bun's SQLite driver rejects these as bind parameters.
626
+ → Fixed v0.4.2: `parseDocument()` runtime-checks all frontmatter fields via `str()` helper.
627
+ Defense-in-depth `safeTitle` guards in `insertDocument`/`updateDocument`/`reactivateDocument`.
628
+ → Affects: title, domain, workstream, content_type, review_by — any field gray-matter can coerce.
629
+
623
630
  Symptom: CLI reindex/update falls back to node-llama-cpp Vulkan (not GPU server)
624
631
  → GPU env vars only in systemd drop-in, not in wrapper script. CLI invocations missed them.
625
632
  → Fixed 2026-02-12: bin/clawmem wrapper exports CLAWMEM_EMBED_URL/LLM_URL/RERANK_URL defaults.
@@ -685,3 +692,4 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
685
692
  - HTTP REST API: `clawmem serve [--port 7438]` — optional REST server on localhost. Search, retrieval, lifecycle, and graph traversal. `POST /retrieve` mirrors `memory_retrieve` with auto-routing (keyword/semantic/causal/timeline/hybrid). `POST /search` provides direct mode selection. Bearer token auth via `CLAWMEM_API_TOKEN` env var (disabled if unset).
686
693
  - OpenClaw ContextEngine plugin: `clawmem setup openclaw` — registers ClawMem as a native OpenClaw context engine. Uses `before_prompt_build` for retrieval (prompt-aware), `afterTurn()` for extraction, `compact()` for pre-compaction + runtime delegation. Shares same vault as Claude Code hooks (dual-mode). SQLite busy_timeout=5000ms for concurrent access safety.
687
694
  - **OpenClaw v2026.3.28+ compaction fix (v0.3.0):** `compact()` now delegates to OpenClaw's runtime compactor via `delegateCompactionToRuntime()` from `openclaw/plugin-sdk/core`. Previous versions returned `compacted: false` expecting legacy fallback — that fallback no longer exists. Without this fix, sessions never compact. Bootstrap context is now cached in `bootstrap()` and consumed once in `before_prompt_build`, eliminating duplicate hook invocations.
695
+ - Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin implementing Hermes's `MemoryProvider` ABC. Symlink or copy into `hermes-agent/plugins/memory/clawmem/`. Uses shell-out for lifecycle hooks (session-bootstrap, context-surfacing, extraction) and REST API for tools (retrieve, get, session_log, timeline, similar). Plugin manages its own transcript JSONL for ClawMem hooks. Supports external (you run `clawmem serve`) and managed (plugin starts/stops serve) modes.
package/CLAUDE.md CHANGED
@@ -620,6 +620,13 @@ Symptom: reindex --force after v0.2.0 upgrade shows no entity extraction
620
620
  → `--force` alone only refreshes A-MEM notes (keywords, tags, context). `--enrich`
621
621
  is needed after major upgrades that add new enrichment stages.
622
622
 
623
+ Symptom: `clawmem update` crashes with "Binding expected string, TypedArray, boolean, number, bigint or null"
624
+ → YAML frontmatter values like `title: 2023-09-27` or `title: true` are coerced by gray-matter
625
+ into Date objects or booleans. Bun's SQLite driver rejects these as bind parameters.
626
+ → Fixed v0.4.2: `parseDocument()` runtime-checks all frontmatter fields via `str()` helper.
627
+ Defense-in-depth `safeTitle` guards in `insertDocument`/`updateDocument`/`reactivateDocument`.
628
+ → Affects: title, domain, workstream, content_type, review_by — any field gray-matter can coerce.
629
+
623
630
  Symptom: CLI reindex/update falls back to node-llama-cpp Vulkan (not GPU server)
624
631
  → GPU env vars only in systemd drop-in, not in wrapper script. CLI invocations missed them.
625
632
  → Fixed 2026-02-12: bin/clawmem wrapper exports CLAWMEM_EMBED_URL/LLM_URL/RERANK_URL defaults.
@@ -685,3 +692,4 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
685
692
  - HTTP REST API: `clawmem serve [--port 7438]` — optional REST server on localhost. Search, retrieval, lifecycle, and graph traversal. `POST /retrieve` mirrors `memory_retrieve` with auto-routing (keyword/semantic/causal/timeline/hybrid). `POST /search` provides direct mode selection. Bearer token auth via `CLAWMEM_API_TOKEN` env var (disabled if unset).
686
693
  - OpenClaw ContextEngine plugin: `clawmem setup openclaw` — registers ClawMem as a native OpenClaw context engine. Uses `before_prompt_build` for retrieval (prompt-aware), `afterTurn()` for extraction, `compact()` for pre-compaction + runtime delegation. Shares same vault as Claude Code hooks (dual-mode). SQLite busy_timeout=5000ms for concurrent access safety.
687
694
  - **OpenClaw v2026.3.28+ compaction fix (v0.3.0):** `compact()` now delegates to OpenClaw's runtime compactor via `delegateCompactionToRuntime()` from `openclaw/plugin-sdk/core`. Previous versions returned `compacted: false` expecting legacy fallback — that fallback no longer exists. Without this fix, sessions never compact. Bootstrap context is now cached in `bootstrap()` and consumed once in `before_prompt_build`, eliminating duplicate hook invocations.
695
+ - Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin implementing Hermes's `MemoryProvider` ABC. Symlink or copy into `hermes-agent/plugins/memory/clawmem/`. Uses shell-out for lifecycle hooks (session-bootstrap, context-surfacing, extraction) and REST API for tools (retrieve, get, session_log, timeline, similar). Plugin manages its own transcript JSONL for ClawMem hooks. Supports external (you run `clawmem serve`) and managed (plugin starts/stops serve) modes.
package/README.md CHANGED
@@ -1,14 +1,14 @@
1
- # ClawMem — Context engine for Claude Code and OpenClaw agents
1
+ # ClawMem — Context engine for Claude Code, OpenClaw, and Hermes agents
2
2
 
3
3
  <p align="center">
4
4
  <img src="docs/clawmem_hero.jpg" alt="ClawMem" width="100%">
5
5
  </p>
6
6
 
7
- **On-device memory for Claude Code and AI agents.** Retrieval-augmented search, hooks, and an MCP server in a single local system. No API keys, no cloud dependencies.
7
+ **On-device memory for Claude Code, OpenClaw, Hermes, and other AI agents.** Retrieval-augmented search, hooks, and an MCP server in a single local system. No API keys, no cloud dependencies.
8
8
 
9
9
  ClawMem fuses recent research into a retrieval-augmented memory layer that agents actually use. The hybrid architecture combines [QMD](https://github.com/tobi/qmd)-derived multi-signal retrieval (BM25 + vector search + reciprocal rank fusion + query expansion + cross-encoder reranking), [SAME](https://github.com/sgx-labs/statelessagent)-inspired composite scoring (recency decay, confidence, content-type half-lives, co-activation reinforcement), [MAGMA](https://arxiv.org/abs/2501.13956)-style intent classification with multi-graph traversal (semantic, temporal, and causal beam search), and [A-MEM](https://arxiv.org/abs/2510.02178) self-evolving memory notes that enrich documents with keywords, tags, and causal links between entries. Pattern extraction from [Engram](https://github.com/Gentleman-Programming/engram) adds deduplication windows, frequency-based durability scoring, and temporal navigation.
10
10
 
11
- Integrates via Claude Code hooks, an MCP server (works with any MCP-compatible client including OpenClaw), or a native OpenClaw ContextEngine plugin. All paths write to the same local SQLite vault. A decision captured during a Claude Code session shows up immediately when an OpenClaw agent picks up the same project.
11
+ Integrates via Claude Code hooks, an MCP server (works with any MCP-compatible client), a native OpenClaw ContextEngine plugin, or a Hermes Agent MemoryProvider plugin. All paths write to the same local SQLite vault. A decision captured during a Claude Code session shows up immediately when an OpenClaw or Hermes agent picks up the same project.
12
12
 
13
13
  TypeScript on Bun. MIT License.
14
14
 
@@ -40,7 +40,7 @@ ClawMem turns your markdown notes, project docs, and research dumps into persist
40
40
  - **Auto-routes queries** via `memory_retrieve` — classifies intent and dispatches to the optimal search backend
41
41
  - **Syncs project issues** from Beads issue trackers into searchable memory
42
42
 
43
- Runs fully local with no API keys and no cloud services. Integrates via Claude Code hooks and MCP tools, or as an OpenClaw ContextEngine plugin. Both modes share the same vault for cross-runtime memory. Works with any MCP-compatible client.
43
+ Runs fully local with no API keys and no cloud services. Integrates via Claude Code hooks and MCP tools, as an OpenClaw ContextEngine plugin, or as a Hermes Agent MemoryProvider plugin. All modes share the same vault for cross-runtime memory. Works with any MCP-compatible client.
44
44
 
45
45
  ### v0.2.0 Enhancements
46
46
 
@@ -85,6 +85,7 @@ Runs fully local with no API keys and no cloud services. Integrates via Claude C
85
85
 
86
86
  - [Claude Code](https://docs.anthropic.com/en/docs/claude-code) — for hooks + MCP integration
87
87
  - [OpenClaw](https://github.com/openclawai/openclaw) — for ContextEngine plugin integration
88
+ - [Hermes Agent](https://github.com/NousResearch/hermes-agent) — for MemoryProvider plugin integration
88
89
  - [bd CLI](https://github.com/dolthub/dolt) v0.58.0+ — for Beads issue tracker sync (only if using Beads)
89
90
 
90
91
  ### Install from npm (recommended)
@@ -118,7 +119,7 @@ After installing, here's the full journey from zero to working memory:
118
119
  | **3. Download models** | Get the GGUF files for your chosen stack | `wget` from HuggingFace, or let `node-llama-cpp` auto-download the QMD native models on first use | [Embedding](#embedding), [LLM Server](#llm-server), [Reranker Server](#reranker-server) |
119
120
  | **4. Start services** | Run GPU servers (if using dedicated GPU) and background services | `llama-server` for each model. systemd units for watcher + embed timer. | [systemd services](docs/guides/systemd-services.md) |
120
121
  | **5. Decide what to index** | Add collections for your projects, notes, research, and domain docs | `clawmem collection add ~/project --name project` | The more relevant markdown you index, the better retrieval works. See [building a rich context field](docs/introduction.md#building-a-rich-context-field). |
121
- | **6. Connect your agent** | Hook into Claude Code, OpenClaw, or any MCP client | `clawmem setup hooks && clawmem setup mcp` for Claude Code. `clawmem setup openclaw` for OpenClaw. | [Integration](#integration) |
122
+ | **6. Connect your agent** | Hook into Claude Code, OpenClaw, Hermes, or any MCP client | `clawmem setup hooks && clawmem setup mcp` for Claude Code. `clawmem setup openclaw` for OpenClaw. Copy `src/hermes/` to Hermes plugins for Hermes. | [Integration](#integration) |
122
123
  | **7. Verify** | Confirm everything is working | `clawmem doctor` (full health check) or `clawmem status` (quick index stats) | [Verify Installation](#verify-installation) |
123
124
 
124
125
  **Fastest path:** Step 1 alone gets you a working system with in-process CPU/GPU inference and default models — no manual model downloads or service configuration needed. Steps 2-4 are optional upgrades for better performance. Steps 5-6 are where you customize what gets indexed and how your agent connects.
@@ -203,9 +204,48 @@ openclaw config set agents.defaults.memorySearch.extraPaths "[]"
203
204
 
204
205
  **Alternative:** OpenClaw agents can also use ClawMem's MCP server directly (`clawmem setup mcp`), with or without hooks. This gives full access to all 28 MCP tools but bypasses OpenClaw's ContextEngine lifecycle, so you lose token budget awareness, native compaction orchestration, and the `afterTurn()` message pipeline. The ContextEngine plugin is recommended for new OpenClaw setups; MCP is available as an additional or standalone integration.
205
206
 
206
- #### Dual-Mode Operation
207
+ #### Hermes Agent
207
208
 
208
- Both integrations share the same SQLite vault by default. Claude Code and OpenClaw can run simultaneously - decisions captured in one runtime are immediately available in the other, giving agents persistent shared memory across sessions and platforms. WAL mode + busy_timeout handles concurrent access.
209
+ ClawMem integrates as a native plugin for `MemoryProvider`, Hermes's pluggable interface for agent memory. It provides the same automatic retrieval and extraction, delivered through Hermes's memory lifecycle instead of Claude Code hooks.
210
+
211
+ **Install:**
212
+
213
+ ```bash
214
+ # Copy or symlink the plugin into Hermes's plugin directory
215
+ cp -r /path/to/ClawMem/src/hermes /path/to/hermes-agent/plugins/memory/clawmem
216
+
217
+ # Or symlink for development
218
+ ln -s /path/to/ClawMem/src/hermes /path/to/hermes-agent/plugins/memory/clawmem
219
+ ```
220
+
221
+ **Configure** in your Hermes profile's `.env` or environment:
222
+ ```bash
223
+ CLAWMEM_BIN=/path/to/clawmem # Path to clawmem binary (or ensure it's on PATH)
224
+ CLAWMEM_SERVE_PORT=7438 # REST API port (default: 7438)
225
+ CLAWMEM_SERVE_MODE=external # "external" (you run clawmem serve) or "managed" (plugin manages it)
226
+ CLAWMEM_PROFILE=balanced # speed | balanced | deep
227
+ ```
228
+
229
+ Then set `memory.provider: clawmem` in your Hermes `config.yaml`, or run `hermes memory setup` to configure interactively.
230
+
231
+ **What the plugin provides:**
232
+
233
+ - **`prefetch()`** — prompt-aware retrieval via `context-surfacing` hook (automatic every turn)
234
+ - **`on_session_end()`** — decision extraction, handoff generation, feedback loop (parallel)
235
+ - **`on_pre_compress()`** — pre-compaction state preservation
236
+ - **`session-bootstrap`** — session registration + first-turn context injection
237
+ - **5 agent tools** — `clawmem_retrieve`, `clawmem_get`, `clawmem_session_log`, `clawmem_timeline`, `clawmem_similar`
238
+ - **Plugin-managed transcript** — maintains its own JSONL transcript for ClawMem hooks
239
+
240
+ **Requirements:** the `clawmem` binary on PATH, plus either `clawmem serve` already running (external mode) or managed mode, in which the plugin starts it automatically. Python 3.10+. No pip dependencies beyond Hermes itself (uses `urllib` for REST calls; `httpx` is optional for better performance).
241
+
242
+ **Alternative:** Hermes also has built-in MCP client support. You can add ClawMem as an MCP server in Hermes's `config.yaml` under `mcp_servers` for tool-only access. But this misses the lifecycle hooks (prefetch, session_end, pre_compress), so the native plugin is recommended.
243
+
244
+ See [Hermes plugin guide](docs/guides/hermes-plugin.md) for architecture details, lifecycle mapping, and troubleshooting.
245
+
246
+ #### Multi-Framework Operation
247
+
248
+ All three integrations share the same SQLite vault by default. Claude Code, OpenClaw, and Hermes can run simultaneously — decisions captured in one runtime are immediately available in the others, giving agents persistent shared memory across sessions and platforms. WAL mode + busy_timeout handles concurrent access.
209
249
 
210
250
  #### Multi-Vault (Optional)
211
251
 
@@ -1020,6 +1060,24 @@ Manual layers benefit from periodic re-indexing — a cron job running `clawmem
1020
1060
  ./bin/clawmem bootstrap ~/.openclaw/workspace --name workspace
1021
1061
  ```
1022
1062
 
1063
+ #### Hermes-Specific
1064
+
1065
+ ```bash
1066
+ # Hermes uses ~/.hermes/ as its home directory
1067
+ ./bin/clawmem bootstrap ~/.hermes --name hermes-home
1068
+
1069
+ # Install the memory provider plugin
1070
+ cp -r src/hermes /path/to/hermes-agent/plugins/memory/clawmem
1071
+
1072
+ # Start clawmem serve (external mode)
1073
+ clawmem serve --port 7438 &
1074
+
1075
+ # Configure Hermes to use ClawMem
1076
+ # In your Hermes config.yaml:
1077
+ # memory:
1078
+ # provider: clawmem
1079
+ ```
1080
+
1023
1081
  ## Dependencies
1024
1082
 
1025
1083
  | Package | Purpose |
@@ -1045,7 +1103,7 @@ Built on the shoulders of:
1045
1103
  - [Beads](https://github.com/steveyegge/beads) — Dolt-backed issue tracker for AI agents
1046
1104
  - [claude-mem](https://github.com/thedotmack/claude-mem) — Claude Code memory integration reference
1047
1105
  - [Engram](https://github.com/Gentleman-Programming/engram) — observation dedup window, topic-key upsert pattern, temporal timeline navigation, duplicate metadata scoring signals
1048
- - [Hermes Agent](https://github.com/NousResearch/hermes-agent) — memory nudge system (periodic lifecycle tool prompting)
1106
+ - [Hermes Agent](https://github.com/NousResearch/hermes-agent) — MemoryProvider plugin integration, memory nudge system (periodic lifecycle tool prompting)
1049
1107
  - [Hindsight](https://github.com/vectorize-io/hindsight) — entity resolution, MPFP graph traversal, temporal extraction, 3-tier consolidation, observation invalidation, 4-way parallel retrieval
1050
1108
  - [MAGMA](https://arxiv.org/abs/2501.13956) — multi-graph memory agent
1051
1109
  - [memory-lancedb-pro](https://github.com/CortexReach/memory-lancedb-pro) — retrieval gate, length normalization, MMR diversity, access reinforcement algorithms
package/SKILL.md CHANGED
@@ -598,6 +598,30 @@ openclaw config set agents.defaults.memorySearch.extraPaths '["~/documents", "~/
598
598
 
599
599
  ---
600
600
 
601
+ ## Hermes Agent Integration
602
+
603
+ ### Install
604
+
605
+ Copy or symlink `src/hermes/` into `hermes-agent/plugins/memory/clawmem/`. Set `memory.provider: clawmem` in Hermes config.
606
+
607
+ ### How it works
608
+
609
+ Plugin implements Hermes's `MemoryProvider` ABC:
610
+ - `prefetch()` — context-surfacing hook (automatic per turn)
611
+ - `on_session_end()` — extraction hooks in parallel (decision-extractor, handoff-generator, feedback-loop)
612
+ - `on_pre_compress()` — precompact-extract (side effect only)
613
+ - 5 agent tools via REST: `clawmem_retrieve`, `clawmem_get`, `clawmem_session_log`, `clawmem_timeline`, `clawmem_similar`
614
+
615
+ ### Key difference from OpenClaw/Claude Code
616
+
617
+ Hermes passes turn pairs, not transcript files. The plugin maintains its own JSONL transcript at `$HERMES_HOME/clawmem-transcripts/<session_id>.jsonl` so ClawMem hooks can read it.
618
+
619
+ ### Requirements
620
+
621
+ `clawmem` binary on PATH + `clawmem serve` running (external) or `CLAWMEM_SERVE_MODE=managed`. Python 3.10+.
622
+
623
+ ---
624
+
601
625
  ## Troubleshooting
602
626
 
603
627
  ```
@@ -707,6 +731,7 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
707
731
  - Beads integration: `syncBeadsIssues()` queries `bd` CLI (Dolt backend, v0.58.0+), creates markdown docs, maps dependency edges into `memory_relations`. Watcher auto-triggers on `.beads/` changes; `beads_sync` MCP for manual sync.
708
732
  - HTTP REST API: `clawmem serve [--port 7438]` — optional REST server on localhost. Search, retrieval, lifecycle, and graph traversal. `POST /retrieve` mirrors `memory_retrieve` with auto-routing (keyword/semantic/causal/timeline/hybrid). `POST /search` provides direct mode selection. Bearer token auth via `CLAWMEM_API_TOKEN` env var (disabled if unset).
709
733
  - OpenClaw ContextEngine plugin: `clawmem setup openclaw` — registers as native OpenClaw context engine. Dual-mode: shares vault with Claude Code hooks. Uses `before_prompt_build` for retrieval, `afterTurn()` for extraction, `compact()` for pre-compaction + runtime delegation (v0.3.0+, required for OpenClaw v2026.3.28+).
734
+ - Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin for Hermes's memory system. Shell-out hooks for lifecycle (prefetch, extraction, precompact), REST API for tools. Plugin-managed transcript JSONL bridges Hermes turn pairs to ClawMem file format. Shares vault with Claude Code and OpenClaw.
710
735
 
711
736
  ## Tool Selection (one-liner)
712
737
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.3.4",
3
+ "version": "0.4.2",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,611 @@
1
+ """ClawMem memory provider plugin for Hermes Agent.
2
+
3
+ On-device hybrid memory with composite scoring, graph traversal, and
4
+ lifecycle management. Integrates via REST API (tools) and CLI shell-out
5
+ (lifecycle hooks).
6
+
7
+ Requires:
8
+ - clawmem binary on PATH (or configured via CLAWMEM_BIN)
9
+ - clawmem serve running (or managed mode starts it automatically)
10
+
11
+ Config via environment variables:
12
+ CLAWMEM_BIN — Path to clawmem binary (default: auto-detect on PATH)
13
+ CLAWMEM_SERVE_PORT — REST API port (default: 7438)
14
+ CLAWMEM_SERVE_MODE — "external" (default) or "managed" (plugin starts/stops serve)
15
+ CLAWMEM_PROFILE — Retrieval profile: speed, balanced, deep (default: balanced)
16
+ CLAWMEM_EMBED_URL — GPU embedding server URL (optional)
17
+ CLAWMEM_LLM_URL — GPU LLM server URL (optional)
18
+ CLAWMEM_RERANK_URL — GPU reranker server URL (optional)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ import logging
25
+ import os
26
+ import shutil
27
+ import subprocess
28
+ import threading
29
+ import time
30
+ from pathlib import Path
31
+ from typing import Any, Dict, List, Optional
32
+
33
+ from agent.memory_provider import MemoryProvider
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ _DEFAULT_PORT = 7438
38
+ _HOOK_TIMEOUT = 30 # seconds
39
+ _REST_TIMEOUT = 5.0 # seconds
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Helpers
44
+ # ---------------------------------------------------------------------------
45
+
46
+ def _find_clawmem_bin() -> Optional[str]:
47
+ """Find the clawmem binary. Check env, then PATH."""
48
+ env_bin = os.environ.get("CLAWMEM_BIN")
49
+ if env_bin and os.path.isfile(env_bin) and os.access(env_bin, os.X_OK):
50
+ return env_bin
51
+ return shutil.which("clawmem")
52
+
53
+
54
def _run_hook(bin_path: str, hook_name: str, hook_input: dict,
              timeout: int = _HOOK_TIMEOUT, env_extra: Optional[dict] = None) -> Optional[str]:
    """Run ``<bin_path> hook <hook_name>`` with ``hook_input`` as JSON on stdin.

    Args:
        bin_path: Path to the clawmem executable.
        hook_name: Hook to invoke (passed as the argument after ``hook``).
        hook_input: Payload serialised with ``json.dumps`` and written to stdin.
        timeout: Seconds to wait before giving up on the subprocess.
        env_extra: Extra environment variables layered over ``os.environ``.

    Returns:
        The subprocess's stdout when it exits with status 0; ``None`` on a
        non-zero exit, a timeout, or any other failure. Failures are logged
        at debug level and never raised.
    """
    try:
        env = {**os.environ, **(env_extra or {})}
        result = subprocess.run(
            [bin_path, "hook", hook_name],
            input=json.dumps(hook_input),
            capture_output=True,
            text=True,
            # Pin UTF-8 instead of the locale codec: on a non-UTF-8 locale,
            # non-ASCII hook output would otherwise fail to decode and the
            # whole result would be dropped by the handler below.
            encoding="utf-8",
            timeout=timeout,
            env=env,
        )
        if result.returncode == 0:
            return result.stdout
        logger.debug("clawmem hook %s exited %d: %s", hook_name, result.returncode, result.stderr)
        return None
    except subprocess.TimeoutExpired:
        logger.debug("clawmem hook %s timed out after %ds", hook_name, timeout)
        return None
    except Exception as e:
        logger.debug("clawmem hook %s failed: %s", hook_name, e)
        return None
77
+
78
+
79
def _rest_call(port: int, method: str, path: str,
               body: Optional[dict] = None, timeout: float = _REST_TIMEOUT) -> Optional[dict]:
    """Call the ClawMem REST API on 127.0.0.1 and return the parsed JSON reply.

    Uses ``httpx`` when it is importable and falls back to ``urllib`` so the
    plugin works without extra dependencies. A bearer token is attached when
    ``CLAWMEM_API_TOKEN`` is set in the environment.

    Args:
        port: Port the REST server listens on.
        method: ``"GET"`` sends no body; any other value sends a JSON body.
        path: Request path, including the leading slash.
        body: JSON payload for non-GET requests; ``{}`` is sent when omitted.
        timeout: Per-request timeout in seconds.

    Returns:
        The decoded JSON response, or ``None`` on any failure (connection
        error, HTTP error status, invalid JSON). Failures are logged at debug
        level and never raised.
    """
    url = f"http://127.0.0.1:{port}{path}"
    headers: dict = {"Content-Type": "application/json"}
    token = os.environ.get("CLAWMEM_API_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        import httpx
    except ImportError:
        httpx = None  # fall back to urllib for zero-dependency operation

    try:
        if httpx is not None:
            # Context manager closes the client's connection pool; the client
            # was previously left open on every call.
            with httpx.Client(timeout=timeout) as client:
                if method == "GET":
                    resp = client.get(url, headers=headers)
                else:
                    resp = client.post(url, json=body or {}, headers=headers)
                resp.raise_for_status()
                return resp.json()

        import urllib.request
        # Mirror the httpx branch: GET carries no body, everything else sends
        # a JSON object even when the caller supplied none.
        payload = None if method == "GET" else json.dumps(body or {}).encode()
        req = urllib.request.Request(url, data=payload, headers=headers, method=method)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read().decode())
    except Exception as e:
        logger.debug("ClawMem REST %s %s failed: %s", method, path, e)
        return None
122
+
123
+
124
+ def _extract_context(hook_output: str) -> str:
125
+ """Extract additionalContext from hook JSON output."""
126
+ if not hook_output:
127
+ return ""
128
+ try:
129
+ parsed = json.loads(hook_output.strip().split("\n")[-1])
130
+ hso = parsed.get("hookSpecificOutput", {})
131
+ return hso.get("additionalContext", "")
132
+ except (json.JSONDecodeError, IndexError):
133
+ return ""
134
+
135
+
136
+ # ---------------------------------------------------------------------------
137
+ # Tool schemas
138
+ # ---------------------------------------------------------------------------
139
+
140
def _param(kind: str, description: str) -> Dict[str, str]:
    """Build one property entry (type + description) for a tool parameter."""
    return {"type": kind, "description": description}


def _tool_schema(name: str, description: str, properties: Dict[str, Any],
                 required: Optional[List[str]] = None) -> Dict[str, Any]:
    """Assemble a tool schema; ``required`` is omitted entirely when not given."""
    parameters: Dict[str, Any] = {"type": "object", "properties": properties}
    if required is not None:
        parameters["required"] = required
    return {"name": name, "description": description, "parameters": parameters}


# Auto-routed search over long-term memory.
RETRIEVE_SCHEMA = _tool_schema(
    "clawmem_retrieve",
    "Search long-term memory with auto-routing. Handles keyword, semantic, "
    "causal, and timeline queries automatically. Use for recalling past "
    "decisions, preferences, session history, and learned patterns.",
    {
        "query": _param("string", "Search query."),
        "limit": _param("integer", "Max results (default: 10)."),
    },
    ["query"],
)

# Fetch a single document by docid.
GET_SCHEMA = _tool_schema(
    "clawmem_get",
    "Retrieve full content of a memory document by its docid (6-char hex prefix).",
    {
        "docid": _param("string", "Document ID (6-char hex prefix)."),
    },
    ["docid"],
)

# Recent session summaries; takes no required parameters.
SESSION_LOG_SCHEMA = _tool_schema(
    "clawmem_session_log",
    "List recent session summaries for cross-session context.",
    {
        "limit": _param("integer", "Number of sessions (default: 5)."),
    },
)

# Documents created before/after a given document.
TIMELINE_SCHEMA = _tool_schema(
    "clawmem_timeline",
    "Show temporal context around a document — what was created before and after.",
    {
        "docid": _param("string", "Document ID (6-char hex prefix)."),
        "before": _param("integer", "Docs before (default: 5)."),
        "after": _param("integer", "Docs after (default: 5)."),
    },
    ["docid"],
)

# Semantic neighbours of a given document.
SIMILAR_SCHEMA = _tool_schema(
    "clawmem_similar",
    "Find documents semantically similar to a given document.",
    {
        "docid": _param("string", "Document ID (6-char hex prefix)."),
        "limit": _param("integer", "Max results (default: 5)."),
    },
    ["docid"],
)
208
+
209
+
210
+ # ---------------------------------------------------------------------------
211
+ # MemoryProvider implementation
212
+ # ---------------------------------------------------------------------------
213
+
214
+ class ClawMemProvider(MemoryProvider):
215
+ """ClawMem memory provider for Hermes Agent."""
216
+
217
+ def __init__(self):
218
+ self._bin: Optional[str] = None
219
+ self._port: int = _DEFAULT_PORT
220
+ self._session_id: str = ""
221
+ self._transcript_path: str = ""
222
+ self._hermes_home: str = ""
223
+ self._serve_mode: str = "external"
224
+ self._serve_proc: Optional[subprocess.Popen] = None
225
+ self._env_extra: dict = {}
226
+
227
+ # Prefetch state (generation counter prevents stale overwrites)
228
+ self._prefetch_result: str = ""
229
+ self._prefetch_result_gen: int = 0 # generation of stored result
230
+ self._prefetch_generation: int = 0 # latest queued generation
231
+ self._prefetch_consumed_gen: int = 0 # last generation consumed by prefetch()
232
+ self._prefetch_lock = threading.Lock()
233
+ self._prefetch_thread: Optional[threading.Thread] = None
234
+
235
+ # Bootstrap context (consumed on first prefetch)
236
+ self._bootstrap_context: str = ""
237
+
238
+ @property
239
+ def name(self) -> str:
240
+ return "clawmem"
241
+
242
+ # -- Config ----------------------------------------------------------------
243
+
244
+ def get_config_schema(self) -> List[Dict[str, Any]]:
245
+ return [
246
+ {
247
+ "key": "serve_port",
248
+ "description": "ClawMem REST API port",
249
+ "default": str(_DEFAULT_PORT),
250
+ "env_var": "CLAWMEM_SERVE_PORT",
251
+ },
252
+ {
253
+ "key": "serve_mode",
254
+ "description": "Server mode: 'external' (you run clawmem serve) or 'managed' (plugin manages it)",
255
+ "default": "external",
256
+ "choices": ["external", "managed"],
257
+ "env_var": "CLAWMEM_SERVE_MODE",
258
+ },
259
+ {
260
+ "key": "profile",
261
+ "description": "Retrieval profile: speed (BM25 only), balanced (hybrid), deep (full pipeline)",
262
+ "default": "balanced",
263
+ "choices": ["speed", "balanced", "deep"],
264
+ "env_var": "CLAWMEM_PROFILE",
265
+ },
266
+ {
267
+ "key": "bin_path",
268
+ "description": "Path to clawmem binary (auto-detected if on PATH)",
269
+ "env_var": "CLAWMEM_BIN",
270
+ },
271
+ {
272
+ "key": "embed_url",
273
+ "description": "GPU embedding server URL (e.g., http://localhost:8088)",
274
+ "secret": False,
275
+ "env_var": "CLAWMEM_EMBED_URL",
276
+ },
277
+ {
278
+ "key": "llm_url",
279
+ "description": "GPU LLM server URL (e.g., http://localhost:8089)",
280
+ "secret": False,
281
+ "env_var": "CLAWMEM_LLM_URL",
282
+ },
283
+ ]
284
+
285
+ # -- Core lifecycle --------------------------------------------------------
286
+
287
+ def is_available(self) -> bool:
288
+ """Check if clawmem binary is on PATH. No network calls."""
289
+ return _find_clawmem_bin() is not None
290
+
291
def initialize(self, session_id: str, **kwargs) -> None:
    """Set up per-session state and run the ``session-bootstrap`` hook.

    Args:
        session_id: Stored on the instance, used as the transcript file stem
            and included in the hook payload.
        **kwargs: Only ``hermes_home`` is read; it defaults to ``~/.hermes``.

    When ``_find_clawmem_bin()`` returns nothing, a warning is logged and no
    further state is initialised.
    """
    binary = _find_clawmem_bin()
    self._bin = binary
    if not binary:
        logger.warning("clawmem binary not found on PATH — provider disabled")
        return

    self._session_id = session_id

    # CLAWMEM_SERVE_PORT is used when it converts to int; otherwise the default.
    raw_port = os.environ.get("CLAWMEM_SERVE_PORT", _DEFAULT_PORT)
    try:
        self._port = int(raw_port)
    except (ValueError, TypeError):
        self._port = _DEFAULT_PORT

    self._serve_mode = os.environ.get("CLAWMEM_SERVE_MODE", "external")
    default_home = str(Path.home() / ".hermes")
    self._hermes_home = kwargs.get("hermes_home", default_home)

    # Copy non-empty endpoint/profile variables into the env passed to hooks.
    forwarded = ("CLAWMEM_EMBED_URL", "CLAWMEM_LLM_URL", "CLAWMEM_RERANK_URL", "CLAWMEM_PROFILE")
    for name, value in ((key, os.environ.get(key)) for key in forwarded):
        if value:
            self._env_extra[name] = value

    # Transcript lives at <hermes_home>/clawmem-transcripts/<session_id>.jsonl
    transcripts = Path(self._hermes_home) / "clawmem-transcripts"
    transcripts.mkdir(parents=True, exist_ok=True)
    self._transcript_path = str(transcripts / f"{session_id}.jsonl")

    # Only "managed" mode spawns the serve process from here.
    if self._serve_mode == "managed":
        self._start_serve()

    payload = {
        "session_id": session_id,
        "transcript_path": self._transcript_path,
        "hook_event_name": "SessionStart",
    }
    raw = _run_hook(self._bin, "session-bootstrap", payload, env_extra=self._env_extra)
    if not raw:
        return
    context = _extract_context(raw)
    if not context:
        return
    # Kept for one-shot consumption by prefetch().
    self._bootstrap_context = context
    logger.info("clawmem: session-bootstrap returned %d chars of context", len(context))
332
+
333
def system_prompt_block(self) -> str:
    """Return the static system-prompt text advertising the ClawMem tools.

    Returns:
        The empty string when ``self._bin`` is falsy; otherwise a fixed
        two-line block naming the five ``clawmem_*`` tools.
    """
    if self._bin:
        return "".join([
            "# ClawMem Memory System\n",
            "Active. Use clawmem_retrieve to search memory, clawmem_get for ",
            "full documents, clawmem_session_log for session history, ",
            "clawmem_timeline for temporal context, clawmem_similar for discovery.",
        ])
    return ""
342
+
343
+ # -- Prefetch / recall -----------------------------------------------------
344
+
345
def prefetch(self, query: str, *, session_id: str = "") -> str:
    """Hand back pending bootstrap context and the latest unconsumed prefetch.

    Args:
        query: Not used by this method.
        session_id: Not used by this method.

    Returns:
        The bootstrap context and/or prefetched context joined by a blank
        line, or the empty string when neither is pending.
    """
    # Give an in-flight background prefetch up to 3 seconds to finish.
    worker = self._prefetch_thread
    if worker and worker.is_alive():
        worker.join(timeout=3.0)

    # Bootstrap context is handed out once, then cleared.
    bootstrap = self._bootstrap_context
    if bootstrap:
        self._bootstrap_context = ""

    with self._prefetch_lock:
        surfaced = self._prefetch_result
        # Only a result from a generation newer than the last consumed one counts.
        is_fresh = bool(surfaced) and self._prefetch_result_gen > self._prefetch_consumed_gen
        # Unconditionally mark everything queued so far as consumed and drop
        # the stored result, so a late write cannot surface on a later turn.
        self._prefetch_consumed_gen = self._prefetch_generation
        self._prefetch_result = ""

    pieces = [bootstrap] if bootstrap else []
    if is_fresh:
        pieces.append(surfaced)
    return "\n\n".join(pieces)
369
+
370
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
    """Start a background thread that runs the ``context-surfacing`` hook.

    Args:
        query: Prompt text sent to the hook; ignored when empty or shorter
            than 5 characters.
        session_id: Not used by this method.

    Nothing happens when ``self._bin`` is falsy.
    """
    if not (self._bin and query and len(query) >= 5):
        return

    # Claim a new generation number; a worker may only publish its result
    # while its number is still the newest one.
    with self._prefetch_lock:
        self._prefetch_generation += 1
        ticket = self._prefetch_generation

    def _worker():
        payload = {
            "session_id": self._session_id,
            "transcript_path": self._transcript_path,
            "prompt": query,
            "hook_event_name": "UserPromptSubmit",
        }
        raw = _run_hook(self._bin, "context-surfacing", payload,
                        env_extra=self._env_extra)
        if not raw:
            return
        context = _extract_context(raw)
        if not context:
            return
        with self._prefetch_lock:
            # Discard the result if a newer prefetch was queued meanwhile.
            if ticket != self._prefetch_generation:
                return
            self._prefetch_result = context
            self._prefetch_result_gen = ticket

    # Give a still-running earlier prefetch up to 5 seconds before replacing it.
    previous = self._prefetch_thread
    if previous and previous.is_alive():
        previous.join(timeout=5.0)

    launched = threading.Thread(target=_worker, daemon=True, name="clawmem-prefetch")
    self._prefetch_thread = launched
    launched.start()
406
+
407
+ # -- Sync / transcript management ------------------------------------------
408
+
409
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
    """Append one user/assistant exchange to the transcript file.

    Two JSON objects are written, one per line: the user message first, then
    the assistant message, both carrying the same UTC timestamp. (The
    original docstring describes this as Claude Code transcript format that
    ClawMem hooks read.)

    Args:
        user_content: Stored as the ``content`` of the ``user`` record.
        assistant_content: Stored as the ``content`` of the ``assistant`` record.
        session_id: Not used by this method.

    Any exception while writing is logged at debug level and suppressed.
    """
    target = self._transcript_path
    if not target:
        return

    try:
        # Second resolution; the millisecond field is a literal "000".
        stamp = time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime())
        exchange = (("user", user_content), ("assistant", assistant_content))
        with open(target, "a") as handle:
            for role, text in exchange:
                record = {
                    "type": "message",
                    "message": {
                        "role": role,
                        "content": text,
                    },
                    "timestamp": stamp,
                }
                handle.write(json.dumps(record) + "\n")
    except Exception as exc:
        logger.debug("clawmem: sync_turn write failed: %s", exc)
440
+
441
+ # -- Session end / compression hooks ---------------------------------------
442
+
443
def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
    """Run the three session-end hooks concurrently and wait for them.

    Args:
        messages: Not used by this method; the hooks receive the transcript
            path instead.

    Does nothing when ``self._bin`` or ``self._transcript_path`` is falsy.
    """
    if not (self._bin and self._transcript_path):
        return

    payload = {
        "session_id": self._session_id,
        "transcript_path": self._transcript_path,
        "hook_event_name": "Stop",
    }

    # One daemon thread per hook, all sharing the same payload dict.
    workers = [
        threading.Thread(
            target=_run_hook,
            args=(self._bin, hook, payload),
            kwargs={"env_extra": self._env_extra},
            daemon=True,
            name=f"clawmem-{hook}",
        )
        for hook in ("decision-extractor", "handoff-generator", "feedback-loop")
    ]
    for worker in workers:
        worker.start()

    # Each join is bounded by the hook timeout plus 5 seconds.
    join_limit = _HOOK_TIMEOUT + 5
    for worker in workers:
        worker.join(timeout=join_limit)

    logger.info("clawmem: session %s extraction complete", self._session_id[:8])
471
+
472
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
    """Run the ``precompact-extract`` hook for its side effects.

    Args:
        messages: Not used by this method.

    Returns:
        Always the empty string; the hook's output is discarded.
    """
    if self._bin and self._transcript_path:
        payload = {
            "session_id": self._session_id,
            "transcript_path": self._transcript_path,
            "hook_event_name": "PreCompact",
        }
        _run_hook(self._bin, "precompact-extract", payload, env_extra=self._env_extra)
    return ""
484
+
485
+ # -- Tools (REST API) ------------------------------------------------------
486
+
487
def get_tool_schemas(self) -> List[Dict[str, Any]]:
    """Return the five tool schemas in a fresh list.

    Order: retrieve, get, session log, timeline, similar.
    """
    schemas = (
        RETRIEVE_SCHEMA,
        GET_SCHEMA,
        SESSION_LOG_SCHEMA,
        TIMELINE_SCHEMA,
        SIMILAR_SCHEMA,
    )
    return list(schemas)
489
+
490
def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
    """Dispatch a tool call to the matching ``_tool_*`` method.

    Args:
        tool_name: One of the five ``clawmem_*`` tool names.
        args: Passed unchanged to the handler.
        **kwargs: Accepted and ignored.

    Returns:
        The handler's return value; a JSON ``{"error": ...}`` string when the
        tool name is unknown or the handler raises.
    """
    routes = (
        ("clawmem_retrieve", "_tool_retrieve"),
        ("clawmem_get", "_tool_get"),
        ("clawmem_session_log", "_tool_session_log"),
        ("clawmem_timeline", "_tool_timeline"),
        ("clawmem_similar", "_tool_similar"),
    )
    try:
        for known_name, method_name in routes:
            if tool_name == known_name:
                # Resolve the handler only once its name has matched.
                return getattr(self, method_name)(args)
        return json.dumps({"error": f"Unknown tool: {tool_name}"})
    except Exception as exc:
        # Handler failures are reported to the caller as JSON, not raised.
        return json.dumps({"error": str(exc)})
505
+
506
+ def _tool_retrieve(self, args: dict) -> str:
507
+ query = args.get("query", "")
508
+ if not query:
509
+ return json.dumps({"error": "query is required"})
510
+ body = {"query": query, "compact": True}
511
+ if args.get("limit"):
512
+ body["limit"] = args["limit"]
513
+ data = _rest_call(self._port, "POST", "/retrieve", body)
514
+ if data is None:
515
+ return json.dumps({"error": "ClawMem REST API unreachable"})
516
+ return json.dumps(data, ensure_ascii=False)
517
+
518
+ def _tool_get(self, args: dict) -> str:
519
+ docid = args.get("docid", "")
520
+ if not docid:
521
+ return json.dumps({"error": "docid is required"})
522
+ data = _rest_call(self._port, "GET", f"/documents/{docid}")
523
+ if data is None:
524
+ return json.dumps({"error": f"Document not found: {docid}"})
525
+ return json.dumps(data, ensure_ascii=False)
526
+
527
+ def _tool_session_log(self, args: dict) -> str:
528
+ limit = args.get("limit", 5)
529
+ data = _rest_call(self._port, "GET", f"/sessions?limit={limit}")
530
+ if data is None:
531
+ return json.dumps({"error": "ClawMem REST API unreachable"})
532
+ return json.dumps(data, ensure_ascii=False)
533
+
534
+ def _tool_timeline(self, args: dict) -> str:
535
+ docid = args.get("docid", "")
536
+ if not docid:
537
+ return json.dumps({"error": "docid is required"})
538
+ before = args.get("before", 5)
539
+ after = args.get("after", 5)
540
+ data = _rest_call(self._port, "GET", f"/timeline/{docid}?before={before}&after={after}")
541
+ if data is None:
542
+ return json.dumps({"error": "ClawMem REST API unreachable"})
543
+ return json.dumps(data, ensure_ascii=False)
544
+
545
+ def _tool_similar(self, args: dict) -> str:
546
+ docid = args.get("docid", "")
547
+ if not docid:
548
+ return json.dumps({"error": "docid is required"})
549
+ limit = args.get("limit", 5)
550
+ data = _rest_call(self._port, "GET", f"/graph/similar/{docid}?limit={limit}")
551
+ if data is None:
552
+ return json.dumps({"error": "ClawMem REST API unreachable"})
553
+ return json.dumps(data, ensure_ascii=False)
554
+
555
+ # -- Managed serve ---------------------------------------------------------
556
+
557
+ def _start_serve(self) -> None:
558
+ """Start clawmem serve as a managed child process with readiness probe."""
559
+ if not self._bin:
560
+ return
561
+ try:
562
+ env = {**os.environ, **self._env_extra}
563
+ self._serve_proc = subprocess.Popen(
564
+ [self._bin, "serve", "--port", str(self._port)],
565
+ stdout=subprocess.DEVNULL,
566
+ stderr=subprocess.DEVNULL,
567
+ env=env,
568
+ )
569
+ # Readiness probe — wait up to 5s for /health to respond
570
+ for attempt in range(10):
571
+ # Check if process exited immediately (port conflict, crash)
572
+ if self._serve_proc.poll() is not None:
573
+ logger.warning("clawmem: managed serve exited immediately (code=%d)",
574
+ self._serve_proc.returncode)
575
+ self._serve_proc = None
576
+ return
577
+ time.sleep(0.5)
578
+ health = _rest_call(self._port, "GET", "/health", timeout=1.0)
579
+ if health:
580
+ logger.info("clawmem: managed serve ready (pid=%d, port=%d)",
581
+ self._serve_proc.pid, self._port)
582
+ return
583
+ logger.warning("clawmem: managed serve started but health check timed out (pid=%d)",
584
+ self._serve_proc.pid)
585
+ except Exception as e:
586
+ logger.warning("clawmem: failed to start managed serve: %s", e)
587
+
588
+ # -- Shutdown --------------------------------------------------------------
589
+
590
def shutdown(self) -> None:
    """Wait briefly for the prefetch thread, then stop the managed serve process.

    A still-running prefetch thread is joined for up to 5 seconds. A managed
    serve process that has not exited is sent ``terminate()``; if it is still
    alive after 5 seconds it is killed and then waited on so the child is
    reaped rather than left as a zombie.
    """
    worker = self._prefetch_thread
    if worker and worker.is_alive():
        worker.join(timeout=5.0)

    proc = self._serve_proc
    if proc and proc.poll() is None:
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            # kill() only sends the signal; the exit status must still be
            # collected. Bounded so shutdown itself cannot hang.
            try:
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                logger.warning("clawmem: managed serve did not exit after kill (pid=%d)",
                               proc.pid)
        logger.info("clawmem: managed serve stopped")
603
+
604
+
605
+ # ---------------------------------------------------------------------------
606
+ # Plugin entry point
607
+ # ---------------------------------------------------------------------------
608
+
609
def register(ctx) -> None:
    """Plugin entry point: register a new ``ClawMemProvider`` as a memory provider.

    Args:
        ctx: Object exposing ``register_memory_provider``.
    """
    provider = ClawMemProvider()
    ctx.register_memory_provider(provider)
@@ -0,0 +1,10 @@
1
+ name: clawmem
2
+ version: 1.0.0
3
+ description: "ClawMem — on-device hybrid memory with composite scoring, graph traversal, and lifecycle management. Requires clawmem binary and optionally clawmem serve."
4
+ external_dependencies:
5
+ - name: clawmem
6
+ install: "See https://github.com/yoloshii/ClawMem#install"
7
+ check: "clawmem --version"
8
+ hooks:
9
+ - on_session_end
10
+ - on_pre_compress
package/src/indexer.ts CHANGED
@@ -87,17 +87,21 @@ export function extractTitle(content: string, filename: string): string {
87
87
  // =============================================================================
88
88
 
89
89
  export function parseDocument(content: string, relativePath: string): { body: string; meta: DocumentMeta } {
90
+ // gray-matter coerces YAML values: `title: 2023-09-27` → Date, `title: true` → boolean.
91
+ // All frontmatter fields must be runtime-checked to prevent SQLite binding errors.
92
+ const str = (v: unknown): string | undefined =>
93
+ typeof v === "string" ? v || undefined : undefined;
90
94
  try {
91
95
  const { data, content: body } = matter(content);
92
96
  return {
93
97
  body,
94
98
  meta: {
95
- title: data.title as string | undefined,
99
+ title: str(data.title),
96
100
  tags: Array.isArray(data.tags) ? data.tags.map(String) : undefined,
97
- domain: data.domain as string | undefined,
98
- workstream: data.workstream as string | undefined,
99
- content_type: (data.content_type as ContentType) || inferContentType(relativePath),
100
- review_by: data.review_by as string | undefined,
101
+ domain: str(data.domain),
102
+ workstream: str(data.workstream),
103
+ content_type: (str(data.content_type) as ContentType) || inferContentType(relativePath),
104
+ review_by: str(data.review_by),
101
105
  },
102
106
  };
103
107
  } catch {
@@ -233,7 +237,7 @@ export async function indexCollection(
233
237
 
234
238
  // Content changed — update
235
239
  const { body, meta } = parseDocument(content, relativePath);
236
- const title = meta.title || extractTitle(body, relativePath);
240
+ const title = (typeof meta.title === "string" && meta.title) ? meta.title : extractTitle(body, relativePath);
237
241
  const docHash = hashContent(body);
238
242
 
239
243
  store.insertContent(docHash, body, now);
@@ -265,7 +269,7 @@ export async function indexCollection(
265
269
  ).get(collectionName, relativePath) as { id: number; hash: string } | null;
266
270
 
267
271
  const { body, meta } = parseDocument(content, relativePath);
268
- const title = meta.title || extractTitle(body, relativePath);
272
+ const title = (typeof meta.title === "string" && meta.title) ? meta.title : extractTitle(body, relativePath);
269
273
  const docHash = hashContent(body);
270
274
  const contentType = meta.content_type || inferContentType(relativePath);
271
275
 
package/src/store.ts CHANGED
@@ -1583,10 +1583,12 @@ export function insertDocument(
1583
1583
  createdAt: string,
1584
1584
  modifiedAt: string
1585
1585
  ): void {
1586
+ // Guard: gray-matter can coerce YAML values to Date/boolean/null — SQLite rejects these
1587
+ const safeTitle = (typeof title === "string") ? title : String(title ?? "Untitled");
1586
1588
  db.prepare(`
1587
1589
  INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
1588
1590
  VALUES (?, ?, ?, ?, ?, ?, 1)
1589
- `).run(collectionName, path, title, hash, createdAt, modifiedAt);
1591
+ `).run(collectionName, path, safeTitle, hash, createdAt, modifiedAt);
1590
1592
  }
1591
1593
 
1592
1594
  // =============================================================================
@@ -1915,8 +1917,9 @@ export function reactivateDocument(
1915
1917
  hash: string,
1916
1918
  modifiedAt: string
1917
1919
  ): void {
1920
+ const safeTitle = (typeof title === "string") ? title : String(title ?? "Untitled");
1918
1921
  db.prepare(`UPDATE documents SET active = 1, title = ?, hash = ?, modified_at = ? WHERE id = ?`)
1919
- .run(title, hash, modifiedAt, documentId);
1922
+ .run(safeTitle, hash, modifiedAt, documentId);
1920
1923
  }
1921
1924
 
1922
1925
  /**
@@ -1943,8 +1946,9 @@ export function updateDocument(
1943
1946
  hash: string,
1944
1947
  modifiedAt: string
1945
1948
  ): void {
1949
+ const safeTitle = (typeof title === "string") ? title : String(title ?? "Untitled");
1946
1950
  db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`)
1947
- .run(title, hash, modifiedAt, documentId);
1951
+ .run(safeTitle, hash, modifiedAt, documentId);
1948
1952
  }
1949
1953
 
1950
1954
  /**