clawmem 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -307,9 +307,15 @@ All other retrieval is handled by Tier 2 hooks. Do NOT call MCP tools speculativ
307
307
  4. Chain tracing → find_causal_links(docid, direction="both", depth=5)
308
308
  Traverses causal edges between _clawmem/agent/observations/ docs (from decision-extractor).
309
309
 
310
- 5. Memory debugging → memory_evolution_status(docid)
310
+ 5. Entity facts → kg_query(entity, as_of?, direction?)
311
+ Structured SPO triples with temporal validity. Different from intent_search:
312
+ - kg_query: "what does ClawMem relate to?" → returns structured facts (subject-predicate-object)
313
+ - intent_search: "why did we choose ClawMem?" → returns documents with causal reasoning
314
+ Use kg_query for entity lookup, intent_search for causal chains.
311
315
 
312
- 6. Temporal context → timeline(docid, before=5, after=5, same_collection=false)
316
+ 6. Memory debugging → memory_evolution_status(docid)
317
+
318
+ 7. Temporal context → timeline(docid, before=5, after=5, same_collection=false)
313
319
  Shows what was created/modified before and after a document.
314
320
  Use after search to understand chronological neighborhood.
315
321
  ```
@@ -327,6 +333,9 @@ All other retrieval is handled by Tier 2 hooks. Do NOT call MCP tools speculativ
327
333
  - `timeline(docid, before=5, after=5, same_collection=false)` — temporal neighborhood around a document. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation.
328
334
  - `list_vaults()` — show configured vault names and paths. Empty in single-vault mode (default).
329
335
  - `vault_sync(vault, content_root, pattern?, collection_name?)` — index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories (`/etc/`, `/root/`, `.ssh`, `.env`, `credentials`, etc.).
336
+ - `kg_query(entity, as_of?, direction?)` — query the SPO knowledge graph for an entity's relationships. Returns temporal triples with validity windows. USE THIS for "what does X relate to?", "what was true about X in January?". Uses entity resolution for lookup.
337
+ - `diary_write(entry, topic?, agent?)` — write a diary entry. USE PROACTIVELY in non-hooked environments (Hermes, Gemini, plain MCP) for recording important events and decisions. Do NOT use in Claude Code (hooks handle this automatically).
338
+ - `diary_read(last_n?, agent?)` — read recent diary entries.
330
339
 
331
340
  ### Multi-Vault
332
341
 
@@ -354,6 +363,9 @@ Pin, snooze, and forget are **manual MCP tools** — not automated. The agent sh
354
363
  - Do NOT pin everything — pin is for persistent high-priority items, not temporary boosting.
355
364
  - Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it naturally.
356
365
  - Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically. Only after bulk ingestion or when `intent_search` returns weak graph results.
366
+ - Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command (same category as `update`/`reindex`). Suggest it to the user when they mention old conversation exports, but let them run it. Bulk import has disk/embedding cost implications that need user consent.
367
+ - Do NOT use `diary_write` in Claude Code — hooks (`decision-extractor`, `handoff-generator`) capture this automatically. Diary is for non-hooked environments only (Hermes, Gemini, plain MCP clients).
368
+ - Do NOT use `kg_query` for causal "why" questions — use `intent_search` or `memory_retrieve`. `kg_query` returns structured entity facts (SPO triples), not reasoning chains.
357
369
 
358
370
  ## Tool Selection (one-liner)
359
371
 
@@ -435,16 +447,16 @@ compositeScore = (0.10 × searchScore + 0.70 × recencyScore + 0.20 × confidenc
435
447
 
436
448
  | Content Type | Half-Life | Effect |
437
449
  |--------------|-----------|--------|
438
- | decision, hub | ∞ | Never decay |
450
+ | decision, preference, hub | ∞ | Never decay |
439
451
  | antipattern | ∞ | Never decay — accumulated negative patterns persist |
440
452
  | project | 120 days | Slow decay |
441
453
  | research | 90 days | Moderate decay |
442
- | note | 60 days | Default |
443
- | progress | 45 days | Faster decay |
454
+ | problem, milestone, note | 60 days | Default |
455
+ | conversation, progress | 45 days | Faster decay |
444
456
  | handoff | 30 days | Fast — recent matters most |
445
457
 
446
458
  Half-lives extend up to 3× for frequently-accessed memories (access reinforcement decays over 90 days).
447
- Attention decay: non-durable types (handoff, progress, note, project) lose 5% confidence per week without access. Decision/hub/research/antipattern are exempt.
459
+ Attention decay: non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access. Decision/preference/hub/research/antipattern are exempt.
448
460
 
449
461
  ## Indexing & Graph Building
450
462
 
@@ -563,6 +575,12 @@ Symptom: "Local model download blocked" error
563
575
  → llama-server endpoint unreachable while CLAWMEM_NO_LOCAL_MODELS=true.
564
576
  → Fix: Start the llama-server instance. Or set CLAWMEM_NO_LOCAL_MODELS=false for in-process fallback.
565
577
 
578
+ Symptom: "[generate] Remote LLM in cooldown, falling back to in-process generation"
579
+ → Remote LLM server had a transport failure (ECONNREFUSED/ETIMEDOUT). ClawMem set a 60s cooldown
580
+ and is using local node-llama-cpp. Remote will be retried after cooldown expires.
581
+ → Not an error if you expect local fallback. If you want remote only: ensure llama-server is running,
582
+ or set CLAWMEM_NO_LOCAL_MODELS=true to get null instead of slow local inference.
583
+
566
584
  Symptom: Query expansion always fails / returns garbage
567
585
  → On CPU-only systems, in-process inference is significantly slower and less reliable. Systems with GPU acceleration (Metal/Vulkan) handle these models well in-process.
568
586
  → Fix: Run llama-server on a GPU. Even a low-end NVIDIA card handles 1.7B models.
package/CLAUDE.md CHANGED
@@ -307,9 +307,15 @@ All other retrieval is handled by Tier 2 hooks. Do NOT call MCP tools speculativ
307
307
  4. Chain tracing → find_causal_links(docid, direction="both", depth=5)
308
308
  Traverses causal edges between _clawmem/agent/observations/ docs (from decision-extractor).
309
309
 
310
- 5. Memory debugging → memory_evolution_status(docid)
310
+ 5. Entity facts → kg_query(entity, as_of?, direction?)
311
+ Structured SPO triples with temporal validity. Different from intent_search:
312
+ - kg_query: "what does ClawMem relate to?" → returns structured facts (subject-predicate-object)
313
+ - intent_search: "why did we choose ClawMem?" → returns documents with causal reasoning
314
+ Use kg_query for entity lookup, intent_search for causal chains.
311
315
 
312
- 6. Temporal context → timeline(docid, before=5, after=5, same_collection=false)
316
+ 6. Memory debugging → memory_evolution_status(docid)
317
+
318
+ 7. Temporal context → timeline(docid, before=5, after=5, same_collection=false)
313
319
  Shows what was created/modified before and after a document.
314
320
  Use after search to understand chronological neighborhood.
315
321
  ```
@@ -327,6 +333,9 @@ All other retrieval is handled by Tier 2 hooks. Do NOT call MCP tools speculativ
327
333
  - `timeline(docid, before=5, after=5, same_collection=false)` — temporal neighborhood around a document. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation.
328
334
  - `list_vaults()` — show configured vault names and paths. Empty in single-vault mode (default).
329
335
  - `vault_sync(vault, content_root, pattern?, collection_name?)` — index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories (`/etc/`, `/root/`, `.ssh`, `.env`, `credentials`, etc.).
336
+ - `kg_query(entity, as_of?, direction?)` — query the SPO knowledge graph for an entity's relationships. Returns temporal triples with validity windows. USE THIS for "what does X relate to?", "what was true about X in January?". Uses entity resolution for lookup.
337
+ - `diary_write(entry, topic?, agent?)` — write a diary entry. USE PROACTIVELY in non-hooked environments (Hermes, Gemini, plain MCP) for recording important events and decisions. Do NOT use in Claude Code (hooks handle this automatically).
338
+ - `diary_read(last_n?, agent?)` — read recent diary entries.
330
339
 
331
340
  ### Multi-Vault
332
341
 
@@ -354,6 +363,9 @@ Pin, snooze, and forget are **manual MCP tools** — not automated. The agent sh
354
363
  - Do NOT pin everything — pin is for persistent high-priority items, not temporary boosting.
355
364
  - Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it naturally.
356
365
  - Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically. Only after bulk ingestion or when `intent_search` returns weak graph results.
366
+ - Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command (same category as `update`/`reindex`). Suggest it to the user when they mention old conversation exports, but let them run it. Bulk import has disk/embedding cost implications that need user consent.
367
+ - Do NOT use `diary_write` in Claude Code — hooks (`decision-extractor`, `handoff-generator`) capture this automatically. Diary is for non-hooked environments only (Hermes, Gemini, plain MCP clients).
368
+ - Do NOT use `kg_query` for causal "why" questions — use `intent_search` or `memory_retrieve`. `kg_query` returns structured entity facts (SPO triples), not reasoning chains.
357
369
 
358
370
  ## Tool Selection (one-liner)
359
371
 
@@ -435,16 +447,16 @@ compositeScore = (0.10 × searchScore + 0.70 × recencyScore + 0.20 × confidenc
435
447
 
436
448
  | Content Type | Half-Life | Effect |
437
449
  |--------------|-----------|--------|
438
- | decision, hub | ∞ | Never decay |
450
+ | decision, preference, hub | ∞ | Never decay |
439
451
  | antipattern | ∞ | Never decay — accumulated negative patterns persist |
440
452
  | project | 120 days | Slow decay |
441
453
  | research | 90 days | Moderate decay |
442
- | note | 60 days | Default |
443
- | progress | 45 days | Faster decay |
454
+ | problem, milestone, note | 60 days | Default |
455
+ | conversation, progress | 45 days | Faster decay |
444
456
  | handoff | 30 days | Fast — recent matters most |
445
457
 
446
458
  Half-lives extend up to 3× for frequently-accessed memories (access reinforcement decays over 90 days).
447
- Attention decay: non-durable types (handoff, progress, note, project) lose 5% confidence per week without access. Decision/hub/research/antipattern are exempt.
459
+ Attention decay: non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access. Decision/preference/hub/research/antipattern are exempt.
448
460
 
449
461
  ## Indexing & Graph Building
450
462
 
@@ -563,6 +575,12 @@ Symptom: "Local model download blocked" error
563
575
  → llama-server endpoint unreachable while CLAWMEM_NO_LOCAL_MODELS=true.
564
576
  → Fix: Start the llama-server instance. Or set CLAWMEM_NO_LOCAL_MODELS=false for in-process fallback.
565
577
 
578
+ Symptom: "[generate] Remote LLM in cooldown, falling back to in-process generation"
579
+ → Remote LLM server had a transport failure (ECONNREFUSED/ETIMEDOUT). ClawMem set a 60s cooldown
580
+ and is using local node-llama-cpp. Remote will be retried after cooldown expires.
581
+ → Not an error if you expect local fallback. If you want remote only: ensure llama-server is running,
582
+ or set CLAWMEM_NO_LOCAL_MODELS=true to get null instead of slow local inference.
583
+
566
584
  Symptom: Query expansion always fails / returns garbage
567
585
  → On CPU-only systems, in-process inference is significantly slower and less reliable. Systems with GPU acceleration (Metal/Vulkan) handle these models well in-process.
568
586
  → Fix: Run llama-server on a GPU. Even a low-end NVIDIA card handles 1.7B models.
package/README.md CHANGED
@@ -18,7 +18,8 @@ ClawMem turns your markdown notes, project docs, and research dumps into persist
18
18
 
19
19
  - **Surfaces relevant context** on every prompt (context-surfacing hook)
20
20
  - **Bootstraps sessions** with your profile, latest handoff, recent decisions, and stale notes
21
- - **Captures decisions** from session transcripts using a local GGUF observer model
21
+ - **Captures decisions, preferences, milestones, and problems** from session transcripts using a local GGUF observer model
22
+ - **Imports conversation exports** from Claude Code, ChatGPT, Claude.ai, Slack, and plain text via `clawmem mine`
22
23
  - **Generates handoffs** at session end so the next session can pick up where you left off
23
24
  - **Learns what matters** via a feedback loop that boosts referenced notes and decays unused ones
24
25
  - **Guards against prompt injection** in surfaced content
@@ -175,7 +176,7 @@ ClawMem integrates via hooks (`settings.json`) and an MCP stdio server. Hooks ha
175
176
 
176
177
  ```bash
177
178
  clawmem setup hooks # Install lifecycle hooks (SessionStart, UserPromptSubmit, Stop, PreCompact)
178
- clawmem setup mcp # Register MCP server in ~/.claude.json (28 tools)
179
+ clawmem setup mcp # Register MCP server in ~/.claude.json (31 tools)
179
180
  ```
180
181
 
181
182
  **Automatic (90%):** `context-surfacing` injects relevant memory on every prompt. `postcompact-inject` re-injects state after compaction. `decision-extractor`, `handoff-generator`, `feedback-loop` capture session state on stop.
@@ -202,7 +203,7 @@ Disable OpenClaw's native memory and `memory-lancedb` auto-recall/capture to avo
202
203
  openclaw config set agents.defaults.memorySearch.extraPaths "[]"
203
204
  ```
204
205
 
205
- **Alternative:** OpenClaw agents can also use ClawMem's MCP server directly (`clawmem setup mcp`), with or without hooks. This gives full access to all 28 MCP tools but bypasses OpenClaw's ContextEngine lifecycle, so you lose token budget awareness, native compaction orchestration, and the `afterTurn()` message pipeline. The ContextEngine plugin is recommended for new OpenClaw setups; MCP is available as an additional or standalone integration.
206
+ **Alternative:** OpenClaw agents can also use ClawMem's MCP server directly (`clawmem setup mcp`), with or without hooks. This gives full access to all 31 MCP tools but bypasses OpenClaw's ContextEngine lifecycle, so you lose token budget awareness, native compaction orchestration, and the `afterTurn()` message pipeline. The ContextEngine plugin is recommended for new OpenClaw setups; MCP is available as an additional or standalone integration.
206
207
 
207
208
  #### Hermes Agent
208
209
 
@@ -309,9 +310,9 @@ ClawMem uses three `llama-server` (llama.cpp) instances for neural inference. Al
309
310
  | LLM | 8089 | [qmd-query-expansion-1.7B-q4_k_m](https://huggingface.co/tobil/qmd-query-expansion-1.7B-gguf) | ~2.2GB | Intent classification, query expansion, A-MEM |
310
311
  | Reranker | 8090 | [qwen3-reranker-0.6B-Q8_0](https://huggingface.co/ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF) | ~1.3GB | Cross-encoder reranking (query, intent_search) |
311
312
 
312
- The `bin/clawmem` wrapper defaults to `localhost:8088/8089/8090`. If a server is unreachable, ClawMem silently falls back to in-process inference via `node-llama-cpp` (auto-downloads the QMD native models on first use, uses Metal/Vulkan/CPU depending on hardware). With GPU acceleration this is fast; on CPU-only it is significantly slower. ClawMem always works either way, but **if you're running dedicated GPU servers, use [systemd services](docs/guides/systemd-services.md) to ensure they stay up** — otherwise a crashed server silently degrades without warning.
313
+ The `bin/clawmem` wrapper defaults to `localhost:8088/8089/8090`. If a server is unreachable (transport error like ECONNREFUSED/ETIMEDOUT), ClawMem sets a 60-second cooldown and falls back to in-process inference via `node-llama-cpp` (auto-downloads the QMD native models on first use, uses Metal/Vulkan/CPU depending on hardware). HTTP errors (400/500) and user-cancelled requests do not trigger cooldown — the remote server is retried normally on the next call. With GPU acceleration the fallback is fast; on CPU-only it is significantly slower. ClawMem always works either way, but **if you're running dedicated GPU servers, use [systemd services](docs/guides/systemd-services.md) to ensure they stay up**.
313
314
 
314
- To prevent silent fallback and fail fast instead, set `CLAWMEM_NO_LOCAL_MODELS=true`.
315
+ To prevent fallback and fail fast instead, set `CLAWMEM_NO_LOCAL_MODELS=true`.
315
316
 
316
317
  #### Remote GPU (optional)
317
318
 
@@ -472,7 +473,7 @@ llama-server -m Qwen3-Reranker-0.6B-Q8_0.gguf \
472
473
 
473
474
  ### MCP Server
474
475
 
475
- ClawMem exposes 28 MCP tools via the [Model Context Protocol](https://modelcontextprotocol.io) and an optional HTTP REST API. Any MCP-compatible client or HTTP client can use it.
476
+ ClawMem exposes 31 MCP tools via the [Model Context Protocol](https://modelcontextprotocol.io) and an optional HTTP REST API. Any MCP-compatible client or HTTP client can use it.
476
477
 
477
478
  **Claude Code (automatic):**
478
479
 
@@ -643,6 +644,7 @@ clawmem collection list List collections
643
644
  clawmem collection remove <name> Remove a collection
644
645
 
645
646
  clawmem update [--pull] [--embed] Incremental re-scan
647
+ clawmem mine <dir> [-c name] [--embed] Import conversation exports (Claude, ChatGPT, Slack)
646
648
  clawmem embed [-f] Generate fragment embeddings
647
649
  clawmem reindex [--force] Full re-index
648
650
  clawmem watch File watcher daemon
@@ -676,7 +678,7 @@ clawmem doctor Full health check
676
678
  clawmem status Quick index status
677
679
  ```
678
680
 
679
- ## MCP Tools (28)
681
+ ## MCP Tools (31)
680
682
 
681
683
  Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
682
684
 
@@ -713,6 +715,7 @@ Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
713
715
  |---|---|
714
716
  | `build_graphs` | Build temporal and/or semantic graphs from document corpus |
715
717
  | `find_causal_links` | Trace decision chains: "what led to X", "how we got from A to B". Follow up `intent_search` with this tool on a top result to walk the full causal chain. Traverses causes / caused_by / both up to N hops with depth-annotated reasoning. |
718
+ | `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Uses entity resolution for lookup. |
716
719
  | `memory_evolution_status` | Show how a document's A-MEM metadata evolved over time |
717
720
  | `timeline` | Show the temporal neighborhood around a document — what was created/modified before and after it. Progressive disclosure: search → timeline (context) → get (full content). Supports same-collection scoping and session correlation. |
718
721
 
@@ -729,6 +732,13 @@ Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
729
732
  | `list_vaults` | Show configured vault names and paths. Empty in single-vault mode. |
730
733
  | `vault_sync` | Index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories. |
731
734
 
735
+ ### Agent Diary
736
+
737
+ | Tool | Description |
738
+ |---|---|
739
+ | `diary_write` | Write a diary entry. Use for recording important events, decisions, or observations in environments without hook support. Stored as searchable memories. |
740
+ | `diary_read` | Read recent diary entries. Filter by agent name. |
741
+
732
742
  ### Memory Management & Lifecycle
733
743
 
734
744
  | Tool | Description |
@@ -759,7 +769,7 @@ Hooks installed by `clawmem setup hooks`:
759
769
  | `postcompact-inject` | SessionStart | Re-injects authoritative context after compaction: precompact state + recent decisions + antipatterns + vault context (1200 token budget) |
760
770
  | `curator-nudge` | SessionStart | Surfaces curator report actions, nudges when report is stale (>7 days) |
761
771
  | `precompact-extract` | PreCompact | Extracts decisions, file paths, open questions before auto-compaction → writes `precompact-state.md` to auto-memory |
762
- | `decision-extractor` | Stop | GGUF observer extracts structured decisions, infers causal links, detects contradictions with prior decisions |
772
+ | `decision-extractor` | Stop | GGUF observer extracts structured observations (decisions, preferences, milestones, problems, bugfixes, features, refactors, discoveries), infers causal links, detects contradictions with prior decisions |
763
773
  | `handoff-generator` | Stop | GGUF observer generates rich handoff, regex fallback |
764
774
  | `feedback-loop` | Stop | Silently boosts referenced notes, decays unused ones, records co-activation + usage relations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation) |
765
775
 
@@ -813,15 +823,19 @@ For WHY and ENTITY queries, the search pipeline expands results through the memo
813
823
  | Type | Half-life | Baseline | Notes |
814
824
  |---|---|---|---|
815
825
  | `decision` | ∞ | 0.85 | Never decays |
826
+ | `preference` | ∞ | 0.80 | Never decays — user preferences are durable facts |
816
827
  | `hub` | ∞ | 0.80 | Never decays |
828
+ | `antipattern` | ∞ | 0.75 | Never decays — accumulated negative patterns persist |
829
+ | `problem` | 60 days | 0.75 | High priority but resolves over time |
817
830
  | `research` | 90 days | 0.70 | |
831
+ | `milestone` | 60 days | 0.70 | Important at the time, fades as project moves forward |
818
832
  | `project` | 120 days | 0.65 | |
819
833
  | `handoff` | 30 days | 0.60 | Fast decay — most recent matters |
834
+ | `conversation` | 45 days | 0.55 | Imported chat exchanges |
820
835
  | `progress` | 45 days | 0.50 | |
821
836
  | `note` | 60 days | 0.50 | Default |
822
- | `antipattern` | ∞ | 0.75 | Never decays — accumulated negative patterns persist |
823
837
 
824
- Content types are inferred from frontmatter or file path patterns. Half-lives extend up to 3× for frequently-accessed memories (access reinforcement, decays over 90 days). Non-durable types (handoff, progress, note, project) lose 5% confidence per week without access (attention decay). Decision/hub/research/antipattern are exempt.
838
+ Content types are inferred from frontmatter or file path patterns. Half-lives extend up to 3× for frequently-accessed memories (access reinforcement, decays over 90 days). Non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access (attention decay). Decision/preference/hub/research/antipattern are exempt.
825
839
 
826
840
  **Quality scoring:** Each document gets a `quality_score` (0.0–1.0) computed during indexing based on length, structure (headings, lists), decision/correction keywords, and frontmatter richness. Applied as `qualityMultiplier = 0.7 + 0.6 × qualityScore` (range: 0.7× penalty to 1.3× boost).
827
841
 
@@ -868,7 +882,7 @@ Documents are split into semantic fragments (sections, lists, code blocks, front
868
882
 
869
883
  ### Local Observer Agent
870
884
 
871
- Uses the LLM server (shared with query expansion and intent classification) to extract structured observations from session transcripts: type, title, facts, narrative, concepts, files read/modified. Falls back to regex patterns if the model is unavailable.
885
+ Uses the LLM server (shared with query expansion and intent classification) to extract structured observations from session transcripts. Observation types: `decision`, `bugfix`, `feature`, `refactor`, `discovery`, `change`, `preference`, `milestone`, `problem`. Each observation includes title, facts, narrative, concepts, and files read/modified. Preferences, milestones, and problems get first-class content_type treatment with dedicated confidence baselines and half-lives instead of being flattened to generic "observation". Falls back to regex patterns if the model is unavailable.
872
886
 
873
887
  ### User Profile
874
888
 
@@ -943,7 +957,7 @@ title: "Document Title"
943
957
  tags: [tag1, tag2]
944
958
  domain: "infrastructure"
945
959
  workstream: "project-name"
946
- content_type: "decision" # decision|hub|research|project|handoff|progress|note
960
+ content_type: "decision" # decision|preference|hub|research|project|handoff|conversation|progress|note
947
961
  review_by: "2026-03-01"
948
962
  ---
949
963
  ```
@@ -1106,6 +1120,7 @@ Built on the shoulders of:
1106
1120
  - [Hermes Agent](https://github.com/NousResearch/hermes-agent) — MemoryProvider plugin integration, memory nudge system (periodic lifecycle tool prompting)
1107
1121
  - [Hindsight](https://github.com/vectorize-io/hindsight) — entity resolution, MPFP graph traversal, temporal extraction, 3-tier consolidation, observation invalidation, 4-way parallel retrieval
1108
1122
  - [MAGMA](https://arxiv.org/abs/2501.13956) — multi-graph memory agent
1123
+ - [MemPalace](https://github.com/milla-jovovich/mempalace) — conversation import patterns, broadened observation taxonomy (preference/milestone/problem), session-bootstrap synthesis
1109
1124
  - [memory-lancedb-pro](https://github.com/CortexReach/memory-lancedb-pro) — retrieval gate, length normalization, MMR diversity, access reinforcement algorithms
1110
1125
  - [OpenViking](https://github.com/volcengine/OpenViking) — query decomposition patterns, collection-scoped retrieval, transaction-safe indexing
1111
1126
  - [QMD](https://github.com/tobi/qmd) — search backend (BM25 + vectors + RRF + reranking)
package/SKILL.md CHANGED
@@ -242,9 +242,15 @@ Once escalated, route by query type:
242
242
  4. Chain tracing -> find_causal_links(docid, direction="both", depth=5)
243
243
  Traverses causal edges between _clawmem/agent/observations/ docs.
244
244
 
245
- 5. Memory debugging -> memory_evolution_status(docid)
245
+ 5. Entity facts -> kg_query(entity, as_of?, direction?)
246
+ Structured SPO triples with temporal validity. Different from intent_search:
247
+ - kg_query: "what does ClawMem relate to?" -> returns structured facts (subject-predicate-object)
248
+ - intent_search: "why did we choose ClawMem?" -> returns documents with causal reasoning
249
+ Use kg_query for entity lookup, intent_search for causal chains.
246
250
 
247
- 6. Temporal context -> timeline(docid, before=5, after=5, same_collection=false)
251
+ 6. Memory debugging -> memory_evolution_status(docid)
252
+
253
+ 7. Temporal context -> timeline(docid, before=5, after=5, same_collection=false)
248
254
  Shows what was created/modified before and after a document.
249
255
  Use after search to understand chronological neighborhood.
250
256
  ```
@@ -277,6 +283,9 @@ Once escalated, route by query type:
277
283
  | `timeline` | Temporal neighborhood around a document — what was modified before/after. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation. |
278
284
  | `list_vaults` | Show configured vault names and paths. Empty in single-vault mode. |
279
285
  | `vault_sync` | Index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories. |
286
+ | `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Uses entity resolution. |
287
+ | `diary_write` | Write diary entry. Use proactively in non-hooked environments. Do NOT use in Claude Code. |
288
+ | `diary_read` | Read recent diary entries. Filter by agent name. |
280
289
  | `lifecycle_status` | Document lifecycle statistics: active, archived, forgotten, pinned, snoozed counts and policy summary. |
281
290
  | `lifecycle_sweep` | Run lifecycle policies: archive stale docs. Defaults to dry_run (preview only). |
282
291
  | `lifecycle_restore` | Restore auto-archived documents. Filter by query, collection, or all. Does NOT restore manually forgotten docs. |
@@ -442,12 +451,12 @@ compositeScore = (0.10 x searchScore + 0.70 x recencyScore + 0.20 x confidenceSc
442
451
 
443
452
  | Content Type | Half-Life | Effect |
444
453
  |--------------|-----------|--------|
445
- | decision, hub | infinity | Never decay |
454
+ | decision, preference, hub | infinity | Never decay |
446
455
  | antipattern | infinity | Never decay — accumulated negative patterns persist |
447
456
  | project | 120 days | Slow decay |
448
457
  | research | 90 days | Moderate decay |
449
- | note | 60 days | Default |
450
- | progress | 45 days | Faster decay |
458
+ | problem, milestone, note | 60 days | Default |
459
+ | conversation, progress | 45 days | Faster decay |
451
460
  | handoff | 30 days | Fast — recent matters most |
452
461
 
453
462
  Half-lives extend up to 3x for frequently-accessed memories (access reinforcement decays over 90 days).
@@ -566,6 +575,9 @@ When `decision-extractor` detects a new decision contradicting an old one, the o
566
575
  - Do NOT pin everything — pin is for persistent high-priority items.
567
576
  - Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it.
568
577
  - Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically.
578
+ - Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command. Suggest it to the user when they mention old conversation exports, but let them run it.
579
+ - Do NOT use `diary_write` in Claude Code — hooks capture this automatically. Diary is for non-hooked environments only (Hermes, Gemini, plain MCP).
580
+ - Do NOT use `kg_query` for causal "why" questions — use `intent_search` or `memory_retrieve`. `kg_query` returns structured entity facts (SPO triples), not reasoning chains.
569
581
 
570
582
  ---
571
583
 
@@ -657,6 +669,12 @@ Symptom: reindex --force crashes with UNIQUE constraint
657
669
  -> Force deactivates rows but UNIQUE(collection, path) doesn't discriminate by active flag.
658
670
  -> Fixed: indexer.ts reactivates inactive rows instead of inserting.
659
671
 
672
+ Symptom: `clawmem update` crashes with "Binding expected string, TypedArray, boolean, number, bigint or null"
673
+ -> YAML frontmatter values like `title: 2023-09-27` or `title: true` are coerced by gray-matter
674
+ into Date objects or booleans. Bun's SQLite driver rejects these as bind parameters.
675
+ -> Fixed v0.4.2: `parseDocument()` runtime-checks all frontmatter fields via `str()` helper.
676
+ -> Affects: title, domain, workstream, content_type, review_by.
677
+
660
678
  Symptom: CLI reindex/update falls back to node-llama-cpp
661
679
  -> GPU env vars only in systemd drop-in, not in wrapper script.
662
680
  -> Fixed: bin/clawmem wrapper exports CLAWMEM_EMBED_URL/LLM_URL/RERANK_URL defaults.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.4.2",
3
+ "version": "0.5.1",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/clawmem.ts CHANGED
@@ -235,6 +235,101 @@ async function cmdUpdate(args: string[]) {
235
235
  }
236
236
  }
237
237
 
238
/**
 * `clawmem mine <directory>` — bulk-import conversation exports.
 *
 * Pipeline: scan the directory with `scanConversationDir`, normalize each
 * candidate file with `normalizeFile`, split it with `chunkConversation`,
 * write every chunk as a markdown file with YAML frontmatter into a temporary
 * staging directory, then index that directory via `indexCollection`.
 *
 * Flags:
 *   -c, --collection <name>  target collection (falls back to "conversations")
 *   --embed                  run `cmdEmbed` after indexing
 *   --dry-run                stop after printing the parse summary
 *
 * The staging directory is removed in a `finally`, so it is cleaned up even
 * when writing or indexing throws.
 *
 * @param args CLI arguments following the `mine` subcommand.
 */
async function cmdMine(args: string[]) {
  const { values, positionals } = parseArgs({
    args,
    options: {
      collection: { type: "string", short: "c" },
      embed: { type: "boolean", default: false },
      "dry-run": { type: "boolean", default: false },
    },
    allowPositionals: true,
  });

  const dir = positionals[0];
  if (!dir) die("Usage: clawmem mine <directory> [-c collection-name] [--embed] [--dry-run]");
  const absDir = pathResolve(dir);
  if (!existsSync(absDir)) die(`Directory not found: ${absDir}`);

  const { scanConversationDir, normalizeFile, chunkConversation } = await import("./normalize.ts");
  const { relative, sep } = await import("path");

  console.log(`${c.cyan}Scanning for conversation files${c.reset} in ${absDir}`);
  const files = scanConversationDir(absDir);
  if (files.length === 0) die("No conversation files found (.json, .jsonl, .txt, .md)");
  console.log(` Found ${files.length} candidate files`);

  // Normalize and chunk
  let totalChunks = 0;
  let totalConversations = 0;
  const allChunks: { title: string; body: string; sourcePath: string; chunkIndex: number }[] = [];

  for (const file of files) {
    const conv = normalizeFile(file);
    if (!conv) continue;
    totalConversations++;

    const chunks = chunkConversation(conv);
    if (chunks.length === 0) continue;

    console.log(` ${c.green}✓${c.reset} ${conv.source} (${conv.format}, ${conv.messages.length} messages → ${chunks.length} chunks)`);

    // Separator-agnostic relative path (the previous string replace assumed "/").
    // If the file somehow is not below absDir, keep its path unchanged.
    const rel = relative(absDir, file);
    const outsideRoot = rel === "" || rel === ".." || rel.startsWith(`..${sep}`);
    const sourcePath = outsideRoot ? file : rel;

    // Push one by one: spreading a very large array into push() can exceed
    // the engine's argument-count limit.
    for (const chunk of chunks) {
      chunk.sourcePath = sourcePath;
      allChunks.push(chunk);
    }
    totalChunks += chunks.length;
  }

  if (totalConversations === 0) die("No conversation files could be parsed");
  console.log(`\n${c.bold}Parsed:${c.reset} ${totalConversations} conversations → ${totalChunks} exchange chunks`);

  if (values["dry-run"]) {
    console.log(`${c.yellow}Dry run — no changes made${c.reset}`);
    return;
  }

  // Write chunks as markdown to a staging directory (outside source tree), then index
  const collectionName = values.collection || "conversations";
  const { tmpdir } = await import("os");
  const { rmSync, mkdtempSync } = await import("fs");
  // mkdtempSync creates a fresh, uniquely named directory, so two runs started
  // in the same millisecond can never share (or pre-populate) a staging dir.
  const stagingDir = mkdtempSync(pathResolve(tmpdir(), "clawmem-mine-"));

  try {
    const usedNames = new Set<string>();
    const writePromises: Promise<number>[] = [];
    for (const chunk of allChunks) {
      const safeSource = chunk.sourcePath.replace(/[\/\\]/g, "_").replace(/\.[^.]+$/, "");
      const index = String(chunk.chunkIndex).padStart(4, "0");

      // Flattening separators and dropping the extension can map two different
      // sources (e.g. "a/b.json" and "a_b.md") to the same name; disambiguate
      // instead of letting one chunk silently overwrite the other.
      let filename = `${safeSource}_${index}.md`;
      for (let n = 2; usedNames.has(filename); n++) {
        filename = `${safeSource}_${index}-${n}.md`;
      }
      usedNames.add(filename);

      // JSON string syntax is a valid YAML double-quoted scalar and escapes
      // quotes, backslashes and newlines, keeping the frontmatter well-formed.
      const document = [
        "---",
        `title: ${JSON.stringify(chunk.title)}`,
        `content_type: conversation`,
        `source: ${JSON.stringify(chunk.sourcePath)}`,
        "---",
        "",
        chunk.body,
      ].join("\n");
      writePromises.push(Bun.write(pathResolve(stagingDir, filename), document));
    }
    await Promise.all(writePromises);

    // Index through existing pipeline
    const s = getStore();
    console.log(`\n${c.cyan}Indexing ${totalChunks} conversation chunks${c.reset} as collection '${collectionName}'`);
    const stats = await indexCollection(s, collectionName, stagingDir, "**/*.md");
    console.log(` ${c.green}+${stats.added}${c.reset} added, ${c.yellow}~${stats.updated}${c.reset} updated, ${c.dim}=${stats.unchanged}${c.reset} unchanged`);

    if (values.embed) {
      console.log();
      await cmdEmbed([]);
    } else {
      console.log(`\nRun ${c.cyan}clawmem embed${c.reset} to generate embeddings for the imported conversations`);
    }
  } finally {
    rmSync(stagingDir, { recursive: true, force: true });
  }
}
332
+
238
333
  async function cmdEmbed(args: string[]) {
239
334
  const { values } = parseArgs({
240
335
  args,
@@ -1695,6 +1790,9 @@ async function main() {
1695
1790
  case "update":
1696
1791
  await cmdUpdate(subArgs);
1697
1792
  break;
1793
+ case "mine":
1794
+ await cmdMine(subArgs);
1795
+ break;
1698
1796
  case "embed":
1699
1797
  await cmdEmbed(subArgs);
1700
1798
  break;
@@ -1770,6 +1868,9 @@ async function main() {
1770
1868
  case "curate":
1771
1869
  await cmdCurate(subArgs);
1772
1870
  break;
1871
+ case "diary":
1872
+ await cmdDiary(subArgs);
1873
+ break;
1773
1874
  case "help":
1774
1875
  case "--help":
1775
1876
  case "-h":
@@ -2109,6 +2210,99 @@ interface CuratorReport {
2109
2210
  actions: string[];
2110
2211
  }
2111
2212
 
2213
/**
 * `clawmem diary <write|read>` — manual diary entries.
 *
 * - `write <entry> [-t topic] [-a agent]` saves the entry through
 *   `saveMemory` into the `_clawmem` collection under a timestamped
 *   `diary/…` path.
 * - `read [-n limit] [-a agent]` lists the most recently modified active
 *   diary documents, optionally filtered by the `domain` column.
 *
 * Any other subcommand prints the usage text.
 *
 * @param args CLI arguments following the `diary` subcommand.
 */
async function cmdDiary(args: string[]) {
  const subCmd = args[0];
  const subArgs = args.slice(1);

  switch (subCmd) {
    case "write": {
      const { values, positionals } = parseArgs({
        args: subArgs,
        options: {
          topic: { type: "string", short: "t", default: "general" },
          agent: { type: "string", short: "a", default: "user" },
        },
        allowPositionals: true,
      });

      const entry = positionals.join(" ");
      if (!entry) die("Usage: clawmem diary write <entry text> [-t topic] [-a agent-name]");

      const topic = values.topic ?? "general";
      const agent = values.agent ?? "user";
      const title = entry.slice(0, 80);

      const s = getStore();
      const now = new Date();
      const iso = now.toISOString();
      const dateStr = iso.slice(0, 10);
      const timeStr = iso.slice(11, 19).replace(/:/g, "");
      const ms = String(now.getMilliseconds()).padStart(3, "0");
      // Keep the topic to a single path segment: separators and control
      // characters from user input must not leak into the document path.
      const topicSegment = topic.replace(/[\/\\\x00-\x1f]+/g, "-");
      const diaryPath = `diary/${dateStr}-${timeStr}${ms}-${topicSegment}.md`;

      // JSON string syntax is a valid YAML double-quoted scalar; it escapes
      // quotes, backslashes and newlines so user text cannot break the
      // frontmatter block or the flow-style tag list.
      const body = [
        "---",
        `title: ${JSON.stringify(title)}`,
        `content_type: note`,
        `tags: [diary, ${JSON.stringify(topic)}]`,
        `domain: ${JSON.stringify(agent)}`,
        "---",
        "",
        entry,
      ].join("\n");

      const result = s.saveMemory({
        collection: "_clawmem",
        path: diaryPath,
        title,
        body,
        contentType: "note",
        confidence: 0.7,
        semanticPayload: `${diaryPath}::${entry}`,
      });

      console.log(`${c.green}✓${c.reset} Diary entry saved (${result.action}, doc #${result.docId})`);
      break;
    }

    case "read": {
      const { values } = parseArgs({
        args: subArgs,
        options: {
          last: { type: "string", short: "n", default: "10" },
          agent: { type: "string", short: "a" },
        },
        allowPositionals: false,
      });

      // Reject non-numeric or non-positive limits up front: NaN cannot be
      // bound meaningfully and a negative LIMIT disables the limit in SQLite.
      const limit = parseInt(values.last || "10", 10);
      if (!Number.isInteger(limit) || limit <= 0) {
        die(`Invalid --last value: ${values.last} (expected a positive integer)`);
      }
      const s = getStore();

      // Only the columns actually printed below are described here.
      type DiaryRow = { title: string | null; modifiedAt: string | null; domain: string | null };

      // The optional agent filter is a constant SQL fragment; its value is
      // always passed as a bound parameter.
      const params: (string | number)[] = values.agent ? [values.agent, limit] : [limit];
      const rows = s.db.prepare(`
        SELECT d.id, d.path, d.title, d.modified_at as modifiedAt, d.domain,
               c.doc as body
        FROM documents d
        JOIN content c ON c.hash = d.hash
        WHERE d.active = 1 AND d.collection = '_clawmem' AND d.path LIKE 'diary/%'
        ${values.agent ? "AND d.domain = ?" : ""}
        ORDER BY d.modified_at DESC
        LIMIT ?
      `).all(...params) as DiaryRow[];

      if (rows.length === 0) {
        console.log("No diary entries found.");
        break;
      }

      console.log(`${c.bold}Diary${c.reset} (${rows.length} entries)\n`);
      for (const row of rows) {
        const agentLabel = row.domain ? ` [${row.domain}]` : "";
        // Guard against a NULL modified_at instead of throwing on .slice().
        const stamp = (row.modifiedAt ?? "").slice(0, 16);
        console.log(`${c.dim}${stamp}${c.reset}${agentLabel} ${row.title}`);
      }
      break;
    }

    default:
      console.log(`Usage:
clawmem diary write <entry> [-t topic] [-a agent] Write diary entry
clawmem diary read [-n limit] [-a agent] Read recent entries`);
  }
}
2305
+
2112
2306
  async function cmdCurate(_args: string[]) {
2113
2307
  const s = getStore();
2114
2308
  const report: CuratorReport = {
@@ -2289,6 +2483,7 @@ ${c.bold}Setup:${c.reset}
2289
2483
 
2290
2484
  ${c.bold}Indexing:${c.reset}
2291
2485
  clawmem update [--pull] [--embed] Re-scan collections (--embed auto-embeds)
2486
+ clawmem mine <dir> [-c name] [--embed] Import conversation exports (Claude, ChatGPT, Slack)
2292
2487
  clawmem embed [-f] Generate fragment embeddings
2293
2488
  clawmem reindex [--force] [--enrich] Full re-index (--enrich: run entity extraction + links on all docs)
2294
2489
  clawmem watch File watcher daemon
@@ -2323,6 +2518,8 @@ ${c.bold}Intelligence:${c.reset}
2323
2518
  clawmem reflect [days] Cross-session pattern analysis
2324
2519
  clawmem consolidate [--dry-run] Merge duplicate low-confidence docs
2325
2520
  clawmem curate Automated maintenance (health, sweep, dedup, hygiene)
2521
+ clawmem diary write <entry> [-t topic] Write a diary entry (for non-hooked environments)
2522
+ clawmem diary read [-n N] [-a agent] Read recent diary entries
2326
2523
 
2327
2524
  ${c.bold}Integration:${c.reset}
2328
2525
  clawmem mcp Start stdio MCP server