deja-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deja/interfaces/cli.py ADDED
@@ -0,0 +1,1967 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import gzip
5
+ import json
6
+ import shutil
7
+ import stat
8
+ import sys
9
+ import threading
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Optional, List
13
+
14
+ import typer
15
+ from ulid import ULID
16
+ from watchdog.events import FileSystemEventHandler, FileSystemEvent
17
+ from watchdog.observers import Observer
18
+
19
+ from deja.config import load_config
20
+ from deja.core.extractor import extract_memories
21
+ from deja.core.store import MemoryStore
22
+ from deja.ingest.watchers.claude_code import ClaudeCodeWatcher
23
+ from deja.ingest.watchers.gemini_cli import GeminiCLIWatcher
24
+ from deja.ingest.watchers.codex_cli import CodexCLIWatcher
25
+ from deja.llm.embedding import EmbeddingAdapter
26
+ from deja.llm.factory import create_adapter, create_embedding_adapter
27
+ from deja.core.reflection import ReflectionEngine
28
+ from deja.core.scheduler import make_scheduler
29
+
30
# Root Typer application; each @app.command() in this module registers a `deja` subcommand on it.
app = typer.Typer(name="deja", help="Deja — persistent coding memory CLI")
# ID-collision strategies accepted by the `import` command's --merge-strategy option.
VALID_MERGE_STRATEGIES = {"skip", "overwrite", "update-confidence"}
32
+
33
+ # ── helpers ──────────────────────────────────────────────────────────────────
34
+
35
+
36
def _get_config():
    """Return the configuration object produced by ``load_config()``."""
    loaded = load_config()
    return loaded
38
+
39
+
40
def _get_store(config=None) -> MemoryStore:
    """Construct a ``MemoryStore``; the config is loaded on demand when not supplied."""
    effective_config = _get_config() if config is None else config
    return MemoryStore(effective_config)
44
+
45
+
46
async def _init_store(store: MemoryStore) -> None:
    """Await ``store.init_db()``; exists so callers can hand a coroutine to ``asyncio.run``."""
    await store.init_db()
48
+
49
+
50
async def _embed_and_save(
    memories: list[dict],
    store: MemoryStore,
    embedding_adapter,  # Optional[EmbeddingAdapter]
) -> int:
    """Save every memory, attaching an embedding when one can be computed.

    Embedding is best-effort: a failure is reported on stderr and the memory
    is still saved with no embedding. Returns the number of memories saved.
    """
    count = 0
    for item in memories:
        payload = None
        if embedding_adapter is not None:
            try:
                vector = await embedding_adapter.embed(item["content"])
                payload = EmbeddingAdapter.to_bytes(vector)
            except Exception as exc:
                # Deliberately non-fatal: the memory is still worth persisting.
                print(f"[deja] Embedding failed: {exc}", file=sys.stderr)
        await store.save(item, payload)
        count += 1
    return count
68
+
69
+
70
def _format_memory_text(mem: dict) -> str:
    """Render one memory as a single line.

    Layout: ``[type] [domain:<d>] [<scope label>] <content> (confidence: x.y)``,
    where the domain tag is omitted when the memory has no domain and the scope
    label is "global" or the project name (falling back to the raw scope).
    """
    scope = mem["scope"]
    if scope == "global":
        where = "global"
    else:
        where = mem.get("project", scope)

    domain = mem.get("domain")
    tag = ""
    if domain:
        tag = f" [domain:{domain}]"

    head = f"[{mem['type']}]{tag} [{where}] {mem['content']} "
    tail = f"(confidence: {mem['confidence']:.1f})"
    return head + tail
79
+
80
+
81
def _format_load_result(result: dict) -> str:
    """Render a load_budgeted() result as compact, agent-readable text.

    Args:
        result: Mapping with keys ``memories`` (list of memory dicts), ``total``,
            ``overflow``, ``project`` and optionally ``overflow_hints`` (list of
            dicts carrying ``type`` and ``overflow``).

    Returns:
        "No memories found." when there are no memories and ``total`` is 0.
        Otherwise a header line, one line per memory grouped by type, and,
        when ``overflow`` is positive, a footer with a search hint.
    """
    memories = result["memories"]
    total = result["total"]
    overflow = result["overflow"]
    project = result["project"]
    overflow_hints = result.get("overflow_hints", [])

    if not memories and total == 0:
        return "No memories found."

    # A missing/None project is treated like "global" so the output never
    # contains "(project: None)" or "--project None".
    has_project = bool(project) and project != "global"

    header = f"=== deja: {len(memories)}/{total} memories"
    if has_project:
        header += f" (project: {project})"
    header += " ==="

    lines = [header]

    by_type: dict[str, list[dict]] = {}
    for mem in memories:
        by_type.setdefault(mem.get("type", "pattern"), []).append(mem)

    known_order = ["preference", "gotcha", "decision", "pattern", "procedure", "progress"]
    # Types outside the known ordering are rendered last (first-seen order)
    # instead of being dropped, so the listing always matches the header count.
    extra_types = [t for t in by_type if t not in known_order]

    for mem_type in known_order + extra_types:
        mems = by_type.get(mem_type, [])
        if not mems:
            continue
        lines.append("")  # blank line between type groups
        for mem in mems:
            label = f"[{mem_type}]"
            if mem.get("domain"):
                label += f"[{mem['domain']}]"
            if mem_type == "procedure" and mem.get("reuse_count", 0):
                label += f"(reuse:{mem['reuse_count']})"
            if mem.get("scope") != "global":
                label += f"({mem.get('project', '')})"
            lines.append(f"{label} {mem['content']}")

    if overflow > 0:
        lines.append("")
        search_cmd = 'deja search "<topic>"'
        if has_project:
            search_cmd += f" --project {project}"
        hints_str = ", ".join(f"{h['type']} +{h['overflow']}" for h in overflow_hints)
        lines.append(f"--- {overflow} more memories available. Run: {search_cmd} ---")
        if hints_str:
            lines.append(f"Overflow: {hints_str}")

    return "\n".join(lines)
131
+
132
+
133
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
135
+
136
+
137
def _prepare_import_memory(raw: object, project: Optional[str]) -> tuple[Optional[dict], Optional[str]]:
    """Validate and normalize one imported memory record.

    Args:
        raw: One decoded JSONL record; anything other than a dict is rejected.
        project: When given, overrides the record's scope (``project:<name>``)
            and project; otherwise the record's own values are used.

    Returns:
        ``(normalized, None)`` on success or ``(None, reason)`` when the record
        is unusable. Confidence is clamped to [0.0, 1.0]; an unparseable
        ``reuse_count`` falls back to 0; missing timestamps are filled in
        (created_at -> now, updated_at -> created_at, last_confirmed -> updated_at).
    """
    import math

    if not isinstance(raw, dict):
        return None, "record is not a JSON object"

    for field in ("id", "type", "content"):
        value = raw.get(field)
        if not isinstance(value, str) or not value.strip():
            return None, f"missing required field: {field}"

    scope = f"project:{project}" if project else raw.get("scope")
    if not isinstance(scope, str) or not scope.strip():
        return None, "missing required field: scope"

    project_name = project if project else raw.get("project")
    created_at = raw.get("created_at") or _now_iso()
    updated_at = raw.get("updated_at") or created_at
    last_confirmed = raw.get("last_confirmed") or updated_at

    try:
        confidence = float(raw.get("confidence", 1.0))
    except (TypeError, ValueError):
        return None, "invalid confidence value"
    # json.loads accepts the literal NaN; without this check min()/max() would
    # silently turn it into 1.0 (full confidence).
    if math.isnan(confidence):
        return None, "invalid confidence value"
    confidence = max(0.0, min(1.0, confidence))

    try:
        reuse_count = int(raw.get("reuse_count", 0))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")), reachable via the JSON literal Infinity.
        reuse_count = 0

    normalized = {
        "id": raw["id"],
        "type": raw["type"],
        "category": raw.get("category", "agent"),
        "content": raw["content"],
        "scope": scope,
        "project": project_name,
        "source": raw.get("source"),
        "confidence": confidence,
        "reuse_count": reuse_count,
        "domain": raw.get("domain"),
        "entity_graph": raw.get("entity_graph"),
        "created_at": created_at,
        "updated_at": updated_at,
        "last_confirmed": last_confirmed,
        "archived_at": raw.get("archived_at"),
        "invalidated_at": raw.get("invalidated_at"),
    }
    return normalized, None
189
+
190
+
191
+ # ── commands ──────────────────────────────────────────────────────────────────
192
+
193
+
194
@app.command()
def init():
    """First-time setup: create ~/.deja/ directory structure."""
    ms_dir = Path("~/.deja").expanduser()
    store_dir = ms_dir / "store"
    vault_dir = store_dir / "vault"
    config_path = ms_dir / "config.yaml"

    store_dir.mkdir(parents=True, exist_ok=True)
    vault_dir.mkdir(parents=True, exist_ok=True)

    if not config_path.exists():
        # The packaged default is looked up three directories above this file.
        default_config = Path(__file__).parent.parent.parent / "config" / "default.yaml"
        if default_config.exists():
            shutil.copy(default_config, config_path)
            typer.echo(f"Created config at {config_path}")
        else:
            typer.echo("Warning: could not find default config to copy", err=True)
    else:
        typer.echo(f"Config already exists at {config_path}")

    # Initialize the database. Init and close share ONE event loop: each
    # asyncio.run() call creates and then closes its own loop, so async
    # resources opened during init must not be closed from a second run().
    store = _get_store()

    async def _init_and_close() -> None:
        await _init_store(store)
        await store.close()

    asyncio.run(_init_and_close())

    typer.echo(f"Memory service initialized at {ms_dir}")
221
+
222
+
223
@app.command()
def load(
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Project name. Omit to load global memories only."),
    format: str = typer.Option("text", "--format", "-f", help="Output format: json|text"),
    context: Optional[str] = typer.Option(None, "--context", "-c", help="Task context query: re-ranks memories by relevance instead of raw confidence."),
):
    """Load memories as context for a session (type-slot budgeted).

    With --project: returns global memories + that project's memories.
    Without --project: returns global memories only.

    Selects top-N per type (5 gotcha, 5 decision, 5 preference, 5 pattern,
    3 procedure by reuse, 3 recent progress). Overflow count shown with search hint.

    With --context: re-ranks within each type slot by relevance to the given query
    (hybrid BM25 + embedding) instead of sorting by raw confidence.
    """
    async def _run():
        cfg = _get_config()
        memory_store = _get_store(cfg)
        await memory_store.init_db()
        try:
            # An embedding adapter is only created when a context query was given.
            adapter = None
            if context:
                adapter = await create_embedding_adapter(cfg)
            return await memory_store.load_budgeted(
                project, context=context, embedding_adapter=adapter
            )
        finally:
            await memory_store.close()

    budgeted = asyncio.run(_run())

    # Anything other than "json" falls back to the text rendering.
    if format == "json":
        rendered = json.dumps(budgeted, indent=2, default=str)
    else:
        rendered = _format_load_result(budgeted)
    typer.echo(rendered)
256
+
257
+
258
@app.command()
def save(
    content: str = typer.Argument(..., help="Memory content to save"),
    type: str = typer.Option("pattern", "--type", "-t", help="Memory type: preference|pattern|decision|gotcha|progress|procedure"),
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Project name. Omit to save as global (cross-project)."),
    confidence: float = typer.Option(1.0, "--confidence", "-c", help="Confidence score 0.0-1.0"),
    category: str = typer.Option("agent", "--category", help="Category: user|agent"),
    trigger: Optional[str] = typer.Option(None, "--trigger", help="Comma-separated phrases that activate this memory (e.g. 'kubectl apply, deploy k8s')."),
):
    """Save a memory directly (no LLM extraction).

    Omit --project to save globally — the memory will appear in deja load for
    every project. Use this for user preferences and broadly applicable patterns.

    Use --project to scope the memory to a specific project — it only appears
    in deja load --project <name>.

    If embedding.provider is configured, generates an embedding automatically.
    """
    async def _run():
        config = _get_config()
        store = _get_store(config)
        await store.init_db()
        # Everything after init_db() sits inside try/finally so the store is
        # closed even when creating the embedding adapter fails.
        try:
            embedding_bytes = None
            embedding_adapter = await create_embedding_adapter(config)
            if embedding_adapter is not None:
                try:
                    emb = await embedding_adapter.embed(content)
                    embedding_bytes = EmbeddingAdapter.to_bytes(emb)
                except Exception as e:
                    # Best-effort: the memory is still saved without an embedding.
                    typer.echo(f"[deja] Embedding generation failed: {e}", err=True)

            scope = f"project:{project}" if project else "global"
            memory = {
                "type": type,
                "category": category,
                "content": content,
                "scope": scope,
                "project": project,
                "source": "manual",
                "confidence": confidence,
                "trigger": trigger,
            }
            return await store.save(memory, embedding=embedding_bytes)
        finally:
            await store.close()

    mem_id = asyncio.run(_run())
    typer.echo(f"Saved memory: {mem_id}")
310
+
311
+
312
@app.command()
def search(
    query: str = typer.Argument(..., help="Search query"),
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Narrow to global + this project. Omit to search all scopes."),
    type: Optional[str] = typer.Option(None, "--type", "-t", help="Filter by type"),
    format: str = typer.Option("text", "--format", "-f", help="Output format: json|text"),
):
    """Search memories using hybrid BM25 + embedding search.

    BM25 (keyword) always runs first. When BM25 returns fewer than 3 results
    and embedding.provider is configured, semantic similarity search runs as
    a fallback. Results are ranked by activation score (task_match, confidence,
    recency, reuse_count). Enable semantic search in ~/.deja/config.yaml:
        embedding:
          provider: ollama
          model: nomic-embed-text
    Then run 'deja embed' to backfill embeddings for existing memories.
    """
    async def _run():
        config = _get_config()
        store = _get_store(config)
        await store.init_db()
        # Adapter creation is inside try/finally so a failure there still
        # closes the already-initialized store.
        try:
            embedding_adapter = await create_embedding_adapter(config)
            return await store.search(
                query, project, mem_type=type, embedding_adapter=embedding_adapter
            )
        finally:
            await store.close()

    results = asyncio.run(_run())

    if format == "json":
        typer.echo(json.dumps(results, indent=2, default=str))
    else:
        if not results:
            typer.echo("No memories found.")
            return
        for mem in results:
            typer.echo(_format_memory_text(mem))
353
+
354
+
355
# NOTE(review): this function name shadows the builtin `list` for every
# module-level statement after this point; kept as-is because the name is
# the block's public interface.
@app.command()
def list(
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Filter to global + this project. Omit to list everything."),
    type: Optional[str] = typer.Option(None, "--type", "-t", help="Filter by type"),
    format: str = typer.Option("text", "--format", "-f", help="Output format: json|text"),
):
    """List all active memories.

    Without --project: shows every memory across all projects and global scope.
    With --project: shows global + that project's memories only.
    """
    async def _run():
        cfg = _get_config()
        memory_store = _get_store(cfg)
        await memory_store.init_db()
        try:
            # Only the selected coroutine is created and awaited.
            rows = await (memory_store.load(project) if project else memory_store.list_all())
            if not type:
                return rows
            return [row for row in rows if row["type"] == type]
        finally:
            await memory_store.close()

    memories = asyncio.run(_run())

    if format == "json":
        typer.echo(json.dumps(memories, indent=2, default=str))
        return
    if not memories:
        typer.echo("No memories found.")
        return
    for entry in memories:
        typer.echo(_format_memory_text(entry))
391
+
392
+
393
@app.command()
def show(
    memory_id: str = typer.Argument(..., help="Memory ID to show"),
):
    """Show details for a specific memory."""
    async def _fetch():
        cfg = _get_config()
        memory_store = _get_store(cfg)
        await memory_store.init_db()
        try:
            return await memory_store.get(memory_id)
        finally:
            await memory_store.close()

    record = asyncio.run(_fetch())

    if record is not None:
        typer.echo(json.dumps(record, indent=2, default=str))
        return

    # Unknown ID: report on stderr and exit with status 1.
    typer.echo(f"Memory {memory_id} not found.", err=True)
    raise typer.Exit(1)
413
+
414
+
415
def _parse_transcript_for_path(path: Path, content: str) -> str:
    """Convert a session file's content to plain text, choosing the parser by filename.

    Filename dispatch:
        session-*.json   -> GeminiCLIWatcher.parse_transcript
        rollout-*.jsonl  -> CodexCLIWatcher.parse_transcript
        anything else    -> returned unchanged (treated as plain text)

    The watcher classes are instantiated with ``__new__`` (skipping ``__init__``)
    because only their ``parse_transcript`` method is needed here.
    """
    filename = path.name

    looks_like_gemini = filename.startswith("session-") and filename.endswith(".json")
    if looks_like_gemini:
        from deja.ingest.watchers.gemini_cli import GeminiCLIWatcher
        parser = GeminiCLIWatcher.__new__(GeminiCLIWatcher)
        return parser.parse_transcript(content)

    looks_like_codex = filename.startswith("rollout-") and filename.endswith(".jsonl")
    if looks_like_codex:
        from deja.ingest.watchers.codex_cli import CodexCLIWatcher
        parser = CodexCLIWatcher.__new__(CodexCLIWatcher)
        return parser.parse_transcript(content)

    return content
434
+
435
+
436
@app.command(name="save-session")
def save_session(
    transcript: Optional[str] = typer.Option(None, "--transcript", "-t", help="Path to transcript file"),
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Project name"),
):
    """Extract memories from a session transcript.

    Without --transcript: prints an extraction prompt to stdout. The agent reads
    it, identifies memories from the session, and calls deja save for each one.
    This requires no API key — the agent already running IS the model.
    Works identically for Claude Code, Gemini CLI, Codex, or any agent.

    With --transcript: reads the file and uses the configured LLM to extract
    memories automatically (requires extraction.provider set in config).
    File format is auto-detected from the filename:
    session-*.json → Gemini CLI
    rollout-*.jsonl → Codex CLI
    anything else → plain text (Claude Code summary.md)
    """
    from deja.core.extractor import EXTRACTION_SYSTEM

    if transcript is None:
        # Agent mode: emit the prompt and let the running agent do the extraction.
        project_hint = f" for project '{project}'" if project else ""
        project_flag = f" --project {project}" if project else ""
        prompt_parts = [
            f"Review this session's context{project_hint} and identify memories worth keeping.\n\n",
            f"{EXTRACTION_SYSTEM}\n\n",
            "For each memory you identify, call:\n",
            ' deja save "<content>" --type <type>',
            project_flag,
            "\n\n",
            "Only save things that are non-obvious, reusable, and important. ",
            "If nothing is worth saving, do nothing.",
        ]
        typer.echo("".join(prompt_parts))
        return

    async def _run():
        cfg = _get_config()
        llm = await create_adapter(cfg, "extraction")
        if llm is None:
            typer.echo(
                "No LLM configured for extraction. Set extraction.provider in "
                "~/.deja/config.yaml, or run `deja save-session` without "
                "--transcript to let the agent extract memories interactively.",
                err=True,
            )
            raise typer.Exit(1)

        transcript_path = Path(transcript).expanduser()
        if not transcript_path.exists():
            typer.echo(f"Transcript file not found: {transcript_path}", err=True)
            raise typer.Exit(1)

        raw_text = transcript_path.read_text(encoding="utf-8", errors="replace")
        # Normalize agent-specific formats (Gemini JSON / Codex JSONL) to plain
        # text first; the extractor is fed plain text only.
        plain_text = _parse_transcript_for_path(transcript_path, raw_text)

        memory_store = _get_store(cfg)
        await memory_store.init_db()
        try:
            from deja.core.extractor import extract_memories
            extracted = await extract_memories(
                plain_text, project or "unknown", "save-session", llm
            )
            embedder = await create_embedding_adapter(cfg)
            return await _embed_and_save(extracted, memory_store, embedder)
        finally:
            await memory_store.close()

    count = asyncio.run(_run())
    typer.echo(f"Saved {count} memories from transcript.")
509
+
510
+
511
@app.command(name="ingest-skills")
def ingest_skills(
    path: str = typer.Argument(..., help="Path to skill/rules file to import (markdown, plain text, .cursorrules, etc.)"),
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Project name to scope imported memories to."),
    dry_run: bool = typer.Option(False, "--dry-run", help="Print what would be saved without writing to the store."),
    no_llm: bool = typer.Option(False, "--no-llm", help="Skip LLM extraction. Parse markdown sections directly as procedure memories."),
):
    """Import an existing skill/rules file as procedure + pattern memories.

    Accepts any plain-text file: markdown skill docs, .cursorrules, AGENTS.md,
    custom rules files, etc.

    LLM mode (default): passes the file through the extraction pipeline, same as
    `deja save-session --transcript`. Requires extraction.provider set in config.
    Infers types (procedure, pattern, gotcha) from content.

    --no-llm mode: parses markdown ## headings as section boundaries. Each section
    becomes one procedure memory (heading as title, body as content). Zero API cost.

    --dry-run: prints what would be saved without touching the store.
    """
    skill_path = Path(path).expanduser()
    if not skill_path.exists():
        typer.echo(f"File not found: {skill_path}", err=True)
        raise typer.Exit(1)

    content = skill_path.read_text(encoding="utf-8", errors="replace")
    if not content.strip():
        typer.echo("File is empty.", err=True)
        raise typer.Exit(1)

    scope = "global" if not project else f"project:{project}"

    async def _extract():
        # LLM extraction path; only awaited when --no-llm is not set.
        cfg = _get_config()
        llm = await create_adapter(cfg, "extraction")
        if llm is None:
            typer.echo(
                "No LLM configured for extraction. Set extraction.provider in "
                "~/.deja/config.yaml, or use --no-llm to parse markdown headings directly.",
                err=True,
            )
            raise typer.Exit(1)
        from deja.core.extractor import extract_memories
        return await extract_memories(content, project or "unknown", "ingest-skills", llm)

    if no_llm:
        # Heuristic path: each markdown section becomes one procedure memory.
        memories = _parse_skills_markdown(content, project, scope)
        if not memories:
            typer.echo("No ## sections found. Use LLM mode or add markdown headings.", err=True)
            raise typer.Exit(1)
    else:
        memories = asyncio.run(_extract())
        if not memories:
            typer.echo("Extraction returned no memories. Try --no-llm or check the file format.", err=True)
            raise typer.Exit(1)

    # --project wins over whatever scope the extractor produced.
    if project:
        for item in memories:
            item["scope"] = scope
            item["project"] = project

    if dry_run:
        typer.echo(f"[dry-run] Would save {len(memories)} memories from {skill_path.name}:")
        for item in memories:
            label = f"[{item['type']}]"
            if item.get("project"):
                label += f"({item['project']})"
            typer.echo(f" {label} {item['content'][:100]}")
        return

    async def _save():
        cfg = _get_config()
        memory_store = _get_store(cfg)
        await memory_store.init_db()
        try:
            embedder = await create_embedding_adapter(cfg)
            return await _embed_and_save(memories, memory_store, embedder)
        finally:
            await memory_store.close()

    saved = asyncio.run(_save())
    typer.echo(f"Saved {saved} memories from {skill_path.name}.")
597
+
598
+
599
def _parse_skills_markdown(content: str, project: Optional[str], scope: str) -> list[dict]:
    """Parse a markdown file into procedure memories, one per heading section.

    Sections start at level 1-3 headings (``# ``, ``## ``, ``### ``) found at
    the start of a line. Text before the first heading is not a section and is
    ignored, so a file without any heading yields an empty list. Sections whose
    heading text is empty are skipped.

    Args:
        content: Full markdown text.
        project: Project name stored on every memory (may be None).
        scope: Scope string stored on every memory.

    Returns:
        A list of memory dicts of type "procedure" whose content is
        ``"<heading>: <body>"``, or just the heading when the body is empty.
    """
    import re

    # re.split() always returns the text preceding the first match as element
    # 0 (an empty string when the file opens with a heading). That preamble has
    # no heading, so it is dropped rather than emitted as a bogus section.
    chunks = re.split(r"^#{1,3} ", content, flags=re.MULTILINE)[1:]

    memories = []
    for chunk in chunks:
        lines = chunk.splitlines()
        if not lines:
            continue
        # The heading is whatever followed the "#" marker on the same line; it
        # is read before any stripping so a body line is never promoted to it.
        heading = lines[0].strip()
        if not heading:
            continue
        body = "\n".join(lines[1:]).strip()
        mem_content = f"{heading}: {body}" if body else heading
        memories.append(
            {
                "type": "procedure",
                "category": "agent",
                "content": mem_content,
                "scope": scope,
                "project": project,
                "source": "ingest-skills",
                "confidence": 1.0,
                "domain": None,
            }
        )
    return memories
627
+
628
+
629
@app.command()
def export(
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Export only this project's memories."),
    type: Optional[str] = typer.Option(None, "--type", "-t", help="Comma-separated list of memory types to include."),
    output: str = typer.Option("memories.jsonl", "--output", "-o", help="Output file path."),
    compress: bool = typer.Option(False, "--compress", help="Compress output with gzip."),
    include_archived: bool = typer.Option(False, "--include-archived", help="Include archived memories."),
):
    """Export memories to a JSONL file.

    Format: JSONL (one memory per line).
    Filters: --project, --type, --include-archived.
    Compression: --compress (gzip).
    """
    async def _run():
        config = _get_config()
        store = _get_store(config)
        await store.init_db()
        try:
            types = None
            if type:
                # Tolerate "a, b" and stray commas: strip each item, drop empties.
                types = [t.strip() for t in type.split(",") if t.strip()] or None
            return await store.list_for_export(project, types, include_archived)
        finally:
            await store.close()

    memories = asyncio.run(_run())

    if not memories:
        typer.echo("No memories found to export.")
        return

    output_path = Path(output)
    if compress and output_path.suffix != ".gz":
        output_path = output_path.with_suffix(output_path.suffix + ".gz")

    # After the adjustment above --compress always implies a ".gz" suffix, and
    # an explicit ".gz" output path is gzip-compressed even without --compress.
    opener = gzip.open if output_path.suffix == ".gz" else open

    try:
        # Both openers accept text mode; explicit UTF-8 and "\n" newlines keep
        # the JSONL output identical across platforms and compression modes.
        with opener(output_path, "wt", encoding="utf-8", newline="\n") as f:
            for mem in memories:
                f.write(json.dumps(mem, default=str) + "\n")
        typer.echo(f"Exported {len(memories)} memories to {output_path}")
    except Exception as e:
        typer.echo(f"Export failed: {e}", err=True)
        raise typer.Exit(1)
679
+
680
+
681
@app.command(name="import")
def import_cmd(
    file: Path = typer.Argument(..., help="Path to the JSONL file to import."),
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Overwrite scope and project for all imported memories."),
    dry_run: bool = typer.Option(False, "--dry-run", help="Preview what would happen without modifying the database."),
    merge_strategy: str = typer.Option("skip", "--merge-strategy", "-m", help="Strategy for handling ID collisions: skip|overwrite|update-confidence"),
):
    """Import memories from a JSONL file.

    Format: JSONL or JSONL.gz (auto-detected).
    Strategies: skip (default), overwrite, update-confidence.
    """
    if not file.exists():
        typer.echo(f"Import file not found: {file}", err=True)
        raise typer.Exit(1)
    merge_strategy = merge_strategy.strip().lower()
    if merge_strategy not in VALID_MERGE_STRATEGIES:
        typer.echo(
            f"Invalid merge strategy: {merge_strategy}. "
            "Use one of: skip, overwrite, update-confidence",
            err=True,
        )
        raise typer.Exit(1)

    async def _run():
        config = _get_config()
        store = _get_store(config)
        await store.init_db()

        stats = {
            "inserted": 0,
            "skipped": 0,
            "updated": 0,
            "overwritten": 0,
            "invalid": 0,
        }

        try:
            # gzip is detected from the ".gz" suffix. Both variants are read as
            # UTF-8 text so plain files no longer depend on the locale's
            # default encoding while compressed ones are decoded as UTF-8.
            open_fn = gzip.open if file.suffix == ".gz" else open

            with open_fn(file, "rt", encoding="utf-8") as f:
                for line_no, line in enumerate(f, start=1):
                    if not line.strip():
                        continue

                    try:
                        raw_memory = json.loads(line)
                    except json.JSONDecodeError:
                        typer.echo(
                            f"Warning: skipped malformed line {line_no} in {file}",
                            err=True,
                        )
                        stats["invalid"] += 1
                        continue

                    memory, validation_error = _prepare_import_memory(raw_memory, project)
                    if validation_error:
                        typer.echo(
                            f"Warning: skipped invalid line {line_no} in {file}: {validation_error}",
                            err=True,
                        )
                        stats["invalid"] += 1
                        continue

                    existing = await store.get(memory["id"])
                    if project and existing:
                        # Re-targeting to --project: an ID that already belongs
                        # to a different scope/project is not a collision; the
                        # record gets a fresh ID and is treated as new.
                        target_scope = f"project:{project}"
                        existing_scope = existing.get("scope")
                        existing_project = existing.get("project")
                        if existing_scope != target_scope or existing_project != project:
                            memory["id"] = str(ULID())
                            existing = None

                    if dry_run:
                        # Predict the outcome without touching the store.
                        if not existing:
                            stats["inserted"] += 1
                        elif merge_strategy == "skip":
                            stats["skipped"] += 1
                        elif merge_strategy == "overwrite":
                            stats["overwritten"] += 1
                        elif merge_strategy == "update-confidence":
                            if memory["content"] == existing["content"]:
                                stats["updated"] += 1
                            else:
                                stats["skipped"] += 1
                    else:
                        # upsert() is expected to return one of the stats keys.
                        result = await store.upsert(memory, merge_strategy)
                        stats[result] += 1

            return stats
        finally:
            await store.close()

    stats = asyncio.run(_run())

    action = "Would import" if dry_run else "Imported"
    # Invalid records are reported separately and excluded from the total.
    total = stats["inserted"] + stats["skipped"] + stats["updated"] + stats["overwritten"]
    typer.echo(f"{action}: {total} memories")
    if stats["inserted"]:
        typer.echo(f" - {stats['inserted']} new ({'would insert' if dry_run else 'inserted'})")
    if stats["skipped"]:
        typer.echo(f" - {stats['skipped']} already exist ({'would skip' if dry_run else 'skipped'})")
    if stats["updated"]:
        typer.echo(f" - {stats['updated']} confidence bumped ({'would update' if dry_run else 'updated'})")
    if stats["overwritten"]:
        typer.echo(f" - {stats['overwritten']} records replaced ({'would overwrite' if dry_run else 'overwritten'})")
    if stats["invalid"]:
        typer.echo(f" - {stats['invalid']} invalid records ({'would skip' if dry_run else 'skipped'})")
792
+
793
+
794
@app.command()
def update(
    memory_id: str = typer.Argument(..., help="Memory ID to update"),
    trigger: Optional[str] = typer.Option(None, "--trigger", help="Comma-separated trigger phrases to add (merged with existing)."),
    type: Optional[str] = typer.Option(None, "--type", "-t", help="New memory type: preference|pattern|decision|gotcha|progress|procedure"),
):
    """Update metadata on an existing memory.

    Only trigger and type can be updated. Content changes should use
    deja save (which deduplicates automatically).

    Trigger phrases are merged with any existing trigger, not replaced.

    Example:
    deja update 01JKB... --trigger "kubectl apply, helm upgrade"
    deja update 01JKB... --type gotcha
    """
    async def _run():
        cfg = _get_config()
        memory_store = _get_store(cfg)
        await memory_store.init_db()
        try:
            # Collect only the options the user actually passed.
            candidates = (("trigger", trigger), ("type", type))
            changes = {key: value for key, value in candidates if value is not None}
            if not changes:
                typer.echo("No fields to update. Use --trigger or --type.", err=True)
                raise typer.Exit(1)
            return await memory_store.update_memory(memory_id, changes)
        finally:
            await memory_store.close()

    updated = asyncio.run(_run())
    if not updated:
        typer.echo(f"Memory not found or already archived: {memory_id}", err=True)
        raise typer.Exit(1)
    typer.echo(f"Updated memory: {memory_id}")
835
+
836
+
837
@app.command()
def archive(
    memory_id: str = typer.Argument(..., help="Memory ID to archive"),
):
    """Archive a memory (soft delete)."""
    async def _archive():
        cfg = _get_config()
        memory_store = _get_store(cfg)
        await memory_store.init_db()
        try:
            await memory_store.archive(memory_id)
        finally:
            await memory_store.close()

    asyncio.run(_archive())
    # The confirmation is printed unconditionally; the result of
    # store.archive() is not inspected here.
    typer.echo(f"Archived memory: {memory_id}")
853
+
854
+
855
@app.command()
def reflect(
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Project to reflect on."),
    agent_mode: bool = typer.Option(
        False, "--agent-mode", "-a",
        help="Print memory dump + instructions for the coding agent to reflect. No LLM call.",
    ),
):
    """Reflect on memories: compress (Observer/Reflector), decay, promote, archive.

    Two modes:

    Agent mode (--agent-mode): prints a memory dump with instructions for the active
    coding agent (Claude Code, Codex, Gemini CLI) to execute deja commands directly.
    Zero extra API cost — the agent is already being billed for the session.

    LLM mode (default): uses the configured reflection LLM (Ollama/Anthropic) to run
    the Observer and Reflector passes, then decay/promote/archive without LLM.
    """

    async def _reflect_once():
        # Returns an (outcome, payload) pair; the store is always closed on exit.
        settings = _get_config()
        vault = _get_store(settings)
        await vault.init_db()
        try:
            if agent_mode:
                prompt_engine = ReflectionEngine(vault, settings.reflection)
                return "agent_mode", await prompt_engine.agent_mode_prompt(project)

            llm = await create_adapter(settings, "reflection")
            if not llm:
                return "no_llm", None

            llm_engine = ReflectionEngine(vault, settings.reflection, llm)
            return "done", await llm_engine.run_full(project)
        finally:
            await vault.close()

    outcome, payload = asyncio.run(_reflect_once())

    if outcome == "agent_mode":
        typer.echo(payload)
        return

    if outcome == "no_llm":
        typer.echo(
            "No reflection LLM configured. Options:\n"
            " --agent-mode use the active coding agent as reflector (free)\n"
            " deja config set reflection.provider anthropic (or ollama)",
            err=True,
        )
        raise typer.Exit(1)

    # LLM mode: report only the passes that actually did something.
    pass_templates = (
        ("observer", " Observer: {} observations created"),
        ("reflector", " Reflector: {} observations condensed"),
        ("decay", " Decay: {} memories decayed"),
        ("promote", " Promote: {} patterns promoted to global"),
        ("archive", " Archive: {} memories archived"),
    )
    report = ["Reflection complete:"]
    for key, template in pass_templates:
        if payload.get(key):
            report.append(template.format(payload[key]))
    if len(report) == 1:
        report.append(" Nothing to do.")
    typer.echo("\n".join(report))
918
+
919
+
920
@app.command()
def stats(
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Project to show stats for."),
):
    """Show memory vault statistics: counts by type, token estimate, last reflection times."""

    async def _collect():
        # Fetch the stats mapping and always release the store afterwards.
        settings = _get_config()
        vault = _get_store(settings)
        await vault.init_db()
        try:
            return await vault.get_stats(project)
        finally:
            await vault.close()

    report = asyncio.run(_collect())

    typer.echo(f"Project: {report['project']}")
    typer.echo(f"Active: {report['active']}")
    for mem_type, count in sorted(report["by_type"].items()):
        typer.echo(f" {mem_type:<16} {count}")
    typer.echo(f"Archived: {report['archived']}")
    typer.echo(f"Invalidated: {report['invalidated']}")
    typer.echo(f"Observations: {report['observations']}")
    typer.echo(f"Token estimate: ~{report['token_estimate']}")

    # Timestamps are printed only when set, truncated to their first 19 characters.
    timestamp_rows = (
        ("last_observer_at", "Last observer: "),
        ("last_reflector_at", "Last reflector: "),
        ("last_decay_at", "Last decay: "),
    )
    for key, prefix in timestamp_rows:
        stamp = report[key]
        if stamp:
            typer.echo(prefix + f"{stamp[:19]}")

    typer.echo(f"With embeddings: {report.get('with_embeddings', 0)}/{report['active']}")
951
+
952
+
953
@app.command()
def embed(
    project: Optional[str] = typer.Option(None, "--project", "-p", help="Scope backfill to global + this project. Omit to backfill all."),
):
    """Generate embeddings for memories that don't have one yet.

    Run this after enabling embedding.provider in ~/.deja/config.yaml to
    backfill embeddings for memories saved before semantic search was configured.

    Requires:
    1. embedding.provider set to 'ollama' in ~/.deja/config.yaml
    2. Ollama running with the configured model pulled:
    ollama pull nomic-embed-text
    """

    async def _backfill_embeddings():
        # Returns (embedded_count, failed_count).
        settings = _get_config()
        embedder = await create_embedding_adapter(settings)
        if embedder is None:
            typer.echo(
                "No embedding provider configured.\n"
                "Set embedding.provider in ~/.deja/config.yaml, e.g.:\n"
                " embedding:\n"
                " provider: ollama\n"
                " model: nomic-embed-text",
                err=True,
            )
            return 0, 0

        vault = _get_store(settings)
        await vault.init_db()
        try:
            pending = await vault.get_memories_without_embeddings(project)
            succeeded = 0
            errors = 0
            for record in pending:
                # One failing memory is reported and counted; the rest still run.
                try:
                    vector = await embedder.embed(record["content"])
                    payload = EmbeddingAdapter.to_bytes(vector)
                    await vault.save_embedding(record["id"], payload)
                except Exception as exc:
                    typer.echo(f"[deja] Failed to embed {record['id']}: {exc}", err=True)
                    errors += 1
                else:
                    succeeded += 1
            return succeeded, errors
        finally:
            await vault.close()

    done, failed = asyncio.run(_backfill_embeddings())
    message = f"Embedded {done} memories."
    if failed:
        message += f" {failed} failed."
    typer.echo(message)
1002
+
1003
+
1004
@app.command()
def invalidate(
    memory_id: str = typer.Argument(..., help="Memory ID to mark as superseded."),
):
    """Mark a memory as invalidated (superseded by newer information).

    Unlike archive (soft-delete for low-confidence memories), invalidate signals that
    this memory has been actively contradicted — used in agent-mode reflection when
    the agent identifies a memory that is no longer correct.
    """

    async def _invalidate_once() -> None:
        # Open the vault, flag the record, and release the store even on failure.
        settings = _get_config()
        vault = _get_store(settings)
        await vault.init_db()
        try:
            await vault.invalidate(memory_id)
        finally:
            await vault.close()

    asyncio.run(_invalidate_once())
    typer.echo(f"Invalidated: {memory_id}")
1025
+
1026
+
1027
+ # ── backfill helpers ──────────────────────────────────────────────────────────
1028
+
1029
+
1030
+ def _get_project_name_from_dir(project_dir: Path) -> str:
1031
+ """Resolve a human-readable project name from a ~/.claude/projects/<hash> dir.
1032
+
1033
+ Priority:
1034
+ 1. sessions-index.json → entries[0].projectPath → basename
1035
+ 2. First .jsonl file → cwd field → basename
1036
+ 3. Reconstruct from dir name (/ → - encoding); check if path exists on disk
1037
+ 4. Last hyphen-delimited token of dir name
1038
+ """
1039
+ index_file = project_dir / "sessions-index.json"
1040
+ if index_file.exists():
1041
+ try:
1042
+ data = json.loads(index_file.read_text(encoding="utf-8"))
1043
+ for entry in data.get("entries", []):
1044
+ pp = entry.get("projectPath")
1045
+ if pp:
1046
+ return Path(pp).name
1047
+ except Exception:
1048
+ pass
1049
+
1050
+ for jsonl_file in sorted(project_dir.glob("*.jsonl")):
1051
+ try:
1052
+ for raw in jsonl_file.read_text(encoding="utf-8", errors="replace").splitlines():
1053
+ d = json.loads(raw)
1054
+ cwd = d.get("cwd")
1055
+ if cwd:
1056
+ return Path(cwd).name
1057
+ except Exception:
1058
+ continue
1059
+
1060
+ raw = project_dir.name
1061
+ reconstructed = Path("/" + raw.lstrip("-").replace("-", "/"))
1062
+ if reconstructed.exists():
1063
+ return reconstructed.name
1064
+ return raw.split("-")[-1] or raw
1065
+
1066
+
1067
+ def _sessions_index_transcript(index_path: Path) -> str:
1068
+ """Build a lightweight transcript from sessions-index.json per-session metadata."""
1069
+ try:
1070
+ data = json.loads(index_path.read_text(encoding="utf-8"))
1071
+ parts = []
1072
+ for entry in data.get("entries", []):
1073
+ summary = (entry.get("summary") or "").strip()
1074
+ first_prompt = (entry.get("firstPrompt") or "").strip()
1075
+ if summary or first_prompt:
1076
+ parts.append(f"Session summary: {summary}\nUser's first message: {first_prompt}")
1077
+ return "\n\n---\n\n".join(parts)
1078
+ except Exception:
1079
+ return ""
1080
+
1081
+
1082
+ def _parse_jsonl_transcript(jsonl_path: Path, max_chars: int = 30_000) -> str:
1083
+ """Extract a readable transcript from a Claude Code .jsonl session file.
1084
+
1085
+ Keeps user messages and assistant text blocks; skips thinking blocks and
1086
+ tool results to keep the text focused.
1087
+ """
1088
+ turns = []
1089
+ try:
1090
+ for raw in jsonl_path.read_text(encoding="utf-8", errors="replace").splitlines():
1091
+ try:
1092
+ d = json.loads(raw)
1093
+ except json.JSONDecodeError:
1094
+ continue
1095
+
1096
+ if d.get("type") not in ("user", "assistant"):
1097
+ continue
1098
+
1099
+ msg = d.get("message", {})
1100
+ role = msg.get("role", d["type"])
1101
+ content = msg.get("content", "")
1102
+
1103
+ if isinstance(content, str):
1104
+ text = content.strip()
1105
+ elif isinstance(content, type([])): # list — avoid name shadowed by `def list()`
1106
+ parts = [
1107
+ item.get("text", "").strip()
1108
+ for item in content
1109
+ if isinstance(item, dict) and item.get("type") == "text"
1110
+ ]
1111
+ text = "\n".join(p for p in parts if p)
1112
+ else:
1113
+ continue
1114
+
1115
+ if text:
1116
+ turns.append(f"{role.capitalize()}: {text}")
1117
+ except Exception:
1118
+ return ""
1119
+
1120
+ full = "\n\n".join(turns)
1121
+ return full[:max_chars]
1122
+
1123
+
1124
def _count_index_entries(index_file: Path):
    """Return the number of entries in a sessions-index.json file.

    The file is read as UTF-8, matching _sessions_index_transcript. Returns the
    string "?" instead of raising when the file cannot be read or parsed.
    """
    try:
        data = json.loads(index_file.read_text(encoding="utf-8"))
        return len(data.get("entries", []))
    except (OSError, ValueError, AttributeError, TypeError):
        return "?"


@app.command()
def backfill(
    project: Optional[str] = typer.Option(
        None, "--project", "-p",
        help="Only backfill this project. Omit to backfill all projects."
    ),
    include_sessions: Optional[bool] = typer.Option(
        None, "--include-sessions/--no-include-sessions", "-s/-S",
        help="Process full .jsonl session transcripts. Defaults to True in --agent-mode, False otherwise."
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run",
        help="Show what would be processed without calling the LLM or saving."
    ),
    agent_mode: bool = typer.Option(
        False, "--agent-mode",
        help=(
            "Print session content + instructions for the active agent to extract memories. "
            "No LLM API call needed — the agent running IS the model. "
            "Defaults --include-sessions to True."
        ),
    ),
    claude_dir: str = typer.Option(
        "~/.claude/projects",
        "--claude-dir",
        help="Path to the Claude projects directory.",
    ),
):
    """Backfill memories from existing Claude Code session history.

    Processes for each project (in order):
    1. session-memory/summary.md files (structured session summaries)
    2. sessions-index.json per-session summaries (lightweight, one LLM call per project)
    3. Full .jsonl session transcripts (with --include-sessions or --agent-mode)

    --agent-mode: prints all session content to stdout with deja save instructions.
    The active coding agent reads the output and calls deja save for each memory.
    No API key required — the agent already running is the LLM.

    Deduplication is handled automatically — re-running backfill is safe.
    Without --agent-mode, requires extraction.provider set in ~/.deja/config.yaml.
    """
    # In agent mode, include full session transcripts by default (richer signal).
    # User can explicitly override with --no-include-sessions.
    effective_include_sessions = include_sessions if include_sessions is not None else agent_mode

    projects_root = Path(claude_dir).expanduser()
    if not projects_root.exists():
        typer.echo(f"Claude projects directory not found: {projects_root}", err=True)
        raise typer.Exit(1)

    # ── Agent mode: dump content + instructions, no LLM call ─────────────────
    if agent_mode:
        from deja.core.extractor import EXTRACTION_SYSTEM

        project_dirs = sorted(p for p in projects_root.iterdir() if p.is_dir())
        total_sources = 0

        for project_dir in project_dirs:
            proj_name = _get_project_name_from_dir(project_dir)

            if project and proj_name != project:
                continue

            typer.echo(f"\n{'='*60}")
            typer.echo(f"=== Backfill: {proj_name} ===")
            typer.echo(f"{'='*60}\n")

            # 1. summary.md files
            for summary_file in sorted(project_dir.rglob("session-memory/summary.md")):
                content = summary_file.read_text(encoding="utf-8", errors="replace").strip()
                if not content:
                    continue
                total_sources += 1
                typer.echo("--- summary.md ---")
                typer.echo(content)
                typer.echo()

            # 2. sessions-index.json
            index_file = project_dir / "sessions-index.json"
            if index_file.exists():
                transcript = _sessions_index_transcript(index_file)
                if transcript.strip():
                    entry_count = _count_index_entries(index_file)
                    total_sources += 1
                    typer.echo(f"--- sessions-index.json ({entry_count} sessions) ---")
                    typer.echo(transcript)
                    typer.echo()

            # 3. full .jsonl session transcripts
            if effective_include_sessions:
                jsonl_files = sorted(
                    f for f in project_dir.glob("*.jsonl")
                    if f.parent.name != "subagents"
                )
                for jsonl_file in jsonl_files:
                    transcript = _parse_jsonl_transcript(jsonl_file)
                    if not transcript.strip():
                        continue
                    total_sources += 1
                    typer.echo(f"--- {jsonl_file.name} ---")
                    typer.echo(transcript)
                    typer.echo()

        if total_sources == 0:
            typer.echo("No session content found to process.")
            return

        project_flag = f" --project {project}" if project else ""
        typer.echo(f"\n{'='*60}")
        typer.echo("=== Instructions ===")
        typer.echo(f"{'='*60}")
        typer.echo(
            f"\nReview all session content above and identify memories worth keeping.\n\n"
            f"{EXTRACTION_SYSTEM}\n\n"
            f"For each memory you identify, call:\n"
            f" deja save \"<content>\" --type <type>{project_flag}\n"
            f" (omit --project for global memories that apply across all projects)\n\n"
            f"Only save things that are non-obvious, reusable, and important.\n"
            f"If nothing is worth saving, do nothing.\n"
            f"\n({total_sources} sources printed above)"
        )
        return

    # ── Auto mode: use configured LLM ────────────────────────────────────────
    async def _run():
        config = _get_config()
        if dry_run:
            # A dry run never calls the LLM, so no adapter is required.
            adapter = None
        else:
            adapter = await create_adapter(config, "extraction")
            if adapter is None:
                typer.echo(
                    "No LLM configured for extraction. Set extraction.provider in "
                    "~/.deja/config.yaml, or use --agent-mode to let the "
                    "active coding agent extract memories without an API call.",
                    err=True,
                )
                raise typer.Exit(1)

        store = _get_store(config)
        await store.init_db()

        total_sources = 0
        total_memories = 0

        try:
            # Created inside the try so the store is closed if this fails.
            embedding_adapter = await create_embedding_adapter(config)

            project_dirs = sorted(p for p in projects_root.iterdir() if p.is_dir())

            for project_dir in project_dirs:
                proj_name = _get_project_name_from_dir(project_dir)

                if project and proj_name != project:
                    continue

                typer.echo(f"\nProject: {proj_name} ({project_dir.name})")

                # ── 1. summary.md files ──────────────────────────────────────
                for summary_file in sorted(project_dir.rglob("session-memory/summary.md")):
                    total_sources += 1
                    content = summary_file.read_text(encoding="utf-8", errors="replace").strip()
                    if not content:
                        typer.echo(" summary.md (empty, skipped)")
                        continue

                    if dry_run:
                        typer.echo(f" [dry-run] summary.md ({len(content)} chars)")
                        continue

                    memories = await extract_memories(content, proj_name, "backfill", adapter)
                    total_memories += await _embed_and_save(memories, store, embedding_adapter)
                    typer.echo(f" summary.md → {len(memories)} memories")

                # ── 2. sessions-index.json ───────────────────────────────────
                index_file = project_dir / "sessions-index.json"
                if index_file.exists():
                    total_sources += 1
                    transcript = _sessions_index_transcript(index_file)
                    if not transcript.strip():
                        typer.echo(" sessions-index.json (no summaries, skipped)")
                    elif dry_run:
                        entry_count = _count_index_entries(index_file)
                        typer.echo(f" [dry-run] sessions-index.json ({entry_count} sessions)")
                    else:
                        entry_count = _count_index_entries(index_file)
                        memories = await extract_memories(transcript, proj_name, "backfill", adapter)
                        total_memories += await _embed_and_save(memories, store, embedding_adapter)
                        typer.echo(f" sessions-index.json ({entry_count} sessions) → {len(memories)} memories")

                # ── 3. full .jsonl transcripts (opt-in) ──────────────────────
                if effective_include_sessions:
                    jsonl_files = sorted(
                        f for f in project_dir.glob("*.jsonl")
                        if f.parent.name != "subagents"
                    )
                    for jsonl_file in jsonl_files:
                        total_sources += 1
                        transcript = _parse_jsonl_transcript(jsonl_file)
                        if not transcript.strip():
                            continue

                        if dry_run:
                            typer.echo(f" [dry-run] {jsonl_file.name} ({len(transcript)} chars)")
                            continue

                        memories = await extract_memories(transcript, proj_name, "backfill", adapter)
                        total_memories += await _embed_and_save(memories, store, embedding_adapter)
                        typer.echo(f" {jsonl_file.name} → {len(memories)} memories")

        finally:
            await store.close()

        return total_sources, total_memories

    total_sources, total_memories = asyncio.run(_run())
    typer.echo(f"\nDone. {total_sources} sources processed, {total_memories} memories saved.")
1344
+
1345
+
1346
@app.command()
def viewer(
    host: str = typer.Option("127.0.0.1", "--host", help="Host to bind to."),
    port: int = typer.Option(8888, "--port", "-p", help="Port to listen on."),
    no_browser: bool = typer.Option(False, "--no-browser", help="Don't open browser automatically."),
):
    """Launch the deja web viewer.

    Opens a browser UI to browse, search, and manage your memory vault.
    Press Ctrl+C to stop.
    """
    # uvicorn is imported lazily so a missing install produces a clear message.
    try:
        import uvicorn
    except ImportError:
        typer.echo("uvicorn not installed. Run: uv add uvicorn", err=True)
        raise typer.Exit(1)

    from deja.interfaces.web import app as web_app

    if not no_browser:
        import threading
        import webbrowser
        import time

        def _launch_browser():
            # Short pause before opening the page, since the server is only
            # started after this thread is spawned.
            time.sleep(0.8)
            webbrowser.open(f"http://{host}:{port}")

        opener = threading.Thread(target=_launch_browser, daemon=True)
        opener.start()

    typer.echo(f"deja viewer → http://{host}:{port} (Ctrl+C to stop)")
    uvicorn.run(web_app, host=host, port=port, log_level="warning")
1378
+
1379
+
1380
@app.command()
def watch():
    """Start watching for session files from enabled agents.

    Agents watched (configured in ~/.deja/config.yaml):
    claude_code: ~/.claude/projects/**/session-memory/summary.md
    gemini_cli: ~/.gemini/tmp/**/chats/session-*.json
    codex_cli: ~/.codex/sessions/**/rollout-*.jsonl

    Enable additional watchers by setting them to true in config:
    watchers:
    gemini_cli: true
    codex_cli: true
    """
    config = _get_config()
    store = _get_store(config)

    # Run event loop in background thread for async operations
    loop = asyncio.new_event_loop()

    def start_loop():
        asyncio.set_event_loop(loop)
        loop.run_forever()

    loop_thread = threading.Thread(target=start_loop, daemon=True)
    loop_thread.start()

    def run_on_loop(coro):
        """Run a coroutine on the background loop; wait up to 10s for its result."""
        return asyncio.run_coroutine_threadsafe(coro, loop).result(timeout=10)

    def close_store_and_stop_loop():
        """Close the store on the background loop, then stop that loop.

        The close must finish BEFORE loop.stop is scheduled: once stop() runs,
        the loop exits after the current batch of callbacks, so a coroutine
        submitted afterwards would never get to execute.
        """
        try:
            run_on_loop(store.close())
        except Exception as exc:
            typer.echo(f"[deja] Failed to close store cleanly: {exc}", err=True)
        finally:
            loop.call_soon_threadsafe(loop.stop)
            loop_thread.join(timeout=5)

    # Initialize store in the background loop
    run_on_loop(store.init_db())

    # Create adapter (may be None if provider: none — watcher saves raw summaries)
    adapter = run_on_loop(create_adapter(config, "extraction"))
    if adapter is None:
        typer.echo(
            "Note: no LLM configured. Watcher will save raw session summaries as "
            "progress memories. Set extraction.provider in ~/.deja/config.yaml "
            "to enable structured extraction."
        )

    embedding_adapter = run_on_loop(create_embedding_adapter(config))

    # Create reflection engine + scheduler
    reflection_adapter = run_on_loop(create_adapter(config, "reflection"))
    engine = ReflectionEngine(store, config.reflection, reflection_adapter)
    scheduler = make_scheduler(engine, loop)
    scheduler.start()
    typer.echo("Reflection scheduler started (nightly 2am + 5-min token-count checks).")

    # Build the list of enabled watchers
    watcher_kwargs = dict(
        store=store,
        extractor_fn=extract_memories,
        adapter=adapter,
        debounce_seconds=config.watchers.debounce_seconds,
        embedding_adapter=embedding_adapter,
    )
    watchers = []
    if config.watchers.claude_code:
        watchers.append(ClaudeCodeWatcher(**watcher_kwargs))
    if config.watchers.gemini_cli:
        watchers.append(GeminiCLIWatcher(**watcher_kwargs))
    if config.watchers.codex_cli:
        watchers.append(CodexCLIWatcher(**watcher_kwargs))

    if not watchers:
        typer.echo(
            "No watchers enabled. Set at least one to true in ~/.deja/config.yaml:\n"
            " watchers:\n"
            " claude_code: true\n"
            " gemini_cli: true\n"
            " codex_cli: true",
            err=True,
        )
        # Release what was already started before bailing out.
        scheduler.shutdown(wait=False)
        close_store_and_stop_loop()
        raise typer.Exit(1)

    class _Handler(FileSystemEventHandler):
        """Forward file create/modify events to one watcher on the asyncio loop."""

        def __init__(self, w):
            super().__init__()
            self._watcher = w

        def _forward(self, event: FileSystemEvent):
            # Called on the observer's thread, so hand the path to the loop
            # thread instead of invoking the watcher directly.
            if not event.is_directory:
                path = Path(event.src_path)
                loop.call_soon_threadsafe(self._watcher.handle_file_event, path)

        def on_modified(self, event: FileSystemEvent):
            self._forward(event)

        def on_created(self, event: FileSystemEvent):
            self._forward(event)

    observer = Observer()

    for watcher in watchers:
        # Each watcher gets its own handler that calls its handle_file_event
        handler = _Handler(watcher)
        for watch_path in watcher.get_watch_paths():
            watch_path.mkdir(parents=True, exist_ok=True)
            observer.schedule(handler, str(watch_path), recursive=True)
            typer.echo(f"Watching [{watcher.__class__.__name__}]: {watch_path}")

    observer.start()
    typer.echo("Memory service watcher running. Press Ctrl+C to stop.")

    # Block the main thread until interrupted; nothing ever sets this event.
    stop_event = threading.Event()
    try:
        stop_event.wait()
    except KeyboardInterrupt:
        pass
    finally:
        scheduler.shutdown(wait=False)
        observer.stop()
        observer.join()
        close_store_and_stop_loop()
        typer.echo("\nWatcher stopped.")
1504
+
1505
+
1506
+ # ── setup command ─────────────────────────────────────────────────────────────
1507
+
1508
+ # Hook scripts are embedded here so `deja setup` works after `uv tool install`
1509
+ # without the source repo present.
1510
+
1511
# Bash source of the pre-tool-use hook, kept as a raw string so it is stored verbatim.
# The script reads the hook's JSON payload from stdin, exits quietly unless the tool
# is "Bash" and the command matches one of the grep patterns, runs `deja search` with
# the mapped intent, and prints any results as JSON under
# hookSpecificOutput.additionalContext. It relies on `jq` and `deja` being on PATH.
_RECALL_HOOK_SCRIPT = r"""#!/usr/bin/env bash
# deja-recall.sh — pre-tool-use hook for Claude Code
#
# Fires before each Bash tool call. Classifies the command against a short
# allowlist of high-signal operations (deploy, migrate, rotate, etc.), runs
# `deja search` for matching ones, and injects relevant gotchas/procedures
# into the agent's context before the command executes.

set -euo pipefail

INPUT=$(cat)

TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // ""')
CMD=$(echo "$INPUT" | jq -r '.tool_input.command // ""')

# Only fire for Bash tool calls
[ "$TOOL_NAME" != "Bash" ] && exit 0

INTENT=""

if echo "$CMD" | grep -qE "kubectl apply|helm upgrade|kubectl rollout"; then
INTENT="deploy kubernetes"
elif echo "$CMD" | grep -qE "alembic upgrade|alembic downgrade|flask db upgrade|python manage.py migrate"; then
INTENT="database migration"
elif echo "$CMD" | grep -qE "terraform apply|terraform destroy|terraform import"; then
INTENT="terraform infrastructure"
elif echo "$CMD" | grep -qE "git push.*(--force|-f)|git push.*(main|master)"; then
INTENT="git push force main"
elif echo "$CMD" | grep -qE "aws.*deploy|aws.*update|aws.*delete|aws.*terminate"; then
INTENT="aws deploy infrastructure"
elif echo "$CMD" | grep -qE "secret|credential|rotate|revoke|vault"; then
INTENT="secret rotation credentials"
elif echo "$CMD" | grep -qE "docker.*push|docker.*deploy|docker.*prod"; then
INTENT="docker deploy production"
elif echo "$CMD" | grep -qE "pg_dump|mysqldump|mongodump|pg_restore|mongorestore"; then
INTENT="database backup restore"
elif echo "$CMD" | grep -qE "npm publish|pip publish|cargo publish|gem push"; then
INTENT="publish package release"
fi

[ -z "$INTENT" ] && exit 0

MEMORIES=$(deja search "$INTENT" 2>/dev/null || true)

[ -z "$MEMORIES" ] && exit 0

jq -n --arg ctx "$MEMORIES" '{
"hookSpecificOutput": {
"additionalContext": ("[deja recall]\n" + $ctx)
}
}'
"""
1563
+
1564
# Bash source of the post-tool-use hook, kept as a raw string so it is stored verbatim.
# Same command-to-intent mapping as the recall hook, but the script only continues
# when the payload reports a failure (tool_result.exit_code other than 0, or
# tool_result.is_error other than false). Results of `deja search` are printed as
# JSON under the "systemMessage" key. It relies on `jq` and `deja` being on PATH.
_POST_FAIL_HOOK_SCRIPT = r"""#!/usr/bin/env bash
# deja-post-fail.sh — post-tool-use hook for Claude Code
#
# Fires after each Bash tool call. If the command was high-signal AND failed
# (non-zero exit / is_error), searches deja for related gotchas and injects
# them so the agent debugs with past context immediately.

set -euo pipefail

INPUT=$(cat)

TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // ""')
CMD=$(echo "$INPUT" | jq -r '.tool_input.command // ""')

[ "$TOOL_NAME" != "Bash" ] && exit 0

EXIT_CODE=$(echo "$INPUT" | jq -r '.tool_result.exit_code // 0')
IS_ERROR=$(echo "$INPUT" | jq -r '.tool_result.is_error // false')

if [ "$EXIT_CODE" = "0" ] && [ "$IS_ERROR" = "false" ]; then
exit 0
fi

INTENT=""

if echo "$CMD" | grep -qE "kubectl apply|helm upgrade|kubectl rollout"; then
INTENT="deploy kubernetes"
elif echo "$CMD" | grep -qE "alembic upgrade|alembic downgrade|flask db upgrade|python manage.py migrate"; then
INTENT="database migration"
elif echo "$CMD" | grep -qE "terraform apply|terraform destroy|terraform import"; then
INTENT="terraform infrastructure"
elif echo "$CMD" | grep -qE "git push.*(--force|-f)|git push.*(main|master)"; then
INTENT="git push force main"
elif echo "$CMD" | grep -qE "aws.*deploy|aws.*update|aws.*delete|aws.*terminate"; then
INTENT="aws deploy infrastructure"
elif echo "$CMD" | grep -qE "secret|credential|rotate|revoke|vault"; then
INTENT="secret rotation credentials"
elif echo "$CMD" | grep -qE "docker.*push|docker.*deploy|docker.*prod"; then
INTENT="docker deploy production"
elif echo "$CMD" | grep -qE "pg_dump|mysqldump|mongodump|pg_restore|mongorestore"; then
INTENT="database backup restore"
elif echo "$CMD" | grep -qE "npm publish|pip publish|cargo publish|gem push"; then
INTENT="publish package release"
fi

[ -z "$INTENT" ] && exit 0

MEMORIES=$(deja search "$INTENT" 2>/dev/null || true)

[ -z "$MEMORIES" ] && exit 0

jq -n --arg ctx "$MEMORIES" '{
"systemMessage": ("[deja recall — command failed, related gotchas]\n" + $ctx)
}'
"""
1619
+
1620
# Bash source of the session-end hook, kept as a raw string so it is stored verbatim.
# The script reads transcript_path and cwd from the JSON payload on stdin, exits
# quietly when the transcript path is empty or not a file, and otherwise runs
# `deja save-session --transcript ...`, passing --project <basename of cwd> when
# that basename is non-empty. Errors from deja are discarded (`2>/dev/null || true`).
_SESSION_END_HOOK_SCRIPT = r"""#!/usr/bin/env bash
# deja-session-end.sh — session-end hook for Claude Code
#
# Fires when the Claude Code session ends (including /exit). Reads the
# transcript path from stdin JSON and runs `deja save-session --transcript`
# to automatically extract and save memories from the session.
#
# Requires a provider configured: deja config set provider anthropic
# (or openai, google, etc.). Exits silently if no provider is set.

set -euo pipefail

INPUT=$(cat)
TRANSCRIPT=$(echo "$INPUT" | jq -r '.transcript_path // ""')
CWD=$(echo "$INPUT" | jq -r '.cwd // ""')

[ -z "$TRANSCRIPT" ] && exit 0
[ ! -f "$TRANSCRIPT" ] && exit 0

PROJECT=$(basename "$CWD")

if [ -n "$PROJECT" ]; then
deja save-session --transcript "$TRANSCRIPT" --project "$PROJECT" 2>/dev/null || true
else
deja save-session --transcript "$TRANSCRIPT" 2>/dev/null || true
fi
"""
1647
+
1648
# Heading line that opens the deja instructions block; _inject_block compares
# whole lines against this value to find an existing block.
_DEJA_BLOCK_MARKER = "## Deja"
1649
+
1650
# Instruction-file locations per coding agent, keyed by agent name.
# Each entry has a "global" path (None for windsurf, which has no global file
# listed here) and a "project" path relative to the current directory.
# Paths starting with "~" are stored unexpanded.
# NOTE(review): presumably consumed by the setup command — confirm against callers.
_AGENT_CONFIGS: dict[str, dict] = {
    "claude-code": {
        "global": Path("~/.claude/CLAUDE.md"),
        "project": Path("./CLAUDE.md"),
    },
    "gemini-cli": {
        "global": Path("~/.gemini/GEMINI.md"),
        "project": Path("./GEMINI.md"),
    },
    "codex": {
        "global": Path("~/.codex/AGENTS.md"),
        "project": Path("./AGENTS.md"),
    },
    "cursor": {
        "global": Path("~/.cursor/rules/deja.md"),
        "project": Path("./.cursorrules"),
    },
    "windsurf": {
        "global": None,
        "project": Path("./.windsurfrules"),
    },
}
1672
+
1673
+
1674
+ def _build_deja_block(project: Optional[str]) -> str:
1675
+ project_flag = f" --project {project}" if project else ""
1676
+ session_cmd = f"deja save-session --project {project}" if project else "deja save-session"
1677
+ return (
1678
+ f"## Deja\n"
1679
+ f"\n"
1680
+ f"At session START: run `deja load{project_flag} --context \"<brief description of what you're working on>\"`"
1681
+ f" and read the output. Assemble the context string from the user's first message, CWD, and"
1682
+ f" primary language before running the command.\n"
1683
+ f"\n"
1684
+ f"DURING the session, run `deja save` when you discover:\n"
1685
+ f"- Something that took >10 min to figure out → --type gotcha\n"
1686
+ f"- A non-obvious architectural decision → --type decision\n"
1687
+ f"- A pattern reusable across files/projects → --type pattern\n"
1688
+ f"- A significant chunk of work completed → --type progress\n"
1689
+ f"- A user preference → --type preference\n"
1690
+ f"\n"
1691
+ f"At session END: run `{session_cmd}` to flush remaining context."
1692
+ )
1693
+
1694
+
1695
def _inject_block(path: Path, block: str, force: bool) -> str:
    """Inject or replace the deja block in a config file.

    Args:
        path: Config file to write. It and its parent directories are created
            when missing.
        block: Block text to write, without a trailing newline.
        force: When the file already contains a block, replace it instead of
            leaving the file untouched.

    Returns "injected" | "replaced" | "already_configured".
    """
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(block + "\n", encoding="utf-8")
        return "injected"

    content = path.read_text(encoding="utf-8")
    lines = content.split("\n")

    # The marker must be a complete, newline-terminated line so that headings
    # such as "## Deja Integration" do not match. Every element of `lines`
    # except the last one was followed by "\n" in the file.
    try:
        start: Optional[int] = lines.index(_DEJA_BLOCK_MARKER, 0, len(lines) - 1)
    except ValueError:
        start = None

    if start is not None:
        if not force:
            return "already_configured"

        # The old block runs until the next heading of the same or a higher
        # level, or EOF. Stopping at "# " as well as "## " keeps a following
        # top-level section from being swallowed by the replacement.
        end = len(lines)
        for i in range(start + 1, len(lines)):
            if lines[i].startswith(("# ", "## ")):
                end = i
                break
        # Keep the blank lines that separate the block from what follows.
        while end > start + 1 and lines[end - 1] == "":
            end -= 1

        text = "\n".join(lines[:start] + block.split("\n") + lines[end:])
        # Only add a final newline when one is missing; appending it
        # unconditionally made the file grow by one line on every --force run.
        if not text.endswith("\n"):
            text += "\n"
        path.write_text(text, encoding="utf-8")
        return "replaced"

    # No block yet: append it, separated from existing text by one blank line.
    if not content.strip():
        # Empty or whitespace-only file: nothing to separate the block from.
        new_content = block + "\n"
    else:
        if content.endswith("\n\n"):
            sep = ""
        elif content.endswith("\n"):
            sep = "\n"
        else:
            sep = "\n\n"
        new_content = content + sep + block + "\n"
    path.write_text(new_content, encoding="utf-8")
    return "injected"
1740
+
1741
+
1742
+ def _detect_claude_plugin() -> list[str]:
1743
+ """Return a list of human-readable signals indicating the Claude Code plugin is active.
1744
+
1745
+ Checks four independent sources so partial installs are also caught:
1746
+ 1. ## Memory (deja) block in ~/.claude/CLAUDE.md (written by session-start.sh)
1747
+ 2. enabledPlugins entry containing "deja" in ~/.claude/settings.json
1748
+ 3. installed_plugins.json in ~/.claude/plugins/ containing a "deja" entry
1749
+ 4. CLAUDE_PLUGIN_ROOT hook commands in ~/.claude/settings.json (plugin-registered hooks)
1750
+ """
1751
+ signals: list[str] = []
1752
+
1753
+ # 1. CLAUDE.md block written by the plugin's session-start.sh
1754
+ claude_md = Path("~/.claude/CLAUDE.md").expanduser()
1755
+ if claude_md.exists():
1756
+ try:
1757
+ content = claude_md.read_text(encoding="utf-8")
1758
+ if "\n## Memory (deja)\n" in content or content.startswith("## Memory (deja)\n"):
1759
+ signals.append("## Memory (deja) block found in ~/.claude/CLAUDE.md")
1760
+ except OSError:
1761
+ pass
1762
+
1763
+ settings_path = Path("~/.claude/settings.json").expanduser()
1764
+ settings_text = ""
1765
+ settings: dict = {}
1766
+ if settings_path.exists():
1767
+ try:
1768
+ settings_text = settings_path.read_text(encoding="utf-8")
1769
+ settings = json.loads(settings_text)
1770
+ except (OSError, json.JSONDecodeError):
1771
+ pass
1772
+
1773
+ # 2. enabledPlugins entry with "deja" in the key
1774
+ for key, enabled in settings.get("enabledPlugins", {}).items():
1775
+ if "deja" in key.lower() and enabled:
1776
+ signals.append(f"plugin '{key}' enabled in ~/.claude/settings.json (enabledPlugins)")
1777
+
1778
+ # 3. installed_plugins.json registry
1779
+ installed_plugins_path = Path("~/.claude/plugins/installed_plugins.json").expanduser()
1780
+ if installed_plugins_path.exists():
1781
+ try:
1782
+ registry = json.loads(installed_plugins_path.read_text(encoding="utf-8"))
1783
+ for key in registry.get("plugins", {}):
1784
+ if "deja" in key.lower():
1785
+ signals.append(
1786
+ f"plugin '{key}' found in ~/.claude/plugins/installed_plugins.json"
1787
+ )
1788
+ except (OSError, json.JSONDecodeError):
1789
+ pass
1790
+
1791
+ # 4. CLAUDE_PLUGIN_ROOT in settings.json hooks (plugin hooks registered by Claude Code)
1792
+ if "CLAUDE_PLUGIN_ROOT" in settings_text:
1793
+ signals.append(
1794
+ "plugin hook commands (CLAUDE_PLUGIN_ROOT) found in ~/.claude/settings.json"
1795
+ )
1796
+
1797
+ return signals
1798
+
1799
+
1800
+ def _detect_deja_setup(target_path: Path, agent: str) -> list[str]:
1801
+ """Return signals indicating deja setup has already been run for this agent/path.
1802
+
1803
+ Checks the config file for the ## Deja block, and for claude-code additionally
1804
+ checks for hook scripts and settings.json registrations.
1805
+ """
1806
+ signals: list[str] = []
1807
+ display = str(target_path).replace(str(Path.home()), "~")
1808
+
1809
+ # 1. ## Deja block in the target config file
1810
+ if target_path.exists():
1811
+ try:
1812
+ content = target_path.read_text(encoding="utf-8")
1813
+ if "\n## Deja\n" in content or content.startswith("## Deja\n"):
1814
+ signals.append(f"## Deja block found in {display}")
1815
+ except OSError:
1816
+ pass
1817
+
1818
+ if agent != "claude-code":
1819
+ return signals
1820
+
1821
+ # 2-4. Hook scripts present in ~/.claude/hooks/
1822
+ hooks_dir = Path("~/.claude/hooks").expanduser()
1823
+ for name in ("deja-recall.sh", "deja-post-fail.sh", "deja-session-end.sh"):
1824
+ if (hooks_dir / name).exists():
1825
+ signals.append(f"{name} found in ~/.claude/hooks/")
1826
+
1827
+ # 5-7. Hook scripts registered in ~/.claude/settings.json
1828
+ settings_path = Path("~/.claude/settings.json").expanduser()
1829
+ if settings_path.exists():
1830
+ try:
1831
+ settings_text = settings_path.read_text(encoding="utf-8")
1832
+ for name in ("deja-recall.sh", "deja-post-fail.sh", "deja-session-end.sh"):
1833
+ if name in settings_text:
1834
+ signals.append(f"{name} registered in ~/.claude/settings.json")
1835
+ except OSError:
1836
+ pass
1837
+
1838
+ return signals
1839
+
1840
+
1841
def _install_claude_hooks() -> None:
    """Write hook scripts and register them in ~/.claude/settings.json.

    The three scripts are (re)written on every call and marked executable.
    A registration is added only when the script name does not already appear
    anywhere in settings.json, so repeated calls do not duplicate entries.

    An existing settings.json that cannot be parsed, or whose structure is not
    the expected JSON object, is left untouched and a message is printed to
    stderr; it is never overwritten with a fresh document.
    """
    hooks_dir = Path("~/.claude/hooks").expanduser()
    hooks_dir.mkdir(parents=True, exist_ok=True)

    scripts = (
        ("deja-recall.sh", _RECALL_HOOK_SCRIPT),
        ("deja-post-fail.sh", _POST_FAIL_HOOK_SCRIPT),
        ("deja-session-end.sh", _SESSION_END_HOOK_SCRIPT),
    )
    for name, script in scripts:
        path = hooks_dir / name
        # The script constants are triple-quoted strings; drop the newline(s)
        # before the first line so the file starts with its own first line.
        path.write_text(script.lstrip("\n"), encoding="utf-8")
        path.chmod(path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
        display = str(path).replace(str(Path.home()), "~")
        typer.echo(f"deja: hooks installed → {display}")

    settings_path = Path("~/.claude/settings.json").expanduser()
    settings_text = settings_path.read_text(encoding="utf-8") if settings_path.exists() else ""

    if settings_text.strip():
        try:
            settings = json.loads(settings_text)
        except json.JSONDecodeError as exc:
            # Falling back to {} here would replace the user's whole settings
            # file with only the deja hooks. Refuse instead.
            typer.echo(
                f"deja: ~/.claude/settings.json is not valid JSON ({exc}); "
                "hooks were not registered. Fix the file and re-run.",
                err=True,
            )
            return
    else:
        settings = {}

    hooks_obj = settings.setdefault("hooks", {}) if isinstance(settings, dict) else None
    if not isinstance(hooks_obj, dict):
        typer.echo(
            "deja: ~/.claude/settings.json has an unexpected structure; "
            "hooks were not registered.",
            err=True,
        )
        return

    # (script name, hook event, tool matcher or None when the event takes none)
    registrations = (
        ("deja-recall.sh", "PreToolUse", "Bash"),
        ("deja-post-fail.sh", "PostToolUse", "Bash"),
        ("deja-session-end.sh", "SessionEnd", None),
    )
    changed = False
    for script_name, event, matcher in registrations:
        if script_name in settings_text:
            continue  # already registered
        entries = hooks_obj.setdefault(event, [])
        if not isinstance(entries, list):
            typer.echo(
                f"deja: hooks.{event} in ~/.claude/settings.json is not a list; "
                f"{script_name} was not registered.",
                err=True,
            )
            continue
        entry: dict = {}
        if matcher is not None:
            entry["matcher"] = matcher
        entry["hooks"] = [{"type": "command", "command": f"~/.claude/hooks/{script_name}"}]
        entries.append(entry)
        changed = True

    if changed:
        settings_path.parent.mkdir(parents=True, exist_ok=True)
        # ensure_ascii=False keeps non-ASCII text in the user's existing
        # settings readable instead of rewriting it as \uXXXX escapes.
        settings_path.write_text(
            json.dumps(settings, indent=2, ensure_ascii=False) + "\n", encoding="utf-8"
        )
        typer.echo("deja: settings.json updated → hooks registered")
1894
+
1895
+
1896
@app.command()
def setup(
    agent: str = typer.Argument(..., help="Agent: claude-code, gemini-cli, codex, cursor, windsurf"),
    project: Optional[str] = typer.Option(None, "--project", help="Write to local project config instead of global"),
    hooks: bool = typer.Option(True, "--hooks/--no-hooks", help="Install hooks for claude-code (use --no-hooks to skip)"),
    force: bool = typer.Option(False, "--force", help="Overwrite existing deja block"),
):
    """Inject the deja memory protocol into an agent's config file.

    Without --project, writes to the agent's global config (~/.claude/CLAUDE.md etc.).
    With --project <name>, writes to the local project config (./CLAUDE.md etc.) and
    scopes deja load/save-session to that project name.
    """
    # The docstring above doubles as the command's --help text; keep it user-facing.
    #
    # Exit codes: 1 for an unknown agent, a detected Claude Code plugin install,
    # or an agent without a global config path; 0 when a previous setup is
    # detected and --force was not given.
    if agent not in _AGENT_CONFIGS:
        valid = ", ".join(sorted(_AGENT_CONFIGS))
        typer.echo(f"deja: unknown agent '{agent}'. Valid: {valid}", err=True)
        raise typer.Exit(1)

    if agent == "claude-code":
        # The plugin and this command both manage Claude Code; refuse to mix them.
        signals = _detect_claude_plugin()
        if signals:
            lines = ["deja: Claude Code plugin installation detected — stopping."]
            lines.append("The plugin and `deja setup` are separate paths; use one, not both.")
            lines.append("Signals found:")
            lines.extend(f" • {s}" for s in signals)
            lines.append(
                "To switch to the manual path: uninstall the plugin, remove the "
                "## Memory (deja) block from ~/.claude/CLAUDE.md, then re-run."
            )
            typer.echo("\n".join(lines), err=True)
            raise typer.Exit(1)

    cfg = _AGENT_CONFIGS[agent]

    raw_path: Optional[Path] = cfg["project"] if project else cfg.get("global")
    if raw_path is None:
        typer.echo(
            f"deja: {agent} has no known global config path. "
            f"Use --project <name> to write to a local project config.",
            err=True,
        )
        raise typer.Exit(1)

    target_path = raw_path.expanduser()
    display = str(target_path).replace(str(Path.home()), "~")

    # Look for an earlier setup BEFORE touching the hooks. The hook scripts and
    # their settings.json entries are themselves setup signals, so detecting
    # after installation made every first run report "already configured" and
    # skip writing the ## Deja block.
    setup_signals = _detect_deja_setup(target_path, agent)

    # Always run hook installation for claude-code — it's idempotent and new
    # hook scripts may have been added since the initial setup.
    if agent == "claude-code" and hooks:
        _install_claude_hooks()

    if setup_signals and not force:
        lines = [f"deja: {agent} already configured — stopping."]
        lines.append("Signals found:")
        lines.extend(f" • {s}" for s in setup_signals)
        lines.append("Use --force to update.")
        typer.echo("\n".join(lines))
        raise typer.Exit(0)

    block = _build_deja_block(project)
    result = _inject_block(target_path, block, force)

    if result == "replaced":
        typer.echo(f"deja: {agent} configured (updated) → {display}")
    elif result == "already_configured":
        # Only reachable if the file gained a block between detection and
        # injection; nothing was written, so do not claim it was.
        typer.echo(f"deja: {agent} already configured → {display}")
    else:
        typer.echo(f"deja: {agent} configured → {display}")