deja-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
@@ -0,0 +1,143 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import sys
5
+ from abc import ABC, abstractmethod
6
+ from pathlib import Path
7
+ from typing import Callable, Awaitable, Optional
8
+
9
+ from deja.core.store import MemoryStore
10
+ from deja.llm.base import LLMAdapter
11
+ from deja.llm.embedding import EmbeddingAdapter
12
+
13
+
14
class BaseWatcher(ABC):
    """Base class for watchers that turn agent session files into memories.

    Subclasses decide where to look (``get_watch_paths``), which files are
    relevant (``should_process``) and which project a file belongs to
    (``get_project_name``).  They may override ``parse_transcript`` to turn
    a structured on-disk format into plain text.

    File events are debounced per path: every event restarts a
    ``debounce_seconds`` timer, and the file is only read when that timer
    finally fires.
    """

    def __init__(
        self,
        store: MemoryStore,
        extractor_fn: Callable[..., Awaitable[list[dict]]],
        adapter: Optional[LLMAdapter],
        debounce_seconds: int = 30,
        embedding_adapter: Optional[EmbeddingAdapter] = None,
    ) -> None:
        """Store the collaborators and set up the debounce bookkeeping.

        Args:
            store: Object whose awaitable ``save`` persists a memory dict.
            extractor_fn: Awaited as
                ``extractor_fn(content, project, source, adapter)`` and
                expected to yield a list of memory dicts.
            adapter: LLM adapter handed to ``extractor_fn``.  When ``None``
                the transcript is stored as one raw "progress" memory.
            debounce_seconds: Quiet period after the last event for a path
                before the file is processed.
            embedding_adapter: Optional adapter used to embed each
                extracted memory's ``"content"`` before saving.
        """
        self.store = store
        self.extractor_fn = extractor_fn
        self.adapter = adapter
        self.debounce_seconds = debounce_seconds
        self.embedding_adapter = embedding_adapter
        # path -> last mtime we processed
        self._processed: dict[Path, float] = {}
        # path -> pending debounce timer
        self._pending: dict[Path, asyncio.TimerHandle] = {}
        # Strong references to in-flight processing tasks.  The event loop
        # only keeps weak references, so an otherwise unreferenced task may
        # be garbage-collected before it finishes.
        self._tasks: set[asyncio.Future] = set()
        # Remember the loop we were created on (if any) so events arriving
        # on another thread can be handed over to it safely.
        try:
            self._loop: Optional[asyncio.AbstractEventLoop] = (
                asyncio.get_running_loop()
            )
        except RuntimeError:
            self._loop = None

    @abstractmethod
    def get_watch_paths(self) -> list[Path]:
        """Return the root directories this watcher wants observed."""

    @abstractmethod
    def should_process(self, path: Path) -> bool:
        """Return True if ``path`` is a file this watcher should ingest."""

    @abstractmethod
    def get_project_name(self, path: Path) -> str:
        """Return the project name to associate with ``path``."""

    def parse_transcript(self, content: str) -> str:
        """Convert raw file content to a plain-text transcript for extraction.

        Default implementation returns content unchanged (suitable for plain
        text files like summary.md). Subclasses override this to parse
        JSON/JSONL formats.
        """
        return content

    def handle_file_event(self, path: Path) -> None:
        """Schedule debounced async processing of ``path``.

        May be called either on the event-loop thread or on a foreign thread
        (the watchdog observer).  Loop methods such as ``call_later`` are not
        thread-safe, so when a running loop is known and we are not on its
        thread the scheduling is handed over with ``call_soon_threadsafe``.
        """
        if not self.should_process(path):
            return

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # Not inside a running loop on this thread.
            loop = self._loop if self._loop is not None else asyncio.get_event_loop()
            if loop.is_running():
                loop.call_soon_threadsafe(self._schedule, loop, path)
                return

        self._schedule(loop, path)

    def _schedule(self, loop: asyncio.AbstractEventLoop, path: Path) -> None:
        """(Re)start the debounce timer for ``path`` on ``loop``."""
        previous = self._pending.pop(path, None)
        if previous is not None:
            previous.cancel()
        self._pending[path] = loop.call_later(
            self.debounce_seconds, self._spawn, path
        )

    def _spawn(self, path: Path) -> None:
        """Start ``_process(path)`` as a task and keep it referenced until done."""
        task = asyncio.ensure_future(self._process(path))
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)

    async def _process(self, path: Path) -> None:
        """Read the file and run the extraction pipeline.

        A file version (identified by its mtime) is handled at most once.
        Failures are reported on stderr and never raised to the caller.
        """
        self._pending.pop(path, None)

        try:
            mtime = path.stat().st_mtime
        except OSError:
            # Missing or unreadable metadata: nothing to do.
            return

        # Skip if we already processed this exact version
        if self._processed.get(path) == mtime:
            return

        try:
            content = path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            print(f"[deja] Failed to read {path}: {e}", file=sys.stderr)
            return

        content = self.parse_transcript(content)
        if not content.strip():
            # Nothing to extract yet; do not store an empty memory or spend
            # an extractor call on it.
            self._processed[path] = mtime
            return

        project = self.get_project_name(path)
        source = self.__class__.__name__.lower().replace("watcher", "")

        if self.adapter is None:
            # No LLM configured — save the raw summary as a single progress
            # memory.  The agent-driven path (CLAUDE.md skill prompt) handles
            # structured extraction.
            raw_memory = {
                "type": "progress",
                "category": "agent",
                "content": content[:2000].strip(),  # truncate to avoid huge entries
                "scope": f"project:{project}",
                "project": project,
                "source": source,
                "confidence": 0.5,
            }
            try:
                await self.store.save(raw_memory)
                print(
                    f"[deja] Saved raw summary from {path} (no LLM configured)",
                    file=sys.stderr,
                )
            except Exception as e:
                print(f"[deja] Failed to save raw summary: {e}", file=sys.stderr)
            self._processed[path] = mtime
            return

        try:
            memories = await self.extractor_fn(content, project, source, self.adapter)
        except Exception as e:
            print(f"[deja] Extraction failed for {path}: {e}", file=sys.stderr)
            return

        saved = 0
        for memory in memories or []:
            try:
                emb_bytes = None
                if self.embedding_adapter is not None:
                    # An embedding failure must not prevent the memory itself
                    # from being stored; it is saved without a vector.
                    try:
                        emb = await self.embedding_adapter.embed(memory["content"])
                        emb_bytes = EmbeddingAdapter.to_bytes(emb)
                    except Exception as emb_e:
                        print(f"[deja] Embedding failed: {emb_e}", file=sys.stderr)
                await self.store.save(memory, emb_bytes)
                saved += 1
            except Exception as e:
                print(f"[deja] Failed to save memory: {e}", file=sys.stderr)

        self._processed[path] = mtime
        if saved:
            print(
                f"[deja] Saved {saved} memories from {path} (project: {project})",
                file=sys.stderr,
            )
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ from deja.ingest.watchers.base import BaseWatcher
8
+
9
+
10
class ClaudeCodeWatcher(BaseWatcher):
    """Watches ~/.claude/projects/**/session-memory/summary.md for new session summaries."""

    def get_watch_paths(self) -> list[Path]:
        """Return the Claude Code projects directory under the user's home."""
        return [Path.home() / ".claude" / "projects"]

    def should_process(self, path: Path) -> bool:
        """Process summary.md files inside session-memory/ directories."""
        return path.name == "summary.md" and path.parent.name == "session-memory"

    def get_project_name(self, path: Path) -> str:
        """Extract project name from path.

        Path structure: ~/.claude/projects/<hash>/session-memory/summary.md
        First try reading ~/.claude/projects/<hash>/.project (if it exists).
        Fall back to the hash directory name.

        Returns "unknown" (after logging to stderr) if anything goes wrong.
        """
        # path.parents: [session-memory/, <hash>/, projects/, .claude/, ~/, ...]
        try:
            hash_dir = path.parent.parent  # <hash> dir

            declared = self._read_declared_name(hash_dir / ".project")
            if declared:
                return declared

            # Claude Code's hash dir name is the absolute project path with '/' → '-'.
            # e.g. "-Users-tree-Desktop-deja" encodes /Users/tree/Desktop/deja
            # Reconstruct the path and check if it actually exists on disk —
            # if it does, Path.name gives the correct project name regardless of hyphens.
            raw = hash_dir.name
            reconstructed = Path("/" + raw.lstrip("-").replace("-", "/"))
            if reconstructed.exists():
                return reconstructed.name
            # Reconstructed path doesn't exist (project path contains hyphens, so
            # the round-trip is ambiguous). Fall back to last dash-separated token.
            return raw.split("-")[-1] or raw
        except Exception as e:
            print(
                f"[deja] Could not determine project name for {path}: {e}",
                file=sys.stderr,
            )
            return "unknown"

    @staticmethod
    def _read_declared_name(project_file: Path) -> str:
        """Return the name declared in a ``.project`` file, or "" if none.

        The file could be JSON with a ``name`` / ``project_name`` field or
        plain text.  Plain text that happens to be valid JSON (a bare number
        such as ``2048`` or a quoted string) is still honoured as a name
        instead of being silently discarded.
        """
        if not project_file.exists():
            return ""
        content = project_file.read_text(encoding="utf-8").strip()
        if not content:
            return ""

        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            # Not JSON at all: the whole file is the name.
            return content

        if isinstance(data, dict):
            for key in ("name", "project_name"):
                value = data.get(key)
                # Skip null/blank fields rather than returning "None" or "".
                if value is not None and str(value).strip():
                    return str(value)
            return ""
        if isinstance(data, str):
            return data.strip()
        if isinstance(data, (int, float)) and not isinstance(data, bool):
            # A purely numeric plain-text name; keep it exactly as written.
            return content
        return ""
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ from deja.ingest.watchers.base import BaseWatcher
8
+
9
+
10
class CodexCLIWatcher(BaseWatcher):
    """Watches ~/.codex/sessions/**/rollout-*.jsonl for Codex CLI session files.

    File structure:
        ~/.codex/sessions/<year>/<month>/<day>/rollout-<timestamp>-<uuid>.jsonl

    Each line is a JSON object with a "type" field:
        - session_meta: payload.cwd gives the project directory
        - response_item: payload.role in (user, assistant, developer)
            user:      payload.content[].type == "input_text"
            assistant: payload.content[].type == "output_text"
            developer: system context — skipped
        - event_msg, turn_context: metadata — skipped

    Project name is derived from cwd in the session_meta line.
    """

    def get_watch_paths(self) -> list[Path]:
        """Return the Codex sessions directory under the user's home."""
        return [Path.home() / ".codex" / "sessions"]

    def should_process(self, path: Path) -> bool:
        """Process rollout-*.jsonl session files."""
        return path.suffix == ".jsonl" and path.stem.startswith("rollout-")

    def get_project_name(self, path: Path) -> str:
        """Extract project name from cwd in the session_meta line.

        The file is streamed and reading stops at the first usable
        session_meta record.  Falls back to the file stem
        (rollout-<timestamp>-<uuid>) when no cwd can be found or the file
        cannot be read.
        """
        try:
            with path.open(encoding="utf-8", errors="replace") as handle:
                for raw_line in handle:
                    cwd = self._session_cwd(raw_line)
                    if not cwd:
                        continue
                    name = Path(cwd).name
                    if name:
                        return name
        except OSError as e:
            print(
                f"[deja] Could not determine project name for {path}: {e}",
                file=sys.stderr,
            )
        # Fallback: stem of the filename (rollout-<timestamp>-<uuid>)
        return path.stem

    @staticmethod
    def _session_cwd(raw_line: str) -> str:
        """Return payload.cwd if ``raw_line`` is a session_meta record, else ""."""
        try:
            record = json.loads(raw_line)
        except ValueError:
            return ""
        if not isinstance(record, dict) or record.get("type") != "session_meta":
            return ""
        payload = record.get("payload")
        if not isinstance(payload, dict):
            return ""
        cwd = payload.get("cwd")
        return cwd if isinstance(cwd, str) else ""

    def parse_transcript(self, content: str) -> str:
        """Parse Codex JSONL into a plain-text conversation transcript.

        Extracts user and assistant turns from response_item lines.
        Skips developer (system context), event_msg, and turn_context lines,
        as well as any line that is not a well-formed record.
        """
        turns = []

        # Split on "\n" only.  str.splitlines() also breaks on characters
        # such as U+2028/U+2029/U+0085, which may legally appear unescaped
        # inside a JSON string and would cut a record in half.
        for raw_line in content.split("\n"):
            try:
                record = json.loads(raw_line)
            except ValueError:  # includes json.JSONDecodeError (blank/garbled line)
                continue

            if not isinstance(record, dict) or record.get("type") != "response_item":
                continue

            payload = record.get("payload")
            if not isinstance(payload, dict):
                continue

            role = payload.get("role")
            if role not in ("user", "assistant"):
                continue

            content_items = payload.get("content")
            if not isinstance(content_items, list):
                continue

            texts = []
            for item in content_items:
                if not isinstance(item, dict):
                    continue
                # user turns use input_text; assistant turns use output_text
                if item.get("type") not in ("input_text", "output_text"):
                    continue
                text = item.get("text")
                if isinstance(text, str) and text.strip():
                    texts.append(text.strip())

            if texts:
                label = "User" if role == "user" else "Codex"
                turns.append(f"{label}: " + "\n".join(texts))

        return "\n\n".join(turns)
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ from deja.ingest.watchers.base import BaseWatcher
8
+
9
+
10
class GeminiCLIWatcher(BaseWatcher):
    """Watches ~/.gemini/tmp/**/chats/session-*.json for Gemini CLI session files.

    File structure:
        ~/.gemini/tmp/<project-dir>/chats/session-<timestamp>.json

    Each file is a JSON object:
        {
            "sessionId": "...",
            "projectHash": "...",
            "startTime": "...",
            "lastUpdated": "...",
            "messages": [
                {"type": "user", "content": [{"text": "..."}]},
                {"type": "gemini", "content": "..."},
                {"type": "info", ...},   # skip
                {"type": "error", ...},  # skip
            ]
        }

    Project name is read from .project_root in the project directory, which
    contains the absolute path to the project on disk.
    """

    def get_watch_paths(self) -> list[Path]:
        """Return the Gemini CLI tmp directory under the user's home."""
        return [Path.home() / ".gemini" / "tmp"]

    def should_process(self, path: Path) -> bool:
        """Process session-*.json files inside chats/ directories."""
        return (
            path.suffix == ".json"
            and path.stem.startswith("session-")
            and path.parent.name == "chats"
        )

    def get_project_name(self, path: Path) -> str:
        """Extract project name from .project_root file.

        Path structure: ~/.gemini/tmp/<project-dir>/chats/session-*.json
        The .project_root file in <project-dir> contains the absolute project path.

        Falls back to the <project-dir> name, and to "unknown" (after logging
        to stderr) if anything goes wrong.
        """
        try:
            project_dir = path.parent.parent  # <project-dir>
            project_root_file = project_dir / ".project_root"
            if project_root_file.exists():
                root_path = project_root_file.read_text(encoding="utf-8").strip()
                # Path("/").name is "", so only accept a non-empty last component.
                name = Path(root_path).name if root_path else ""
                if name:
                    return name
            # Fallback: use the project dir name as-is (may be a hash or human name)
            return project_dir.name
        except Exception as e:
            print(
                f"[deja] Could not determine project name for {path}: {e}",
                file=sys.stderr,
            )
            return "unknown"

    def parse_transcript(self, content: str) -> str:
        """Parse Gemini session JSON into a plain-text conversation transcript.

        Content that is not valid JSON, or whose top-level value is not an
        object, is returned unchanged so the extractor can deal with it.
        Messages of unexpected shape are skipped instead of raising.
        """
        try:
            data = json.loads(content)
        except ValueError:  # includes json.JSONDecodeError
            return content  # not valid JSON — pass raw, extractor will handle it

        if not isinstance(data, dict):
            return content  # valid JSON but not a session object — pass raw

        messages = data.get("messages")
        if not isinstance(messages, list):
            return ""

        turns = []
        for msg in messages:
            if not isinstance(msg, dict):
                continue
            msg_type = msg.get("type", "")

            if msg_type == "user":
                raw_content = msg.get("content", [])
                if isinstance(raw_content, list):
                    # Only string "text" parts can be joined.
                    parts = [
                        part.get("text", "")
                        for part in raw_content
                        if isinstance(part, dict)
                    ]
                    text = " ".join(p for p in parts if isinstance(p, str)).strip()
                elif raw_content is None:
                    # An explicit null must not become the literal text "None".
                    text = ""
                else:
                    text = str(raw_content).strip()
                if text:
                    turns.append(f"User: {text}")

            elif msg_type == "gemini":
                text = msg.get("content", "")
                if isinstance(text, str) and text.strip():
                    turns.append(f"Gemini: {text.strip()}")

        return "\n\n".join(turns)
File without changes