deja-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deja/__init__.py +0 -0
- deja/config.py +127 -0
- deja/core/__init__.py +0 -0
- deja/core/extractor.py +135 -0
- deja/core/reflection.py +364 -0
- deja/core/scheduler.py +65 -0
- deja/core/store.py +1413 -0
- deja/ingest/__init__.py +0 -0
- deja/ingest/watchers/__init__.py +0 -0
- deja/ingest/watchers/base.py +143 -0
- deja/ingest/watchers/claude_code.py +62 -0
- deja/ingest/watchers/codex_cli.py +95 -0
- deja/ingest/watchers/gemini_cli.py +96 -0
- deja/interfaces/__init__.py +0 -0
- deja/interfaces/cli.py +1967 -0
- deja/interfaces/mcp_server.py +96 -0
- deja/interfaces/web.py +104 -0
- deja/interfaces/web_ui/index.html +614 -0
- deja/llm/__init__.py +0 -0
- deja/llm/base.py +34 -0
- deja/llm/embedding.py +45 -0
- deja/llm/factory.py +90 -0
- deja/llm/providers/__init__.py +0 -0
- deja/llm/providers/anthropic.py +21 -0
- deja/llm/providers/ollama.py +30 -0
- deja/main.py +4 -0
- deja_cli-0.1.0.dist-info/METADATA +100 -0
- deja_cli-0.1.0.dist-info/RECORD +31 -0
- deja_cli-0.1.0.dist-info/WHEEL +4 -0
- deja_cli-0.1.0.dist-info/entry_points.txt +3 -0
- deja_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
deja/ingest/__init__.py
ADDED
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import sys
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Callable, Awaitable, Optional
|
|
8
|
+
|
|
9
|
+
from deja.core.store import MemoryStore
|
|
10
|
+
from deja.llm.base import LLMAdapter
|
|
11
|
+
from deja.llm.embedding import EmbeddingAdapter
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BaseWatcher(ABC):
    """Abstract base for watchers that turn agent session files into memories.

    Subclasses decide where to look (``get_watch_paths``), which files matter
    (``should_process``) and which project a file belongs to
    (``get_project_name``). This class debounces file events per path and runs
    the read -> parse -> extract -> save pipeline.
    """

    def __init__(
        self,
        store: MemoryStore,
        extractor_fn: Callable[..., Awaitable[list[dict]]],
        adapter: Optional[LLMAdapter],
        debounce_seconds: int = 30,
        embedding_adapter: Optional[EmbeddingAdapter] = None,
    ) -> None:
        """Store collaborators and initialise the bookkeeping tables.

        Args:
            store: Object whose awaitable ``save(memory)`` /
                ``save(memory, embedding_bytes)`` persists one memory dict.
            extractor_fn: Awaited as
                ``extractor_fn(content, project, source, adapter)``; yields
                the memory dicts to save.
            adapter: LLM adapter handed to ``extractor_fn``. When ``None`` the
                transcript is saved raw as a single "progress" memory instead.
            debounce_seconds: Quiet period after the last event for a path
                before that path is processed.
            embedding_adapter: Optional adapter whose awaitable ``embed(text)``
                result is serialised with ``EmbeddingAdapter.to_bytes``.
        """
        self.store = store
        self.extractor_fn = extractor_fn
        self.adapter = adapter
        self.debounce_seconds = debounce_seconds
        self.embedding_adapter = embedding_adapter
        # path -> last mtime we processed
        self._processed: dict[Path, float] = {}
        # path -> pending debounce timer handle (cancelled when a newer event arrives)
        self._pending: dict[Path, asyncio.TimerHandle] = {}
        # Strong references to in-flight _process tasks. The event loop only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._tasks: set[asyncio.Future] = set()

    @abstractmethod
    def get_watch_paths(self) -> list[Path]: ...

    @abstractmethod
    def should_process(self, path: Path) -> bool: ...

    @abstractmethod
    def get_project_name(self, path: Path) -> str: ...

    def parse_transcript(self, content: str) -> str:
        """Convert raw file content to a plain-text transcript for extraction.

        Default implementation returns content unchanged (suitable for plain text
        files like summary.md). Subclasses override this to parse JSON/JSONL formats.
        """
        return content

    def handle_file_event(self, path: Path) -> None:
        """Schedule debounced async processing of ``path``.

        Paths rejected by ``should_process`` are ignored. A newer event for the
        same path cancels the timer that is still pending for it, so only the
        last event within ``debounce_seconds`` triggers processing.
        """
        if not self.should_process(path):
            return

        # NOTE(review): the original docstring says this is called from the
        # watchdog thread, but get_event_loop()/call_later are only safe on the
        # event loop's own thread — confirm the caller hops onto the loop
        # (e.g. via loop.call_soon_threadsafe) before invoking this method.
        loop = asyncio.get_event_loop()

        # Cancel any existing pending timer for this path
        stale = self._pending.get(path)
        if stale is not None:
            stale.cancel()

        self._pending[path] = loop.call_later(
            self.debounce_seconds, self._spawn_process, path
        )

    def _spawn_process(self, path: Path) -> None:
        """Start ``_process(path)`` as a task and hold it until it completes."""
        task = asyncio.ensure_future(self._process(path))
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)

    async def _process(self, path: Path) -> None:
        """Read file and run extraction pipeline.

        Failures are reported on stderr and never raised. The file's mtime is
        recorded once a version has been handled so the same version is not
        processed twice; a failed read or a failed extraction leaves it
        unrecorded so a later event can retry.
        """
        self._pending.pop(path, None)

        try:
            mtime = path.stat().st_mtime
        except OSError:
            # Covers a file that vanished during the debounce window.
            return

        # Skip if we already processed this exact version
        if self._processed.get(path) == mtime:
            return

        try:
            content = path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            print(f"[deja] Failed to read {path}: {e}", file=sys.stderr)
            return

        content = self.parse_transcript(content)
        if not content.strip():
            # Nothing to remember (empty file, or a session with no turns):
            # do not store an empty memory or spend an extraction call on it.
            self._processed[path] = mtime
            return

        project = self.get_project_name(path)
        source = self.__class__.__name__.lower().replace("watcher", "")

        if self.adapter is None:
            # No LLM configured — save the raw summary as a single progress memory.
            # The agent-driven path (CLAUDE.md skill prompt) handles structured extraction.
            raw_memory = {
                "type": "progress",
                "category": "agent",
                "content": content[:2000].strip(),  # truncate to avoid huge entries
                "scope": f"project:{project}",
                "project": project,
                "source": source,
                "confidence": 0.5,
            }
            try:
                await self.store.save(raw_memory)
                print(
                    f"[deja] Saved raw summary from {path} (no LLM configured)",
                    file=sys.stderr,
                )
            except Exception as e:
                print(f"[deja] Failed to save raw summary: {e}", file=sys.stderr)
            self._processed[path] = mtime
            return

        try:
            memories = await self.extractor_fn(content, project, source, self.adapter)
        except Exception as e:
            print(f"[deja] Extraction failed for {path}: {e}", file=sys.stderr)
            return

        saved = 0
        for memory in memories:
            try:
                emb_bytes = None
                if self.embedding_adapter is not None:
                    # An embedding failure is not fatal: the memory is still
                    # saved, just without an embedding.
                    try:
                        emb = await self.embedding_adapter.embed(memory["content"])
                        emb_bytes = EmbeddingAdapter.to_bytes(emb)
                    except Exception as emb_e:
                        print(f"[deja] Embedding failed: {emb_e}", file=sys.stderr)
                await self.store.save(memory, emb_bytes)
                saved += 1
            except Exception as e:
                print(f"[deja] Failed to save memory: {e}", file=sys.stderr)

        self._processed[path] = mtime
        if saved:
            print(
                f"[deja] Saved {saved} memories from {path} (project: {project})",
                file=sys.stderr,
            )
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from deja.ingest.watchers.base import BaseWatcher
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ClaudeCodeWatcher(BaseWatcher):
    """Watches ~/.claude/projects/**/session-memory/summary.md for new session summaries."""

    def get_watch_paths(self) -> list[Path]:
        """Return the single root directory to watch: ``~/.claude/projects``."""
        return [Path.home() / ".claude" / "projects"]

    def should_process(self, path: Path) -> bool:
        """Process summary.md files inside session-memory/ directories."""
        return path.name == "summary.md" and path.parent.name == "session-memory"

    def get_project_name(self, path: Path) -> str:
        """Extract project name from path.

        Path structure: ~/.claude/projects/<hash>/session-memory/summary.md
        First try reading ~/.claude/projects/<hash>/.project (if it exists).
        Fall back to decoding the hash directory name.

        Returns:
            The project name, or ``"unknown"`` if anything goes wrong (the
            error is reported on stderr).
        """
        try:
            hash_dir = path.parent.parent  # <hash> dir
            declared = self._name_from_project_file(hash_dir / ".project")
            if declared is not None:
                return declared
            return self._name_from_encoded_dir(hash_dir.name)
        except Exception as e:
            print(
                f"[deja] Could not determine project name for {path}: {e}",
                file=sys.stderr,
            )
            return "unknown"

    @staticmethod
    def _name_from_project_file(project_file: Path) -> Optional[str]:
        """Return the name declared in a ``.project`` file, or ``None``.

        The file could be JSON with a ``name`` / ``project_name`` field or
        plain text. ``None`` means "no usable declaration": the file is
        missing, is empty, or is valid JSON without either field.
        """
        if not project_file.exists():
            return None
        content = project_file.read_text(encoding="utf-8").strip()
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            # Not JSON: treat the whole file as the project name.
            return content or None
        if isinstance(data, dict):
            for key in ("name", "project_name"):
                if key in data:
                    return str(data[key])
        return None

    @staticmethod
    def _name_from_encoded_dir(raw: str) -> str:
        """Recover the project directory name from Claude Code's encoded dir name.

        Claude Code's hash dir name is the absolute project path with '/' -> '-',
        e.g. "-Users-tree-Desktop-deja" encodes /Users/tree/Desktop/deja. The
        encoding is ambiguous when the real path contains hyphens, so
        candidates are checked against the filesystem.
        """
        encoded = raw.lstrip("-")

        # Every '-' was a '/': if that path exists, its last component is the name.
        literal = Path("/" + encoded.replace("-", "/"))
        if literal.exists():
            return literal.name

        # Otherwise the project directory itself may contain hyphens
        # (e.g. ".../Desktop/my-app"). Re-join progressively more trailing
        # tokens with '-' and accept the first candidate that exists on disk.
        tokens = encoded.split("-")
        for cut in range(len(tokens) - 2, -1, -1):
            candidate_name = "-".join(tokens[cut:])
            parent = Path("/" + "/".join(tokens[:cut]))
            if (parent / candidate_name).exists():
                return candidate_name

        # Nothing matched on disk. Fall back to last dash-separated token.
        return raw.split("-")[-1] or raw
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from deja.ingest.watchers.base import BaseWatcher
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CodexCLIWatcher(BaseWatcher):
    """Watches ~/.codex/sessions/**/rollout-*.jsonl for Codex CLI session files.

    File structure:
    ~/.codex/sessions/<year>/<month>/<day>/rollout-<timestamp>-<uuid>.jsonl

    Each line is a JSON object with a "type" field:
    - session_meta: payload.cwd gives the project directory
    - response_item: payload.role in (user, assistant, developer)
      user: payload.content[].type == "input_text"
      assistant: payload.content[].type == "output_text"
      developer: system context — skipped
    - event_msg, turn_context: metadata — skipped

    Project name is derived from cwd in the session_meta line.
    """

    def get_watch_paths(self) -> list[Path]:
        """Return the single root directory to watch: ``~/.codex/sessions``."""
        return [Path.home() / ".codex" / "sessions"]

    def should_process(self, path: Path) -> bool:
        """Process rollout-*.jsonl session files."""
        return path.suffix == ".jsonl" and path.stem.startswith("rollout-")

    def get_project_name(self, path: Path) -> str:
        """Extract project name from cwd in the session_meta line.

        The file is streamed and reading stops at the first session_meta line
        that carries a usable cwd. Lines that are not JSON objects are skipped.

        Returns:
            The last component of ``cwd``, or the file stem
            (``rollout-<timestamp>-<uuid>``) when no cwd can be found or the
            file cannot be read (read errors are reported on stderr).
        """
        try:
            with path.open(encoding="utf-8", errors="replace") as session:
                for raw_line in session:
                    try:
                        record = json.loads(raw_line)
                    except ValueError:  # includes json.JSONDecodeError
                        continue
                    # A line may hold any JSON value; only objects have fields.
                    if not isinstance(record, dict):
                        continue
                    if record.get("type") != "session_meta":
                        continue
                    payload = record.get("payload")
                    cwd = payload.get("cwd") if isinstance(payload, dict) else None
                    if isinstance(cwd, str) and cwd:
                        name = Path(cwd).name
                        if name:  # empty for a cwd of "/"
                            return name
        except OSError as e:
            print(
                f"[deja] Could not determine project name for {path}: {e}",
                file=sys.stderr,
            )
        # Fallback: stem of the filename (rollout-<timestamp>-<uuid>)
        return path.stem

    def parse_transcript(self, content: str) -> str:
        """Parse Codex JSONL into a plain-text conversation transcript.

        Extracts user and assistant turns from response_item lines.
        Skips developer (system context), event_msg, and turn_context lines,
        as well as any line that is not a JSON object of the expected shape.

        Returns:
            Turns formatted as ``"User: ..."`` / ``"Codex: ..."`` separated by
            blank lines; an empty string when there are no turns.
        """
        turns = []

        # Split on "\n" only. str.splitlines() also breaks on U+2028, U+2029
        # and U+0085, which may appear unescaped inside JSON strings and would
        # cut a record in half. A trailing "\r" is whitespace to json.loads.
        for raw_line in content.split("\n"):
            try:
                record = json.loads(raw_line)
            except ValueError:  # includes json.JSONDecodeError (and blank lines)
                continue

            if not isinstance(record, dict) or record.get("type") != "response_item":
                continue

            payload = record.get("payload")
            if not isinstance(payload, dict):
                continue

            role = payload.get("role")
            if role not in ("user", "assistant"):
                continue

            content_items = payload.get("content")
            if not isinstance(content_items, list):
                continue

            texts = []
            for item in content_items:
                if not isinstance(item, dict):
                    continue
                # user turns use input_text; assistant turns use output_text
                if item.get("type") not in ("input_text", "output_text"):
                    continue
                text = item.get("text")
                if isinstance(text, str) and text.strip():
                    texts.append(text.strip())

            if texts:
                label = "User" if role == "user" else "Codex"
                turns.append(label + ": " + "\n".join(texts))

        return "\n\n".join(turns)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from deja.ingest.watchers.base import BaseWatcher
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GeminiCLIWatcher(BaseWatcher):
    """Watches ~/.gemini/tmp/**/chats/session-*.json for Gemini CLI session files.

    File structure:
    ~/.gemini/tmp/<project-dir>/chats/session-<timestamp>.json

    Each file is a JSON object:
    {
      "sessionId": "...",
      "projectHash": "...",
      "startTime": "...",
      "lastUpdated": "...",
      "messages": [
        {"type": "user", "content": [{"text": "..."}]},
        {"type": "gemini", "content": "..."},
        {"type": "info", ...},   # skip
        {"type": "error", ...},  # skip
      ]
    }

    Project name is read from .project_root in the project directory, which
    contains the absolute path to the project on disk.
    """

    def get_watch_paths(self) -> list[Path]:
        """Return the single root directory to watch: ``~/.gemini/tmp``."""
        return [Path.home() / ".gemini" / "tmp"]

    def should_process(self, path: Path) -> bool:
        """Process session-*.json files inside chats/ directories."""
        return (
            path.suffix == ".json"
            and path.stem.startswith("session-")
            and path.parent.name == "chats"
        )

    def get_project_name(self, path: Path) -> str:
        """Extract project name from .project_root file.

        Path structure: ~/.gemini/tmp/<project-dir>/chats/session-*.json
        The .project_root file in <project-dir> contains the absolute project path.

        Returns:
            The last component of the recorded project path; the name of
            ``<project-dir>`` when the file is missing, empty, or yields an
            empty name; ``"unknown"`` on error (reported on stderr).
        """
        try:
            project_dir = path.parent.parent  # <project-dir>
            project_root_file = project_dir / ".project_root"
            if project_root_file.exists():
                root_path = project_root_file.read_text(encoding="utf-8").strip()
                name = Path(root_path).name if root_path else ""
                if name:  # empty for a recorded root of "/"
                    return name
            # Fallback: use the project dir name as-is (may be a hash or human name)
            return project_dir.name
        except Exception as e:
            print(
                f"[deja] Could not determine project name for {path}: {e}",
                file=sys.stderr,
            )
            return "unknown"

    def parse_transcript(self, content: str) -> str:
        """Parse Gemini session JSON into a plain-text conversation transcript.

        Only "user" and "gemini" messages contribute turns. Content that is not
        valid JSON is returned unchanged; valid JSON of an unexpected shape
        (not an object, or without a "messages" list) produces an empty
        transcript rather than raising.

        Returns:
            Turns formatted as ``"User: ..."`` / ``"Gemini: ..."`` separated by
            blank lines.
        """
        try:
            data = json.loads(content)
        except (json.JSONDecodeError, ValueError):
            return content  # not valid JSON — pass raw, extractor will handle it

        messages = data.get("messages") if isinstance(data, dict) else None
        if not isinstance(messages, list):
            return ""

        turns = []
        for msg in messages:
            if not isinstance(msg, dict):
                continue
            msg_type = msg.get("type", "")

            if msg_type == "user":
                raw_content = msg.get("content", [])
                if isinstance(raw_content, list):
                    # Keep one slot per dict part, so spacing matches a plain
                    # join of the parts; a missing or non-string "text"
                    # counts as empty.
                    parts = [
                        part.get("text", "")
                        for part in raw_content
                        if isinstance(part, dict)
                    ]
                    text = " ".join(
                        p if isinstance(p, str) else "" for p in parts
                    ).strip()
                elif raw_content is None:
                    # JSON null: no text, rather than the literal string "None".
                    text = ""
                else:
                    text = str(raw_content).strip()
                if text:
                    turns.append(f"User: {text}")

            elif msg_type == "gemini":
                text = msg.get("content", "")
                if isinstance(text, str) and text.strip():
                    turns.append(f"Gemini: {text.strip()}")

        return "\n\n".join(turns)
|
|
File without changes
|