dev-recall 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dev_recall-0.2.0.dist-info/METADATA +281 -0
- dev_recall-0.2.0.dist-info/RECORD +34 -0
- dev_recall-0.2.0.dist-info/WHEEL +5 -0
- dev_recall-0.2.0.dist-info/entry_points.txt +2 -0
- dev_recall-0.2.0.dist-info/top_level.txt +1 -0
- recall/__init__.py +3 -0
- recall/_hooks.py +211 -0
- recall/cli.py +1032 -0
- recall/collectors/__init__.py +1 -0
- recall/collectors/ai_chat.py +644 -0
- recall/collectors/containers.py +164 -0
- recall/collectors/git.py +540 -0
- recall/collectors/linux_process.py +230 -0
- recall/collectors/linux_session.py +229 -0
- recall/collectors/linux_window.py +199 -0
- recall/collectors/shell.py +300 -0
- recall/collectors/vscode.py +175 -0
- recall/config.py +257 -0
- recall/daemon.py +466 -0
- recall/daemon_main.py +25 -0
- recall/mcp_server.py +290 -0
- recall/models.py +225 -0
- recall/processor/__init__.py +1 -0
- recall/processor/embedder.py +213 -0
- recall/processor/enricher.py +213 -0
- recall/processor/session.py +142 -0
- recall/query/__init__.py +1 -0
- recall/query/context.py +130 -0
- recall/query/llm.py +85 -0
- recall/query/retriever.py +147 -0
- recall/query/timeparser.py +188 -0
- recall/storage/__init__.py +1 -0
- recall/storage/db.py +528 -0
- recall/storage/vectors.py +166 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Docker / Podman container event collector.
|
|
2
|
+
|
|
3
|
+
Streams Docker container events and maps start/restart and die/stop/destroy/kill actions to
|
|
4
|
+
CONTAINER_START / CONTAINER_STOP events.
|
|
5
|
+
|
|
6
|
+
Supports both Docker (default socket) and Podman (rootless socket at
|
|
7
|
+
/run/user/<uid>/podman/podman.sock) by checking both sockets.
|
|
8
|
+
|
|
9
|
+
Gracefully becomes a no-op when:
|
|
10
|
+
- The `docker` SDK is not installed
|
|
11
|
+
- No accessible Docker/Podman socket is found
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
import threading
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Callable, Optional
|
|
22
|
+
|
|
23
|
+
from recall.models import Event, EventType, Source
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
import docker # type: ignore
|
|
29
|
+
|
|
30
|
+
_DOCKER_SDK_AVAILABLE = True
|
|
31
|
+
except ImportError:
|
|
32
|
+
_DOCKER_SDK_AVAILABLE = False
|
|
33
|
+
|
|
34
|
+
# Canonical socket paths in preference order
|
|
35
|
+
_DOCKER_SOCKETS = [
|
|
36
|
+
"/var/run/docker.sock",
|
|
37
|
+
"/run/docker.sock",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _podman_socket() -> Optional[str]:
    """Return the current user's rootless Podman socket path, or None when it does not exist."""
    candidate = Path("/run/user") / str(os.getuid()) / "podman" / "podman.sock"
    if not candidate.exists():
        return None
    return str(candidate)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _find_socket() -> Optional[str]:
    """Return the first readable and writable container socket, or None.

    The Docker paths in ``_DOCKER_SOCKETS`` are tried in order; the rootless
    Podman socket is only considered when none of them qualifies.
    """
    read_write = os.R_OK | os.W_OK

    for candidate in _DOCKER_SOCKETS:
        if not os.path.exists(candidate):
            continue
        if os.access(candidate, read_write):
            return candidate

    podman = _podman_socket()
    if podman and os.access(podman, read_write):
        return podman
    return None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _now_ts() -> tuple[str, str]:
    """Return the current UTC time as a ``(timestamp, date)`` pair of strings.

    The timestamp is formatted ``YYYY-MM-DDTHH:MM:SSZ`` and the date
    ``YYYY-MM-DD``; both come from the same clock reading.
    """
    current = datetime.now(timezone.utc)
    date_part = current.strftime("%Y-%m-%d")
    stamp = current.strftime("%Y-%m-%dT%H:%M:%SZ")
    return stamp, date_part
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Container actions that map to CONTAINER_START
|
|
63
|
+
_START_ACTIONS = {"start", "restart"}
|
|
64
|
+
# Container actions that map to CONTAINER_STOP
|
|
65
|
+
_STOP_ACTIONS = {"die", "stop", "destroy", "kill"}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ContainerCollector:
    """Stream Docker/Podman events and emit container lifecycle events.

    ``start()`` connects to the first accessible socket and spawns a daemon
    thread that reads the engine's event stream; every container action listed
    in ``_START_ACTIONS`` / ``_STOP_ACTIONS`` is turned into an ``Event`` and
    passed to the callback given at construction time.
    """

    def __init__(self, event_callback: Callable[[Event], None]) -> None:
        """Store *event_callback*; no connection is made until ``start()``."""
        self._cb = event_callback
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()
        self._client: Optional["docker.DockerClient"] = None  # type: ignore[name-defined]

    def start(self) -> None:
        """Connect to the container engine and start the streaming thread.

        Returns without starting anything when the docker SDK is missing, no
        socket is accessible, or the engine does not answer a ping.
        """
        if not _DOCKER_SDK_AVAILABLE:
            logger.debug("docker SDK unavailable — container tracking disabled")
            return

        socket = _find_socket()
        if socket is None:
            logger.debug("No accessible Docker/Podman socket — container tracking disabled")
            return

        try:
            self._client = docker.DockerClient(base_url=f"unix://{socket}")
            # Quick ping to verify it's alive
            self._client.ping()
        except Exception as exc:
            logger.debug("Docker/Podman socket not responding (%s) — disabled", exc)
            self._client = None
            return

        self._thread = threading.Thread(
            target=self._run,
            daemon=True,
            name="devmem-containers",
        )
        self._thread.start()
        logger.info("ContainerCollector started on socket %s", socket)

    def stop(self) -> None:
        """Signal the streaming thread to stop and close the client."""
        self._stop_event.set()
        # Close client to unblock the blocking events() generator
        if self._client is not None:
            try:
                self._client.close()
            except Exception as exc:
                # Best effort: shutdown must not fail because of a close error.
                logger.debug("Error closing container client: %s", exc)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _run(self) -> None:
        """Thread body: consume the event stream until stopped or it fails."""
        if self._client is None:
            return
        try:
            for raw in self._client.events(
                decode=True,
                filters={"type": "container"},
            ):
                if self._stop_event.is_set():
                    break
                try:
                    self._handle_raw(raw)
                except Exception:
                    # One bad event or a failing callback must not end
                    # container tracking for the rest of the session.
                    logger.exception("Error handling container event")
        except Exception:
            # Closing the client in stop() makes the generator raise; only
            # report failures that were not caused by a requested stop.
            if not self._stop_event.is_set():
                logger.exception("ContainerCollector stream crashed")

    def _handle_raw(self, raw: dict) -> None:
        """Convert one decoded engine event into an ``Event`` and emit it.

        Actions outside ``_START_ACTIONS`` / ``_STOP_ACTIONS`` are ignored.
        A missing or null ``Actor`` / ``Attributes`` entry is treated as empty.
        """
        action = raw.get("Action", "")
        if action in _START_ACTIONS:
            event_type = EventType.CONTAINER_START
        elif action in _STOP_ACTIONS:
            event_type = EventType.CONTAINER_STOP
        else:
            return

        actor = raw.get("Actor") or {}
        attrs = actor.get("Attributes") or {}
        container_id = (actor.get("ID") or "")[:12]
        name = attrs.get("name", container_id)
        image = attrs.get("image", "")

        ts, date = _now_ts()
        event_data = {
            "name": name,
            "container_id": container_id,
            "image": image,
            "action": action,
        }
        event = Event(
            timestamp=ts,
            date=date,
            event_type=event_type,
            source=Source.CONTAINER_EVENTS,
            content="",
            raw_data=event_data,
        )
        from recall.models import build_content

        event.content = build_content(event_type, event_data)
        self._cb(event)
|
recall/collectors/git.py
ADDED
|
@@ -0,0 +1,540 @@
|
|
|
1
|
+
"""Git collector — reads git.tsv and provides a fallback git log poller."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import fnmatch
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import subprocess
|
|
9
|
+
import threading
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Callable, Optional
|
|
13
|
+
|
|
14
|
+
from watchdog.events import FileModifiedEvent, FileSystemEventHandler
|
|
15
|
+
from watchdog.observers import Observer
|
|
16
|
+
|
|
17
|
+
from recall.models import Event, EventType, Source, build_content
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
# TSV column layouts
|
|
22
|
+
# commit: ts | "commit" | repo_path | hash | branch | message | files (|sep) | author
|
|
23
|
+
# branch: ts | "branch" | repo_path | old_branch | new_branch
|
|
24
|
+
# push: ts | "push" | repo_path | remote | branch | commit_count
|
|
25
|
+
# merge: ts | "merge" | repo_path | branch | merged_branch | is_squash
|
|
26
|
+
_COMMIT_COLS = 8
|
|
27
|
+
_BRANCH_COLS = 5
|
|
28
|
+
_PUSH_COLS = 6
|
|
29
|
+
_MERGE_COLS = 6
|
|
30
|
+
|
|
31
|
+
_OFFSET_KEY = "git_tsv_offset"
|
|
32
|
+
_LAST_POLL_KEY = "git_poller_last_run"
|
|
33
|
+
|
|
34
|
+
# Maximum directory depth when scanning for git repos
|
|
35
|
+
_MAX_SCAN_DEPTH = 4
|
|
36
|
+
# Poller interval (seconds)
|
|
37
|
+
_POLL_INTERVAL_SEC = 300
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class GitCollector:
|
|
41
|
+
"""
|
|
42
|
+
Watches git.tsv written by the global git hooks.
|
|
43
|
+
|
|
44
|
+
Also starts a poller as a fallback for repos that already had
|
|
45
|
+
core.hooksPath set before dev-recall was installed.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
def __init__(
|
|
49
|
+
self,
|
|
50
|
+
git_tsv: Path,
|
|
51
|
+
event_callback: Callable[[Event], None],
|
|
52
|
+
get_kv: Callable[[str], Optional[str]],
|
|
53
|
+
set_kv: Callable[[str, str], None],
|
|
54
|
+
repo_ignore_patterns: Optional[list[str]] = None,
|
|
55
|
+
) -> None:
|
|
56
|
+
self._path = git_tsv
|
|
57
|
+
self._callback = event_callback
|
|
58
|
+
self._get_kv = get_kv
|
|
59
|
+
self._set_kv = set_kv
|
|
60
|
+
self._ignore_patterns = repo_ignore_patterns or []
|
|
61
|
+
|
|
62
|
+
stored_offset = get_kv(_OFFSET_KEY)
|
|
63
|
+
self._offset: int = int(stored_offset) if stored_offset else 0
|
|
64
|
+
|
|
65
|
+
self._observer: Optional[Observer] = None
|
|
66
|
+
self._poller_thread: Optional[threading.Thread] = None
|
|
67
|
+
self._stop_event = threading.Event()
|
|
68
|
+
self._lock = threading.Lock()
|
|
69
|
+
|
|
70
|
+
# ------------------------------------------------------------------
|
|
71
|
+
# Public
|
|
72
|
+
# ------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
def start(self) -> None:
|
|
75
|
+
self._path.parent.mkdir(parents=True, exist_ok=True)
|
|
76
|
+
self._path.touch(exist_ok=True)
|
|
77
|
+
|
|
78
|
+
# Catch up on any lines written while daemon was down
|
|
79
|
+
self._read_new_lines()
|
|
80
|
+
|
|
81
|
+
handler = _GitTSVHandler(self._path, self._on_file_change)
|
|
82
|
+
self._observer = Observer()
|
|
83
|
+
self._observer.schedule(handler, str(self._path.parent), recursive=False)
|
|
84
|
+
self._observer.start()
|
|
85
|
+
|
|
86
|
+
self._poller_thread = threading.Thread(
|
|
87
|
+
target=self._poller_loop,
|
|
88
|
+
daemon=True,
|
|
89
|
+
name="dev-recall-git-poller",
|
|
90
|
+
)
|
|
91
|
+
self._poller_thread.start()
|
|
92
|
+
logger.info("GitCollector watching %s + poller started", self._path)
|
|
93
|
+
|
|
94
|
+
def stop(self) -> None:
|
|
95
|
+
self._stop_event.set()
|
|
96
|
+
if self._observer:
|
|
97
|
+
self._observer.stop()
|
|
98
|
+
self._observer.join(timeout=5)
|
|
99
|
+
logger.info("GitCollector stopped")
|
|
100
|
+
|
|
101
|
+
# ------------------------------------------------------------------
|
|
102
|
+
# File watcher
|
|
103
|
+
# ------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
def _on_file_change(self) -> None:
|
|
106
|
+
with self._lock:
|
|
107
|
+
self._read_new_lines()
|
|
108
|
+
|
|
109
|
+
def _read_new_lines(self) -> None:
|
|
110
|
+
try:
|
|
111
|
+
file_size = self._path.stat().st_size
|
|
112
|
+
except OSError:
|
|
113
|
+
return
|
|
114
|
+
|
|
115
|
+
if file_size <= self._offset:
|
|
116
|
+
if file_size < self._offset:
|
|
117
|
+
logger.warning("git.tsv shrank; resetting offset")
|
|
118
|
+
self._offset = 0
|
|
119
|
+
return
|
|
120
|
+
|
|
121
|
+
try:
|
|
122
|
+
with self._path.open("rb") as fh:
|
|
123
|
+
fh.seek(self._offset)
|
|
124
|
+
new_bytes = fh.read()
|
|
125
|
+
new_offset = fh.tell()
|
|
126
|
+
except OSError as exc:
|
|
127
|
+
logger.error("Cannot read git.tsv: %s", exc)
|
|
128
|
+
return
|
|
129
|
+
|
|
130
|
+
for raw_line in new_bytes.decode("utf-8", errors="replace").splitlines():
|
|
131
|
+
event = self._parse_line(raw_line.rstrip("\r\n"))
|
|
132
|
+
if event:
|
|
133
|
+
try:
|
|
134
|
+
self._callback(event)
|
|
135
|
+
except Exception:
|
|
136
|
+
logger.exception("Error in git event callback")
|
|
137
|
+
|
|
138
|
+
self._offset = new_offset
|
|
139
|
+
self._set_kv(_OFFSET_KEY, str(new_offset))
|
|
140
|
+
|
|
141
|
+
def _parse_line(self, line: str) -> Optional[Event]:
|
|
142
|
+
if not line.strip():
|
|
143
|
+
return None
|
|
144
|
+
parts = line.split("\t")
|
|
145
|
+
if len(parts) < 3:
|
|
146
|
+
return None
|
|
147
|
+
|
|
148
|
+
ts_str = parts[0]
|
|
149
|
+
kind = parts[1]
|
|
150
|
+
|
|
151
|
+
try:
|
|
152
|
+
dt = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
|
153
|
+
date_str = dt.strftime("%Y-%m-%d")
|
|
154
|
+
except ValueError:
|
|
155
|
+
dt = datetime.now(timezone.utc)
|
|
156
|
+
date_str = dt.strftime("%Y-%m-%d")
|
|
157
|
+
ts_str = dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
158
|
+
|
|
159
|
+
if kind == "commit" and len(parts) >= _COMMIT_COLS:
|
|
160
|
+
return self._parse_commit(ts_str, date_str, parts)
|
|
161
|
+
elif kind == "branch" and len(parts) >= _BRANCH_COLS:
|
|
162
|
+
return self._parse_branch(ts_str, date_str, parts)
|
|
163
|
+
elif kind == "push" and len(parts) >= _PUSH_COLS:
|
|
164
|
+
return self._parse_push(ts_str, date_str, parts)
|
|
165
|
+
elif kind == "merge" and len(parts) >= _MERGE_COLS:
|
|
166
|
+
return self._parse_merge(ts_str, date_str, parts)
|
|
167
|
+
|
|
168
|
+
return None
|
|
169
|
+
|
|
170
|
+
def _parse_commit(self, ts: str, date: str, parts: list[str]) -> Optional[Event]:
|
|
171
|
+
repo_path = parts[2]
|
|
172
|
+
commit_hash = parts[3]
|
|
173
|
+
branch = parts[4]
|
|
174
|
+
message = parts[5]
|
|
175
|
+
files_raw = parts[6] if len(parts) > 6 else ""
|
|
176
|
+
author = parts[7] if len(parts) > 7 else ""
|
|
177
|
+
|
|
178
|
+
if self._is_repo_ignored(repo_path):
|
|
179
|
+
return None
|
|
180
|
+
|
|
181
|
+
repo_name = os.path.basename(repo_path.rstrip("/"))
|
|
182
|
+
files = [f for f in files_raw.split("|") if f]
|
|
183
|
+
|
|
184
|
+
raw = {
|
|
185
|
+
"hash": commit_hash,
|
|
186
|
+
"branch": branch,
|
|
187
|
+
"message": message,
|
|
188
|
+
"files": files,
|
|
189
|
+
"author": author,
|
|
190
|
+
"repo_path": repo_path,
|
|
191
|
+
"repo_name": repo_name,
|
|
192
|
+
}
|
|
193
|
+
content = build_content(EventType.GIT_COMMIT, raw)
|
|
194
|
+
|
|
195
|
+
event = Event(
|
|
196
|
+
timestamp=ts,
|
|
197
|
+
date=date,
|
|
198
|
+
event_type=EventType.GIT_COMMIT,
|
|
199
|
+
source=Source.GIT_HOOK,
|
|
200
|
+
content=content,
|
|
201
|
+
raw_data=raw,
|
|
202
|
+
repo_path=repo_path,
|
|
203
|
+
repo_name=repo_name,
|
|
204
|
+
)
|
|
205
|
+
return event
|
|
206
|
+
|
|
207
|
+
def _parse_branch(self, ts: str, date: str, parts: list[str]) -> Optional[Event]:
|
|
208
|
+
repo_path = parts[2]
|
|
209
|
+
old_branch = parts[3]
|
|
210
|
+
new_branch = parts[4]
|
|
211
|
+
|
|
212
|
+
if self._is_repo_ignored(repo_path):
|
|
213
|
+
return None
|
|
214
|
+
|
|
215
|
+
repo_name = os.path.basename(repo_path.rstrip("/"))
|
|
216
|
+
raw = {
|
|
217
|
+
"old_branch": old_branch,
|
|
218
|
+
"new_branch": new_branch,
|
|
219
|
+
"repo_path": repo_path,
|
|
220
|
+
"repo_name": repo_name,
|
|
221
|
+
}
|
|
222
|
+
content = build_content(EventType.GIT_BRANCH, raw)
|
|
223
|
+
|
|
224
|
+
return Event(
|
|
225
|
+
timestamp=ts,
|
|
226
|
+
date=date,
|
|
227
|
+
event_type=EventType.GIT_BRANCH,
|
|
228
|
+
source=Source.GIT_HOOK,
|
|
229
|
+
content=content,
|
|
230
|
+
raw_data=raw,
|
|
231
|
+
repo_path=repo_path,
|
|
232
|
+
repo_name=repo_name,
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
def _parse_push(self, ts: str, date: str, parts: list[str]) -> Optional[Event]:
|
|
236
|
+
repo_path = parts[2]
|
|
237
|
+
remote = parts[3]
|
|
238
|
+
branch = parts[4]
|
|
239
|
+
try:
|
|
240
|
+
commit_count = int(parts[5])
|
|
241
|
+
except (ValueError, IndexError):
|
|
242
|
+
commit_count = 0
|
|
243
|
+
|
|
244
|
+
if self._is_repo_ignored(repo_path):
|
|
245
|
+
return None
|
|
246
|
+
|
|
247
|
+
repo_name = os.path.basename(repo_path.rstrip("/"))
|
|
248
|
+
raw = {
|
|
249
|
+
"remote": remote,
|
|
250
|
+
"branch": branch,
|
|
251
|
+
"commit_count": commit_count,
|
|
252
|
+
"repo_path": repo_path,
|
|
253
|
+
"repo_name": repo_name,
|
|
254
|
+
}
|
|
255
|
+
content = build_content(EventType.GIT_PUSH, raw)
|
|
256
|
+
return Event(
|
|
257
|
+
timestamp=ts,
|
|
258
|
+
date=date,
|
|
259
|
+
event_type=EventType.GIT_PUSH,
|
|
260
|
+
source=Source.GIT_HOOK,
|
|
261
|
+
content=content,
|
|
262
|
+
raw_data=raw,
|
|
263
|
+
repo_path=repo_path,
|
|
264
|
+
repo_name=repo_name,
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
def _parse_merge(self, ts: str, date: str, parts: list[str]) -> Optional[Event]:
|
|
268
|
+
repo_path = parts[2]
|
|
269
|
+
branch = parts[3]
|
|
270
|
+
merged_branch = parts[4]
|
|
271
|
+
is_squash = parts[5].strip() == "1" if len(parts) > 5 else False
|
|
272
|
+
|
|
273
|
+
if self._is_repo_ignored(repo_path):
|
|
274
|
+
return None
|
|
275
|
+
|
|
276
|
+
repo_name = os.path.basename(repo_path.rstrip("/"))
|
|
277
|
+
raw = {
|
|
278
|
+
"branch": branch,
|
|
279
|
+
"merged_branch": merged_branch,
|
|
280
|
+
"is_squash": is_squash,
|
|
281
|
+
"repo_path": repo_path,
|
|
282
|
+
"repo_name": repo_name,
|
|
283
|
+
}
|
|
284
|
+
content = build_content(EventType.GIT_MERGE, raw)
|
|
285
|
+
return Event(
|
|
286
|
+
timestamp=ts,
|
|
287
|
+
date=date,
|
|
288
|
+
event_type=EventType.GIT_MERGE,
|
|
289
|
+
source=Source.GIT_HOOK,
|
|
290
|
+
content=content,
|
|
291
|
+
raw_data=raw,
|
|
292
|
+
repo_path=repo_path,
|
|
293
|
+
repo_name=repo_name,
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
def _is_repo_ignored(self, path: str) -> bool:
|
|
297
|
+
for pattern in self._ignore_patterns:
|
|
298
|
+
if fnmatch.fnmatch(path, pattern):
|
|
299
|
+
return True
|
|
300
|
+
return False
|
|
301
|
+
|
|
302
|
+
# ------------------------------------------------------------------
|
|
303
|
+
# Git poller fallback
|
|
304
|
+
# ------------------------------------------------------------------
|
|
305
|
+
|
|
306
|
+
def _poller_loop(self) -> None:
|
|
307
|
+
"""Runs every POLL_INTERVAL_SEC to catch commits missed by hooks."""
|
|
308
|
+
# Wait a bit before first poll so daemon start-up completes
|
|
309
|
+
self._stop_event.wait(30)
|
|
310
|
+
|
|
311
|
+
while not self._stop_event.is_set():
|
|
312
|
+
try:
|
|
313
|
+
self._poll_git_repos()
|
|
314
|
+
except Exception:
|
|
315
|
+
logger.exception("Git poller error")
|
|
316
|
+
self._stop_event.wait(_POLL_INTERVAL_SEC)
|
|
317
|
+
|
|
318
|
+
def _poll_git_repos(self) -> None:
|
|
319
|
+
last_run_str = self._get_kv(_LAST_POLL_KEY)
|
|
320
|
+
if last_run_str:
|
|
321
|
+
since = last_run_str
|
|
322
|
+
else:
|
|
323
|
+
# First run: only look back 24 hours
|
|
324
|
+
from datetime import timedelta
|
|
325
|
+
|
|
326
|
+
dt = datetime.now(timezone.utc) - timedelta(hours=24)
|
|
327
|
+
since = dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
328
|
+
|
|
329
|
+
now_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
330
|
+
|
|
331
|
+
git_repos = _find_git_repos(Path.home(), _MAX_SCAN_DEPTH)
|
|
332
|
+
for repo_path in git_repos:
|
|
333
|
+
if self._is_repo_ignored(str(repo_path)):
|
|
334
|
+
continue
|
|
335
|
+
self._poll_repo(repo_path, since)
|
|
336
|
+
|
|
337
|
+
self._set_kv(_LAST_POLL_KEY, now_str)
|
|
338
|
+
|
|
339
|
+
def _poll_repo(self, repo_path: Path, since: str) -> None:
|
|
340
|
+
"""Fetch commits from *repo_path* since *since* (ISO timestamp)."""
|
|
341
|
+
try:
|
|
342
|
+
result = subprocess.run(
|
|
343
|
+
[
|
|
344
|
+
"git",
|
|
345
|
+
"-C",
|
|
346
|
+
str(repo_path),
|
|
347
|
+
"log",
|
|
348
|
+
f"--since={since}",
|
|
349
|
+
"--format=%H\x1f%s\x1f%D\x1f%an\x1f%aI",
|
|
350
|
+
"--name-only",
|
|
351
|
+
"--diff-filter=ACDMRT",
|
|
352
|
+
],
|
|
353
|
+
capture_output=True,
|
|
354
|
+
text=True,
|
|
355
|
+
timeout=15,
|
|
356
|
+
)
|
|
357
|
+
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
|
358
|
+
return
|
|
359
|
+
|
|
360
|
+
if result.returncode != 0:
|
|
361
|
+
return
|
|
362
|
+
|
|
363
|
+
repo_name = repo_path.name
|
|
364
|
+
# Parse the custom format
|
|
365
|
+
# Each commit block: header line + blank + file lines + blank
|
|
366
|
+
commit_hash = branch = message = author = ts_str = ""
|
|
367
|
+
files: list[str] = []
|
|
368
|
+
|
|
369
|
+
for line in result.stdout.splitlines():
|
|
370
|
+
if "\x1f" in line:
|
|
371
|
+
# Header line
|
|
372
|
+
if commit_hash and message:
|
|
373
|
+
self._emit_polled_commit(
|
|
374
|
+
repo_path, repo_name, commit_hash, branch,
|
|
375
|
+
message, author, files, ts_str,
|
|
376
|
+
)
|
|
377
|
+
parts = line.split("\x1f")
|
|
378
|
+
commit_hash = parts[0]
|
|
379
|
+
message = parts[1] if len(parts) > 1 else ""
|
|
380
|
+
refs = parts[2] if len(parts) > 2 else ""
|
|
381
|
+
author = parts[3] if len(parts) > 3 else ""
|
|
382
|
+
ts_str = parts[4] if len(parts) > 4 else ""
|
|
383
|
+
# Extract branch from refs (e.g. "HEAD -> main, origin/main")
|
|
384
|
+
branch = _extract_branch_from_refs(refs)
|
|
385
|
+
files = []
|
|
386
|
+
elif line.strip():
|
|
387
|
+
files.append(line.strip())
|
|
388
|
+
|
|
389
|
+
# Emit last commit
|
|
390
|
+
if commit_hash and message:
|
|
391
|
+
self._emit_polled_commit(
|
|
392
|
+
repo_path, repo_name, commit_hash, branch,
|
|
393
|
+
message, author, files, ts_str,
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
def _emit_polled_commit(
|
|
397
|
+
self,
|
|
398
|
+
repo_path: Path,
|
|
399
|
+
repo_name: str,
|
|
400
|
+
commit_hash: str,
|
|
401
|
+
branch: str,
|
|
402
|
+
message: str,
|
|
403
|
+
author: str,
|
|
404
|
+
files: list[str],
|
|
405
|
+
ts_str: str,
|
|
406
|
+
) -> None:
|
|
407
|
+
# Normalize timestamp
|
|
408
|
+
try:
|
|
409
|
+
dt = datetime.fromisoformat(ts_str)
|
|
410
|
+
ts_out = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
411
|
+
date_str = dt.astimezone(timezone.utc).strftime("%Y-%m-%d")
|
|
412
|
+
except ValueError:
|
|
413
|
+
now = datetime.now(timezone.utc)
|
|
414
|
+
ts_out = now.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
415
|
+
date_str = now.strftime("%Y-%m-%d")
|
|
416
|
+
|
|
417
|
+
raw = {
|
|
418
|
+
"hash": commit_hash,
|
|
419
|
+
"branch": branch,
|
|
420
|
+
"message": message,
|
|
421
|
+
"files": files,
|
|
422
|
+
"author": author,
|
|
423
|
+
"repo_path": str(repo_path),
|
|
424
|
+
"repo_name": repo_name,
|
|
425
|
+
}
|
|
426
|
+
content = build_content(EventType.GIT_COMMIT, raw)
|
|
427
|
+
event = Event(
|
|
428
|
+
timestamp=ts_out,
|
|
429
|
+
date=date_str,
|
|
430
|
+
event_type=EventType.GIT_COMMIT,
|
|
431
|
+
source=Source.GIT_POLLER,
|
|
432
|
+
content=content,
|
|
433
|
+
raw_data=raw,
|
|
434
|
+
repo_path=str(repo_path),
|
|
435
|
+
repo_name=repo_name,
|
|
436
|
+
)
|
|
437
|
+
try:
|
|
438
|
+
self._callback(event)
|
|
439
|
+
except Exception:
|
|
440
|
+
logger.exception("Error emitting polled commit")
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
# ---------------------------------------------------------------------------
|
|
444
|
+
# Helpers
|
|
445
|
+
# ---------------------------------------------------------------------------
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _find_git_repos(root: Path, max_depth: int) -> list[Path]:
    """Return the repository directories ``_walk`` collects under *root*.

    *max_depth* is passed to ``_walk`` as its depth budget.
    """
    found: list[Path] = []
    _walk(root, max_depth, found)
    return found
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _walk(path: Path, depth: int, result: list[Path]) -> None:
    """Append to *result* every git repository found at or below *path*.

    A directory containing a ``.git`` directory is recorded as a repository
    and is not searched any further. The check is made before descending, so
    the outcome does not depend on the order in which the file system lists
    the entries. Hidden directories are skipped, and directories that cannot
    be read are silently ignored.

    Args:
        path: Directory to inspect.
        depth: Remaining levels that may be descended; negative stops.
        result: List the repository paths are appended to.
    """
    if depth < 0:
        return
    try:
        if (path / ".git").is_dir():
            result.append(path)
            return  # a repository root: do not look for nested repos
        for child in path.iterdir():
            if child.name.startswith(".") or not child.is_dir():
                continue  # skip hidden dirs and plain files
            _walk(child, depth - 1, result)
    except OSError:
        # Includes PermissionError: unreadable directories are skipped.
        pass
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
class _GitTSVHandler(FileSystemEventHandler):
    """Watchdog handler that runs a callback when one specific file is modified."""

    def __init__(self, path: Path, callback: Callable[[], None]) -> None:
        """Remember the watched file (as a string path) and the callback."""
        self._callback = callback
        self._path = str(path)

    def on_modified(self, event: FileModifiedEvent) -> None:
        """Invoke the callback for modifications of the watched file only."""
        if event.is_directory:
            return
        if event.src_path != self._path:
            return
        self._callback()
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def _extract_branch_from_refs(refs: str) -> str:
    """Extract the current branch name from the git log %D decorator string.

    Resolution order:
      1. the target of ``HEAD -> <branch>``;
      2. the first decoration that is not ``HEAD`` or ``origin/...``;
      3. the remaining decorations as a string, or ``"unknown"`` if none.

    Tag decorations (``tag: <name>``) are ignored throughout, because a tag
    is not a branch.
    """
    names = [part.strip() for part in refs.split(",")]
    names = [name for name in names if name and not name.startswith("tag: ")]

    for name in names:
        if name.startswith("HEAD -> "):
            return name[len("HEAD -> "):]

    # Fallback: use the first ref that's not 'HEAD' or 'origin/...'
    for name in names:
        if name != "HEAD" and not name.startswith("origin/"):
            return name

    return ", ".join(names) or "unknown"
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
# ---------------------------------------------------------------------------
|
|
498
|
+
# Hook installer
|
|
499
|
+
# ---------------------------------------------------------------------------
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def install_global_hooks(hooks_src_dir: Path, git_hooks_dest: Path) -> None:
    """
    Install the bundled git hooks and point git's global hooks path at them.

    The ``post-commit`` and ``post-checkout`` hooks that exist in
    *hooks_src_dir* are copied into *git_hooks_dest* and made executable.
    The global ``core.hooksPath`` is then set to *git_hooks_dest*; if it
    already pointed somewhere else a warning is logged before it is replaced.

    Raises:
        subprocess.CalledProcessError: if setting ``core.hooksPath`` fails.
    """
    import shutil

    git_hooks_dest.mkdir(parents=True, exist_ok=True)
    for hook_name in ("post-commit", "post-checkout"):
        source_file = hooks_src_dir / hook_name
        if not source_file.exists():
            continue
        target_file = str(git_hooks_dest / hook_name)
        shutil.copy2(str(source_file), target_file)
        os.chmod(target_file, 0o755)

    desired = str(git_hooks_dest)

    # Check existing core.hooksPath (exit status is deliberately not checked:
    # the output is simply empty when nothing is configured).
    query = subprocess.run(
        ["git", "config", "--global", "core.hooksPath"],
        capture_output=True,
        text=True,
    )
    existing = query.stdout.strip()

    if existing and existing != desired:
        logger.warning(
            "core.hooksPath is already set to %r. "
            "Overriding to %r — existing hooks may stop working. "
            "Consider merging them manually.",
            existing,
            desired,
        )

    subprocess.run(
        ["git", "config", "--global", "core.hooksPath", desired],
        check=True,
    )
|