debugbrief 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,268 @@
1
+ """Thin, honest wrappers around the native ``git`` executable.
2
+
3
+ We deliberately shell out to ``git`` via subprocess rather than depend on a
4
+ library like GitPython. Every function fails safely: if ``git`` is missing, the
5
+ directory is not a repo, or a command errors, callers get conservative defaults
6
+ (e.g. ``None`` / empty lists) instead of exceptions.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import subprocess
12
+ from pathlib import Path
13
+ from typing import Dict, List, Optional, Tuple
14
+
15
+ from .models import GitState
16
+
17
+ _GIT_TIMEOUT_SECONDS = 15
18
+
19
+ # Generated/cache artifact names that should never appear in a change summary.
20
+ _ARTIFACT_DIR_NAMES = frozenset(
21
+ {
22
+ "__pycache__",
23
+ ".pytest_cache",
24
+ ".mypy_cache",
25
+ ".ruff_cache",
26
+ ".cache",
27
+ ".tox",
28
+ ".nox",
29
+ ".hypothesis",
30
+ "node_modules",
31
+ ".DS_Store",
32
+ }
33
+ )
34
+
35
+
36
+ def _is_generated_artifact(path: str) -> bool:
37
+ """Return True for compiled/cache files that are not meaningful changes.
38
+
39
+ Filters out byte-compiled files (``*.pyc``/``*.pyo``), packaging metadata
40
+ (``*.egg-info``), editor/OS cruft (``.DS_Store``), and known cache or vendor
41
+ directories (``__pycache__``, the various ``.*_cache`` trees, ``node_modules``),
42
+ matching on any path segment. Real source files keep their names, so paths
43
+ like ``keymap.py`` or ``api_client.py`` are never filtered.
44
+ """
45
+ if path.endswith((".pyc", ".pyo")):
46
+ return True
47
+ for segment in path.replace("\\", "/").split("/"):
48
+ if not segment:
49
+ continue
50
+ if segment in _ARTIFACT_DIR_NAMES:
51
+ return True
52
+ if segment.endswith(".egg-info"):
53
+ return True
54
+ return False
55
+
56
+
57
def _run_git(args: List[str], cwd: Path) -> Tuple[bool, str, str]:
    """Run ``git <args>`` in ``cwd`` without ever raising.

    Args:
        args: Arguments passed to the ``git`` executable (no shell involved).
        cwd: Directory in which the command is executed.

    Returns:
        ``(success, stdout, stderr)``. ``success`` is True only when git could
        be started and exited with status 0. When git cannot be run at all,
        ``stdout`` is empty and ``stderr`` holds a short explanation.
    """
    import os  # local import: only this helper needs the process environment

    # Git translates human-readable output (e.g. the ``--shortstat`` summary)
    # according to the user's locale. Callers parse that text for English
    # keywords, so pin the untranslated "C" locale for the child process.
    env = dict(os.environ, LC_ALL="C")
    try:
        completed = subprocess.run(
            ["git", *args],
            cwd=str(cwd),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Decode explicitly and leniently: file names or messages that are
            # not valid UTF-8 must not surface as UnicodeDecodeError.
            encoding="utf-8",
            errors="replace",
            env=env,
            timeout=_GIT_TIMEOUT_SECONDS,
        )
    except FileNotFoundError:
        # git is not installed / not on PATH.
        # NOTE(review): a missing ``cwd`` may surface as FileNotFoundError too
        # and would then be reported with this same message - confirm.
        return False, "", "git executable not found"
    except subprocess.TimeoutExpired:
        return False, "", "git command timed out"
    except OSError as exc:  # pragma: no cover - defensive
        return False, "", str(exc)
    return completed.returncode == 0, completed.stdout, completed.stderr
80
+
81
+
82
def is_git_available() -> bool:
    """Report whether ``git --version`` can be run successfully.

    The probe is executed from the current working directory.
    """
    succeeded = _run_git(["--version"], Path.cwd())[0]
    return succeeded
86
+
87
+
88
def find_repo_root(cwd: Path) -> Optional[str]:
    """Return what ``git rev-parse --show-toplevel`` prints for ``cwd``.

    Returns None when the command fails or prints nothing.
    """
    succeeded, stdout, _stderr = _run_git(["rev-parse", "--show-toplevel"], cwd)
    toplevel = stdout.strip() if succeeded else ""
    return toplevel if toplevel else None
95
+
96
+
97
def is_inside_repo(cwd: Path) -> bool:
    """Return True when git reports ``cwd`` as being inside a work tree.

    Relies on ``git rev-parse --is-inside-work-tree`` printing ``true``; any
    failure to run the command yields False.
    """
    succeeded, stdout, _stderr = _run_git(
        ["rev-parse", "--is-inside-work-tree"], cwd
    )
    if not succeeded:
        return False
    return stdout.strip() == "true"
100
+
101
+
102
def current_sha(cwd: Path) -> Optional[str]:
    """Return the output of ``git rev-parse HEAD`` for ``cwd``.

    Returns None when the command fails (for example in a repository that has
    no commits yet) or prints nothing.
    """
    succeeded, stdout, _stderr = _run_git(["rev-parse", "HEAD"], cwd)
    head = stdout.strip() if succeeded else ""
    return head if head else None
109
+
110
+
111
def current_short_sha(cwd: Path) -> Optional[str]:
    """Return the output of ``git rev-parse --short HEAD`` for ``cwd``.

    Returns None when the command fails or prints nothing.
    """
    succeeded, stdout, _stderr = _run_git(["rev-parse", "--short", "HEAD"], cwd)
    short = stdout.strip() if succeeded else ""
    return short if short else None
118
+
119
+
120
def is_detached_head(cwd: Path) -> bool:
    """Return True if HEAD is detached.

    ``git symbolic-ref -q HEAD`` prints the ref HEAD points at and exits
    nonzero when HEAD is detached. If that lookup yields a ref name, HEAD is
    attached. Otherwise HEAD counts as detached only when it still resolves to
    a commit, so a failed lookup with no resolvable HEAD reports False.
    """
    succeeded, ref_name, _stderr = _run_git(["symbolic-ref", "-q", "HEAD"], cwd)
    on_branch = succeeded and bool(ref_name.strip())
    if on_branch:
        return False
    head = current_sha(cwd)
    return head is not None
130
+
131
+
132
def current_branch(cwd: Path) -> Optional[str]:
    """Return the short name printed by ``git symbolic-ref --short -q HEAD``.

    Returns None when the command fails (the original notes the detached and
    unborn cases) or prints nothing.
    """
    succeeded, stdout, _stderr = _run_git(
        ["symbolic-ref", "--short", "-q", "HEAD"], cwd
    )
    name = stdout.strip() if succeeded else ""
    return name if name else None
139
+
140
+
141
+ def _porcelain_label(code: str) -> str:
142
+ """Map a 2-char porcelain status code to a single M/A/D/R-style label.
143
+
144
+ Untracked files (``??``) are reported as ``A`` (added/new). For other
145
+ entries we use the index status when present, otherwise the worktree status.
146
+ """
147
+ if code == "??":
148
+ return "A"
149
+ x = code[0] if len(code) >= 1 else " "
150
+ y = code[1] if len(code) >= 2 else " "
151
+ primary = x if x != " " else y
152
+ mapping = {
153
+ "M": "M", # modified
154
+ "A": "A", # added
155
+ "D": "D", # deleted
156
+ "R": "R", # renamed
157
+ "C": "A", # copied -> treat as added
158
+ "T": "M", # type change -> treat as modified
159
+ "U": "M", # unmerged -> treat as modified
160
+ }
161
+ return mapping.get(primary, primary if primary.strip() else "M")
162
+
163
+
164
def name_status(cwd: Path) -> List[Tuple[str, str]]:
    """Return (label, path) pairs for changed files, sorted by path.

    Labels are M (modified), A (added/new), D (deleted), or R (renamed).
    Staged, unstaged, and untracked changes are all taken from a single
    ``git status --porcelain -z`` call, so the result reflects the working
    tree at call time. Generated/cache artifacts are filtered out.

    The NUL-delimited ``-z`` form is used because git does not quote or
    escape paths in it, and renames are emitted as two separate fields rather
    than ``old -> new``. File names containing spaces, quotes, non-ASCII
    characters, or a literal `` -> `` are therefore reported verbatim.

    Args:
        cwd: Directory in which ``git status`` is run.

    Returns:
        A list of ``(label, path)`` tuples; empty when the command fails
        (for example outside a repository).
    """
    ok, out, _ = _run_git(["status", "--porcelain", "-z"], cwd)
    if not ok:
        return []
    seen: Dict[str, str] = {}
    fields = iter(out.split("\0"))
    for entry in fields:
        # A well-formed entry is "XY <path>": two status characters, one
        # space, then a non-empty path. This also skips the empty trailing
        # field left after the final NUL.
        if len(entry) < 4:
            continue
        code, path = entry[:2], entry[3:]
        if "R" in code or "C" in code:
            # With -z a rename/copy entry lists the destination first and the
            # source as the next NUL-terminated field; discard the source.
            next(fields, None)
        if _is_generated_artifact(path):
            continue
        # If a path appears twice, keep the first label seen for it.
        seen.setdefault(path, _porcelain_label(code))
    return sorted(
        ((label, path) for path, label in seen.items()), key=lambda item: item[1]
    )
195
+
196
+
197
def changed_files(cwd: Path) -> List[str]:
    """Return just the paths reported by ``name_status`` for ``cwd``.

    The order and de-duplication are exactly those of ``name_status``; the
    status labels are dropped.
    """
    paths: List[str] = []
    for entry in name_status(cwd):
        paths.append(entry[1])
    return paths
204
+
205
+
206
def shortstat(cwd: Path) -> Tuple[int, int]:
    """Return ``(lines_added, lines_deleted)`` relative to HEAD.

    Runs ``git diff HEAD --shortstat`` in ``cwd`` and parses its summary.
    Yields ``(0, 0)`` when the command fails or prints nothing.
    """
    succeeded, summary, _stderr = _run_git(["diff", "HEAD", "--shortstat"], cwd)
    if succeeded and summary.strip():
        return _parse_shortstat(summary)
    return 0, 0
216
+
217
+
218
def _parse_shortstat(text: str) -> Tuple[int, int]:
    """Extract ``(added, deleted)`` from ``--shortstat`` output.

    The text is cut at commas and newlines. A piece containing ``insertion``
    supplies the added count and one containing ``deletion`` the deleted
    count, each taken from the piece's leading digits. Missing pieces leave
    the corresponding count at 0.

    Example input:
        " 3 files changed, 12 insertions(+), 4 deletions(-)"
    """
    added = deleted = 0
    pieces = [piece.strip() for piece in text.replace("\n", ",").split(",")]
    for piece in pieces:
        if "insertion" in piece:
            added = _leading_int(piece)
            continue
        if "deletion" in piece:
            deleted = _leading_int(piece)
    return added, deleted
233
+
234
+
235
+ def _leading_int(text: str) -> int:
236
+ digits = ""
237
+ for ch in text.strip():
238
+ if ch.isdigit():
239
+ digits += ch
240
+ else:
241
+ break
242
+ return int(digits) if digits else 0
243
+
244
+
245
def capture_state(cwd: Path, initial: bool = True) -> GitState:
    """Build a ``GitState`` snapshot describing the repository at ``cwd``.

    When no repository root can be found, ``GitState(is_repo=False)`` is
    returned. Otherwise the snapshot records the root, the branch (None when
    HEAD is detached), and the detached flag. The HEAD SHA is stored in
    ``initial_sha`` when ``initial`` is true and in ``final_sha`` otherwise.
    """
    root = find_repo_root(cwd)
    if root is None:
        return GitState(is_repo=False)

    is_detached = is_detached_head(cwd)
    head_sha = current_sha(cwd)
    branch_name = current_branch(cwd) if not is_detached else None

    # Only one of the two SHA slots is filled; the other keeps its None default.
    return GitState(
        is_repo=True,
        repo_root=root,
        branch=branch_name,
        detached_head=is_detached,
        initial_sha=head_sha if initial else None,
        final_sha=None if initial else head_sha,
    )
debugbrief/models.py ADDED
@@ -0,0 +1,320 @@
1
+ """Typed data models for DebugBrief sessions and events.
2
+
3
+ All persisted state flows through these dataclasses. Every model knows how to
4
+ serialize itself to plain JSON-compatible dicts and reconstruct itself from
5
+ them, which keeps persistence honest and round-trippable.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import uuid
11
+ from dataclasses import dataclass, field
12
+ from enum import Enum
13
+ from typing import Any, Dict, List, Optional
14
+
15
+
16
class SessionStatus(str, Enum):
    """Status values a session can carry; each member is also a plain ``str``."""

    ACTIVE = "ACTIVE"
    COMPLETED = "COMPLETED"
    INTERRUPTED = "INTERRUPTED"
    ABANDONED = "ABANDONED"
21
+
22
+
23
class EventType(str, Enum):
    """Kinds of timeline event; each member is also a plain ``str``."""

    COMMAND = "command"
    NOTE = "note"
    SNAPSHOT = "snapshot"
    WARNING = "warning"
28
+
29
+
30
# Outcome labels recorded for a captured command.
COMMAND_STATUS_PASSED = "passed"
COMMAND_STATUS_FAILED = "failed"
COMMAND_STATUS_TIMED_OUT = "timed_out"
# The command could not be executed at all (e.g. command not found).
COMMAND_STATUS_ERROR = "error"
35
+
36
+
37
@dataclass
class GitState:
    """Git information attached to a session.

    Every field has a default, so ``GitState()`` describes "not a repository".
    """

    is_repo: bool = False
    repo_root: Optional[str] = None
    initial_sha: Optional[str] = None
    final_sha: Optional[str] = None
    branch: Optional[str] = None
    detached_head: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain JSON-compatible dict."""
        return dict(
            is_repo=self.is_repo,
            repo_root=self.repo_root,
            initial_sha=self.initial_sha,
            final_sha=self.final_sha,
            branch=self.branch,
            detached_head=self.detached_head,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GitState":
        """Rebuild a ``GitState`` from a dict; missing keys use the defaults.

        The two flag fields are coerced with ``bool``.
        """
        get = data.get
        return cls(
            is_repo=bool(get("is_repo", False)),
            repo_root=get("repo_root"),
            initial_sha=get("initial_sha"),
            final_sha=get("final_sha"),
            branch=get("branch"),
            detached_head=bool(get("detached_head", False)),
        )
66
+
67
+
68
@dataclass
class Timestamps:
    """Optional ``start`` and ``end`` timestamp strings."""

    start: Optional[str] = None
    end: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return both timestamps as a plain dict."""
        return dict(start=self.start, end=self.end)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Timestamps":
        """Rebuild from a dict; missing keys become None."""
        start, end = data.get("start"), data.get("end")
        return cls(start=start, end=end)
79
+
80
+
81
@dataclass
class CommandClassification:
    """Classification attached to a captured command.

    ``status`` defaults to ``COMMAND_STATUS_FAILED`` both for new instances
    and when the key is missing during deserialization.
    """

    is_test: bool = False
    is_verification: bool = False
    tool: Optional[str] = None
    status: str = COMMAND_STATUS_FAILED

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain JSON-compatible dict."""
        return dict(
            is_test=self.is_test,
            is_verification=self.is_verification,
            tool=self.tool,
            status=self.status,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CommandClassification":
        """Rebuild from a dict; the two flags are coerced with ``bool``."""
        get = data.get
        return cls(
            is_test=bool(get("is_test", False)),
            is_verification=bool(get("is_verification", False)),
            tool=get("tool"),
            status=get("status", COMMAND_STATUS_FAILED),
        )
104
+
105
+
106
@dataclass
class CommandData:
    """Payload stored in the ``data`` field of a command event."""

    command: str
    started_at: str
    ended_at: str
    duration_seconds: float
    exit_code: Optional[int]
    stdout_preview: str = ""
    stderr_preview: str = ""
    stdout_truncated: bool = False
    stderr_truncated: bool = False
    used_shell: bool = False
    classification: CommandClassification = field(
        default_factory=CommandClassification
    )
    # True when redaction masked something in the stored command or output.
    redacted: bool = False
    # Lightweight git snapshot taken when the command was recorded. Left at
    # None / empty outside a repo or when git was unavailable; older session
    # files omit these keys, which the defaults below absorb.
    git_head: Optional[str] = None
    git_changed_files: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the payload as a plain dict.

        The classification is serialized through its own ``to_dict`` and the
        changed-files list is copied.
        """
        return dict(
            command=self.command,
            started_at=self.started_at,
            ended_at=self.ended_at,
            duration_seconds=self.duration_seconds,
            exit_code=self.exit_code,
            stdout_preview=self.stdout_preview,
            stderr_preview=self.stderr_preview,
            stdout_truncated=self.stdout_truncated,
            stderr_truncated=self.stderr_truncated,
            used_shell=self.used_shell,
            classification=self.classification.to_dict(),
            redacted=self.redacted,
            git_head=self.git_head,
            git_changed_files=list(self.git_changed_files),
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CommandData":
        """Rebuild from a dict, substituting defaults for missing keys.

        Strings default to ``""``, flags to False, the duration to ``0.0``,
        and ``exit_code`` / ``git_head`` to None.
        """
        get = data.get
        return cls(
            command=get("command", ""),
            started_at=get("started_at", ""),
            ended_at=get("ended_at", ""),
            duration_seconds=float(get("duration_seconds", 0.0)),
            exit_code=get("exit_code"),
            stdout_preview=get("stdout_preview", ""),
            stderr_preview=get("stderr_preview", ""),
            stdout_truncated=bool(get("stdout_truncated", False)),
            stderr_truncated=bool(get("stderr_truncated", False)),
            used_shell=bool(get("used_shell", False)),
            classification=CommandClassification.from_dict(
                get("classification", {})
            ),
            redacted=bool(get("redacted", False)),
            git_head=get("git_head"),
            git_changed_files=list(get("git_changed_files", [])),
        )
167
+
168
+
169
@dataclass
class Event:
    """One timeline entry: a type string, a timestamp, and a data payload."""

    type: str
    timestamp: str
    data: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the event as a plain dict (``data`` is not copied)."""
        serialized: Dict[str, Any] = {}
        serialized["type"] = self.type
        serialized["timestamp"] = self.timestamp
        serialized["data"] = self.data
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Event":
        """Rebuild from a dict; a missing or falsy ``data`` becomes ``{}``."""
        payload = data.get("data") or {}
        return cls(
            type=data.get("type", ""),
            timestamp=data.get("timestamp", ""),
            data=payload,
        )

    # Convenience constructors -------------------------------------------------
    @classmethod
    def note(cls, text: str, timestamp: str) -> "Event":
        """Create a note event whose payload is ``{"text": text}``."""
        payload = {"text": text}
        return cls(type=EventType.NOTE.value, timestamp=timestamp, data=payload)

    @classmethod
    def warning(cls, message: str, timestamp: str) -> "Event":
        """Create a warning event whose payload is ``{"message": message}``."""
        payload = {"message": message}
        return cls(type=EventType.WARNING.value, timestamp=timestamp, data=payload)

    @classmethod
    def command(cls, command_data: "CommandData", timestamp: str) -> "Event":
        """Create a command event carrying ``command_data.to_dict()``."""
        payload = command_data.to_dict()
        return cls(type=EventType.COMMAND.value, timestamp=timestamp, data=payload)

    @classmethod
    def snapshot(cls, payload: Dict[str, Any], timestamp: str) -> "Event":
        """Create a snapshot event that stores ``payload`` as given."""
        return cls(type=EventType.SNAPSHOT.value, timestamp=timestamp, data=payload)
210
+
211
+
212
@dataclass
class FileChange:
    """One changed file paired with its name-status label (M/A/D/R)."""

    status: str
    path: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the label and path as a plain dict."""
        return dict(status=self.status, path=self.path)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "FileChange":
        """Rebuild from a dict; defaults are ``"M"`` and an empty path."""
        status = data.get("status", "M")
        path = data.get("path", "")
        return cls(status=status, path=path)
225
+
226
+
227
@dataclass
class Summary:
    """Aggregate figures for a session: changed files, line counts, counters."""

    modified_files: List[str] = field(default_factory=list)
    file_changes: List["FileChange"] = field(default_factory=list)
    lines_added: int = 0
    lines_deleted: int = 0
    tests_run: List[str] = field(default_factory=list)
    notes_count: int = 0
    commands_count: int = 0
    failed_commands_count: int = 0
    command_capture_status: str = "full"

    def to_dict(self) -> Dict[str, Any]:
        """Return the summary as a plain dict.

        List fields are copied and each file change is serialized through its
        own ``to_dict``.
        """
        changes = [change.to_dict() for change in self.file_changes]
        return dict(
            modified_files=list(self.modified_files),
            file_changes=changes,
            lines_added=self.lines_added,
            lines_deleted=self.lines_deleted,
            tests_run=list(self.tests_run),
            notes_count=self.notes_count,
            commands_count=self.commands_count,
            failed_commands_count=self.failed_commands_count,
            command_capture_status=self.command_capture_status,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Summary":
        """Rebuild from a dict; missing keys use the field defaults.

        Numeric fields are coerced with ``int``.
        """
        get = data.get
        return cls(
            modified_files=list(get("modified_files", [])),
            file_changes=[
                FileChange.from_dict(raw) for raw in get("file_changes", [])
            ],
            lines_added=int(get("lines_added", 0)),
            lines_deleted=int(get("lines_deleted", 0)),
            tests_run=list(get("tests_run", [])),
            notes_count=int(get("notes_count", 0)),
            commands_count=int(get("commands_count", 0)),
            failed_commands_count=int(get("failed_commands_count", 0)),
            command_capture_status=get("command_capture_status", "full"),
        )
267
+
268
+
269
@dataclass
class Session:
    """A debugging session: identity, status, git state, timeline, and summary.

    ``title`` and ``project_root`` are required. ``session_id`` defaults to a
    fresh UUID4 string and ``status`` to ``SessionStatus.ACTIVE.value``.
    """

    title: str
    project_root: str
    session_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    status: str = SessionStatus.ACTIVE.value
    warnings: List[str] = field(default_factory=list)
    git: GitState = field(default_factory=GitState)
    timestamps: Timestamps = field(default_factory=Timestamps)
    events: List[Event] = field(default_factory=list)
    summary: Summary = field(default_factory=Summary)

    # Accessors ---------------------------------------------------------------
    def command_events(self) -> List[Event]:
        """Return the events whose type is ``EventType.COMMAND``, in order."""
        return [e for e in self.events if e.type == EventType.COMMAND.value]

    def note_events(self) -> List[Event]:
        """Return the events whose type is ``EventType.NOTE``, in order."""
        return [e for e in self.events if e.type == EventType.NOTE.value]

    def add_warning(self, message: str, timestamp: str) -> None:
        """Record a warning both in the warnings list and the event timeline.

        The ``warnings`` list is de-duplicated by message; a warning event is
        appended to the timeline on every call.
        """
        if message not in self.warnings:
            self.warnings.append(message)
        self.events.append(Event.warning(message, timestamp))

    # Serialization -----------------------------------------------------------
    def to_dict(self) -> Dict[str, Any]:
        """Return the session as a plain JSON-compatible dict.

        Nested models are serialized through their own ``to_dict`` and the
        warnings list is copied.
        """
        return {
            "session_id": self.session_id,
            "title": self.title,
            "status": self.status,
            "project_root": self.project_root,
            "warnings": list(self.warnings),
            "git": self.git.to_dict(),
            "timestamps": self.timestamps.to_dict(),
            "events": [e.to_dict() for e in self.events],
            "summary": self.summary.to_dict(),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Session":
        """Rebuild a session from a dict produced by ``to_dict``.

        Missing keys fall back to the same defaults as the constructor. The
        nested sections (``warnings``, ``git``, ``timestamps``, ``events``,
        ``summary``) also accept an explicit ``None`` (JSON ``null``) and
        treat it as empty, mirroring how ``Event.from_dict`` handles a null
        ``data`` payload.
        """
        # ``or`` turns both "key missing" and "key present but null" into an
        # empty container, so a hand-edited or partially written file loads
        # instead of raising.
        return cls(
            session_id=data.get("session_id", str(uuid.uuid4())),
            title=data.get("title", ""),
            status=data.get("status", SessionStatus.ACTIVE.value),
            project_root=data.get("project_root", ""),
            warnings=list(data.get("warnings") or []),
            git=GitState.from_dict(data.get("git") or {}),
            timestamps=Timestamps.from_dict(data.get("timestamps") or {}),
            events=[Event.from_dict(e) for e in (data.get("events") or [])],
            summary=Summary.from_dict(data.get("summary") or {}),
        )