gitinspect 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diffenv/git_layer.py ADDED
@@ -0,0 +1,278 @@
1
+ """
2
+ Git layer — resolves refs and fetches file content at a given ref, without
3
+ ever checking out the working tree. Uses `git show <ref>:<path>` so the
4
+ user's working directory and index are never touched.
5
+
6
+ Responsibilities:
7
+ - Validate we're inside a git repository.
8
+ - Resolve a ref (branch / tag / commit) to a real commit, fetching from
9
+ the remote if it's missing locally (interactively, unless auto_fetch).
10
+ - Fetch the content of specific tracked files at that ref.
11
+ - Never leak raw git/subprocess errors to the caller — everything is
12
+ translated into diffenv's own exception types with clear messages.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import subprocess
18
+ from pathlib import Path
19
+
20
+ from diffenv.exceptions import GitError, NotAGitRepoError, RefNotFoundError
21
+ from diffenv.logging_config import get_logger
22
+
23
# Module-level logger, obtained through diffenv's get_logger helper.
logger = get_logger("git_layer")

# Files diffenv cares about. Parser layer (Phase 3) consumes these by name.
# This tuple is also the default `filenames` argument of GitClient.fetch_files,
# so every entry is looked up (relative to the repository root) at each ref.
TRACKED_FILES: tuple[str, ...] = (
    "requirements.txt",
    "pyproject.toml",
    ".env.example",
    ".python-version",
)
32
+
33
+
34
class _GitResult:
    """
    Outcome of a single git invocation, as handed back by _run_git.

    Attributes:
        returncode:   git's exit status.
        stdout_bytes: captured stdout, left undecoded so the caller can pick
                      an encoding (e.g. via BOM detection) itself.
        stderr:       captured stderr, already decoded to text.
    """

    # Fixed attribute set: instances carry no per-object __dict__.
    __slots__ = ("returncode", "stdout_bytes", "stderr")

    def __init__(self, returncode: int, stdout_bytes: bytes, stderr: str) -> None:
        self.returncode, self.stdout_bytes, self.stderr = (
            returncode,
            stdout_bytes,
            stderr,
        )
47
+
48
+
49
def _decode_file_bytes(raw: bytes) -> str:
    """
    Turn the raw bytes of a file (as printed by `git show`) into text.

    The encoding is chosen from the leading byte-order mark:
      - FF FE     -> UTF-16 little-endian
      - FE FF     -> UTF-16 big-endian
      - EF BB BF  -> UTF-8 with the BOM removed
      - otherwise -> plain UTF-8

    Undecodable bytes become U+FFFD instead of raising, so this never fails.
    """
    for bom, codec in ((b"\xff\xfe", "utf-16-le"), (b"\xfe\xff", "utf-16-be")):
        if raw.startswith(bom):
            # The BOM itself decodes to U+FEFF; drop it from the front.
            return raw.decode(codec, errors="replace").lstrip("\ufeff")

    utf8_bom = b"\xef\xbb\xbf"
    payload = raw[len(utf8_bom):] if raw.startswith(utf8_bom) else raw
    return payload.decode("utf-8", errors="replace")
64
+
65
+
66
def _run_git(args: list[str], cwd: str, timeout: float = 30) -> _GitResult:
    """
    Run a git subcommand and return its outcome as a _GitResult.

    A non-zero exit status is NOT an error here — the caller inspects
    `returncode`. stdout is returned as raw bytes (callers decode it with BOM
    detection); stderr is decoded as UTF-8 with replacement characters.

    Args:
        args:    Arguments placed after `git` (no shell is involved).
        cwd:     Directory to run git in.
        timeout: Seconds to wait before giving up on the command.

    Raises:
        GitError: git could not be started (not installed / not on PATH),
                  `cwd` is not an existing directory, or the command timed out.
    """
    import os  # local import: the module header does not import os

    # Callers match on git's English error text, so pin the message locale.
    # Only the child's environment is affected.
    env = {**os.environ, "LC_ALL": "C"}

    try:
        # text=False: keep stdout as bytes regardless of the platform's
        # default encoding, so UTF-16 files survive untouched.
        completed = subprocess.run(
            ["git", *args],
            cwd=cwd,
            capture_output=True,
            text=False,
            timeout=timeout,
            env=env,
        )
    except (FileNotFoundError, NotADirectoryError) as exc:
        # subprocess raises the same exception for a missing executable and
        # for a missing working directory; tell the two apart.
        if not Path(cwd).is_dir():
            raise GitError(
                f"Cannot run git in '{cwd}': the directory does not exist "
                "or is not a directory."
            ) from exc
        raise GitError(
            "Git is not installed or not available on PATH. "
            "Please install git to use diffenv."
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise GitError(
            "Git command timed out. This can happen with a slow or "
            "unreachable remote — check your network connection."
        ) from exc

    return _GitResult(
        returncode=completed.returncode,
        stdout_bytes=completed.stdout,
        stderr=completed.stderr.decode("utf-8", errors="replace"),
    )
103
+
104
+
105
class GitClient:
    """
    Thin, safe wrapper around git for diffenv's needs.

    All git access goes through `_run_git`; failures are reported as diffenv
    exceptions (NotAGitRepoError, RefNotFoundError, GitError).

    Args:
        repo_path:        Path to the repository root (defaults to CWD).
        confirm_callback: Called with the missing ref name when a ref isn't
                          found locally, before any remote is contacted.
                          Should return True to proceed with fetching,
                          False to decline. Defaults to an interactive
                          y/n console prompt. SDK callers can pass a
                          no-op (lambda r: False) or set auto_fetch=True
                          on individual calls to skip prompting entirely.

    Raises:
        NotAGitRepoError: `repo_path` does not exist or is not inside a git
                          work tree.
    """

    def __init__(
        self,
        repo_path: str = ".",
        confirm_callback=None,
    ) -> None:
        self.repo_path = str(Path(repo_path).resolve())
        self._confirm_callback = confirm_callback or self._default_confirm
        self._verify_repo()

    # ------------------------------------------------------------------
    # Setup / validation
    # ------------------------------------------------------------------

    def _verify_repo(self) -> None:
        """Raise NotAGitRepoError unless repo_path is inside a git work tree."""
        # Check the directory first: starting a subprocess in a missing cwd
        # fails in a way that is indistinguishable from "git not installed".
        if not Path(self.repo_path).is_dir():
            raise NotAGitRepoError(
                f"'{self.repo_path}' is not a git repository "
                "(the directory does not exist)."
            )

        result = _run_git(["rev-parse", "--is-inside-work-tree"], cwd=self.repo_path)
        if result.returncode != 0:
            logger.debug("rev-parse failed: %s", result.stderr.strip())
            raise NotAGitRepoError(
                f"'{self.repo_path}' is not a git repository "
                "(or none of the parent directories are)."
            )

    @staticmethod
    def _default_confirm(ref: str) -> bool:
        """Interactive y/n prompt used by the CLI. SDK users should override this."""
        try:
            answer = input(
                f"Branch/ref '{ref}' was not found locally. "
                f"Attempt to fetch it from the remote? [y/N]: "
            ).strip().lower()
        except (EOFError, KeyboardInterrupt):
            # No usable stdin (or the user aborted): treat as "no".
            return False
        return answer in ("y", "yes")

    @staticmethod
    def _is_option_like(ref: str) -> bool:
        """
        True if `ref` would be parsed by git as a command-line option.

        A leading dash lets a caller-supplied "ref" smuggle flags such as
        `--upload-pack=...` or `--output=...` into the git command line, so
        such values are never passed to git.
        """
        return ref.startswith("-")

    # ------------------------------------------------------------------
    # Ref resolution
    # ------------------------------------------------------------------

    def _resolve_commit(self, ref: str) -> str | None:
        """Return the commit SHA `ref` peels to, or None if it does not resolve locally."""
        if self._is_option_like(ref):
            return None
        result = _run_git(
            ["rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"],
            cwd=self.repo_path,
        )
        if result.returncode != 0:
            return None
        sha = result.stdout_bytes.decode("ascii", errors="replace").strip()
        return sha or None

    def _ref_exists_locally(self, ref: str) -> bool:
        """True if `ref` resolves to a commit using only local objects."""
        if self._is_option_like(ref):
            return False
        result = _run_git(
            ["rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"],
            cwd=self.repo_path,
        )
        return result.returncode == 0

    def _remote_ref_exists(self, ref: str) -> bool:
        """Check (without fetching) whether `ref` exists as a branch or tag on `origin`."""
        if self._is_option_like(ref):
            return False
        result = _run_git(
            ["ls-remote", "--exit-code", "--heads", "--tags", "origin", ref],
            cwd=self.repo_path,
        )
        return result.returncode == 0 and bool(result.stdout_bytes.strip())

    def _fetch_ref(self, ref: str) -> bool:
        """
        Fetch `ref` from origin into FETCH_HEAD. A plain `git fetch origin <ref>`
        does NOT create or update a local branch — it only populates FETCH_HEAD —
        so resolution after fetching must use `origin/<ref>` or FETCH_HEAD,
        never the bare branch name.

        Returns True if the fetch succeeded, False otherwise.
        """
        if self._is_option_like(ref):
            return False
        logger.debug("Fetching ref '%s' from origin", ref)
        result = _run_git(["fetch", "origin", ref], cwd=self.repo_path)
        if result.returncode != 0:
            logger.debug("git fetch failed: %s", result.stderr.strip())
            return False
        return True

    def resolve_ref(self, ref: str, auto_fetch: bool = False) -> str:
        """
        Ensure `ref` is resolvable, fetching from the remote if needed.

        Args:
            ref:        Branch name, tag, or commit SHA.
            auto_fetch: If True, fetch automatically without prompting
                        (used by the SDK / non-interactive callers).

        Returns:
            A revision string resolvable via `git show`. This may differ
            from the input `ref`: after a remote fetch it is `origin/<ref>`
            when that tracking ref exists, otherwise the commit SHA that
            FETCH_HEAD pointed to. Callers must use the RETURNED value,
            not the original argument, for all subsequent git operations.

        Raises:
            RefNotFoundError: ref does not exist locally or on the remote,
                              the user declined to fetch it, or it looks
                              like a command-line option (leading '-').
        """
        if self._is_option_like(ref):
            logger.debug("Rejecting option-like ref '%s'", ref)
            raise RefNotFoundError(ref)

        if self._ref_exists_locally(ref):
            return ref

        logger.debug("Ref '%s' not found locally", ref)

        if not auto_fetch:
            should_fetch = self._confirm_callback(ref)
            if not should_fetch:
                raise RefNotFoundError(ref)
        else:
            logger.debug("auto_fetch enabled, skipping prompt for '%s'", ref)

        if not self._remote_ref_exists(ref):
            raise RefNotFoundError(ref)

        if not self._fetch_ref(ref):
            raise RefNotFoundError(ref)

        # After `git fetch origin <ref>`, the content is reachable via
        # FETCH_HEAD. We resolve through `origin/<ref>` when the remote
        # tracking branch exists, falling back to what FETCH_HEAD points at.
        remote_tracking_ref = f"origin/{ref}"
        if self._ref_exists_locally(remote_tracking_ref):
            return remote_tracking_ref

        # FETCH_HEAD is rewritten by every fetch, so hand back the commit SHA
        # it names right now; otherwise resolving a second ref would silently
        # change what the first returned value refers to.
        fetched_sha = self._resolve_commit("FETCH_HEAD")
        if fetched_sha is not None:
            return fetched_sha

        raise RefNotFoundError(ref)

    # ------------------------------------------------------------------
    # File content fetching
    # ------------------------------------------------------------------

    def file_exists_at_ref(self, ref: str, filename: str) -> bool:
        """True if `filename` exists in the tree of `ref` (checked with `git cat-file -e`)."""
        if self._is_option_like(ref):
            return False
        result = _run_git(["cat-file", "-e", f"{ref}:{filename}"], cwd=self.repo_path)
        return result.returncode == 0

    def get_file_content(self, ref: str, filename: str) -> str | None:
        """
        Return the content of `filename` as it existed at `ref`.

        Returns None if the file did not exist at that ref (this is normal,
        not an error — e.g. a dependency file added/removed between branches).

        Raises:
            GitError: an unexpected git failure unrelated to file absence,
                      or `ref` looks like a command-line option.
        """
        if self._is_option_like(ref):
            raise GitError(f"'{ref}' is not a valid git ref.")

        result = _run_git(["show", f"{ref}:{filename}"], cwd=self.repo_path)

        if result.returncode == 0:
            return _decode_file_bytes(result.stdout_bytes)

        # git reports a missing path with one of these messages.
        stderr = result.stderr.strip().lower()
        if "exists on disk, but not in" in stderr or "does not exist" in stderr or "fatal: path" in stderr:
            logger.debug("'%s' not present at ref '%s'", filename, ref)
            return None

        # Any other failure is unexpected — surface as a clean GitError.
        logger.debug("Unexpected git show failure for %s:%s — %s", ref, filename, result.stderr.strip())
        raise GitError(f"Could not read '{filename}' at '{ref}'. The repository may be in an unexpected state.")

    def fetch_files(
        self,
        ref: str,
        filenames: tuple[str, ...] = TRACKED_FILES,
        auto_fetch: bool = False,
    ) -> dict[str, str | None]:
        """
        Resolve `ref` and return a mapping of filename -> content (or None
        if absent at that ref) for every name in `filenames`.

        This is the single method the parser layer (Phase 3) depends on.

        Raises:
            RefNotFoundError: `ref` could not be resolved (see resolve_ref).
            GitError:         a file could not be read for a reason other
                              than being absent at the ref.
        """
        resolved_ref = self.resolve_ref(ref, auto_fetch=auto_fetch)
        return {
            filename: self.get_file_content(resolved_ref, filename)
            for filename in filenames
        }
@@ -0,0 +1,23 @@
1
+ """Logging setup for diffenv. Debug output goes to stderr; never pollutes stdout."""
2
+
3
+ import logging
4
+ import sys
5
+
6
# Record layout used by the handler that configure() installs:
# "<LEVEL> [diffenv.<module that emitted the record>] <message>".
_LOG_FORMAT = "%(levelname)s [diffenv.%(module)s] %(message)s"
7
+
8
+
9
def configure(verbose: bool = False) -> None:
    """
    Set up the package-level "diffenv" logger. Call once at CLI entry-point.

    Installs a single stderr handler (replacing any handlers already on the
    "diffenv" logger) and stops records from propagating to ancestor loggers.

    Args:
        verbose: True selects DEBUG; otherwise only WARNING and above are shown.
    """
    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setFormatter(logging.Formatter(_LOG_FORMAT))

    package_logger = logging.getLogger("diffenv")
    package_logger.setLevel(logging.DEBUG if verbose else logging.WARNING)
    # Drop earlier handlers so repeated calls never duplicate output.
    package_logger.handlers.clear()
    package_logger.addHandler(stderr_handler)
    package_logger.propagate = False
19
+
20
+
21
def get_logger(name: str) -> logging.Logger:
    """Return the logger called "diffenv.<name>", i.e. a child of the "diffenv" logger."""
    qualified_name = f"diffenv.{name}"
    return logging.getLogger(qualified_name)
diffenv/models.py ADDED
@@ -0,0 +1,107 @@
1
+ """
2
+ Shared data models used across all layers (git, parser, diff, formatter).
3
+ Using dataclasses for lightweight, type-checked value objects.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass, field
9
+
10
+
11
+ # ---------------------------------------------------------------------------
12
+ # Parsed data models
13
+ # ---------------------------------------------------------------------------
14
+
15
+
16
@dataclass(frozen=True)
class Dependency:
    """
    One dependency entry: a package name plus, optionally, its pinned version.

    Immutable and hashable (frozen dataclass).
    """

    name: str
    version: str | None = None  # None (or empty) means no pinned version is known

    def __str__(self) -> str:
        # Render as "name==version" only when a version is actually set.
        return f"{self.name}=={self.version}" if self.version else self.name
27
+
28
+
29
@dataclass(frozen=True)
class EnvVar:
    """The name of one environment variable declared in .env.example (value not stored)."""

    key: str
34
+
35
+
36
@dataclass
class ParsedSnapshot:
    """
    Aggregate of everything parsed out of one git ref.

    Mutable: the list fields start empty (one fresh list per instance) and
    are filled in by whoever assembles the snapshot.
    """

    ref: str
    dependencies: list[Dependency] = field(default_factory=list)
    env_vars: list[EnvVar] = field(default_factory=list)
    python_version: str | None = None  # None when no Python version was found
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # Diff result models
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
@dataclass(frozen=True)
class DepChange:
    """
    How one dependency differs between an old ref and a new ref.

    Presence and version are tracked separately because a dependency can be
    present without a pinned version (version is then None).
    """

    name: str
    old_present: bool  # dependency was declared at the old ref
    new_present: bool  # dependency is declared at the new ref
    old_version: str | None  # pin at the old ref; None if absent or unpinned
    new_version: str | None  # pin at the new ref; None if absent or unpinned

    @property
    def is_added(self) -> bool:
        """True when the dependency appears only at the new ref."""
        if self.old_present:
            return False
        return self.new_present

    @property
    def is_removed(self) -> bool:
        """True when the dependency appears only at the old ref."""
        if not self.old_present:
            return False
        return not self.new_present

    @property
    def is_upgraded(self) -> bool:
        """
        True when the dependency is at both refs with differing versions.

        Despite the name, no ordering is checked: any difference counts,
        including a lower version or a pin being added or dropped.
        """
        if not (self.old_present and self.new_present):
            return False
        return self.old_version != self.new_version
76
+
77
+
78
@dataclass(frozen=True)
class EnvVarChange:
    """One environment-variable key that was added or removed between two refs."""

    key: str
    is_added: bool  # True: key appeared in the new ref; False: key was removed from it
84
+
85
+
86
@dataclass
class DiffResult:
    """
    The full comparison of two refs, in the shape the formatter consumes.

    Holds the dependency changes, the env-var changes, and the Python
    version recorded for each side.
    """

    ref_old: str
    ref_new: str
    dep_changes: list[DepChange] = field(default_factory=list)
    env_changes: list[EnvVarChange] = field(default_factory=list)
    python_old: str | None = None
    python_new: str | None = None

    @property
    def has_python_change(self) -> bool:
        """True when the two recorded Python versions are not equal."""
        before, after = self.python_old, self.python_new
        return before != after

    @property
    def is_empty(self) -> bool:
        """True when there is nothing to report: no dep, env-var, or Python change."""
        anything_changed = (
            self.dep_changes or self.env_changes or self.has_python_change
        )
        return not anything_changed
@@ -0,0 +1,257 @@
1
+ """
2
+ Parser layer — turns raw file content (as strings) into the shared data
3
+ models from `models.py`. Each file type has its own parser function; all
4
+ parsers share the same signature so new file types can be added without
5
+ touching existing code.
6
+
7
Design: `parse_snapshot` is the only function the rest of the app calls. It
looks up each tracked file by name in the mapping it is given and passes the
content to that file type's parser, so callers don't need to know how many
parsers exist or what file types they handle.

To add a new file type later:
  1. Write a function: def parse_xxx(content: str) -> ...
  2. Call it from `parse_snapshot` for the new filename and merge its result
     into the ParsedSnapshot.
That's the entire integration surface.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from typing import Callable
22
+
23
+ from diffenv.exceptions import ParseError
24
+ from diffenv.logging_config import get_logger
25
+ from diffenv.models import Dependency, EnvVar, ParsedSnapshot
26
+
27
# Module-level logger; the parsers below use it for debug notes on skipped lines.
logger = get_logger("parser_layer")
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # requirements.txt
31
+ # ---------------------------------------------------------------------------
32
+
33
# Matches one requirement line: name, optional extras [foo,bar], optional
# exact pin. Capture groups (parse_requirements_txt relies on 1 and 3):
#   1 = package name
#   2 = the whole pin clause ("==1.2.3" / "===1.2.3")
#   3 = the pinned version only
# The lookahead makes the name a complete token, so URLs, VCS links and
# paths ("git+https://...", "https://...", "pkg/dir") do not match at all
# instead of being misread as a package called "git" / "https" / "pkg".
_REQ_LINE_RE = re.compile(
    r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)"   # package name
    r"(?:\s*\[[^\]]*\])?"                  # optional extras, ignored
    r"(?=\s*(?:[<>=!~;@]|$))"              # then: specifier, marker, direct ref, or end
    r"\s*(===?\s*([^\s;#,]+))?"            # optional exact pin, version captured
    r"(?:[<>=!~].*)?"                      # any other version specifier, ignored for version capture
)
40
+
41
+
42
def parse_requirements_txt(content: str) -> list[Dependency]:
    """
    Turn the text of a requirements.txt into Dependency objects, in file order.

    A version is recorded only for exact pins; range specifiers (`>=1.0`,
    `~=2.1`, ...) and bare names yield version=None, because diffenv reports
    exact version changes rather than range changes.

    Blank lines, comments, pip directives (-e, -r, -c, -f, -i and any `--`
    option) and lines the requirement pattern cannot match are skipped.
    Skips are logged at debug level and never raise — requirements.txt has
    no strict spec, so the parser is deliberately permissive.
    """
    directive_prefixes = ("-e ", "-r ", "--", "-c ", "-f ", "-i ")
    parsed: list[Dependency] = []

    for lineno, raw_line in enumerate(content.splitlines(), start=1):
        entry = raw_line.strip()

        if entry == "" or entry.startswith("#"):
            continue
        if entry.startswith(directive_prefixes):
            logger.debug("requirements.txt line %d: skipping directive: %s", lineno, entry)
            continue

        # Keep only what precedes an inline " #" comment.
        entry = entry.partition(" #")[0].strip()
        if not entry:
            continue

        found = _REQ_LINE_RE.match(entry)
        if found is None:
            logger.debug("requirements.txt line %d: could not parse, skipping: %s", lineno, entry)
            continue

        # Group 3 is None unless the line carried an exact pin.
        parsed.append(Dependency(name=found.group(1), version=found.group(3)))

    return parsed
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # pyproject.toml
84
+ # ---------------------------------------------------------------------------
85
+
86
+
87
def _load_toml(content: str, filename: str) -> dict:
    """
    Parse `content` as TOML and return the resulting mapping.

    Uses the standard-library `tomllib` (Python 3.11+) and falls back to the
    `tomli` backport when it is unavailable.

    Args:
        content:  The TOML document text.
        filename: Name reported in the ParseError if parsing fails.

    Raises:
        ParseError: the document is not valid TOML, or no TOML parser is
                    importable in this environment.
    """
    try:
        import tomllib  # Python 3.11+
    except ModuleNotFoundError:
        try:
            import tomli as tomllib  # type: ignore[no-redef]
        except ModuleNotFoundError as exc:
            # Report through diffenv's own exception type rather than
            # letting a raw import error escape to the caller.
            raise ParseError(
                filename,
                "no TOML parser available (install 'tomli' on Python < 3.11)",
            ) from exc

    try:
        return tomllib.loads(content)
    except Exception as exc:
        # Deliberately broad: every parser failure is re-raised as a
        # ParseError with the original exception chained.
        raise ParseError(filename, f"invalid TOML syntax ({exc})") from exc
97
+
98
+
99
# Matches the start of a PEP 508 dependency string: name[extras]specifier.
# Capture groups (_parse_pep508_list relies on 1 and 3):
#   1 = package name
#   2 = the whole pin clause ("==1.2.3" / "===1.2.3")
#   3 = the pinned version only (None for ranges and bare names)
_PEP508_RE = re.compile(
    r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)"   # package name
    r"(?:\s*\[[^\]]*\])?"                  # optional extras (whitespace allowed before "[")
    r"\s*(===?\s*([^\s;,]+))?"             # optional exact pin; "===" no longer yields "=1.0"
)
105
+
106
+
107
def _parse_pep508_list(entries: list) -> list[Dependency]:
    """
    Convert a list of PEP 508 dependency strings into Dependency objects.

    Non-string items are ignored silently; strings the pattern cannot match
    are skipped with a debug log. The version is set only for exact pins.
    """
    parsed: list[Dependency] = []
    for item in entries:
        if isinstance(item, str):
            found = _PEP508_RE.match(item)
            if found is None:
                logger.debug("pyproject.toml: could not parse dependency entry: %s", item)
            else:
                parsed.append(Dependency(name=found.group(1), version=found.group(3)))
    return parsed
118
+
119
+
120
def parse_pyproject_toml(content: str) -> tuple[list[Dependency], str | None]:
    """
    Extract dependency and Python-version information from pyproject.toml.

    Reads two keys of the `[project]` table (PEP 621):
      - `dependencies`    -> parsed into Dependency objects
      - `requires-python` -> returned verbatim as a version specifier

    Returns:
        (dependencies, requires_python). `requires_python` is None when the
        key is missing or is not a string.

    Raises:
        ParseError: the TOML is invalid, `[project]` is not a table, or
                    `project.dependencies` is not a list.

    Notes:
        - Poetry's [tool.poetry.dependencies] is intentionally not read in
          this phase; it is a natural extension point.
        - `requires-python` is a specifier such as ">=3.10", not an exact
          version, so it is returned separately from the dependencies.
    """
    document = _load_toml(content, "pyproject.toml")

    project_table = document.get("project", {})
    if not isinstance(project_table, dict):
        raise ParseError("pyproject.toml", "'[project]' section is malformed")

    declared = project_table.get("dependencies", [])
    if not isinstance(declared, list):
        raise ParseError("pyproject.toml", "'project.dependencies' must be a list")

    # Anything other than a string specifier is treated as "not declared".
    python_spec = project_table.get("requires-python")
    if not isinstance(python_spec, str):
        python_spec = None

    return _parse_pep508_list(declared), python_spec
154
+
155
+
156
+ # ---------------------------------------------------------------------------
157
+ # .env.example
158
+ # ---------------------------------------------------------------------------
159
+
160
# Matches a `KEY=` declaration at the start of a line, with an optional
# leading `export `. Group 1 is the key: a letter or underscore followed by
# letters, digits or underscores. Whatever follows the "=" is not captured.
_ENV_LINE_RE = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=")
161
+
162
+
163
def parse_env_example(content: str) -> list[EnvVar]:
    """
    Collect the environment variable keys declared in a .env.example file.

    Only keys are returned — placeholder values are ignored. Each key appears
    once, in order of first declaration. Blank lines and comments are
    skipped; other lines that are not `KEY=value` are skipped with a debug
    log.
    """
    # A dict keeps insertion order, so it de-duplicates and orders at once.
    declared: dict[str, None] = {}

    for lineno, raw_line in enumerate(content.splitlines(), start=1):
        text = raw_line.strip()
        if text == "" or text.startswith("#"):
            continue

        found = _ENV_LINE_RE.match(text)
        if found is None:
            logger.debug(".env.example line %d: not a KEY=value line, skipping: %s", lineno, text)
            continue

        declared.setdefault(found.group(1), None)

    return [EnvVar(key=name) for name in declared]
189
+
190
+
191
+ # ---------------------------------------------------------------------------
192
+ # .python-version
193
+ # ---------------------------------------------------------------------------
194
+
195
+
196
def parse_python_version(content: str) -> str | None:
    """
    Read the version from the text of a pyenv-style .python-version file.

    Returns the first line of the content after surrounding whitespace is
    removed (later lines are ignored), or None when the content is empty or
    whitespace only.
    """
    trimmed = content.strip()
    if not trimmed:
        return None
    leading_line = trimmed.splitlines()[0].strip()
    return leading_line or None
204
+
205
+
206
+ # ---------------------------------------------------------------------------
207
+ # Snapshot assembly
208
+ # ---------------------------------------------------------------------------
209
+
210
+
211
def _canonical_dep_name(name: str) -> str:
    """Return `name` in PEP 503 normalised form (lowercase; runs of -, _, . become '-')."""
    return re.sub(r"[-_.]+", "-", name).lower()


def parse_snapshot(files: dict[str, str | None], ref: str) -> ParsedSnapshot:
    """
    Parse all tracked files fetched from a single git ref into one
    ParsedSnapshot. Missing files (content=None) simply contribute nothing
    — that's normal, not an error.

    Dependencies from requirements.txt come first; pyproject.toml entries
    are appended only when the same package (compared by normalised name,
    so `typing_extensions` equals `typing-extensions`) is not already
    listed. The Python version comes from .python-version, falling back to
    pyproject.toml's `requires-python` when .python-version is absent or
    yields no version.

    Args:
        files: mapping of filename -> content (or None if absent at ref),
               as produced by GitClient.fetch_files().
        ref:   the ref these files were fetched from (for traceability).

    Raises:
        ParseError: a file was present but malformed (e.g. broken TOML).
                    Parse failures are NOT silently swallowed — a broken
                    pyproject.toml is real, actionable information for
                    the user, distinct from a merely-absent file.
    """
    snapshot = ParsedSnapshot(ref=ref)

    requirements_content = files.get("requirements.txt")
    if requirements_content is not None:
        snapshot.dependencies.extend(parse_requirements_txt(requirements_content))

    pyproject_requires_python: str | None = None
    pyproject_content = files.get("pyproject.toml")
    if pyproject_content is not None:
        pyproject_deps, pyproject_requires_python = parse_pyproject_toml(pyproject_content)
        # Merge without duplicating a dependency already declared in
        # requirements.txt. Names are compared in normalised form because
        # "Foo_Bar", "foo-bar" and "foo.bar" all name the same package.
        known_names = {_canonical_dep_name(dep.name) for dep in snapshot.dependencies}
        for dep in pyproject_deps:
            canonical = _canonical_dep_name(dep.name)
            if canonical not in known_names:
                known_names.add(canonical)
                snapshot.dependencies.append(dep)

    env_content = files.get(".env.example")
    if env_content is not None:
        snapshot.env_vars.extend(parse_env_example(env_content))

    python_version_content = files.get(".python-version")
    if python_version_content is not None:
        snapshot.python_version = parse_python_version(python_version_content)

    if snapshot.python_version is None and pyproject_requires_python is not None:
        # Fallback: no usable .python-version pin (missing or empty file),
        # but pyproject.toml declares a supported range — surface it so
        # Python runtime info isn't silently lost.
        snapshot.python_version = pyproject_requires_python

    return snapshot