gitinspect 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffenv/__init__.py +11 -0
- diffenv/api.py +72 -0
- diffenv/cli.py +155 -0
- diffenv/diff_layer.py +102 -0
- diffenv/exceptions.py +29 -0
- diffenv/formatter_layer.py +198 -0
- diffenv/git_layer.py +278 -0
- diffenv/logging_config.py +23 -0
- diffenv/models.py +107 -0
- diffenv/parser_layer.py +257 -0
- gitinspect-0.1.0.dist-info/METADATA +233 -0
- gitinspect-0.1.0.dist-info/RECORD +15 -0
- gitinspect-0.1.0.dist-info/WHEEL +5 -0
- gitinspect-0.1.0.dist-info/entry_points.txt +2 -0
- gitinspect-0.1.0.dist-info/top_level.txt +1 -0
diffenv/git_layer.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Git layer — resolves refs and fetches file content at a given ref, without
|
|
3
|
+
ever checking out the working tree. Uses `git show <ref>:<path>` so the
|
|
4
|
+
user's working directory and index are never touched.
|
|
5
|
+
|
|
6
|
+
Responsibilities:
|
|
7
|
+
- Validate we're inside a git repository.
|
|
8
|
+
- Resolve a ref (branch / tag / commit) to a real commit, fetching from
|
|
9
|
+
the remote if it's missing locally (interactively, unless auto_fetch).
|
|
10
|
+
- Fetch the content of specific tracked files at that ref.
|
|
11
|
+
- Never leak raw git/subprocess errors to the caller — everything is
|
|
12
|
+
translated into diffenv's own exception types with clear messages.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import subprocess
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
from diffenv.exceptions import GitError, NotAGitRepoError, RefNotFoundError
|
|
21
|
+
from diffenv.logging_config import get_logger
|
|
22
|
+
|
|
23
|
+
logger = get_logger("git_layer")
|
|
24
|
+
|
|
25
|
+
# Files diffenv cares about. Parser layer (Phase 3) consumes these by name.
|
|
26
|
+
# Immutable tuple of the file names read from each ref; it is the default
# `filenames` argument of GitClient.fetch_files().
TRACKED_FILES: tuple[str, ...] = (
    "requirements.txt",
    "pyproject.toml",
    ".env.example",
    ".python-version",
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class _GitResult:
    """
    Lightweight carrier for the outcome of a single git invocation.

    Attributes:
        returncode: Exit status reported by the git process.
        stdout_bytes: Undecoded standard output; left as bytes so the caller
            can choose the right text encoding (BOM detection).
        stderr: Standard error, already decoded to text.
    """

    __slots__ = ("returncode", "stdout_bytes", "stderr")

    def __init__(self, returncode: int, stdout_bytes: bytes, stderr: str) -> None:
        self.returncode, self.stdout_bytes, self.stderr = (
            returncode,
            stdout_bytes,
            stderr,
        )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _decode_file_bytes(raw: bytes) -> str:
    """
    Turn the raw bytes of a file into text, choosing the codec from its BOM.

    Recognised prefixes:
      - FF FE     -> UTF-16 little-endian
      - FE FF     -> UTF-16 big-endian
      - EF BB BF  -> UTF-8 with BOM (the three BOM bytes are dropped)
      - anything else is treated as plain UTF-8

    Undecodable byte sequences are replaced rather than raising.
    """
    if raw.startswith(b"\xff\xfe"):
        codec = "utf-16-le"
    elif raw.startswith(b"\xfe\xff"):
        codec = "utf-16-be"
    else:
        # UTF-8 family: drop the 3-byte BOM when present, then decode.
        payload = raw[3:] if raw.startswith(b"\xef\xbb\xbf") else raw
        return payload.decode("utf-8", errors="replace")

    # The UTF-16 BOM decodes to U+FEFF at the front of the text; strip it.
    return raw.decode(codec, errors="replace").lstrip("\ufeff")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _run_git(args: list[str], cwd: str) -> _GitResult:
    """
    Run a git subcommand in `cwd` and return its outcome as a _GitResult.

    A non-zero exit status is not an error here — the caller inspects
    `returncode`. stdout is returned as raw bytes (decoded later with BOM
    detection); stderr is decoded as UTF-8 with replacement.

    git is started with LC_ALL=C so that its messages are not translated:
    callers such as get_file_content() match on the English text of stderr,
    which would silently stop working under a localized git.

    Args:
        args: Arguments placed after `git` on the command line.
        cwd: Directory in which git is executed.

    Raises:
        GitError: `cwd` is not a directory, git could not be launched
            (not installed, not on PATH, or another OS-level failure), or
            the command exceeded the 30 second timeout.
    """
    import os  # local import: only needed to build the child environment

    child_env = {**os.environ, "LC_ALL": "C"}

    try:
        # text=False: we want raw bytes regardless of the platform's default
        # encoding, because tracked files may be UTF-16 (Windows tools).
        completed = subprocess.run(
            ["git", *args],
            cwd=cwd,
            capture_output=True,
            text=False,
            timeout=30,
            env=child_env,
        )
    except subprocess.TimeoutExpired as exc:
        raise GitError(
            "Git command timed out. This can happen with a slow or "
            "unreachable remote — check your network connection."
        ) from exc
    except OSError as exc:
        # A missing working directory also surfaces as FileNotFoundError, so
        # rule that out before blaming the git installation.
        if not Path(cwd).is_dir():
            raise GitError(f"'{cwd}' does not exist or is not a directory.") from exc
        if isinstance(exc, FileNotFoundError):
            raise GitError(
                "Git is not installed or not available on PATH. "
                "Please install git to use diffenv."
            ) from exc
        raise GitError(f"Could not run git in '{cwd}': {exc}") from exc

    return _GitResult(
        returncode=completed.returncode,
        stdout_bytes=completed.stdout,
        stderr=completed.stderr.decode("utf-8", errors="replace"),
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class GitClient:
    """
    Thin, safe wrapper around git for diffenv's needs.

    Args:
        repo_path: Path to the repository root (defaults to CWD).
        confirm_callback: Called with the missing ref name when a ref isn't
                          found locally. Should return True to proceed with
                          fetching, False to decline. Defaults to an
                          interactive y/n console prompt. SDK callers can
                          pass a no-op (lambda r: False) or set
                          auto_fetch=True on individual calls to skip
                          prompting entirely.

    Raises:
        NotAGitRepoError: `repo_path` is not a directory or is not inside a
                          git repository.
    """

    def __init__(
        self,
        repo_path: str = ".",
        confirm_callback=None,
    ) -> None:
        self.repo_path = str(Path(repo_path).resolve())
        self._confirm_callback = confirm_callback or self._default_confirm
        self._verify_repo()

    # ------------------------------------------------------------------
    # Setup / validation
    # ------------------------------------------------------------------

    def _verify_repo(self) -> None:
        """Raise NotAGitRepoError unless repo_path is a directory inside a git repo."""
        # Check the directory first: launching git in a missing directory
        # fails with FileNotFoundError, which would otherwise be reported as
        # "git is not installed".
        if not Path(self.repo_path).is_dir():
            raise NotAGitRepoError(
                f"'{self.repo_path}' does not exist or is not a directory."
            )

        result = _run_git(["rev-parse", "--is-inside-work-tree"], cwd=self.repo_path)
        if result.returncode != 0:
            logger.debug("rev-parse failed: %s", result.stderr.strip())
            raise NotAGitRepoError(
                f"'{self.repo_path}' is not a git repository "
                "(or none of the parent directories are)."
            )

    @staticmethod
    def _default_confirm(ref: str) -> bool:
        """Interactive y/n prompt used by the CLI. SDK users should override this."""
        try:
            answer = input(
                f"Branch/ref '{ref}' was not found locally. "
                "Attempt to fetch it from the remote? [y/N]: "
            ).strip().lower()
        except (EOFError, KeyboardInterrupt):
            # No usable stdin, or the user aborted: treat as "no".
            return False
        return answer in ("y", "yes")

    @staticmethod
    def _is_usable_ref(ref: str) -> bool:
        """
        Return False for refs that must never reach the git command line:
        an empty string, or anything starting with '-' (git would parse it
        as an option rather than as a revision).
        """
        return bool(ref) and not ref.startswith("-")

    # ------------------------------------------------------------------
    # Ref resolution
    # ------------------------------------------------------------------

    def _ref_exists_locally(self, ref: str) -> bool:
        """Return True if `ref` resolves to a commit in the local repository."""
        result = _run_git(
            ["rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"],
            cwd=self.repo_path,
        )
        return result.returncode == 0

    def _remote_ref_exists(self, ref: str) -> bool:
        """
        Check (without fetching) whether `ref` exists on the `origin` remote.

        Only branch and tag names are matched (`--heads --tags`).
        """
        result = _run_git(
            ["ls-remote", "--exit-code", "--heads", "--tags", "origin", ref],
            cwd=self.repo_path,
        )
        return result.returncode == 0 and bool(result.stdout_bytes.strip())

    def _fetch_ref(self, ref: str) -> bool:
        """
        Fetch `ref` from origin into FETCH_HEAD. A plain `git fetch origin <ref>`
        does NOT create or update a local branch — it only populates FETCH_HEAD —
        so resolution after fetching must use `origin/<ref>` or FETCH_HEAD,
        never the bare branch name.

        Returns:
            True if the fetch succeeded, False otherwise.
        """
        logger.debug("Fetching ref '%s' from origin", ref)
        result = _run_git(["fetch", "origin", ref], cwd=self.repo_path)
        if result.returncode != 0:
            logger.debug("git fetch failed: %s", result.stderr.strip())
            return False
        return True

    def resolve_ref(self, ref: str, auto_fetch: bool = False) -> str:
        """
        Ensure `ref` is resolvable, fetching from the remote if needed.

        Args:
            ref:        Branch name, tag, or commit SHA.
            auto_fetch: If True, fetch automatically without prompting
                        (used by the SDK / non-interactive callers).

        Returns:
            A ref string guaranteed resolvable via `git show`. This may
            differ from the input `ref` (e.g. resolved to `origin/<ref>`
            after a remote fetch) — callers must use the RETURNED value,
            not the original argument, for all subsequent git operations.

        Raises:
            RefNotFoundError: ref is empty or starts with '-', does not
                              exist locally or on the remote, or the user
                              declined to fetch it.
        """
        # Reject option-like refs before they are handed to any git command
        # (e.g. `git fetch origin --upload-pack=...`).
        if not self._is_usable_ref(ref):
            logger.debug("Rejecting unusable ref %r", ref)
            raise RefNotFoundError(ref)

        if self._ref_exists_locally(ref):
            return ref

        logger.debug("Ref '%s' not found locally", ref)

        if not auto_fetch:
            should_fetch = self._confirm_callback(ref)
            if not should_fetch:
                raise RefNotFoundError(ref)
        else:
            logger.debug("auto_fetch enabled, skipping prompt for '%s'", ref)

        if not self._remote_ref_exists(ref):
            raise RefNotFoundError(ref)

        if not self._fetch_ref(ref):
            raise RefNotFoundError(ref)

        # After `git fetch origin <ref>`, the content is reachable via
        # FETCH_HEAD. We resolve through `origin/<ref>` when the remote
        # tracking branch was updated, falling back to FETCH_HEAD directly.
        remote_tracking_ref = f"origin/{ref}"
        if self._ref_exists_locally(remote_tracking_ref):
            return remote_tracking_ref

        if self._ref_exists_locally("FETCH_HEAD"):
            return "FETCH_HEAD"

        raise RefNotFoundError(ref)

    # ------------------------------------------------------------------
    # File content fetching
    # ------------------------------------------------------------------

    def file_exists_at_ref(self, ref: str, filename: str) -> bool:
        """Return True if `filename` exists as an object at `ref`."""
        if not self._is_usable_ref(ref):
            return False
        result = _run_git(["cat-file", "-e", f"{ref}:{filename}"], cwd=self.repo_path)
        return result.returncode == 0

    def get_file_content(self, ref: str, filename: str) -> str | None:
        """
        Return the content of `filename` as it existed at `ref`.

        Returns None if the file did not exist at that ref (this is normal,
        not an error — e.g. a dependency file added/removed between branches).

        Raises:
            GitError: `ref` is empty or option-like, or an unexpected git
                      failure unrelated to file absence occurred.
        """
        if not self._is_usable_ref(ref):
            raise GitError(
                f"Invalid ref '{ref}': refs must be non-empty and must not start with '-'."
            )

        result = _run_git(["show", f"{ref}:{filename}"], cwd=self.repo_path)

        if result.returncode == 0:
            return _decode_file_bytes(result.stdout_bytes)

        # File absence is recognised from git's error text.
        stderr = result.stderr.strip().lower()
        if "exists on disk, but not in" in stderr or "does not exist" in stderr or "fatal: path" in stderr:
            logger.debug("'%s' not present at ref '%s'", filename, ref)
            return None

        # Any other failure is unexpected — surface as a clean GitError.
        logger.debug("Unexpected git show failure for %s:%s — %s", ref, filename, result.stderr.strip())
        raise GitError(f"Could not read '{filename}' at '{ref}'. The repository may be in an unexpected state.")

    def fetch_files(
        self,
        ref: str,
        filenames: tuple[str, ...] = TRACKED_FILES,
        auto_fetch: bool = False,
    ) -> dict[str, str | None]:
        """
        Resolve `ref` and return a mapping of filename -> content (or None
        if absent at that ref) for every name in `filenames`.

        This is the single method the parser layer (Phase 3) depends on.

        Raises:
            RefNotFoundError: `ref` could not be resolved.
            GitError: reading one of the files failed unexpectedly.
        """
        resolved_ref = self.resolve_ref(ref, auto_fetch=auto_fetch)
        return {
            filename: self.get_file_content(resolved_ref, filename)
            for filename in filenames
        }
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Logging setup for diffenv. Debug output goes to stderr; never pollutes stdout."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
_LOG_FORMAT = "%(levelname)s [diffenv.%(module)s] %(message)s"


def configure(verbose: bool = False) -> None:
    """
    Set up the 'diffenv' logger; intended to be called once at the CLI entry point.

    Any handlers already attached to the 'diffenv' logger are discarded and
    replaced by a single stderr handler. The level is DEBUG when `verbose`
    is true and WARNING otherwise, and records are not propagated to the
    root logger.
    """
    package_logger = logging.getLogger("diffenv")
    package_logger.handlers.clear()

    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setFormatter(logging.Formatter(_LOG_FORMAT))
    package_logger.addHandler(stderr_handler)

    package_logger.setLevel(logging.DEBUG if verbose else logging.WARNING)
    package_logger.propagate = False
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_logger(name: str) -> logging.Logger:
    """Look up the logger called 'diffenv.<name>', i.e. a child of the 'diffenv' logger."""
    qualified_name = "diffenv.{}".format(name)
    return logging.getLogger(qualified_name)
|
diffenv/models.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared data models used across all layers (git, parser, diff, formatter).
|
|
3
|
+
Using dataclasses for lightweight, type-checked value objects.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# ---------------------------------------------------------------------------
|
|
12
|
+
# Parsed data models
|
|
13
|
+
# ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class Dependency:
    """One dependency: a package name plus, when known, its pinned version."""

    name: str
    version: str | None = None  # None means unpinned / not present

    def __str__(self) -> str:
        # Render as "name==version" only when a non-empty version is set.
        return f"{self.name}=={self.version}" if self.version else self.name
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class EnvVar:
    """The name of one environment variable declared in .env.example."""

    # Variable name only; no value is stored on this model.
    key: str
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class ParsedSnapshot:
    """The parsed view of a single git ref: dependencies, env vars, Python version."""

    # Ref the data was read from.
    ref: str
    # Each instance gets its own fresh lists (default_factory), never a shared one.
    dependencies: list[Dependency] = field(default_factory=list)
    env_vars: list[EnvVar] = field(default_factory=list)
    # None when no Python version information was found.
    python_version: str | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# Diff result models
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class DepChange:
    """How one dependency differs between the old ref and the new ref."""

    name: str
    old_present: bool  # was this dependency present at all in the old ref
    new_present: bool  # is this dependency present at all in the new ref
    old_version: str | None  # pinned version in old ref, or None if absent/unpinned
    new_version: str | None  # pinned version in new ref, or None if absent/unpinned

    @property
    def is_added(self) -> bool:
        """True when the dependency is absent from the old ref and present in the new one."""
        if self.old_present:
            return False
        return self.new_present

    @property
    def is_removed(self) -> bool:
        """True when the dependency is present in the old ref and absent from the new one."""
        if not self.old_present:
            return False
        return not self.new_present

    @property
    def is_upgraded(self) -> bool:
        """True when the dependency is present in both refs with differing versions."""
        in_both_refs = self.old_present and self.new_present
        return in_both_refs and self.old_version != self.new_version
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class EnvVarChange:
    """One environment-variable key that differs between two refs."""

    # Name of the variable.
    key: str
    # True = appeared in new ref, False = removed from new ref
    is_added: bool
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
class DiffResult:
    """The full set of differences between two refs, as handed to the formatter."""

    ref_old: str
    ref_new: str
    # Each instance gets its own fresh lists (default_factory).
    dep_changes: list[DepChange] = field(default_factory=list)
    env_changes: list[EnvVarChange] = field(default_factory=list)
    python_old: str | None = None
    python_new: str | None = None

    @property
    def has_python_change(self) -> bool:
        """True when the recorded Python versions of the two refs are not equal."""
        return self.python_old != self.python_new

    @property
    def is_empty(self) -> bool:
        """True when there are no dependency, env-var, or Python version changes."""
        anything_changed = (
            self.dep_changes or self.env_changes or self.has_python_change
        )
        return not anything_changed
|
diffenv/parser_layer.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parser layer — turns raw file content (as strings) into the shared data
|
|
3
|
+
models from `models.py`. Each file type has its own parser function; all
|
|
4
|
+
parsers share the same signature so new file types can be added without
|
|
5
|
+
touching existing code.
|
|
6
|
+
|
|
7
|
+
Design: a registry (dict) maps filename -> parser function. `parse_snapshot`
|
|
8
|
+
is the only function the rest of the app calls; it doesn't know or care how
|
|
9
|
+
many parsers exist or what file types they handle.
|
|
10
|
+
|
|
11
|
+
To add a new file type later:
|
|
12
|
+
1. Write a function: def parse_xxx(content: str) -> ...
|
|
13
|
+
2. Register it in _DEP_PARSERS, _ENV_PARSERS, or _PYTHON_VERSION_PARSERS
|
|
14
|
+
depending on what kind of data it contributes.
|
|
15
|
+
That's the entire integration surface.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
from typing import Callable
|
|
22
|
+
|
|
23
|
+
from diffenv.exceptions import ParseError
|
|
24
|
+
from diffenv.logging_config import get_logger
|
|
25
|
+
from diffenv.models import Dependency, EnvVar, ParsedSnapshot
|
|
26
|
+
|
|
27
|
+
logger = get_logger("parser_layer")
|
|
28
|
+
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
# requirements.txt
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
# Matches: name, optional extras [foo,bar], optional version spec (==, >=, etc.)
|
|
34
|
+
_REQ_LINE_RE = re.compile(
    r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)"  # package name
    r"(?:\[[^\]]*\])?"  # optional extras, ignored
    r"\s*(==\s*([^\s;#]+))?"  # optional ==version (captured)
    r"(?:[<>=!~].*)?"  # any other version specifier, ignored for version capture
)


def parse_requirements_txt(content: str) -> list[Dependency]:
    """
    Parse a requirements.txt file into a list of Dependency objects.

    Only `==` pins are treated as a known version; ranges like `>=1.0` or
    unpinned entries are recorded with version=None, since diffenv reports
    exact version changes, not range changes.

    Lines that are comments, blank, option directives (-e/-r/-c/-f/-i/--...),
    bare URLs or paths, or otherwise not a simple requirement are skipped
    (logged at debug level, not an error — requirements.txt has no strict
    spec, so being permissive here matters).

    Args:
        content: Full text of the requirements file.

    Returns:
        One Dependency per recognised requirement line, in file order.
    """
    dependencies: list[Dependency] = []

    for lineno, raw_line in enumerate(content.splitlines(), start=1):
        line = raw_line.strip()

        if not line or line.startswith("#"):
            continue
        if line.startswith(("-e ", "-r ", "--", "-c ", "-f ", "-i ")):
            logger.debug("requirements.txt line %d: skipping directive: %s", lineno, line)
            continue

        # Strip inline comments
        line = line.split(" #", 1)[0].strip()
        if not line:
            continue

        match = _REQ_LINE_RE.match(line)
        if not match:
            logger.debug("requirements.txt line %d: could not parse, skipping: %s", lineno, line)
            continue

        # The regex is not anchored at the end, so a bare URL / VCS link /
        # path such as "https://host/pkg.zip", "git+https://..." or
        # "dir/pkg.whl" matches only its leading word ("https", "git",
        # "dir"). What follows the match gives it away: skip those lines
        # instead of reporting a bogus package name.
        if line[match.end():].startswith((":", "+", "/")):
            logger.debug("requirements.txt line %d: URL or path requirement, skipping: %s", lineno, line)
            continue

        name = match.group(1)
        version = match.group(3)  # None if not an == pin
        dependencies.append(Dependency(name=name, version=version))

    return dependencies
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
# pyproject.toml
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _load_toml(content: str, filename: str) -> dict:
    """
    Parse `content` as TOML and return the resulting dict.

    Uses the standard-library `tomllib` when it can be imported and falls
    back to the `tomli` package otherwise.

    Raises:
        ParseError: the text could not be parsed; `filename` is passed along
            so the error identifies the offending file.
    """
    try:
        import tomllib as toml_reader  # Python 3.11+
    except ModuleNotFoundError:
        import tomli as toml_reader  # type: ignore[no-redef]

    try:
        parsed = toml_reader.loads(content)
    except Exception as exc:
        raise ParseError(filename, f"invalid TOML syntax ({exc})") from exc
    return parsed
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Matches a PEP 508 dependency string: name[extras]specifier
|
|
100
|
+
_PEP508_RE = re.compile(
    r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)"
    r"(?:\[[^\]]*\])?"
    r"\s*(==\s*([^\s;,]+))?"
)


def _parse_pep508_list(entries: list) -> list[Dependency]:
    """
    Convert a list of dependency strings into Dependency objects.

    Entries that are not strings are ignored silently; strings the pattern
    cannot match are logged at debug level and skipped. The version is set
    only for `==` pins and is None otherwise.
    """
    parsed: list[Dependency] = []
    for item in entries:
        if isinstance(item, str):
            hit = _PEP508_RE.match(item)
            if hit is None:
                logger.debug("pyproject.toml: could not parse dependency entry: %s", item)
            else:
                parsed.append(Dependency(name=hit.group(1), version=hit.group(3)))
    return parsed
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def parse_pyproject_toml(content: str) -> tuple[list[Dependency], str | None]:
    """
    Extract dependency and Python-version information from pyproject.toml.

    Reads two keys of the `[project]` table:
      - `dependencies`     -> parsed into Dependency objects
      - `requires-python`  -> returned verbatim as a specifier string

    Returns:
        (dependencies, required_python_spec); the second element is None
        when `requires-python` is missing or is not a string.

    Raises:
        ParseError: the TOML is invalid, `[project]` is not a table, or
            `project.dependencies` is not a list.

    Notes:
      - Poetry-style [tool.poetry.dependencies] is intentionally NOT read.
      - requires-python is a specifier (e.g. ">=3.10"), not an exact
        version, which is why it is returned separately.
    """
    document = _load_toml(content, "pyproject.toml")

    project_table = document.get("project", {})
    if not isinstance(project_table, dict):
        raise ParseError("pyproject.toml", "'[project]' section is malformed")

    declared = project_table.get("dependencies", [])
    if not isinstance(declared, list):
        raise ParseError("pyproject.toml", "'project.dependencies' must be a list")

    # Anything other than a string (including a missing key) becomes None.
    python_spec = project_table.get("requires-python")
    if not isinstance(python_spec, str):
        python_spec = None

    return _parse_pep508_list(declared), python_spec
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
# .env.example
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
_ENV_LINE_RE = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=")


def parse_env_example(content: str) -> list[EnvVar]:
    """
    Collect the variable names declared in a .env.example file.

    Blank lines and lines starting with '#' are ignored. A line counts as a
    declaration when it looks like `KEY=` (optionally prefixed by `export`);
    other lines are logged at debug level and skipped. Only the key is kept,
    and each key is reported once, in order of first appearance.
    """
    # dict used as an ordered set: insertion order = first appearance.
    keys_in_order: dict[str, None] = {}

    for lineno, raw_line in enumerate(content.splitlines(), start=1):
        text = raw_line.strip()
        if text == "" or text[0] == "#":
            continue

        found = _ENV_LINE_RE.match(text)
        if found is None:
            logger.debug(".env.example line %d: not a KEY=value line, skipping: %s", lineno, text)
            continue

        keys_in_order.setdefault(found.group(1), None)

    return [EnvVar(key=name) for name in keys_in_order]
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# ---------------------------------------------------------------------------
|
|
192
|
+
# .python-version
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def parse_python_version(content: str) -> str | None:
    """
    Read the version from a .python-version file.

    Surrounding whitespace is ignored and, when the file has several lines,
    only the first one is used. Returns None for empty or whitespace-only
    content.
    """
    stripped = content.strip()
    if not stripped:
        return None
    # Only the first line counts.
    return stripped.splitlines()[0].strip() or None
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# ---------------------------------------------------------------------------
|
|
207
|
+
# Snapshot assembly
|
|
208
|
+
# ---------------------------------------------------------------------------
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def parse_snapshot(files: dict[str, str | None], ref: str) -> ParsedSnapshot:
    """
    Build one ParsedSnapshot from the tracked files of a single git ref.

    A file whose content is None (or that is missing from `files`)
    contributes nothing — absence is normal, not an error.

    Args:
        files: mapping of filename -> content (or None if absent at ref),
               as produced by GitClient.fetch_files().
        ref:   the ref these files were fetched from (for traceability).

    Raises:
        ParseError: a file was present but malformed (e.g. broken TOML).
                    Such failures are deliberately not swallowed, so a
                    broken file stays distinguishable from an absent one.
    """
    snapshot = ParsedSnapshot(ref=ref)

    req_text = files.get("requirements.txt")
    if req_text is not None:
        snapshot.dependencies.extend(parse_requirements_txt(req_text))

    requires_python_hint: str | None = None
    toml_text = files.get("pyproject.toml")
    if toml_text is not None:
        toml_deps, requires_python_hint = parse_pyproject_toml(toml_text)
        # requirements.txt wins: a pyproject entry is added only when its
        # name (compared case-insensitively) has not been seen yet.
        known = {dep.name.lower() for dep in snapshot.dependencies}
        for candidate in toml_deps:
            folded = candidate.name.lower()
            if folded in known:
                continue
            known.add(folded)
            snapshot.dependencies.append(candidate)

    env_text = files.get(".env.example")
    if env_text is not None:
        snapshot.env_vars.extend(parse_env_example(env_text))

    version_text = files.get(".python-version")
    if version_text is None:
        # No .python-version pin: fall back to pyproject's requires-python
        # range so Python runtime info isn't silently lost.
        if requires_python_hint is not None:
            snapshot.python_version = requires_python_hint
    else:
        snapshot.python_version = parse_python_version(version_text)

    return snapshot
|