git-analyser 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: git-analyser
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Git repository analyser — commit history, churn, contributor patterns, dispatches to analyser family
5
5
  Author-email: Michael Borck <michael.borck@curtin.edu.au>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "git-analyser"
7
- version = "0.2.1"
7
+ version = "0.2.2"
8
8
  description = "Git repository analyser — commit history, churn, contributor patterns, dispatches to analyser family"
9
9
  authors = [{name = "Michael Borck", email = "michael.borck@curtin.edu.au"}]
10
10
  readme = "README.md"
@@ -41,3 +41,7 @@ packages = ["src/git_analyser"]
41
41
  [tool.pytest.ini_options]
42
42
  testpaths = ["tests"]
43
43
  pythonpath = ["src"]
44
+ addopts = "-m 'not slow'"
45
+ markers = [
46
+ "slow: tests that load real models or make network calls — opt-in with `pytest -m slow`",
47
+ ]
@@ -0,0 +1,3 @@
1
+ from importlib.metadata import version as _v
2
+ __version__ = _v("git-analyser")
3
+ del _v
@@ -14,7 +14,7 @@ class AnalyseRequest(BaseModel):
14
14
 
15
15
  @app.get("/health")
16
16
  def health():
17
- return {"status": "ok"}
17
+ return {"status": "ok", "version": version("git-analyser")}
18
18
 
19
19
 
20
20
  @app.post("/analyse", response_model=GitAnalysisResult)
@@ -1,3 +1,5 @@
1
+ from importlib.metadata import version
2
+
1
3
  import pytest
2
4
  from fastapi.testclient import TestClient
3
5
 
@@ -9,7 +11,7 @@ client = TestClient(app)
9
11
  def test_health():
10
12
  response = client.get("/health")
11
13
  assert response.status_code == 200
12
- assert response.json() == {"status": "ok"}
14
+ assert response.json() == {"status": "ok", "version": version("git-analyser")}
13
15
 
14
16
 
15
17
  def test_analyse_valid_repo(temp_repo):
@@ -27,8 +29,12 @@ def test_analyse_zip_path_returns_400():
27
29
 
28
30
 
29
31
  def test_analyse_nonexistent_path_returns_400():
30
- response = client.post("/analyse", json={"repo": "/nonexistent/path/repo"})
32
+ response = client.post(
33
+ "/analyse", json={"repo": "/nonexistent/path/that/cannot/exist"}
34
+ )
31
35
  assert response.status_code == 400
36
+ detail = response.json()["detail"].lower()
37
+ assert "exist" in detail or "not found" in detail or "git" in detail
32
38
 
33
39
 
34
40
  def test_analyse_missing_body_returns_422():
@@ -39,9 +45,10 @@ def test_analyse_missing_body_returns_422():
39
45
  def test_analyse_returns_learning_signals(temp_repo):
40
46
  response = client.post("/analyse", json={"repo": str(temp_repo)})
41
47
  assert response.status_code == 200
42
- data = response.json()
43
- sig = data["learning_signals"]
44
- assert "commit_count" in sig
45
- assert "total_additions" in sig
46
- assert "add_delete_ratio" in sig
47
- assert "commit_regularity_cv" in sig
48
+ sig = response.json()["learning_signals"]
49
+ assert sig["commit_count"] == 2
50
+ # Only the second commit's 1 addition counts (root commit has no parent).
51
+ assert sig["total_additions"] == 1
52
+ assert sig["total_deletions"] == 0
53
+ assert sig["add_delete_ratio"] == 0.0
54
+ assert sig["generic_message_ratio"] == 0.0
@@ -0,0 +1,115 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from git_analyser.core import analyse_repo
6
+ from git_analyser.models import GitAnalysisResult
7
+
8
+
9
+ def test_valid_repo_returns_result(temp_repo):
10
+ result = analyse_repo(temp_repo)
11
+ assert isinstance(result, GitAnalysisResult)
12
+ assert result.error is None
13
+ assert result.commit_count >= 2
14
+
15
+
16
+ def test_valid_repo_has_authors(temp_repo):
17
+ result = analyse_repo(temp_repo)
18
+ assert len(result.authors) >= 1
19
+ assert "Test" in result.authors
20
+
21
+
22
+ def test_valid_repo_has_timeline(temp_repo):
23
+ result = analyse_repo(temp_repo)
24
+ assert len(result.timeline) >= 2
25
+ # First commit subject
26
+ subjects = [c.subject for c in result.timeline]
27
+ assert "add index.html" in subjects
28
+
29
+
30
+ def test_zip_path_returns_bundle_analyser_error():
31
+ result = analyse_repo("/some/path/repo.zip")
32
+ assert result.error is not None
33
+ assert "bundle-analyser" in result.error
34
+
35
+
36
+ def test_nonexistent_path_returns_error():
37
+ result = analyse_repo("/nonexistent/path/to/repo")
38
+ assert result.error is not None
39
+ assert result.commit_count == 0
40
+
41
+
42
+ def test_path_without_git_returns_error(tmp_path):
43
+ result = analyse_repo(tmp_path)
44
+ assert result.error is not None
45
+ assert ".git" in result.error or "not a git" in result.error
46
+
47
+
48
+ def test_learning_signals_values_are_deterministic(temp_repo):
49
+ """The 2-commit fixture has known signal values; pin them.
50
+
51
+ Note: the first (root) commit has no parent so diff-tree --numstat
52
+ reports nothing for it, hence only the second commit's 1 addition is
53
+ counted. This matches the current implementation's behaviour.
54
+ """
55
+ result = analyse_repo(temp_repo)
56
+ sig = result.learning_signals
57
+ assert sig.commit_count == 2
58
+ assert sig.total_additions == 1 # only the second commit's line is counted
59
+ assert sig.total_deletions == 0
60
+ assert sig.add_delete_ratio == 0.0
61
+ assert sig.generic_message_ratio == 0.0 # both messages are descriptive
62
+ # avg_message_length: ("add index.html" (14) + "add stylesheet" (14)) / 2 = 14
63
+ assert sig.avg_message_length == 14.0
64
+
65
+
66
+ def test_string_path_accepted(temp_repo):
67
+ result = analyse_repo(str(temp_repo))
68
+ assert result.error is None
69
+ assert result.commit_count >= 2
70
+
71
+
72
+ def test_remote_url_invokes_git_clone(monkeypatch):
73
+ """Remote URLs trigger git clone with the right argv (no network)."""
74
+ import subprocess as _subprocess
75
+ from pathlib import Path as _Path
76
+ from unittest.mock import MagicMock
77
+
78
+ from git_analyser import core as _core
79
+
80
+ captured_calls: list[list[str]] = []
81
+ real_run = _subprocess.run
82
+
83
+ def fake_run(cmd, *args, **kwargs):
84
+ captured_calls.append(list(cmd))
85
+ if isinstance(cmd, list) and len(cmd) >= 2 and cmd[0] == "git" and cmd[1] == "clone":
86
+ target = cmd[-1]
87
+ target_path = _Path(target)
88
+ target_path.mkdir(parents=True, exist_ok=True)
89
+ (target_path / ".git").mkdir(exist_ok=True)
90
+ mock = MagicMock()
91
+ mock.returncode = 0
92
+ mock.stdout = ""
93
+ mock.stderr = ""
94
+ return mock
95
+ # All other git invocations (log, diff-tree...) — return empty output
96
+ mock = MagicMock()
97
+ mock.returncode = 0
98
+ mock.stdout = ""
99
+ mock.stderr = ""
100
+ return mock
101
+
102
+ # Patch the symbol used inside core.py
103
+ monkeypatch.setattr(_core.subprocess, "run", fake_run)
104
+
105
+ result = analyse_repo("https://github.com/example/repo.git")
106
+
107
+ clone_calls = [
108
+ c for c in captured_calls
109
+ if len(c) >= 2 and c[0] == "git" and c[1] == "clone"
110
+ ]
111
+ assert len(clone_calls) == 1
112
+ assert "https://github.com/example/repo.git" in clone_calls[0]
113
+ # Sanity: result is a GitAnalysisResult, no clone error surfaced
114
+ assert isinstance(result, GitAnalysisResult)
115
+ assert result.error is None or "clone" not in result.error.lower()
@@ -0,0 +1,46 @@
1
+ """Invariant tests — fast, run by default."""
2
+
3
+ from importlib.metadata import version
4
+
5
+ import pytest
6
+
7
+
8
+ def test_package_imports_cleanly() -> None:
9
+ """Smoke alarm — package must import without errors."""
10
+ import git_analyser # noqa: F401
11
+ from git_analyser.cli import main # noqa: F401
12
+ from git_analyser.api import app # noqa: F401
13
+
14
+
15
+ def test_health_version_matches_installed_package() -> None:
16
+ """/health must report the actual installed package version."""
17
+ from fastapi.testclient import TestClient
18
+
19
+ from git_analyser.api import app
20
+
21
+ client = TestClient(app)
22
+ response = client.get("/health")
23
+ assert response.status_code == 200
24
+ assert response.json()["version"] == version("git-analyser")
25
+
26
+
27
+ def test_app_title_matches_installed_package() -> None:
28
+ """FastAPI app.version must match the installed package."""
29
+ from git_analyser.api import app
30
+
31
+ assert app.version == version("git-analyser")
32
+
33
+
34
+ def test_non_git_directory_returns_loud_error(tmp_path) -> None:
35
+ """A directory without .git must error explicitly, not silently zero-fill.
36
+
37
+ Family pattern: failures are loud, not silent.
38
+ """
39
+ from git_analyser.core import analyse_repo
40
+
41
+ result = analyse_repo(tmp_path)
42
+ assert result.error is not None
43
+ # Must NOT have populated signals — silent zero-fill is a real risk
44
+ assert result.commit_count == 0
45
+ assert result.suspicious_flags == []
46
+ assert result.learning_signals.commit_count == 0
@@ -0,0 +1,189 @@
1
+ """POS/NEG matrix for the four suspicious-pattern rules in core.py.
2
+
3
+ The massive-commit and multi-author rules each get one positive and one
4
+ negative test; bulk upload and the single-session 24h dump get a negative
5
+ test only. Fixtures are deterministic, with forged commit dates and emails.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import subprocess
11
+ from datetime import datetime, timedelta, timezone
12
+ from pathlib import Path
13
+
14
+ import pytest
15
+
16
+ from git_analyser.core import analyse_repo
17
+
18
+
19
+ def _git_init(repo: Path) -> None:
20
+ subprocess.run(["git", "init", str(repo)], check=True, capture_output=True)
21
+ subprocess.run(
22
+ ["git", "config", "user.email", "test@test.com"],
23
+ cwd=repo,
24
+ check=True,
25
+ capture_output=True,
26
+ )
27
+ subprocess.run(
28
+ ["git", "config", "user.name", "Test"],
29
+ cwd=repo,
30
+ check=True,
31
+ capture_output=True,
32
+ )
33
+
34
+
35
+ def _commit(
36
+ repo: Path,
37
+ message: str,
38
+ when: datetime,
39
+ author_name: str = "Test",
40
+ author_email: str = "test@test.com",
41
+ ) -> None:
42
+ """Create a commit with forged author/committer dates and identity."""
43
+ iso = when.isoformat()
44
+ env = {
45
+ **os.environ,
46
+ "GIT_AUTHOR_DATE": iso,
47
+ "GIT_COMMITTER_DATE": iso,
48
+ "GIT_AUTHOR_NAME": author_name,
49
+ "GIT_AUTHOR_EMAIL": author_email,
50
+ "GIT_COMMITTER_NAME": author_name,
51
+ "GIT_COMMITTER_EMAIL": author_email,
52
+ }
53
+ subprocess.run(
54
+ ["git", "add", "."], cwd=repo, check=True, capture_output=True, env=env
55
+ )
56
+ subprocess.run(
57
+ ["git", "commit", "-m", message],
58
+ cwd=repo,
59
+ check=True,
60
+ capture_output=True,
61
+ env=env,
62
+ )
63
+
64
+
65
+ def _make_repo(tmp_path: Path, num_commits: int, span_hours: float) -> Path:
66
+ """Init repo, make N commits spread evenly over span_hours."""
67
+ repo = tmp_path / "repo"
68
+ repo.mkdir()
69
+ _git_init(repo)
70
+
71
+ start = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
72
+ step = timedelta(hours=span_hours / max(num_commits - 1, 1))
73
+
74
+ for i in range(num_commits):
75
+ (repo / f"file_{i}.txt").write_text(f"content {i}\n")
76
+ when = start + step * i
77
+ _commit(repo, f"add file_{i}", when)
78
+
79
+ return repo
80
+
81
+
82
+ def _make_repo_with_huge_commit(tmp_path: Path, lines: int) -> Path:
83
+ """Init repo, make a baseline commit, then a huge commit with N lines added."""
84
+ repo = tmp_path / "repo"
85
+ repo.mkdir()
86
+ _git_init(repo)
87
+
88
+ start = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
89
+
90
+ # Baseline commit (the root commit's diff doesn't show in numstat)
91
+ (repo / "seed.txt").write_text("seed\n")
92
+ _commit(repo, "seed file", start)
93
+
94
+ # Huge commit on top
95
+ huge = "\n".join(f"line {i}" for i in range(lines)) + "\n"
96
+ (repo / "huge.txt").write_text(huge)
97
+ _commit(repo, "add huge file", start + timedelta(hours=1))
98
+
99
+ return repo
100
+
101
+
102
+ def _make_repo_with_multiple_authors(tmp_path: Path, num_authors: int) -> Path:
103
+ """Init repo with one commit per author (distinct emails)."""
104
+ repo = tmp_path / "repo"
105
+ repo.mkdir()
106
+ _git_init(repo)
107
+
108
+ start = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
109
+
110
+ for i in range(num_authors):
111
+ (repo / f"file_{i}.txt").write_text(f"content {i}\n")
112
+ when = start + timedelta(hours=i * 10)
113
+ _commit(
114
+ repo,
115
+ f"add file_{i}",
116
+ when,
117
+ author_name=f"Author{i}",
118
+ author_email=f"author{i}@example.com",
119
+ )
120
+
121
+ return repo
122
+
123
+
124
+ # ---- bulk-upload rule (`<= 2 commits`) -------------------------------------
125
+
126
+
127
+ def test_no_bulk_upload_flag_for_more_than_two_commits(tmp_path):
128
+ """3+ commits should NOT trigger the bulk-upload flag."""
129
+ repo = _make_repo(tmp_path, num_commits=4, span_hours=72)
130
+ result = analyse_repo(repo)
131
+ assert not any("bulk upload" in flag.lower() for flag in result.suspicious_flags)
132
+
133
+
134
+ # ---- 24-hour-dump rule -----------------------------------------------------
135
+
136
+
137
+ def test_no_24h_dump_flag_when_span_exceeds_day(tmp_path):
138
+ """Commits spanning >24h should NOT trigger the single-session-dump flag."""
139
+ repo = _make_repo(tmp_path, num_commits=4, span_hours=72)
140
+ result = analyse_repo(repo)
141
+ assert not any(
142
+ "single session" in flag.lower() for flag in result.suspicious_flags
143
+ )
144
+
145
+
146
+ # ---- massive-commit rule (>500 additions) ----------------------------------
147
+
148
+
149
+ def test_massive_commit_flagged(tmp_path):
150
+ """A commit with >500 additions triggers the very-large-commit flag."""
151
+ repo = _make_repo_with_huge_commit(tmp_path, lines=600)
152
+ result = analyse_repo(repo)
153
+ assert any(
154
+ "addition" in flag.lower() or "large" in flag.lower()
155
+ for flag in result.suspicious_flags
156
+ )
157
+
158
+
159
+ def test_no_massive_commit_flag_for_small_commits(tmp_path):
160
+ """Small commits don't trigger the very-large-commit flag."""
161
+ repo = _make_repo(tmp_path, num_commits=3, span_hours=48)
162
+ result = analyse_repo(repo)
163
+ assert not any(
164
+ "addition" in flag.lower() and "large" in flag.lower()
165
+ for flag in result.suspicious_flags
166
+ )
167
+
168
+
169
+ # ---- multi-author rule (`> 2` distinct emails) -----------------------------
170
+
171
+
172
+ def test_multi_author_flagged(tmp_path):
173
+ """3+ distinct author emails triggers the multi-author flag."""
174
+ repo = _make_repo_with_multiple_authors(tmp_path, num_authors=3)
175
+ result = analyse_repo(repo)
176
+ assert any(
177
+ "author" in flag.lower() and "multiple" in flag.lower()
178
+ for flag in result.suspicious_flags
179
+ )
180
+
181
+
182
+ def test_no_multi_author_flag_for_single_author(tmp_path):
183
+ """Single author should NOT trigger the multi-author flag."""
184
+ repo = _make_repo(tmp_path, num_commits=4, span_hours=48)
185
+ result = analyse_repo(repo)
186
+ assert not any(
187
+ "author" in flag.lower() and "multiple" in flag.lower()
188
+ for flag in result.suspicious_flags
189
+ )
@@ -1 +0,0 @@
1
- __version__ = "0.2.0"
@@ -1,73 +0,0 @@
1
- from pathlib import Path
2
-
3
- import pytest
4
-
5
- from git_analyser.core import analyse_repo
6
- from git_analyser.models import GitAnalysisResult
7
-
8
-
9
- def test_valid_repo_returns_result(temp_repo):
10
- result = analyse_repo(temp_repo)
11
- assert isinstance(result, GitAnalysisResult)
12
- assert result.error is None
13
- assert result.commit_count >= 2
14
-
15
-
16
- def test_valid_repo_has_authors(temp_repo):
17
- result = analyse_repo(temp_repo)
18
- assert len(result.authors) >= 1
19
- assert "Test" in result.authors
20
-
21
-
22
- def test_valid_repo_has_timeline(temp_repo):
23
- result = analyse_repo(temp_repo)
24
- assert len(result.timeline) >= 2
25
- # First commit subject
26
- subjects = [c.subject for c in result.timeline]
27
- assert "add index.html" in subjects
28
-
29
-
30
- def test_zip_path_returns_bundle_analyser_error():
31
- result = analyse_repo("/some/path/repo.zip")
32
- assert result.error is not None
33
- assert "bundle-analyser" in result.error
34
-
35
-
36
- def test_nonexistent_path_returns_error():
37
- result = analyse_repo("/nonexistent/path/to/repo")
38
- assert result.error is not None
39
- assert result.commit_count == 0
40
-
41
-
42
- def test_path_without_git_returns_error(tmp_path):
43
- result = analyse_repo(tmp_path)
44
- assert result.error is not None
45
- assert ".git" in result.error or "not a git" in result.error
46
-
47
-
48
- def test_learning_signals_has_expected_fields(temp_repo):
49
- result = analyse_repo(temp_repo)
50
- sig = result.learning_signals
51
- assert hasattr(sig, "commit_count")
52
- assert hasattr(sig, "total_additions")
53
- assert hasattr(sig, "total_deletions")
54
- assert hasattr(sig, "add_delete_ratio")
55
- assert hasattr(sig, "avg_message_length")
56
- assert hasattr(sig, "generic_message_ratio")
57
- assert hasattr(sig, "time_span_hours")
58
- assert hasattr(sig, "max_gap_hours")
59
- assert hasattr(sig, "commit_regularity_cv")
60
-
61
-
62
- def test_learning_signals_values(temp_repo):
63
- result = analyse_repo(temp_repo)
64
- sig = result.learning_signals
65
- assert sig.commit_count >= 2
66
- assert sig.total_additions >= 0
67
- assert sig.avg_message_length > 0
68
-
69
-
70
- def test_string_path_accepted(temp_repo):
71
- result = analyse_repo(str(temp_repo))
72
- assert result.error is None
73
- assert result.commit_count >= 2
File without changes
File without changes
File without changes
File without changes