entropy-tracker 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. entropy_tracker-1.0.0/PKG-INFO +120 -0
  2. entropy_tracker-1.0.0/README.md +81 -0
  3. entropy_tracker-1.0.0/entropy/__init__.py +9 -0
  4. entropy_tracker-1.0.0/entropy/analyzers/__init__.py +1 -0
  5. entropy_tracker-1.0.0/entropy/analyzers/ast_analyzer.py +179 -0
  6. entropy_tracker-1.0.0/entropy/analyzers/dep_analyzer.py +525 -0
  7. entropy_tracker-1.0.0/entropy/analyzers/git_analyzer.py +371 -0
  8. entropy_tracker-1.0.0/entropy/api/__init__.py +1 -0
  9. entropy_tracker-1.0.0/entropy/api/main.py +79 -0
  10. entropy_tracker-1.0.0/entropy/api/routers/__init__.py +1 -0
  11. entropy_tracker-1.0.0/entropy/api/routers/alerts.py +65 -0
  12. entropy_tracker-1.0.0/entropy/api/routers/modules.py +129 -0
  13. entropy_tracker-1.0.0/entropy/api/routers/repos.py +232 -0
  14. entropy_tracker-1.0.0/entropy/cli.py +1202 -0
  15. entropy_tracker-1.0.0/entropy/config.py +170 -0
  16. entropy_tracker-1.0.0/entropy/scoring/__init__.py +1 -0
  17. entropy_tracker-1.0.0/entropy/scoring/alerts.py +118 -0
  18. entropy_tracker-1.0.0/entropy/scoring/forecaster.py +105 -0
  19. entropy_tracker-1.0.0/entropy/scoring/scorer.py +212 -0
  20. entropy_tracker-1.0.0/entropy/storage/__init__.py +1 -0
  21. entropy_tracker-1.0.0/entropy/storage/db.py +272 -0
  22. entropy_tracker-1.0.0/entropy/storage/models.py +123 -0
  23. entropy_tracker-1.0.0/entropy/tasks/__init__.py +1 -0
  24. entropy_tracker-1.0.0/entropy/tasks/celery_app.py +31 -0
  25. entropy_tracker-1.0.0/entropy/tasks/scheduler.py +105 -0
  26. entropy_tracker-1.0.0/entropy_tracker.egg-info/PKG-INFO +120 -0
  27. entropy_tracker-1.0.0/entropy_tracker.egg-info/SOURCES.txt +31 -0
  28. entropy_tracker-1.0.0/entropy_tracker.egg-info/dependency_links.txt +1 -0
  29. entropy_tracker-1.0.0/entropy_tracker.egg-info/entry_points.txt +2 -0
  30. entropy_tracker-1.0.0/entropy_tracker.egg-info/requires.txt +26 -0
  31. entropy_tracker-1.0.0/entropy_tracker.egg-info/top_level.txt +1 -0
  32. entropy_tracker-1.0.0/pyproject.toml +69 -0
  33. entropy_tracker-1.0.0/setup.cfg +4 -0
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.4
2
+ Name: entropy-tracker
3
+ Version: 1.0.0
4
+ Summary: A Code Aging & Decay Tracker - measures software entropy per module using git analysis, dependency drift, churn ratios, and knowledge decay signals.
5
+ Author-email: Hari om Singh <singh.omhari715@gmail.com>
6
+ License: MIT
7
+ Keywords: entropy,code-aging,decay,technical-debt,git-analysis
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Software Development :: Quality Assurance
14
+ Requires-Python: >=3.11
15
+ Description-Content-Type: text/markdown
16
+ Requires-Dist: pydriller>=2.6
17
+ Requires-Dist: gitpython>=3.1
18
+ Requires-Dist: typer[all]>=0.9
19
+ Requires-Dist: rich>=13.0
20
+ Requires-Dist: fastapi>=0.104
21
+ Requires-Dist: uvicorn[standard]>=0.24
22
+ Requires-Dist: sqlalchemy>=2.0
23
+ Requires-Dist: asyncpg>=0.29
24
+ Requires-Dist: psycopg2-binary>=2.9
25
+ Requires-Dist: celery[redis]>=5.3
26
+ Requires-Dist: redis>=5.0
27
+ Requires-Dist: httpx>=0.25
28
+ Requires-Dist: aiohttp>=3.8
29
+ Requires-Dist: tomli>=2.0; python_version < "3.11"
30
+ Requires-Dist: numpy>=1.26
31
+ Requires-Dist: pydantic>=2.5
32
+ Requires-Dist: pydantic-settings>=2.1
33
+ Requires-Dist: alembic>=1.13
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=7.4; extra == "dev"
36
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
37
+ Requires-Dist: black>=23.0; extra == "dev"
38
+ Requires-Dist: ruff>=0.1; extra == "dev"
39
+
40
+ # Entropy
41
+
42
+ The engineer who wrote `payments/gateway.py` left 18 months ago. Nobody else understands it. Entropy tells you that before production goes down.
43
+
44
+ ![Entropy Report Output](./assets/demo.png)
45
+
46
+ Software does not just accumulate bugs. It **ages**. Code written three years ago, never touched, slowly becomes dangerous — not because it broke, but because the world around it changed. The library it depends on evolved. The external API it calls shifted its contract. The engineers who wrote it have left.
47
+
48
+ This is **software entropy**. Entropy is a continuously-running analysis engine that combines git history, dependency drift, and code churn into a single decay score per module, and projects it forward in time.
49
+
50
+ ---
51
+
52
+ ## Quick Start
53
+
54
+ Get your first risk score in 3 commands:
55
+
56
+ ```bash
57
+ # Install the CLI
58
+ pip install entropy-tracker
59
+
60
+ # Register a repository and run the first scan
61
+ entropy init ./my-repo
62
+
63
+ # See your most decayed, highest-risk modules
64
+ entropy report --top 10
65
+ ```
66
+
67
+ ---
68
+
69
+ ## What Entropy Measures
70
+
71
+ Entropy combines four distinct signals into one module-level composite score (0–100):
72
+
73
+ | Signal | What it measures | Why it matters |
74
+ |--------|-----------------|----------------|
75
+ | **Knowledge Decay** | % of authors who touched this module and are still active | A module where 5 of 6 authors have gone inactive is a knowledge silo. |
76
+ | **Dependency Decay** | How far behind this module's direct dependencies are | A 12-month-old dep in a fast-moving ecosystem is riskier than a stable one. |
77
+ | **Churn-to-Touch** | Ratio of chaotic edits to intentional refactors | High churn with no refactoring = technical debt accumulating invisibly. |
78
+ | **Age Without Refactor** | Months since the last structural refactor | Old code that is never deliberately revisited drifts from team understanding. |
79
+
80
+ ### The Entropy Output
81
+
82
+ Modules are scored from 0 to 100:
83
+ - **0–50 (Healthy):** Active authors, up-to-date dependencies, regular refactoring.
84
+ - **50–70 (Medium):** Aging code. Starting to drift.
85
+ - **70–85 (High):** Risky to touch. Single points of failure emerging.
86
+ - **85–100 (Critical):** A fire hazard. Do not ship features through this without remediation.
87
+
88
+ You can inspect exactly why a file is failing:
89
+ ```bash
90
+ entropy inspect payments/gateway.py
91
+ ```
92
+
93
+ ### Advanced Forecasting & Alerts
94
+
95
+ Because Entropy stores scores over time, it computes the **decay velocity** of each module and projects forward.
96
+
97
+ ```bash
98
+ entropy forecast payments/gateway.py
99
+ # Output:
100
+ # 30 days → 90 (CRITICAL)
101
+ # 90 days → 97 (CRITICAL)
102
+ # Estimated unmaintainable: ~4 months
103
+ ```
104
+
105
+ ---
106
+
107
+ ## Architecture
108
+
109
+ Entropy runs entirely locally with zero external telemetry. The architecture includes:
110
+ - **Git Analyzer:** PyDriller + GitPython extract author decay and churn ratios.
111
+ - **Dep Analyzer:** PyPI API concurrent requests + pip-audit identify drift.
112
+ - **TimescaleDB:** Time-series storage for continuous scoring and forecasting.
113
+ - **FastAPI / Celery:** Optional background scheduler to continuously monitor repositories overnight.
114
+
115
+ ## Roadmap & v2 Features
116
+
117
+ We are actively building features to embed Entropy permanently in engineering workflows:
118
+ - **PR-level Diffing:** `entropy diff --base main` to block PRs that increase complexity on undocumented modules.
119
+ - **Simulations:** "What happens to the codebase if Engineer X leaves?"
120
+ - **Ecosystem Expansion:** Full JavaScript / TypeScript support.
@@ -0,0 +1,81 @@
1
+ # Entropy
2
+
3
+ The engineer who wrote `payments/gateway.py` left 18 months ago. Nobody else understands it. Entropy tells you that before production goes down.
4
+
5
+ ![Entropy Report Output](./assets/demo.png)
6
+
7
+ Software does not just accumulate bugs. It **ages**. Code written three years ago, never touched, slowly becomes dangerous — not because it broke, but because the world around it changed. The library it depends on evolved. The external API it calls shifted its contract. The engineers who wrote it have left.
8
+
9
+ This is **software entropy**. Entropy is a continuously-running analysis engine that combines git history, dependency drift, and code churn into a single decay score per module, and projects it forward in time.
10
+
11
+ ---
12
+
13
+ ## Quick Start
14
+
15
+ Get your first risk score in 3 commands:
16
+
17
+ ```bash
18
+ # Install the CLI
19
+ pip install entropy-tracker
20
+
21
+ # Register a repository and run the first scan
22
+ entropy init ./my-repo
23
+
24
+ # See your most decayed, highest-risk modules
25
+ entropy report --top 10
26
+ ```
27
+
28
+ ---
29
+
30
+ ## What Entropy Measures
31
+
32
+ Entropy combines four distinct signals into one module-level composite score (0–100):
33
+
34
+ | Signal | What it measures | Why it matters |
35
+ |--------|-----------------|----------------|
36
+ | **Knowledge Decay** | % of authors who touched this module and are still active | A module where 5 of 6 authors have gone inactive is a knowledge silo. |
37
+ | **Dependency Decay** | How far behind this module's direct dependencies are | A 12-month-old dep in a fast-moving ecosystem is riskier than a stable one. |
38
+ | **Churn-to-Touch** | Ratio of chaotic edits to intentional refactors | High churn with no refactoring = technical debt accumulating invisibly. |
39
+ | **Age Without Refactor** | Months since the last structural refactor | Old code that is never deliberately revisited drifts from team understanding. |
40
+
41
+ ### The Entropy Output
42
+
43
+ Modules are scored from 0 to 100:
44
+ - **0–50 (Healthy):** Active authors, up-to-date dependencies, regular refactoring.
45
+ - **50–70 (Medium):** Aging code. Starting to drift.
46
+ - **70–85 (High):** Risky to touch. Single points of failure emerging.
47
+ - **85–100 (Critical):** A fire hazard. Do not ship features through this without remediation.
48
+
49
+ You can inspect exactly why a file is failing:
50
+ ```bash
51
+ entropy inspect payments/gateway.py
52
+ ```
53
+
54
+ ### Advanced Forecasting & Alerts
55
+
56
+ Because Entropy stores scores over time, it computes the **decay velocity** of each module and projects forward.
57
+
58
+ ```bash
59
+ entropy forecast payments/gateway.py
60
+ # Output:
61
+ # 30 days → 90 (CRITICAL)
62
+ # 90 days → 97 (CRITICAL)
63
+ # Estimated unmaintainable: ~4 months
64
+ ```
65
+
66
+ ---
67
+
68
+ ## Architecture
69
+
70
+ Entropy runs entirely locally with zero external telemetry. The architecture includes:
71
+ - **Git Analyzer:** PyDriller + GitPython extract author decay and churn ratios.
72
+ - **Dep Analyzer:** PyPI API concurrent requests + pip-audit identify drift.
73
+ - **TimescaleDB:** Time-series storage for continuous scoring and forecasting.
74
+ - **FastAPI / Celery:** Optional background scheduler to continuously monitor repositories overnight.
75
+
76
+ ## Roadmap & v2 Features
77
+
78
+ We are actively building features to embed Entropy permanently in engineering workflows:
79
+ - **PR-level Diffing:** `entropy diff --base main` to block PRs that increase complexity on undocumented modules.
80
+ - **Simulations:** "What happens to the codebase if Engineer X leaves?"
81
+ - **Ecosystem Expansion:** Full JavaScript / TypeScript support.
@@ -0,0 +1,9 @@
"""Entropy — a code aging & decay tracker.

Computes per-module decay scores from git analysis, dependency drift,
churn ratios, and knowledge-decay signals.
"""

__version__ = "1.0.0"
__app_name__ = "entropy"
@@ -0,0 +1 @@
1
+ """Entropy analyzers — git history, dependency drift, and AST import graph."""
@@ -0,0 +1,179 @@
1
+ """
2
+ AST Analyzer — builds the import graph and computes blast radius.
3
+
4
+ Uses Python's built-in ``ast`` module to parse every .py file in the repo,
5
+ extract import statements, and build a directed graph of module dependencies.
6
+ From this graph we compute:
7
+ - Blast radius: how many modules transitively depend on a given module
8
+ - Import graph adjacency list (for visualization/API)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import ast
14
+ import logging
15
+ import warnings
16
+ from collections import defaultdict
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
@dataclass
class ImportGraphData:
    """Complete import graph for a repository.

    Attributes:
        imports: module path → modules it imports (repo-local edges only).
        imported_by: module path → modules that import it (reverse edges).
        blast_radius: module path → count of transitive dependents.
        all_modules: every module path discovered in the repo.
    """

    imports: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))
    imported_by: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))
    blast_radius: dict[str, int] = field(default_factory=dict)
    all_modules: set[str] = field(default_factory=set)
35
+
36
+
class ASTAnalyzer:
    """Build the import graph and compute blast radius for a Python repo.

    ``analyze()`` walks every ``.py`` file under ``repo_path``, extracts
    import statements with the stdlib ``ast`` module, resolves them to
    repo-local files, and computes each module's blast radius — how many
    modules transitively depend on it.
    """

    def __init__(self, repo_path: str | Path):
        self.repo_path = Path(repo_path)
        # dotted.module.name → repo-relative file path (posix separators);
        # populated by _build_module_registry() during analyze().
        self._module_paths: dict[str, str] = {}

    def analyze(self) -> ImportGraphData:
        """Run the full pipeline and return the populated graph.

        1. Discover all .py files and build a dotted-name registry
        2. Parse each file's imports
        3. Resolve imports to local modules (external deps are dropped)
        4. Build reverse graph and compute blast radius
        """
        logger.info("ASTAnalyzer: scanning %s …", self.repo_path)
        graph = ImportGraphData()

        # Step 1: Build module registry
        self._build_module_registry()
        graph.all_modules = set(self._module_paths.values())

        # Steps 2 + 3: Parse each file's imports; keep local edges only.
        for rel_path in self._module_paths.values():
            full_path = self.repo_path / rel_path
            if not full_path.is_file():
                continue

            # Dedupe so multiple statements importing the same module
            # (e.g. `import pkg.a` plus `from pkg.a import x`) produce
            # a single edge in both directions.
            seen: set[str] = set()
            for imp in self._extract_imports(full_path):
                resolved = self._resolve_import(imp)
                # Skip externals (None), self-imports, and duplicate edges.
                if resolved and resolved != rel_path and resolved not in seen:
                    seen.add(resolved)
                    graph.imports[rel_path].append(resolved)
                    graph.imported_by[resolved].append(rel_path)

        # Step 4: Compute blast radius for every module
        for module_path in graph.all_modules:
            graph.blast_radius[module_path] = self._compute_blast_radius(module_path, graph)

        logger.info(
            "ASTAnalyzer: found %d modules, max blast radius = %d",
            len(graph.all_modules),
            max(graph.blast_radius.values()) if graph.blast_radius else 0,
        )
        return graph

    # ---- module registry ----------------------------------------------------

    def _build_module_registry(self) -> None:
        """Map dotted module names → repo-relative file paths.

        Handles both flat layouts and ``src/`` layouts: when a ``src``
        directory exists, its files get dotted names relative to ``src``
        and the repo-root pass skips them, so nothing is registered twice.
        """
        self._module_paths.clear()

        roots = [self.repo_path]
        src_dir = self.repo_path / "src"
        if src_dir.is_dir():
            roots.append(src_dir)

        for root in roots:
            for py_file in root.rglob("*.py"):
                try:
                    rel_to_root = py_file.relative_to(root)
                    rel_to_repo = py_file.relative_to(self.repo_path)
                except ValueError:
                    # File resolves outside the repo — ignore it.
                    continue

                # Skip hidden dirs, caches, and vendored environments.
                parts = rel_to_repo.parts
                if any(p.startswith(".") or p in ("__pycache__", "venv", ".venv", "node_modules") for p in parts):
                    continue

                # src files are handled by the src-root pass (with names
                # relative to src/), so skip them in the repo-root pass.
                if root == self.repo_path and src_dir.is_dir() and "src" in parts:
                    continue

                rel_str = rel_to_repo.as_posix()

                # Dotted name: packages drop "__init__", modules drop ".py".
                root_parts = rel_to_root.parts
                if rel_to_root.name == "__init__.py":
                    dotted = ".".join(root_parts[:-1])
                else:
                    dotted = ".".join(root_parts[:-1] + (rel_to_root.stem,))

                # A top-level __init__.py yields an empty dotted name — skip.
                if dotted:
                    self._module_paths[dotted] = rel_str

    # ---- import extraction --------------------------------------------------

    @staticmethod
    def _extract_imports(filepath: Path) -> list[str]:
        """Return the dotted names of all imports in *filepath*.

        NOTE(review): explicit relative imports lose their level here —
        a bare ``from . import x`` is dropped (``node.module`` is None)
        and ``from .sub import y`` is reported as ``sub``, so resolution
        of relative edges is best-effort. Confirm before relying on it.
        """
        try:
            # Python source is conventionally UTF-8 (PEP 3120); decode
            # explicitly so results don't depend on the platform locale.
            source = filepath.read_text(encoding="utf-8", errors="replace")
            with warnings.catch_warnings():
                # e.g. invalid escape sequences in scanned code — irrelevant here
                warnings.simplefilter("ignore", SyntaxWarning)
                tree = ast.parse(source, filename=str(filepath))
        except (SyntaxError, ValueError):
            # Unparseable file (syntax error, NUL bytes, bad encoding —
            # UnicodeDecodeError is a ValueError subclass) → no edges.
            return []

        found: list[str] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                found.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom) and node.module:
                found.append(node.module)
        return found

    # ---- import resolution --------------------------------------------------

    def _resolve_import(self, import_name: str) -> str | None:
        """Resolve a dotted import to a local module path (None if external).

        Tries the longest prefix first, so ``pkg.mod.attr`` resolves to
        ``pkg/mod.py`` when ``pkg.mod.attr`` is not itself a module; the
        first iteration (i == len(parts)) is the exact-match case.
        """
        parts = import_name.split(".")
        for i in range(len(parts), 0, -1):
            path = self._module_paths.get(".".join(parts[:i]))
            if path is not None:
                return path
        return None  # external or unresolvable

    # ---- blast radius -------------------------------------------------------

    def _compute_blast_radius(self, module_path: str, graph: ImportGraphData) -> int:
        """BFS over reverse edges: count all transitive dependents.

        NOTE: in an import cycle the start module is reachable from itself
        and is counted — behavior preserved from the original.
        """
        visited: set[str] = set()
        frontier = [module_path]
        head = 0  # index cursor: O(1) dequeue instead of list.pop(0)'s O(n)
        while head < len(frontier):
            current = frontier[head]
            head += 1
            # .get() rather than [] so we never insert keys into the
            # defaultdict while traversing the graph.
            for dependent in graph.imported_by.get(current, []):
                if dependent not in visited:
                    visited.add(dependent)
                    frontier.append(dependent)
        return len(visited)