parallax-scan 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parallax_scan-0.3.0/LICENSE +21 -0
- parallax_scan-0.3.0/PKG-INFO +134 -0
- parallax_scan-0.3.0/README.md +107 -0
- parallax_scan-0.3.0/pyproject.toml +57 -0
- parallax_scan-0.3.0/setup.cfg +4 -0
- parallax_scan-0.3.0/src/parallax/__init__.py +20 -0
- parallax_scan-0.3.0/src/parallax/cli.py +270 -0
- parallax_scan-0.3.0/src/parallax/config.py +90 -0
- parallax_scan-0.3.0/src/parallax/core.py +220 -0
- parallax_scan-0.3.0/src/parallax/extractors/__init__.py +26 -0
- parallax_scan-0.3.0/src/parallax/extractors/base.py +23 -0
- parallax_scan-0.3.0/src/parallax/extractors/django_models.py +90 -0
- parallax_scan-0.3.0/src/parallax/extractors/env_vars.py +80 -0
- parallax_scan-0.3.0/src/parallax/extractors/http_urls.py +134 -0
- parallax_scan-0.3.0/src/parallax/extractors/redis_keys.py +148 -0
- parallax_scan-0.3.0/src/parallax/extractors/sqlalchemy_models.py +188 -0
- parallax_scan-0.3.0/src/parallax/reporters/__init__.py +8 -0
- parallax_scan-0.3.0/src/parallax/reporters/html_reporter.py +113 -0
- parallax_scan-0.3.0/src/parallax/reporters/json_reporter.py +39 -0
- parallax_scan-0.3.0/src/parallax/reporters/sarif_reporter.py +112 -0
- parallax_scan-0.3.0/src/parallax/reporters/text_reporter.py +53 -0
- parallax_scan-0.3.0/src/parallax/verify/__init__.py +25 -0
- parallax_scan-0.3.0/src/parallax/verify/core.py +97 -0
- parallax_scan-0.3.0/src/parallax/verify/python_ast.py +83 -0
- parallax_scan-0.3.0/src/parallax_scan.egg-info/PKG-INFO +134 -0
- parallax_scan-0.3.0/src/parallax_scan.egg-info/SOURCES.txt +38 -0
- parallax_scan-0.3.0/src/parallax_scan.egg-info/dependency_links.txt +1 -0
- parallax_scan-0.3.0/src/parallax_scan.egg-info/entry_points.txt +2 -0
- parallax_scan-0.3.0/src/parallax_scan.egg-info/requires.txt +8 -0
- parallax_scan-0.3.0/src/parallax_scan.egg-info/top_level.txt +1 -0
- parallax_scan-0.3.0/tests/test_config_and_ci.py +120 -0
- parallax_scan-0.3.0/tests/test_core.py +158 -0
- parallax_scan-0.3.0/tests/test_django_extractor.py +33 -0
- parallax_scan-0.3.0/tests/test_env_vars_extractor.py +48 -0
- parallax_scan-0.3.0/tests/test_http_urls_extractor.py +48 -0
- parallax_scan-0.3.0/tests/test_redis_keys_extractor.py +85 -0
- parallax_scan-0.3.0/tests/test_repo_awareness.py +33 -0
- parallax_scan-0.3.0/tests/test_reporters.py +90 -0
- parallax_scan-0.3.0/tests/test_sqlalchemy_extractor.py +39 -0
- parallax_scan-0.3.0/tests/test_verify.py +40 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ziyad Alotaibi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: parallax-scan
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Find functions that work on the same resource through different code paths — the architectural-duplication detector that token-similarity tools miss.
|
|
5
|
+
Author-email: Ziyad Alotaibi <ziyad.alotaibe@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ziyad00/parallax
|
|
8
|
+
Project-URL: Issues, https://github.com/ziyad00/parallax/issues
|
|
9
|
+
Keywords: duplication,static-analysis,ast,refactor,code-quality
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: tomli; python_version < "3.11"
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-cov>=4; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# parallax
|
|
29
|
+
|
|
30
|
+
[](https://pypi.org/project/parallax-scan/)
|
|
31
|
+
[](https://github.com/ziyad00/parallax/actions/workflows/ci.yml)
|
|
32
|
+
[](LICENSE)
|
|
33
|
+
|
|
34
|
+
Find code that does the same logical job through different paths.
|
|
35
|
+
|
|
36
|
+
Token-similarity tools (jscpd, PMD CPD, `pylint duplicate-code`) detect copy-paste. parallax detects something different: two pieces of code that touch the same set of resources, regardless of how the code is written. Different filters, different return shapes, even different languages.
|
|
37
|
+
|
|
38
|
+
## Model
|
|
39
|
+
|
|
40
|
+
A **unit** of code (function, method, file, module, microservice, ...) touches a set of **resources** (database tables, HTTP endpoints, Redis keys, env vars, file paths, ...). Units sharing the same resource set are clustered as duplication candidates.
|
|
41
|
+
|
|
42
|
+
Both unit detection and resource detection are pluggable per **extractor**.
|
|
43
|
+
|
|
44
|
+
## Built-in extractors
|
|
45
|
+
|
|
46
|
+
| Name | Unit | Resource |
|
|
47
|
+
|---|---|---|
|
|
48
|
+
| `sqlalchemy` | Python function/method | SQLAlchemy ORM model classes |
|
|
49
|
+
| `django` | Python function/method | Django ORM model classes |
|
|
50
|
+
| `http-urls` | any text file | HTTP URL paths |
|
|
51
|
+
| `env-vars` | any text file | Environment variable names |
|
|
52
|
+
| `redis-keys` | any text file | Redis key namespaces |
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install parallax-scan
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Usage
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
parallax scan path/to/repo
|
|
64
|
+
parallax scan path/to/repo --extractor sqlalchemy
|
|
65
|
+
parallax scan path/to/repo -e sqlalchemy -e http-urls
|
|
66
|
+
parallax scan path/to/repo --min-resources 3 --top 20
|
|
67
|
+
parallax scan path/to/repo --cross-file-only
|
|
68
|
+
parallax scan path/to/repo --format html -o report.html
|
|
69
|
+
parallax scan path/to/repo --format sarif -o parallax.sarif
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Configuration
|
|
73
|
+
|
|
74
|
+
Drop `.parallax.toml` at your repo root:
|
|
75
|
+
|
|
76
|
+
```toml
|
|
77
|
+
[scan]
|
|
78
|
+
min_resources = 3
|
|
79
|
+
min_cluster_size = 2
|
|
80
|
+
|
|
81
|
+
[ci]
|
|
82
|
+
max_cluster_size = 5
|
|
83
|
+
|
|
84
|
+
[[ignore]]
|
|
85
|
+
resources = ["User", "Place"]
|
|
86
|
+
reason = "Generic"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
In CI:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
parallax scan . --ci
|
|
93
|
+
parallax scan . --format sarif -o parallax.sarif
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
`--ci` exits non-zero only when a cluster meets `ci.max_cluster_size`. Without it, any reported cluster makes the exit code non-zero.
|
|
97
|
+
|
|
98
|
+
## Comparison
|
|
99
|
+
|
|
100
|
+
| Tool | Catches | Doesn't catch |
|
|
101
|
+
|---|---|---|
|
|
102
|
+
| jscpd, PMD CPD, pylint duplicate-code | Token-similar copy-paste | Code with different surface shape |
|
|
103
|
+
| Sourcegraph | Manual code search | Automatic detection |
|
|
104
|
+
| `pydeps`, `dependency-cruiser` | Module-level imports | Same-resource overlap |
|
|
105
|
+
| semgrep | Hand-written patterns | Discovery |
|
|
106
|
+
| parallax | Same-resource overlap regardless of shape | Single-instance bad patterns |
|
|
107
|
+
|
|
108
|
+
## Status
|
|
109
|
+
|
|
110
|
+
Alpha. API and CLI are unstable until 1.0.
|
|
111
|
+
|
|
112
|
+
## Writing an extractor
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from pathlib import Path
|
|
116
|
+
from typing import Iterable
|
|
117
|
+
|
|
118
|
+
from parallax import Unit
|
|
119
|
+
from parallax.extractors.base import Extractor
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class TerraformAwsExtractor(Extractor):
|
|
123
|
+
name = "terraform-aws"
|
|
124
|
+
|
|
125
|
+
def extract(self, root: Path) -> Iterable[Unit]:
|
|
126
|
+
for tf in root.rglob("*.tf"):
|
|
127
|
+
...
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Register in `parallax.extractors.BUILTIN_EXTRACTORS`.
|
|
131
|
+
|
|
132
|
+
## License
|
|
133
|
+
|
|
134
|
+
MIT
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# parallax
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/parallax-scan/)
|
|
4
|
+
[](https://github.com/ziyad00/parallax/actions/workflows/ci.yml)
|
|
5
|
+
[](LICENSE)
|
|
6
|
+
|
|
7
|
+
Find code that does the same logical job through different paths.
|
|
8
|
+
|
|
9
|
+
Token-similarity tools (jscpd, PMD CPD, `pylint duplicate-code`) detect copy-paste. parallax detects something different: two pieces of code that touch the same set of resources, regardless of how the code is written. Different filters, different return shapes, even different languages.
|
|
10
|
+
|
|
11
|
+
## Model
|
|
12
|
+
|
|
13
|
+
A **unit** of code (function, method, file, module, microservice, ...) touches a set of **resources** (database tables, HTTP endpoints, Redis keys, env vars, file paths, ...). Units sharing the same resource set are clustered as duplication candidates.
|
|
14
|
+
|
|
15
|
+
Both unit detection and resource detection are pluggable per **extractor**.
|
|
16
|
+
|
|
17
|
+
## Built-in extractors
|
|
18
|
+
|
|
19
|
+
| Name | Unit | Resource |
|
|
20
|
+
|---|---|---|
|
|
21
|
+
| `sqlalchemy` | Python function/method | SQLAlchemy ORM model classes |
|
|
22
|
+
| `django` | Python function/method | Django ORM model classes |
|
|
23
|
+
| `http-urls` | any text file | HTTP URL paths |
|
|
24
|
+
| `env-vars` | any text file | Environment variable names |
|
|
25
|
+
| `redis-keys` | any text file | Redis key namespaces |
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install parallax-scan
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
parallax scan path/to/repo
|
|
37
|
+
parallax scan path/to/repo --extractor sqlalchemy
|
|
38
|
+
parallax scan path/to/repo -e sqlalchemy -e http-urls
|
|
39
|
+
parallax scan path/to/repo --min-resources 3 --top 20
|
|
40
|
+
parallax scan path/to/repo --cross-file-only
|
|
41
|
+
parallax scan path/to/repo --format html -o report.html
|
|
42
|
+
parallax scan path/to/repo --format sarif -o parallax.sarif
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Configuration
|
|
46
|
+
|
|
47
|
+
Drop `.parallax.toml` at your repo root:
|
|
48
|
+
|
|
49
|
+
```toml
|
|
50
|
+
[scan]
|
|
51
|
+
min_resources = 3
|
|
52
|
+
min_cluster_size = 2
|
|
53
|
+
|
|
54
|
+
[ci]
|
|
55
|
+
max_cluster_size = 5
|
|
56
|
+
|
|
57
|
+
[[ignore]]
|
|
58
|
+
resources = ["User", "Place"]
|
|
59
|
+
reason = "Generic"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
In CI:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
parallax scan . --ci
|
|
66
|
+
parallax scan . --format sarif -o parallax.sarif
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
`--ci` exits non-zero only when a cluster meets `ci.max_cluster_size`. Without it, any reported cluster makes the exit code non-zero.
|
|
70
|
+
|
|
71
|
+
## Comparison
|
|
72
|
+
|
|
73
|
+
| Tool | Catches | Doesn't catch |
|
|
74
|
+
|---|---|---|
|
|
75
|
+
| jscpd, PMD CPD, pylint duplicate-code | Token-similar copy-paste | Code with different surface shape |
|
|
76
|
+
| Sourcegraph | Manual code search | Automatic detection |
|
|
77
|
+
| `pydeps`, `dependency-cruiser` | Module-level imports | Same-resource overlap |
|
|
78
|
+
| semgrep | Hand-written patterns | Discovery |
|
|
79
|
+
| parallax | Same-resource overlap regardless of shape | Single-instance bad patterns |
|
|
80
|
+
|
|
81
|
+
## Status
|
|
82
|
+
|
|
83
|
+
Alpha. API and CLI are unstable until 1.0.
|
|
84
|
+
|
|
85
|
+
## Writing an extractor
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from pathlib import Path
|
|
89
|
+
from typing import Iterable
|
|
90
|
+
|
|
91
|
+
from parallax import Unit
|
|
92
|
+
from parallax.extractors.base import Extractor
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class TerraformAwsExtractor(Extractor):
|
|
96
|
+
name = "terraform-aws"
|
|
97
|
+
|
|
98
|
+
def extract(self, root: Path) -> Iterable[Unit]:
|
|
99
|
+
for tf in root.rglob("*.tf"):
|
|
100
|
+
...
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Register in `parallax.extractors.BUILTIN_EXTRACTORS`.
|
|
104
|
+
|
|
105
|
+
## License
|
|
106
|
+
|
|
107
|
+
MIT
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "parallax-scan"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "Find functions that work on the same resource through different code paths — the architectural-duplication detector that token-similarity tools miss."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Ziyad Alotaibi", email = "ziyad.alotaibe@gmail.com" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["duplication", "static-analysis", "ast", "refactor", "code-quality"]
|
|
16
|
+
dependencies = [
|
|
17
|
+
"tomli; python_version < '3.11'",
|
|
18
|
+
]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 3 - Alpha",
|
|
21
|
+
"License :: OSI Approved :: MIT License",
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
27
|
+
"Intended Audience :: Developers",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
dev = [
|
|
32
|
+
"pytest>=7",
|
|
33
|
+
"pytest-cov>=4",
|
|
34
|
+
"ruff>=0.5",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.scripts]
|
|
38
|
+
parallax = "parallax.cli:main"
|
|
39
|
+
|
|
40
|
+
[project.urls]
|
|
41
|
+
Homepage = "https://github.com/ziyad00/parallax"
|
|
42
|
+
Issues = "https://github.com/ziyad00/parallax/issues"
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.packages.find]
|
|
45
|
+
where = ["src"]
|
|
46
|
+
|
|
47
|
+
[tool.ruff]
|
|
48
|
+
line-length = 100
|
|
49
|
+
target-version = "py310"
|
|
50
|
+
|
|
51
|
+
[tool.ruff.lint]
|
|
52
|
+
select = ["E", "F", "W", "I", "B", "UP"]
|
|
53
|
+
ignore = ["E501"]
|
|
54
|
+
|
|
55
|
+
[tool.pytest.ini_options]
|
|
56
|
+
testpaths = ["tests"]
|
|
57
|
+
addopts = "-ra"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""parallax: find code that does the same logical job through different paths."""
|
|
2
|
+
|
|
3
|
+
from .core import (
|
|
4
|
+
Cluster,
|
|
5
|
+
FoldedGroup,
|
|
6
|
+
Unit,
|
|
7
|
+
fold_units_by_class,
|
|
8
|
+
group_by_resource_set,
|
|
9
|
+
)
|
|
10
|
+
from .extractors.base import Extractor
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"Cluster",
|
|
14
|
+
"Extractor",
|
|
15
|
+
"FoldedGroup",
|
|
16
|
+
"Unit",
|
|
17
|
+
"fold_units_by_class",
|
|
18
|
+
"group_by_resource_set",
|
|
19
|
+
]
|
|
20
|
+
__version__ = "0.3.0"
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""parallax CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from .config import Config, load_config
|
|
11
|
+
from .core import Cluster, Unit, group_by_resource_set
|
|
12
|
+
from .extractors import BUILTIN_EXTRACTORS
|
|
13
|
+
from .reporters import render_html, render_json, render_sarif, render_text
|
|
14
|
+
from .verify import verify_cluster
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
REPORTERS = {"text", "json", "html", "sarif"}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _build_argparser() -> argparse.ArgumentParser:
    """Construct the top-level argument parser for the parallax CLI.

    Defines three subcommands: ``scan``, ``list-extractors``, and ``verify``.
    """
    parser = argparse.ArgumentParser(
        prog="parallax",
        description="Find code that does the same logical job through different paths.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # --- scan -----------------------------------------------------------
    scan_parser = commands.add_parser(
        "scan", help="Scan a tree for clusters of overlapping units."
    )
    scan_parser.add_argument("path", type=Path, help="Root directory to scan.")
    scan_parser.add_argument(
        "--extractor",
        "-e",
        action="append",
        choices=sorted(BUILTIN_EXTRACTORS),
        help="Which extractor to run. Repeat to combine. Default: all.",
    )
    scan_parser.add_argument(
        "--config",
        "-c",
        type=Path,
        help=(
            "Path to a config file (TOML). If omitted, parallax looks for "
            ".parallax.toml in the working directory."
        ),
    )
    scan_parser.add_argument(
        "--min-resources", type=int, help="Override config.scan.min_resources."
    )
    scan_parser.add_argument(
        "--min-cluster-size", type=int, help="Override config.scan.min_cluster_size."
    )
    scan_parser.add_argument(
        "--format",
        "-f",
        choices=sorted(REPORTERS),
        default="text",
        help="Output format. Default 'text'.",
    )
    scan_parser.add_argument(
        "--output", "-o", type=Path, help="Write output to FILE instead of stdout."
    )
    scan_parser.add_argument(
        "--json", action="store_true", help="Shortcut for --format json."
    )
    scan_parser.add_argument(
        "--cross-file-only",
        action="store_true",
        help="Drop clusters whose units all live in one file.",
    )
    scan_parser.add_argument(
        "--top",
        type=int,
        default=None,
        help="Limit the report to the N highest-scoring clusters.",
    )
    scan_parser.add_argument(
        "--fold-threshold",
        type=int,
        default=5,
        help="Collapse N+ methods of the same class into one row in text output. Set 0 to disable.",
    )
    scan_parser.add_argument(
        "--ci",
        action="store_true",
        help="Exit non-zero only when a cluster meets ci.max_cluster_size from config.",
    )

    # --- list-extractors ------------------------------------------------
    commands.add_parser("list-extractors", help="Print the extractors built into parallax.")

    # --- verify ---------------------------------------------------------
    verify_parser = commands.add_parser(
        "verify",
        help="Read a cluster (JSON on stdin or --file) and run deeper analysis.",
    )
    verify_parser.add_argument(
        "--root",
        type=Path,
        default=Path("."),
        help="Source root used to resolve unit locations. Default: cwd.",
    )
    verify_parser.add_argument(
        "--file",
        type=Path,
        help="Read the cluster JSON from FILE instead of stdin.",
    )

    return parser
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _apply_overrides(args: argparse.Namespace, cfg: Config) -> Config:
    """Let explicit CLI flags take precedence over config-file values.

    Only flags the user actually supplied (i.e. not None) are copied onto
    the config object, which is mutated in place and returned.
    """
    overrides = {
        "min_resources": args.min_resources,
        "min_cluster_size": args.min_cluster_size,
    }
    for attr, value in overrides.items():
        if value is not None:
            setattr(cfg, attr, value)
    return cfg
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _filter_ignored(
    clusters: list[Cluster], cfg: Config
) -> tuple[list[Cluster], list[Cluster]]:
    """Partition *clusters* into (kept, ignored) using the config's ignore rules."""
    kept: list[Cluster] = []
    ignored: list[Cluster] = []
    for cluster in clusters:
        # A non-None match means an ignore rule claims this cluster's resources.
        bucket = ignored if cfg.matches_ignore(cluster.resources) is not None else kept
        bucket.append(cluster)
    return kept, ignored
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def cmd_scan(args: argparse.Namespace) -> int:
    """Run the ``scan`` subcommand and return the process exit code.

    Pipeline: load config -> run extractors -> cluster units -> apply
    ignore rules and --top -> render in the chosen format -> write output.
    """
    if not args.path.exists():
        print(f"error: {args.path} does not exist", file=sys.stderr)
        return 2

    # Config-file values first, then CLI flags on top.
    cfg = _apply_overrides(args, load_config(args.config))

    # Default to every built-in extractor when none was requested.
    names: list[str] = args.extractor or sorted(BUILTIN_EXTRACTORS)
    units: list[Unit] = []
    for extractor_name in names:
        extractor = BUILTIN_EXTRACTORS[extractor_name]()
        units.extend(extractor.extract(args.path))

    all_clusters = group_by_resource_set(
        units,
        min_resources=cfg.min_resources,
        min_cluster_size=cfg.min_cluster_size,
        cross_file_only=args.cross_file_only,
    )
    kept, ignored = _filter_ignored(all_clusters, cfg)
    if args.top is not None and args.top >= 0:
        kept = kept[: args.top]

    # --json is a convenience alias that wins over --format.
    fmt = "json" if args.json else args.format
    if fmt == "text":
        output = render_text(
            kept,
            scanned_units=len(units),
            extractors=names,
            min_resources=cfg.min_resources,
            min_cluster_size=cfg.min_cluster_size,
            fold_threshold=max(0, args.fold_threshold),
        )
        if ignored:
            output += f"\n({len(ignored)} cluster(s) suppressed by ignore rules)\n"
    elif fmt == "json":
        output = render_json(kept, scanned_units=len(units))
    elif fmt == "html":
        output = render_html(
            kept,
            scanned_units=len(units),
            extractors=names,
            min_resources=cfg.min_resources,
            min_cluster_size=cfg.min_cluster_size,
        )
    elif fmt == "sarif":
        output = render_sarif(kept, scanned_units=len(units), extractors=names)
    else:  # pragma: no cover
        raise ValueError(f"unknown format: {fmt}")

    if args.output:
        args.output.write_text(output, encoding="utf-8")
    else:
        sys.stdout.write(output)
        if not output.endswith("\n"):
            sys.stdout.write("\n")

    return _exit_code(kept, cfg, args.ci)
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _exit_code(kept: list[Cluster], cfg: Config, ci_mode: bool) -> int:
    """Map the surviving clusters to a process exit status.

    Default mode: any kept cluster is a failure. CI mode: fail only when a
    cluster reaches ``ci.max_cluster_size`` (pass when the limit is unset).
    """
    if not ci_mode:
        return 1 if kept else 0
    if cfg.max_cluster_size is None:
        return 0
    threshold = cfg.max_cluster_size
    return 1 if any(c.size >= threshold for c in kept) else 0
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def cmd_verify(args: argparse.Namespace) -> int:
    """Run the ``verify`` subcommand on a cluster read from --file or stdin.

    Accepts either a single cluster dict (with 'units') or a full scan
    payload (with 'clusters'), in which case the first cluster is used.
    """
    if args.file:
        raw = args.file.read_text(encoding="utf-8")
    else:
        raw = sys.stdin.read()

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        print(f"error: invalid JSON: {exc}", file=sys.stderr)
        return 2

    cluster: dict | None = None
    if isinstance(payload, dict):
        if "units" in payload:
            cluster = payload
        elif payload.get("clusters"):
            cluster = payload["clusters"][0]
    if cluster is None:
        print(
            "error: expected a cluster dict (with 'units') or a scan payload "
            "(with 'clusters')",
            file=sys.stderr,
        )
        return 2

    results = verify_cluster(cluster, root=args.root)
    if not results:
        print("No applicable verifiers for this cluster.")
        return 0

    for result in results:
        print(
            f"verifier={result.verifier} "
            f"recommendation={result.recommendation.value} "
            f"mean_similarity={result.mean_similarity:.2f}"
        )
        for pair in result.pairs:
            print(f"  {pair.score:.2f} {pair.a_location} ~ {pair.b_location}")
    return 0
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def cmd_list_extractors(_: argparse.Namespace) -> int:
    """Print each built-in extractor name with the first line of its docstring."""
    for name, cls in sorted(BUILTIN_EXTRACTORS.items()):
        doc = (cls.__doc__ or "").strip()
        summary = doc.splitlines()[0] if doc else ""
        print(f"{name:<14} {summary}")
    return 0
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point; dispatch to a subcommand and return its exit code."""
    parser = _build_argparser()
    args = parser.parse_args(argv)
    handlers = {
        "scan": cmd_scan,
        "list-extractors": cmd_list_extractors,
        "verify": cmd_verify,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        return handler(args)
    # Unreachable in practice: required=True subparsers reject unknown commands.
    parser.error("unknown command")
    return 2
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())