cpitd 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cpitd-0.2.1/.gitignore +13 -0
- cpitd-0.2.1/.pre-commit-config.yaml +9 -0
- cpitd-0.2.1/.pre-commit-hooks.yaml +13 -0
- cpitd-0.2.1/CHANGELOG.md +23 -0
- cpitd-0.2.1/LICENSE +21 -0
- cpitd-0.2.1/PKG-INFO +207 -0
- cpitd-0.2.1/README.md +173 -0
- cpitd-0.2.1/cpitd/__init__.py +3 -0
- cpitd-0.2.1/cpitd/cli.py +134 -0
- cpitd-0.2.1/cpitd/config.py +154 -0
- cpitd-0.2.1/cpitd/discovery.py +74 -0
- cpitd-0.2.1/cpitd/filter.py +201 -0
- cpitd-0.2.1/cpitd/indexer.py +93 -0
- cpitd-0.2.1/cpitd/pipeline.py +118 -0
- cpitd-0.2.1/cpitd/reporter.py +272 -0
- cpitd-0.2.1/cpitd/tokenizer.py +134 -0
- cpitd-0.2.1/cpitd/types.py +11 -0
- cpitd-0.2.1/cpitd/winnowing.py +197 -0
- cpitd-0.2.1/cpitd.egg-info/PKG-INFO +207 -0
- cpitd-0.2.1/cpitd.egg-info/SOURCES.txt +41 -0
- cpitd-0.2.1/cpitd.egg-info/dependency_links.txt +1 -0
- cpitd-0.2.1/cpitd.egg-info/entry_points.txt +2 -0
- cpitd-0.2.1/cpitd.egg-info/requires.txt +13 -0
- cpitd-0.2.1/cpitd.egg-info/top_level.txt +1 -0
- cpitd-0.2.1/docs/api.rst +27 -0
- cpitd-0.2.1/docs/conf.py +18 -0
- cpitd-0.2.1/docs/index.rst +17 -0
- cpitd-0.2.1/pyproject.toml +67 -0
- cpitd-0.2.1/setup.cfg +4 -0
- cpitd-0.2.1/tests/__init__.py +0 -0
- cpitd-0.2.1/tests/fixtures/abc_a.py +31 -0
- cpitd-0.2.1/tests/fixtures/abc_b.py +31 -0
- cpitd-0.2.1/tests/fixtures/clone_a.py +19 -0
- cpitd-0.2.1/tests/fixtures/clone_b.py +19 -0
- cpitd-0.2.1/tests/fixtures/unique.py +24 -0
- cpitd-0.2.1/tests/test_config.py +204 -0
- cpitd-0.2.1/tests/test_discovery.py +41 -0
- cpitd-0.2.1/tests/test_filter.py +389 -0
- cpitd-0.2.1/tests/test_indexer.py +102 -0
- cpitd-0.2.1/tests/test_pipeline.py +219 -0
- cpitd-0.2.1/tests/test_reporter.py +226 -0
- cpitd-0.2.1/tests/test_tokenizer.py +78 -0
- cpitd-0.2.1/tests/test_winnowing.py +204 -0
cpitd-0.2.1/.pre-commit-hooks.yaml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
- id: cpitd
|
|
2
|
+
name: cpitd (clone detection — staged files)
|
|
3
|
+
entry: cpitd
|
|
4
|
+
language: python
|
|
5
|
+
types: [file]
|
|
6
|
+
pass_filenames: true
|
|
7
|
+
|
|
8
|
+
- id: cpitd-full
|
|
9
|
+
name: cpitd (clone detection — full scan)
|
|
10
|
+
entry: cpitd
|
|
11
|
+
language: python
|
|
12
|
+
types: [file]
|
|
13
|
+
pass_filenames: false
|
cpitd-0.2.1/CHANGELOG.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
- Add detailed similarity metrics (#18)
|
|
11
|
+
- Improve error handling (#16)
|
|
12
|
+
- Remove CLAUDE.md from git tracking (#27)
|
|
13
|
+
- Add sibling-aware clone suppression for abstract method implementations (#29)
|
|
14
|
+
- Add clone suppression filters for benign patterns like @abstractmethod (#28)
|
|
15
|
+
- Add default scan of current directory when no paths are provided (#6)
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
- Refactor README for external users with install, pre-commit, and per-language suppress guides (#31)
|
|
21
|
+
- Configure pytest, black, and sphinx dev tooling (#4)
|
|
22
|
+
- Create cpitd package directory with module stubs (#3)
|
|
23
|
+
- Add pyproject.toml with project metadata and dependencies (#2)
|
cpitd-0.2.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Scythia Marrow
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cpitd-0.2.1/PKG-INFO
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cpitd
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Copy Paste Is The Devil — language-agnostic code clone detection
|
|
5
|
+
Author: cpitd contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/scythia-marrow/cpitd
|
|
8
|
+
Project-URL: Repository, https://github.com/scythia-marrow/cpitd
|
|
9
|
+
Keywords: clone-detection,static-analysis,code-quality,winnowing
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: click>=8.1
|
|
24
|
+
Requires-Dist: pygments>=2.17
|
|
25
|
+
Requires-Dist: tomli>=2.0; python_version < "3.11"
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
29
|
+
Requires-Dist: black>=24.0; extra == "dev"
|
|
30
|
+
Requires-Dist: pre-commit>=3.0; extra == "dev"
|
|
31
|
+
Requires-Dist: sphinx>=7.2; extra == "dev"
|
|
32
|
+
Requires-Dist: sphinx-rtd-theme>=2.0; extra == "dev"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# CPITD: Copy Paste Is The Devil
|
|
36
|
+
|
|
37
|
+
A static code analysis tool that rakes you over the coals for using copy/paste. Because copy/paste is the devil. It is language-agnostic and blazingly fast.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
For development (linting, tests, docs):
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
git clone https://github.com/scythia-marrow/cpitd.git
|
|
47
|
+
cd cpitd
|
|
48
|
+
python -m venv venv
|
|
49
|
+
source venv/bin/activate
|
|
50
|
+
pip install -e ".[dev]"
|
|
51
|
+
pre-commit install
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# Scan current directory
|
|
60
|
+
cpitd
|
|
61
|
+
|
|
62
|
+
# Scan specific paths
|
|
63
|
+
cpitd src/ lib/
|
|
64
|
+
|
|
65
|
+
# JSON output for CI pipelines
|
|
66
|
+
cpitd --format json src/ | jq '.[]'
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Configuration
|
|
72
|
+
|
|
73
|
+
Settings can live in `pyproject.toml` so you don't repeat yourself on every invocation:
|
|
74
|
+
|
|
75
|
+
```toml
|
|
76
|
+
[tool.cpitd]
|
|
77
|
+
format = "human"
|
|
78
|
+
ignore = ["tests/fixtures/*", "vendor/*"]
|
|
79
|
+
suppress = ["*@abstractmethod*"]
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
CLI flags take precedence over file config for single-valued options. For list options (`ignore`, `suppress`, `languages`), CLI values are appended to file values rather than replacing them.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Suppressing False Positives
|
|
87
|
+
|
|
88
|
+
Some clones are intentional—boilerplate required by a language or framework. Use `--suppress` to silence them.
|
|
89
|
+
|
|
90
|
+
`--suppress PATTERN` accepts [fnmatch](https://docs.python.org/3/library/fnmatch.html) glob patterns matched against raw source lines (including one line of context above each clone chunk, to catch decorators). If any line in either side of a clone pair matches, the group is suppressed.
|
|
91
|
+
|
|
92
|
+
You can also annotate specific sites inline—the filter reads raw source, so comments are visible even though the tokenizer strips them. Add a suppression comment to any line inside or immediately above a clone:
|
|
93
|
+
|
|
94
|
+
| Language | Inline annotation |
|
|
95
|
+
|----------|-------------------------------|
|
|
96
|
+
| Python | `# cpitd: suppress` |
|
|
97
|
+
| C/C++ | `// cpitd: suppress` |
|
|
98
|
+
| Rust | `// cpitd: suppress` |
|
|
99
|
+
|
|
100
|
+
Then pass `--suppress "*cpitd: suppress*"` (or set it in `pyproject.toml`).
|
|
101
|
+
|
|
102
|
+
### Python
|
|
103
|
+
|
|
104
|
+
**Abstract base class implementations** — ABCs force you to repeat method signatures across subclasses. Suppress them with:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
cpitd src/ --suppress "*@abstractmethod*"
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Or in `pyproject.toml`:
|
|
111
|
+
|
|
112
|
+
```toml
|
|
113
|
+
[tool.cpitd]
|
|
114
|
+
suppress = ["*@abstractmethod*", "*@override*"]
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
**Protocol / interface boilerplate** — if you use a decorator to mark protocol implementations (e.g. `@protocol_impl`), pass that pattern:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
cpitd src/ --suppress "*@protocol_impl*"
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### C / C++
|
|
124
|
+
|
|
125
|
+
**Header guards** — every `.h` file has them. Suppress both styles:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
cpitd src/ \
|
|
129
|
+
--suppress "*#ifndef *_H*" \
|
|
130
|
+
--suppress "*#pragma once*"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Or in `pyproject.toml`:
|
|
134
|
+
|
|
135
|
+
```toml
|
|
136
|
+
[tool.cpitd]
|
|
137
|
+
suppress = ["*#ifndef *_H*", "*#pragma once*"]
|
|
138
|
+
ignore = ["**/*.h"] # alternatively, just skip headers entirely
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Rust
|
|
142
|
+
|
|
143
|
+
**Trait implementations** — implementing the same trait for multiple types produces near-identical `impl` blocks. Suppress by matching the `impl ... for ...` line:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
cpitd src/ --suppress "*impl * for *"
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Derive macros** — `#[derive(Debug, Clone, PartialEq)]` lines repeat everywhere but are rarely meaningful clones. Suppress them:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
cpitd src/ --suppress "*#[derive(*"
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
In `pyproject.toml`:
|
|
156
|
+
|
|
157
|
+
```toml
|
|
158
|
+
[tool.cpitd]
|
|
159
|
+
suppress = [
|
|
160
|
+
"*impl*Display*for*",
|
|
161
|
+
"*impl*From*for*",
|
|
162
|
+
"*#[derive(*",
|
|
163
|
+
]
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Pre-commit Hook
|
|
169
|
+
|
|
170
|
+
Add cpitd to `.pre-commit-config.yaml` as a local hook (cpitd must be installed in the environment where hooks run):
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
repos:
|
|
174
|
+
- repo: local
|
|
175
|
+
hooks:
|
|
176
|
+
- id: cpitd-clone-detection
|
|
177
|
+
name: cpitd (clone detection)
|
|
178
|
+
entry: cpitd src/ --ignore "tests/fixtures/*"
|
|
179
|
+
language: system
|
|
180
|
+
pass_filenames: false
|
|
181
|
+
always_run: true
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Then install the hook:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
pre-commit install
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
The hook runs `cpitd` on every commit. Tune `entry` with `--suppress` or any other flag -- or lean on `[tool.cpitd]` in `pyproject.toml` so the hook entry stays short.
|
|
191
|
+
|
|
192
|
+
To run the hook manually without committing:
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
pre-commit run cpitd-clone-detection
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## CLI Options Reference
|
|
201
|
+
|
|
202
|
+
| Flag | Default | Description |
|
|
203
|
+
|------|---------|-------------|
|
|
204
|
+
| `--format human\|json` | `human` | Output format |
|
|
205
|
+
| `--ignore PATTERN` | — | Glob patterns to exclude (repeatable) |
|
|
206
|
+
| `--languages LANG` | — | Restrict to specific languages (repeatable) |
|
|
207
|
+
| `--suppress PATTERN` | — | Suppress clones whose source lines match (repeatable) |
|
cpitd-0.2.1/README.md
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# CPITD: Copy Paste Is The Devil
|
|
2
|
+
|
|
3
|
+
A static code analysis tool that rakes you over the coals for using copy/paste. Because copy/paste is the devil. It is language-agnostic and blazingly fast.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
For development (linting, tests, docs):
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
git clone https://github.com/scythia-marrow/cpitd.git
|
|
13
|
+
cd cpitd
|
|
14
|
+
python -m venv venv
|
|
15
|
+
source venv/bin/activate
|
|
16
|
+
pip install -e ".[dev]"
|
|
17
|
+
pre-commit install
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# Scan current directory
|
|
26
|
+
cpitd
|
|
27
|
+
|
|
28
|
+
# Scan specific paths
|
|
29
|
+
cpitd src/ lib/
|
|
30
|
+
|
|
31
|
+
# JSON output for CI pipelines
|
|
32
|
+
cpitd --format json src/ | jq '.[]'
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Configuration
|
|
38
|
+
|
|
39
|
+
Settings can live in `pyproject.toml` so you don't repeat yourself on every invocation:
|
|
40
|
+
|
|
41
|
+
```toml
|
|
42
|
+
[tool.cpitd]
|
|
43
|
+
format = "human"
|
|
44
|
+
ignore = ["tests/fixtures/*", "vendor/*"]
|
|
45
|
+
suppress = ["*@abstractmethod*"]
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
CLI flags take precedence over file config for single-valued options. For list options (`ignore`, `suppress`, `languages`), CLI values are appended to file values rather than replacing them.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Suppressing False Positives
|
|
53
|
+
|
|
54
|
+
Some clones are intentional—boilerplate required by a language or framework. Use `--suppress` to silence them.
|
|
55
|
+
|
|
56
|
+
`--suppress PATTERN` accepts [fnmatch](https://docs.python.org/3/library/fnmatch.html) glob patterns matched against raw source lines (including one line of context above each clone chunk, to catch decorators). If any line in either side of a clone pair matches, the group is suppressed.
|
|
57
|
+
|
|
58
|
+
You can also annotate specific sites inline—the filter reads raw source, so comments are visible even though the tokenizer strips them. Add a suppression comment to any line inside or immediately above a clone:
|
|
59
|
+
|
|
60
|
+
| Language | Inline annotation |
|
|
61
|
+
|----------|-------------------------------|
|
|
62
|
+
| Python | `# cpitd: suppress` |
|
|
63
|
+
| C/C++ | `// cpitd: suppress` |
|
|
64
|
+
| Rust | `// cpitd: suppress` |
|
|
65
|
+
|
|
66
|
+
Then pass `--suppress "*cpitd: suppress*"` (or set it in `pyproject.toml`).
|
|
67
|
+
|
|
68
|
+
### Python
|
|
69
|
+
|
|
70
|
+
**Abstract base class implementations** — ABCs force you to repeat method signatures across subclasses. Suppress them with:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
cpitd src/ --suppress "*@abstractmethod*"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Or in `pyproject.toml`:
|
|
77
|
+
|
|
78
|
+
```toml
|
|
79
|
+
[tool.cpitd]
|
|
80
|
+
suppress = ["*@abstractmethod*", "*@override*"]
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Protocol / interface boilerplate** — if you use a decorator to mark protocol implementations (e.g. `@protocol_impl`), pass that pattern:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
cpitd src/ --suppress "*@protocol_impl*"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### C / C++
|
|
90
|
+
|
|
91
|
+
**Header guards** — every `.h` file has them. Suppress both styles:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
cpitd src/ \
|
|
95
|
+
--suppress "*#ifndef *_H*" \
|
|
96
|
+
--suppress "*#pragma once*"
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Or in `pyproject.toml`:
|
|
100
|
+
|
|
101
|
+
```toml
|
|
102
|
+
[tool.cpitd]
|
|
103
|
+
suppress = ["*#ifndef *_H*", "*#pragma once*"]
|
|
104
|
+
ignore = ["**/*.h"] # alternatively, just skip headers entirely
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Rust
|
|
108
|
+
|
|
109
|
+
**Trait implementations** — implementing the same trait for multiple types produces near-identical `impl` blocks. Suppress by matching the `impl ... for ...` line:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
cpitd src/ --suppress "*impl * for *"
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**Derive macros** — `#[derive(Debug, Clone, PartialEq)]` lines repeat everywhere but are rarely meaningful clones. Suppress them:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
cpitd src/ --suppress "*#[derive(*"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
In `pyproject.toml`:
|
|
122
|
+
|
|
123
|
+
```toml
|
|
124
|
+
[tool.cpitd]
|
|
125
|
+
suppress = [
|
|
126
|
+
"*impl*Display*for*",
|
|
127
|
+
"*impl*From*for*",
|
|
128
|
+
"*#[derive(*",
|
|
129
|
+
]
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Pre-commit Hook
|
|
135
|
+
|
|
136
|
+
Add cpitd to `.pre-commit-config.yaml` as a local hook (cpitd must be installed in the environment where hooks run):
|
|
137
|
+
|
|
138
|
+
```yaml
|
|
139
|
+
repos:
|
|
140
|
+
- repo: local
|
|
141
|
+
hooks:
|
|
142
|
+
- id: cpitd-clone-detection
|
|
143
|
+
name: cpitd (clone detection)
|
|
144
|
+
entry: cpitd src/ --ignore "tests/fixtures/*"
|
|
145
|
+
language: system
|
|
146
|
+
pass_filenames: false
|
|
147
|
+
always_run: true
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Then install the hook:
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
pre-commit install
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
The hook runs `cpitd` on every commit. Tune `entry` with `--suppress` or any other flag -- or lean on `[tool.cpitd]` in `pyproject.toml` so the hook entry stays short.
|
|
157
|
+
|
|
158
|
+
To run the hook manually without committing:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
pre-commit run cpitd-clone-detection
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## CLI Options Reference
|
|
167
|
+
|
|
168
|
+
| Flag | Default | Description |
|
|
169
|
+
|------|---------|-------------|
|
|
170
|
+
| `--format human\|json` | `human` | Output format |
|
|
171
|
+
| `--ignore PATTERN` | — | Glob patterns to exclude (repeatable) |
|
|
172
|
+
| `--languages LANG` | — | Restrict to specific languages (repeatable) |
|
|
173
|
+
| `--suppress PATTERN` | — | Suppress clones whose source lines match (repeatable) |
|
cpitd-0.2.1/cpitd/cli.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""CLI interface for cpitd using Click."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from cpitd import __version__
|
|
8
|
+
from cpitd.config import ConfigFileError, build_config, load_file_config
|
|
9
|
+
from cpitd.pipeline import scan_and_report
|
|
10
|
+
from cpitd.tokenizer import NormalizationLevel
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _expand_cli_param(name: str, value: object) -> tuple[str, object]:
|
|
14
|
+
"""Map a CLI parameter name and value to its Config-compatible equivalent.
|
|
15
|
+
|
|
16
|
+
Handles renaming short CLI names (e.g. ``--ignore`` → ``ignore_patterns``)
|
|
17
|
+
and converting tuple-typed multi-value options.
|
|
18
|
+
|
|
19
|
+
Returns:
|
|
20
|
+
A (config_field_name, converted_value) pair.
|
|
21
|
+
"""
|
|
22
|
+
if name == "normalize":
|
|
23
|
+
return name, NormalizationLevel(value)
|
|
24
|
+
if name in ("ignore", "suppress", "languages"):
|
|
25
|
+
config_key = {"ignore": "ignore_patterns", "suppress": "suppress_patterns"}.get(
|
|
26
|
+
name, name
|
|
27
|
+
)
|
|
28
|
+
return config_key, tuple(value)
|
|
29
|
+
return name, value
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _collect_explicit_args(ctx: click.Context, **kwargs: object) -> dict[str, object]:
    """Keep only the options the user typed, keyed by their Config field name.

    Each keyword argument is looked up with ``ctx.get_parameter_source``;
    values that did not come from the command line (defaults, for example)
    are dropped.  Survivors are converted by ``_expand_cli_param``.
    """
    from_command_line = click.core.ParameterSource.COMMANDLINE
    return dict(
        _expand_cli_param(option, raw_value)
        for option, raw_value in kwargs.items()
        if ctx.get_parameter_source(option) is from_command_line
    )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@click.command()
@click.version_option(version=__version__, prog_name="cpitd")
@click.argument("paths", nargs=-1, type=click.Path(exists=True))
@click.option(
    "--min-tokens",
    help="Minimum token sequence length to report.",
    default=50,
    show_default=True,
)
@click.option(
    "--normalize",
    help="Token normalization level (0=exact, 1=identifiers, 2=literals+identifiers).",
    type=click.IntRange(0, 2),
    default=0,
    show_default=True,
)
@click.option(
    "--format",
    "output_format",
    help="Output format.",
    type=click.Choice(["human", "json"]),
    default="human",
    show_default=True,
)
@click.option(
    "--ignore",
    help="Glob patterns to exclude (repeatable).",
    multiple=True,
)
@click.option(
    "--languages",
    help="Restrict to specific languages (repeatable).",
    multiple=True,
)
@click.option(
    "--suppress",
    help="Glob patterns to suppress clone groups (repeatable). "
    "If any source line in a clone chunk matches, the group is suppressed.",
    multiple=True,
)
@click.option(
    "--verbose",
    help="Print diagnostic warnings to stderr (skipped files, etc.).",
    is_flag=True,
    default=False,
)
@click.pass_context
def main(
    ctx,
    paths,
    min_tokens,
    normalize,
    output_format,
    ignore,
    languages,
    suppress,
    verbose,
):
    """Detect copy-pasted code clones across a codebase.

    Pass one or more file or directory PATHS to analyze.
    Defaults to the current directory if none are given.
    """
    # The docstring above is Click's --help text; keep its wording stable.

    # No positional PATHS: fall back to scanning the current directory.
    targets = paths or (".",)

    # Only values typed on the command line are treated as overrides, so
    # Click's built-in defaults never mask what the file config provides.
    explicit = _collect_explicit_args(
        ctx,
        min_tokens=min_tokens,
        normalize=normalize,
        output_format=output_format,
        ignore=ignore,
        languages=languages,
        suppress=suppress,
        verbose=verbose,
    )

    try:
        file_settings = load_file_config()
    except ConfigFileError as exc:
        # Surface config problems as a Click error message, without the
        # chained traceback.
        raise click.ClickException(str(exc)) from None

    settings = build_config(explicit, file_settings)

    try:
        found = scan_and_report(settings, targets, out=sys.stdout)
    except KeyboardInterrupt:
        # 130 is the conventional shell exit status for Ctrl-C (128 + SIGINT).
        raise SystemExit(130)
    except (OSError, ValueError, RuntimeError) as exc:
        raise click.ClickException(f"scan failed: {exc}") from None

    # Exit 1 when scan_and_report returned anything truthy, 0 otherwise.
    exit_status = 1 if found else 0
    raise SystemExit(exit_status)
|