git-trace 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- git_trace-1.0.0/LICENSE +21 -0
- git_trace-1.0.0/PKG-INFO +221 -0
- git_trace-1.0.0/README.md +195 -0
- git_trace-1.0.0/git_trace/__init__.py +6 -0
- git_trace-1.0.0/git_trace/__main__.py +3 -0
- git_trace-1.0.0/git_trace/analysis.py +183 -0
- git_trace-1.0.0/git_trace/git.py +54 -0
- git_trace-1.0.0/git_trace/input/__init__.py +0 -0
- git_trace-1.0.0/git_trace/input/args.py +151 -0
- git_trace-1.0.0/git_trace/input/parser.py +138 -0
- git_trace-1.0.0/git_trace/main.py +115 -0
- git_trace-1.0.0/git_trace/output/__init__.py +0 -0
- git_trace-1.0.0/git_trace/output/assets/analysis_controls.html +116 -0
- git_trace-1.0.0/git_trace/output/assets/analysis_legend.html +27 -0
- git_trace-1.0.0/git_trace/output/assets/pick_controls.html +137 -0
- git_trace-1.0.0/git_trace/output/assets/pick_legend.html +32 -0
- git_trace-1.0.0/git_trace/output/assets/styling.html +5 -0
- git_trace-1.0.0/git_trace/output/graph.py +198 -0
- git_trace-1.0.0/git_trace/output/html_injection.py +34 -0
- git_trace-1.0.0/git_trace/output/text.py +77 -0
- git_trace-1.0.0/git_trace/utils.py +84 -0
- git_trace-1.0.0/git_trace/version.py +2 -0
- git_trace-1.0.0/git_trace.egg-info/PKG-INFO +221 -0
- git_trace-1.0.0/git_trace.egg-info/SOURCES.txt +28 -0
- git_trace-1.0.0/git_trace.egg-info/dependency_links.txt +1 -0
- git_trace-1.0.0/git_trace.egg-info/entry_points.txt +2 -0
- git_trace-1.0.0/git_trace.egg-info/requires.txt +3 -0
- git_trace-1.0.0/git_trace.egg-info/top_level.txt +1 -0
- git_trace-1.0.0/pyproject.toml +67 -0
- git_trace-1.0.0/setup.cfg +4 -0
git_trace-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026-present Karol Kiszka
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
git_trace-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: git-trace
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Visualise git commit dependencies – see which commits edit lines introduced by earlier commits.
|
|
5
|
+
Author-email: Karol Kiszka <karolkisz22@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: source, https://github.com/kiszkacy/git-trace
|
|
8
|
+
Keywords: git
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
15
|
+
Classifier: Development Status :: 4 - Beta
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Environment :: Console
|
|
19
|
+
Requires-Python: <4.0,>=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: pyvis>=0.3
|
|
23
|
+
Requires-Dist: pyyaml>=6.0
|
|
24
|
+
Requires-Dist: colorama>=0.4.6
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# git-trace
|
|
28
|
+
|
|
29
|
+

|
|
30
|
+

|
|
31
|
+

|
|
32
|
+
|
|
33
|
+
Visualize commit dependencies in a git repository. For a given branch (or commit
|
|
34
|
+
range) `git-trace` analyses every diff and reports which commits **depend** on
|
|
35
|
+
earlier ones, in the sense that they modify or remove lines previously added by
|
|
36
|
+
those earlier commits. The result is rendered as either a text tree, a plain
|
|
37
|
+
list, or an interactive HTML graph.
|
|
38
|
+
|
|
39
|
+
> **Disclaimer:** This tool was built specifically with single-branch analysis in mind.
|
|
40
|
+
> While it **might** work across divergent branches, your mileage may vary and
|
|
41
|
+
> I am not responsible for any inaccurate results if you choose to use it that way.
|
|
42
|
+
|
|
43
|
+
A secondary mode (`--picks`) treats the analysis as a cherry-pick safety check:
|
|
44
|
+
given a set of commit hashes you intend to pick, it tells you which are safe,
|
|
45
|
+
which are blocked by missing dependencies, and which would become "conditional"
|
|
46
|
+
on picking other commits as well.
|
|
47
|
+
|
|
48
|
+

|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install git-trace
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
or from source:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
git clone https://github.com/kiszkacy/git-trace
|
|
60
|
+
cd git-trace
|
|
61
|
+
pip install -e .
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Requires Python 3.10+ and `git` available via `PATH`.
|
|
65
|
+
|
|
66
|
+
## Quick start
|
|
67
|
+
|
|
68
|
+
Run from inside a git repository:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
git-trace # analyse the full history of 'main'
|
|
72
|
+
git-trace dev # analyse 'dev' branch
|
|
73
|
+
git-trace dev --after abc1234 # only commits after a hash
|
|
74
|
+
git-trace dev --after abc --before def # a commit range
|
|
75
|
+
git-trace --no-graph # skip HTML, print text only
|
|
76
|
+
git-trace --list # simplified list output + HTML graph
|
|
77
|
+
git-trace --list --no-graph # simplified list output only
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
For cherry-pick analysis:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
git-trace dev --picks h1 h2 h3 # check whether picks are safe
|
|
84
|
+
git-trace dev --picks picks.txt # picks from a file
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
By default `git-trace` writes an interactive HTML graph to `./output.html` and
|
|
88
|
+
also prints a text summary to stdout.
|
|
89
|
+
|
|
90
|
+
## CLI arguments
|
|
91
|
+
|
|
92
|
+
| Argument | Description |
|
|
93
|
+
| --- | --- |
|
|
94
|
+
| `branch` | Branch to analyse. Defaults to `main`. Positional. |
|
|
95
|
+
| `--after HASH` | Only include commits *after* this hash (the hash itself is excluded). |
|
|
96
|
+
| `--before HASH` | Only include commits up to this hash (the hash itself is excluded). |
|
|
97
|
+
| `--whitelist HASH... \| FILE` | Restrict analysis to these hashes, or to a file containing hashes (one per line). Takes priority over `--blacklist`. Auto-loaded from `whitelist.txt` if present and the flag is omitted. |
|
|
98
|
+
| `--blacklist HASH... \| FILE` | Exclude these hashes from analysis, or a file containing hashes. Auto-loaded from `blacklist.txt` if present. |
|
|
99
|
+
| `--picks HASH... \| FILE` | Enable cherry-pick analysis. The given hashes are treated as the intended pick set; the tool reports safe / blocked / conditional picks. Auto-loaded from `picks.txt` if present. |
|
|
100
|
+
| `--ignore-paths PATH... \| FILE` | Repository-relative paths whose diffs should be ignored during analysis, or a file listing such paths. Auto-loaded from `ignore-paths.txt` if present. |
|
|
101
|
+
| `--repo DIR` | Path to the git repository root. Defaults to the current directory. |
|
|
102
|
+
| `--config FILE` | Path to a YAML config file. Defaults to `./config.yml`. |
|
|
103
|
+
| `--no-graph` | Skip HTML graph generation. |
|
|
104
|
+
| `--list` | Print a simple list of relevant commit hashes (one per line) instead of the formatted text tree. The HTML graph is still generated; combine with `--no-graph` to suppress it. |
|
|
105
|
+
| `--output PATH` | Path for the generated HTML graph. Defaults to `./output.html`. |
|
|
106
|
+
| `--txt-output PATH` | Also write the text output to this file. |
|
|
107
|
+
| `-v`, `--version` | Print version and exit. |
|
|
108
|
+
|
|
109
|
+
### Precedence rules
|
|
110
|
+
|
|
111
|
+
1. CLI arguments
|
|
112
|
+
2. `config.yml` values (if the file exists)
|
|
113
|
+
3. Auto-loaded files (`whitelist.txt`, `blacklist.txt`, `picks.txt`, `ignore-paths.txt`)
|
|
114
|
+
|
|
115
|
+
A more specific source overrides a less specific one
|
|
116
|
+
(i.e. CLI wins over config, config wins over auto-load).
|
|
117
|
+
|
|
118
|
+
## config.yml
|
|
119
|
+
|
|
120
|
+
If a file named `config.yml` exists in the current working directory it is
|
|
121
|
+
loaded automatically. Pass `--config FILE` to use a different path. Any CLI
|
|
122
|
+
option can be set there. Example:
|
|
123
|
+
|
|
124
|
+
```yaml
|
|
125
|
+
branch: dev
|
|
126
|
+
after: abc1234
|
|
127
|
+
ignore-paths:
|
|
128
|
+
- vendor/
|
|
129
|
+
- generated/
|
|
130
|
+
output: ./trace.html
|
|
131
|
+
no-graph: false
|
|
132
|
+
picks:
|
|
133
|
+
- a1b2c3d
|
|
134
|
+
- 9988776
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Keys mirror the CLI flag names (use `-` not `_`). Values may be strings, lists,
|
|
138
|
+
or booleans depending on the option.
|
|
139
|
+
|
|
140
|
+
## Auto-loaded files
|
|
141
|
+
|
|
142
|
+
If you omit a flag, `git-trace` looks in the current directory for a matching
|
|
143
|
+
file and loads it automatically:
|
|
144
|
+
|
|
145
|
+
| File | Equivalent flag |
|
|
146
|
+
| --- | --- |
|
|
147
|
+
| `whitelist.txt` | `--whitelist` |
|
|
148
|
+
| `blacklist.txt` | `--blacklist` |
|
|
149
|
+
| `picks.txt` | `--picks` |
|
|
150
|
+
| `ignore-paths.txt` | `--ignore-paths` |
|
|
151
|
+
|
|
152
|
+
Each file holds one entry per line. Blank lines and lines starting with `#` are
|
|
153
|
+
ignored.
|
|
154
|
+
|
|
155
|
+
## Output
|
|
156
|
+
|
|
157
|
+
- **Formatted text** (default): a tree-style listing of commits and their
|
|
158
|
+
dependencies, printed to stdout.
|
|
159
|
+
- **HTML graph** (default): an interactive force-directed graph written to
|
|
160
|
+
`./output.html`. Open it in any browser.
|
|
161
|
+
- **`--list`**: a flat list of hashes (one per line), suitable for piping into
|
|
162
|
+
other scripts. The HTML graph is still produced unless `--no-graph` is also specified.
|
|
163
|
+
- **`--txt-output FILE`**: also write the formatted text to a file.
|
|
164
|
+
|
|
165
|
+
In `--picks` mode the text output is grouped into `safe`, `blocked` (with the
|
|
166
|
+
list of missing dependencies for each) and `conditional` sections, while the HTML
|
|
167
|
+
graph highlights the pick set, blockers, and conditionals in distinct colors.
|
|
168
|
+
|
|
169
|
+
## How dependency detection works
|
|
170
|
+
|
|
171
|
+
`git-trace` uses a position-aware analyzer. For each commit it replays the
|
|
172
|
+
diff hunks against a virtual snapshot of every file, tracked as a list of
|
|
173
|
+
`(line_content, owning_commit)` pairs. When a later commit removes or
|
|
174
|
+
overwrites a line, the analyzer consults the owner stored for that exact
|
|
175
|
+
position and records a dependency on whichever earlier commit introduced
|
|
176
|
+
that line. Identical line content appearing in different places of a file is
|
|
177
|
+
correctly treated as independent.
|
|
178
|
+
|
|
179
|
+
The analyzer handles file creation, deletion (`+++ /dev/null`), renames
|
|
180
|
+
(`rename from` / `rename to`), and the `@@ -X,0 +Y,N @@` insertion hunks
|
|
181
|
+
correctly.
|
|
182
|
+
|
|
183
|
+
### Textual vs. Structural Dependencies
|
|
184
|
+
|
|
185
|
+
It is important to note that git-trace evaluates **purely textual dependencies**,
|
|
186
|
+
not structural or semantic ones. It only tracks modifications on a line-by-line basis.
|
|
187
|
+
It does not parse an Abstract Syntax Tree (AST) to understand code logic,
|
|
188
|
+
variable scopes, or function calls.
|
|
189
|
+
|
|
190
|
+
## Known limitations
|
|
191
|
+
|
|
192
|
+
- **Copies (`copy from` / `copy to`)** → a copied file is treated as a fresh
|
|
193
|
+
new file rather than inheriting history from the source path. In practice
|
|
194
|
+
this is rare since git rarely produces copy markers by default.
|
|
195
|
+
- **Binary files** are silently skipped → no dependency is recorded for
|
|
196
|
+
changes to binary blobs.
|
|
197
|
+
- **Mode-only changes** (e.g. `chmod`) are silently ignored, since they
|
|
198
|
+
don't affect any tracked content.
|
|
199
|
+
- **Quoted paths in diffs** (`core.quotePath`) → git's `core.quotePath`
|
|
200
|
+
setting (default: `true`) wraps file paths containing non-ASCII or special
|
|
201
|
+
characters in backslash-escaped double quotes when producing diff output.
|
|
202
|
+
The analyser reads paths literally after `+++ b/`, so a quoted path will
|
|
203
|
+
not match its real on-disk name. If you analyse a repo with non-ASCII
|
|
204
|
+
filenames, set `git config core.quotePath false` first.
|
|
205
|
+
- **`diff.noprefix` / `--no-prefix` diffs** → when `diff.noprefix` is
|
|
206
|
+
enabled, git's diff output omits the `a/` and `b/` path prefixes (e.g.
|
|
207
|
+
`--- file.txt` instead of `--- a/file.txt`). The analyser specifically
|
|
208
|
+
looks for the `a/` and `b/` prefixes to identify files, so under that
|
|
209
|
+
configuration no dependencies will be detected. Keep `diff.noprefix` at
|
|
210
|
+
its default (`false`) when running `git-trace`.
|
|
211
|
+
|
|
212
|
+
## AI usage
|
|
213
|
+
|
|
214
|
+
An LLM was used during the development of this project, specifically for:
|
|
215
|
+
- Writing the core dependency detection logic and processing the Git hunk headers.
|
|
216
|
+
- Injecting the custom HTML templates and dynamic styling payloads into pyvis html output.
|
|
217
|
+
- Generating most of the README.md content.
|
|
218
|
+
|
|
219
|
+
## License
|
|
220
|
+
|
|
221
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# git-trace
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
Visualize commit dependencies in a git repository. For a given branch (or commit
|
|
8
|
+
range) `git-trace` analyses every diff and reports which commits **depend** on
|
|
9
|
+
earlier ones, in the sense that they modify or remove lines previously added by
|
|
10
|
+
those earlier commits. The result is rendered as either a text tree, a plain
|
|
11
|
+
list, or an interactive HTML graph.
|
|
12
|
+
|
|
13
|
+
> **Disclaimer:** This tool was built specifically with single-branch analysis in mind.
|
|
14
|
+
> While it **might** work across divergent branches, your mileage may vary and
|
|
15
|
+
> I am not responsible for any inaccurate results if you choose to use it that way.
|
|
16
|
+
|
|
17
|
+
A secondary mode (`--picks`) treats the analysis as a cherry-pick safety check:
|
|
18
|
+
given a set of commit hashes you intend to pick, it tells you which are safe,
|
|
19
|
+
which are blocked by missing dependencies, and which would become "conditional"
|
|
20
|
+
on picking other commits as well.
|
|
21
|
+
|
|
22
|
+

|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install git-trace
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
or from source:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
git clone https://github.com/kiszkacy/git-trace
|
|
34
|
+
cd git-trace
|
|
35
|
+
pip install -e .
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Requires Python 3.10+ and `git` available via `PATH`.
|
|
39
|
+
|
|
40
|
+
## Quick start
|
|
41
|
+
|
|
42
|
+
Run from inside a git repository:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
git-trace # analyse the full history of 'main'
|
|
46
|
+
git-trace dev # analyse 'dev' branch
|
|
47
|
+
git-trace dev --after abc1234 # only commits after a hash
|
|
48
|
+
git-trace dev --after abc --before def # a commit range
|
|
49
|
+
git-trace --no-graph # skip HTML, print text only
|
|
50
|
+
git-trace --list # simplified list output + HTML graph
|
|
51
|
+
git-trace --list --no-graph # simplified list output only
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
For cherry-pick analysis:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
git-trace dev --picks h1 h2 h3 # check whether picks are safe
|
|
58
|
+
git-trace dev --picks picks.txt # picks from a file
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
By default `git-trace` writes an interactive HTML graph to `./output.html` and
|
|
62
|
+
also prints a text summary to stdout.
|
|
63
|
+
|
|
64
|
+
## CLI arguments
|
|
65
|
+
|
|
66
|
+
| Argument | Description |
|
|
67
|
+
| --- | --- |
|
|
68
|
+
| `branch` | Branch to analyse. Defaults to `main`. Positional. |
|
|
69
|
+
| `--after HASH` | Only include commits *after* this hash (the hash itself is excluded). |
|
|
70
|
+
| `--before HASH` | Only include commits up to this hash (the hash itself is excluded). |
|
|
71
|
+
| `--whitelist HASH... \| FILE` | Restrict analysis to these hashes, or to a file containing hashes (one per line). Takes priority over `--blacklist`. Auto-loaded from `whitelist.txt` if present and the flag is omitted. |
|
|
72
|
+
| `--blacklist HASH... \| FILE` | Exclude these hashes from analysis, or a file containing hashes. Auto-loaded from `blacklist.txt` if present. |
|
|
73
|
+
| `--picks HASH... \| FILE` | Enable cherry-pick analysis. The given hashes are treated as the intended pick set; the tool reports safe / blocked / conditional picks. Auto-loaded from `picks.txt` if present. |
|
|
74
|
+
| `--ignore-paths PATH... \| FILE` | Repository-relative paths whose diffs should be ignored during analysis, or a file listing such paths. Auto-loaded from `ignore-paths.txt` if present. |
|
|
75
|
+
| `--repo DIR` | Path to the git repository root. Defaults to the current directory. |
|
|
76
|
+
| `--config FILE` | Path to a YAML config file. Defaults to `./config.yml`. |
|
|
77
|
+
| `--no-graph` | Skip HTML graph generation. |
|
|
78
|
+
| `--list` | Print a simple list of relevant commit hashes (one per line) instead of the formatted text tree. The HTML graph is still generated; combine with `--no-graph` to suppress it. |
|
|
79
|
+
| `--output PATH` | Path for the generated HTML graph. Defaults to `./output.html`. |
|
|
80
|
+
| `--txt-output PATH` | Also write the text output to this file. |
|
|
81
|
+
| `-v`, `--version` | Print version and exit. |
|
|
82
|
+
|
|
83
|
+
### Precedence rules
|
|
84
|
+
|
|
85
|
+
1. CLI arguments
|
|
86
|
+
2. `config.yml` values (if the file exists)
|
|
87
|
+
3. Auto-loaded files (`whitelist.txt`, `blacklist.txt`, `picks.txt`, `ignore-paths.txt`)
|
|
88
|
+
|
|
89
|
+
A more specific source overrides a less specific one
|
|
90
|
+
(i.e. CLI wins over config, config wins over auto-load).
|
|
91
|
+
|
|
92
|
+
## config.yml
|
|
93
|
+
|
|
94
|
+
If a file named `config.yml` exists in the current working directory it is
|
|
95
|
+
loaded automatically. Pass `--config FILE` to use a different path. Any CLI
|
|
96
|
+
option can be set there. Example:
|
|
97
|
+
|
|
98
|
+
```yaml
|
|
99
|
+
branch: dev
|
|
100
|
+
after: abc1234
|
|
101
|
+
ignore-paths:
|
|
102
|
+
- vendor/
|
|
103
|
+
- generated/
|
|
104
|
+
output: ./trace.html
|
|
105
|
+
no-graph: false
|
|
106
|
+
picks:
|
|
107
|
+
- a1b2c3d
|
|
108
|
+
- 9988776
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Keys mirror the CLI flag names (use `-` not `_`). Values may be strings, lists,
|
|
112
|
+
or booleans depending on the option.
|
|
113
|
+
|
|
114
|
+
## Auto-loaded files
|
|
115
|
+
|
|
116
|
+
If you omit a flag, `git-trace` looks in the current directory for a matching
|
|
117
|
+
file and loads it automatically:
|
|
118
|
+
|
|
119
|
+
| File | Equivalent flag |
|
|
120
|
+
| --- | --- |
|
|
121
|
+
| `whitelist.txt` | `--whitelist` |
|
|
122
|
+
| `blacklist.txt` | `--blacklist` |
|
|
123
|
+
| `picks.txt` | `--picks` |
|
|
124
|
+
| `ignore-paths.txt` | `--ignore-paths` |
|
|
125
|
+
|
|
126
|
+
Each file holds one entry per line. Blank lines and lines starting with `#` are
|
|
127
|
+
ignored.
|
|
128
|
+
|
|
129
|
+
## Output
|
|
130
|
+
|
|
131
|
+
- **Formatted text** (default): a tree-style listing of commits and their
|
|
132
|
+
dependencies, printed to stdout.
|
|
133
|
+
- **HTML graph** (default): an interactive force-directed graph written to
|
|
134
|
+
`./output.html`. Open it in any browser.
|
|
135
|
+
- **`--list`**: a flat list of hashes (one per line), suitable for piping into
|
|
136
|
+
other scripts. The HTML graph is still produced unless `--no-graph` is also specified.
|
|
137
|
+
- **`--txt-output FILE`**: also write the formatted text to a file.
|
|
138
|
+
|
|
139
|
+
In `--picks` mode the text output is grouped into `safe`, `blocked` (with the
|
|
140
|
+
list of missing dependencies for each) and `conditional` sections, while the HTML
|
|
141
|
+
graph highlights the pick set, blockers, and conditionals in distinct colors.
|
|
142
|
+
|
|
143
|
+
## How dependency detection works
|
|
144
|
+
|
|
145
|
+
`git-trace` uses a position-aware analyzer. For each commit it replays the
|
|
146
|
+
diff hunks against a virtual snapshot of every file, tracked as a list of
|
|
147
|
+
`(line_content, owning_commit)` pairs. When a later commit removes or
|
|
148
|
+
overwrites a line, the analyzer consults the owner stored for that exact
|
|
149
|
+
position and records a dependency on whichever earlier commit introduced
|
|
150
|
+
that line. Identical line content appearing in different places of a file is
|
|
151
|
+
correctly treated as independent.
|
|
152
|
+
|
|
153
|
+
The analyzer handles file creation, deletion (`+++ /dev/null`), renames
|
|
154
|
+
(`rename from` / `rename to`), and the `@@ -X,0 +Y,N @@` insertion hunks
|
|
155
|
+
correctly.
|
|
156
|
+
|
|
157
|
+
### Textual vs. Structural Dependencies
|
|
158
|
+
|
|
159
|
+
It is important to note that git-trace evaluates **purely textual dependencies**,
|
|
160
|
+
not structural or semantic ones. It only tracks modifications on a line-by-line basis.
|
|
161
|
+
It does not parse an Abstract Syntax Tree (AST) to understand code logic,
|
|
162
|
+
variable scopes, or function calls.
|
|
163
|
+
|
|
164
|
+
## Known limitations
|
|
165
|
+
|
|
166
|
+
- **Copies (`copy from` / `copy to`)** → a copied file is treated as a fresh
|
|
167
|
+
new file rather than inheriting history from the source path. In practice
|
|
168
|
+
this is rare since git rarely produces copy markers by default.
|
|
169
|
+
- **Binary files** are silently skipped → no dependency is recorded for
|
|
170
|
+
changes to binary blobs.
|
|
171
|
+
- **Mode-only changes** (e.g. `chmod`) are silently ignored, since they
|
|
172
|
+
don't affect any tracked content.
|
|
173
|
+
- **Quoted paths in diffs** (`core.quotePath`) → git's `core.quotePath`
|
|
174
|
+
setting (default: `true`) wraps file paths containing non-ASCII or special
|
|
175
|
+
characters in backslash-escaped double quotes when producing diff output.
|
|
176
|
+
The analyser reads paths literally after `+++ b/`, so a quoted path will
|
|
177
|
+
not match its real on-disk name. If you analyse a repo with non-ASCII
|
|
178
|
+
filenames, set `git config core.quotePath false` first.
|
|
179
|
+
- **`diff.noprefix` / `--no-prefix` diffs** → when `diff.noprefix` is
|
|
180
|
+
enabled, git's diff output omits the `a/` and `b/` path prefixes (e.g.
|
|
181
|
+
`--- file.txt` instead of `--- a/file.txt`). The analyser specifically
|
|
182
|
+
looks for the `a/` and `b/` prefixes to identify files, so under that
|
|
183
|
+
configuration no dependencies will be detected. Keep `diff.noprefix` at
|
|
184
|
+
its default (`false`) when running `git-trace`.
|
|
185
|
+
|
|
186
|
+
## AI usage
|
|
187
|
+
|
|
188
|
+
An LLM was used during the development of this project, specifically for:
|
|
189
|
+
- Writing the core dependency detection logic and processing the Git hunk headers.
|
|
190
|
+
- Injecting the custom HTML templates and dynamic styling payloads into pyvis html output.
|
|
191
|
+
- Generating most of the README.md content.
|
|
192
|
+
|
|
193
|
+
## License
|
|
194
|
+
|
|
195
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
from git_trace.git import Commit
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Matches a unified-diff hunk header such as "@@ -12,3 +14,4 @@".
# Groups: 1 = old start, 2 = old count (optional), 3 = new start,
# 4 = new count (optional). Text after the closing "@@" is not inspected.
HUNK_HEADER_PATTERN: re.Pattern[str] = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class DiffHunk:
    """A single ``@@``-delimited hunk taken from a unified diff.

    The four integers are the values read from the hunk header; ``lines``
    collects the hunk body as ``(marker, text)`` pairs, where ``marker`` is
    the leading ``"+"``, ``"-"`` or ``" "`` and ``text`` is the rest of the
    diff line.
    """

    # Values from the "-<start>,<count>" half of the header.
    old_start: int
    old_count: int
    # Values from the "+<start>,<count>" half of the header.
    new_start: int
    new_count: int
    # Body lines in diff order; every instance gets its own list.
    lines: list[tuple[str, str]] = field(default_factory=list)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class FileChange:
    """All hunks that one commit's diff applies to a single file."""

    # Path of the file the hunks belong to.
    path: str
    # Previous path when the diff indicates the file was moved, else None.
    old_path: str | None = None
    # Hunks in the order they were encountered; a fresh list per instance.
    hunks: list[DiffHunk] = field(default_factory=list)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class VirtualLine:
    """One line of a reconstructed file together with the commit that added it."""

    # Text of the line (empty for placeholder lines).
    content: str
    # Hash of the commit that introduced the line; None when unknown.
    owner_hash: str | None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class DependencyGraph:
    """Mapping from a commit hash to the hashes of the commits it depends on."""

    # Backed by a defaultdict so add_dependency can create entries on demand.
    relationships: dict[str, set[str]] = field(default_factory=lambda: defaultdict(set))

    def add_dependency(self, commit_hash: str, parent_hash: str) -> None:
        """Record that ``commit_hash`` depends on ``parent_hash``."""
        dependencies = self.relationships[commit_hash]
        dependencies.add(parent_hash)

    def get_dependencies_for(self, commit_hash: str) -> set[str]:
        """Return the recorded dependencies of ``commit_hash``.

        Unknown hashes yield a new empty set and are not inserted into
        ``relationships``.
        """
        if commit_hash in self.relationships:
            return self.relationships[commit_hash]
        return set()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class CherryPickAnalysis:
    """Outcome of classifying a set of commits intended for cherry-picking."""

    # Picks with no unmet dependency, directly or through other picks.
    safe: list[str] = field(default_factory=list)
    # Picks whose dependencies are all picked, but at least one of those is not safe.
    conditional: list[str] = field(default_factory=list)
    # Pick hash -> dependencies that are not part of the pick set.
    blocked: dict[str, set[str]] = field(default_factory=dict)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _is_ignored_path(path: str, ignore_paths: set[str]) -> bool:
    """Return True when *path* is listed verbatim or lies under a listed ``dir/`` entry."""
    if path in ignore_paths:
        return True
    # An entry ending in "/" can never equal a file path, so treat it as a directory prefix.
    return any(entry.endswith("/") and path.startswith(entry) for entry in ignore_paths)


def _parse_diff(diff_text: str, ignore_paths: set[str]) -> list[FileChange]:
    """Split the patch text of one commit into per-file changes and hunks.

    Args:
        diff_text: Unified diff text with ``a/`` and ``b/`` path prefixes.
        ignore_paths: Paths to skip. An entry matches a file when it equals
            the file path, or when it ends with ``/`` and the file path
            starts with it.

    Returns:
        One ``FileChange`` per non-ignored file, in the order the files
        appear in the diff. Files without textual hunks (for example pure
        renames) are returned with an empty ``hunks`` list.
    """
    changes: list[FileChange] = []
    current_change: FileChange | None = None
    current_hunk: DiffHunk | None = None
    rename_from: str | None = None
    old_path_candidate: str | None = None
    # Body lines the current hunk header still promises for the old / new side.
    # They are tracked even for ignored files so that hunk content is never
    # mistaken for a file header.
    old_remaining: int = 0
    new_remaining: int = 0

    for line in diff_text.splitlines():
        marker: str = line[:1]

        # While a hunk is still owed lines, a line with a diff marker is hunk
        # content, even when it looks like "--- x" or "+++ x" (a removed line
        # starting with "-- " or an added line starting with "++ ").
        if old_remaining > 0 or new_remaining > 0:
            consumed: bool = True
            if marker == " " and old_remaining > 0 and new_remaining > 0:
                old_remaining -= 1
                new_remaining -= 1
            elif marker == "-" and old_remaining > 0:
                old_remaining -= 1
            elif marker == "+" and new_remaining > 0:
                new_remaining -= 1
            else:
                consumed = False
            if consumed:
                if current_hunk is not None:
                    current_hunk.lines.append((marker, line[1:]))
                continue

        if line.startswith("diff --git "):
            # A new file section starts: flush the previous one and reset state.
            if current_change is not None:
                changes.append(current_change)
            current_change = None
            current_hunk = None
            rename_from = None
            old_path_candidate = None
            old_remaining = new_remaining = 0
        elif line.startswith("rename from "):
            rename_from = line[len("rename from "):].replace("\\", "/")
        elif line.startswith("rename to "):
            rename_to: str = line[len("rename to "):].replace("\\", "/")
            if not _is_ignored_path(rename_to, ignore_paths):
                current_change = FileChange(path=rename_to, old_path=rename_from)
        elif line.startswith("--- "):
            current_hunk = None
            old_remaining = new_remaining = 0
            # Anything other than "--- a/<path>" (e.g. /dev/null) has no old path.
            old_path_candidate = line[6:].replace("\\", "/") if line.startswith("--- a/") else None
        elif line.startswith("+++ "):
            current_hunk = None
            old_remaining = new_remaining = 0
            new_path: str | None = line[6:].replace("\\", "/") if line.startswith("+++ b/") else None
            # For deletions ("+++ /dev/null") fall back to the old path.
            canonical_path: str | None = new_path or old_path_candidate
            if canonical_path is None or _is_ignored_path(canonical_path, ignore_paths):
                current_change = None
            elif current_change is not None and current_change.path == canonical_path:
                # Already created by the "rename to" line; keep its old_path.
                pass
            else:
                old: str | None = (
                    old_path_candidate
                    if (old_path_candidate and new_path and old_path_candidate != new_path)
                    else None
                )
                current_change = FileChange(path=canonical_path, old_path=old)
        else:
            hunk_match: re.Match[str] | None = HUNK_HEADER_PATTERN.match(line)
            if hunk_match:
                # A missing count in the header means 1.
                old_start: int = int(hunk_match.group(1))
                old_count: int = int(hunk_match.group(2)) if hunk_match.group(2) is not None else 1
                new_start: int = int(hunk_match.group(3))
                new_count: int = int(hunk_match.group(4)) if hunk_match.group(4) is not None else 1
                old_remaining, new_remaining = old_count, new_count
                if current_change is not None:
                    current_hunk = DiffHunk(
                        old_start=old_start,
                        old_count=old_count,
                        new_start=new_start,
                        new_count=new_count,
                    )
                    current_change.hunks.append(current_hunk)
                else:
                    current_hunk = None
            elif current_hunk is not None and marker in ("+", "-", " "):
                # Lines beyond the declared counts are still attached to the
                # hunk, as before.
                current_hunk.lines.append((marker, line[1:]))

    if current_change is not None:
        changes.append(current_change)
    return changes
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# wild opus dependency graph building logic
|
|
111
|
+
def build_dependency_graph(commits: list[Commit], raw_diffs: dict[str, str], ignore_paths: set[str] | None = None) -> DependencyGraph:
    """Replay every commit's diff over virtual file snapshots and collect dependencies.

    Each tracked file is a list of ``VirtualLine`` objects. When a commit
    removes a line owned by a different commit, a dependency from the
    removing commit to the owner is recorded.

    Args:
        commits: Commits in the order their diffs should be applied.
        raw_diffs: Patch text keyed by commit hash; a missing key is treated
            as an empty diff.
        ignore_paths: Paths passed on to the diff parser; ``None`` means none.

    Returns:
        The populated ``DependencyGraph``.
    """
    ignore_paths = ignore_paths or set()
    snapshots: dict[str, list[VirtualLine]] = {}
    result: DependencyGraph = DependencyGraph()

    for commit in commits:
        patch: str = raw_diffs.get(commit.hash, "")

        for change in _parse_diff(patch, ignore_paths):
            previous_path = change.old_path
            if previous_path is not None and previous_path in snapshots:
                # Carry line ownership over to the new path.
                snapshots[change.path] = snapshots.pop(previous_path)
            elif change.path not in snapshots:
                snapshots[change.path] = []

            tracked: list[VirtualLine] = snapshots[change.path]
            # Net number of lines added by the hunks of this file applied so far.
            shift: int = 0

            for hunk in change.hunks:
                # With a zero old count the header names the line *after which*
                # to insert, which is already the 0-based insertion index.
                if hunk.old_count == 0:
                    base: int = hunk.old_start
                else:
                    base = max(0, hunk.old_start - 1)
                start: int = base + shift
                stop: int = start + hunk.old_count

                # Pad with owner-less placeholders for lines never seen before.
                shortfall: int = stop - len(tracked)
                if shortfall > 0:
                    tracked.extend(VirtualLine(content="", owner_hash=None) for _ in range(shortfall))

                replacement: list[VirtualLine] = []
                cursor: int = start

                for marker, text in hunk.lines:
                    if marker == "+":
                        replacement.append(VirtualLine(content=text, owner_hash=commit.hash))
                        continue
                    if marker not in (" ", "-"):
                        continue
                    existing = tracked[cursor]
                    cursor += 1
                    if marker == " ":
                        # Context line: keep the line and its owner.
                        replacement.append(existing)
                    elif existing.owner_hash is not None and existing.owner_hash != commit.hash:
                        result.add_dependency(commit.hash, existing.owner_hash)

                tracked[start:stop] = replacement
                shift += hunk.new_count - hunk.old_count

    return result
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def filter_picks(pick_hashes: set[str], graph: DependencyGraph) -> CherryPickAnalysis:
    """Classify the intended picks as safe, conditional or blocked.

    * blocked: at least one dependency is not in ``pick_hashes``; the value
      is the set of those missing dependencies.
    * conditional: not blocked, but depends (directly or transitively,
      through other picks) on a blocked pick.
    * safe: everything else.

    Commits are visited in sorted order so that the resulting lists and the
    key order of ``blocked`` are the same on every run, rather than following
    set iteration order.

    Args:
        pick_hashes: Hashes of the commits that are going to be picked.
        graph: Dependency graph to look the picks up in.

    Returns:
        A ``CherryPickAnalysis`` holding the three groups.
    """
    ordered: list[str] = sorted(pick_hashes)

    blocked: dict[str, set[str]] = {}
    # Dependencies of each pick that are themselves picks; computed once
    # because they do not change while taint is propagated below.
    picked_dependencies: dict[str, set[str]] = {}
    for commit_hash in ordered:
        dependencies: set[str] = graph.get_dependencies_for(commit_hash)
        missing: set[str] = dependencies - pick_hashes
        if missing:
            blocked[commit_hash] = missing
        picked_dependencies[commit_hash] = dependencies & pick_hashes

    # Propagate "taint" from blocked picks to their dependants until stable.
    tainted: set[str] = set(blocked)
    changed: bool = True
    while changed:
        changed = False
        for commit_hash in ordered:
            if commit_hash not in tainted and picked_dependencies[commit_hash] & tainted:
                tainted.add(commit_hash)
                changed = True

    conditional: list[str] = [
        commit_hash for commit_hash in ordered
        if commit_hash in tainted and commit_hash not in blocked
    ]
    safe: list[str] = [commit_hash for commit_hash in ordered if commit_hash not in tainted]
    return CherryPickAnalysis(
        safe=safe,
        conditional=conditional,
        blocked=blocked,
    )
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class Commit:
    """Immutable record of one commit: its hash and its subject line."""

    hash: str
    message: str

    def is_inside_hash_set(self, hash_set: set[str]) -> bool:
        """Return True when any entry of ``hash_set`` identifies this commit.

        An entry matches when it is a prefix of this commit's hash or the
        hash is a prefix of the entry. The comparison ignores letter case,
        like ``resolve_hashes`` does, and empty entries never match (an
        empty string is a prefix of every hash).
        """
        own: str = self.hash.lower()
        for entry in hash_set:
            candidate: str = entry.lower()
            if candidate and (own.startswith(candidate) or candidate.startswith(own)):
                return True
        return False
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def run_git(*args: str, repo_directory: str) -> str:
    """Run ``git`` with ``args`` and return what it wrote to stdout.

    Args:
        *args: Sub-command and its arguments, e.g. ``"log", "--reverse"``.
        repo_directory: Repository to operate on (passed via ``-C``); an
            empty string runs git in the current working directory.

    Returns:
        The decoded stdout, or ``""`` when nothing was captured. The exit
        status is not inspected, so a failing command also returns whatever
        it printed to stdout (typically nothing).
    """
    command: list[str] = ["git"]
    if repo_directory:
        command += ["-C", repo_directory]
    # The diff parser expects "a/" and "b/" prefixes, unquoted paths and
    # uncoloured text. Pin the matching settings for this invocation so a
    # user's git configuration cannot alter the output format.
    command += [
        "-c", "core.quotePath=false",
        "-c", "diff.noprefix=false",
        "-c", "color.ui=false",
    ]
    command += list(args)
    process: subprocess.CompletedProcess[str] = subprocess.run(
        command, capture_output=True, encoding="utf-8", errors="replace"
    )
    return process.stdout or ""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_commits(branch: str, repo_directory: str, after: str | None = None, before: str | None = None) -> list[Commit]:
    """List commits oldest-first using ``git log --reverse``.

    Args:
        branch: Revision to log when ``before`` is not given.
        repo_directory: Repository passed on to ``run_git``.
        after: When set, the log range becomes ``after..<tip>``.
        before: When set, it replaces ``branch`` as the tip of the range and
            the last listed commit is dropped from the result.

    Returns:
        The parsed commits in the order git printed them.
    """
    tip: str = before if before else branch
    scope: str = f"{after}..{tip}" if after else f"{tip}"
    output: str = run_git("log", scope, "--reverse", "--format=%H %s", repo_directory=repo_directory)

    commits: list[Commit] = []
    for raw_line in output.splitlines():
        entry = raw_line.strip()
        if not entry:
            continue
        # "%H %s" -> "<hash> <subject>"; the subject may itself contain spaces.
        commit_hash, _, message = entry.partition(" ")
        commits.append(Commit(hash=commit_hash, message=message))

    # skip last commit to exclude it
    if before and commits:
        del commits[-1]

    return commits
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_commit_diff(sha: str, repo_directory: str) -> str:
    """Return the patch ``git show -p`` prints for ``sha``, using an empty ``--format=``."""
    show_arguments = ("show", "--format=", "-p", sha)
    return run_git(*show_arguments, repo_directory=repo_directory)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def resolve_hashes(hashes: list[str], all_commits: list[Commit]) -> dict[str, str | None]:
    """Map user-supplied (possibly abbreviated) hashes to full commit hashes.

    Args:
        hashes: Hashes or hash prefixes as typed by the user.
        all_commits: Commits whose full hashes are candidates.

    Returns:
        A dict keyed by each input string. The value is the full hash when
        exactly one commit hash starts with the input (ignoring case), and
        ``None`` when there is no match, the prefix is ambiguous, or the
        input is empty.
    """
    # Lower-case every known hash once instead of once per query.
    known: list[tuple[str, str]] = [(commit.hash.lower(), commit.hash) for commit in all_commits]

    result: dict[str, str | None] = {}
    for hash_ in hashes:
        needle: str = hash_.lower()
        # An empty string is a prefix of everything and identifies nothing.
        matches: list[str] = (
            [full_hash for lowered, full_hash in known if lowered.startswith(needle)]
            if needle
            else []
        )
        result[hash_] = matches[0] if len(matches) == 1 else None
    return result
|
|
File without changes
|