git-trace 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. git_trace-1.0.0/LICENSE +21 -0
  2. git_trace-1.0.0/PKG-INFO +221 -0
  3. git_trace-1.0.0/README.md +195 -0
  4. git_trace-1.0.0/git_trace/__init__.py +6 -0
  5. git_trace-1.0.0/git_trace/__main__.py +3 -0
  6. git_trace-1.0.0/git_trace/analysis.py +183 -0
  7. git_trace-1.0.0/git_trace/git.py +54 -0
  8. git_trace-1.0.0/git_trace/input/__init__.py +0 -0
  9. git_trace-1.0.0/git_trace/input/args.py +151 -0
  10. git_trace-1.0.0/git_trace/input/parser.py +138 -0
  11. git_trace-1.0.0/git_trace/main.py +115 -0
  12. git_trace-1.0.0/git_trace/output/__init__.py +0 -0
  13. git_trace-1.0.0/git_trace/output/assets/analysis_controls.html +116 -0
  14. git_trace-1.0.0/git_trace/output/assets/analysis_legend.html +27 -0
  15. git_trace-1.0.0/git_trace/output/assets/pick_controls.html +137 -0
  16. git_trace-1.0.0/git_trace/output/assets/pick_legend.html +32 -0
  17. git_trace-1.0.0/git_trace/output/assets/styling.html +5 -0
  18. git_trace-1.0.0/git_trace/output/graph.py +198 -0
  19. git_trace-1.0.0/git_trace/output/html_injection.py +34 -0
  20. git_trace-1.0.0/git_trace/output/text.py +77 -0
  21. git_trace-1.0.0/git_trace/utils.py +84 -0
  22. git_trace-1.0.0/git_trace/version.py +2 -0
  23. git_trace-1.0.0/git_trace.egg-info/PKG-INFO +221 -0
  24. git_trace-1.0.0/git_trace.egg-info/SOURCES.txt +28 -0
  25. git_trace-1.0.0/git_trace.egg-info/dependency_links.txt +1 -0
  26. git_trace-1.0.0/git_trace.egg-info/entry_points.txt +2 -0
  27. git_trace-1.0.0/git_trace.egg-info/requires.txt +3 -0
  28. git_trace-1.0.0/git_trace.egg-info/top_level.txt +1 -0
  29. git_trace-1.0.0/pyproject.toml +67 -0
  30. git_trace-1.0.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026-present Karol Kiszka
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,221 @@
1
+ Metadata-Version: 2.4
2
+ Name: git-trace
3
+ Version: 1.0.0
4
+ Summary: Visualise git commit dependencies – see which commits edit lines introduced by earlier commits.
5
+ Author-email: Karol Kiszka <karolkisz22@gmail.com>
6
+ License: MIT
7
+ Project-URL: source, https://github.com/kiszkacy/git-trace
8
+ Keywords: git
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Classifier: Development Status :: 4 - Beta
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Environment :: Console
19
+ Requires-Python: <4.0,>=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: pyvis>=0.3
23
+ Requires-Dist: pyyaml>=6.0
24
+ Requires-Dist: colorama>=0.4.6
25
+ Dynamic: license-file
26
+
27
+ # git-trace
28
+
29
+ ![Python version](https://img.shields.io/badge/python-%3E%3D%203.10-blue.svg)
30
+ ![License](https://img.shields.io/badge/license-MIT-green.svg)
31
+ ![PyPI](https://img.shields.io/pypi/v/git-trace.svg)
32
+
33
+ Visualize commit dependencies in a git repository. For a given branch (or commit
34
+ range) `git-trace` analyses every diff and reports which commits **depend** on
35
+ earlier ones, in the sense that they modify or remove lines previously added by
36
+ those earlier commits. The result is rendered as either a text tree, a plain
37
+ list, or an interactive HTML graph.
38
+
39
+ > **Disclaimer:** This tool was built specifically with single-branch analysis in mind.
40
+ > While it **might** work across divergent branches, your mileage may vary and
41
+ > I am not responsible for any inaccurate results if you choose to use it that way.
42
+
43
+ A secondary mode (`--picks`) treats the analysis as a cherry-pick safety check:
44
+ given a set of commit hashes you intend to pick, it tells you which are safe,
45
+ which are blocked by missing dependencies, and which would become "conditional"
46
+ on picking other commits as well.
47
+
48
+ ![git-trace demo](docs/demo.png)
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ pip install git-trace
54
+ ```
55
+
56
+ or from source:
57
+
58
+ ```bash
59
+ git clone https://github.com/kiszkacy/git-trace
60
+ cd git-trace
61
+ pip install -e .
62
+ ```
63
+
64
+ Requires Python 3.10+ and `git` available via `PATH`.
65
+
66
+ ## Quick start
67
+
68
+ Run from inside a git repository:
69
+
70
+ ```bash
71
+ git-trace # analyse the full history of 'main'
72
+ git-trace dev # analyse 'dev' branch
73
+ git-trace dev --after abc1234 # only commits after a hash
74
+ git-trace dev --after abc --before def # a commit range
75
+ git-trace --no-graph # skip HTML, print text only
76
+ git-trace --list # simplified list output + HTML graph
77
+ git-trace --list --no-graph # simplified list output only
78
+ ```
79
+
80
+ For cherry-pick analysis:
81
+
82
+ ```bash
83
+ git-trace dev --picks h1 h2 h3 # check whether picks are safe
84
+ git-trace dev --picks picks.txt # picks from a file
85
+ ```
86
+
87
+ By default `git-trace` writes an interactive HTML graph to `./output.html` and
88
+ also prints a text summary to stdout.
89
+
90
+ ## CLI arguments
91
+
92
+ | Argument | Description |
93
+ | --- | --- |
94
+ | `branch` | Branch to analyse. Defaults to `main`. Positional. |
95
+ | `--after HASH` | Only include commits *after* this hash (the hash itself is excluded). |
96
+ | `--before HASH` | Only include commits up to this hash (the hash itself is excluded). |
97
+ | `--whitelist HASH... \| FILE` | Restrict analysis to these hashes, or to a file containing hashes (one per line). Takes priority over `--blacklist`. Auto-loaded from `whitelist.txt` if present and the flag is omitted. |
98
+ | `--blacklist HASH... \| FILE` | Exclude these hashes from analysis, or a file containing hashes. Auto-loaded from `blacklist.txt` if present. |
99
+ | `--picks HASH... \| FILE` | Enable cherry-pick analysis. The given hashes are treated as the intended pick set; the tool reports safe / blocked / conditional picks. Auto-loaded from `picks.txt` if present. |
100
+ | `--ignore-paths PATH... \| FILE` | Repository-relative paths whose diffs should be ignored during analysis, or a file listing such paths. Auto-loaded from `ignore-paths.txt` if present. |
101
+ | `--repo DIR` | Path to the git repository root. Defaults to the current directory. |
102
+ | `--config FILE` | Path to a YAML config file. Defaults to `./config.yml`. |
103
+ | `--no-graph` | Skip HTML graph generation. |
104
+ | `--list` | Print a simple list of relevant commit hashes (one per line) instead of the formatted text tree. The HTML graph is still generated; combine with `--no-graph` to suppress it. |
105
+ | `--output PATH` | Path for the generated HTML graph. Defaults to `./output.html`. |
106
+ | `--txt-output PATH` | Also write the text output to this file. |
107
+ | `-v`, `--version` | Print version and exit. |
108
+
109
+ ### Precedence rules
110
+
111
+ 1. CLI arguments
112
+ 2. `config.yml` values (if the file exists)
113
+ 3. Auto-loaded files (`whitelist.txt`, `blacklist.txt`, `picks.txt`, `ignore-paths.txt`)
114
+
115
+ A more specific source overrides a less specific one
116
+ (i.e. CLI wins over config, config wins over auto-load).
117
+
118
+ ## config.yml
119
+
120
+ If a file named `config.yml` exists in the current working directory it is
121
+ loaded automatically. Pass `--config FILE` to use a different path. Any CLI
122
+ option can be set there. Example:
123
+
124
+ ```yaml
125
+ branch: dev
126
+ after: abc1234
127
+ ignore-paths:
128
+ - vendor/
129
+ - generated/
130
+ output: ./trace.html
131
+ no-graph: false
132
+ picks:
133
+ - a1b2c3d
134
+ - 9988776
135
+ ```
136
+
137
+ Keys mirror the CLI flag names (use `-` not `_`). Values may be strings, lists,
138
+ or booleans depending on the option.
139
+
140
+ ## Auto-loaded files
141
+
142
+ If you omit a flag, `git-trace` looks in the current directory for a matching
143
+ file and loads it automatically:
144
+
145
+ | File | Equivalent flag |
146
+ | --- | --- |
147
+ | `whitelist.txt` | `--whitelist` |
148
+ | `blacklist.txt` | `--blacklist` |
149
+ | `picks.txt` | `--picks` |
150
+ | `ignore-paths.txt` | `--ignore-paths` |
151
+
152
+ Each file holds one entry per line. Blank lines and lines starting with `#` are
153
+ ignored.
154
+
155
+ ## Output
156
+
157
+ - **Formatted text** (default): a tree-style listing of commits and their
158
+ dependencies, printed to stdout.
159
+ - **HTML graph** (default): an interactive force-directed graph written to
160
+ `./output.html`. Open it in any browser.
161
+ - **`--list`**: a flat list of hashes (one per line), suitable for piping into
162
+ other scripts. The HTML graph is still produced unless `--no-graph` is also specified.
163
+ - **`--txt-output FILE`**: also write the formatted text to a file.
164
+
165
+ In `--picks` mode the text output is grouped into `safe`, `blocked` (with the
166
+ list of missing dependencies for each) and `conditional` sections, while the HTML
167
+ graph highlights the pick set, blockers, and conditionals in distinct colors.
168
+
169
+ ## How dependency detection works
170
+
171
+ `git-trace` uses a position-aware analyzer. For each commit it replays the
172
+ diff hunks against a virtual snapshot of every file, tracked as a list of
173
+ `(line_content, owning_commit)` pairs. When a later commit removes or
174
+ overwrites a line, the analyzer consults the owner stored for that exact
175
+ position and records a dependency on whichever earlier commit introduced
176
+ that line. Identical line content appearing in different places of a file is
177
+ correctly treated as independent.
178
+
179
+ The analyzer handles file creation, deletion (`+++ /dev/null`), renames
180
+ (`rename from` / `rename to`), and the `@@ -X,0 +Y,N @@` insertion hunks
181
+ correctly.
182
+
183
+ ### Textual vs. Structural Dependencies
184
+
185
+ It is important to note that git-trace evaluates **purely textual dependencies**,
186
+ not structural or semantic ones. It only tracks modifications on a line-by-line basis.
187
+ It does not parse an Abstract Syntax Tree (AST) to understand code logic,
188
+ variable scopes, or function calls.
189
+
190
+ ## Known limitations
191
+
192
+ - **Copies (`copy from` / `copy to`)** → a copied file is treated as a fresh
193
+ new file rather than inheriting history from the source path. In practice
194
+ this is rare since git rarely produces copy markers by default.
195
+ - **Binary files** are silently skipped → no dependency is recorded for
196
+ changes to binary blobs.
197
+ - **Mode-only changes** (e.g. `chmod`) → silently ignored, since they
198
+ don't affect any tracked content.
199
+ - **Quoted paths in diffs** (`core.quotePath`) → git's `core.quotePath`
200
+ setting (default: `true`) wraps file paths containing non-ASCII or special
201
+ characters in backslash-escaped double quotes when producing diff output.
202
+ The analyser reads paths literally after `+++ b/`, so a quoted path will
203
+ not match its real on-disk name. If you analyse a repo with non-ASCII
204
+ filenames, set `git config core.quotePath false` first.
205
+ - **`diff.noprefix` / `--no-prefix` diffs** → when `diff.noprefix` is
206
+ enabled, git's diff output omits the `a/` and `b/` path prefixes (e.g.
207
+ `--- file.txt` instead of `--- a/file.txt`). The analyser specifically
208
+ looks for the `a/` and `b/` prefixes to identify files, so under that
209
+ configuration no dependencies will be detected. Keep `diff.noprefix` at
210
+ its default (`false`) when running `git-trace`.
211
+
212
+ ## AI usage
213
+
214
+ An LLM was used during the development of this project, specifically for:
215
+ - Writing the core dependency detection logic and processing the Git hunk headers.
216
+ - Injecting the custom HTML templates and dynamic styling payloads into pyvis html output.
217
+ - Generating most of the README.md content.
218
+
219
+ ## License
220
+
221
+ MIT &mdash; see [LICENSE](LICENSE).
@@ -0,0 +1,195 @@
1
+ # git-trace
2
+
3
+ ![Python version](https://img.shields.io/badge/python-%3E%3D%203.10-blue.svg)
4
+ ![License](https://img.shields.io/badge/license-MIT-green.svg)
5
+ ![PyPI](https://img.shields.io/pypi/v/git-trace.svg)
6
+
7
+ Visualize commit dependencies in a git repository. For a given branch (or commit
8
+ range) `git-trace` analyses every diff and reports which commits **depend** on
9
+ earlier ones, in the sense that they modify or remove lines previously added by
10
+ those earlier commits. The result is rendered as either a text tree, a plain
11
+ list, or an interactive HTML graph.
12
+
13
+ > **Disclaimer:** This tool was built specifically with single-branch analysis in mind.
14
+ > While it **might** work across divergent branches, your mileage may vary and
15
+ > I am not responsible for any inaccurate results if you choose to use it that way.
16
+
17
+ A secondary mode (`--picks`) treats the analysis as a cherry-pick safety check:
18
+ given a set of commit hashes you intend to pick, it tells you which are safe,
19
+ which are blocked by missing dependencies, and which would become "conditional"
20
+ on picking other commits as well.
21
+
22
+ ![git-trace demo](docs/demo.png)
23
+
24
+ ## Installation
25
+
26
+ ```bash
27
+ pip install git-trace
28
+ ```
29
+
30
+ or from source:
31
+
32
+ ```bash
33
+ git clone https://github.com/kiszkacy/git-trace
34
+ cd git-trace
35
+ pip install -e .
36
+ ```
37
+
38
+ Requires Python 3.10+ and `git` available via `PATH`.
39
+
40
+ ## Quick start
41
+
42
+ Run from inside a git repository:
43
+
44
+ ```bash
45
+ git-trace # analyse the full history of 'main'
46
+ git-trace dev # analyse 'dev' branch
47
+ git-trace dev --after abc1234 # only commits after a hash
48
+ git-trace dev --after abc --before def # a commit range
49
+ git-trace --no-graph # skip HTML, print text only
50
+ git-trace --list # simplified list output + HTML graph
51
+ git-trace --list --no-graph # simplified list output only
52
+ ```
53
+
54
+ For cherry-pick analysis:
55
+
56
+ ```bash
57
+ git-trace dev --picks h1 h2 h3 # check whether picks are safe
58
+ git-trace dev --picks picks.txt # picks from a file
59
+ ```
60
+
61
+ By default `git-trace` writes an interactive HTML graph to `./output.html` and
62
+ also prints a text summary to stdout.
63
+
64
+ ## CLI arguments
65
+
66
+ | Argument | Description |
67
+ | --- | --- |
68
+ | `branch` | Branch to analyse. Defaults to `main`. Positional. |
69
+ | `--after HASH` | Only include commits *after* this hash (the hash itself is excluded). |
70
+ | `--before HASH` | Only include commits up to this hash (the hash itself is excluded). |
71
+ | `--whitelist HASH... \| FILE` | Restrict analysis to these hashes, or to a file containing hashes (one per line). Takes priority over `--blacklist`. Auto-loaded from `whitelist.txt` if present and the flag is omitted. |
72
+ | `--blacklist HASH... \| FILE` | Exclude these hashes from analysis, or a file containing hashes. Auto-loaded from `blacklist.txt` if present. |
73
+ | `--picks HASH... \| FILE` | Enable cherry-pick analysis. The given hashes are treated as the intended pick set; the tool reports safe / blocked / conditional picks. Auto-loaded from `picks.txt` if present. |
74
+ | `--ignore-paths PATH... \| FILE` | Repository-relative paths whose diffs should be ignored during analysis, or a file listing such paths. Auto-loaded from `ignore-paths.txt` if present. |
75
+ | `--repo DIR` | Path to the git repository root. Defaults to the current directory. |
76
+ | `--config FILE` | Path to a YAML config file. Defaults to `./config.yml`. |
77
+ | `--no-graph` | Skip HTML graph generation. |
78
+ | `--list` | Print a simple list of relevant commit hashes (one per line) instead of the formatted text tree. The HTML graph is still generated; combine with `--no-graph` to suppress it. |
79
+ | `--output PATH` | Path for the generated HTML graph. Defaults to `./output.html`. |
80
+ | `--txt-output PATH` | Also write the text output to this file. |
81
+ | `-v`, `--version` | Print version and exit. |
82
+
83
+ ### Precedence rules
84
+
85
+ 1. CLI arguments
86
+ 2. `config.yml` values (if the file exists)
87
+ 3. Auto-loaded files (`whitelist.txt`, `blacklist.txt`, `picks.txt`, `ignore-paths.txt`)
88
+
89
+ A more specific source overrides a less specific one
90
+ (i.e. CLI wins over config, config wins over auto-load).
91
+
92
+ ## config.yml
93
+
94
+ If a file named `config.yml` exists in the current working directory it is
95
+ loaded automatically. Pass `--config FILE` to use a different path. Any CLI
96
+ option can be set there. Example:
97
+
98
+ ```yaml
99
+ branch: dev
100
+ after: abc1234
101
+ ignore-paths:
102
+ - vendor/
103
+ - generated/
104
+ output: ./trace.html
105
+ no-graph: false
106
+ picks:
107
+ - a1b2c3d
108
+ - 9988776
109
+ ```
110
+
111
+ Keys mirror the CLI flag names (use `-` not `_`). Values may be strings, lists,
112
+ or booleans depending on the option.
113
+
114
+ ## Auto-loaded files
115
+
116
+ If you omit a flag, `git-trace` looks in the current directory for a matching
117
+ file and loads it automatically:
118
+
119
+ | File | Equivalent flag |
120
+ | --- | --- |
121
+ | `whitelist.txt` | `--whitelist` |
122
+ | `blacklist.txt` | `--blacklist` |
123
+ | `picks.txt` | `--picks` |
124
+ | `ignore-paths.txt` | `--ignore-paths` |
125
+
126
+ Each file holds one entry per line. Blank lines and lines starting with `#` are
127
+ ignored.
128
+
129
+ ## Output
130
+
131
+ - **Formatted text** (default): a tree-style listing of commits and their
132
+ dependencies, printed to stdout.
133
+ - **HTML graph** (default): an interactive force-directed graph written to
134
+ `./output.html`. Open it in any browser.
135
+ - **`--list`**: a flat list of hashes (one per line), suitable for piping into
136
+ other scripts. The HTML graph is still produced unless `--no-graph` is also specified.
137
+ - **`--txt-output FILE`**: also write the formatted text to a file.
138
+
139
+ In `--picks` mode the text output is grouped into `safe`, `blocked` (with the
140
+ list of missing dependencies for each) and `conditional` sections, while the HTML
141
+ graph highlights the pick set, blockers, and conditionals in distinct colors.
142
+
143
+ ## How dependency detection works
144
+
145
+ `git-trace` uses a position-aware analyzer. For each commit it replays the
146
+ diff hunks against a virtual snapshot of every file, tracked as a list of
147
+ `(line_content, owning_commit)` pairs. When a later commit removes or
148
+ overwrites a line, the analyzer consults the owner stored for that exact
149
+ position and records a dependency on whichever earlier commit introduced
150
+ that line. Identical line content appearing in different places of a file is
151
+ correctly treated as independent.
152
+
153
+ The analyzer handles file creation, deletion (`+++ /dev/null`), renames
154
+ (`rename from` / `rename to`), and the `@@ -X,0 +Y,N @@` insertion hunks
155
+ correctly.
156
+
157
+ ### Textual vs. Structural Dependencies
158
+
159
+ It is important to note that git-trace evaluates **purely textual dependencies**,
160
+ not structural or semantic ones. It only tracks modifications on a line-by-line basis.
161
+ It does not parse an Abstract Syntax Tree (AST) to understand code logic,
162
+ variable scopes, or function calls.
163
+
164
+ ## Known limitations
165
+
166
+ - **Copies (`copy from` / `copy to`)** → a copied file is treated as a fresh
167
+ new file rather than inheriting history from the source path. In practice
168
+ this is rare since git rarely produces copy markers by default.
169
+ - **Binary files** are silently skipped → no dependency is recorded for
170
+ changes to binary blobs.
171
+ - **Mode-only changes** (e.g. `chmod`) → silently ignored, since they
172
+ don't affect any tracked content.
173
+ - **Quoted paths in diffs** (`core.quotePath`) → git's `core.quotePath`
174
+ setting (default: `true`) wraps file paths containing non-ASCII or special
175
+ characters in backslash-escaped double quotes when producing diff output.
176
+ The analyser reads paths literally after `+++ b/`, so a quoted path will
177
+ not match its real on-disk name. If you analyse a repo with non-ASCII
178
+ filenames, set `git config core.quotePath false` first.
179
+ - **`diff.noprefix` / `--no-prefix` diffs** → when `diff.noprefix` is
180
+ enabled, git's diff output omits the `a/` and `b/` path prefixes (e.g.
181
+ `--- file.txt` instead of `--- a/file.txt`). The analyser specifically
182
+ looks for the `a/` and `b/` prefixes to identify files, so under that
183
+ configuration no dependencies will be detected. Keep `diff.noprefix` at
184
+ its default (`false`) when running `git-trace`.
185
+
186
+ ## AI usage
187
+
188
+ An LLM was used during the development of this project, specifically for:
189
+ - Writing the core dependency detection logic and processing the Git hunk headers.
190
+ - Injecting the custom HTML templates and dynamic styling payloads into pyvis html output.
191
+ - Generating most of the README.md content.
192
+
193
+ ## License
194
+
195
+ MIT &mdash; see [LICENSE](LICENSE).
@@ -0,0 +1,6 @@
1
+ from .version import VERSION, __version__
2
+
3
+ __all__ = (
4
+ "__version__",
5
+ "VERSION",
6
+ )
@@ -0,0 +1,3 @@
1
+ from git_trace.main import main
2
+
3
+ main()
@@ -0,0 +1,183 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections import defaultdict
5
+ from dataclasses import dataclass, field
6
+
7
+ from git_trace.git import Commit
8
+
9
+
10
# Unified-diff hunk header, e.g. "@@ -12,3 +14,5 @@".
# Groups: (old_start, old_count, new_start, new_count); either count group is
# None when the header omits it.
HUNK_HEADER_PATTERN: re.Pattern[str] = re.compile(
    r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@"
)
11
+
12
+
13
@dataclass
class DiffHunk:
    """A single hunk of a unified diff.

    The four integers hold the start/count values for the old and new side of
    the hunk. ``lines`` holds the hunk body as ``(marker, text)`` pairs and
    starts out empty.
    """

    old_start: int
    old_count: int
    new_start: int
    new_count: int
    # Hunk body; a fresh list is created for every instance.
    lines: list[tuple[str, str]] = field(default_factory=list)
20
+
21
+
22
@dataclass
class FileChange:
    """The changes a diff applies to one file.

    ``path`` is the path the change is recorded under, ``old_path`` is an
    optional previous path (``None`` when there is none), and ``hunks`` is the
    list of hunks belonging to the file.
    """

    path: str
    old_path: str | None = None
    # Hunks for this file; a fresh list is created for every instance.
    hunks: list[DiffHunk] = field(default_factory=list)
27
+
28
+
29
@dataclass
class VirtualLine:
    """One line of a tracked file together with the commit that owns it."""

    # Text of the line.
    content: str
    # Hash of the owning commit, or None when no owner is known.
    owner_hash: str | None
33
+
34
+
35
@dataclass
class DependencyGraph:
    """Maps a commit hash to the set of commit hashes it depends on."""

    relationships: dict[str, set[str]] = field(default_factory=lambda: defaultdict(set))

    def add_dependency(self, commit_hash: str, parent_hash: str) -> None:
        """Record that ``commit_hash`` depends on ``parent_hash``."""
        # setdefault instead of plain indexing: the field is typed as a plain
        # dict, so a graph constructed with ``relationships={}`` must work too
        # (indexing would raise KeyError unless the mapping is a defaultdict).
        self.relationships.setdefault(commit_hash, set()).add(parent_hash)

    def get_dependencies_for(self, commit_hash: str) -> set[str]:
        """Return the dependencies recorded for ``commit_hash``.

        An empty set is returned for an unknown hash. ``.get`` is used so that a
        lookup never inserts an empty entry into the underlying mapping.
        """
        return self.relationships.get(commit_hash, set())
44
+
45
+
46
@dataclass
class CherryPickAnalysis:
    """Result container for a cherry-pick check.

    ``safe`` and ``conditional`` are lists of commit hashes; ``blocked`` maps a
    commit hash to a set of commit hashes. All three default to empty
    containers that are created per instance.
    """

    safe: list[str] = field(default_factory=list)
    conditional: list[str] = field(default_factory=list)
    blocked: dict[str, set[str]] = field(default_factory=dict)
51
+
52
+
53
def _header_path(line: str, prefix: str) -> str | None:
    """Return the path that follows ``prefix`` on a ``---``/``+++`` line, else ``None``."""
    if not line.startswith(prefix):
        return None
    path: str = line[len(prefix):]
    # git appends a TAB after a header path that contains spaces; it is a
    # separator, not part of the file name ("rename to" lines do not carry it).
    if path.endswith("\t"):
        path = path[:-1]
    return path.replace("\\", "/")


def _parse_diff(diff_text: str, ignore_paths: set[str]) -> list[FileChange]:
    """Parse unified-diff text into a list of per-file changes.

    Recognised lines are ``diff --git`` (starts a new file section),
    ``rename from`` / ``rename to``, the ``--- a/...`` and ``+++ b/...`` file
    headers, hunk headers matching ``HUNK_HEADER_PATTERN`` and hunk body lines
    starting with ``+``, ``-`` or a space. Everything else is skipped.

    Hunk bodies are consumed by counting against the line counts announced in
    the hunk header. This matters because a removed line whose text starts with
    ``-- `` (or an added line starting with ``++ ``) is printed as ``--- ...``
    (``+++ ...``) and must not be mistaken for a file header.

    Args:
        diff_text: Diff text for a single commit.
        ignore_paths: Paths (compared for exact equality) whose changes are
            left out of the result.

    Returns:
        The ``FileChange`` objects in the order they appear in ``diff_text``.
    """
    changes: list[FileChange] = []
    current_change: FileChange | None = None
    current_hunk: DiffHunk | None = None
    rename_from: str | None = None
    old_path_candidate: str | None = None
    # Old-side / new-side body lines still owed by the current hunk header.
    pending_old: int = 0
    pending_new: int = 0

    # Split on "\n" only: str.splitlines() would also break on form feeds and
    # other separators that can legitimately occur inside a content line.
    for raw_line in diff_text.split("\n"):
        line: str = raw_line[:-1] if raw_line.endswith("\r") else raw_line

        if pending_old > 0 or pending_new > 0:
            marker: str = line[:1]
            if marker in ("+", "-", " "):
                if marker != "+":
                    pending_old -= 1
                if marker != "-":
                    pending_new -= 1
                # Bodies of ignored files are counted but not stored.
                if current_hunk is not None:
                    current_hunk.lines.append((marker, line[1:]))
                continue
            if not line.startswith(("diff --git ", "@@ ")):
                # e.g. "\ No newline at end of file"
                continue
            # A structural line arrived before the hunk was complete (truncated
            # input): resynchronise and handle it as a header below.
            pending_old = pending_new = 0

        if line.startswith("diff --git "):
            if current_change is not None:
                changes.append(current_change)
            current_change = None
            current_hunk = None
            rename_from = None
            old_path_candidate = None
        elif line.startswith("rename from "):
            rename_from = line[len("rename from "):].replace("\\", "/")
        elif line.startswith("rename to "):
            rename_to: str = line[len("rename to "):].replace("\\", "/")
            if rename_to not in ignore_paths:
                current_change = FileChange(path=rename_to, old_path=rename_from)
        elif line.startswith("--- "):
            current_hunk = None
            old_path_candidate = _header_path(line, "--- a/")
        elif line.startswith("+++ "):
            current_hunk = None
            new_path: str | None = _header_path(line, "+++ b/")
            # For a deleted file the new side is /dev/null, so fall back to the old path.
            canonical_path: str | None = new_path or old_path_candidate
            if canonical_path is None or canonical_path in ignore_paths:
                current_change = None
            elif current_change is not None and current_change.path == canonical_path:
                # Already created by the preceding "rename to" line.
                pass
            else:
                old: str | None = (
                    old_path_candidate
                    if (old_path_candidate and new_path and old_path_candidate != new_path)
                    else None
                )
                current_change = FileChange(path=canonical_path, old_path=old)
        else:
            hunk_match: re.Match[str] | None = HUNK_HEADER_PATTERN.match(line)
            if hunk_match is None:
                continue
            # A count omitted from the header means 1.
            old_count: int = int(hunk_match.group(2)) if hunk_match.group(2) is not None else 1
            new_count: int = int(hunk_match.group(4)) if hunk_match.group(4) is not None else 1
            pending_old, pending_new = old_count, new_count
            if current_change is None:
                current_hunk = None
            else:
                current_hunk = DiffHunk(
                    old_start=int(hunk_match.group(1)),
                    old_count=old_count,
                    new_start=int(hunk_match.group(3)),
                    new_count=new_count,
                )
                current_change.hunks.append(current_hunk)

    if current_change is not None:
        changes.append(current_change)
    return changes
108
+
109
+
110
def build_dependency_graph(commits: list[Commit], raw_diffs: dict[str, str], ignore_paths: set[str] | None = None) -> DependencyGraph:
    """Replay each commit's diff against per-file line snapshots and collect dependencies.

    Every file is tracked as a list of ``VirtualLine`` objects. Commits are
    processed in the given order; when a commit removes a line whose recorded
    owner is a different commit, a dependency on that owner is added.

    Args:
        commits: Commits to replay, in replay order.
        raw_diffs: Diff text keyed by commit hash; a missing key counts as an
            empty diff.
        ignore_paths: Paths handed to ``_parse_diff`` to be left out; ``None``
            means no paths are ignored.

    Returns:
        The populated ``DependencyGraph``.
    """
    skipped_paths: set[str] = ignore_paths or set()
    snapshots: dict[str, list[VirtualLine]] = {}
    result: DependencyGraph = DependencyGraph()

    for commit in commits:
        for change in _parse_diff(raw_diffs.get(commit.hash, ""), skipped_paths):
            previous_path = change.old_path
            if previous_path is not None and previous_path in snapshots:
                # Carry the tracked lines over to the file's new path.
                snapshots[change.path] = snapshots.pop(previous_path)
            elif change.path not in snapshots:
                snapshots[change.path] = []

            tracked: list[VirtualLine] = snapshots[change.path]
            # Net line-count change of the hunks already applied to this file.
            shift: int = 0

            for hunk in change.hunks:
                # With an old count of 0 the new lines go in after line
                # ``old_start``; otherwise ``old_start`` is the 1-based first
                # replaced line.
                if hunk.old_count == 0:
                    start: int = hunk.old_start
                else:
                    start = max(0, hunk.old_start - 1)
                start += shift
                end: int = start + hunk.old_count

                # Pad with ownerless placeholder lines so the replaced range exists.
                shortfall: int = end - len(tracked)
                if shortfall > 0:
                    tracked.extend(
                        VirtualLine(content="", owner_hash=None) for _ in range(shortfall)
                    )

                replacement: list[VirtualLine] = []
                cursor: int = start
                for marker, text in hunk.lines:
                    if marker == "+":
                        replacement.append(VirtualLine(content=text, owner_hash=commit.hash))
                    elif marker == " ":
                        # Context lines keep their existing owner.
                        replacement.append(tracked[cursor])
                        cursor += 1
                    elif marker == "-":
                        owner = tracked[cursor].owner_hash
                        if owner is not None and owner != commit.hash:
                            result.add_dependency(commit.hash, owner)
                        cursor += 1

                tracked[start:end] = replacement
                shift += hunk.new_count - hunk.old_count

    return result
157
+
158
+
159
def filter_picks(pick_hashes: set[str], graph: DependencyGraph) -> CherryPickAnalysis:
    """Classify the commits in ``pick_hashes`` as safe, conditional or blocked.

    * blocked: the commit has at least one dependency outside ``pick_hashes``;
      the value stored for it is the set of those missing dependencies.
    * conditional: the commit is not blocked itself, but depends (directly or
      through other picks) on a blocked pick.
    * safe: everything else.

    Args:
        pick_hashes: Commit hashes intended to be picked.
        graph: Dependency graph queried through ``get_dependencies_for``.

    Returns:
        A ``CherryPickAnalysis`` whose lists, and whose ``blocked`` keys, are in
        sorted hash order.
    """
    # Iterate in sorted order: the iteration order of a set of strings can vary
    # between interpreter runs, which would make the result order unstable.
    ordered: list[str] = sorted(pick_hashes)

    blocked: dict[str, set[str]] = {}
    for commit_hash in ordered:
        missing: set[str] = graph.get_dependencies_for(commit_hash) - pick_hashes
        if missing:
            blocked[commit_hash] = missing

    # Propagate to a fixed point: a pick is tainted once any of its dependencies
    # is. Only members of pick_hashes are ever added, so intersecting with
    # ``tainted`` already restricts the check to dependencies inside the pick set.
    tainted: set[str] = set(blocked)
    changed: bool = True
    while changed:
        changed = False
        for commit_hash in ordered:
            if commit_hash in tainted:
                continue
            if graph.get_dependencies_for(commit_hash) & tainted:
                tainted.add(commit_hash)
                changed = True

    conditional: list[str] = [
        commit_hash for commit_hash in ordered
        if commit_hash in tainted and commit_hash not in blocked
    ]
    safe: list[str] = [commit_hash for commit_hash in ordered if commit_hash not in tainted]
    return CherryPickAnalysis(
        safe=safe,
        conditional=conditional,
        blocked=blocked,
    )
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ from dataclasses import dataclass
5
+
6
+
7
@dataclass(frozen=True)
class Commit:
    """An immutable commit record: its hash and its message."""

    hash: str
    message: str

    def is_inside_hash_set(self, hash_set: set[str]) -> bool:
        """Return whether any entry of ``hash_set`` refers to this commit.

        An entry matches when it is a prefix of this commit's hash or this
        commit's hash is a prefix of the entry. The comparison ignores case,
        in line with ``resolve_hashes``. Empty entries never match, because an
        empty string is a prefix of every hash.
        """
        own: str = self.hash.lower()
        for entry in hash_set:
            candidate: str = entry.lower()
            if candidate and (own.startswith(candidate) or candidate.startswith(own)):
                return True
        return False
14
+
15
+
16
def run_git(*args: str, repo_directory: str) -> str:
    """Run ``git`` with ``args`` and return what it wrote to stdout.

    When ``repo_directory`` is non-empty, ``-C <repo_directory>`` is inserted
    before ``args``. Output is decoded as UTF-8 with undecodable bytes
    replaced. The exit status and stderr are not inspected here; an empty
    string is returned when there is no stdout.
    """
    location: list[str] = ["-C", repo_directory] if repo_directory else []
    completed: subprocess.CompletedProcess[str] = subprocess.run(
        ["git", *location, *args],
        capture_output=True,
        encoding="utf-8",
        errors="replace",
    )
    return completed.stdout if completed.stdout else ""
25
+
26
+
27
def get_commits(branch: str, repo_directory: str, after: str | None = None, before: str | None = None) -> list[Commit]:
    """List commits via ``git log --reverse`` as ``Commit`` objects.

    The revision passed to ``git log`` is ``before`` when given, otherwise
    ``branch``; with ``after`` it becomes the range ``after..<tip>``. When
    ``before`` is given, the last listed commit is dropped so that ``before``
    itself is excluded.
    """
    tip: str = before if before else branch
    scope: str = f"{after}..{tip}" if after else tip
    log_output: str = run_git("log", scope, "--reverse", "--format=%H %s", repo_directory=repo_directory)

    commits: list[Commit] = []
    for entry in map(str.strip, log_output.splitlines()):
        if not entry:
            continue
        # Each line is "<full hash> <subject>".
        sha, _, subject = entry.partition(" ")
        commits.append(Commit(hash=sha, message=subject))

    if before and commits:
        # The listing ends with ``before`` itself, which is excluded.
        commits.pop()

    return commits
42
+
43
+
44
def get_commit_diff(sha: str, repo_directory: str) -> str:
    """Return the patch text of commit ``sha`` as printed by ``git show -p``.

    The output format is pinned on the command line so it does not depend on
    the user's git configuration:

    * ``--src-prefix=a/`` / ``--dst-prefix=b/`` keep the ``a/`` and ``b/`` path
      prefixes even when ``diff.noprefix`` or ``diff.mnemonicPrefix`` is set.
    * ``-c core.quotePath=false`` stops git from quoting paths merely because
      they contain non-ASCII characters.
    * ``--no-color`` keeps colour escape sequences out of the text.
    """
    return run_git(
        "-c", "core.quotePath=false",
        "show",
        "--format=",
        "-p",
        "--no-color",
        "--src-prefix=a/",
        "--dst-prefix=b/",
        sha,
        repo_directory=repo_directory,
    )
46
+
47
+
48
def resolve_hashes(hashes: list[str], all_commits: list[Commit]) -> dict[str, str | None]:
    """Map each given (possibly abbreviated) hash to a full commit hash.

    A given hash resolves to the one commit hash that starts with it, compared
    case-insensitively. When no commit or more than one commit matches, the
    value is ``None``. Keys are the hashes exactly as passed in.
    """
    known: list[str] = [commit.hash for commit in all_commits]
    resolved: dict[str, str | None] = {}
    for wanted in hashes:
        needle: str = wanted.lower()
        candidates: list[str] = [full for full in known if full.lower().startswith(needle)]
        if len(candidates) == 1:
            resolved[wanted] = candidates[0]
        else:
            resolved[wanted] = None
    return resolved
File without changes