git2xml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
git2xml/__init__.py ADDED
@@ -0,0 +1,43 @@
1
+ """git2xml - structured XML briefs of git commits and pull requests for LLMs.
2
+
3
+ Public API:
4
+ generate_commit_brief / generate_pr_brief - async (native interface)
5
+ generate_commit_brief_sync / generate_pr_brief_sync - sync convenience wrappers
6
+ Git2xmlConfig - typed, validated config
7
+ Git2xmlError (+ subclasses) - error hierarchy to catch
8
+
9
+ Everything else (GitScanner, the rendering helpers, ChangedFile/ScanResult) is an
10
+ internal implementation detail and may change without notice.
11
+ """
12
+
13
+ __version__ = "0.1.0"
14
+
15
+ from .api import (
16
+ generate_commit_brief,
17
+ generate_commit_brief_sync,
18
+ generate_pr_brief,
19
+ generate_pr_brief_sync,
20
+ )
21
+ from .models import (
22
+ Git2xmlConfig,
23
+ Git2xmlError,
24
+ GitCommandError,
25
+ GitNotInstalledError,
26
+ NotAGitRepositoryError,
27
+ )
28
+
29
+ __all__ = [
30
+ # async API
31
+ "generate_commit_brief",
32
+ "generate_pr_brief",
33
+ # sync API
34
+ "generate_commit_brief_sync",
35
+ "generate_pr_brief_sync",
36
+ # config + errors
37
+ "Git2xmlConfig",
38
+ "Git2xmlError",
39
+ "GitNotInstalledError",
40
+ "NotAGitRepositoryError",
41
+ "GitCommandError",
42
+ "__version__",
43
+ ]
git2xml/__main__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Enable ``python -m git2xml`` alongside the installed ``git2xml`` entry point."""
2
+
3
+ import sys
4
+
5
+ from .cli import main
6
+
7
if __name__ == "__main__":
    # Run the CLI and hand its integer status to the interpreter as the
    # process exit code.
    exit_status = main()
    sys.exit(exit_status)
git2xml/api.py ADDED
@@ -0,0 +1,95 @@
1
+ """Public programmatic interface for git2xml.
2
+
3
+ These functions are the stable, supported way to use git2xml from other Python
4
+ code. Each takes a fully-typed, validated ``Git2xmlConfig`` and returns the
5
+ generated brief as an XML string - no disk I/O. The async functions are the
6
+ native interface (the engine is asyncio-based); the ``*_sync`` variants are
7
+ convenience adapters for plain synchronous scripts.
8
+
9
+ Anything not exported here - ``GitScanner``, the rendering helpers, the
10
+ ``ChangedFile`` / ``ScanResult`` dataclasses - is an internal implementation
11
+ detail and may change without notice. The supported surface is exactly: these
12
+ four functions, ``Git2xmlConfig``, and the ``Git2xmlError`` hierarchy.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ from dataclasses import replace
19
+ from typing import Any, Callable, Coroutine
20
+
21
+ from .core import build_brief
22
+ from .models import Git2xmlConfig
23
+
24
+ __all__ = [
25
+ "generate_commit_brief",
26
+ "generate_pr_brief",
27
+ "generate_commit_brief_sync",
28
+ "generate_pr_brief_sync",
29
+ ]
30
+
31
+
32
+ # --- Async API (native interface) -------------------------------------------
33
+
34
+
35
async def generate_commit_brief(config: Git2xmlConfig) -> str:
    """Build a commit-mode brief for ``config`` and return the XML text.

    The brief covers working-tree changes against HEAD, or only the staged
    changes when ``config.staged`` is set.

    The function name decides the mode: a copy of ``config`` with
    ``command="commit"`` is what actually reaches the engine, so a config
    created for another mode is adapted instead of rejected. The caller's
    ``config`` object is not modified.
    """
    commit_config = replace(config, command="commit")
    return await build_brief(commit_config)
44
+
45
+
46
async def generate_pr_brief(config: Git2xmlConfig) -> str:
    """Build a pr-mode brief for ``config`` and return the XML text.

    The brief covers the current branch's changes against ``config.base``
    (a ``base...HEAD`` diff) together with a structured commit log.

    A copy of ``config`` with ``command="pr"`` is what reaches the engine;
    the caller's ``config`` object is not modified.
    """
    pr_config = replace(config, command="pr")
    return await build_brief(pr_config)
54
+
55
+
56
+ # --- Sync wrappers (convenience adapters) -----------------------------------
57
+
58
+
59
def generate_commit_brief_sync(config: Git2xmlConfig) -> str:
    """Blocking counterpart of :func:`generate_commit_brief`.

    Intended for plain synchronous scripts. When called while an event loop
    is already running, a ``RuntimeError`` is raised - await the async
    function in that situation instead.
    """
    brief_xml = _run_sync(generate_commit_brief, config)
    return brief_xml
66
+
67
+
68
def generate_pr_brief_sync(config: Git2xmlConfig) -> str:
    """Blocking counterpart of :func:`generate_pr_brief`.

    Intended for plain synchronous scripts. When called while an event loop
    is already running, a ``RuntimeError`` is raised - await the async
    function in that situation instead.
    """
    brief_xml = _run_sync(generate_pr_brief, config)
    return brief_xml
71
+
72
+
73
+ # --- internal ---------------------------------------------------------------
74
+
75
+
76
def _run_sync(
    async_fn: Callable[[Git2xmlConfig], Coroutine[Any, Any, str]],
    config: Git2xmlConfig,
) -> str:
    """Drive an async API function to completion from sync code.

    ``asyncio.run`` raises an opaque error if a loop is already running (Jupyter,
    an async web handler, an agent runtime). We detect that case and fail with a
    clear, actionable message instead - and we only build the coroutine on the
    safe path, so there's no orphaned "coroutine was never awaited" warning.

    Args:
        async_fn: One of the async API functions; called as ``async_fn(config)``.
        config: The configuration forwarded unchanged to ``async_fn``.

    Returns:
        Whatever ``async_fn(config)`` resolves to (the brief as an XML string).

    Raises:
        RuntimeError: If called while an event loop is already running in this
            thread.
        Exception: Anything raised by ``async_fn`` propagates unchanged.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No running loop -> safe to start one. The loop is deliberately NOT
        # started inside this handler: doing so would attach this internal
        # probe error as ``__context__`` to every exception raised by the
        # coroutine and clutter tracebacks with a misleading
        # "During handling of the above exception..." section.
        pass
    else:
        raise RuntimeError(
            "git2xml's synchronous API cannot run inside an existing event loop. "
            "Call the async variant instead, e.g. `await generate_commit_brief(config)`."
        )
    return asyncio.run(async_fn(config))
git2xml/cli.py ADDED
@@ -0,0 +1,158 @@
1
+ """Command-line entry point: argv -> Git2xmlConfig -> save_brief.
2
+
3
+ Thin adapter around the engine. Parses arguments, maps them onto a validated
4
+ ``Git2xmlConfig``, configures logging, and translates the typed error
5
+ hierarchy into process exit codes - it contains no brief-building logic of its
6
+ own. The ``git2xml`` console script and ``python -m git2xml`` both land in
7
+ ``main`` here.
8
+ """
9
+
10
+ import argparse
11
+ import asyncio
12
+ import dataclasses
13
+ import logging
14
+ import sys
15
+
16
+ from . import __version__
17
+ from .constants import DIFF_SEMAPHORE_LIMIT, GIT_TIMEOUT, MAX_DIFF_SIZE, MAX_TEXT_FILE_SIZE
18
+ from .core import save_brief
19
+ from .models import Git2xmlCliConfig, Git2xmlError
20
+
21
+
22
def _int(value: str) -> int:
    """Convert an argv token to ``int`` for use as an argparse ``type=``.

    Only the conversion happens here; range checks are left to Git2xmlConfig.
    A token that ``int()`` rejects is reported as an
    ``argparse.ArgumentTypeError`` with the original ``ValueError`` suppressed.
    """
    try:
        parsed = int(value)
    except ValueError:
        message = f"invalid integer value: '{value}'"
        raise argparse.ArgumentTypeError(message) from None
    return parsed
28
+
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
def _build_parser() -> argparse.ArgumentParser:
    """Return the ``git2xml`` argument parser with every flag declared; parses nothing."""
    parser = argparse.ArgumentParser(
        prog="git2xml", description="Generate contextual XML briefs for Git Commits and PRs."
    )
    parser.add_argument("command", choices=["commit", "pr"], help="Type of brief to generate.")
    parser.add_argument(
        "--repo", default=".", help="Git repository root location (default: current directory)."
    )
    parser.add_argument(
        "--output", help="Output file name. Defaults to commit_brief.xml or pr_brief.xml."
    )
    parser.add_argument("--base", default="main", help="Base branch for PR diffs (default: main).")
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Show detailed file-by-file and commit-by-commit processing.",
    )
    parser.add_argument(
        "--staged",
        action="store_true",
        help="Only include staged files in commit mode (ignores unstaged/untracked).",
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    parser.add_argument(
        "--strict-xml",
        action="store_true",
        help="Enforce strict XML 1.0 compliance (escapes control chars and CDATA terminators).",
    )
    parser.add_argument(
        "--no-untracked",
        action="store_true",
        help="Exclude untracked files from commit-mode output. No-op with --staged or in PR mode.",
    )
    parser.add_argument(
        "--max-size",
        type=_int,
        default=MAX_TEXT_FILE_SIZE,
        metavar="N",
        help=f"Maximum file size in bytes before content is omitted (default: {MAX_TEXT_FILE_SIZE}).",
    )
    parser.add_argument(
        "--max-diff-size",
        type=_int,
        default=MAX_DIFF_SIZE,
        metavar="N",
        help=f"Maximum diff size in bytes. Larger diffs are omitted with a reason (default: {MAX_DIFF_SIZE}; 0 = unlimited).",
    )
    parser.add_argument(
        "--no-content",
        action="store_true",
        help="Omit <content> for all files; still emit <file> elements and <diff>. Produces a diff-only brief.",
    )
    parser.add_argument(
        "--git-timeout",
        type=_int,
        default=GIT_TIMEOUT,
        metavar="N",
        # The unit is stated explicitly: the default constant is in seconds.
        help=f"Git command timeout in seconds (default: {GIT_TIMEOUT}).",
    )
    parser.add_argument(
        "--diff-semaphore-limit",
        type=_int,
        default=DIFF_SEMAPHORE_LIMIT,
        metavar="N",
        help=f"Maximum number of concurrent diff fetch actions (default: {DIFF_SEMAPHORE_LIMIT}).",
    )
    parser.add_argument(
        "--hide-repo-path",
        action="store_true",
        help='Emit only the repo directory name in the root <... repo=""> attribute instead of the absolute local path. Use when sharing briefs externally.',
    )
    return parser


def main(argv: "list[str] | None" = None) -> int:
    """Console-script entry point: parse argv, run the brief, map errors to exit codes.

    Parses command-line arguments, builds a ``Git2xmlCliConfig`` from them,
    configures root logging (DEBUG under ``-v``, otherwise INFO), and runs
    ``save_brief`` to generate and write the brief. Contains no brief-building
    logic itself - that lives in the engine.

    Args:
        argv: Argument list to parse, without the program name. ``None`` (the
            default, used by the console script and ``python -m git2xml``)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        The process exit status; the caller passes it to ``sys.exit``.
        0 on success; 1 on an invalid configuration, a known ``Git2xmlError``,
        a user interrupt (Ctrl-C), or any unexpected exception (logged with a
        traceback only under ``-v``).

    Raises:
        SystemExit: Raised by argparse itself for ``--version``/``--help``
            (status 0) and for usage errors (status 2).

    Side effects: configures logging on the root logger.
    """
    args = _build_parser().parse_args(argv)

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO, format="%(levelname)s: %(message)s"
    )

    # Forward only the parsed values that are real config fields, and drop
    # unset (None) options so the config's own defaults apply.
    valid_fields = {f.name for f in dataclasses.fields(Git2xmlCliConfig)}
    config_args = {k: v for k, v in vars(args).items() if k in valid_fields and v is not None}

    # Construction can fail on a missing/unknown field (TypeError) or on a
    # rejected value (ValueError); report either as a configuration error.
    try:
        config = Git2xmlCliConfig(**config_args)
    except (TypeError, ValueError) as e:
        logger.error("Invalid configuration: %s", e)
        return 1

    try:
        asyncio.run(save_brief(config))
    except Git2xmlError as e:
        logger.error("%s", e)
        return 1
    except KeyboardInterrupt:
        logger.error("Operation cancelled by user.")
        return 1
    except Exception as exc:
        # Top-level boundary: never let a traceback escape unless asked for.
        if args.verbose:
            logger.exception("Unexpected error: %s", exc)
        else:
            logger.error("Unexpected error: %s. Run with -v for details.", exc)
        return 1

    return 0
155
+
156
+
157
if __name__ == "__main__":
    # Direct execution of this module: run the CLI and exit with its status.
    exit_status = main()
    sys.exit(exit_status)
git2xml/constants.py ADDED
@@ -0,0 +1,31 @@
1
+ """Default tuning values for size limits, timeouts, and diff concurrency.
2
+
3
+ The defaults behind the matching ``Git2xmlConfig`` fields and CLI flags
4
+ (``--max-size``, ``--max-diff-size``, ``--git-timeout``, ``--diff-semaphore-limit``); every value
5
+ here is overridable per run, so this is just the baseline, not a hard limit.
6
+ """
7
+
8
# Largest file, in bytes, whose content is still embedded in the XML. Anything
# bigger has its content left out and an explanatory reason emitted instead.
MAX_TEXT_FILE_SIZE = 5 * 1024**2

# Largest single-file diff, in bytes of UTF-8, that is still embedded in the
# XML. Bigger diffs are left out with a reason.
MAX_DIFF_SIZE = 1024**2

# How long a git command may run, in seconds.
GIT_TIMEOUT = 30

# Upper bound on the number of git diffs fetched at the same time.
DIFF_SEMAPHORE_LIMIT = 20


# Budget, in UTF-16 code units, for the path arguments of one ls-tree batch.
# It is derived only from the Windows CreateProcessW limit (32,767 UTF-16
# units), which is the binding constraint across platforms: POSIX ARG_MAX is
# far higher, so one shared budget is safe everywhere. Should this ever become
# per-platform (e.g. a larger Unix budget to reduce ls-tree spawns), change the
# counting unit in _chunk_paths as well - it counts UTF-16 units to line up
# with this cap.
_WINDOWS_CMDLINE_CAP = 32_767
# Headroom kept back from the cap; NOTE(review): presumably for the git
# executable and its fixed arguments - confirm against the ls-tree caller.
_CMDLINE_RESERVE = 2_048
LS_TREE_PATH_BUDGET = _WINDOWS_CMDLINE_CAP - _CMDLINE_RESERVE  # 30,719