git-commit-message 0.5.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17) hide show
  1. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/PKG-INFO +16 -1
  2. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/README.md +14 -0
  3. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/pyproject.toml +2 -1
  4. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message/__init__.py +1 -3
  5. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message/_cli.py +66 -27
  6. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message/_git.py +13 -13
  7. git_commit_message-0.6.0/src/git_commit_message/_gpt.py +604 -0
  8. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message.egg-info/PKG-INFO +16 -1
  9. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message.egg-info/requires.txt +1 -0
  10. git_commit_message-0.5.1/src/git_commit_message/_gpt.py +0 -312
  11. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/UNLICENSE +0 -0
  12. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/setup.cfg +0 -0
  13. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message/__main__.py +0 -0
  14. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message.egg-info/SOURCES.txt +0 -0
  15. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message.egg-info/dependency_links.txt +0 -0
  16. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message.egg-info/entry_points.txt +0 -0
  17. {git_commit_message-0.5.1 → git_commit_message-0.6.0}/src/git_commit_message.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: git-commit-message
3
- Version: 0.5.1
3
+ Version: 0.6.0
4
4
  Summary: Generate Git commit messages from staged changes using OpenAI GPT
5
5
  Maintainer-email: Mina Her <minacle@live.com>
6
6
  License: This is free and unencumbered software released into the public domain.
@@ -45,6 +45,7 @@ Requires-Python: >=3.13
45
45
  Description-Content-Type: text/markdown
46
46
  Requires-Dist: babel>=2.17.0
47
47
  Requires-Dist: openai>=2.6.1
48
+ Requires-Dist: tiktoken>=0.12.0
48
49
 
49
50
  # git-commit-message
50
51
 
@@ -115,6 +116,19 @@ git-commit-message --one-line "optional context"
115
116
  git-commit-message --one-line --max-length 50 "optional context"
116
117
  ```
117
118
 
119
+ - Chunk long diffs by token budget (0 = single chunk + summary, -1 = disable chunking):
120
+
121
+ ```sh
122
+ # force a single summary pass over the whole diff (default)
123
+ git-commit-message --chunk-tokens 0 "optional context"
124
+
125
+ # chunk the diff into ~4000-token pieces before summarising
126
+ git-commit-message --chunk-tokens 4000 "optional context"
127
+
128
+ # disable summarisation and use the legacy one-shot prompt
129
+ git-commit-message --chunk-tokens -1 "optional context"
130
+ ```
131
+
118
132
  - Commit immediately with editor:
119
133
 
120
134
  ```sh
@@ -144,6 +158,7 @@ Environment:
144
158
  - `OPENAI_API_KEY`: required
145
159
  - `GIT_COMMIT_MESSAGE_MODEL` or `OPENAI_MODEL`: optional (default: `gpt-5-mini`)
146
160
  - `GIT_COMMIT_MESSAGE_LANGUAGE`: optional (default: `en-GB`)
161
+ - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: optional token budget per diff chunk (default: 0 = single chunk + summary; -1 disables summarisation)
147
162
 
148
163
  ## AI‑generated code notice
149
164
 
@@ -67,6 +67,19 @@ git-commit-message --one-line "optional context"
67
67
  git-commit-message --one-line --max-length 50 "optional context"
68
68
  ```
69
69
 
70
+ - Chunk long diffs by token budget (0 = single chunk + summary, -1 = disable chunking):
71
+
72
+ ```sh
73
+ # force a single summary pass over the whole diff (default)
74
+ git-commit-message --chunk-tokens 0 "optional context"
75
+
76
+ # chunk the diff into ~4000-token pieces before summarising
77
+ git-commit-message --chunk-tokens 4000 "optional context"
78
+
79
+ # disable summarisation and use the legacy one-shot prompt
80
+ git-commit-message --chunk-tokens -1 "optional context"
81
+ ```
82
+
70
83
  - Commit immediately with editor:
71
84
 
72
85
  ```sh
@@ -96,6 +109,7 @@ Environment:
96
109
  - `OPENAI_API_KEY`: required
97
110
  - `GIT_COMMIT_MESSAGE_MODEL` or `OPENAI_MODEL`: optional (default: `gpt-5-mini`)
98
111
  - `GIT_COMMIT_MESSAGE_LANGUAGE`: optional (default: `en-GB`)
112
+ - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: optional token budget per diff chunk (default: 0 = single chunk + summary; -1 disables summarisation)
99
113
 
100
114
  ## AI‑generated code notice
101
115
 
@@ -1,12 +1,13 @@
1
1
  [project]
2
2
  name = "git-commit-message"
3
- version = "0.5.1"
3
+ version = "0.6.0"
4
4
  description = "Generate Git commit messages from staged changes using OpenAI GPT"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13"
7
7
  dependencies = [
8
8
  "babel>=2.17.0",
9
9
  "openai>=2.6.1",
10
+ "tiktoken>=0.12.0",
10
11
  ]
11
12
  maintainers = [{ name = "Mina Her", email = "minacle@live.com" }]
12
13
  license = { file = "UNLICENSE" }
@@ -5,6 +5,4 @@ This module exposes only public symbols in accordance with the codestyle guide.
5
5
 
6
6
  from ._cli import main
7
7
 
8
- __all__ = (
9
- "main",
10
- )
8
+ __all__ = ("main",)
@@ -1,17 +1,24 @@
1
- from __future__ import annotations
2
-
3
1
  """Command-line interface entry point.
4
2
 
5
3
  Collect staged changes from the repository and call an OpenAI GPT model
6
4
  to generate a commit message, or create a commit straight away.
7
5
  """
8
6
 
7
+ from __future__ import annotations
8
+
9
9
  from argparse import ArgumentParser, Namespace
10
+ from os import environ
10
11
  from pathlib import Path
11
- import sys
12
+ from sys import exit as sys_exit
13
+ from sys import stderr
12
14
  from typing import Final
13
15
 
14
- from ._git import commit_with_message, get_repo_root, get_staged_diff, has_staged_changes
16
+ from ._git import (
17
+ commit_with_message,
18
+ get_repo_root,
19
+ get_staged_diff,
20
+ has_staged_changes,
21
+ )
15
22
  from ._gpt import (
16
23
  generate_commit_message,
17
24
  generate_commit_message_with_info,
@@ -19,6 +26,18 @@ from ._gpt import (
19
26
  )
20
27
 
21
28
 
29
+ def _env_chunk_tokens_default() -> int | None:
30
+ """Return chunk token default from env if valid, else None."""
31
+
32
+ raw: str | None = environ.get("GIT_COMMIT_MESSAGE_CHUNK_TOKENS")
33
+ if raw is None:
34
+ return None
35
+ try:
36
+ return int(raw)
37
+ except ValueError:
38
+ return None
39
+
40
+
22
41
  def _build_parser() -> ArgumentParser:
23
42
  """Create the CLI argument parser.
24
43
 
@@ -92,12 +111,24 @@ def _build_parser() -> ArgumentParser:
92
111
  help="Maximum subject (first line) length (default: 72).",
93
112
  )
94
113
 
114
+ parser.add_argument(
115
+ "--chunk-tokens",
116
+ dest="chunk_tokens",
117
+ type=int,
118
+ default=None,
119
+ help=(
120
+ "Target token budget per diff chunk. "
121
+ "0 forces a single chunk with summarisation; -1 disables summarisation (legacy one-shot). "
122
+ "If omitted, uses GIT_COMMIT_MESSAGE_CHUNK_TOKENS when set (default: 0)."
123
+ ),
124
+ )
125
+
95
126
  return parser
96
127
 
97
128
 
98
129
  def _run(
99
- *,
100
130
  args: Namespace,
131
+ /,
101
132
  ) -> int:
102
133
  """Main execution logic.
103
134
 
@@ -114,37 +145,45 @@ def _run(
114
145
 
115
146
  repo_root: Path = get_repo_root()
116
147
 
117
- if not has_staged_changes(cwd=repo_root):
118
- print("No staged changes. Run 'git add' and try again.", file=sys.stderr)
148
+ if not has_staged_changes(repo_root):
149
+ print("No staged changes. Run 'git add' and try again.", file=stderr)
119
150
  return 2
120
151
 
121
- diff_text: str = get_staged_diff(cwd=repo_root)
152
+ diff_text: str = get_staged_diff(repo_root)
122
153
 
123
154
  hint: str | None = args.description if isinstance(args.description, str) else None
124
155
 
156
+ chunk_tokens: int | None = args.chunk_tokens
157
+ if chunk_tokens is None:
158
+ chunk_tokens = _env_chunk_tokens_default()
159
+ if chunk_tokens is None:
160
+ chunk_tokens = 0
161
+
125
162
  result: CommitMessageResult | None = None
126
163
  try:
127
164
  if args.debug:
128
165
  result = generate_commit_message_with_info(
129
- diff=diff_text,
130
- hint=hint,
131
- model=args.model,
132
- single_line=getattr(args, "one_line", False),
133
- subject_max=getattr(args, "max_length", None),
134
- language=getattr(args, "language", None),
166
+ diff_text,
167
+ hint,
168
+ args.model,
169
+ getattr(args, "one_line", False),
170
+ getattr(args, "max_length", None),
171
+ getattr(args, "language", None),
172
+ chunk_tokens,
135
173
  )
136
174
  message = result.message
137
175
  else:
138
176
  message = generate_commit_message(
139
- diff=diff_text,
140
- hint=hint,
141
- model=args.model,
142
- single_line=getattr(args, "one_line", False),
143
- subject_max=getattr(args, "max_length", None),
144
- language=getattr(args, "language", None),
177
+ diff_text,
178
+ hint,
179
+ args.model,
180
+ getattr(args, "one_line", False),
181
+ getattr(args, "max_length", None),
182
+ getattr(args, "language", None),
183
+ chunk_tokens,
145
184
  )
146
185
  except Exception as exc: # noqa: BLE001 - to preserve standard output messaging
147
- print(f"Failed to generate commit message: {exc}", file=sys.stderr)
186
+ print(f"Failed to generate commit message: {exc}", file=stderr)
148
187
  return 3
149
188
 
150
189
  # Option: force single-line message
@@ -198,9 +237,9 @@ def _run(
198
237
  print(message)
199
238
 
200
239
  if args.edit:
201
- rc: int = commit_with_message(message=message, edit=True, cwd=repo_root)
240
+ rc: int = commit_with_message(message, True, repo_root)
202
241
  else:
203
- rc = commit_with_message(message=message, edit=False, cwd=repo_root)
242
+ rc = commit_with_message(message, False, repo_root)
204
243
 
205
244
  return rc
206
245
 
@@ -215,8 +254,8 @@ def main() -> None:
215
254
  args: Namespace = parser.parse_args()
216
255
 
217
256
  if args.edit and not args.commit:
218
- print("'--edit' must be used together with '--commit'.", file=sys.stderr)
219
- sys.exit(2)
257
+ print("'--edit' must be used together with '--commit'.", file=stderr)
258
+ sys_exit(2)
220
259
 
221
- code: int = _run(args=args)
222
- sys.exit(code)
260
+ code: int = _run(args)
261
+ sys_exit(code)
@@ -1,18 +1,18 @@
1
- from __future__ import annotations
2
-
3
1
  """Git-related helper functions.
4
2
 
5
3
  Provides repository root discovery, extraction of staged changes, and
6
4
  creating commits from a message.
7
5
  """
8
6
 
7
+ from __future__ import annotations
8
+
9
9
  from pathlib import Path
10
- import subprocess
10
+ from subprocess import CalledProcessError, check_call, check_output, run
11
11
 
12
12
 
13
13
  def get_repo_root(
14
- *,
15
14
  cwd: Path | None = None,
15
+ /,
16
16
  ) -> Path:
17
17
  """Find the repository root from the current working directory.
18
18
 
@@ -29,7 +29,7 @@ def get_repo_root(
29
29
 
30
30
  start: Path = cwd or Path.cwd()
31
31
  try:
32
- out: bytes = subprocess.check_output(
32
+ out: bytes = check_output(
33
33
  [
34
34
  "git",
35
35
  "rev-parse",
@@ -37,7 +37,7 @@ def get_repo_root(
37
37
  ],
38
38
  cwd=str(start),
39
39
  )
40
- except subprocess.CalledProcessError as exc: # noqa: TRY003
40
+ except CalledProcessError as exc: # noqa: TRY003
41
41
  raise RuntimeError("Not a Git repository.") from exc
42
42
 
43
43
  root = Path(out.decode().strip())
@@ -45,28 +45,28 @@ def get_repo_root(
45
45
 
46
46
 
47
47
  def has_staged_changes(
48
- *,
49
48
  cwd: Path,
49
+ /,
50
50
  ) -> bool:
51
51
  """Check whether there are staged changes."""
52
52
 
53
53
  try:
54
- subprocess.check_call(
54
+ check_call(
55
55
  ["git", "diff", "--cached", "--quiet", "--exit-code"],
56
56
  cwd=str(cwd),
57
57
  )
58
58
  return False
59
- except subprocess.CalledProcessError:
59
+ except CalledProcessError:
60
60
  return True
61
61
 
62
62
 
63
63
  def get_staged_diff(
64
- *,
65
64
  cwd: Path,
65
+ /,
66
66
  ) -> str:
67
67
  """Return the staged changes as diff text."""
68
68
 
69
- out: bytes = subprocess.check_output(
69
+ out: bytes = check_output(
70
70
  [
71
71
  "git",
72
72
  "diff",
@@ -81,10 +81,10 @@ def get_staged_diff(
81
81
 
82
82
 
83
83
  def commit_with_message(
84
- *,
85
84
  message: str,
86
85
  edit: bool,
87
86
  cwd: Path,
87
+ /,
88
88
  ) -> int:
89
89
  """Create a commit with the given message.
90
90
 
@@ -108,7 +108,7 @@ def commit_with_message(
108
108
  cmd.append("--edit")
109
109
 
110
110
  try:
111
- completed = subprocess.run(cmd, cwd=str(cwd), check=False)
111
+ completed = run(cmd, cwd=str(cwd), check=False)
112
112
  return int(completed.returncode)
113
113
  except OSError as exc: # e.g., editor launch failure, etc.
114
114
  raise RuntimeError(f"Failed to run 'git commit': {exc}") from exc
@@ -0,0 +1,604 @@
1
+ """Generate Git commit messages by calling an OpenAI GPT model.
2
+
3
+ Migrated to use OpenAI Responses API (client.responses.create).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from babel import Locale
9
+ from openai import OpenAI
10
+ from openai.types.responses import Response, ResponseInputParam
11
+ from os import environ
12
+ from tiktoken import Encoding, encoding_for_model, get_encoding
13
+ from typing import Final
14
+
15
+
16
+ _DEFAULT_MODEL: Final[str] = "gpt-5-mini"
17
+ _DEFAULT_LANGUAGE: Final[str] = "en-GB"
18
+
19
+
20
def _encoding_for_model(
    model: str,
    /,
) -> Encoding:
    """Return the tiktoken encoding to use for ``model``.

    Falls back to the ``cl100k_base`` encoding whenever
    ``encoding_for_model`` raises for the given model name.
    """

    try:
        encoding: Encoding = encoding_for_model(model)
    except Exception:
        # Best effort: any lookup failure selects the generic encoding instead.
        encoding = get_encoding("cl100k_base")
    return encoding
28
+
29
+
30
def _count_tokens(
    text: str,
    *,
    model: str,
) -> int:
    """Return the number of tokens ``text`` encodes to for ``model``."""

    tokens = _encoding_for_model(model).encode(text)
    return len(tokens)
37
+
38
+
39
def _build_system_prompt(
    single_line: bool,
    subject_max: int | None,
    language: str,
    /,
) -> str:
    """Build the instruction text that tells the model how to write the commit.

    Parameters
    ----------
    single_line
        When true, ask for a subject line only; otherwise ask for a subject,
        a bullet list, and a rationale line.
    subject_max
        Maximum subject length quoted in the prompt; a falsy value means 72.
    language
        Locale tag; it is rendered through ``_language_display`` before being
        embedded in the prompt.
    """

    display_language: str = _language_display(language)
    max_len = subject_max if subject_max else 72

    # Both prompt variants open with the same two sentences.
    parts: list[str] = [
        "You are an expert Git commit message generator. ",
        f"Always use '{display_language}' spelling and style. ",
    ]

    if single_line:
        parts += [
            f"Return a single-line imperative subject only (<= {max_len} chars). ",
            "Do not include a body, bullet points, or any rationale. Do not include any line breaks. ",
            "Consider the user-provided auxiliary context if present. ",
            "Return only the commit message text (no code fences or prefixes like 'Commit message:').",
        ]
        return "".join(parts)

    parts += [
        "The subject line is mandatory: you MUST start the output with the subject as the very first non-empty line, ",
        f"in imperative mood, and keep it <= {max_len} chars. Insert exactly one blank line after the subject. ",
        "Never start with bullets, headings, labels, or any other text. Then include a body in this format.\n\n",
        "Example format (do not include the --- lines in the output):\n\n",
        "---\n\n",
        "<Subject line>\n\n",
        "- <detail 1>\n",
        "- <detail 2>\n",
        "- <detail N>\n\n",
        "<Rationale label translated into the target language>: <1-2 concise sentences explaining the intent and why>\n\n",
        "---\n\n",
        "Guidelines:\n",
        "- The first non-empty line MUST be the subject line; include exactly one blank line after it.\n",
        "- Never place bullets, headings, or labels before the subject line.\n",
        "- Use '-' bullets; keep each bullet short (<= 1 line).\n",
        "- Prefer imperative mood verbs (Add, Fix, Update, Remove, Refactor, Document, etc.).\n",
        "- Focus on what changed and why; avoid copying diff hunks verbatim.\n",
        "- The only allowed label is the equivalent of 'Rationale:' translated into the target language; do not add other headings or prefaces.\n",
        f"- All text (subject, bullets, rationale label, rationale content) MUST be in the target language: '{display_language}'. Do not mix other languages.\n",
        "- Do not include the '---' delimiter lines, code fences, or any surrounding labels like 'Commit message:'.\n",
        "- Do not copy or reuse any example text verbatim; produce original content based on the provided diff and context.\n",
        "- If few details are necessary, include at least one bullet summarising the key change.\n",
        "- If you cannot provide any body content, still output the subject line; the subject line must never be omitted.\n",
        "- Consider the user-provided auxiliary context if present.\n",
        "Return only the commit message text in the above format (no code fences or extra labels).",
    ]
    return "".join(parts)
85
+
86
+
87
def _language_display(
    language: str,
    /,
) -> str:
    """Return a human-friendly language label, e.g. 'Korean (South Korea) [ko-KR]'.

    Parameters
    ----------
    language
        A locale tag using ``-`` as the separator (for example ``en-GB``).

    Returns
    -------
    str
        ``"<English display name> [<tag>]"`` when the tag parses and has a
        display name, ``"[<tag>]"`` when it parses but has no display name,
        and ``language`` unchanged when it cannot be parsed or yields no tag
        components.
    """

    try:
        locale = Locale.parse(language, sep="-")
    except Exception:
        # Best effort: an unparseable tag is passed through verbatim.
        return language

    tag = "-".join(
        part
        for part in (
            locale.language,
            locale.script,
            locale.territory,
            locale.variant,
        )
        if part
    )
    if not tag:
        return language

    english_name = locale.get_display_name("en") or ""
    if not english_name:
        return f"[{tag}]"

    # Upper-case only the first character. str.capitalize() would also
    # lower-case the rest and mangle names like "English (United Kingdom)".
    return f"{english_name[0].upper()}{english_name[1:]} [{tag}]"
113
+
114
+
115
def _instructions(
    single_line: bool,
    subject_max: int | None,
    language: str,
    /,
) -> str:
    """Return the instructions string passed to the Responses API.

    Thin wrapper: the text is produced by ``_build_system_prompt`` with the
    same three arguments.
    """

    prompt: str = _build_system_prompt(single_line, subject_max, language)
    return prompt
123
+
124
+
125
+ class CommitMessageResult:
126
+ """Hold the generated commit message and debugging information.
127
+
128
+ Notes
129
+ -----
130
+ Treat all fields as read-only by convention.
131
+ """
132
+
133
+ __slots__ = (
134
+ "message",
135
+ "model",
136
+ "prompt",
137
+ "response_text",
138
+ "response_id",
139
+ "prompt_tokens",
140
+ "completion_tokens",
141
+ "total_tokens",
142
+ )
143
+
144
+ def __init__(
145
+ self,
146
+ /,
147
+ *,
148
+ message: str,
149
+ model: str,
150
+ prompt: str,
151
+ response_text: str,
152
+ response_id: str | None,
153
+ prompt_tokens: int | None,
154
+ completion_tokens: int | None,
155
+ total_tokens: int | None,
156
+ ) -> None:
157
+ self.message = message
158
+ self.model = model
159
+ self.prompt = prompt
160
+ self.response_text = response_text
161
+ self.response_id = response_id
162
+ self.prompt_tokens = prompt_tokens
163
+ self.completion_tokens = completion_tokens
164
+ self.total_tokens = total_tokens
165
+
166
+
167
+ def _resolve_model(
168
+ model: str | None,
169
+ /,
170
+ ) -> str:
171
+ """Resolve the model name."""
172
+
173
+ return (
174
+ model
175
+ or environ.get("GIT_COMMIT_MESSAGE_MODEL")
176
+ or environ.get("OPENAI_MODEL")
177
+ or _DEFAULT_MODEL
178
+ )
179
+
180
+
181
+ def _resolve_language(
182
+ language: str | None,
183
+ /,
184
+ ) -> str:
185
+ """Resolve the target language/locale tag used for output style."""
186
+
187
+ return language or environ.get("GIT_COMMIT_MESSAGE_LANGUAGE") or _DEFAULT_LANGUAGE
188
+
189
+
190
def _build_responses_input(
    diff: str,
    hint: str | None,
    /,
) -> ResponseInputParam:
    """Build the Responses API input list from the diff and the optional hint.

    Returns
    -------
    ResponseInputParam
        One user message holding the diff, preceded by a separate user
        message holding the auxiliary context when ``hint`` is non-empty.
    """

    texts: list[str] = []
    if hint:
        texts.append(f"# Auxiliary context (user-provided)\n{hint}")
    texts.append(f"# Changes (diff)\n{diff}")

    # Every text becomes its own user message with a single input_text part.
    input_items: ResponseInputParam = [
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": text},
            ],
        }
        for text in texts
    ]
    return input_items
228
+
229
+
230
def _split_diff_into_hunks(
    diff: str,
    /,
) -> list[str]:
    """Split unified diff text into self-contained hunks.

    Each ``@@`` hunk is returned together with a copy of its file header
    (the ``diff --git`` line and the lines that follow it up to the first
    ``@@``). A file entry with no ``@@`` hunk at all (for example a pure
    rename, a mode change, or a binary file) is returned as a header-only
    hunk so that it is not lost. Lines that appear before any ``diff --git``
    or ``@@`` line are kept together as one standalone hunk.

    Returns
    -------
    list[str]
        The hunks in input order, with line endings preserved; empty for
        empty input.
    """

    hunks: list[str] = []
    file_header: list[str] = []
    current_hunk: list[str] | None = None

    for line in diff.splitlines(keepends=True):
        if line.startswith("diff --git "):
            if current_hunk:
                hunks.append("".join(current_hunk))
            elif file_header:
                # The previous file had a header but never produced a hunk.
                hunks.append("".join(file_header))
            current_hunk = None
            file_header = [line]
        elif line.startswith("@@"):
            if current_hunk:
                hunks.append("".join(current_hunk))
            # Repeat the file header so the hunk stays readable on its own.
            current_hunk = [*file_header, line]
        elif current_hunk is not None:
            current_hunk.append(line)
        elif file_header:
            file_header.append(line)
        else:
            # Lines outside a diff header/hunk; keep as standalone hunk
            current_hunk = [line]

    if current_hunk:
        hunks.append("".join(current_hunk))
    elif file_header:
        hunks.append("".join(file_header))

    return hunks
269
+
270
+
271
def _build_diff_chunks(
    hunks: list[str],
    chunk_tokens: int,
    model: str,
    /,
) -> list[str]:
    """Greedily pack consecutive hunks into chunks within a token budget.

    Parameters
    ----------
    hunks
        Diff hunks in the order they should appear.
    chunk_tokens
        Maximum token count per chunk, as measured by ``_count_tokens``.
    model
        Model name used to select the tokenizer.

    Returns
    -------
    list[str]
        Concatenated hunks; each chunk's token count is at most
        ``chunk_tokens``.

    Raises
    ------
    ValueError
        If ``chunk_tokens`` is not positive, or if any single hunk on its own
        exceeds ``chunk_tokens``.
    """

    if chunk_tokens <= 0:
        raise ValueError("chunk_tokens must be positive when chunking is enabled")

    chunks: list[str] = []
    current: list[str] = []

    for hunk in hunks:
        if current:
            candidate = "".join([*current, hunk])
            if _count_tokens(candidate, model=model) <= chunk_tokens:
                current.append(hunk)
                continue

        # The hunk starts a new chunk, so it must fit on its own. This is
        # checked wherever the hunk sits in the sequence, not only when it
        # is the first hunk.
        if _count_tokens(hunk, model=model) > chunk_tokens:
            raise ValueError(
                "chunk_tokens is too small to fit a single diff hunk; increase the value or disable chunking"
            )

        if current:
            chunks.append("".join(current))
        current = [hunk]

    if current:
        chunks.append("".join(current))

    return chunks
306
+
307
+
308
def _build_chunk_summary_prompt() -> str:
    """Return the fixed instructions used when summarising one diff chunk."""

    sentences = (
        "You are an expert developer summarising Git diffs.",
        "Write detailed English bullet points describing what changed and why.",
        "Do not copy large code blocks verbatim; focus on behavior and intent.",
        "Be verbose when useful; this summary will later be used to craft a commit message.",
    )
    return " ".join(sentences)
315
+
316
+
317
def _summarise_diff_chunks(
    chunks: list[str],
    model: str,
    client: OpenAI,
    /,
) -> list[tuple[str, Response]]:
    """Ask the model for a summary of each diff chunk, one request per chunk.

    Returns
    -------
    list[tuple[str, Response]]
        For every chunk, in order, the stripped summary text and the response
        object it came from. Empty when ``chunks`` is empty.

    Raises
    ------
    RuntimeError
        If any response has no text after stripping.
    """

    results: list[tuple[str, Response]] = []
    if not chunks:
        return results

    prompt = _build_chunk_summary_prompt()

    for piece in chunks:
        message = {
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": f"# Diff chunk\n{piece}",
                }
            ],
        }
        response = client.responses.create(
            model=model,
            instructions=prompt,
            input=[message],
        )

        summary: str = (response.output_text or "").strip()
        if not summary:
            raise RuntimeError("An empty chunk summary was generated.")

        results.append((summary, response))

    return results
353
+
354
+
355
def _generate_commit_from_summaries(
    summaries: list[str],
    hint: str | None,
    model: str,
    single_line: bool,
    subject_max: int | None,
    language: str,
    client: OpenAI,
    /,
) -> tuple[str, Response]:
    """Generate the final commit message from chunk summaries.

    The user message contains the optional auxiliary context followed by the
    numbered summaries (or a placeholder heading when there are none).

    Returns
    -------
    tuple[str, Response]
        The stripped commit message text and the response object.

    Raises
    ------
    RuntimeError
        If the response has no text after stripping.
    """

    if summaries:
        numbered = "\n\n".join(
            f"Summary {position}:\n{summary}"
            for position, summary in enumerate(summaries, start=1)
        )
        summary_section = "# Combined summaries of the commit (in English)\n" + numbered
    else:
        summary_section = "# No summaries available"

    sections: list[str] = [summary_section]
    if hint:
        # The auxiliary context always comes first.
        sections.insert(0, f"# Auxiliary context (user-provided)\n{hint}")

    message = {
        "role": "user",
        "content": [
            {
                "type": "input_text",
                "text": "\n\n".join(sections),
            }
        ],
    }
    response = client.responses.create(
        model=model,
        instructions=_instructions(single_line, subject_max, language),
        input=[message],
    )

    commit_text: str = (response.output_text or "").strip()
    if not commit_text:
        raise RuntimeError("An empty commit message was generated from summaries.")

    return commit_text, response
404
+
405
+
406
+ def _build_combined_prompt(
407
+ diff: str,
408
+ hint: str | None,
409
+ content_label: str = "Changes (diff)",
410
+ /,
411
+ ) -> str:
412
+ """Compose a combined string of hint and content for debug/info output."""
413
+
414
+ hint_content: str | None = (
415
+ f"# Auxiliary context (user-provided)\n{hint}" if hint else None
416
+ )
417
+ content: str = f"# {content_label}\n{diff}"
418
+ return "\n\n".join([part for part in (hint_content, content) if part is not None])
419
+
420
+
421
def generate_commit_message(
    diff: str,
    hint: str | None,
    model: str | None,
    single_line: bool = False,
    subject_max: int | None = None,
    language: str | None = None,
    chunk_tokens: int | None = 0,
    /,
) -> str:
    """Generate a commit message using an OpenAI GPT model.

    Delegates to ``generate_commit_message_with_info`` so that the chunking
    and summarisation pipeline is implemented in exactly one place, and
    returns only the message text.

    Parameters
    ----------
    diff
        The staged changes as diff text.
    hint
        Optional user-provided auxiliary context.
    model
        Model name; resolved from the environment or the default when empty.
    single_line
        Ask for a subject line only.
    subject_max
        Maximum subject length to request.
    language
        Target locale tag; resolved from the environment or the default when
        empty.
    chunk_tokens
        ``-1`` uses the one-shot prompt without summarisation, a positive
        value chunks the diff by that token budget, and any other value
        (including ``None``) summarises the whole diff as a single chunk.

    Raises
    ------
    RuntimeError
        If ``OPENAI_API_KEY`` is not set or the model returns empty text.
    ValueError
        If a positive ``chunk_tokens`` is too small for a single diff hunk.
    """

    result = generate_commit_message_with_info(
        diff,
        hint,
        model,
        single_line,
        subject_max,
        language,
        chunk_tokens,
    )
    return result.message
485
+
486
+
487
def generate_commit_message_with_info(
    diff: str,
    hint: str | None,
    model: str | None,
    single_line: bool = False,
    subject_max: int | None = None,
    language: str | None = None,
    chunk_tokens: int | None = 0,
    /,
) -> CommitMessageResult:
    """Return the OpenAI GPT call result together with debugging information.

    Parameters
    ----------
    diff
        The staged changes as diff text.
    hint
        Optional user-provided auxiliary context.
    model
        Model name; resolved from the environment or the default when empty.
    single_line
        Ask for a subject line only.
    subject_max
        Maximum subject length to request.
    language
        Target locale tag; resolved from the environment or the default when
        empty.
    chunk_tokens
        ``-1`` uses the one-shot prompt without summarisation, a positive
        value chunks the diff by that token budget, and any other value
        (including ``None``) summarises the whole diff as a single chunk.

    Returns
    -------
    CommitMessageResult
        The generated message, token usage, and prompt/response text. In the
        summarising modes the token counts are summed over the summary
        requests and the final request.

    Raises
    ------
    RuntimeError
        If ``OPENAI_API_KEY`` is not set or the model returns empty text.
    ValueError
        If a positive ``chunk_tokens`` is too small for a single diff hunk.
    """

    chosen_model: str = _resolve_model(model)
    chosen_language: str = _resolve_language(language)
    api_key = environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("The OPENAI_API_KEY environment variable is required.")

    client = OpenAI(api_key=api_key)

    normalized_chunk_tokens = 0 if chunk_tokens is None else chunk_tokens

    # Declared once for both paths; they stay None when no usage is reported.
    prompt_tokens: int | None = None
    completion_tokens: int | None = None
    total_tokens: int | None = None
    response_id: str | None

    if normalized_chunk_tokens == -1:
        # Legacy one-shot path: send the raw diff with no summarisation step.
        combined_prompt = _build_combined_prompt(diff, hint)
        input_items = _build_responses_input(diff, hint)

        resp = client.responses.create(
            model=chosen_model,
            instructions=_instructions(single_line, subject_max, chosen_language),
            input=input_items,
        )

        response_text = (resp.output_text or "").strip()
        response_id = resp.id
        if resp.usage is not None:
            total_tokens = resp.usage.total_tokens
            prompt_tokens = resp.usage.input_tokens
            completion_tokens = resp.usage.output_tokens
    else:
        hunks = _split_diff_into_hunks(diff)
        if normalized_chunk_tokens > 0:
            chunks = _build_diff_chunks(
                hunks,
                normalized_chunk_tokens,
                chosen_model,
            )
        else:
            # Zero (and any negative value other than -1): one chunk in total.
            chunks = ["".join(hunks) if hunks else diff]

        summary_pairs = _summarise_diff_chunks(
            chunks,
            chosen_model,
            client,
        )
        summary_texts = [text for text, _ in summary_pairs]
        response_text, final_resp = _generate_commit_from_summaries(
            summary_texts,
            hint,
            chosen_model,
            single_line,
            subject_max,
            chosen_language,
            client,
        )

        # Sum the usage of the final request and of every summary request.
        usages = [final_resp.usage, *(resp.usage for _, resp in summary_pairs)]
        for usage in usages:
            if usage is None:
                continue
            total_tokens = (total_tokens or 0) + (usage.total_tokens or 0)
            prompt_tokens = (prompt_tokens or 0) + (usage.input_tokens or 0)
            completion_tokens = (completion_tokens or 0) + (usage.output_tokens or 0)

        combined_prompt = _build_combined_prompt(
            "\n".join(summary_texts),
            hint,
            "Combined summaries (English)",
        )
        response_id = final_resp.id

    if not response_text:
        raise RuntimeError("An empty commit message was generated.")

    return CommitMessageResult(
        message=response_text,
        model=chosen_model,
        prompt=combined_prompt,
        response_text=response_text,
        response_id=response_id,
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=total_tokens,
    )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: git-commit-message
3
- Version: 0.5.1
3
+ Version: 0.6.0
4
4
  Summary: Generate Git commit messages from staged changes using OpenAI GPT
5
5
  Maintainer-email: Mina Her <minacle@live.com>
6
6
  License: This is free and unencumbered software released into the public domain.
@@ -45,6 +45,7 @@ Requires-Python: >=3.13
45
45
  Description-Content-Type: text/markdown
46
46
  Requires-Dist: babel>=2.17.0
47
47
  Requires-Dist: openai>=2.6.1
48
+ Requires-Dist: tiktoken>=0.12.0
48
49
 
49
50
  # git-commit-message
50
51
 
@@ -115,6 +116,19 @@ git-commit-message --one-line "optional context"
115
116
  git-commit-message --one-line --max-length 50 "optional context"
116
117
  ```
117
118
 
119
+ - Chunk long diffs by token budget (0 = single chunk + summary, -1 = disable chunking and summarisation):
120
+
121
+ ```sh
122
+ # force a single summary pass over the whole diff (default)
123
+ git-commit-message --chunk-tokens 0 "optional context"
124
+
125
+ # chunk the diff into ~4000-token pieces before summarising
126
+ git-commit-message --chunk-tokens 4000 "optional context"
127
+
128
+ # disable summarisation and use the legacy one-shot prompt
129
+ git-commit-message --chunk-tokens -1 "optional context"
130
+ ```
131
+
118
132
  - Commit immediately with editor:
119
133
 
120
134
  ```sh
@@ -144,6 +158,7 @@ Environment:
144
158
  - `OPENAI_API_KEY`: required
145
159
  - `GIT_COMMIT_MESSAGE_MODEL` or `OPENAI_MODEL`: optional (default: `gpt-5-mini`)
146
160
  - `GIT_COMMIT_MESSAGE_LANGUAGE`: optional (default: `en-GB`)
161
+ - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: optional token budget per diff chunk (default: 0 = single chunk + summary; -1 disables summarisation)
147
162
 
148
163
  ## AI‑generated code notice
149
164
 
@@ -1,2 +1,3 @@
1
1
  babel>=2.17.0
2
2
  openai>=2.6.1
3
+ tiktoken>=0.12.0
@@ -1,312 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from openai.types.responses import ResponseInputParam
4
-
5
- """Generate Git commit messages by calling an OpenAI GPT model.
6
-
7
- Migrated to use OpenAI Responses API (client.responses.create).
8
- """
9
-
10
- import os
11
- from typing import Final
12
- from babel import Locale
13
- from openai import OpenAI
14
-
15
-
16
- _DEFAULT_MODEL: Final[str] = "gpt-5-mini"
17
- _DEFAULT_LANGUAGE: Final[str] = "en-GB"
18
-
19
- def _build_system_prompt(
20
- *,
21
- single_line: bool,
22
- subject_max: int | None,
23
- language: str,
24
- ) -> str:
25
- display_language: str = _language_display(language)
26
- max_len = subject_max or 72
27
- if single_line:
28
- return (
29
- f"You are an expert Git commit message generator. "
30
- f"Always use '{display_language}' spelling and style. "
31
- f"Return a single-line imperative subject only (<= {max_len} chars). "
32
- f"Do not include a body, bullet points, or any rationale. Do not include any line breaks. "
33
- f"Consider the user-provided auxiliary context if present. "
34
- f"Return only the commit message text (no code fences or prefixes like 'Commit message:')."
35
- )
36
- return (
37
- f"You are an expert Git commit message generator. "
38
- f"Always use '{display_language}' spelling and style. "
39
- f"The subject line is mandatory: you MUST start the output with the subject as the very first non-empty line, "
40
- f"in imperative mood, and keep it <= {max_len} chars. Insert exactly one blank line after the subject. "
41
- f"Never start with bullets, headings, labels, or any other text. Then include a body in this format.\n\n"
42
- f"Example format (do not include the --- lines in the output):\n\n"
43
- f"---\n\n"
44
- f"<Subject line>\n\n"
45
- f"- <detail 1>\n"
46
- f"- <detail 2>\n"
47
- f"- <detail N>\n\n"
48
- f"<Rationale label translated into the target language>: <1-2 concise sentences explaining the intent and why>\n\n"
49
- f"---\n\n"
50
- f"Guidelines:\n"
51
- f"- The first non-empty line MUST be the subject line; include exactly one blank line after it.\n"
52
- f"- Never place bullets, headings, or labels before the subject line.\n"
53
- f"- Use '-' bullets; keep each bullet short (<= 1 line).\n"
54
- f"- Prefer imperative mood verbs (Add, Fix, Update, Remove, Refactor, Document, etc.).\n"
55
- f"- Focus on what changed and why; avoid copying diff hunks verbatim.\n"
56
- f"- The only allowed label is the equivalent of 'Rationale:' translated into the target language; do not add other headings or prefaces.\n"
57
- f"- All text (subject, bullets, rationale label, rationale content) MUST be in the target language: '{display_language}'. Do not mix other languages.\n"
58
- f"- Do not include the '---' delimiter lines, code fences, or any surrounding labels like 'Commit message:'.\n"
59
- f"- Do not copy or reuse any example text verbatim; produce original content based on the provided diff and context.\n"
60
- f"- If few details are necessary, include at least one bullet summarising the key change.\n"
61
- f"- If you cannot provide any body content, still output the subject line; the subject line must never be omitted.\n"
62
- f"- Consider the user-provided auxiliary context if present.\n"
63
- f"Return only the commit message text in the above format (no code fences or extra labels)."
64
- )
65
-
66
-
67
- def _language_display(language: str) -> str:
68
- """Return a human-friendly language display like 'ko-KR, Korean (South Korea)'."""
69
-
70
- try:
71
- locale = Locale.parse(language, sep="-")
72
- except Exception:
73
- return language
74
-
75
- tag_parts = [
76
- locale.language,
77
- locale.script,
78
- locale.territory,
79
- locale.variant,
80
- ]
81
- tag = "-".join(part for part in tag_parts if part)
82
- if not tag:
83
- return language
84
-
85
- english_name = locale.get_display_name("en") or ""
86
- if not english_name:
87
- return f"[{tag}]"
88
-
89
- return f"{english_name.capitalize()} [{tag}]"
90
-
91
-
92
- def _instructions(
93
- *,
94
- single_line: bool,
95
- subject_max: int | None,
96
- language: str,
97
- ) -> str:
98
- """Create the system/developer instructions string for the Responses API."""
99
- return _build_system_prompt(single_line=single_line, subject_max=subject_max, language=language)
100
-
101
-
102
- class CommitMessageResult:
103
- """Hold the generated commit message and debugging information.
104
-
105
- Notes
106
- -----
107
- Treat all fields as read-only by convention.
108
- """
109
-
110
- __slots__ = (
111
- "message",
112
- "model",
113
- "prompt",
114
- "response_text",
115
- "response_id",
116
- "prompt_tokens",
117
- "completion_tokens",
118
- "total_tokens",
119
- )
120
-
121
- def __init__(
122
- self,
123
- /,
124
- *,
125
- message: str,
126
- model: str,
127
- prompt: str,
128
- response_text: str,
129
- response_id: str | None,
130
- prompt_tokens: int | None,
131
- completion_tokens: int | None,
132
- total_tokens: int | None,
133
- ) -> None:
134
- self.message = message
135
- self.model = model
136
- self.prompt = prompt
137
- self.response_text = response_text
138
- self.response_id = response_id
139
- self.prompt_tokens = prompt_tokens
140
- self.completion_tokens = completion_tokens
141
- self.total_tokens = total_tokens
142
-
143
-
144
- def _resolve_model(
145
- *,
146
- model: str | None,
147
- ) -> str:
148
- """Resolve the model name."""
149
-
150
- return (
151
- model
152
- or os.environ.get("GIT_COMMIT_MESSAGE_MODEL")
153
- or os.environ.get("OPENAI_MODEL")
154
- or _DEFAULT_MODEL
155
- )
156
-
157
-
158
- def _resolve_language(
159
- *,
160
- language: str | None,
161
- ) -> str:
162
- """Resolve the target language/locale tag used for output style."""
163
-
164
- return (
165
- language
166
- or os.environ.get("GIT_COMMIT_MESSAGE_LANGUAGE")
167
- or _DEFAULT_LANGUAGE
168
- )
169
-
170
-
171
- def _build_responses_input(
172
- *,
173
- diff: str,
174
- hint: str | None,
175
- ) -> ResponseInputParam:
176
- """Compose Responses API input items, separating auxiliary context and diff.
177
-
178
- Returns
179
- -------
180
- ResponseInputParam
181
- The list of input items to send to the Responses API.
182
- """
183
-
184
- hint_content: str | None = (
185
- f"# Auxiliary context (user-provided)\n{hint}" if hint else None
186
- )
187
- diff_content: str = f"# Changes (diff)\n{diff}"
188
-
189
- input_items: ResponseInputParam = []
190
- if hint_content:
191
- input_items.append(
192
- {
193
- "role": "user",
194
- "content": [
195
- {"type": "input_text", "text": hint_content},
196
- ],
197
- }
198
- )
199
- input_items.append(
200
- {
201
- "role": "user",
202
- "content": [
203
- {"type": "input_text", "text": diff_content},
204
- ],
205
- }
206
- )
207
-
208
- return input_items
209
-
210
-
211
- def _build_combined_prompt(*, diff: str, hint: str | None) -> str:
212
- """Compose a combined string of hint and diff for debug/info output."""
213
- hint_content: str | None = (
214
- f"# Auxiliary context (user-provided)\n{hint}" if hint else None
215
- )
216
- diff_content: str = f"# Changes (diff)\n{diff}"
217
- return "\n\n".join([part for part in (hint_content, diff_content) if part is not None])
218
-
219
-
220
- def generate_commit_message(
221
- *,
222
- diff: str,
223
- hint: str | None,
224
- model: str | None,
225
- single_line: bool = False,
226
- subject_max: int | None = None,
227
- language: str | None = None,
228
- ) -> str:
229
- """Generate a commit message using an OpenAI GPT model."""
230
-
231
- chosen_model: str = _resolve_model(model=model)
232
- chosen_language: str = _resolve_language(language=language)
233
- api_key = os.environ.get("OPENAI_API_KEY")
234
- if not api_key:
235
- raise RuntimeError("The OPENAI_API_KEY environment variable is required.")
236
-
237
- client = OpenAI(api_key=api_key)
238
-
239
- input_items = _build_responses_input(diff=diff, hint=hint)
240
-
241
- # Use Responses API to generate a single response (send hint and diff as separate user inputs)
242
- resp = client.responses.create(
243
- model=chosen_model,
244
- instructions=_instructions(single_line=single_line, subject_max=subject_max, language=chosen_language),
245
- input=input_items,
246
- )
247
-
248
- # Prefer SDK convenience aggregate text if available
249
- text: str = (resp.output_text or "").strip()
250
- if not text:
251
- raise RuntimeError("An empty commit message was generated.")
252
- return text
253
-
254
-
255
- def generate_commit_message_with_info(
256
- *,
257
- diff: str,
258
- hint: str | None,
259
- model: str | None,
260
- single_line: bool = False,
261
- subject_max: int | None = None,
262
- language: str | None = None,
263
- ) -> CommitMessageResult:
264
- """Return the OpenAI GPT call result together with debugging information.
265
-
266
- Returns
267
- -------
268
- CommitMessageResult
269
- The generated message, token usage, and prompt/response text.
270
- """
271
-
272
- chosen_model: str = _resolve_model(model=model)
273
- chosen_language: str = _resolve_language(language=language)
274
- api_key = os.environ.get("OPENAI_API_KEY")
275
- if not api_key:
276
- raise RuntimeError("The OPENAI_API_KEY environment variable is required.")
277
-
278
- client = OpenAI(api_key=api_key)
279
- combined_prompt = _build_combined_prompt(diff=diff, hint=hint)
280
- input_items = _build_responses_input(diff=diff, hint=hint)
281
-
282
- resp = client.responses.create(
283
- model=chosen_model,
284
- instructions=_instructions(single_line=single_line, subject_max=subject_max, language=chosen_language),
285
- input=input_items,
286
- )
287
-
288
- response_text: str = (resp.output_text or "").strip()
289
- if not response_text:
290
- raise RuntimeError("An empty commit message was generated.")
291
-
292
- response_id: str | None = resp.id
293
- usage = resp.usage
294
- prompt_tokens: int | None = None
295
- completion_tokens: int | None = None
296
- total_tokens: int | None = None
297
- if usage is not None:
298
- # Responses API exposes input/output/total token fields.
299
- total_tokens = usage.total_tokens
300
- prompt_tokens = usage.input_tokens
301
- completion_tokens = usage.output_tokens
302
-
303
- return CommitMessageResult(
304
- message=response_text,
305
- model=chosen_model,
306
- prompt=combined_prompt,
307
- response_text=response_text,
308
- response_id=response_id,
309
- prompt_tokens=prompt_tokens,
310
- completion_tokens=completion_tokens,
311
- total_tokens=total_tokens,
312
- )