git-commit-message 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,4 @@ This module exposes only public symbols in accordance with the codestyle guide.
5
5
 
6
6
  from ._cli import main
7
7
 
8
- __all__ = (
9
- "main",
10
- )
8
+ __all__ = ("main",)
@@ -1,17 +1,24 @@
1
- from __future__ import annotations
2
-
3
1
  """Command-line interface entry point.
4
2
 
5
3
  Collect staged changes from the repository and call an OpenAI GPT model
6
4
  to generate a commit message, or create a commit straight away.
7
5
  """
8
6
 
7
+ from __future__ import annotations
8
+
9
9
  from argparse import ArgumentParser, Namespace
10
+ from os import environ
10
11
  from pathlib import Path
11
- import sys
12
+ from sys import exit as sys_exit
13
+ from sys import stderr
12
14
  from typing import Final
13
15
 
14
- from ._git import commit_with_message, get_repo_root, get_staged_diff, has_staged_changes
16
+ from ._git import (
17
+ commit_with_message,
18
+ get_repo_root,
19
+ get_staged_diff,
20
+ has_staged_changes,
21
+ )
15
22
  from ._gpt import (
16
23
  generate_commit_message,
17
24
  generate_commit_message_with_info,
@@ -19,6 +26,18 @@ from ._gpt import (
19
26
  )
20
27
 
21
28
 
29
+ def _env_chunk_tokens_default() -> int | None:
30
+ """Return chunk token default from env if valid, else None."""
31
+
32
+ raw: str | None = environ.get("GIT_COMMIT_MESSAGE_CHUNK_TOKENS")
33
+ if raw is None:
34
+ return None
35
+ try:
36
+ return int(raw)
37
+ except ValueError:
38
+ return None
39
+
40
+
22
41
  def _build_parser() -> ArgumentParser:
23
42
  """Create the CLI argument parser.
24
43
 
@@ -92,12 +111,24 @@ def _build_parser() -> ArgumentParser:
92
111
  help="Maximum subject (first line) length (default: 72).",
93
112
  )
94
113
 
114
+ parser.add_argument(
115
+ "--chunk-tokens",
116
+ dest="chunk_tokens",
117
+ type=int,
118
+ default=None,
119
+ help=(
120
+ "Target token budget per diff chunk. "
121
+ "0 forces a single chunk with summarisation; -1 disables summarisation (legacy one-shot). "
122
+ "If omitted, uses GIT_COMMIT_MESSAGE_CHUNK_TOKENS when set (default: 0)."
123
+ ),
124
+ )
125
+
95
126
  return parser
96
127
 
97
128
 
98
129
  def _run(
99
- *,
100
130
  args: Namespace,
131
+ /,
101
132
  ) -> int:
102
133
  """Main execution logic.
103
134
 
@@ -114,37 +145,45 @@ def _run(
114
145
 
115
146
  repo_root: Path = get_repo_root()
116
147
 
117
- if not has_staged_changes(cwd=repo_root):
118
- print("No staged changes. Run 'git add' and try again.", file=sys.stderr)
148
+ if not has_staged_changes(repo_root):
149
+ print("No staged changes. Run 'git add' and try again.", file=stderr)
119
150
  return 2
120
151
 
121
- diff_text: str = get_staged_diff(cwd=repo_root)
152
+ diff_text: str = get_staged_diff(repo_root)
122
153
 
123
154
  hint: str | None = args.description if isinstance(args.description, str) else None
124
155
 
156
+ chunk_tokens: int | None = args.chunk_tokens
157
+ if chunk_tokens is None:
158
+ chunk_tokens = _env_chunk_tokens_default()
159
+ if chunk_tokens is None:
160
+ chunk_tokens = 0
161
+
125
162
  result: CommitMessageResult | None = None
126
163
  try:
127
164
  if args.debug:
128
165
  result = generate_commit_message_with_info(
129
- diff=diff_text,
130
- hint=hint,
131
- model=args.model,
132
- single_line=getattr(args, "one_line", False),
133
- subject_max=getattr(args, "max_length", None),
134
- language=getattr(args, "language", None),
166
+ diff_text,
167
+ hint,
168
+ args.model,
169
+ getattr(args, "one_line", False),
170
+ getattr(args, "max_length", None),
171
+ getattr(args, "language", None),
172
+ chunk_tokens,
135
173
  )
136
174
  message = result.message
137
175
  else:
138
176
  message = generate_commit_message(
139
- diff=diff_text,
140
- hint=hint,
141
- model=args.model,
142
- single_line=getattr(args, "one_line", False),
143
- subject_max=getattr(args, "max_length", None),
144
- language=getattr(args, "language", None),
177
+ diff_text,
178
+ hint,
179
+ args.model,
180
+ getattr(args, "one_line", False),
181
+ getattr(args, "max_length", None),
182
+ getattr(args, "language", None),
183
+ chunk_tokens,
145
184
  )
146
185
  except Exception as exc: # noqa: BLE001 - to preserve standard output messaging
147
- print(f"Failed to generate commit message: {exc}", file=sys.stderr)
186
+ print(f"Failed to generate commit message: {exc}", file=stderr)
148
187
  return 3
149
188
 
150
189
  # Option: force single-line message
@@ -198,9 +237,9 @@ def _run(
198
237
  print(message)
199
238
 
200
239
  if args.edit:
201
- rc: int = commit_with_message(message=message, edit=True, cwd=repo_root)
240
+ rc: int = commit_with_message(message, True, repo_root)
202
241
  else:
203
- rc = commit_with_message(message=message, edit=False, cwd=repo_root)
242
+ rc = commit_with_message(message, False, repo_root)
204
243
 
205
244
  return rc
206
245
 
@@ -215,8 +254,8 @@ def main() -> None:
215
254
  args: Namespace = parser.parse_args()
216
255
 
217
256
  if args.edit and not args.commit:
218
- print("'--edit' must be used together with '--commit'.", file=sys.stderr)
219
- sys.exit(2)
257
+ print("'--edit' must be used together with '--commit'.", file=stderr)
258
+ sys_exit(2)
220
259
 
221
- code: int = _run(args=args)
222
- sys.exit(code)
260
+ code: int = _run(args)
261
+ sys_exit(code)
@@ -1,18 +1,18 @@
1
- from __future__ import annotations
2
-
3
1
  """Git-related helper functions.
4
2
 
5
3
  Provides repository root discovery, extraction of staged changes, and
6
4
  creating commits from a message.
7
5
  """
8
6
 
7
+ from __future__ import annotations
8
+
9
9
  from pathlib import Path
10
- import subprocess
10
+ from subprocess import CalledProcessError, check_call, check_output, run
11
11
 
12
12
 
13
13
  def get_repo_root(
14
- *,
15
14
  cwd: Path | None = None,
15
+ /,
16
16
  ) -> Path:
17
17
  """Find the repository root from the current working directory.
18
18
 
@@ -29,7 +29,7 @@ def get_repo_root(
29
29
 
30
30
  start: Path = cwd or Path.cwd()
31
31
  try:
32
- out: bytes = subprocess.check_output(
32
+ out: bytes = check_output(
33
33
  [
34
34
  "git",
35
35
  "rev-parse",
@@ -37,7 +37,7 @@ def get_repo_root(
37
37
  ],
38
38
  cwd=str(start),
39
39
  )
40
- except subprocess.CalledProcessError as exc: # noqa: TRY003
40
+ except CalledProcessError as exc: # noqa: TRY003
41
41
  raise RuntimeError("Not a Git repository.") from exc
42
42
 
43
43
  root = Path(out.decode().strip())
@@ -45,28 +45,28 @@ def get_repo_root(
45
45
 
46
46
 
47
47
  def has_staged_changes(
48
- *,
49
48
  cwd: Path,
49
+ /,
50
50
  ) -> bool:
51
51
  """Check whether there are staged changes."""
52
52
 
53
53
  try:
54
- subprocess.check_call(
54
+ check_call(
55
55
  ["git", "diff", "--cached", "--quiet", "--exit-code"],
56
56
  cwd=str(cwd),
57
57
  )
58
58
  return False
59
- except subprocess.CalledProcessError:
59
+ except CalledProcessError:
60
60
  return True
61
61
 
62
62
 
63
63
  def get_staged_diff(
64
- *,
65
64
  cwd: Path,
65
+ /,
66
66
  ) -> str:
67
67
  """Return the staged changes as diff text."""
68
68
 
69
- out: bytes = subprocess.check_output(
69
+ out: bytes = check_output(
70
70
  [
71
71
  "git",
72
72
  "diff",
@@ -81,10 +81,10 @@ def get_staged_diff(
81
81
 
82
82
 
83
83
  def commit_with_message(
84
- *,
85
84
  message: str,
86
85
  edit: bool,
87
86
  cwd: Path,
87
+ /,
88
88
  ) -> int:
89
89
  """Create a commit with the given message.
90
90
 
@@ -108,7 +108,7 @@ def commit_with_message(
108
108
  cmd.append("--edit")
109
109
 
110
110
  try:
111
- completed = subprocess.run(cmd, cwd=str(cwd), check=False)
111
+ completed = run(cmd, cwd=str(cwd), check=False)
112
112
  return int(completed.returncode)
113
113
  except OSError as exc: # e.g., editor launch failure, etc.
114
114
  raise RuntimeError(f"Failed to run 'git commit': {exc}") from exc
@@ -1,31 +1,53 @@
1
- from __future__ import annotations
2
-
3
- from openai.types.responses import ResponseInputParam
4
-
5
1
  """Generate Git commit messages by calling an OpenAI GPT model.
6
2
 
7
3
  Migrated to use OpenAI Responses API (client.responses.create).
8
4
  """
9
5
 
10
- import os
11
- from typing import Final
6
+ from __future__ import annotations
7
+
8
+ from babel import Locale
12
9
  from openai import OpenAI
10
+ from openai.types.responses import Response, ResponseInputParam
11
+ from os import environ
12
+ from tiktoken import Encoding, encoding_for_model, get_encoding
13
+ from typing import Final
13
14
 
14
15
 
15
16
  _DEFAULT_MODEL: Final[str] = "gpt-5-mini"
16
17
  _DEFAULT_LANGUAGE: Final[str] = "en-GB"
17
18
 
18
- def _build_system_prompt(
19
+
20
+ def _encoding_for_model(
21
+ model: str,
22
+ /,
23
+ ) -> Encoding:
24
+ try:
25
+ return encoding_for_model(model)
26
+ except Exception:
27
+ return get_encoding("cl100k_base")
28
+
29
+
30
+ def _count_tokens(
31
+ text: str,
19
32
  *,
33
+ model: str,
34
+ ) -> int:
35
+ encoding = _encoding_for_model(model)
36
+ return len(encoding.encode(text))
37
+
38
+
39
+ def _build_system_prompt(
20
40
  single_line: bool,
21
41
  subject_max: int | None,
22
42
  language: str,
43
+ /,
23
44
  ) -> str:
45
+ display_language: str = _language_display(language)
24
46
  max_len = subject_max or 72
25
47
  if single_line:
26
48
  return (
27
49
  f"You are an expert Git commit message generator. "
28
- f"Always use '{language}' spelling and style. "
50
+ f"Always use '{display_language}' spelling and style. "
29
51
  f"Return a single-line imperative subject only (<= {max_len} chars). "
30
52
  f"Do not include a body, bullet points, or any rationale. Do not include any line breaks. "
31
53
  f"Consider the user-provided auxiliary context if present. "
@@ -33,7 +55,7 @@ def _build_system_prompt(
33
55
  )
34
56
  return (
35
57
  f"You are an expert Git commit message generator. "
36
- f"Always use '{language}' spelling and style. "
58
+ f"Always use '{display_language}' spelling and style. "
37
59
  f"The subject line is mandatory: you MUST start the output with the subject as the very first non-empty line, "
38
60
  f"in imperative mood, and keep it <= {max_len} chars. Insert exactly one blank line after the subject. "
39
61
  f"Never start with bullets, headings, labels, or any other text. Then include a body in this format.\n\n"
@@ -52,6 +74,7 @@ def _build_system_prompt(
52
74
  f"- Prefer imperative mood verbs (Add, Fix, Update, Remove, Refactor, Document, etc.).\n"
53
75
  f"- Focus on what changed and why; avoid copying diff hunks verbatim.\n"
54
76
  f"- The only allowed label is the equivalent of 'Rationale:' translated into the target language; do not add other headings or prefaces.\n"
77
+ f"- All text (subject, bullets, rationale label, rationale content) MUST be in the target language: '{display_language}'. Do not mix other languages.\n"
55
78
  f"- Do not include the '---' delimiter lines, code fences, or any surrounding labels like 'Commit message:'.\n"
56
79
  f"- Do not copy or reuse any example text verbatim; produce original content based on the provided diff and context.\n"
57
80
  f"- If few details are necessary, include at least one bullet summarising the key change.\n"
@@ -61,14 +84,42 @@ def _build_system_prompt(
61
84
  )
62
85
 
63
86
 
87
+ def _language_display(
88
+ language: str,
89
+ /,
90
+ ) -> str:
91
+ """Return a human-friendly language display like 'ko-KR, Korean (South Korea)'."""
92
+
93
+ try:
94
+ locale = Locale.parse(language, sep="-")
95
+ except Exception:
96
+ return language
97
+
98
+ tag_parts = [
99
+ locale.language,
100
+ locale.script,
101
+ locale.territory,
102
+ locale.variant,
103
+ ]
104
+ tag = "-".join(part for part in tag_parts if part)
105
+ if not tag:
106
+ return language
107
+
108
+ english_name = locale.get_display_name("en") or ""
109
+ if not english_name:
110
+ return f"[{tag}]"
111
+
112
+ return f"{english_name.capitalize()} [{tag}]"
113
+
114
+
64
115
  def _instructions(
65
- *,
66
116
  single_line: bool,
67
117
  subject_max: int | None,
68
118
  language: str,
119
+ /,
69
120
  ) -> str:
70
121
  """Create the system/developer instructions string for the Responses API."""
71
- return _build_system_prompt(single_line=single_line, subject_max=subject_max, language=language)
122
+ return _build_system_prompt(single_line, subject_max, language)
72
123
 
73
124
 
74
125
  class CommitMessageResult:
@@ -114,36 +165,32 @@ class CommitMessageResult:
114
165
 
115
166
 
116
167
  def _resolve_model(
117
- *,
118
168
  model: str | None,
169
+ /,
119
170
  ) -> str:
120
171
  """Resolve the model name."""
121
172
 
122
173
  return (
123
174
  model
124
- or os.environ.get("GIT_COMMIT_MESSAGE_MODEL")
125
- or os.environ.get("OPENAI_MODEL")
175
+ or environ.get("GIT_COMMIT_MESSAGE_MODEL")
176
+ or environ.get("OPENAI_MODEL")
126
177
  or _DEFAULT_MODEL
127
178
  )
128
179
 
129
180
 
130
181
  def _resolve_language(
131
- *,
132
182
  language: str | None,
183
+ /,
133
184
  ) -> str:
134
185
  """Resolve the target language/locale tag used for output style."""
135
186
 
136
- return (
137
- language
138
- or os.environ.get("GIT_COMMIT_MESSAGE_LANGUAGE")
139
- or _DEFAULT_LANGUAGE
140
- )
187
+ return language or environ.get("GIT_COMMIT_MESSAGE_LANGUAGE") or _DEFAULT_LANGUAGE
141
188
 
142
189
 
143
190
  def _build_responses_input(
144
- *,
145
191
  diff: str,
146
192
  hint: str | None,
193
+ /,
147
194
  ) -> ResponseInputParam:
148
195
  """Compose Responses API input items, separating auxiliary context and diff.
149
196
 
@@ -180,58 +227,272 @@ def _build_responses_input(
180
227
  return input_items
181
228
 
182
229
 
183
- def _build_combined_prompt(*, diff: str, hint: str | None) -> str:
184
- """Compose a combined string of hint and diff for debug/info output."""
230
+ def _split_diff_into_hunks(
231
+ diff: str,
232
+ /,
233
+ ) -> list[str]:
234
+ lines = diff.splitlines(keepends=True)
235
+ hunks: list[str] = []
236
+ file_header: list[str] = []
237
+ current_hunk: list[str] | None = None
238
+
239
+ for line in lines:
240
+ if line.startswith("diff --git "):
241
+ if current_hunk:
242
+ hunks.append("".join(current_hunk))
243
+ current_hunk = None
244
+ file_header = [line]
245
+ continue
246
+
247
+ if line.startswith("@@"):
248
+ if current_hunk:
249
+ hunks.append("".join(current_hunk))
250
+ base_header = file_header[:] if file_header else []
251
+ current_hunk = base_header + [line]
252
+ continue
253
+
254
+ if current_hunk is not None:
255
+ current_hunk.append(line)
256
+ continue
257
+
258
+ if file_header:
259
+ file_header.append(line)
260
+ continue
261
+
262
+ # Lines outside a diff header/hunk; keep as standalone hunk
263
+ current_hunk = [line]
264
+
265
+ if current_hunk:
266
+ hunks.append("".join(current_hunk))
267
+
268
+ return hunks
269
+
270
+
271
+ def _build_diff_chunks(
272
+ hunks: list[str],
273
+ chunk_tokens: int,
274
+ model: str,
275
+ /,
276
+ ) -> list[str]:
277
+ if chunk_tokens <= 0:
278
+ raise ValueError("chunk_tokens must be positive when chunking is enabled")
279
+
280
+ chunks: list[str] = []
281
+ current: list[str] = []
282
+
283
+ for hunk in hunks:
284
+ candidate = "".join(current + [hunk])
285
+ token_count = _count_tokens(candidate, model=model)
286
+
287
+ if token_count <= chunk_tokens:
288
+ current.append(hunk)
289
+ continue
290
+
291
+ if current:
292
+ chunks.append("".join(current))
293
+ current = [hunk]
294
+ else:
295
+ single_tokens = _count_tokens(hunk, model=model)
296
+ if single_tokens > chunk_tokens:
297
+ raise ValueError(
298
+ "chunk_tokens is too small to fit a single diff hunk; increase the value or disable chunking"
299
+ )
300
+ current = [hunk]
301
+
302
+ if current:
303
+ chunks.append("".join(current))
304
+
305
+ return chunks
306
+
307
+
308
+ def _build_chunk_summary_prompt() -> str:
309
+ return (
310
+ "You are an expert developer summarising Git diffs. "
311
+ "Write detailed English bullet points describing what changed and why. "
312
+ "Do not copy large code blocks verbatim; focus on behavior and intent. "
313
+ "Be verbose when useful; this summary will later be used to craft a commit message."
314
+ )
315
+
316
+
317
+ def _summarise_diff_chunks(
318
+ chunks: list[str],
319
+ model: str,
320
+ client: OpenAI,
321
+ /,
322
+ ) -> list[tuple[str, Response]]:
323
+ if not chunks:
324
+ return []
325
+
326
+ instructions = _build_chunk_summary_prompt()
327
+ summaries: list[tuple[str, Response]] = []
328
+
329
+ for chunk in chunks:
330
+ resp = client.responses.create(
331
+ model=model,
332
+ instructions=instructions,
333
+ input=[
334
+ {
335
+ "role": "user",
336
+ "content": [
337
+ {
338
+ "type": "input_text",
339
+ "text": f"# Diff chunk\n{chunk}",
340
+ }
341
+ ],
342
+ }
343
+ ],
344
+ )
345
+
346
+ text: str = (resp.output_text or "").strip()
347
+ if not text:
348
+ raise RuntimeError("An empty chunk summary was generated.")
349
+
350
+ summaries.append((text, resp))
351
+
352
+ return summaries
353
+
354
+
355
+ def _generate_commit_from_summaries(
356
+ summaries: list[str],
357
+ hint: str | None,
358
+ model: str,
359
+ single_line: bool,
360
+ subject_max: int | None,
361
+ language: str,
362
+ client: OpenAI,
363
+ /,
364
+ ) -> tuple[str, Response]:
365
+ instructions = _instructions(single_line, subject_max, language)
366
+ sections: list[str] = []
367
+
368
+ if hint:
369
+ sections.append(f"# Auxiliary context (user-provided)\n{hint}")
370
+
371
+ if summaries:
372
+ numbered = [
373
+ f"Summary {idx + 1}:\n{summary}" for idx, summary in enumerate(summaries)
374
+ ]
375
+ sections.append(
376
+ "# Combined summaries of the commit (in English)\n" + "\n\n".join(numbered)
377
+ )
378
+ else:
379
+ sections.append("# No summaries available")
380
+
381
+ user_content = "\n\n".join(sections)
382
+
383
+ resp = client.responses.create(
384
+ model=model,
385
+ instructions=instructions,
386
+ input=[
387
+ {
388
+ "role": "user",
389
+ "content": [
390
+ {
391
+ "type": "input_text",
392
+ "text": user_content,
393
+ }
394
+ ],
395
+ }
396
+ ],
397
+ )
398
+
399
+ text: str = (resp.output_text or "").strip()
400
+ if not text:
401
+ raise RuntimeError("An empty commit message was generated from summaries.")
402
+
403
+ return text, resp
404
+
405
+
406
+ def _build_combined_prompt(
407
+ diff: str,
408
+ hint: str | None,
409
+ content_label: str = "Changes (diff)",
410
+ /,
411
+ ) -> str:
412
+ """Compose a combined string of hint and content for debug/info output."""
413
+
185
414
  hint_content: str | None = (
186
415
  f"# Auxiliary context (user-provided)\n{hint}" if hint else None
187
416
  )
188
- diff_content: str = f"# Changes (diff)\n{diff}"
189
- return "\n\n".join([part for part in (hint_content, diff_content) if part is not None])
417
+ content: str = f"# {content_label}\n{diff}"
418
+ return "\n\n".join([part for part in (hint_content, content) if part is not None])
190
419
 
191
420
 
192
421
  def generate_commit_message(
193
- *,
194
422
  diff: str,
195
423
  hint: str | None,
196
424
  model: str | None,
197
425
  single_line: bool = False,
198
426
  subject_max: int | None = None,
199
427
  language: str | None = None,
428
+ chunk_tokens: int | None = 0,
429
+ /,
200
430
  ) -> str:
201
431
  """Generate a commit message using an OpenAI GPT model."""
202
432
 
203
- chosen_model: str = _resolve_model(model=model)
204
- chosen_language: str = _resolve_language(language=language)
205
- api_key = os.environ.get("OPENAI_API_KEY")
433
+ chosen_model: str = _resolve_model(model)
434
+ chosen_language: str = _resolve_language(language)
435
+ api_key = environ.get("OPENAI_API_KEY")
206
436
  if not api_key:
207
437
  raise RuntimeError("The OPENAI_API_KEY environment variable is required.")
208
438
 
209
439
  client = OpenAI(api_key=api_key)
210
440
 
211
- input_items = _build_responses_input(diff=diff, hint=hint)
441
+ normalized_chunk_tokens = 0 if chunk_tokens is None else chunk_tokens
442
+
443
+ if normalized_chunk_tokens != -1:
444
+ hunks = _split_diff_into_hunks(diff)
445
+ if normalized_chunk_tokens == 0:
446
+ chunks = ["".join(hunks) if hunks else diff]
447
+ elif normalized_chunk_tokens > 0:
448
+ chunks = _build_diff_chunks(
449
+ hunks,
450
+ normalized_chunk_tokens,
451
+ chosen_model,
452
+ )
453
+ else:
454
+ chunks = ["".join(hunks) if hunks else diff]
455
+
456
+ summary_pairs = _summarise_diff_chunks(
457
+ chunks,
458
+ chosen_model,
459
+ client,
460
+ )
461
+ summary_texts = [text for text, _ in summary_pairs]
462
+ text, _ = _generate_commit_from_summaries(
463
+ summary_texts,
464
+ hint,
465
+ chosen_model,
466
+ single_line,
467
+ subject_max,
468
+ chosen_language,
469
+ client,
470
+ )
471
+ else:
472
+ input_items = _build_responses_input(diff, hint)
212
473
 
213
- # Use Responses API to generate a single response (send hint and diff as separate user inputs)
214
- resp = client.responses.create(
215
- model=chosen_model,
216
- instructions=_instructions(single_line=single_line, subject_max=subject_max, language=chosen_language),
217
- input=input_items,
218
- )
474
+ resp = client.responses.create(
475
+ model=chosen_model,
476
+ instructions=_instructions(single_line, subject_max, chosen_language),
477
+ input=input_items,
478
+ )
479
+
480
+ text = (resp.output_text or "").strip()
219
481
 
220
- # Prefer SDK convenience aggregate text if available
221
- text: str = (resp.output_text or "").strip()
222
482
  if not text:
223
483
  raise RuntimeError("An empty commit message was generated.")
224
484
  return text
225
485
 
226
486
 
227
487
  def generate_commit_message_with_info(
228
- *,
229
488
  diff: str,
230
489
  hint: str | None,
231
490
  model: str | None,
232
491
  single_line: bool = False,
233
492
  subject_max: int | None = None,
234
493
  language: str | None = None,
494
+ chunk_tokens: int | None = 0,
495
+ /,
235
496
  ) -> CommitMessageResult:
236
497
  """Return the OpenAI GPT call result together with debugging information.
237
498
 
@@ -241,37 +502,96 @@ def generate_commit_message_with_info(
241
502
  The generated message, token usage, and prompt/response text.
242
503
  """
243
504
 
244
- chosen_model: str = _resolve_model(model=model)
245
- chosen_language: str = _resolve_language(language=language)
246
- api_key = os.environ.get("OPENAI_API_KEY")
505
+ chosen_model: str = _resolve_model(model)
506
+ chosen_language: str = _resolve_language(language)
507
+ api_key = environ.get("OPENAI_API_KEY")
247
508
  if not api_key:
248
509
  raise RuntimeError("The OPENAI_API_KEY environment variable is required.")
249
510
 
250
511
  client = OpenAI(api_key=api_key)
251
- combined_prompt = _build_combined_prompt(diff=diff, hint=hint)
252
- input_items = _build_responses_input(diff=diff, hint=hint)
253
512
 
254
- resp = client.responses.create(
255
- model=chosen_model,
256
- instructions=_instructions(single_line=single_line, subject_max=subject_max, language=chosen_language),
257
- input=input_items,
258
- )
513
+ normalized_chunk_tokens = 0 if chunk_tokens is None else chunk_tokens
514
+
515
+ if normalized_chunk_tokens != -1:
516
+ hunks = _split_diff_into_hunks(diff)
517
+ if normalized_chunk_tokens == 0:
518
+ chunks = ["".join(hunks) if hunks else diff]
519
+ elif normalized_chunk_tokens > 0:
520
+ chunks = _build_diff_chunks(
521
+ hunks,
522
+ normalized_chunk_tokens,
523
+ chosen_model,
524
+ )
525
+ else:
526
+ chunks = ["".join(hunks) if hunks else diff]
527
+
528
+ summary_pairs = _summarise_diff_chunks(
529
+ chunks,
530
+ chosen_model,
531
+ client,
532
+ )
533
+ summary_texts = [text for text, _ in summary_pairs]
534
+ response_text, final_resp = _generate_commit_from_summaries(
535
+ summary_texts,
536
+ hint,
537
+ chosen_model,
538
+ single_line,
539
+ subject_max,
540
+ chosen_language,
541
+ client,
542
+ )
543
+
544
+ total_tokens: int | None = None
545
+ prompt_tokens: int | None = None
546
+ completion_tokens: int | None = None
547
+
548
+ if final_resp.usage:
549
+ total_tokens = (total_tokens or 0) + (final_resp.usage.total_tokens or 0)
550
+ prompt_tokens = (prompt_tokens or 0) + (final_resp.usage.input_tokens or 0)
551
+ completion_tokens = (completion_tokens or 0) + (
552
+ final_resp.usage.output_tokens or 0
553
+ )
554
+
555
+ for _, resp in summary_pairs:
556
+ usage = resp.usage
557
+ if usage is None:
558
+ continue
559
+ total_tokens = (total_tokens or 0) + (usage.total_tokens or 0)
560
+ prompt_tokens = (prompt_tokens or 0) + (usage.input_tokens or 0)
561
+ completion_tokens = (completion_tokens or 0) + (usage.output_tokens or 0)
562
+
563
+ combined_prompt = _build_combined_prompt(
564
+ "\n".join(summary_texts),
565
+ hint,
566
+ "Combined summaries (English)",
567
+ )
568
+
569
+ response_id: str | None = final_resp.id
570
+
571
+ else:
572
+ combined_prompt = _build_combined_prompt(diff, hint)
573
+ input_items = _build_responses_input(diff, hint)
574
+
575
+ resp = client.responses.create(
576
+ model=chosen_model,
577
+ instructions=_instructions(single_line, subject_max, chosen_language),
578
+ input=input_items,
579
+ )
580
+
581
+ response_text = (resp.output_text or "").strip()
582
+ response_id = resp.id
583
+ usage = resp.usage
584
+ prompt_tokens: int | None = None
585
+ completion_tokens: int | None = None
586
+ total_tokens: int | None = None
587
+ if usage is not None:
588
+ total_tokens = usage.total_tokens
589
+ prompt_tokens = usage.input_tokens
590
+ completion_tokens = usage.output_tokens
259
591
 
260
- response_text: str = (resp.output_text or "").strip()
261
592
  if not response_text:
262
593
  raise RuntimeError("An empty commit message was generated.")
263
594
 
264
- response_id: str | None = resp.id
265
- usage = resp.usage
266
- prompt_tokens: int | None = None
267
- completion_tokens: int | None = None
268
- total_tokens: int | None = None
269
- if usage is not None:
270
- # Responses API exposes input/output/total token fields.
271
- total_tokens = usage.total_tokens
272
- prompt_tokens = usage.input_tokens
273
- completion_tokens = usage.output_tokens
274
-
275
595
  return CommitMessageResult(
276
596
  message=response_text,
277
597
  model=chosen_model,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: git-commit-message
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Generate Git commit messages from staged changes using OpenAI GPT
5
5
  Maintainer-email: Mina Her <minacle@live.com>
6
6
  License: This is free and unencumbered software released into the public domain.
@@ -43,17 +43,21 @@ Classifier: Programming Language :: Python :: 3.13
43
43
  Classifier: Topic :: Software Development :: Version Control :: Git
44
44
  Requires-Python: >=3.13
45
45
  Description-Content-Type: text/markdown
46
+ Requires-Dist: babel>=2.17.0
46
47
  Requires-Dist: openai>=2.6.1
48
+ Requires-Dist: tiktoken>=0.12.0
47
49
 
48
50
  # git-commit-message
49
51
 
50
52
  Staged changes -> GPT commit message generator.
51
53
 
54
+ [![asciicast](https://asciinema.org/a/jk0phFqNnc5vaCiIZEYBwZOyN.svg)](https://asciinema.org/a/jk0phFqNnc5vaCiIZEYBwZOyN)
55
+
52
56
  ## Install (PyPI)
53
57
 
54
58
  Install the latest released version from PyPI:
55
59
 
56
- ```fish
60
+ ```sh
57
61
  # User environment (recommended)
58
62
  python -m pip install --user git-commit-message
59
63
 
@@ -69,11 +73,17 @@ python -m pip install --upgrade git-commit-message
69
73
 
70
74
  Quick check:
71
75
 
72
- ```fish
76
+ ```sh
73
77
  git-commit-message --help
74
78
  ```
75
79
 
76
- Set your API key (fish):
80
+ Set your API key (POSIX sh):
81
+
82
+ ```sh
83
+ export OPENAI_API_KEY="sk-..."
84
+ ```
85
+
86
+ Note (fish): in the fish shell, set the variable with `set -x` instead:
77
87
 
78
88
  ```fish
79
89
  set -x OPENAI_API_KEY "sk-..."
@@ -81,7 +91,7 @@ set -x OPENAI_API_KEY "sk-..."
81
91
 
82
92
  ## Install (editable)
83
93
 
84
- ```fish
94
+ ```sh
85
95
  python -m pip install -e .
86
96
  ```
87
97
 
@@ -89,32 +99,45 @@ python -m pip install -e .
89
99
 
90
100
  - Print commit message only:
91
101
 
92
- ```fish
102
+ ```sh
93
103
  git add -A
94
104
  git-commit-message "optional extra context about the change"
95
105
  ```
96
106
 
97
107
  - Force single-line subject only:
98
108
 
99
- ```fish
109
+ ```sh
100
110
  git-commit-message --one-line "optional context"
101
111
  ```
102
112
 
103
113
  - Limit subject length (default 72):
104
114
 
105
- ```fish
115
+ ```sh
106
116
  git-commit-message --one-line --max-length 50 "optional context"
107
117
  ```
108
118
 
119
+ - Chunk long diffs by token budget (0 = single chunk + summary, -1 = disable summarisation and use the legacy one-shot prompt):
120
+
121
+ ```sh
122
+ # force a single summary pass over the whole diff (default)
123
+ git-commit-message --chunk-tokens 0 "optional context"
124
+
125
+ # chunk the diff into ~4000-token pieces before summarising
126
+ git-commit-message --chunk-tokens 4000 "optional context"
127
+
128
+ # disable summarisation and use the legacy one-shot prompt
129
+ git-commit-message --chunk-tokens -1 "optional context"
130
+ ```
131
+
109
132
  - Commit immediately with editor:
110
133
 
111
- ```fish
134
+ ```sh
112
135
  git-commit-message --commit --edit "refactor parser for speed"
113
136
  ```
114
137
 
115
138
  - Select output language/locale (default: en-GB):
116
139
 
117
- ```fish
140
+ ```sh
118
141
  # American English
119
142
  git-commit-message --language en-US "optional context"
120
143
 
@@ -135,6 +158,7 @@ Environment:
135
158
  - `OPENAI_API_KEY`: required
136
159
  - `GIT_COMMIT_MESSAGE_MODEL` or `OPENAI_MODEL`: optional (default: `gpt-5-mini`)
137
160
  - `GIT_COMMIT_MESSAGE_LANGUAGE`: optional (default: `en-GB`)
161
+ - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: optional token budget per diff chunk (default: 0 = single chunk + summary; -1 disables summarisation)
138
162
 
139
163
  ## AI‑generated code notice
140
164
 
@@ -0,0 +1,10 @@
1
+ git_commit_message/__init__.py,sha256=bmUVTlV1SYJAnoSaIKcpDCPkJ5JW2BANfFGvKt_A22w,190
2
+ git_commit_message/__main__.py,sha256=n5lvkLiCZ1Q4dwhEwonWntcKTeTaJL9qOJzdiLf0Gfk,99
3
+ git_commit_message/_cli.py,sha256=Norc3P3qT8JhnkbdO4VVhErr5FvmAC9hIgMD_aplvzY,7363
4
+ git_commit_message/_git.py,sha256=foQIG6e4QLv00JAhQgMUQ1cw7WExxU5SFezfgXJ10XA,2424
5
+ git_commit_message/_gpt.py,sha256=uriPHyMI7TFbdEhqqU3wJm7uT55tmF7x324L4bCfw_A,18081
6
+ git_commit_message-0.6.0.dist-info/METADATA,sha256=BxKIGDWssAMTdLP5bn0Rjzk9-OPsSYcB7PEQcJvzC3g,5273
7
+ git_commit_message-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ git_commit_message-0.6.0.dist-info/entry_points.txt,sha256=e2cRvoyZnmP7yVItmFKwZofYG86WWKhm8KbzZSo2mf0,63
9
+ git_commit_message-0.6.0.dist-info/top_level.txt,sha256=qeP45y7y44R4KrPEihvMdwdM8tXYDY_3nCvCD3I9EcI,19
10
+ git_commit_message-0.6.0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- git_commit_message/__init__.py,sha256=cJvTj8-8_I1VYz3EO7Eq1LJZ6RS-WhNSAhjVVliVLtU,196
2
- git_commit_message/__main__.py,sha256=n5lvkLiCZ1Q4dwhEwonWntcKTeTaJL9qOJzdiLf0Gfk,99
3
- git_commit_message/_cli.py,sha256=CiUrxWfAksBOYm__DkdV3mqir5NIQe1OwjwXuB2P_XE,6491
4
- git_commit_message/_git.py,sha256=-FNXmFtlsbuArgCvEcCqpXSB7CjwdLdhoXgdZk1qtcE,2435
5
- git_commit_message/_gpt.py,sha256=1Mp80WpToWnngwa4H7pWybZcpHMW9A4G-f0yZB3yIco,9386
6
- git_commit_message-0.5.0.dist-info/METADATA,sha256=7aeE7wwA--d5NYozybrCxn4n4CDaw56-8yphF3YdK5w,4429
7
- git_commit_message-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
- git_commit_message-0.5.0.dist-info/entry_points.txt,sha256=e2cRvoyZnmP7yVItmFKwZofYG86WWKhm8KbzZSo2mf0,63
9
- git_commit_message-0.5.0.dist-info/top_level.txt,sha256=qeP45y7y44R4KrPEihvMdwdM8tXYDY_3nCvCD3I9EcI,19
10
- git_commit_message-0.5.0.dist-info/RECORD,,