git-commit-message 0.8.2__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/PKG-INFO +32 -4
  2. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/README.md +31 -2
  3. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/pyproject.toml +1 -2
  4. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/_cli.py +76 -8
  5. git_commit_message-0.9.0/src/git_commit_message/_config.py +71 -0
  6. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/_git.py +6 -0
  7. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/_gpt.py +29 -13
  8. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/_llamacpp.py +14 -18
  9. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/_llm.py +77 -59
  10. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/_ollama.py +4 -17
  11. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message.egg-info/PKG-INFO +32 -4
  12. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message.egg-info/SOURCES.txt +1 -0
  13. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message.egg-info/requires.txt +0 -1
  14. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/UNLICENSE +0 -0
  15. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/setup.cfg +0 -0
  16. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/__init__.py +0 -0
  17. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/__main__.py +0 -0
  18. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message/_gemini.py +0 -0
  19. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message.egg-info/dependency_links.txt +0 -0
  20. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message.egg-info/entry_points.txt +0 -0
  21. {git_commit_message-0.8.2 → git_commit_message-0.9.0}/src/git_commit_message.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: git-commit-message
3
- Version: 0.8.2
3
+ Version: 0.9.0
4
4
  Summary: Generate Git commit messages from staged changes using LLM
5
5
  Maintainer-email: Mina Her <minacle@live.com>
6
6
  License: This is free and unencumbered software released into the public domain.
@@ -47,7 +47,6 @@ Requires-Dist: babel>=2.17.0
47
47
  Requires-Dist: google-genai>=1.56.0
48
48
  Requires-Dist: ollama>=0.4.0
49
49
  Requires-Dist: openai>=2.6.1
50
- Requires-Dist: tiktoken>=0.12.0
51
50
 
52
51
  # git-commit-message
53
52
 
@@ -167,6 +166,18 @@ git-commit-message --one-line "optional context"
167
166
  git-commit-message --one-line --co-author 'John Doe <john.doe@example.com>'
168
167
  ```
169
168
 
169
+ Use Conventional Commits constraints for the subject/footer only (body format is preserved):
170
+
171
+ ```sh
172
+ git-commit-message --conventional
173
+
174
+ # can be combined with one-line mode
175
+ git-commit-message --conventional --one-line
176
+
177
+ # co-author trailers are appended after any existing footers
178
+ git-commit-message --conventional --co-author copilot
179
+ ```
180
+
170
181
  Select provider:
171
182
 
172
183
  ```sh
@@ -223,10 +234,24 @@ git-commit-message --chunk-tokens 0
223
234
  # chunk the diff into ~4000-token pieces before summarising
224
235
  git-commit-message --chunk-tokens 4000
225
236
 
237
+ # note: for provider 'ollama', values >= 1 are not supported
238
+ # use 0 (single summary pass) or -1 (legacy one-shot)
239
+ git-commit-message --provider ollama --chunk-tokens 0
240
+
226
241
  # disable summarisation and use the legacy one-shot prompt
227
242
  git-commit-message --chunk-tokens -1
228
243
  ```
229
244
 
245
+ Adjust unified diff context lines:
246
+
247
+ ```sh
248
+ # use 5 context lines around each change hunk
249
+ git-commit-message --diff-context 5
250
+
251
+ # include only changed lines (no surrounding context)
252
+ git-commit-message --diff-context 0
253
+ ```
254
+
230
255
  Select output language/locale (IETF language tag):
231
256
 
232
257
  ```sh
@@ -258,9 +283,11 @@ git-commit-message --provider llamacpp --host http://192.168.1.100:8080
258
283
  - `--provider {openai,google,ollama,llamacpp}`: provider to use (default: `openai`)
259
284
  - `--model MODEL`: model override (provider-specific; ignored for llama.cpp)
260
285
  - `--language TAG`: output language/locale (default: `en-GB`)
286
+ - `--conventional`: apply Conventional Commits constraints to the subject and footer behavior. The body format is unchanged and still includes the translated `Rationale:` line. Breaking changes are expressed with `!` in the subject line, and `BREAKING CHANGE` footer lines are not generated.
261
287
  - `--one-line`: output subject only when no trailers are appended; with `--co-author`, output is a single-line subject plus `Co-authored-by:` trailer lines
262
288
  - `--max-length N`: max subject length (default: 72)
263
- - `--chunk-tokens N`: token budget per diff chunk (`0` = single summary pass, `-1` disables summarisation)
289
+ - `--chunk-tokens N`: token budget per diff chunk (`0` = single summary pass, `-1` disables summarisation). For `ollama`, values `>= 1` are not supported.
290
+ - `--diff-context N`: context lines in unified diff (`N >= 0`). If omitted, uses `GIT_COMMIT_MESSAGE_DIFF_CONTEXT` when set; otherwise uses Git default (usually `3`).
264
291
  - `--debug`: print request/response details
265
292
  - `--commit`: run `git commit -m <message>`
266
293
  - `--amend`: generate a message suitable for amending the previous commit (diff is from the amended commit's parent to the staged index; if nothing is staged, this effectively becomes the diff introduced by `HEAD`)
@@ -284,7 +311,8 @@ Optional:
284
311
  - `OLLAMA_HOST`: Ollama server URL (default: `http://localhost:11434`)
285
312
  - `LLAMACPP_HOST`: llama.cpp server URL (default: `http://localhost:8080`)
286
313
  - `GIT_COMMIT_MESSAGE_LANGUAGE`: default language/locale (default: `en-GB`)
287
- - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: default chunk token budget (default: `0`)
314
+ - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: default chunk token budget (default: `0`; for `ollama`, values `>= 1` are not supported)
315
+ - `GIT_COMMIT_MESSAGE_DIFF_CONTEXT`: default unified diff context lines (`0` or greater). If unset, Git default is used (usually `3`).
288
316
 
289
317
  Default models (if not overridden):
290
318
 
@@ -116,6 +116,18 @@ git-commit-message --one-line "optional context"
116
116
  git-commit-message --one-line --co-author 'John Doe <john.doe@example.com>'
117
117
  ```
118
118
 
119
+ Use Conventional Commits constraints for the subject/footer only (body format is preserved):
120
+
121
+ ```sh
122
+ git-commit-message --conventional
123
+
124
+ # can be combined with one-line mode
125
+ git-commit-message --conventional --one-line
126
+
127
+ # co-author trailers are appended after any existing footers
128
+ git-commit-message --conventional --co-author copilot
129
+ ```
130
+
119
131
  Select provider:
120
132
 
121
133
  ```sh
@@ -172,10 +184,24 @@ git-commit-message --chunk-tokens 0
172
184
  # chunk the diff into ~4000-token pieces before summarising
173
185
  git-commit-message --chunk-tokens 4000
174
186
 
187
+ # note: for provider 'ollama', values >= 1 are not supported
188
+ # use 0 (single summary pass) or -1 (legacy one-shot)
189
+ git-commit-message --provider ollama --chunk-tokens 0
190
+
175
191
  # disable summarisation and use the legacy one-shot prompt
176
192
  git-commit-message --chunk-tokens -1
177
193
  ```
178
194
 
195
+ Adjust unified diff context lines:
196
+
197
+ ```sh
198
+ # use 5 context lines around each change hunk
199
+ git-commit-message --diff-context 5
200
+
201
+ # include only changed lines (no surrounding context)
202
+ git-commit-message --diff-context 0
203
+ ```
204
+
179
205
  Select output language/locale (IETF language tag):
180
206
 
181
207
  ```sh
@@ -207,9 +233,11 @@ git-commit-message --provider llamacpp --host http://192.168.1.100:8080
207
233
  - `--provider {openai,google,ollama,llamacpp}`: provider to use (default: `openai`)
208
234
  - `--model MODEL`: model override (provider-specific; ignored for llama.cpp)
209
235
  - `--language TAG`: output language/locale (default: `en-GB`)
236
+ - `--conventional`: apply Conventional Commits constraints to the subject and footer behavior. The body format is unchanged and still includes the translated `Rationale:` line. Breaking changes are expressed with `!` in the subject line, and `BREAKING CHANGE` footer lines are not generated.
210
237
  - `--one-line`: output subject only when no trailers are appended; with `--co-author`, output is a single-line subject plus `Co-authored-by:` trailer lines
211
238
  - `--max-length N`: max subject length (default: 72)
212
- - `--chunk-tokens N`: token budget per diff chunk (`0` = single summary pass, `-1` disables summarisation)
239
+ - `--chunk-tokens N`: token budget per diff chunk (`0` = single summary pass, `-1` disables summarisation). For `ollama`, values `>= 1` are not supported.
240
+ - `--diff-context N`: context lines in unified diff (`N >= 0`). If omitted, uses `GIT_COMMIT_MESSAGE_DIFF_CONTEXT` when set; otherwise uses Git default (usually `3`).
213
241
  - `--debug`: print request/response details
214
242
  - `--commit`: run `git commit -m <message>`
215
243
  - `--amend`: generate a message suitable for amending the previous commit (diff is from the amended commit's parent to the staged index; if nothing is staged, this effectively becomes the diff introduced by `HEAD`)
@@ -233,7 +261,8 @@ Optional:
233
261
  - `OLLAMA_HOST`: Ollama server URL (default: `http://localhost:11434`)
234
262
  - `LLAMACPP_HOST`: llama.cpp server URL (default: `http://localhost:8080`)
235
263
  - `GIT_COMMIT_MESSAGE_LANGUAGE`: default language/locale (default: `en-GB`)
236
- - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: default chunk token budget (default: `0`)
264
+ - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: default chunk token budget (default: `0`; for `ollama`, values `>= 1` are not supported)
265
+ - `GIT_COMMIT_MESSAGE_DIFF_CONTEXT`: default unified diff context lines (`0` or greater). If unset, Git default is used (usually `3`).
237
266
 
238
267
  Default models (if not overridden):
239
268
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "git-commit-message"
3
- version = "0.8.2"
3
+ version = "0.9.0"
4
4
  description = "Generate Git commit messages from staged changes using LLM"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13"
@@ -9,7 +9,6 @@ dependencies = [
9
9
  "google-genai>=1.56.0",
10
10
  "ollama>=0.4.0",
11
11
  "openai>=2.6.1",
12
- "tiktoken>=0.12.0",
13
12
  ]
14
13
  maintainers = [{ name = "Mina Her", email = "minacle@live.com" }]
15
14
  license = { file = "UNLICENSE" }
@@ -23,6 +23,7 @@ from ._git import (
23
23
  has_staged_changes,
24
24
  resolve_amend_base_ref,
25
25
  )
26
+ from ._config import resolve_provider_name, validate_provider_chunk_tokens
26
27
  from ._llm import (
27
28
  CommitMessageResult,
28
29
  UnsupportedProviderError,
@@ -37,6 +38,7 @@ class CliArgs(Namespace):
37
38
  "commit",
38
39
  "amend",
39
40
  "edit",
41
+ "conventional",
40
42
  "provider",
41
43
  "model",
42
44
  "language",
@@ -44,6 +46,7 @@ class CliArgs(Namespace):
44
46
  "one_line",
45
47
  "max_length",
46
48
  "chunk_tokens",
49
+ "diff_context",
47
50
  "host",
48
51
  "co_authors",
49
52
  )
@@ -56,6 +59,7 @@ class CliArgs(Namespace):
56
59
  self.commit: bool = False
57
60
  self.amend: bool = False
58
61
  self.edit: bool = False
62
+ self.conventional: bool = False
59
63
  self.provider: str | None = None
60
64
  self.model: str | None = None
61
65
  self.language: str | None = None
@@ -63,6 +67,7 @@ class CliArgs(Namespace):
63
67
  self.one_line: bool = False
64
68
  self.max_length: int | None = None
65
69
  self.chunk_tokens: int | None = None
70
+ self.diff_context: int | None = None
66
71
  self.host: str | None = None
67
72
  self.co_authors: list[str] | None = None
68
73
 
@@ -155,6 +160,21 @@ def _env_chunk_tokens_default() -> int | None:
155
160
  return None
156
161
 
157
162
 
163
+ def _env_diff_context_default() -> int | None:
164
+ """Return diff context default from env.
165
+
166
+ Raises
167
+ ------
168
+ ValueError
169
+ If the configured value is not an integer.
170
+ """
171
+
172
+ raw: str | None = environ.get("GIT_COMMIT_MESSAGE_DIFF_CONTEXT")
173
+ if raw is None:
174
+ return None
175
+ return int(raw)
176
+
177
+
158
178
  def _build_parser() -> ArgumentParser:
159
179
  """Create the CLI argument parser.
160
180
 
@@ -199,6 +219,15 @@ def _build_parser() -> ArgumentParser:
199
219
  help="Open an editor to amend the message before committing. Use with '--commit'.",
200
220
  )
201
221
 
222
+ parser.add_argument(
223
+ "--conventional",
224
+ action="store_true",
225
+ help=(
226
+ "Use Conventional Commits constraints for the subject line and footer. "
227
+ "The existing body format remains unchanged, including the translated Rationale line."
228
+ ),
229
+ )
230
+
202
231
  parser.add_argument(
203
232
  "--provider",
204
233
  default=None,
@@ -258,10 +287,23 @@ def _build_parser() -> ArgumentParser:
258
287
  help=(
259
288
  "Target token budget per diff chunk. "
260
289
  "0 forces a single chunk with summarisation; -1 disables summarisation (legacy one-shot). "
290
+ "For provider 'ollama', values >= 1 are not supported. "
261
291
  "If omitted, uses GIT_COMMIT_MESSAGE_CHUNK_TOKENS when set (default: 0)."
262
292
  ),
263
293
  )
264
294
 
295
+ parser.add_argument(
296
+ "--diff-context",
297
+ dest="diff_context",
298
+ type=int,
299
+ default=None,
300
+ help=(
301
+ "Number of context lines in unified diff output. "
302
+ "If omitted, uses GIT_COMMIT_MESSAGE_DIFF_CONTEXT when set "
303
+ "(default: Git default, usually 3)."
304
+ ),
305
+ )
306
+
265
307
  parser.add_argument(
266
308
  "--host",
267
309
  dest="host",
@@ -308,6 +350,32 @@ def _run(
308
350
  Process exit code. 0 indicates success; any other value indicates failure.
309
351
  """
310
352
 
353
+ chunk_tokens: int | None = args.chunk_tokens
354
+ if chunk_tokens is None:
355
+ chunk_tokens = _env_chunk_tokens_default()
356
+ if chunk_tokens is None:
357
+ chunk_tokens = 0
358
+
359
+ diff_context: int | None = args.diff_context
360
+ if diff_context is None:
361
+ try:
362
+ diff_context = _env_diff_context_default()
363
+ except ValueError:
364
+ print(
365
+ "GIT_COMMIT_MESSAGE_DIFF_CONTEXT must be an integer.",
366
+ file=stderr,
367
+ )
368
+ return 2
369
+ if diff_context is not None and diff_context < 0:
370
+ print("--diff-context must be greater than or equal to 0.", file=stderr)
371
+ return 2
372
+
373
+ provider_name: str = resolve_provider_name(args.provider)
374
+ provider_arg_error = validate_provider_chunk_tokens(provider_name, chunk_tokens)
375
+ if provider_arg_error is not None:
376
+ print(provider_arg_error, file=stderr)
377
+ return 2
378
+
311
379
  repo_root: Path = get_repo_root()
312
380
 
313
381
  if args.amend:
@@ -316,22 +384,20 @@ def _run(
316
384
  return 2
317
385
 
318
386
  base_ref = resolve_amend_base_ref(repo_root)
319
- diff_text: str = get_staged_diff(repo_root, base_ref=base_ref)
387
+ diff_text: str = get_staged_diff(
388
+ repo_root,
389
+ base_ref=base_ref,
390
+ context_lines=diff_context,
391
+ )
320
392
  else:
321
393
  if not has_staged_changes(repo_root):
322
394
  print("No staged changes. Run 'git add' and try again.", file=stderr)
323
395
  return 2
324
396
 
325
- diff_text = get_staged_diff(repo_root)
397
+ diff_text = get_staged_diff(repo_root, context_lines=diff_context)
326
398
 
327
399
  hint: str | None = args.description if isinstance(args.description, str) else None
328
400
 
329
- chunk_tokens: int | None = args.chunk_tokens
330
- if chunk_tokens is None:
331
- chunk_tokens = _env_chunk_tokens_default()
332
- if chunk_tokens is None:
333
- chunk_tokens = 0
334
-
335
401
  normalized_co_authors: list[str] | None = None
336
402
  if args.co_authors:
337
403
  try:
@@ -353,6 +419,7 @@ def _run(
353
419
  chunk_tokens,
354
420
  args.provider,
355
421
  args.host,
422
+ args.conventional,
356
423
  )
357
424
  message = result.message
358
425
  else:
@@ -366,6 +433,7 @@ def _run(
366
433
  chunk_tokens,
367
434
  args.provider,
368
435
  args.host,
436
+ args.conventional,
369
437
  )
370
438
  except UnsupportedProviderError as exc:
371
439
  print(str(exc), file=stderr)
@@ -0,0 +1,71 @@
1
+ """Shared configuration resolvers for provider/model/language selection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from os import environ
6
+ from typing import Final
7
+
8
+
9
+ DEFAULT_PROVIDER: Final[str] = "openai"
10
+ DEFAULT_MODEL_OPENAI: Final[str] = "gpt-5-mini"
11
+ DEFAULT_MODEL_GOOGLE: Final[str] = "gemini-2.5-flash"
12
+ DEFAULT_MODEL_OLLAMA: Final[str] = "gpt-oss:20b"
13
+ DEFAULT_MODEL_LLAMACPP: Final[str] = "default"
14
+ DEFAULT_LANGUAGE: Final[str] = "en-GB"
15
+
16
+
17
+ def resolve_provider_name(
18
+ provider: str | None,
19
+ /,
20
+ ) -> str:
21
+ chosen = provider or environ.get("GIT_COMMIT_MESSAGE_PROVIDER") or DEFAULT_PROVIDER
22
+ return chosen.strip().lower()
23
+
24
+
25
+ def resolve_model_name(
26
+ model: str | None,
27
+ provider_name: str,
28
+ /,
29
+ ) -> str:
30
+ if provider_name == "google":
31
+ default_model = DEFAULT_MODEL_GOOGLE
32
+ provider_model = None
33
+ elif provider_name == "ollama":
34
+ default_model = DEFAULT_MODEL_OLLAMA
35
+ provider_model = environ.get("OLLAMA_MODEL")
36
+ elif provider_name == "llamacpp":
37
+ default_model = DEFAULT_MODEL_LLAMACPP
38
+ provider_model = environ.get("LLAMACPP_MODEL")
39
+ else:
40
+ default_model = DEFAULT_MODEL_OPENAI
41
+ provider_model = environ.get("OPENAI_MODEL")
42
+
43
+ return model or environ.get("GIT_COMMIT_MESSAGE_MODEL") or provider_model or default_model
44
+
45
+
46
+ def resolve_language_tag(
47
+ language: str | None,
48
+ /,
49
+ ) -> str:
50
+ return language or environ.get("GIT_COMMIT_MESSAGE_LANGUAGE") or DEFAULT_LANGUAGE
51
+
52
+
53
+ def validate_provider_chunk_tokens(
54
+ provider_name: str,
55
+ chunk_tokens: int,
56
+ /,
57
+ ) -> str | None:
58
+ if chunk_tokens < -1:
59
+ return (
60
+ "'--chunk-tokens' must be -1 or greater. "
61
+ "Use -1 to disable summarisation, or 0/positive values to enable summarisation."
62
+ )
63
+
64
+ if provider_name == "ollama" and chunk_tokens > 0:
65
+ return (
66
+ "'--chunk-tokens' with values >= 1 is not supported for provider 'ollama'. "
67
+ "Use '--chunk-tokens 0' (single summary pass) or '--chunk-tokens -1' "
68
+ "(disable summarisation)."
69
+ )
70
+
71
+ return None
@@ -183,6 +183,7 @@ def get_staged_diff(
183
183
  /,
184
184
  *,
185
185
  base_ref: str | None = None,
186
+ context_lines: int | None = None,
186
187
  ) -> str:
187
188
  """Return the staged changes as diff text.
188
189
 
@@ -195,6 +196,9 @@ def get_staged_diff(
195
196
  commit hash, or the empty tree hash) to diff against. When provided,
196
197
  the diff shows changes from ``base_ref`` to the staged index, instead
197
198
  of changes from ``HEAD`` to the staged index.
199
+ context_lines
200
+ Optional number of context lines for unified diff output. When ``None``,
201
+ Git's default context lines are used.
198
202
 
199
203
  Returns
200
204
  -------
@@ -210,6 +214,8 @@ def get_staged_diff(
210
214
  "--minimal",
211
215
  "--no-color",
212
216
  ]
217
+ if context_lines is not None:
218
+ cmd.append(f"-U{context_lines}")
213
219
  if base_ref:
214
220
  cmd.append(base_ref)
215
221
 
@@ -11,20 +11,9 @@ from openai.types.responses import Response
11
11
  from os import environ
12
12
  from typing import ClassVar
13
13
 
14
- from tiktoken import Encoding, encoding_for_model, get_encoding
15
14
  from ._llm import LLMTextResult, LLMUsage
16
15
 
17
16
 
18
- def _encoding_for_model(
19
- model: str,
20
- /,
21
- ) -> Encoding:
22
- try:
23
- return encoding_for_model(model)
24
- except Exception:
25
- return get_encoding("cl100k_base")
26
-
27
-
28
17
  class OpenAIResponsesProvider:
29
18
  __slots__ = (
30
19
  "_client",
@@ -50,8 +39,35 @@ class OpenAIResponsesProvider:
50
39
  model: str,
51
40
  text: str,
52
41
  ) -> int:
53
- encoding = _encoding_for_model(model)
54
- return len(encoding.encode(text))
42
+ try:
43
+ resp = self._client.responses.input_tokens.count(
44
+ model=model,
45
+ input=[
46
+ {
47
+ "role": "user",
48
+ "content": [
49
+ {
50
+ "type": "input_text",
51
+ "text": text,
52
+ }
53
+ ],
54
+ }
55
+ ],
56
+ )
57
+ except Exception as exc:
58
+ raise RuntimeError(
59
+ "Token counting failed for the OpenAI provider. "
60
+ "Try `--chunk-tokens 0` (default) or `--chunk-tokens -1` to disable summarisation."
61
+ ) from exc
62
+
63
+ prompt_tokens = getattr(resp, "input_tokens", None)
64
+ if not isinstance(prompt_tokens, int):
65
+ raise RuntimeError(
66
+ "Token counting returned an unexpected response from the OpenAI provider. "
67
+ "Try `--chunk-tokens 0` (default) or `--chunk-tokens -1` to disable summarisation."
68
+ )
69
+
70
+ return prompt_tokens
55
71
 
56
72
  def generate_text(
57
73
  self,
@@ -12,7 +12,6 @@ from typing import ClassVar, Final
12
12
 
13
13
  from openai import OpenAI
14
14
  from openai.types.chat import ChatCompletionMessageParam
15
- from tiktoken import Encoding, get_encoding
16
15
 
17
16
  from ._llm import LLMTextResult, LLMUsage
18
17
 
@@ -29,15 +28,6 @@ def _resolve_llamacpp_host(
29
28
  return host or environ.get("LLAMACPP_HOST") or _DEFAULT_LLAMACPP_HOST
30
29
 
31
30
 
32
- def _get_encoding() -> Encoding:
33
- """Get a fallback encoding for token counting."""
34
-
35
- try:
36
- return get_encoding("cl100k_base")
37
- except Exception:
38
- return get_encoding("gpt2")
39
-
40
-
41
31
  class LlamaCppProvider:
42
32
  """llama.cpp provider implementation for the LLM protocol.
43
33
 
@@ -135,11 +125,17 @@ class LlamaCppProvider:
135
125
  },
136
126
  cast_to=dict,
137
127
  )
138
- return response.get("total", 0)
139
- except Exception:
140
- # Fallback to tiktoken approximation
141
- try:
142
- encoding = _get_encoding()
143
- return len(encoding.encode(text))
144
- except Exception:
145
- return len(text.split())
128
+ except Exception as exc:
129
+ raise RuntimeError(
130
+ "Token counting failed for the llama.cpp provider. "
131
+ "Try `--chunk-tokens 0` (default) or `--chunk-tokens -1` to disable summarisation."
132
+ ) from exc
133
+
134
+ total = response.get("total") if isinstance(response, dict) else None
135
+ if not isinstance(total, int):
136
+ raise RuntimeError(
137
+ "Token counting returned an unexpected response from the llama.cpp provider. "
138
+ "Try `--chunk-tokens 0` (default) or `--chunk-tokens -1` to disable summarisation."
139
+ )
140
+
141
+ return total
@@ -11,16 +11,14 @@ Provider-specific API calls live in provider modules (e.g. `_gpt.py`).
11
11
  from __future__ import annotations
12
12
 
13
13
  from babel import Locale
14
- from os import environ
15
- from typing import ClassVar, Final, Protocol
14
+ from typing import ClassVar, Protocol
16
15
 
17
-
18
- _DEFAULT_PROVIDER: Final[str] = "openai"
19
- _DEFAULT_MODEL_OPENAI: Final[str] = "gpt-5-mini"
20
- _DEFAULT_MODEL_GOOGLE: Final[str] = "gemini-2.5-flash"
21
- _DEFAULT_MODEL_OLLAMA: Final[str] = "gpt-oss:20b"
22
- _DEFAULT_MODEL_LLAMACPP: Final[str] = "default"
23
- _DEFAULT_LANGUAGE: Final[str] = "en-GB"
16
+ from ._config import (
17
+ resolve_language_tag,
18
+ resolve_model_name,
19
+ resolve_provider_name,
20
+ validate_provider_chunk_tokens,
21
+ )
24
22
 
25
23
 
26
24
  class UnsupportedProviderError(RuntimeError):
@@ -137,49 +135,13 @@ class CommitMessageResult:
137
135
  self.total_tokens = total_tokens
138
136
 
139
137
 
140
- def _resolve_provider(
141
- provider: str | None,
142
- /,
143
- ) -> str:
144
- chosen = provider or environ.get("GIT_COMMIT_MESSAGE_PROVIDER") or _DEFAULT_PROVIDER
145
- return chosen.strip().lower()
146
-
147
-
148
- def _resolve_model(
149
- model: str | None,
150
- provider_name: str,
151
- /,
152
- ) -> str:
153
- if provider_name == "google":
154
- default_model = _DEFAULT_MODEL_GOOGLE
155
- provider_model = None
156
- elif provider_name == "ollama":
157
- default_model = _DEFAULT_MODEL_OLLAMA
158
- provider_model = environ.get("OLLAMA_MODEL")
159
- elif provider_name == "llamacpp":
160
- default_model = _DEFAULT_MODEL_LLAMACPP
161
- provider_model = environ.get("LLAMACPP_MODEL")
162
- else:
163
- default_model = _DEFAULT_MODEL_OPENAI
164
- provider_model = environ.get("OPENAI_MODEL")
165
-
166
- return model or environ.get("GIT_COMMIT_MESSAGE_MODEL") or provider_model or default_model
167
-
168
-
169
- def _resolve_language(
170
- language: str | None,
171
- /,
172
- ) -> str:
173
- return language or environ.get("GIT_COMMIT_MESSAGE_LANGUAGE") or _DEFAULT_LANGUAGE
174
-
175
-
176
138
  def get_provider(
177
139
  provider: str | None,
178
140
  /,
179
141
  *,
180
142
  host: str | None = None,
181
143
  ) -> CommitMessageProvider:
182
- name = _resolve_provider(provider)
144
+ name = resolve_provider_name(provider)
183
145
 
184
146
  if name == "openai":
185
147
  # Local import to avoid import cycles: providers may import shared types from this module.
@@ -242,19 +204,54 @@ def _build_system_prompt(
242
204
  single_line: bool,
243
205
  subject_max: int | None,
244
206
  language: str,
207
+ conventional: bool = False,
245
208
  /,
246
209
  ) -> str:
247
210
  display_language: str = _language_display(language)
248
211
  max_len = subject_max or 72
249
212
  if single_line:
213
+ conventional_rule: str
214
+ if conventional:
215
+ conventional_rule = (
216
+ "Use one of these Conventional Commits subject forms: '<type>: <description>', '<type>(<scope>): <description>', '<type>!: <description>', or '<type>(<scope>)!: <description>'. "
217
+ "When a scope is present, it MUST be parenthesized and directly attached to the type with no spaces. "
218
+ "Represent breaking changes with '!' before ':' in the subject; do not output a BREAKING CHANGE footer. "
219
+ "Do NOT translate the Conventional prefix token ('<type>', optional '(<scope>)', optional '!'); translate only the description into the target language. "
220
+ )
221
+ else:
222
+ conventional_rule = (
223
+ "Do NOT use Conventional Commits title format. "
224
+ "Do not start with '<type>:' or '<type>(<scope>):' prefixes such as 'feat:', 'fix:', 'docs:', 'chore:', 'refactor:', 'test:', 'perf:', 'ci:', or 'build:'. "
225
+ )
250
226
  return (
251
227
  f"You are an expert Git commit message generator. "
252
228
  f"Always use '{display_language}' spelling and style. "
229
+ f"{conventional_rule}"
253
230
  f"Return a single-line imperative subject only (<= {max_len} chars). "
254
231
  f"Do not include a body, bullet points, or any rationale. Do not include any line breaks. "
255
232
  f"Consider the user-provided auxiliary context if present. "
256
233
  f"Return only the commit message text (no code fences or prefixes like 'Commit message:')."
257
234
  )
235
+
236
+ format_guidelines: str = ""
237
+ if conventional:
238
+ format_guidelines = (
239
+ "\n"
240
+ "- The subject line MUST use one of these forms: '<type>: <description>', '<type>(<scope>): <description>', '<type>!: <description>', or '<type>(<scope>)!: <description>'.\n"
241
+ "- If scope is used, it MUST be in parentheses and directly attached to type with no spaces, e.g. 'feat(parser):'.\n"
242
+ "- In Conventional mode, only the subject line and footer conventions are additionally constrained; keep the body structure unchanged.\n"
243
+ "- Keep the translated equivalent of 'Rationale:' as the final body line label; this section MUST be present.\n"
244
+ "- For breaking changes, use '!' immediately before ':' in the subject line.\n"
245
+ "- Do NOT generate any BREAKING CHANGE footer line.\n"
246
+ "- Do NOT translate the Conventional prefix token ('<type>', optional '(<scope>)', optional '!'). Translate only the description, bullet points, and rationale into the target language.\n"
247
+ )
248
+ else:
249
+ format_guidelines = (
250
+ "\n"
251
+ "- Do NOT use Conventional Commits subject prefixes.\n"
252
+ "- The subject MUST NOT start with '<type>:' or '<type>(<scope>):' patterns (for example: 'feat:', 'fix:', 'docs:', 'chore:', 'refactor:', 'test:', 'perf:', 'ci:', or 'build:').\n"
253
+ )
254
+
258
255
  return (
259
256
  f"You are an expert Git commit message generator. "
260
257
  f"Always use '{display_language}' spelling and style. "
@@ -282,6 +279,7 @@ def _build_system_prompt(
282
279
  f"- If few details are necessary, include at least one bullet summarising the key change.\n"
283
280
  f"- If you cannot provide any body content, still output the subject line; the subject line must never be omitted.\n"
284
281
  f"- Consider the user-provided auxiliary context if present.\n"
282
+ f"{format_guidelines}"
285
283
  f"Return only the commit message text in the above format (no code fences or extra labels)."
286
284
  )
287
285
 
@@ -371,14 +369,17 @@ def _build_diff_chunks(
371
369
 
372
370
  if current:
373
371
  chunks.append("".join(current))
374
- current = [hunk]
375
- else:
376
372
  single_tokens = provider.count_tokens(model=model, text=hunk)
377
373
  if single_tokens > chunk_tokens:
378
374
  raise ValueError(
379
375
  "chunk_tokens is too small to fit a single diff hunk; increase the value or disable chunking"
380
376
  )
381
377
  current = [hunk]
378
+ continue
379
+
380
+ raise ValueError(
381
+ "chunk_tokens is too small to fit a single diff hunk; increase the value or disable chunking"
382
+ )
382
383
 
383
384
  if current:
384
385
  chunks.append("".join(current))
@@ -420,9 +421,10 @@ def _generate_commit_from_summaries(
420
421
  single_line: bool,
421
422
  subject_max: int | None,
422
423
  language: str,
424
+ conventional: bool = False,
423
425
  /,
424
426
  ) -> LLMTextResult:
425
- instructions = _build_system_prompt(single_line, subject_max, language)
427
+ instructions = _build_system_prompt(single_line, subject_max, language, conventional)
426
428
  sections: list[str] = []
427
429
 
428
430
  if hint:
@@ -486,19 +488,26 @@ def generate_commit_message(
486
488
  chunk_tokens: int | None = 0,
487
489
  provider: str | None = None,
488
490
  host: str | None = None,
491
+ conventional: bool = False,
489
492
  /,
490
493
  ) -> str:
491
- chosen_provider = _resolve_provider(provider)
492
- chosen_model = _resolve_model(model, chosen_provider)
493
- chosen_language = _resolve_language(language)
494
+ chosen_provider = resolve_provider_name(provider)
495
+ chosen_model = resolve_model_name(model, chosen_provider)
496
+ chosen_language = resolve_language_tag(language)
494
497
 
495
498
  llm = get_provider(chosen_provider, host=host)
496
499
 
497
500
  normalized_chunk_tokens = 0 if chunk_tokens is None else chunk_tokens
501
+ provider_arg_error = validate_provider_chunk_tokens(
502
+ chosen_provider,
503
+ normalized_chunk_tokens,
504
+ )
505
+ if provider_arg_error is not None:
506
+ raise ValueError(provider_arg_error)
498
507
 
499
508
  if normalized_chunk_tokens != -1:
500
509
  hunks = _split_diff_into_hunks(diff)
501
- if normalized_chunk_tokens == 0 or normalized_chunk_tokens < 0:
510
+ if normalized_chunk_tokens == 0:
502
511
  chunks = ["".join(hunks) if hunks else diff]
503
512
  else:
504
513
  chunks = _build_diff_chunks(hunks, normalized_chunk_tokens, llm, chosen_model)
@@ -513,10 +522,11 @@ def generate_commit_message(
513
522
  single_line,
514
523
  subject_max,
515
524
  chosen_language,
525
+ conventional,
516
526
  )
517
527
  text = (final.text or "").strip()
518
528
  else:
519
- instructions = _build_system_prompt(single_line, subject_max, chosen_language)
529
+ instructions = _build_system_prompt(single_line, subject_max, chosen_language, conventional)
520
530
  user_text = _build_combined_prompt(diff, hint)
521
531
  final = llm.generate_text(
522
532
  model=chosen_model,
@@ -541,21 +551,28 @@ def generate_commit_message_with_info(
541
551
  chunk_tokens: int | None = 0,
542
552
  provider: str | None = None,
543
553
  host: str | None = None,
554
+ conventional: bool = False,
544
555
  /,
545
556
  ) -> CommitMessageResult:
546
- chosen_provider = _resolve_provider(provider)
547
- chosen_model = _resolve_model(model, chosen_provider)
548
- chosen_language = _resolve_language(language)
557
+ chosen_provider = resolve_provider_name(provider)
558
+ chosen_model = resolve_model_name(model, chosen_provider)
559
+ chosen_language = resolve_language_tag(language)
549
560
 
550
561
  llm = get_provider(chosen_provider, host=host)
551
562
 
552
563
  normalized_chunk_tokens = 0 if chunk_tokens is None else chunk_tokens
564
+ provider_arg_error = validate_provider_chunk_tokens(
565
+ chosen_provider,
566
+ normalized_chunk_tokens,
567
+ )
568
+ if provider_arg_error is not None:
569
+ raise ValueError(provider_arg_error)
553
570
 
554
571
  response_id: str | None = None
555
572
 
556
573
  if normalized_chunk_tokens != -1:
557
574
  hunks = _split_diff_into_hunks(diff)
558
- if normalized_chunk_tokens == 0 or normalized_chunk_tokens < 0:
575
+ if normalized_chunk_tokens == 0:
559
576
  chunks = ["".join(hunks) if hunks else diff]
560
577
  else:
561
578
  chunks = _build_diff_chunks(hunks, normalized_chunk_tokens, llm, chosen_model)
@@ -570,6 +587,7 @@ def generate_commit_message_with_info(
570
587
  single_line,
571
588
  subject_max,
572
589
  chosen_language,
590
+ conventional,
573
591
  )
574
592
 
575
593
  combined_prompt = _build_combined_prompt(
@@ -586,7 +604,7 @@ def generate_commit_message_with_info(
586
604
  response_id = final_result.response_id
587
605
 
588
606
  else:
589
- instructions = _build_system_prompt(single_line, subject_max, chosen_language)
607
+ instructions = _build_system_prompt(single_line, subject_max, chosen_language, conventional)
590
608
  combined_prompt = _build_combined_prompt(diff, hint)
591
609
 
592
610
  final_result = llm.generate_text(
@@ -11,7 +11,6 @@ from os import environ
11
11
  from typing import ClassVar, Final
12
12
 
13
13
  from ollama import Client, ResponseError
14
- from tiktoken import Encoding, get_encoding
15
14
 
16
15
  from ._llm import LLMTextResult, LLMUsage
17
16
 
@@ -28,15 +27,6 @@ def _resolve_ollama_host(
28
27
  return host or environ.get("OLLAMA_HOST") or _DEFAULT_OLLAMA_HOST
29
28
 
30
29
 
31
- def _get_encoding() -> Encoding:
32
- """Get a fallback encoding for token counting."""
33
-
34
- try:
35
- return get_encoding("cl100k_base")
36
- except Exception:
37
- return get_encoding("gpt2")
38
-
39
-
40
30
  class OllamaProvider:
41
31
  """Ollama provider implementation for the LLM protocol."""
42
32
 
@@ -113,10 +103,7 @@ class OllamaProvider:
113
103
  model: str,
114
104
  text: str,
115
105
  ) -> int:
116
- """Approximate token count using tiktoken; fallback to whitespace split."""
117
-
118
- try:
119
- encoding = _get_encoding()
120
- return len(encoding.encode(text))
121
- except Exception:
122
- return len(text.split())
106
+ raise RuntimeError(
107
+ "Token counting is not supported for the Ollama provider. "
108
+ "Try `--chunk-tokens 0` (default) or `--chunk-tokens -1` to disable summarisation."
109
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: git-commit-message
3
- Version: 0.8.2
3
+ Version: 0.9.0
4
4
  Summary: Generate Git commit messages from staged changes using LLM
5
5
  Maintainer-email: Mina Her <minacle@live.com>
6
6
  License: This is free and unencumbered software released into the public domain.
@@ -47,7 +47,6 @@ Requires-Dist: babel>=2.17.0
47
47
  Requires-Dist: google-genai>=1.56.0
48
48
  Requires-Dist: ollama>=0.4.0
49
49
  Requires-Dist: openai>=2.6.1
50
- Requires-Dist: tiktoken>=0.12.0
51
50
 
52
51
  # git-commit-message
53
52
 
@@ -167,6 +166,18 @@ git-commit-message --one-line "optional context"
167
166
  git-commit-message --one-line --co-author 'John Doe <john.doe@example.com>'
168
167
  ```
169
168
 
169
+ Use Conventional Commits constraints for the subject/footer only (body format is preserved):
170
+
171
+ ```sh
172
+ git-commit-message --conventional
173
+
174
+ # can be combined with one-line mode
175
+ git-commit-message --conventional --one-line
176
+
177
+ # co-author trailers are appended after any existing footers
178
+ git-commit-message --conventional --co-author copilot
179
+ ```
180
+
170
181
  Select provider:
171
182
 
172
183
  ```sh
@@ -223,10 +234,24 @@ git-commit-message --chunk-tokens 0
223
234
  # chunk the diff into ~4000-token pieces before summarising
224
235
  git-commit-message --chunk-tokens 4000
225
236
 
237
+ # note: for provider 'ollama', values >= 1 are not supported
238
+ # use 0 (single summary pass) or -1 (legacy one-shot)
239
+ git-commit-message --provider ollama --chunk-tokens 0
240
+
226
241
  # disable summarisation and use the legacy one-shot prompt
227
242
  git-commit-message --chunk-tokens -1
228
243
  ```
229
244
 
245
+ Adjust unified diff context lines:
246
+
247
+ ```sh
248
+ # use 5 context lines around each change hunk
249
+ git-commit-message --diff-context 5
250
+
251
+ # include only changed lines (no surrounding context)
252
+ git-commit-message --diff-context 0
253
+ ```
254
+
230
255
  Select output language/locale (IETF language tag):
231
256
 
232
257
  ```sh
@@ -258,9 +283,11 @@ git-commit-message --provider llamacpp --host http://192.168.1.100:8080
258
283
  - `--provider {openai,google,ollama,llamacpp}`: provider to use (default: `openai`)
259
284
  - `--model MODEL`: model override (provider-specific; ignored for llama.cpp)
260
285
  - `--language TAG`: output language/locale (default: `en-GB`)
286
+ - `--conventional`: apply Conventional Commits constraints to the subject and footer behaviour. The body format is unchanged and still includes the translated `Rationale:` line. Breaking changes are expressed with `!` in the subject line, and `BREAKING CHANGE` footer lines are not generated.
261
287
  - `--one-line`: output subject only when no trailers are appended; with `--co-author`, output is a single-line subject plus `Co-authored-by:` trailer lines
262
288
  - `--max-length N`: max subject length (default: 72)
263
- - `--chunk-tokens N`: token budget per diff chunk (`0` = single summary pass, `-1` disables summarisation)
289
+ - `--chunk-tokens N`: token budget per diff chunk (`0` = single summary pass, `-1` disables summarisation). For `ollama`, values `>= 1` are not supported.
290
+ - `--diff-context N`: context lines in unified diff (`N >= 0`). If omitted, uses `GIT_COMMIT_MESSAGE_DIFF_CONTEXT` when set; otherwise uses the Git default (usually `3`).
264
291
  - `--debug`: print request/response details
265
292
  - `--commit`: run `git commit -m <message>`
266
293
  - `--amend`: generate a message suitable for amending the previous commit (diff is from the amended commit's parent to the staged index; if nothing is staged, this effectively becomes the diff introduced by `HEAD`)
@@ -284,7 +311,8 @@ Optional:
284
311
  - `OLLAMA_HOST`: Ollama server URL (default: `http://localhost:11434`)
285
312
  - `LLAMACPP_HOST`: llama.cpp server URL (default: `http://localhost:8080`)
286
313
  - `GIT_COMMIT_MESSAGE_LANGUAGE`: default language/locale (default: `en-GB`)
287
- - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: default chunk token budget (default: `0`)
314
+ - `GIT_COMMIT_MESSAGE_CHUNK_TOKENS`: default chunk token budget (default: `0`; for `ollama`, values `>= 1` are not supported)
315
+ - `GIT_COMMIT_MESSAGE_DIFF_CONTEXT`: default unified diff context lines (`0` or greater). If unset, the Git default is used (usually `3`).
288
316
 
289
317
  Default models (if not overridden):
290
318
 
@@ -4,6 +4,7 @@ pyproject.toml
4
4
  src/git_commit_message/__init__.py
5
5
  src/git_commit_message/__main__.py
6
6
  src/git_commit_message/_cli.py
7
+ src/git_commit_message/_config.py
7
8
  src/git_commit_message/_gemini.py
8
9
  src/git_commit_message/_git.py
9
10
  src/git_commit_message/_gpt.py
@@ -2,4 +2,3 @@ babel>=2.17.0
2
2
  google-genai>=1.56.0
3
3
  ollama>=0.4.0
4
4
  openai>=2.6.1
5
- tiktoken>=0.12.0