gac 1.13.0__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gac/prompt.py CHANGED
@@ -8,10 +8,16 @@ formatting, and integration with diff preprocessing.
8
8
  import logging
9
9
  import re
10
10
 
11
+ from gac.constants import CommitMessageConstants
12
+
11
13
  logger = logging.getLogger(__name__)
12
14
 
13
- # Default template to use when no template file is found
14
- DEFAULT_TEMPLATE = """<role>
15
+
16
+ # ============================================================================
17
+ # Prompt Templates
18
+ # ============================================================================
19
+
20
+ DEFAULT_SYSTEM_TEMPLATE = """<role>
15
21
  You are an expert git commit message generator. Your task is to analyze code changes and create a concise, meaningful git commit message. You will receive git status and diff information. Your entire response will be used directly as a git commit message.
16
22
  </role>
17
23
 
@@ -33,12 +39,12 @@ When changes span multiple areas:
33
39
 
34
40
  <format>
35
41
  <one_liner>
36
- Create a single-line commit message (50-72 characters if possible).
42
+ Create a single-line commit message.
37
43
  Your message should be clear, concise, and descriptive of the core change.
38
44
  Use present tense ("Add feature" not "Added feature").
39
45
  </one_liner><multi_line>
40
46
  Create a commit message with:
41
- - First line: A concise summary (50-72 characters) that could stand alone
47
+ - First line: A concise summary that could stand alone
42
48
  - Blank line after the summary
43
49
  - Detailed body with multiple bullet points explaining the key changes
44
50
  - Focus on WHY changes were made, not just WHAT was changed
@@ -49,7 +55,7 @@ When changes span multiple areas:
49
55
 
50
56
  Your commit message MUST follow this structure:
51
57
 
52
- Line 1: A concise summary (up to ~72 characters) with conventional commit prefix
58
+ Line 1: A concise summary (that could stand alone) with conventional commit prefix
53
59
  Line 2: BLANK LINE (required)
54
60
  Lines 3+: Detailed multi-paragraph body with the following sections:
55
61
 
@@ -158,24 +164,6 @@ INCORRECT EXAMPLES (these formats are wrong and must NOT be used):
158
164
  You MUST NOT prefix the type(scope) with another type. Use EXACTLY ONE type, which MUST include the scope in parentheses.
159
165
  </conventions_with_scope>
160
166
 
161
- <hint>
162
- Additional context provided by the user: <hint_text></hint_text>
163
- </hint>
164
-
165
- <git_status>
166
- <status></status>
167
- </git_status>
168
-
169
- <git_diff_stat>
170
- <diff_stat></diff_stat>
171
- </git_diff_stat>
172
-
173
- <git_diff>
174
- <diff></diff>
175
- </git_diff>
176
-
177
-
178
-
179
167
  <examples_no_scope>
180
168
  Good commit messages (no scope):
181
169
  [OK] feat: add OAuth2 integration with Google and GitHub
@@ -252,297 +240,644 @@ Bad commit messages:
252
240
  [ERROR] WIP: still working on this
253
241
  [ERROR] Fixed bug
254
242
  [ERROR] Changes
255
- </examples_with_scope>
243
+ </examples_with_scope>"""
244
+
245
+ DEFAULT_USER_TEMPLATE = """<hint>
246
+ Additional context provided by the user: <hint_text></hint_text>
247
+ </hint>
248
+
249
+ <git_diff>
250
+ <diff></diff>
251
+ </git_diff>
252
+
253
+ <git_diff_stat>
254
+ <diff_stat></diff_stat>
255
+ </git_diff_stat>
256
256
 
257
- <instructions>
257
+ <git_status>
258
+ <status></status>
259
+ </git_status>
260
+
261
+ <language_instructions>
262
+ IMPORTANT: You MUST write the entire commit message in <language_name></language_name>.
263
+ All text in the commit message, including the summary line and body, must be in <language_name></language_name>.
264
+ <prefix_instruction></prefix_instruction>
265
+ </language_instructions>
266
+
267
+ <format_instructions>
258
268
  IMMEDIATELY AFTER ANALYZING THE CHANGES, RESPOND WITH ONLY THE COMMIT MESSAGE.
259
269
  DO NOT include any preamble, reasoning, explanations or anything other than the commit message itself.
260
270
  DO NOT use markdown formatting, headers, or code blocks.
261
271
  The entire response will be passed directly to 'git commit -m'.
262
- </instructions>"""
272
+ </format_instructions>"""
273
+
274
+ QUESTION_GENERATION_TEMPLATE = """<role>
275
+ You are an expert code reviewer specializing in identifying missing context and intent in code changes. Your task is to analyze git diffs and generate focused questions that clarify the "why" behind the changes.
276
+ </role>
277
+
278
+ <focus>
279
+ Analyze the git diff and identify missing "why" context. Generate 3-7 focused questions to clarify intent, motivation, and impact. Your questions should help the developer provide the essential context needed for a meaningful commit message.
280
+ </focus>
263
281
 
282
+ <guidelines>
283
+ - Focus on WHY the changes were made, not just WHAT was changed
284
+ - Ask about the intent, motivation, or business purpose behind the changes
285
+ - Consider what future developers need to understand about this change
286
+ - Ask about the broader impact or consequences of the changes
287
+ - Target areas where technical implementation doesn't reveal the underlying purpose
288
+ - Prioritize questions that would most help generate an informative commit message
289
+ - Keep questions concise and specific
290
+ - Format as a clean list for easy parsing
291
+ </guidelines>
292
+
293
+ <rules>
294
+ NEVER write or rewrite the commit message; only ask questions.
295
+ DO NOT suggest specific commit message formats or wording.
296
+ DO NOT ask about implementation details that are already clear from the diff.
297
+ DO NOT include any explanations or preamble with your response.
298
+ </rules>
299
+
300
+ <output_format>
301
+ Respond with ONLY a numbered list of questions, one per line:
302
+ 1. First focused question?
303
+ 2. Second focused question?
304
+ 3. Third focused question?
305
+ 4. [etc...]
306
+ </output_format>
307
+
308
+ <examples>
309
+ Good example questions:
310
+ 1. What problem or user need does this change address?
311
+ 2. Why was this particular approach chosen over alternatives?
312
+ 3. What impact will this have on existing functionality?
313
+ 4. What motivated the addition of these new error cases?
314
+ 5. Why are these validation rules being added now?
315
+
316
+ Bad examples (violates rules):
317
+ ❌ feat: add user authentication - This is a commit message, not a question
318
+ ❌ Should I use "feat" or "fix" for this change? - This asks about formatting, not context
319
+ ❌ Why did you rename the variable from x to y? - Too implementation-specific
320
+ ❌ You should reformat this as "fix: resolve authentication issue" - This rewrites the message
321
+ </examples>"""
322
+
323
+
324
+ # ============================================================================
325
+ # Template Loading
326
+ # ============================================================================
327
+
328
+
329
+ def load_system_template(custom_path: str | None = None) -> str:
330
+ """Load the system prompt template.
264
331
 
265
- def load_prompt_template() -> str:
266
- """Load the prompt template from the embedded default template.
332
+ Args:
333
+ custom_path: Optional path to a custom system template file
267
334
 
268
335
  Returns:
269
- Template content as string
336
+ System template content as string
270
337
  """
271
- logger.debug("Using default template")
272
- return DEFAULT_TEMPLATE
338
+ if custom_path:
339
+ return load_custom_system_template(custom_path)
273
340
 
341
+ logger.debug("Using default system template")
342
+ return DEFAULT_SYSTEM_TEMPLATE
274
343
 
275
- def build_prompt(
276
- status: str,
277
- processed_diff: str,
278
- diff_stat: str = "",
279
- one_liner: bool = False,
280
- infer_scope: bool = False,
281
- hint: str = "",
282
- verbose: bool = False,
283
- ) -> tuple[str, str]:
284
- """Build system and user prompts for the AI model using the provided template and git information.
344
+
345
+ def load_user_template() -> str:
346
+ """Load the user prompt template (contains git data sections and instructions).
347
+
348
+ Returns:
349
+ User template content as string
350
+ """
351
+ logger.debug("Using default user template")
352
+ return DEFAULT_USER_TEMPLATE
353
+
354
+
355
+ def load_custom_system_template(path: str) -> str:
356
+ """Load a custom system template from a file.
285
357
 
286
358
  Args:
287
- status: Git status output
288
- processed_diff: Git diff output, already preprocessed and ready to use
289
- diff_stat: Git diff stat output showing file changes summary
290
- one_liner: Whether to request a one-line commit message
291
- infer_scope: Whether to infer scope for the commit message
292
- hint: Optional hint to guide the AI
293
- verbose: Whether to generate detailed commit messages with motivation, architecture, and impact sections
359
+ path: Path to the custom system template file
294
360
 
295
361
  Returns:
296
- Tuple of (system_prompt, user_prompt) ready to be sent to an AI model
362
+ Custom system template content
363
+
364
+ Raises:
365
+ FileNotFoundError: If the template file doesn't exist
366
+ IOError: If there's an error reading the file
367
+ """
368
+ try:
369
+ with open(path, encoding="utf-8") as f:
370
+ content = f.read()
371
+ logger.info(f"Loaded custom system template from {path}")
372
+ return content
373
+ except FileNotFoundError:
374
+ logger.error(f"Custom system template not found: {path}")
375
+ raise
376
+ except OSError as e:
377
+ logger.error(f"Error reading custom system template from {path}: {e}")
378
+ raise
379
+
380
+
381
+ # ============================================================================
382
+ # Template Processing Helpers
383
+ # ============================================================================
384
+
385
+
386
+ def _remove_template_section(template: str, section_name: str) -> str:
387
+ """Remove a tagged section from the template.
388
+
389
+ Args:
390
+ template: The template string
391
+ section_name: Name of the section to remove (without < > brackets)
392
+
393
+ Returns:
394
+ Template with the section removed
297
395
  """
298
- template = load_prompt_template()
396
+ pattern = f"<{section_name}>.*?</{section_name}>\\n?"
397
+ return re.sub(pattern, "", template, flags=re.DOTALL)
398
+
299
399
 
300
- # Select the appropriate conventions section based on infer_scope parameter
400
+ def _select_conventions_section(template: str, infer_scope: bool) -> str:
401
+ """Select and normalize the appropriate conventions section.
402
+
403
+ Args:
404
+ template: The template string
405
+ infer_scope: Whether to infer scope for commits
406
+
407
+ Returns:
408
+ Template with the appropriate conventions section selected
409
+ """
301
410
  try:
302
411
  logger.debug(f"Processing infer_scope parameter: {infer_scope}")
303
412
  if infer_scope:
304
- # User wants to infer a scope from changes (any value other than None)
305
413
  logger.debug("Using inferred-scope conventions")
306
- template = re.sub(r"<conventions_no_scope>.*?</conventions_no_scope>\n", "", template, flags=re.DOTALL)
414
+ template = _remove_template_section(template, "conventions_no_scope")
307
415
  template = template.replace("<conventions_with_scope>", "<conventions>")
308
416
  template = template.replace("</conventions_with_scope>", "</conventions>")
309
417
  else:
310
- # No scope - use the plain conventions section
311
418
  logger.debug("Using no-scope conventions")
312
- template = re.sub(r"<conventions_with_scope>.*?</conventions_with_scope>\n", "", template, flags=re.DOTALL)
419
+ template = _remove_template_section(template, "conventions_with_scope")
313
420
  template = template.replace("<conventions_no_scope>", "<conventions>")
314
421
  template = template.replace("</conventions_no_scope>", "</conventions>")
315
422
  except Exception as e:
316
423
  logger.error(f"Error processing scope parameter: {e}")
317
- # Fallback to no scope if there's an error
318
- template = re.sub(r"<conventions_with_scope>.*?</conventions_with_scope>\n", "", template, flags=re.DOTALL)
424
+ template = _remove_template_section(template, "conventions_with_scope")
319
425
  template = template.replace("<conventions_no_scope>", "<conventions>")
320
426
  template = template.replace("</conventions_no_scope>", "</conventions>")
427
+ return template
321
428
 
322
- template = template.replace("<status></status>", status)
323
- template = template.replace("<diff_stat></diff_stat>", diff_stat)
324
- template = template.replace("<diff></diff>", processed_diff)
325
429
 
326
- # Add hint if present
327
- if hint:
328
- template = template.replace("<hint_text></hint_text>", hint)
329
- logger.debug(f"Added hint ({len(hint)} characters)")
330
- else:
331
- template = re.sub(r"<hint>.*?</hint>", "", template, flags=re.DOTALL)
332
- logger.debug("No hint provided")
430
+ def _select_format_section(template: str, verbose: bool, one_liner: bool) -> str:
431
+ """Select the appropriate format section based on verbosity and one-liner settings.
432
+
433
+ Priority: verbose > one_liner > multi_line
434
+
435
+ Args:
436
+ template: The template string
437
+ verbose: Whether to use verbose format
438
+ one_liner: Whether to use one-liner format
333
439
 
334
- # Process format options (verbose, one-liner, or multi-line)
335
- # Priority: verbose > one_liner > multi_line
440
+ Returns:
441
+ Template with the appropriate format section selected
442
+ """
336
443
  if verbose:
337
- # Verbose mode: remove one_liner and multi_line, keep verbose
338
- template = re.sub(r"<one_liner>.*?</one_liner>", "", template, flags=re.DOTALL)
339
- template = re.sub(r"<multi_line>.*?</multi_line>", "", template, flags=re.DOTALL)
444
+ template = _remove_template_section(template, "one_liner")
445
+ template = _remove_template_section(template, "multi_line")
340
446
  elif one_liner:
341
- # One-liner mode: remove multi_line and verbose
342
- template = re.sub(r"<multi_line>.*?</multi_line>", "", template, flags=re.DOTALL)
343
- template = re.sub(r"<verbose>.*?</verbose>", "", template, flags=re.DOTALL)
447
+ template = _remove_template_section(template, "multi_line")
448
+ template = _remove_template_section(template, "verbose")
344
449
  else:
345
- # Multi-line mode (default): remove one_liner and verbose
346
- template = re.sub(r"<one_liner>.*?</one_liner>", "", template, flags=re.DOTALL)
347
- template = re.sub(r"<verbose>.*?</verbose>", "", template, flags=re.DOTALL)
450
+ template = _remove_template_section(template, "one_liner")
451
+ template = _remove_template_section(template, "verbose")
452
+ return template
453
+
454
+
455
+ def _select_examples_section(template: str, verbose: bool, infer_scope: bool) -> str:
456
+ """Select the appropriate examples section based on verbosity and scope settings.
348
457
 
349
- # Clean up examples sections based on verbose and infer_scope settings
458
+ Args:
459
+ template: The template string
460
+ verbose: Whether verbose mode is enabled
461
+ infer_scope: Whether scope inference is enabled
462
+
463
+ Returns:
464
+ Template with the appropriate examples section selected
465
+ """
350
466
  if verbose and infer_scope:
351
- # Verbose mode with scope - keep verbose_with_scope examples
352
- template = re.sub(r"<examples_no_scope>.*?</examples_no_scope>\n?", "", template, flags=re.DOTALL)
353
- template = re.sub(r"<examples_with_scope>.*?</examples_with_scope>\n?", "", template, flags=re.DOTALL)
354
- template = re.sub(
355
- r"<examples_verbose_no_scope>.*?</examples_verbose_no_scope>\n?", "", template, flags=re.DOTALL
356
- )
467
+ template = _remove_template_section(template, "examples_no_scope")
468
+ template = _remove_template_section(template, "examples_with_scope")
469
+ template = _remove_template_section(template, "examples_verbose_no_scope")
357
470
  template = template.replace("<examples_verbose_with_scope>", "<examples>")
358
471
  template = template.replace("</examples_verbose_with_scope>", "</examples>")
359
472
  elif verbose:
360
- # Verbose mode without scope - keep verbose_no_scope examples
361
- template = re.sub(r"<examples_no_scope>.*?</examples_no_scope>\n?", "", template, flags=re.DOTALL)
362
- template = re.sub(r"<examples_with_scope>.*?</examples_with_scope>\n?", "", template, flags=re.DOTALL)
363
- template = re.sub(
364
- r"<examples_verbose_with_scope>.*?</examples_verbose_with_scope>\n?", "", template, flags=re.DOTALL
365
- )
473
+ template = _remove_template_section(template, "examples_no_scope")
474
+ template = _remove_template_section(template, "examples_with_scope")
475
+ template = _remove_template_section(template, "examples_verbose_with_scope")
366
476
  template = template.replace("<examples_verbose_no_scope>", "<examples>")
367
477
  template = template.replace("</examples_verbose_no_scope>", "</examples>")
368
478
  elif infer_scope:
369
- # With scope (inferred) - keep scope examples, remove all others
370
- template = re.sub(r"<examples_no_scope>.*?</examples_no_scope>\n?", "", template, flags=re.DOTALL)
371
- template = re.sub(
372
- r"<examples_verbose_no_scope>.*?</examples_verbose_no_scope>\n?", "", template, flags=re.DOTALL
373
- )
374
- template = re.sub(
375
- r"<examples_verbose_with_scope>.*?</examples_verbose_with_scope>\n?", "", template, flags=re.DOTALL
376
- )
479
+ template = _remove_template_section(template, "examples_no_scope")
480
+ template = _remove_template_section(template, "examples_verbose_no_scope")
481
+ template = _remove_template_section(template, "examples_verbose_with_scope")
377
482
  template = template.replace("<examples_with_scope>", "<examples>")
378
483
  template = template.replace("</examples_with_scope>", "</examples>")
379
484
  else:
380
- # No scope - keep no_scope examples, remove all others
381
- template = re.sub(r"<examples_with_scope>.*?</examples_with_scope>\n?", "", template, flags=re.DOTALL)
382
- template = re.sub(
383
- r"<examples_verbose_no_scope>.*?</examples_verbose_no_scope>\n?", "", template, flags=re.DOTALL
384
- )
385
- template = re.sub(
386
- r"<examples_verbose_with_scope>.*?</examples_verbose_with_scope>\n?", "", template, flags=re.DOTALL
387
- )
485
+ template = _remove_template_section(template, "examples_with_scope")
486
+ template = _remove_template_section(template, "examples_verbose_no_scope")
487
+ template = _remove_template_section(template, "examples_verbose_with_scope")
388
488
  template = template.replace("<examples_no_scope>", "<examples>")
389
489
  template = template.replace("</examples_no_scope>", "</examples>")
490
+ return template
491
+
492
+
493
+ # ============================================================================
494
+ # Prompt Building
495
+ # ============================================================================
496
+
497
+
498
+ def build_prompt(
499
+ status: str,
500
+ processed_diff: str,
501
+ diff_stat: str = "",
502
+ one_liner: bool = False,
503
+ infer_scope: bool = False,
504
+ hint: str = "",
505
+ verbose: bool = False,
506
+ system_template_path: str | None = None,
507
+ language: str | None = None,
508
+ translate_prefixes: bool = False,
509
+ ) -> tuple[str, str]:
510
+ """Build system and user prompts for the AI model using the provided templates and git information.
511
+
512
+ Args:
513
+ status: Git status output
514
+ processed_diff: Git diff output, already preprocessed and ready to use
515
+ diff_stat: Git diff stat output showing file changes summary
516
+ one_liner: Whether to request a one-line commit message
517
+ infer_scope: Whether to infer scope for the commit message
518
+ hint: Optional hint to guide the AI
519
+ verbose: Whether to generate detailed commit messages with motivation, architecture, and impact sections
520
+ system_template_path: Optional path to custom system template
521
+ language: Optional language for commit messages (e.g., "Spanish", "French", "Japanese")
522
+ translate_prefixes: Whether to translate conventional commit prefixes (default: False keeps them in English)
523
+
524
+ Returns:
525
+ Tuple of (system_prompt, user_prompt) ready to be sent to an AI model
526
+ """
527
+ system_template = load_system_template(system_template_path)
528
+ user_template = load_user_template()
390
529
 
391
- # Clean up extra whitespace, collapsing blank lines that may contain spaces
392
- template = re.sub(r"\n(?:[ \t]*\n){2,}", "\n\n", template)
393
-
394
- # Split the template into system and user prompts
395
- # System prompt contains all instructions, role, conventions, examples
396
- # User prompt contains the actual git data
397
-
398
- # Extract the git data sections for the user prompt
399
- user_sections = []
400
-
401
- # Extract git status
402
- status_match = re.search(r"<git_status>.*?</git_status>", template, re.DOTALL)
403
- if status_match:
404
- user_sections.append(status_match.group(0))
405
- # Remove from system prompt
406
- template = template.replace(status_match.group(0), "")
407
-
408
- # Extract git diff stat
409
- diff_stat_match = re.search(r"<git_diff_stat>.*?</git_diff_stat>", template, re.DOTALL)
410
- if diff_stat_match:
411
- user_sections.append(diff_stat_match.group(0))
412
- # Remove from system prompt
413
- template = template.replace(diff_stat_match.group(0), "")
414
-
415
- # Extract git diff
416
- diff_match = re.search(r"<git_diff>.*?</git_diff>", template, re.DOTALL)
417
- if diff_match:
418
- user_sections.append(diff_match.group(0))
419
- # Remove from system prompt
420
- template = template.replace(diff_match.group(0), "")
421
-
422
- # Extract hint if present
423
- hint_match = re.search(r"<hint>.*?</hint>", template, re.DOTALL)
424
- if hint_match and hint: # Only include if hint was provided
425
- user_sections.append(hint_match.group(0))
426
- # Remove from system prompt
427
- template = template.replace(hint_match.group(0), "")
428
-
429
- # System prompt is everything else (role, conventions, examples, instructions)
430
- system_prompt = template.strip()
431
- system_prompt = re.sub(r"\n(?:[ \t]*\n){2,}", "\n\n", system_prompt)
432
-
433
- # User prompt is the git data sections
434
- user_prompt = "\n\n".join(user_sections).strip()
530
+ system_template = _select_conventions_section(system_template, infer_scope)
531
+ system_template = _select_format_section(system_template, verbose, one_liner)
532
+ system_template = _select_examples_section(system_template, verbose, infer_scope)
533
+ system_template = re.sub(r"\n(?:[ \t]*\n){2,}", "\n\n", system_template)
534
+
535
+ user_template = user_template.replace("<status></status>", status)
536
+ user_template = user_template.replace("<diff_stat></diff_stat>", diff_stat)
537
+ user_template = user_template.replace("<diff></diff>", processed_diff)
538
+
539
+ if hint:
540
+ user_template = user_template.replace("<hint_text></hint_text>", hint)
541
+ logger.debug(f"Added hint ({len(hint)} characters)")
542
+ else:
543
+ user_template = _remove_template_section(user_template, "hint")
544
+ logger.debug("No hint provided")
545
+
546
+ if language:
547
+ user_template = user_template.replace("<language_name></language_name>", language)
548
+
549
+ # Set prefix instruction based on translate_prefixes setting
550
+ if translate_prefixes:
551
+ prefix_instruction = f"""CRITICAL: You MUST translate the conventional commit prefix into {language}.
552
+ DO NOT use English prefixes like 'feat:', 'fix:', 'docs:', etc.
553
+ Instead, translate them into {language} equivalents.
554
+ Examples:
555
+ - 'feat:' → translate to {language} word for 'feature' or 'add'
556
+ - 'fix:' → translate to {language} word for 'fix' or 'correct'
557
+ - 'docs:' → translate to {language} word for 'documentation'
558
+ The ENTIRE commit message, including the prefix, must be in {language}."""
559
+ logger.debug(f"Set commit message language to: {language} (with prefix translation)")
560
+ else:
561
+ prefix_instruction = (
562
+ "The conventional commit prefix (feat:, fix:, etc.) should remain in English, but everything after the prefix must be in "
563
+ + language
564
+ + "."
565
+ )
566
+ logger.debug(f"Set commit message language to: {language} (English prefixes)")
567
+
568
+ user_template = user_template.replace("<prefix_instruction></prefix_instruction>", prefix_instruction)
569
+ else:
570
+ user_template = _remove_template_section(user_template, "language_instructions")
571
+ logger.debug("Using default language (English)")
572
+
573
+ user_template = re.sub(r"\n(?:[ \t]*\n){2,}", "\n\n", user_template)
574
+
575
+ return system_template.strip(), user_template.strip()
576
+
577
+
578
+ def build_group_prompt(
579
+ status: str,
580
+ processed_diff: str,
581
+ diff_stat: str,
582
+ one_liner: bool,
583
+ hint: str,
584
+ infer_scope: bool,
585
+ verbose: bool,
586
+ system_template_path: str | None,
587
+ language: str | None,
588
+ translate_prefixes: bool,
589
+ ) -> tuple[str, str]:
590
+ """Build prompt for grouped commit generation (JSON output with multiple commits)."""
591
+ system_prompt, user_prompt = build_prompt(
592
+ status=status,
593
+ processed_diff=processed_diff,
594
+ diff_stat=diff_stat,
595
+ one_liner=one_liner,
596
+ hint=hint,
597
+ infer_scope=infer_scope,
598
+ verbose=verbose,
599
+ system_template_path=system_template_path,
600
+ language=language,
601
+ translate_prefixes=translate_prefixes,
602
+ )
603
+
604
+ user_prompt = _remove_template_section(user_prompt, "format_instructions")
605
+
606
+ grouping_instructions = """
607
+ <format_instructions>
608
+ Your task is to split the changed files into separate, logical commits. Think of this like sorting files into different folders where each file belongs in exactly one folder.
609
+
610
+ CRITICAL REQUIREMENT - Every File Used Exactly Once:
611
+ You must assign EVERY file from the diff to exactly ONE commit.
612
+ - NO file should be left out
613
+ - NO file should appear in multiple commits
614
+ - EVERY file must be used once and ONLY once
615
+
616
+ Think of it like dealing cards: Once you've dealt a card to a player, that card cannot be dealt to another player.
617
+
618
+ HOW TO SPLIT THE FILES:
619
+ 1. Review all changed files in the diff
620
+ 2. Group files by logical relationship (e.g., related features, bug fixes, documentation)
621
+ 3. Assign each file to exactly one commit based on what makes the most sense
622
+ 4. If a file could fit in multiple commits, pick the best fit and move on - do NOT duplicate it
623
+ 5. Continue until every single file has been assigned to a commit
624
+
625
+ ORDERING:
626
+ Order the commits in a logical sequence considering dependencies, natural progression, and overall workflow.
627
+
628
+ YOUR RESPONSE FORMAT:
629
+ Respond with valid JSON following this structure:
630
+ ```json
631
+ {
632
+ "commits": [
633
+ {
634
+ "files": ["src/auth/login.ts", "src/auth/logout.ts"],
635
+ "message": "<commit_message_conforming_to_prescribed_structure_and_format>"
636
+ },
637
+ {
638
+ "files": ["src/db/schema.sql", "src/db/migrations/001.sql"],
639
+ "message": "<commit_message_conforming_to_prescribed_structure_and_format>"
640
+ },
641
+ {
642
+ "files": ["tests/auth.test.ts", "tests/db.test.ts", "README.md"],
643
+ "message": "<commit_message_conforming_to_prescribed_structure_and_format>"
644
+ }
645
+ ]
646
+ }
647
+ ```
648
+
649
+ ☝️ Notice how EVERY file path in the example above appears exactly ONCE across all commits. "src/auth/login.ts" appears once. "tests/auth.test.ts" appears once. No file is repeated.
650
+
651
+ VALIDATION CHECKLIST - Before responding, verify:
652
+ □ Total files across all commits = Total files in the diff
653
+ □ Each file appears in exactly 1 commit (no duplicates, no omissions)
654
+ □ Every commit has at least one file
655
+ □ If you list all files from all commits and count them, you get the same count as unique files in the diff
656
+ </format_instructions>
657
+ """
658
+
659
+ user_prompt = user_prompt + grouping_instructions
435
660
 
436
661
  return system_prompt, user_prompt
437
662
 
438
663
 
439
- def clean_commit_message(message: str) -> str:
440
- """Clean up a commit message generated by an AI model.
664
+ def build_question_generation_prompt(
665
+ status: str,
666
+ processed_diff: str,
667
+ diff_stat: str = "",
668
+ hint: str = "",
669
+ ) -> tuple[str, str]:
670
+ """Build system and user prompts for question generation about staged changes.
441
671
 
442
- This function:
443
- 1. Removes any preamble or reasoning text
444
- 2. Removes code block markers and formatting
445
- 3. Removes XML tags that might have leaked into the response
446
- 4. Ensures the message starts with a conventional commit prefix
447
- 5. Fixes double type prefix issues (e.g., "chore: feat(scope):")
672
+ Args:
673
+ status: Git status output
674
+ processed_diff: Git diff output, already preprocessed and ready to use
675
+ diff_stat: Git diff stat output showing file changes summary
676
+ hint: Optional hint to guide the question generation
677
+
678
+ Returns:
679
+ Tuple of (system_prompt, user_prompt) ready to be sent to an AI model
680
+ """
681
+ system_prompt = QUESTION_GENERATION_TEMPLATE
682
+
683
+ # Build user prompt with git context
684
+ user_prompt = f"""<git_diff>
685
+ {processed_diff}
686
+ </git_diff>
687
+
688
+ <git_diff_stat>
689
+ {diff_stat}
690
+ </git_diff_stat>
691
+
692
+ <git_status>
693
+ {status}
694
+ </git_status>"""
695
+
696
+ if hint:
697
+ user_prompt = f"""<hint>
698
+ Additional context provided by the user: {hint}
699
+ </hint>
700
+
701
+ {user_prompt}"""
702
+
703
+ # Add instruction to ask questions in the appropriate language if specified
704
+ user_prompt += """
705
+
706
+ <format_instructions>
707
+ Analyze the changes above and generate 3-7 focused questions that clarify the intent, motivation, and impact of these changes. Respond with ONLY a numbered list of questions as specified in the system prompt.
708
+ </format_instructions>"""
709
+
710
+ return system_prompt.strip(), user_prompt.strip()
711
+
712
+
713
+ # ============================================================================
714
+ # Message Cleaning Helpers
715
+ # ============================================================================
716
+
717
+
718
+ def _remove_think_tags(message: str) -> str:
719
+ """Remove AI reasoning <think> tags and their content from the message.
448
720
 
449
721
  Args:
450
- message: Raw commit message from AI
722
+ message: The message to clean
451
723
 
452
724
  Returns:
453
- Cleaned commit message ready for use
725
+ Message with <think> tags removed
454
726
  """
455
- message = message.strip()
727
+ while re.search(r"<think>(?:(?!</think>)[^\n])*\n.*?</think>", message, flags=re.DOTALL | re.IGNORECASE):
728
+ message = re.sub(
729
+ r"<think>(?:(?!</think>)[^\n])*\n.*?</think>\s*", "", message, flags=re.DOTALL | re.IGNORECASE, count=1
730
+ )
731
+
732
+ message = re.sub(r"\n\n+\s*<think>.*?</think>\s*", "", message, flags=re.DOTALL | re.IGNORECASE)
733
+ message = re.sub(r"<think>.*?</think>\s*\n\n+", "", message, flags=re.DOTALL | re.IGNORECASE)
734
+
735
+ message = re.sub(r"<think>\s*\n.*$", "", message, flags=re.DOTALL | re.IGNORECASE)
736
+
737
+ conventional_prefixes_pattern = r"(" + "|".join(CommitMessageConstants.CONVENTIONAL_PREFIXES) + r")[\(:)]"
738
+ if re.search(r"^.*?</think>", message, flags=re.DOTALL | re.IGNORECASE):
739
+ prefix_match = re.search(conventional_prefixes_pattern, message, flags=re.IGNORECASE)
740
+ think_match = re.search(r"</think>", message, flags=re.IGNORECASE)
741
+
742
+ if not prefix_match or (think_match and think_match.start() < prefix_match.start()):
743
+ message = re.sub(r"^.*?</think>\s*", "", message, flags=re.DOTALL | re.IGNORECASE)
456
744
 
457
- # Remove any markdown code blocks
458
- message = re.sub(r"```[\w]*\n|```", "", message)
459
-
460
- # Extract the actual commit message if it follows our reasoning pattern
461
- # Look for different indicators of where the actual commit message starts
462
- commit_indicators = [
463
- "# Your commit message:",
464
- "Your commit message:",
465
- "The commit message is:",
466
- "Here's the commit message:",
467
- "Commit message:",
468
- "Final commit message:",
469
- "# Commit Message",
470
- ]
471
-
472
- for indicator in commit_indicators:
745
+ message = re.sub(r"</think>\s*$", "", message, flags=re.IGNORECASE)
746
+
747
+ return message
748
+
749
+
750
+ def _remove_code_blocks(message: str) -> str:
751
+ """Remove markdown code blocks from the message.
752
+
753
+ Args:
754
+ message: The message to clean
755
+
756
+ Returns:
757
+ Message with code blocks removed
758
+ """
759
+ return re.sub(r"```[\w]*\n|```", "", message)
760
+
761
+
762
+ def _extract_commit_from_reasoning(message: str) -> str:
763
+ """Extract the actual commit message from reasoning/preamble text.
764
+
765
+ Args:
766
+ message: The message potentially containing reasoning
767
+
768
+ Returns:
769
+ Extracted commit message
770
+ """
771
+ for indicator in CommitMessageConstants.COMMIT_INDICATORS:
473
772
  if indicator.lower() in message.lower():
474
- # Extract everything after the indicator
475
773
  message = message.split(indicator, 1)[1].strip()
476
774
  break
477
775
 
478
- # If message starts with any kind of explanation text, try to locate a conventional prefix
479
776
  lines = message.split("\n")
480
777
  for i, line in enumerate(lines):
481
- if any(
482
- line.strip().startswith(prefix)
483
- for prefix in ["feat:", "fix:", "docs:", "style:", "refactor:", "perf:", "test:", "build:", "ci:", "chore:"]
484
- ):
778
+ if any(line.strip().startswith(f"{prefix}:") for prefix in CommitMessageConstants.CONVENTIONAL_PREFIXES):
485
779
  message = "\n".join(lines[i:])
486
780
  break
487
781
 
488
- # Remove any XML tags that might have leaked into the response
489
- for tag in [
490
- "<git-status>",
491
- "</git-status>",
492
- "<git_status>",
493
- "</git_status>",
494
- "<git-diff>",
495
- "</git-diff>",
496
- "<git_diff>",
497
- "</git_diff>",
498
- "<repository_context>",
499
- "</repository_context>",
500
- "<instructions>",
501
- "</instructions>",
502
- "<format>",
503
- "</format>",
504
- "<conventions>",
505
- "</conventions>",
506
- ]:
782
+ return message
783
+
784
+
785
+ def _remove_xml_tags(message: str) -> str:
786
+ """Remove XML tags that might have leaked into the message.
787
+
788
+ Args:
789
+ message: The message to clean
790
+
791
+ Returns:
792
+ Message with XML tags removed
793
+ """
794
+ for tag in CommitMessageConstants.XML_TAGS_TO_REMOVE:
507
795
  message = message.replace(tag, "")
796
+ return message
797
+
798
+
799
+ def _fix_double_prefix(message: str) -> str:
800
+ """Fix double type prefix issues like 'chore: feat(scope):' to 'feat(scope):'.
801
+
802
+ Args:
803
+ message: The message to fix
508
804
 
509
- # Fix double type prefix issues (e.g., "chore: feat(scope):") to just "feat(scope):")
510
- conventional_prefixes = [
511
- "feat",
512
- "fix",
513
- "docs",
514
- "style",
515
- "refactor",
516
- "perf",
517
- "test",
518
- "build",
519
- "ci",
520
- "chore",
521
- ]
522
-
523
- # Look for double prefix pattern like "chore: feat(scope):" and fix it
524
- # This regex looks for a conventional prefix followed by another conventional prefix with a scope
805
+ Returns:
806
+ Message with double prefix corrected
807
+ """
525
808
  double_prefix_pattern = re.compile(
526
- r"^(" + r"|\s*".join(conventional_prefixes) + r"):\s*(" + r"|\s*".join(conventional_prefixes) + r")\(([^)]+)\):"
809
+ r"^("
810
+ + r"|\s*".join(CommitMessageConstants.CONVENTIONAL_PREFIXES)
811
+ + r"):\s*("
812
+ + r"|\s*".join(CommitMessageConstants.CONVENTIONAL_PREFIXES)
813
+ + r")\(([^)]+)\):"
527
814
  )
528
815
  match = double_prefix_pattern.match(message)
529
816
 
530
817
  if match:
531
- # Extract the second type and scope, which is what we want to keep
532
818
  second_type = match.group(2)
533
819
  scope = match.group(3)
534
820
  description = message[match.end() :].strip()
535
821
  message = f"{second_type}({scope}): {description}"
536
822
 
537
- # Ensure message starts with a conventional commit prefix
823
+ return message
824
+
825
+
826
+ def _ensure_conventional_prefix(message: str) -> str:
827
+ """Ensure the message starts with a conventional commit prefix.
828
+
829
+ Args:
830
+ message: The message to check
831
+
832
+ Returns:
833
+ Message with conventional prefix ensured
834
+ """
538
835
  if not any(
539
836
  message.strip().startswith(prefix + ":") or message.strip().startswith(prefix + "(")
540
- for prefix in conventional_prefixes
837
+ for prefix in CommitMessageConstants.CONVENTIONAL_PREFIXES
541
838
  ):
542
839
  message = f"chore: {message.strip()}"
840
+ return message
841
+
842
+
843
+ def _normalize_whitespace(message: str) -> str:
844
+ """Normalize whitespace, ensuring no more than one blank line between paragraphs.
845
+
846
+ Args:
847
+ message: The message to normalize
848
+
849
+ Returns:
850
+ Message with normalized whitespace
851
+ """
852
+ return re.sub(r"\n(?:[ \t]*\n){2,}", "\n\n", message).strip()
853
+
543
854
 
544
- # Final cleanup: trim extra whitespace and ensure no more than one blank line
545
- # Handle blank lines that may include spaces or tabs
546
- message = re.sub(r"\n(?:[ \t]*\n){2,}", "\n\n", message).strip()
855
+ # ============================================================================
856
+ # Message Cleaning
857
+ # ============================================================================
547
858
 
859
+
860
+ def clean_commit_message(message: str) -> str:
861
+ """Clean up a commit message generated by an AI model.
862
+
863
+ This function:
864
+ 1. Removes any preamble or reasoning text
865
+ 2. Removes code block markers and formatting
866
+ 3. Removes XML tags that might have leaked into the response
867
+ 4. Fixes double type prefix issues (e.g., "chore: feat(scope):")
868
+ 5. Normalizes whitespace
869
+
870
+ Args:
871
+ message: Raw commit message from AI
872
+
873
+ Returns:
874
+ Cleaned commit message ready for use
875
+ """
876
+ message = message.strip()
877
+ message = _remove_think_tags(message)
878
+ message = _remove_code_blocks(message)
879
+ message = _extract_commit_from_reasoning(message)
880
+ message = _remove_xml_tags(message)
881
+ message = _fix_double_prefix(message)
882
+ message = _normalize_whitespace(message)
548
883
  return message