aline-ai 0.1.10__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
realign/hooks.py CHANGED
@@ -167,6 +167,41 @@ def find_codex_latest_session(project_path: Path, days_back: int = 7) -> Optiona
167
167
  return matching_sessions[0] if matching_sessions else None
168
168
 
169
169
 
170
def find_all_claude_sessions() -> List[Path]:
    """
    Collect active Claude Code sessions from every known project.

    Walks ~/.claude/projects/ and picks the most recent session file
    from each project directory found there.

    Returns:
        List of session file paths, at most one per Claude project.
    """
    found: List[Path] = []
    projects_root = Path.home() / ".claude" / "projects"

    if not projects_root.exists():
        logger.debug(f"Claude projects directory not found: {projects_root}")
        return found

    for entry in projects_root.iterdir():
        # Only real project directories count; hidden/system dirs are skipped.
        if not entry.is_dir() or entry.name.startswith('.'):
            continue

        # Each project contributes its single most recent session, if any.
        latest = find_latest_session(entry)
        if latest:
            found.append(latest)
            logger.debug(f"Found Claude session in {entry.name}: {latest.name}")

    logger.info(f"Found {len(found)} Claude session(s) across all projects")
    return found
203
+
204
+
170
205
  def find_all_active_sessions(
171
206
  config: ReAlignConfig,
172
207
  project_path: Optional[Path] = None
@@ -181,13 +216,15 @@ def find_all_active_sessions(
181
216
 
182
217
  Args:
183
218
  config: Configuration object
184
- project_path: Optional path to the current project (git repo root)
219
+ project_path: Optional path to the current project (git repo root).
220
+ If None, will find sessions from ALL projects (multi-project mode).
185
221
 
186
222
  Returns:
187
223
  List of session file paths (may be empty if no sessions found)
188
224
  """
189
225
  logger.info("Searching for active AI sessions")
190
226
  logger.debug(f"Config: auto_detect_codex={config.auto_detect_codex}, auto_detect_claude={config.auto_detect_claude}")
227
+ logger.debug(f"Project path: {project_path}")
191
228
 
192
229
  sessions = []
193
230
 
@@ -203,15 +240,47 @@ def find_all_active_sessions(
203
240
  logger.warning(f"No session found at explicit path: {history_path}")
204
241
  return sessions
205
242
 
243
+ # Multi-project mode: find sessions from ALL projects
244
+ if project_path is None:
245
+ logger.info("Multi-project mode: scanning all projects")
246
+
247
+ # Find all Claude sessions if enabled
248
+ if config.auto_detect_claude:
249
+ logger.debug("Scanning all Claude projects")
250
+ claude_sessions = find_all_claude_sessions()
251
+ sessions.extend(claude_sessions)
252
+
253
+ # TODO: Add Codex multi-project support if needed
254
+ # For now, Codex sessions are only found when project_path is specified
255
+
256
+ if sessions:
257
+ logger.info(f"Multi-project scan complete: found {len(sessions)} session(s)")
258
+ return sessions
259
+
260
+ # Fallback: try local history path
261
+ logger.debug("No sessions found in multi-project scan, trying fallback path")
262
+ history_path = config.expanded_local_history_path
263
+ session = find_latest_session(history_path)
264
+ if session:
265
+ sessions.append(session)
266
+ logger.info(f"Found session at fallback path: {session}")
267
+ else:
268
+ logger.warning(f"No session found at fallback path: {history_path}")
269
+
270
+ return sessions
271
+
272
+ # Single-project mode: find sessions for specific project
273
+ logger.info(f"Single-project mode for: {project_path}")
274
+
206
275
  # Try Codex auto-detection if enabled
207
- if config.auto_detect_codex and project_path:
276
+ if config.auto_detect_codex:
208
277
  logger.debug("Attempting Codex auto-detection")
209
278
  codex_session = find_codex_latest_session(project_path)
210
279
  if codex_session:
211
280
  sessions.append(codex_session)
212
281
 
213
282
  # Try Claude auto-detection if enabled
214
- if config.auto_detect_claude and project_path:
283
+ if config.auto_detect_claude:
215
284
  logger.debug("Attempting Claude auto-detection")
216
285
  claude_dir = find_claude_sessions_dir(project_path)
217
286
  if claude_dir:
@@ -328,13 +397,48 @@ def simple_summarize(content: str, max_chars: int = 500) -> str:
328
397
  summary = " | ".join(summaries[:3])
329
398
  return summary[:max_chars]
330
399
 
400
+ # Fallback: surface the first few non-empty raw lines to give context
401
+ fallback_lines = []
402
+ for raw_line in lines:
403
+ stripped = raw_line.strip()
404
+ if not stripped:
405
+ continue
406
+ # Skip noisy JSON braces-only lines
407
+ if stripped in ("{", "}", "[", "]"):
408
+ continue
409
+ fallback_lines.append(stripped[:120])
410
+ if len(fallback_lines) == 3:
411
+ break
412
+
413
+ if fallback_lines:
414
+ summary = " | ".join(fallback_lines)
415
+ return summary[:max_chars]
416
+
331
417
  return f"Session updated with {len(lines)} new lines"
332
418
 
333
419
 
334
- def generate_summary_with_llm(content: str, max_chars: int = 500, provider: str = "auto") -> Optional[str]:
420
def detect_agent_from_session_path(session_relpath: str) -> str:
    """
    Infer the AI agent type from a session file path.

    Matching is case-insensitive and checks Codex markers before Claude
    markers, so a path containing both is labeled "Codex".

    Args:
        session_relpath: Relative path (or bare filename) of the session file.

    Returns:
        "Codex", "Claude", or "Unknown" when no marker is present.
    """
    lower_path = session_relpath.lower()

    if "codex" in lower_path or "rollout-" in lower_path:
        return "Codex"
    if "claude" in lower_path or "agent-" in lower_path:
        return "Claude"
    # Generic files (including unmarked .jsonl) stay "Unknown" to avoid
    # mislabeling; the previous explicit .jsonl branch was redundant since
    # it returned the same value as this fallback.
    return "Unknown"
432
+
433
+
434
+ def generate_summary_with_llm(
435
+ content: str,
436
+ max_chars: int = 500,
437
+ provider: str = "auto"
438
+ ) -> Tuple[Optional[str], Optional[str]]:
335
439
  """
336
440
  Generate summary using LLM (Anthropic Claude or OpenAI) for NEW content only.
337
- Returns None if LLM is not available or fails.
441
+ Returns (summary, model_name) tuple, or (None, None) if LLM is unavailable.
338
442
 
339
443
  Args:
340
444
  content: Raw text content of new session additions
@@ -345,7 +449,7 @@ def generate_summary_with_llm(content: str, max_chars: int = 500, provider: str
345
449
 
346
450
  if not content or not content.strip():
347
451
  logger.debug("No content provided for summarization")
348
- return "No new content in this session"
452
+ return "No new content in this session", None
349
453
 
350
454
  # Truncate content for API (to avoid token limits)
351
455
  # Approximately 4000 chars = ~1000 tokens
@@ -395,13 +499,13 @@ def generate_summary_with_llm(content: str, max_chars: int = 500, provider: str
395
499
  logger.info(f"Claude API success: {len(summary)} chars in {elapsed:.2f}s")
396
500
  logger.debug(f"Claude response: {summary[:100]}...")
397
501
  print(" ✅ Anthropic (Claude) summary successful", file=sys.stderr)
398
- return summary[:max_chars]
502
+ return summary[:max_chars], "claude-3-5-haiku-20241022"
399
503
 
400
504
  except ImportError:
401
505
  logger.warning("Anthropic package not installed")
402
506
  if provider == "claude":
403
507
  print(" ❌ Anthropic package not installed", file=sys.stderr)
404
- return None
508
+ return None, None
405
509
  else:
406
510
  print(" ❌ Anthropic package not installed, trying OpenAI...", file=sys.stderr)
407
511
  except Exception as e:
@@ -417,7 +521,7 @@ def generate_summary_with_llm(content: str, max_chars: int = 500, provider: str
417
521
  print(f" ❌ Anthropic quota/credit issue", file=sys.stderr)
418
522
  else:
419
523
  print(f" ❌ Anthropic API error: {e}", file=sys.stderr)
420
- return None
524
+ return None, None
421
525
  else:
422
526
  # Auto mode: try falling back to OpenAI
423
527
  if "authentication" in error_msg.lower() or "invalid" in error_msg.lower():
@@ -433,7 +537,7 @@ def generate_summary_with_llm(content: str, max_chars: int = 500, provider: str
433
537
  logger.debug("ANTHROPIC_API_KEY not set")
434
538
  if provider == "claude":
435
539
  print(" ❌ ANTHROPIC_API_KEY not set", file=sys.stderr)
436
- return None
540
+ return None, None
437
541
  else:
438
542
  print(" ⓘ ANTHROPIC_API_KEY not set, trying OpenAI...", file=sys.stderr)
439
543
 
@@ -471,12 +575,12 @@ def generate_summary_with_llm(content: str, max_chars: int = 500, provider: str
471
575
  logger.info(f"OpenAI API success: {len(summary)} chars in {elapsed:.2f}s")
472
576
  logger.debug(f"OpenAI response: {summary[:100]}...")
473
577
  print(" ✅ OpenAI (GPT) summary successful", file=sys.stderr)
474
- return summary[:max_chars]
578
+ return summary[:max_chars], "gpt-3.5-turbo"
475
579
 
476
580
  except ImportError:
477
581
  logger.warning("OpenAI package not installed")
478
582
  print(" ❌ OpenAI package not installed", file=sys.stderr)
479
- return None
583
+ return None, None
480
584
  except Exception as e:
481
585
  error_msg = str(e)
482
586
  logger.error(f"OpenAI API error: {error_msg}", exc_info=True)
@@ -488,17 +592,17 @@ def generate_summary_with_llm(content: str, max_chars: int = 500, provider: str
488
592
  print(f" ❌ OpenAI quota/billing issue", file=sys.stderr)
489
593
  else:
490
594
  print(f" ❌ OpenAI API error: {e}", file=sys.stderr)
491
- return None
595
+ return None, None
492
596
  elif try_openai:
493
597
  logger.debug("OPENAI_API_KEY not set")
494
598
  print(" ❌ OPENAI_API_KEY not set", file=sys.stderr)
495
- return None
599
+ return None, None
496
600
 
497
601
  # No API keys available or provider not configured
498
602
  logger.warning(f"No LLM API keys available (provider: {provider})")
499
603
  if provider == "auto":
500
604
  print(" ❌ No LLM API keys configured", file=sys.stderr)
501
- return None
605
+ return None, None
502
606
 
503
607
 
504
608
  def generate_session_filename(user: str, agent: str = "claude") -> str:
@@ -509,6 +613,63 @@ def generate_session_filename(user: str, agent: str = "claude") -> str:
509
613
  return f"{timestamp}_{user_short}_{agent}_{short_id}.jsonl"
510
614
 
511
615
 
616
def extract_codex_rollout_hash(filename: str) -> Optional[str]:
    """
    Extract the stable identifier from a Codex rollout filename.

    Primary Codex rollout format:
        rollout-YYYY-MM-DDTHH-MM-SS-<uuid>.jsonl
        e.g. rollout-2025-11-16T18-10-42-019a8ddc-b4b3-7942-9a4f-fac74d1580c9.jsonl
        -> 019a8ddc-b4b3-7942-9a4f-fac74d1580c9

    Legacy format (still supported):
        rollout-<timestamp>-<hash>.jsonl
        e.g. rollout-1763315655-abc123def.jsonl -> abc123def

    Args:
        filename: Original Codex rollout filename.

    Returns:
        The hash/UUID string (UUIDs lowercased), or None if parsing fails.
    """
    if not filename.startswith("rollout-"):
        return None

    # Drop the extension, then the "rollout-" prefix.
    body = Path(filename).stem
    prefix = "rollout-"
    if body.startswith(prefix):
        body = body[len(prefix):]

    if not body:
        return None

    hex_chars = set("0123456789abcdefABCDEF")

    def _is_uuid(candidate: str) -> bool:
        # Canonical UUID: five hex groups of 8-4-4-4-12 characters.
        pieces = candidate.split("-")
        if len(pieces) != 5:
            return False
        return all(
            len(piece) == width and all(ch in hex_chars for ch in piece)
            for piece, width in zip(pieces, (8, 4, 4, 4, 12))
        )

    # Newer Codex exports append a full UUID after the human-readable
    # timestamp; the UUID occupies the last five hyphen-separated chunks.
    chunks = body.rsplit("-", 5)
    if len(chunks) == 6:
        tail = "-".join(chunks[1:])
        if _is_uuid(tail):
            return tail.lower()

    # Legacy rollout names: everything after the first '-' is the hash.
    head_and_rest = body.split("-", 1)
    if len(head_and_rest) == 2 and head_and_rest[1]:
        return head_and_rest[1]

    return None
671
+
672
+
512
673
  def get_git_user() -> str:
513
674
  """Get git user name."""
514
675
  try:
@@ -528,12 +689,12 @@ def copy_session_to_repo(
528
689
  repo_root: Path,
529
690
  user: str,
530
691
  config: Optional[ReAlignConfig] = None
531
- ) -> Tuple[Path, str, bool]:
692
+ ) -> Tuple[Path, str, bool, int]:
532
693
  """
533
694
  Copy session file to repository .realign/sessions/ directory.
534
695
  Optionally redacts sensitive information if configured.
535
696
  If the source filename is in UUID format, renames it to include username for better identification.
536
- Returns (absolute_path, relative_path, was_redacted).
697
+ Returns (absolute_path, relative_path, was_redacted, content_size).
537
698
  """
538
699
  logger.info(f"Copying session to repo: {session_file.name}")
539
700
  logger.debug(f"Source: {session_file}, Repo root: {repo_root}, User: {user}")
@@ -551,6 +712,8 @@ def copy_session_to_repo(
551
712
  '_' not in stem and
552
713
  len(stem) == 36 # UUID is 36 chars including hyphens
553
714
  )
715
+ # Codex rollout exports always start with rollout-<timestamp>-
716
+ is_codex_rollout = original_filename.startswith("rollout-")
554
717
 
555
718
  # Read session content first to detect agent type
556
719
  try:
@@ -566,6 +729,16 @@ def copy_session_to_repo(
566
729
  user_short = user.split()[0].lower() if user else "unknown"
567
730
  new_filename = f"{user_short}_unknown_{short_id}.jsonl"
568
731
  dest_path = sessions_dir / new_filename
732
+ elif is_codex_rollout:
733
+ # Extract stable hash from rollout filename
734
+ rollout_hash = extract_codex_rollout_hash(original_filename)
735
+ user_short = user.split()[0].lower() if user else "unknown"
736
+ if rollout_hash:
737
+ new_filename = f"{user_short}_codex_{rollout_hash}.jsonl"
738
+ else:
739
+ # Fallback if hash extraction fails
740
+ new_filename = generate_session_filename(user, "codex")
741
+ dest_path = sessions_dir / new_filename
569
742
  else:
570
743
  dest_path = sessions_dir / original_filename
571
744
  temp_path = dest_path.with_suffix(".tmp")
@@ -573,7 +746,12 @@ def copy_session_to_repo(
573
746
  temp_path.rename(dest_path)
574
747
  rel_path = dest_path.relative_to(repo_root)
575
748
  logger.warning(f"Copied session with fallback (no agent detection): {rel_path}")
576
- return dest_path, str(rel_path), False
749
+ # Get file size for the fallback case
750
+ try:
751
+ fallback_size = dest_path.stat().st_size
752
+ except Exception:
753
+ fallback_size = 0
754
+ return dest_path, str(rel_path), False, fallback_size
577
755
 
578
756
  # Detect agent type from session content
579
757
  agent_type = "unknown"
@@ -611,6 +789,18 @@ def copy_session_to_repo(
611
789
  # Format: username_agent_shortid.jsonl (no timestamp for consistency)
612
790
  new_filename = f"{user_short}_{agent_type}_{short_id}.jsonl"
613
791
  dest_path = sessions_dir / new_filename
792
+ elif is_codex_rollout:
793
+ # Extract stable hash from rollout filename
794
+ codex_agent = agent_type if agent_type != "unknown" else "codex"
795
+ rollout_hash = extract_codex_rollout_hash(original_filename)
796
+ user_short = user.split()[0].lower() if user else "unknown"
797
+ if rollout_hash:
798
+ # Format: username_codex_hash.jsonl (stable naming)
799
+ new_filename = f"{user_short}_{codex_agent}_{rollout_hash}.jsonl"
800
+ else:
801
+ # Fallback if hash extraction fails
802
+ new_filename = generate_session_filename(user, codex_agent)
803
+ dest_path = sessions_dir / new_filename
614
804
  else:
615
805
  # Keep original filename (could be timestamp_user_agent_id format or other)
616
806
  dest_path = sessions_dir / original_filename
@@ -658,9 +848,68 @@ def copy_session_to_repo(
658
848
  shutil.copy2(session_file, dest_path)
659
849
  logger.warning("Fallback to simple copy")
660
850
 
661
- # Return both absolute and relative paths, plus redaction status
851
+ # Return both absolute and relative paths, plus redaction status and content size
662
852
  rel_path = dest_path.relative_to(repo_root)
663
- return dest_path, str(rel_path), was_redacted
853
+ content_size = len(content)
854
+ return dest_path, str(rel_path), was_redacted, content_size
855
+
856
+
857
def save_session_metadata(repo_root: Path, session_relpath: str, content_size: int):
    """
    Record that a session file was processed, so later hooks can skip it.

    Writes a small JSON sidecar under .realign/.metadata/ keyed by the
    session's filename.

    Args:
        repo_root: Path to repository root.
        session_relpath: Relative path to session file.
        content_size: Size of session content when processed.
    """
    meta_dir = repo_root / ".realign" / ".metadata"
    meta_dir.mkdir(parents=True, exist_ok=True)

    # The session filename alone is the lookup key for the sidecar.
    meta_path = meta_dir / f"{Path(session_relpath).name}.meta"

    record = {
        "processed_at": time.time(),
        "content_size": content_size,
        "session_relpath": session_relpath,
    }

    try:
        with open(meta_path, 'w', encoding='utf-8') as fh:
            json.dump(record, fh)
        logger.debug(f"Saved metadata for {session_relpath}: {content_size} bytes")
    except Exception as e:
        # Best-effort: failing to save metadata only means reprocessing later.
        logger.warning(f"Failed to save metadata for {session_relpath}: {e}")
885
+
886
+
887
def get_session_metadata(repo_root: Path, session_relpath: str) -> Optional[Dict[str, Any]]:
    """
    Look up the stored metadata for a previously processed session.

    Args:
        repo_root: Path to repository root.
        session_relpath: Relative path to session file.

    Returns:
        Metadata dictionary, or None when no record exists or it is unreadable.
    """
    meta_path = (
        repo_root / ".realign" / ".metadata" / f"{Path(session_relpath).name}.meta"
    )

    if not meta_path.exists():
        return None

    try:
        with open(meta_path, 'r', encoding='utf-8') as fh:
            record = json.load(fh)
        logger.debug(f"Loaded metadata for {session_relpath}: {record.get('content_size')} bytes")
        return record
    except Exception as e:
        # Corrupt/unreadable metadata is treated the same as missing.
        logger.warning(f"Failed to load metadata for {session_relpath}: {e}")
        return None
664
913
 
665
914
 
666
915
  def process_sessions(
@@ -678,7 +927,7 @@ def process_sessions(
678
927
  user: User name override (optional)
679
928
 
680
929
  Returns:
681
- Dictionary with keys: summary, session_relpaths, redacted
930
+ Dictionary with keys: summary, session_relpaths, redacted, summary_entries, summary_model
682
931
  """
683
932
  import time
684
933
  start_time = time.time()
@@ -730,13 +979,15 @@ def process_sessions(
730
979
 
731
980
  # Copy all sessions to repo (with optional redaction)
732
981
  session_relpaths = []
982
+ session_metadata_map = {} # Map session_relpath -> content_size
733
983
  any_redacted = False
734
984
  for session_file in session_files:
735
985
  try:
736
- _, session_relpath, was_redacted = copy_session_to_repo(
986
+ _, session_relpath, was_redacted, content_size = copy_session_to_repo(
737
987
  session_file, repo_root, user, config
738
988
  )
739
989
  session_relpaths.append(session_relpath)
990
+ session_metadata_map[session_relpath] = content_size
740
991
  if was_redacted:
741
992
  any_redacted = True
742
993
  except Exception as e:
@@ -746,18 +997,31 @@ def process_sessions(
746
997
 
747
998
  if not session_relpaths:
748
999
  logger.warning("No session files copied successfully")
749
- return {"summary": "", "session_relpaths": [], "redacted": False}
1000
+ return {
1001
+ "summary": "",
1002
+ "session_relpaths": [],
1003
+ "redacted": False,
1004
+ "summary_entries": [],
1005
+ "summary_model": "",
1006
+ }
750
1007
 
751
1008
  logger.info(f"Copied {len(session_relpaths)} session(s): {session_relpaths}")
752
1009
 
753
- # If pre-commit mode, just return session paths (summary will be generated later)
1010
+ # If pre-commit mode, save metadata and return session paths (summary will be generated later)
754
1011
  if pre_commit_mode:
1012
+ # Save metadata for each session to prevent reprocessing
1013
+ for session_relpath, content_size in session_metadata_map.items():
1014
+ save_session_metadata(repo_root, session_relpath, content_size)
1015
+ logger.debug(f"Saved metadata for {session_relpath} in pre-commit")
1016
+
755
1017
  elapsed = time.time() - start_time
756
1018
  logger.info(f"======== Hook completed: {hook_type} in {elapsed:.2f}s ========")
757
1019
  return {
758
1020
  "summary": "",
759
1021
  "session_relpaths": session_relpaths,
760
1022
  "redacted": any_redacted,
1023
+ "summary_entries": [],
1024
+ "summary_model": "",
761
1025
  }
762
1026
 
763
1027
  # For prepare-commit-msg mode, we need to stage files first to get accurate diff
@@ -776,9 +1040,24 @@ def process_sessions(
776
1040
 
777
1041
  # For prepare-commit-msg mode, generate summary from all sessions
778
1042
  logger.info("Generating summaries for sessions")
779
- all_summaries = []
1043
+ summary_entries: List[Dict[str, str]] = []
1044
+ legacy_summary_chunks: List[str] = []
1045
+ summary_model_label: Optional[str] = None
780
1046
 
781
1047
  for session_relpath in session_relpaths:
1048
+ # Check if this session was already processed in pre-commit hook
1049
+ previous_metadata = get_session_metadata(repo_root, session_relpath)
1050
+ current_size = session_metadata_map.get(session_relpath, 0)
1051
+
1052
+ if previous_metadata:
1053
+ previous_size = previous_metadata.get("content_size", 0)
1054
+ if previous_size == current_size:
1055
+ logger.info(f"Session {session_relpath} unchanged since pre-commit (size: {current_size}), skipping")
1056
+ print(f"⏭️ Skipping {Path(session_relpath).name} (no new content since pre-commit)", file=sys.stderr)
1057
+ continue
1058
+ else:
1059
+ logger.info(f"Session {session_relpath} size changed: {previous_size} -> {current_size}")
1060
+
782
1061
  # Extract NEW content using git diff
783
1062
  new_content = get_new_content_from_git_diff(repo_root, session_relpath)
784
1063
 
@@ -787,37 +1066,54 @@ def process_sessions(
787
1066
  continue
788
1067
 
789
1068
  # Generate summary for NEW content only
790
- summary = None
1069
+ summary_text: Optional[str] = None
1070
+ is_llm_summary = False
1071
+ llm_model_name: Optional[str] = None
791
1072
  if config.use_LLM:
792
1073
  print(f"🤖 Attempting to generate LLM summary (provider: {config.llm_provider})...", file=sys.stderr)
793
- summary = generate_summary_with_llm(new_content, config.summary_max_chars, config.llm_provider)
1074
+ summary_text, llm_model_name = generate_summary_with_llm(
1075
+ new_content,
1076
+ config.summary_max_chars,
1077
+ config.llm_provider
1078
+ )
794
1079
 
795
- if summary:
1080
+ if summary_text:
796
1081
  print("✅ LLM summary generated successfully", file=sys.stderr)
1082
+ is_llm_summary = True
1083
+ if summary_model_label is None:
1084
+ summary_model_label = llm_model_name or config.llm_provider
797
1085
  else:
798
1086
  print("⚠️ LLM summary failed - falling back to local summarization", file=sys.stderr)
799
1087
  print(" Check your API keys: ANTHROPIC_API_KEY or OPENAI_API_KEY", file=sys.stderr)
800
1088
 
801
- if not summary:
1089
+ if not summary_text:
802
1090
  # Fallback to simple summarize
803
1091
  logger.info("Using local summarization (no LLM)")
804
1092
  print("📝 Using local summarization (no LLM)", file=sys.stderr)
805
- summary = simple_summarize(new_content, config.summary_max_chars)
1093
+ summary_text = simple_summarize(new_content, config.summary_max_chars)
806
1094
 
807
1095
  # Identify agent type from filename
808
- agent_name = "Unknown"
809
- if "rollout-" in session_relpath:
810
- agent_name = "Codex"
811
- elif "agent-" in session_relpath or ".jsonl" in session_relpath:
812
- agent_name = "Claude"
1096
+ agent_name = detect_agent_from_session_path(session_relpath)
1097
+
1098
+ summary_text = summary_text.strip()
1099
+ logger.debug(f"Summary for {session_relpath} ({agent_name}): {summary_text[:100]}...")
1100
+ summary_entries.append({
1101
+ "agent": agent_name,
1102
+ "text": summary_text,
1103
+ "source": "llm" if is_llm_summary else "local",
1104
+ })
1105
+ legacy_summary_chunks.append(f"[{agent_name}] {summary_text}")
813
1106
 
814
- logger.debug(f"Summary for {session_relpath} ({agent_name}): {summary[:100]}...")
815
- all_summaries.append(f"[{agent_name}] {summary}")
1107
+ # Update metadata after successfully generating summary
1108
+ save_session_metadata(repo_root, session_relpath, current_size)
1109
+ logger.debug(f"Updated metadata for {session_relpath} in prepare-commit-msg")
816
1110
 
817
1111
  # Combine all summaries
818
- if all_summaries:
819
- combined_summary = " | ".join(all_summaries)
820
- logger.info(f"Generated {len(all_summaries)} summary(ies)")
1112
+ if summary_entries:
1113
+ if summary_model_label is None:
1114
+ summary_model_label = "Local summarizer"
1115
+ combined_summary = " | ".join(legacy_summary_chunks)
1116
+ logger.info(f"Generated {len(summary_entries)} summary(ies)")
821
1117
  else:
822
1118
  combined_summary = "No new content in sessions"
823
1119
  logger.info("No summaries generated (no new content)")
@@ -829,6 +1125,8 @@ def process_sessions(
829
1125
  "summary": combined_summary,
830
1126
  "session_relpaths": session_relpaths,
831
1127
  "redacted": any_redacted,
1128
+ "summary_entries": summary_entries,
1129
+ "summary_model": summary_model_label or "",
832
1130
  }
833
1131
 
834
1132
 
@@ -875,22 +1173,37 @@ def prepare_commit_msg_hook():
875
1173
  Generates session summary and appends to commit message.
876
1174
  """
877
1175
  # Get commit message file path from command line arguments
878
- if len(sys.argv) < 2:
879
- print("Error: Commit message file path not provided", file=sys.stderr)
880
- sys.exit(1)
881
-
882
- msg_file = sys.argv[1]
1176
+ # When called via __main__ with --prepare-commit-msg flag, the file is at index 2
1177
+ # When called directly as a hook entry point, the file is at index 1
1178
+ if sys.argv[1] == "--prepare-commit-msg":
1179
+ # Called via: python -m realign.hooks --prepare-commit-msg <msg-file> <source>
1180
+ if len(sys.argv) < 3:
1181
+ print("Error: Commit message file path not provided", file=sys.stderr)
1182
+ sys.exit(1)
1183
+ msg_file = sys.argv[2]
1184
+ else:
1185
+ # Called via: realign-hook-prepare-commit-msg <msg-file> <source>
1186
+ msg_file = sys.argv[1]
883
1187
 
884
1188
  # Process sessions and generate summary
885
1189
  result = process_sessions(pre_commit_mode=False)
886
1190
 
887
1191
  # Append summary to commit message
888
- if result["summary"] and result["session_relpaths"]:
1192
+ summary_entries = result.get("summary_entries") or []
1193
+ if summary_entries:
889
1194
  try:
890
1195
  with open(msg_file, "a", encoding="utf-8") as f:
891
- f.write(f"\n\nAgent-Summary: {result['summary']}\n")
892
- f.write(f"Agent-Session-Paths: {', '.join(result['session_relpaths'])}\n")
893
- if result["redacted"]:
1196
+ summary_model = result.get("summary_model") or "Local summarizer"
1197
+ f.write("\n\n")
1198
+ f.write(f"--- LLM-Summary ({summary_model}) ---\n")
1199
+ for entry in summary_entries:
1200
+ agent_label = entry.get("agent", "Agent")
1201
+ text = (entry.get("text") or "").strip()
1202
+ if not text:
1203
+ continue
1204
+ f.write(f"* [{agent_label}] {text}\n")
1205
+ f.write("\n")
1206
+ if result.get("redacted"):
894
1207
  f.write("Agent-Redacted: true\n")
895
1208
  except Exception as e:
896
1209
  print(f"Warning: Could not append to commit message: {e}", file=sys.stderr)