aline-ai 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1024 @@
1
+ """
2
+ Commit/turn processing pipeline shared by watcher + worker.
3
+
4
+ This module intentionally contains the "heavy" logic:
5
+ - Parse session files via triggers
6
+ - Extract turn content
7
+ - Call LLM (best-effort) to generate title/description
8
+ - Write turns + turn_content into SQLite
9
+ - Use DB-backed lease locks to prevent cross-process races
10
+
11
+ The watcher should only enqueue work; the worker should execute this pipeline.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import hashlib
17
+ import json
18
+ import os
19
+ import subprocess
20
+ import sys
21
+ import time
22
+ from datetime import datetime
23
+ from pathlib import Path
24
+ from typing import Any, Callable, Dict, Literal, Optional
25
+
26
+ from .config import ReAlignConfig
27
+ from .hooks import find_all_active_sessions
28
+ from .logging_config import setup_logger
29
+
30
+ logger = setup_logger("realign.commit_pipeline", "commit_pipeline.log")
31
+
32
+ SessionType = Literal["claude", "codex", "gemini", "unknown"]
33
+
34
+
35
class CommitPipeline:
    """Process session turns and persist them (with an LLM summary) to SQLite.

    The pipeline parses a session through its adapter trigger, extracts the
    content of one turn, asks an LLM (best-effort) for a title/description and
    writes the resulting turn record to the database. Project-level commits
    are serialized across processes with a DB-backed lease lock.
    """

    # Canned (title, model_name, description, if_last_task, satisfaction) used
    # for directory-based ("Antigravity") sessions when no LLM summary exists.
    _ANTIGRAVITY_FALLBACK_SUMMARY = (
        "Update Antigravity Brain",
        "fallback",
        "Automatic update of brain artifacts",
        "yes",
        "fine",
    )

    # User records whose text starts with one of these are not counted as
    # real user messages when locating turn boundaries.
    _SYNTHETIC_USER_PREFIXES = ("This session is being continued", "<ide_opened_file>")

    # Maximum number of characters kept from an assistant summary.
    _SUMMARY_MAX_CHARS = 300

    def __init__(
        self,
        *,
        config: "ReAlignConfig | None" = None,
        lock_owner_prefix: str = "worker",
        processing_turn_ttl_seconds: float = 20 * 60,
    ) -> None:
        """Create a pipeline.

        Args:
            config: Configuration to use; loaded via ``ReAlignConfig.load()``
                when omitted.
            lock_owner_prefix: Prefix of the owner id used for lease locks.
            processing_turn_ttl_seconds: Age after which a "processing"
                placeholder turn is considered stale and may be taken over.
        """
        self.config = config or ReAlignConfig.load()

        # Trigger support for pluggable turn detection.
        from .triggers.registry import get_global_registry

        self.trigger_registry = get_global_registry()
        self.trigger_name = "next_turn"
        # Cache: str(session path) -> trigger object.
        self._session_triggers: Dict[str, Any] = {}

        # Owner id for DB-backed lease locks (cross-process). Fall back to a
        # pid-based id when the locks helper is unavailable.
        try:
            from .db.locks import make_lock_owner

            self.lock_owner = make_lock_owner(str(lock_owner_prefix))
        except Exception:
            self.lock_owner = f"{lock_owner_prefix}:{os.getpid()}"

        # Per-turn "processing" TTL: if a processing placeholder exists longer
        # than this, a new run may take over to avoid permanent stuck states.
        self.processing_turn_ttl_seconds = float(processing_turn_ttl_seconds)

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _truncate_summary(text: str, limit: int = 300) -> str:
        """Return *text* cut to *limit* characters, with "..." appended if cut."""
        return text[:limit] + ("..." if len(text) > limit else "")

    @staticmethod
    def _turn_info_field(info: Any, field: str) -> Any:
        """Read *field* from a turn-info value that is a mapping or an object.

        Returns None when *info* is None or does not carry the field.
        """
        if info is None:
            return None
        if isinstance(info, dict):
            return info.get(field)
        return getattr(info, field, None)

    @classmethod
    def _is_real_user_content(cls, content: Any) -> bool:
        """Tell whether a user record's ``content`` is a real user message.

        String content, or the joined ``text`` items of list content, counts
        as real unless it starts with one of ``_SYNTHETIC_USER_PREFIXES``.
        """
        if isinstance(content, str):
            text = content
        elif isinstance(content, list):
            parts = [
                item.get("text", "")
                for item in content
                if isinstance(item, dict) and item.get("type") == "text"
            ]
            parts = [part for part in parts if isinstance(part, str)]
            if not parts:
                return False
            text = "\n".join(parts)
        else:
            return False
        return not text.startswith(cls._SYNTHETIC_USER_PREFIXES)

    @staticmethod
    def _processing_age_seconds(created_at: Any) -> float:
        """Return the non-negative age of *created_at* in seconds.

        Naive datetimes are compared with naive local "now"; timezone-aware
        ones with "now" in the same timezone. Returns 0.0 when the age cannot
        be determined, so an unknown age is never treated as stale.
        """
        if not isinstance(created_at, datetime):
            return 0.0
        try:
            if created_at.tzinfo is not None:
                now = datetime.now(created_at.tzinfo)
            else:
                now = datetime.now()
            return max(0.0, (now - created_at).total_seconds())
        except (TypeError, ValueError, OverflowError):
            return 0.0

    # ------------------------------------------------------------------
    # Session / trigger helpers
    # ------------------------------------------------------------------

    def _detect_session_type(self, session_file: Path) -> "SessionType":
        """Return the adapter name for *session_file*, or "unknown"."""
        try:
            from .adapters import get_adapter_registry

            adapter = get_adapter_registry().auto_detect_adapter(session_file)
            if adapter and adapter.name in ("claude", "codex", "gemini"):
                return adapter.name
            return "unknown"
        except Exception as e:
            print(
                f"[Commit] Error detecting session type for {session_file.name}: {e}",
                file=sys.stderr,
            )
            return "unknown"

    def _get_trigger_for_session(self, session_file: Path):
        """Return the (cached) trigger of the adapter detected for the session.

        Returns None, after logging an error, when no adapter matches.
        """
        session_path = str(session_file)
        if session_path not in self._session_triggers:
            from .adapters import get_adapter_registry

            adapter = get_adapter_registry().auto_detect_adapter(session_file)
            if not adapter:
                logger.error(f"Unknown session type for {session_file.name}, cannot select trigger")
                return None
            self._session_triggers[session_path] = adapter.trigger
        return self._session_triggers[session_path]

    def _count_complete_turns(self, session_file: Path) -> int:
        """Count complete dialogue turns via the session trigger (0 on failure)."""
        trigger = self._get_trigger_for_session(session_file)
        if not trigger:
            return 0
        try:
            return int(trigger.count_complete_turns(session_file))
        except Exception as e:
            logger.error(f"Trigger error for {session_file.name}: {e}")
            return 0

    def _get_total_turn_count(self, session_file: Path) -> int:
        """Return the total number of turns, including the last one (0 on failure).

        Uses the trigger's detailed analysis when available, otherwise the
        number of complete turns plus one.
        """
        try:
            trigger = self._get_trigger_for_session(session_file)
            if not trigger:
                return 0
            if hasattr(trigger, "get_detailed_analysis"):
                analysis = trigger.get_detailed_analysis(session_file)
                return int(analysis.get("total_turns", 0))
            return int(trigger.count_complete_turns(session_file)) + 1
        except Exception as e:
            logger.debug(f"Error getting total turn count for {session_file.name}: {e}")
            return 0

    def _find_latest_session(self, project_path: Path) -> Optional[Path]:
        """Return the most recently modified active session file, or None."""
        try:
            session_files = find_all_active_sessions(self.config, project_path)
            if not session_files:
                return None
            return max(session_files, key=lambda f: f.stat().st_mtime)
        except Exception as e:
            logger.error(f"Failed to find latest session: {e}")
            return None

    def _handle_session_redaction(
        self, session_file: Path, project_path: Path, quiet: bool = False
    ) -> Path:
        """Redact secrets in the session file in place (best-effort).

        Does nothing unless ``config.redact_on_match`` is set. When secrets
        are found, the original is saved through ``save_original_session``
        before the redacted text is written back. Always returns
        *session_file*; failures are only logged.
        """
        if not self.config.redact_on_match:
            return session_file
        try:
            from .redactor import check_and_redact_session, save_original_session

            content = session_file.read_text(encoding="utf-8")
            redacted_content, has_secrets, secrets = check_and_redact_session(
                content, redact_mode="auto", quiet=quiet
            )
            if has_secrets:
                logger.warning(f"Secrets detected: {len(secrets)} secret(s)")
                backup_path = save_original_session(session_file, project_path)
                session_file.write_text(redacted_content, encoding="utf-8")
                logger.info(f"Session redacted, original saved to {backup_path}")
            return session_file
        except Exception as e:
            logger.error(f"Failed to redact session: {e}")
            return session_file

    def _get_current_turn_number(self, session_file: Path) -> int:
        """Return the current turn number, i.e. the count of complete turns."""
        return self._count_complete_turns(session_file)

    # ------------------------------------------------------------------
    # Content extraction
    # ------------------------------------------------------------------

    def _extract_user_message_for_turn(self, session_file: Path, turn_number: int) -> str:
        """Return the user message of *turn_number*, or "No user message found"."""
        try:
            trigger = self._get_trigger_for_session(session_file)
            if trigger:
                info = trigger.extract_turn_info(session_file, turn_number)
                # Turn info may be an attribute object or a mapping.
                user_message = self._turn_info_field(info, "user_message")
                if user_message:
                    return user_message
        except Exception as e:
            logger.error(f"Failed to extract user message for turn {turn_number}: {e}")
        return "No user message found"

    def _extract_turn_content_by_number(self, session_file: Path, turn_number: int) -> str:
        """Return the raw content of one turn ("" when it cannot be found).

        For Gemini-format sessions the trigger's ``turn_content`` is used,
        falling back to a JSON document built from the analysis group. For
        line-based sessions the group's 1-based, inclusive line range is
        copied verbatim from the file.
        """
        try:
            trigger = self._get_trigger_for_session(session_file)
            if not trigger:
                return ""
            analysis = trigger.get_detailed_analysis(session_file)
            group = next(
                (g for g in analysis.get("groups", []) if g.get("turn_number") == turn_number),
                None,
            )
            if not group:
                return ""

            if analysis.get("format", "") in ("gemini_json", "gemini"):
                turn_info = trigger.extract_turn_info(session_file, turn_number)
                gemini_content = self._turn_info_field(turn_info, "turn_content")
                if gemini_content:
                    return gemini_content
                return json.dumps(
                    {
                        "turn_number": turn_number,
                        "user_message": group.get("user_message", ""),
                        "assistant_response": group.get("summary_message", ""),
                    },
                    ensure_ascii=False,
                    indent=2,
                )

            group_lines = group.get("lines") or [None]
            start_line = group.get("start_line") or group_lines[0]
            end_line = group.get("end_line") or group_lines[-1]
            if not start_line or not end_line:
                return ""

            collected: list[str] = []
            with open(session_file, "r", encoding="utf-8") as f:
                for line_no, line in enumerate(f, 1):
                    if line_no > end_line:
                        break
                    if line_no >= start_line:
                        collected.append(line)
            return "".join(collected)
        except Exception as e:
            logger.error(f"Failed to extract turn content for turn {turn_number}: {e}")
            print(f"[Commit] Failed to extract turn content: {e}", file=sys.stderr)
            return ""

    def _find_latest_structured_summary(self, session_file: Path) -> Optional[str]:
        """Return the last non-empty ``{"type": "summary"}`` record's text.

        The file is scanned from the end. Blank lines, undecodable lines and
        JSON values that are not objects are skipped. Returns None for
        directory sessions, when no summary exists, or on I/O failure.
        """
        try:
            if session_file.is_dir():
                return None
            with open(session_file, "r", encoding="utf-8") as f:
                lines = f.readlines()
            for raw in reversed(lines):
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    data = json.loads(raw)
                except json.JSONDecodeError:
                    continue
                # Valid JSON that is not an object (list, number, string)
                # cannot be a summary record; skip it instead of aborting.
                if not isinstance(data, dict) or data.get("type") != "summary":
                    continue
                summary = data.get("summary")
                if isinstance(summary, str) and summary.strip():
                    return summary.strip()
            return None
        except Exception as e:
            logger.error(f"Failed to find structured summary: {e}")
            return None

    def _extract_assistant_summary(self, session_file: Path) -> str:
        """Return a summary (at most 300 chars plus "...") of the assistant output.

        Order of preference: a fixed label for directory sessions, the latest
        structured summary record, then the last text item of the last
        assistant record. Returns "Assistant response" when nothing is found
        and "Error extracting summary" on failure.
        """
        try:
            if session_file.is_dir():
                # Directory sessions (Antigravity) cannot be scanned as a
                # single file, so a generic label is used.
                return "Antigravity Session State"

            structured = self._find_latest_structured_summary(session_file)
            if structured:
                return self._truncate_summary(structured.strip(), self._SUMMARY_MAX_CHARS)

            assistant_text = ""
            with open(session_file, "r", encoding="utf-8") as f:
                for line in f:
                    try:
                        data = json.loads(line.strip())
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(data, dict) or data.get("type") != "assistant":
                        continue
                    message = data.get("message")
                    content = message.get("content") if isinstance(message, dict) else None
                    if not isinstance(content, list):
                        continue
                    for item in content:
                        if isinstance(item, dict) and item.get("type") == "text":
                            text = item.get("text", "")
                            # Later text items overwrite earlier ones.
                            assistant_text = text if isinstance(text, str) else ""

            if assistant_text:
                return self._truncate_summary(assistant_text, self._SUMMARY_MAX_CHARS)
            return "Assistant response"
        except Exception as e:
            logger.error(f"Failed to extract assistant summary: {e}")
            return "Error extracting summary"

    def _extract_current_turn_content(self, session_file: Path) -> str:
        """Return the raw lines of the turn currently being committed (best-effort).

        With two or more real user messages, the lines from the second-to-last
        up to (excluding) the last one are returned; with exactly one, the
        lines from it to the end of the file. Directory sessions return the
        trigger's user message for the current turn. Returns "" otherwise.
        """
        try:
            if session_file.is_dir():
                trigger = self._get_trigger_for_session(session_file)
                if trigger:
                    turn = self._get_current_turn_number(session_file)
                    info = trigger.extract_turn_info(session_file, turn)
                    if info:
                        return self._turn_info_field(info, "user_message") or ""
                return ""

            lines: list[str] = []
            user_message_indices: list[int] = []
            with open(session_file, "r", encoding="utf-8") as f:
                for idx, line in enumerate(f):
                    lines.append(line)
                    try:
                        data = json.loads(line.strip())
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(data, dict) or data.get("type") != "user":
                        continue
                    message = data.get("message")
                    content = message.get("content", "") if isinstance(message, dict) else ""
                    if self._is_real_user_content(content):
                        user_message_indices.append(idx)

            if not user_message_indices:
                return ""
            if len(user_message_indices) == 1:
                return "".join(lines[user_message_indices[0] :])
            return "".join(lines[user_message_indices[-2] : user_message_indices[-1]])
        except Exception as e:
            logger.error(f"Failed to extract current turn content: {e}", exc_info=True)
            return ""

    # ------------------------------------------------------------------
    # LLM summary
    # ------------------------------------------------------------------

    def _load_recent_turn_context(
        self, session_id: Optional[str]
    ) -> tuple[str, list, Optional[str]]:
        """Return (recent_ctx, previous_records, previous_commit_title).

        Built from the LLM titles of up to the last five stored turns of the
        session. Any failure yields the empty context ("", [], None).
        """
        if session_id is None:
            return "", [], None
        try:
            from .db import get_database

            recent_turns = get_database().get_turns_for_session(session_id) or []
            titles = [turn.llm_title for turn in recent_turns[-5:] if turn.llm_title]
        except Exception:
            return "", [], None
        if not titles:
            return "", [], None
        recent_ctx = "Recent turns:\n" + "\n".join(f"- {t}" for t in titles)
        return recent_ctx, titles, titles[-1]

    def _find_turn_group(self, session_file: Path, turn_number: Optional[int]):
        """Return the trigger analysis group of *turn_number*, or None."""
        trigger = self._get_trigger_for_session(session_file)
        if not trigger:
            return None
        analysis = trigger.get_detailed_analysis(session_file)
        return next(
            (g for g in analysis.get("groups", []) if g.get("turn_number") == turn_number),
            None,
        )

    def _generate_llm_summary(
        self,
        session_file: Optional[Path],
        turn_number: Optional[int] = None,
        turn_content: Optional[str] = None,
        user_message: Optional[str] = None,
        debug_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
        session_id: Optional[str] = None,
    ) -> Optional[tuple[str, str, str, str, str]]:
        """Generate an LLM summary for one turn.

        The turn-context summarizer is tried first (when both a user message
        and an assistant summary are known); otherwise the legacy summarizer
        runs on the raw turn content. Directory ("Antigravity") sessions get
        a canned result whenever the LLM yields nothing or an error occurs.

        Args:
            session_file: Session to read; may be None when content is given.
            turn_number: Turn to summarize; defaults to the current turn.
            turn_content: Precomputed turn content, extracted when omitted.
            user_message: Precomputed user message for the turn.
            debug_callback: Forwarded to the summarizer functions.
            session_id: Session id for the recent-turn context; defaults to
                the session file's stem.

        Returns:
            (title, model_name, description, if_last_task, satisfaction), or
            None if the LLM is disabled or produced nothing.
        """
        try:
            if not self.config.use_LLM:
                logger.debug("LLM summary disabled in config")
                return None

            if turn_number is None and session_file is not None:
                turn_number = self._get_current_turn_number(session_file)

            resolved_session_id = session_id
            if resolved_session_id is None and session_file is not None:
                resolved_session_id = session_file.stem

            recent_ctx, previous_records, previous_commit_title = self._load_recent_turn_context(
                resolved_session_id
            )

            # Extract full turn content first (includes all messages, thinking, etc.)
            if turn_content is None and session_file is not None:
                turn_content = self._extract_turn_content_by_number(session_file, turn_number)

            # Prefer trigger-derived fields: user_message + assistant summary + turn_status
            group = None
            if session_file is not None:
                try:
                    group = self._find_turn_group(session_file, turn_number)
                except Exception:
                    group = None

            assistant_summary = None
            turn_status = "unknown"
            if group:
                if not user_message:
                    user_message = group.get("user_message") or user_message
                assistant_summary = group.get("summary_message") or assistant_summary
                turn_status = group.get("turn_status") or turn_status

            is_directory_session = session_file is not None and session_file.is_dir()

            # Robust fallback for directory sessions (Antigravity) if group lookup failed
            if is_directory_session and (not user_message or not assistant_summary):
                logger.info("Using fallback extraction for Antigravity directory session")
                logger.debug(
                    f"Antigravity fallback: user_message={bool(user_message)}, "
                    f"assistant_summary={bool(assistant_summary)}"
                )
                if not user_message:
                    # For Antigravity, turn_content is essentially the user message (full state)
                    user_message = turn_content
                if not assistant_summary:
                    assistant_summary = "Antigravity Session State"
                    turn_status = "completed"

            logger.debug(
                f"Before LLM call: user_message={len(user_message) if user_message else 0} chars, "
                f"assistant_summary={bool(assistant_summary)}"
            )

            if user_message and assistant_summary:
                from .hooks import generate_summary_with_llm_from_turn_context

                # Pass full turn content to include all messages (user, assistant text, thinking)
                # but exclude tool use and code changes (handled by filter_session_content)
                title, model_name, description, if_last_task, satisfaction = (
                    generate_summary_with_llm_from_turn_context(
                        user_message=user_message,
                        assistant_summary=assistant_summary,
                        turn_status=turn_status,
                        recent_commit_context=recent_ctx,
                        provider=self.config.llm_provider,
                        previous_commit_title=previous_commit_title,
                        full_turn_content=turn_content,
                        previous_records=previous_records,
                        debug_callback=debug_callback,
                    )
                )

                if title:
                    logger.info(f"Generated LLM summary from turn context using {model_name}")
                    print(
                        f"[Watcher] ✓ Generated summary from turn context ({model_name})",
                        file=sys.stderr,
                    )
                    return (
                        title,
                        model_name or "unknown",
                        description or "",
                        if_last_task,
                        satisfaction,
                    )

                if is_directory_session:
                    # Fallback if LLM fails for Antigravity
                    print(
                        "[Watcher] ⚠ LLM summary failed/empty, using generic fallback for Antigravity",
                        file=sys.stderr,
                    )
                    return self._ANTIGRAVITY_FALLBACK_SUMMARY

            # Fallback: Extract turn content and use the legacy pipeline
            if turn_content is None and session_file is not None:
                turn_content = self._extract_turn_content_by_number(session_file, turn_number)
            if not turn_content:
                logger.warning("No content found for current turn")
                return None

            if recent_ctx:
                # Append the recent-turn context as a synthetic assistant record.
                try:
                    recent_line = json.dumps(
                        {
                            "type": "assistant",
                            "message": {
                                "content": [
                                    {
                                        "type": "text",
                                        "text": f"Recent commit context:\n{recent_ctx}",
                                    }
                                ]
                            },
                        },
                        ensure_ascii=False,
                    )
                    if not turn_content.endswith("\n"):
                        turn_content += "\n"
                    turn_content += recent_line + "\n"
                except Exception:
                    # Context is optional; continue without it.
                    pass

            # Use direct API calls for LLM summary
            from .hooks import generate_summary_with_llm

            title, model_name, description, if_last_task, satisfaction = generate_summary_with_llm(
                turn_content,
                max_chars=500,
                provider=self.config.llm_provider,
                previous_commit_title=previous_commit_title,
                debug_callback=debug_callback,
            )

            if title:
                if model_name:
                    logger.info(f"Generated LLM summary using {model_name}")
                    print(f"[Watcher] ✓ Generated LLM summary using {model_name}", file=sys.stderr)
                return (
                    title,
                    model_name or "unknown",
                    description or "",
                    if_last_task,
                    satisfaction,
                )

            logger.warning("LLM summary generation returned empty result")

            if is_directory_session:
                # Fallback if LLM fails for Antigravity (generic path)
                print(
                    "[Watcher] ⚠ LLM summary returned empty, using fallback for Antigravity",
                    file=sys.stderr,
                )
                return self._ANTIGRAVITY_FALLBACK_SUMMARY

            return None

        except Exception as e:
            logger.error(f"Failed to generate LLM summary: {e}", exc_info=True)
            print(f"[Watcher] Failed to generate LLM summary: {e}", file=sys.stderr)

            # Record the error for later use in fallback logic
            from .hooks import set_last_llm_error

            set_last_llm_error(str(e))

            # Robust fallback for Antigravity directory sessions if anything fails
            if session_file is not None and session_file.is_dir():
                print(
                    "[Watcher] ⚠ Using generic fallback after exception for Antigravity",
                    file=sys.stderr,
                )
                return self._ANTIGRAVITY_FALLBACK_SUMMARY

            return None

    # ------------------------------------------------------------------
    # Commit entry points
    # ------------------------------------------------------------------

    def _run_realign_commit(
        self,
        project_path: Path,
        session_file: Optional[Path] = None,
        target_turn: Optional[int] = None,
        turn_content: Optional[str] = None,
        user_message_override: Optional[str] = None,
        quiet: bool = False,
        debug_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
        skip_dedup: bool = False,
        skip_session_summary: bool = False,
        no_track: bool = False,
    ) -> bool:
        """Commit one turn while holding the project's DB lease lock.

        The lock is requested with a 30-minute TTL and a 5-second wait. All
        remaining arguments are forwarded to ``_do_commit_locked``.

        Returns:
            True if a turn was stored; False when the lock could not be
            acquired, nothing was committed, or an error occurred.
        """
        try:
            from .db import get_database
            from .db.locks import lease_lock, lock_key_for_project_commit

            db = get_database()
            lock_key = lock_key_for_project_commit(project_path)

            with lease_lock(
                db,
                lock_key,
                owner=self.lock_owner,
                ttl_seconds=30 * 60,  # 30 minutes
                wait_timeout_seconds=5.0,
            ) as acquired:
                if not acquired:
                    print(
                        f"[Watcher] Another process is committing to {project_path.name}, skipping",
                        file=sys.stderr,
                    )
                    return False

                return self._do_commit_locked(
                    project_path,
                    session_file=session_file,
                    target_turn=target_turn,
                    turn_content=turn_content,
                    user_message_override=user_message_override,
                    quiet=quiet,
                    debug_callback=debug_callback,
                    skip_dedup=skip_dedup,
                    skip_session_summary=skip_session_summary,
                    no_track=no_track,
                )
        except Exception as e:
            logger.error(f"Commit error: {e}", exc_info=True)
            print(f"[Watcher] Commit error: {e}", file=sys.stderr)
            return False

    def _run_realign_commit_batch(
        self,
        project_path: Path,
        *,
        session_file: Path,
        target_turns: list[int],
        quiet: bool = False,
        skip_dedup: bool = False,
        skip_session_summary: bool = False,
        no_track: bool = False,
    ) -> dict[int, bool]:
        """Commit several turns under a single project lease lock.

        Non-positive turn numbers are dropped. A failure on one turn is
        recorded as False and does not stop the remaining turns.

        Returns:
            Mapping of turn number to success flag. The mapping is empty when
            there is nothing to do or the lock could not be acquired.
        """
        results: dict[int, bool] = {}
        turns = [int(t) for t in (target_turns or []) if int(t) > 0]
        if not turns:
            return results

        try:
            from .db import get_database
            from .db.locks import lease_lock, lock_key_for_project_commit

            db = get_database()
            lock_key = lock_key_for_project_commit(project_path)

            with lease_lock(
                db,
                lock_key,
                owner=self.lock_owner,
                ttl_seconds=30 * 60,
                wait_timeout_seconds=5.0,
            ) as acquired:
                if not acquired:
                    print(
                        f"[Watcher] Another process is committing to {project_path.name}, skipping",
                        file=sys.stderr,
                    )
                    return results

                for turn in turns:
                    try:
                        ok = self._do_commit_locked(
                            project_path,
                            session_file=session_file,
                            target_turn=turn,
                            quiet=quiet,
                            skip_dedup=skip_dedup,
                            skip_session_summary=skip_session_summary,
                            no_track=no_track,
                        )
                        results[turn] = bool(ok)
                    except Exception as e:
                        logger.error(f"Batch commit failed for turn {turn}: {e}", exc_info=True)
                        results[turn] = False
            return results
        except Exception as e:
            logger.error(f"Batch commit error for {project_path.name}: {e}", exc_info=True)
            return results

    def _do_commit_locked(
        self,
        project_path: Path,
        session_file: Optional[Path] = None,
        target_turn: Optional[int] = None,
        turn_content: Optional[str] = None,
        user_message_override: Optional[str] = None,
        quiet: bool = False,
        debug_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
        skip_dedup: bool = False,
        skip_session_summary: bool = False,
        no_track: bool = False,
    ) -> bool:
        """Store one turn in the SQLite database.

        Steps: locate the session, redact it, de-duplicate by
        (session_id, turn_number), write a "processing" placeholder, obtain
        the LLM summary (or an error marker) and write the completed turn.

        Args:
            project_path: Path to the project directory.
            session_file: Target session file (the latest one when None).
            target_turn: Turn to commit (the current turn when falsy).
            turn_content: Optional precomputed turn content.
            user_message_override: Optional precomputed user message.
            quiet: Forwarded to the redaction step.
            debug_callback: Forwarded to the LLM summary step.
            skip_dedup: Skip the existing-turn check.
            skip_session_summary: Forwarded to ``db.create_turn``.
            no_track: Store the turn without calling the LLM.

        Returns:
            True if the turn was stored, False otherwise.
        """
        try:
            # Find the latest session file for this project if not provided
            if not session_file:
                session_file = self._find_latest_session(project_path)
            if not session_file:
                logger.warning("No session file found for commit")
                return False

            # Redact sensitive information from session file before committing
            session_file = self._handle_session_redaction(session_file, project_path, quiet=quiet)

            session_id = session_file.stem  # e.g., "minhao_claude_abc123"
            turn_number = target_turn or self._get_current_turn_number(session_file)
            user_message = user_message_override or self._extract_user_message_for_turn(
                session_file, turn_number
            )

            # Hash the current turn content (not the whole session file)
            if not turn_content:
                turn_content = self._extract_turn_content_by_number(session_file, turn_number)
            turn_hash = hashlib.md5((turn_content or "").encode("utf-8")).hexdigest()

            # SQLite Storage (authoritative): dedupe by (session_id, turn_number)
            from .db import get_database
            from .db.base import TurnRecord
            import uuid

            db = get_database()

            file_stat = session_file.stat()
            # st_birthtime is only available on some platforms; use st_ctime otherwise.
            file_created = datetime.fromtimestamp(
                getattr(file_stat, "st_birthtime", file_stat.st_ctime)
            )
            session = db.get_or_create_session(
                session_id=session_id,
                session_file_path=session_file,
                session_type=self._detect_session_type(session_file),
                started_at=file_created,
                workspace_path=str(project_path) if project_path else None,
            )

            # no_track comes from the parameter or from existing session metadata
            is_no_track = no_track
            if not is_no_track and session:
                session_meta = getattr(session, "metadata", None) or {}
                is_no_track = bool(session_meta.get("no_track", False))

            if is_no_track:
                try:
                    db.update_session_metadata_flag(session_id, "no_track", True)
                except Exception as e:
                    # Persisting the flag is best-effort.
                    logger.debug(f"Failed to store no_track flag: {e}")

            takeover_attempt = False
            existing_turn = db.get_turn_by_number(session_id, turn_number)
            if existing_turn and not skip_dedup:
                existing_status = getattr(existing_turn, "turn_status", None)
                if existing_status in (None, "completed"):
                    logger.info(f"Turn already exists in DB: {session_id} #{turn_number}, skipping")
                    return False

                if existing_status == "processing":
                    # Avoid duplicate LLM calls unless the placeholder is stale.
                    age_seconds = self._processing_age_seconds(
                        getattr(existing_turn, "created_at", None)
                    )
                    if age_seconds < float(self.processing_turn_ttl_seconds):
                        logger.info(
                            f"Turn is already processing in DB: {session_id} #{turn_number} ({age_seconds:.0f}s), skipping"
                        )
                        return False

                    logger.warning(
                        f"Processing turn appears stale: {session_id} #{turn_number} ({age_seconds:.0f}s), taking over"
                    )
                    takeover_attempt = True

                if existing_status == "failed":
                    logger.warning(f"Turn previously failed: {session_id} #{turn_number}, skipping")
                    return False

            # Insert a processing placeholder BEFORE calling LLM so we can reflect runtime
            # status and avoid duplicate work in crash/restart scenarios.
            placeholder_hash = hashlib.md5(
                f"processing:{session_id}:{turn_number}:{time.time()}".encode("utf-8")
            ).hexdigest()
            processing_created_at = datetime.now()
            processing_turn = TurnRecord(
                id=str(uuid.uuid4()),
                session_id=session_id,
                turn_number=turn_number,
                user_message=user_message,
                assistant_summary=None,
                turn_status="processing",
                llm_title="running...",
                llm_description=None,
                model_name=None,
                if_last_task="unknown",
                satisfaction="unknown",
                content_hash=placeholder_hash,
                timestamp=processing_created_at,
                created_at=processing_created_at,
                git_commit_hash=None,
            )
            try:
                db.create_turn(processing_turn, content="")
            except Exception as e:
                # If we fail to store processing state, continue anyway (best-effort).
                logger.debug(f"Failed to write processing placeholder: {e}")

            try:
                if is_no_track:
                    # Skip LLM call for no-track mode
                    llm_result = ("No Track", None, "No Track", "no", "fine")
                    logger.info(f"No-track mode: skipping LLM for {session_id} turn {turn_number}")
                else:
                    llm_result = self._generate_llm_summary(
                        session_file,
                        turn_number=turn_number,
                        turn_content=turn_content,
                        user_message=user_message,
                        debug_callback=debug_callback,
                    )

                if not llm_result:
                    # LLM summary failed, use error marker to continue commit
                    logger.warning(
                        f"LLM summary generation failed for {session_file.name} turn {turn_number} - using error marker"
                    )
                    print(
                        "[Watcher] ⚠ LLM API unavailable - using error marker for commit",
                        file=sys.stderr,
                    )

                    # Check if it's an API key problem
                    from .hooks import get_last_llm_error

                    last_error = get_last_llm_error()
                    if last_error:
                        if "API_KEY not set" in last_error or "api_key" in last_error.lower():
                            print(
                                "[Watcher] ⓘ Configure API keys in Acme Settings to enable LLM summaries",
                                file=sys.stderr,
                            )
                        else:
                            print(f"[Watcher] ⓘ LLM Error: {last_error[:100]}", file=sys.stderr)

                    llm_result = (
                        "⚠ LLM API Error - Summary unavailable",
                        "error-fallback",
                        f"LLM API failed. Error: {last_error[:200] if last_error else 'Unknown error'}",
                        "unknown",
                        "unknown",
                    )

                title, model_name, description, if_last_task, satisfaction = llm_result

                # Validate title - reject if it's empty, too short, or looks like truncated JSON
                if not title or len(title.strip()) < 2:
                    logger.error(f"Invalid LLM title generated: '{title}' - skipping commit")
                    print(f"[Watcher] ✗ Invalid commit message title: '{title}'", file=sys.stderr)
                    raise RuntimeError(f"Invalid LLM title: {title!r}")

                if title.strip() in ("{", "}", "[", "]") or (
                    title.startswith("{") and not title.endswith("}")
                ):
                    logger.error(f"Title appears to be truncated JSON: '{title}' - skipping commit")
                    print(f"[Watcher] ✗ Truncated JSON in title: '{title}'", file=sys.stderr)
                    raise RuntimeError(f"Truncated JSON title: {title!r}")

                logger.info(f"Committing turn {turn_number} for session {session_id}")
                new_turn = TurnRecord(
                    id=str(uuid.uuid4()),
                    session_id=session_id,
                    turn_number=turn_number,
                    user_message=user_message,
                    assistant_summary=description,
                    turn_status="completed",
                    llm_title=title,
                    llm_description=description,
                    model_name=model_name,
                    if_last_task=if_last_task,
                    satisfaction=satisfaction,
                    content_hash=turn_hash,
                    timestamp=datetime.now(),
                    created_at=datetime.now(),
                    git_commit_hash=None,
                )
                db.create_turn(
                    new_turn,
                    content=turn_content or "",
                    skip_session_summary=skip_session_summary,
                )
                logger.info(f"✓ Saved turn {turn_number} to SQLite DB")
                print(f"[Watcher] ✓ Saved turn {turn_number} to SQLite DB", file=sys.stderr)
                return True
            except Exception as e:
                # If we were taking over a stale processing turn, a failure here
                # should stop further retries: mark the turn as failed.
                if takeover_attempt:
                    logger.error(
                        f"Takeover attempt failed for {session_id} #{turn_number}: {e}",
                        exc_info=True,
                    )
                    failed_turn = TurnRecord(
                        id=str(uuid.uuid4()),
                        session_id=session_id,
                        turn_number=turn_number,
                        user_message=user_message,
                        assistant_summary=None,
                        turn_status="failed",
                        llm_title="failed",
                        llm_description=str(e)[:2000],
                        model_name=None,
                        if_last_task="unknown",
                        satisfaction="unknown",
                        content_hash=placeholder_hash,
                        timestamp=datetime.now(),
                        created_at=processing_created_at,
                        git_commit_hash=None,
                    )
                    try:
                        db.create_turn(
                            failed_turn,
                            content="",
                            skip_session_summary=skip_session_summary,
                        )
                    except Exception as mark_error:
                        # Recording the failure is best-effort as well.
                        logger.debug(f"Failed to record failed turn: {mark_error}")

                logger.error(f"Failed to write to SQLite DB: {e}", exc_info=True)
                print(f"[Watcher] ⚠ Failed to write to SQLite DB: {e}", file=sys.stderr)
                return False

        except Exception as e:
            logger.error(f"Commit error for {project_path.name}: {e}", exc_info=True)
            print(f"[Watcher] Commit error for {project_path.name}: {e}", file=sys.stderr)
            return False