aline-ai 0.2.6__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {aline_ai-0.2.6.dist-info → aline_ai-0.3.0.dist-info}/METADATA +3 -1
  2. aline_ai-0.3.0.dist-info/RECORD +41 -0
  3. aline_ai-0.3.0.dist-info/entry_points.txt +3 -0
  4. realign/__init__.py +32 -1
  5. realign/cli.py +203 -19
  6. realign/commands/__init__.py +2 -2
  7. realign/commands/clean.py +149 -0
  8. realign/commands/config.py +1 -1
  9. realign/commands/export_shares.py +1785 -0
  10. realign/commands/hide.py +112 -24
  11. realign/commands/import_history.py +873 -0
  12. realign/commands/init.py +104 -217
  13. realign/commands/mirror.py +131 -0
  14. realign/commands/pull.py +101 -0
  15. realign/commands/push.py +155 -245
  16. realign/commands/review.py +216 -54
  17. realign/commands/session_utils.py +139 -4
  18. realign/commands/share.py +965 -0
  19. realign/commands/status.py +559 -0
  20. realign/commands/sync.py +91 -0
  21. realign/commands/undo.py +423 -0
  22. realign/commands/watcher.py +805 -0
  23. realign/config.py +21 -10
  24. realign/file_lock.py +3 -1
  25. realign/hash_registry.py +310 -0
  26. realign/hooks.py +115 -411
  27. realign/logging_config.py +2 -2
  28. realign/mcp_server.py +263 -549
  29. realign/mcp_watcher.py +997 -139
  30. realign/mirror_utils.py +322 -0
  31. realign/prompts/__init__.py +21 -0
  32. realign/prompts/presets.py +238 -0
  33. realign/redactor.py +168 -16
  34. realign/tracker/__init__.py +9 -0
  35. realign/tracker/git_tracker.py +1123 -0
  36. realign/watcher_daemon.py +115 -0
  37. aline_ai-0.2.6.dist-info/RECORD +0 -28
  38. aline_ai-0.2.6.dist-info/entry_points.txt +0 -5
  39. realign/commands/auto_commit.py +0 -242
  40. realign/commands/commit.py +0 -379
  41. realign/commands/search.py +0 -449
  42. realign/commands/show.py +0 -416
  43. {aline_ai-0.2.6.dist-info → aline_ai-0.3.0.dist-info}/WHEEL +0 -0
  44. {aline_ai-0.2.6.dist-info → aline_ai-0.3.0.dist-info}/licenses/LICENSE +0 -0
  45. {aline_ai-0.2.6.dist-info → aline_ai-0.3.0.dist-info}/top_level.txt +0 -0
realign/mcp_watcher.py CHANGED
@@ -4,6 +4,7 @@ Supports both Claude Code and Codex session formats with unified interface.
4
4
  """
5
5
 
6
6
  import asyncio
7
+ import hashlib
7
8
  import json
8
9
  import subprocess
9
10
  import sys
@@ -13,8 +14,11 @@ from typing import Optional, Dict, Literal
13
14
  from datetime import datetime
14
15
 
15
16
  from .config import ReAlignConfig
17
+ from .hash_registry import HashRegistry
16
18
  from .hooks import find_all_active_sessions
17
19
  from .logging_config import setup_logger
20
+ from .tracker import ReAlignGitTracker
21
+ from .mirror_utils import collect_project_files
18
22
 
19
23
  # Initialize logger for watcher
20
24
  logger = setup_logger('realign.mcp_watcher', 'mcp_watcher.log')
@@ -24,21 +28,206 @@ logger = setup_logger('realign.mcp_watcher', 'mcp_watcher.log')
24
28
  SessionType = Literal["claude", "codex", "unknown"]
25
29
 
26
30
 
31
+ def is_path_blacklisted(project_path: Path) -> bool:
32
+ """
33
+ Check if a project path is blacklisted for auto-init.
34
+
35
+ Blacklisted paths:
36
+ - Anything inside ~/.aline/ directories
37
+ - User home directory itself (~)
38
+ - ~/Desktop, ~/Documents, ~/Downloads (but allow subdirectories)
39
+
40
+ Args:
41
+ project_path: Absolute path to check
42
+
43
+ Returns:
44
+ True if blacklisted, False if allowed
45
+ """
46
+ try:
47
+ # Normalize path (resolve symlinks, make absolute)
48
+ normalized = project_path.resolve()
49
+ home = Path.home().resolve()
50
+ aline_dir = (home / ".aline").resolve()
51
+
52
+ # Check if inside .aline directory
53
+ try:
54
+ normalized.relative_to(aline_dir)
55
+ logger.debug(f"Blacklisted (inside .aline): {normalized}")
56
+ return True
57
+ except ValueError:
58
+ pass # Not inside .aline
59
+
60
+ # Check if it IS the home directory itself
61
+ if normalized == home:
62
+ logger.debug(f"Blacklisted (home directory): {normalized}")
63
+ return True
64
+
65
+ # Check forbidden top-level home subdirectories
66
+ # But allow their subdirectories (e.g., ~/Desktop/project is OK)
67
+ forbidden_dirs = ["Desktop", "Documents", "Downloads"]
68
+ for forbidden in forbidden_dirs:
69
+ forbidden_path = (home / forbidden).resolve()
70
+ if normalized == forbidden_path:
71
+ logger.debug(f"Blacklisted (forbidden dir): {normalized}")
72
+ return True
73
+
74
+ return False
75
+
76
+ except Exception as e:
77
+ logger.error(f"Error checking blacklist for {project_path}: {e}")
78
+ # If we can't determine, err on the side of caution
79
+ return True
80
+
81
+
82
+ def decode_claude_project_path(project_dir_name: str) -> Optional[Path]:
83
+ """
84
+ Decode Claude Code project directory name to actual project path.
85
+
86
+ Claude naming: -Users-huminhao-Projects-ReAlign
87
+ Decoded: /Users/huminhao/Projects/ReAlign
88
+
89
+ If naive decoding fails (e.g., paths with underscores/hyphens in directory names),
90
+ falls back to reading the 'cwd' field from JSONL session files.
91
+
92
+ Args:
93
+ project_dir_name: Claude project directory name (or full path to Claude project dir)
94
+
95
+ Returns:
96
+ Decoded Path if valid, None otherwise
97
+ """
98
+ # Handle both directory name and full path
99
+ if isinstance(project_dir_name, Path):
100
+ project_dir = project_dir_name
101
+ dir_name = project_dir.name
102
+ elif '/' in project_dir_name:
103
+ project_dir = Path(project_dir_name)
104
+ dir_name = project_dir.name
105
+ else:
106
+ dir_name = project_dir_name
107
+ project_dir = Path.home() / ".claude" / "projects" / dir_name
108
+
109
+ if not dir_name.startswith('-'):
110
+ return None
111
+
112
+ # Try naive decoding first
113
+ path_str = '/' + dir_name[1:].replace('-', '/')
114
+ project_path = Path(path_str)
115
+
116
+ if project_path.exists():
117
+ return project_path
118
+
119
+ # Naive decoding failed - try reading from JSONL files
120
+ logger.debug(f"Naive decoding failed for {dir_name}, trying JSONL fallback")
121
+
122
+ if not project_dir.exists() or not project_dir.is_dir():
123
+ logger.debug(f"Claude project directory not found: {project_dir}")
124
+ return None
125
+
126
+ # Find any JSONL file (excluding agent files)
127
+ try:
128
+ jsonl_files = [
129
+ f for f in project_dir.iterdir()
130
+ if f.suffix == '.jsonl' and not f.name.startswith('agent-')
131
+ ]
132
+
133
+ if not jsonl_files:
134
+ logger.debug(f"No JSONL session files found in {project_dir}")
135
+ return None
136
+
137
+ # Read lines from first JSONL file to find cwd field
138
+ jsonl_file = jsonl_files[0]
139
+ with jsonl_file.open('r', encoding='utf-8') as f:
140
+ # Check up to first 20 lines for cwd field
141
+ for i, line in enumerate(f):
142
+ if i >= 20:
143
+ break
144
+
145
+ line = line.strip()
146
+ if not line:
147
+ continue
148
+
149
+ session_data = json.loads(line)
150
+ cwd = session_data.get('cwd')
151
+
152
+ if cwd:
153
+ project_path = Path(cwd)
154
+ if project_path.exists():
155
+ logger.debug(f"Decoded path from JSONL: {dir_name} -> {project_path}")
156
+ return project_path
157
+ else:
158
+ logger.debug(f"Path from JSONL doesn't exist: {project_path}")
159
+ return None
160
+
161
+ logger.debug(f"No 'cwd' field found in first 20 lines of {jsonl_file.name}")
162
+ return None
163
+
164
+ except Exception as e:
165
+ logger.debug(f"Error reading JSONL files from {project_dir}: {e}")
166
+ return None
167
+
168
+ return None
169
+
170
+
171
+ def is_project_initialized(project_path: Path) -> bool:
172
+ """
173
+ Check if a project has been initialized with aline.
174
+
175
+ Checks for:
176
+ 1. .aline-config marker in project root
177
+ 2. .aline directory existence
178
+ 3. .git repo inside .aline directory
179
+
180
+ Args:
181
+ project_path: Absolute path to project
182
+
183
+ Returns:
184
+ True if initialized, False otherwise
185
+ """
186
+ try:
187
+ config_marker = project_path / ".aline-config"
188
+
189
+ if not config_marker.exists():
190
+ return False
191
+
192
+ # Read configured .aline path
193
+ realign_dir = Path(config_marker.read_text(encoding='utf-8').strip())
194
+
195
+ # Check if .git exists inside
196
+ git_config = realign_dir / ".git" / "config"
197
+ return git_config.exists()
198
+
199
+ except Exception as e:
200
+ logger.debug(f"Error checking init status for {project_path}: {e}")
201
+ return False
202
+
203
+
27
204
  class DialogueWatcher:
28
205
  """Watch session files and auto-commit immediately after each user request completes."""
29
206
 
30
207
  def __init__(self):
31
- """Initialize watcher without fixed repo_path - will extract dynamically from sessions."""
208
+ """Initialize watcher for multi-project monitoring - extracts project paths dynamically from sessions."""
32
209
  self.config = ReAlignConfig.load()
33
- self.project_path = self._detect_project_path()
34
210
  self.last_commit_times: Dict[str, float] = {} # Track last commit time per project
35
211
  self.last_session_sizes: Dict[str, int] = {} # Track file sizes
36
212
  self.last_stop_reason_counts: Dict[str, int] = {} # Track stop_reason counts per session
213
+ self.last_session_mtimes: Dict[str, float] = {} # Track last mtime of session files for idle detection
214
+ self.last_final_commit_times: Dict[str, float] = {} # Track when we last tried final commit per session
215
+ self.last_committed_hashes: Dict[str, str] = {} # Track content hash of last commit per session to prevent duplicates (DEPRECATED: use hash_registries instead)
216
+ self.hash_registries: Dict[str, HashRegistry] = {} # Persistent hash registries per project (lazy-loaded)
37
217
  self.min_commit_interval = 5.0 # Minimum 5 seconds between commits (cooldown)
38
- self.debounce_delay = 2.0 # Wait 2 seconds after file change to ensure turn is complete
218
+ self.debounce_delay = 10.0 # Wait 10 seconds after file change to ensure turn is complete (increased from 2.0 to handle streaming responses)
219
+ self.final_commit_idle_timeout = 300.0 # 5 minutes idle to trigger final commit
39
220
  self.running = False
40
221
  self.pending_commit_task: Optional[asyncio.Task] = None
41
222
 
223
+ # Auto-init tracking
224
+ self.failed_init_projects: set[str] = set() # Projects that failed init
225
+ self.last_auto_init_time: float = 0.0 # Last time we ran auto-init
226
+ self.auto_init_interval: float = 5.0 # Run auto-init every 5 seconds
227
+
228
+ # Git tracker will be initialized per-project dynamically in _do_commit_locked()
229
+ self.git_tracker = None
230
+
42
231
  async def start(self):
43
232
  """Start watching session files."""
44
233
  if not self.config.mcp_auto_commit:
@@ -48,28 +237,39 @@ class DialogueWatcher:
48
237
 
49
238
  self.running = True
50
239
  logger.info("Started watching for dialogue completion")
51
- logger.info(f"Mode: Per-request (triggers at end of each AI response)")
240
+ logger.info(f"Mode: Multi-project monitoring (all Claude Code projects)")
241
+ logger.info(f"Trigger: Per-request (at end of each AI response)")
52
242
  logger.info(f"Supports: Claude Code & Codex (auto-detected)")
53
243
  logger.info(f"Debounce: {self.debounce_delay}s, Cooldown: {self.min_commit_interval}s")
54
244
  print("[MCP Watcher] Started watching for dialogue completion", file=sys.stderr)
55
- print(f"[MCP Watcher] Mode: Per-request (triggers at end of each AI response)", file=sys.stderr)
245
+ print(f"[MCP Watcher] Mode: Multi-project monitoring (all Claude Code projects)", file=sys.stderr)
246
+ print(f"[MCP Watcher] Trigger: Per-request (at end of each AI response)", file=sys.stderr)
56
247
  print(f"[MCP Watcher] Supports: Claude Code & Codex (auto-detected)", file=sys.stderr)
57
248
  print(f"[MCP Watcher] Debounce: {self.debounce_delay}s, Cooldown: {self.min_commit_interval}s", file=sys.stderr)
58
- if self.project_path:
59
- logger.info(f"Monitoring project: {self.project_path}")
60
- print(f"[MCP Watcher] Monitoring project: {self.project_path}", file=sys.stderr)
61
- else:
62
- logger.info("Project path unknown, falling back to multi-project scan")
63
- print("[MCP Watcher] Project path unknown, falling back to multi-project scan", file=sys.stderr)
64
249
 
65
250
  # Initialize baseline sizes and stop_reason counts
66
251
  self.last_session_sizes = self._get_session_sizes()
67
252
  self.last_stop_reason_counts = self._get_stop_reason_counts()
68
253
 
254
+ # Note: Idle timeout checking is now integrated into main loop instead of separate task
255
+
256
+ # Run initial auto-init
257
+ logger.info("Running initial auto-init scan")
258
+ print("[MCP Watcher] Running initial auto-init scan", file=sys.stderr)
259
+ await self.auto_init_projects()
260
+ self.last_auto_init_time = time.time()
261
+
262
+ # Start periodic auto-init task
263
+ asyncio.create_task(self.run_periodic_auto_init())
264
+
69
265
  # Poll for file changes more frequently
70
266
  while self.running:
71
267
  try:
72
268
  await self.check_for_changes()
269
+
270
+ # Check for idle sessions that need final commit
271
+ await self._check_idle_sessions_for_final_commit()
272
+
73
273
  await asyncio.sleep(0.5) # Check every 0.5 seconds for responsiveness
74
274
  except Exception as e:
75
275
  logger.error(f"Error in check loop: {e}", exc_info=True)
@@ -85,30 +285,26 @@ class DialogueWatcher:
85
285
  print("[MCP Watcher] Stopped", file=sys.stderr)
86
286
 
87
287
  def _get_session_sizes(self) -> Dict[str, int]:
88
- """Get current sizes of all active session files."""
288
+ """Get current sizes of all active session files across all projects."""
89
289
  sizes = {}
90
290
  try:
91
- session_files = find_all_active_sessions(
92
- self.config,
93
- project_path=self.project_path if self.project_path and self.project_path.exists() else None,
94
- )
291
+ # Always use multi-project mode (project_path=None)
292
+ session_files = find_all_active_sessions(self.config, project_path=None)
95
293
  for session_file in session_files:
96
294
  if session_file.exists():
97
295
  sizes[str(session_file)] = session_file.stat().st_size
98
- logger.debug(f"Tracked {len(sizes)} session file(s)")
296
+ logger.debug(f"Tracked {len(sizes)} session file(s) across all projects")
99
297
  except Exception as e:
100
298
  logger.error(f"Error getting session sizes: {e}", exc_info=True)
101
299
  print(f"[MCP Watcher] Error getting session sizes: {e}", file=sys.stderr)
102
300
  return sizes
103
301
 
104
302
  def _get_stop_reason_counts(self) -> Dict[str, int]:
105
- """Get current count of turn completion markers in all active session files."""
303
+ """Get current count of turn completion markers in all active session files across all projects."""
106
304
  counts = {}
107
305
  try:
108
- session_files = find_all_active_sessions(
109
- self.config,
110
- project_path=self.project_path if self.project_path and self.project_path.exists() else None,
111
- )
306
+ # Always use multi-project mode (project_path=None)
307
+ session_files = find_all_active_sessions(self.config, project_path=None)
112
308
  for session_file in session_files:
113
309
  if session_file.exists():
114
310
  counts[str(session_file)] = self._count_complete_turns(session_file)
@@ -116,6 +312,105 @@ class DialogueWatcher:
116
312
  print(f"[MCP Watcher] Error getting turn counts: {e}", file=sys.stderr)
117
313
  return counts
118
314
 
315
+ def _get_file_hash(self, session_file: Path) -> Optional[str]:
316
+ """Compute MD5 hash of session file for duplicate detection."""
317
+ try:
318
+ with open(session_file, 'rb') as f:
319
+ md5_hash = hashlib.md5()
320
+ while chunk := f.read(8192):
321
+ md5_hash.update(chunk)
322
+ return md5_hash.hexdigest()
323
+ except Exception as e:
324
+ logger.warning(f"Failed to compute hash for {session_file.name}: {e}")
325
+ return None
326
+
327
+ async def _check_idle_sessions_for_final_commit(self):
328
+ """Check for idle sessions and trigger final commits if needed."""
329
+ try:
330
+ current_time = time.time()
331
+ # Always use multi-project mode (project_path=None)
332
+ session_files = find_all_active_sessions(self.config, project_path=None)
333
+
334
+ for session_file in session_files:
335
+ if not session_file.exists():
336
+ continue
337
+
338
+ session_path = str(session_file)
339
+
340
+ try:
341
+ # Get current mtime
342
+ mtime = session_file.stat().st_mtime
343
+
344
+ # Initialize tracking if first time seeing this session
345
+ if session_path not in self.last_session_mtimes:
346
+ self.last_session_mtimes[session_path] = mtime
347
+ continue
348
+
349
+ last_mtime = self.last_session_mtimes[session_path]
350
+
351
+ # If file was modified, update mtime and skip
352
+ if mtime > last_mtime:
353
+ self.last_session_mtimes[session_path] = mtime
354
+ # Reset final commit attempt time when file changes
355
+ self.last_final_commit_times.pop(session_path, None)
356
+ continue
357
+
358
+ # Check if session has been idle long enough
359
+ time_since_change = current_time - last_mtime
360
+ if time_since_change >= self.final_commit_idle_timeout:
361
+ # Check if we've already tried final commit recently
362
+ last_attempt = self.last_final_commit_times.get(session_path, 0)
363
+ if current_time - last_attempt < 60: # Don't try more than once per minute
364
+ continue
365
+
366
+ # Check if there are any new turns since last commit
367
+ current_count = self._count_complete_turns(session_file)
368
+ last_count = self.last_stop_reason_counts.get(session_path, 0)
369
+
370
+ if current_count <= last_count:
371
+ # No new content since last commit, skip
372
+ logger.debug(f"No new turns in {session_file.name} (count: {current_count}), skipping idle commit")
373
+ # Mark as attempted to avoid checking again soon
374
+ self.last_final_commit_times[session_path] = current_time
375
+ continue
376
+
377
+ # Try to trigger final commit
378
+ logger.info(f"Session {session_file.name} idle for {time_since_change:.0f}s with {current_count - last_count} new turn(s), attempting final commit")
379
+ print(f"[MCP Watcher] Session idle for {time_since_change:.0f}s - triggering final commit", file=sys.stderr)
380
+
381
+ project_path = self._extract_project_path(session_file)
382
+ if project_path:
383
+ # Check cooldown
384
+ project_key = str(project_path)
385
+ last_commit_time = self.last_commit_times.get(project_key)
386
+
387
+ if not last_commit_time or (current_time - last_commit_time) >= self.min_commit_interval:
388
+ # Trigger commit via executor to avoid blocking
389
+ result = await asyncio.get_event_loop().run_in_executor(
390
+ None,
391
+ self._run_realign_commit,
392
+ project_path
393
+ )
394
+
395
+ if result:
396
+ logger.info(f"✓ Final commit completed for {project_path.name}")
397
+ print(f"[MCP Watcher] ✓ Final commit completed for {project_path.name}", file=sys.stderr)
398
+ self.last_commit_times[project_key] = current_time
399
+
400
+ # Update turn count baseline after successful commit
401
+ self.last_stop_reason_counts[session_path] = current_count
402
+ logger.debug(f"Updated turn count baseline for {session_file.name}: {current_count}")
403
+
404
+ # Record attempt time
405
+ self.last_final_commit_times[session_path] = current_time
406
+
407
+ except Exception as e:
408
+ logger.warning(f"Error checking idle status for {session_path}: {e}")
409
+ continue
410
+
411
+ except Exception as e:
412
+ logger.error(f"Error in idle session check: {e}", exc_info=True)
413
+
119
414
  def _extract_project_path(self, session_file: Path) -> Optional[Path]:
120
415
  """
121
416
  Extract project path (cwd) from session file.
@@ -254,13 +549,15 @@ class DialogueWatcher:
254
549
  """
255
550
  Count complete dialogue turns for Claude Code sessions.
256
551
 
257
- Uses stop_reason in ('end_turn', 'tool_use') as the marker, with message ID deduplication
258
- to handle Claude Code's incremental writes (thinking first, then full content).
259
-
260
- Note: In Claude Code, most responses have stop_reason='tool_use' because the AI
261
- frequently uses tools. We count both 'end_turn' and 'tool_use' as complete turns.
552
+ Strategy:
553
+ - Count unique user messages by timestamp (not UUID)
554
+ - Claude Code 2.0 sometimes splits one user message into multiple entries
555
+ with different UUIDs but the same timestamp
556
+ - Excludes tool results (type="tool_result")
557
+ - This represents the number of user requests that have been sent
262
558
  """
263
- unique_message_ids = set()
559
+ user_message_timestamps = set()
560
+
264
561
  try:
265
562
  with open(session_file, 'r', encoding='utf-8') as f:
266
563
  for line in f:
@@ -269,25 +566,39 @@ class DialogueWatcher:
269
566
  continue
270
567
  try:
271
568
  data = json.loads(line)
272
- message = data.get("message", {})
273
- stop_reason = message.get("stop_reason")
274
- # Count both end_turn and tool_use as complete turns
275
- # tool_use indicates the AI used tools (most common in Claude Code)
276
- # end_turn indicates a simple response without tools
277
- if stop_reason in ("end_turn", "tool_use"):
278
- # Deduplicate by message ID to handle incremental writes
279
- msg_id = message.get("id")
280
- if msg_id:
281
- unique_message_ids.add(msg_id)
282
- else:
283
- # If no message ID, count it anyway (shouldn't happen normally)
284
- unique_message_ids.add(f"no_id_{len(unique_message_ids)}")
569
+ msg_type = data.get("type")
570
+
571
+ # Count user messages (excluding tool results)
572
+ if msg_type == "user":
573
+ message = data.get("message", {})
574
+ content = message.get("content", [])
575
+
576
+ # Check if this is a tool result (has tool_use_id)
577
+ is_tool_result = False
578
+ if isinstance(content, list):
579
+ for item in content:
580
+ if isinstance(item, dict) and item.get("type") == "tool_result":
581
+ is_tool_result = True
582
+ break
583
+
584
+ # Only count non-tool-result user messages
585
+ # Use timestamp instead of UUID to handle split messages
586
+ if not is_tool_result:
587
+ timestamp = data.get("timestamp")
588
+ if timestamp:
589
+ user_message_timestamps.add(timestamp)
590
+
285
591
  except json.JSONDecodeError:
286
592
  continue
593
+
594
+ logger.debug(f"Counted {len(user_message_timestamps)} user messages in {session_file.name}")
595
+ return len(user_message_timestamps)
596
+
287
597
  except Exception as e:
288
598
  logger.error(f"Error counting Claude turns in {session_file}: {e}", exc_info=True)
289
599
  print(f"[MCP Watcher] Error counting Claude turns in {session_file}: {e}", file=sys.stderr)
290
- return len(unique_message_ids)
600
+ return 0
601
+
291
602
 
292
603
  def _count_codex_turns(self, session_file: Path) -> int:
293
604
  """
@@ -401,13 +712,16 @@ class DialogueWatcher:
401
712
  Check if the session file has at least 1 new complete dialogue turn since last check.
402
713
 
403
714
  Supports both Claude Code and Codex formats:
404
- - Claude Code: Uses stop_reason='end_turn' (deduplicated by message ID)
715
+ - Claude Code: Count user messages by timestamp
405
716
  - Codex: Uses token_count events (no deduplication needed)
406
717
 
407
718
  Each complete dialogue round consists of:
408
719
  1. User message/request
409
720
  2. Assistant response
410
721
  3. Turn completion marker (format-specific)
722
+
723
+ Note: This method does NOT update last_stop_reason_counts.
724
+ The count will be updated in _do_commit() after successful commit.
411
725
  """
412
726
  try:
413
727
  session_path = str(session_file)
@@ -422,8 +736,8 @@ class DialogueWatcher:
422
736
  if new_turns >= 1:
423
737
  logger.info(f"Detected {new_turns} new turn(s) in {session_file.name} ({session_type})")
424
738
  print(f"[MCP Watcher] Detected {new_turns} new turn(s) in {session_file.name} ({session_type})", file=sys.stderr)
425
- # Update baseline immediately to avoid double-counting
426
- self.last_stop_reason_counts[session_path] = current_count
739
+ # DO NOT update last_stop_reason_counts here!
740
+ # It will be updated in _do_commit() after successful commit
427
741
  return True
428
742
 
429
743
  return False
@@ -435,22 +749,18 @@ class DialogueWatcher:
435
749
 
436
750
  async def _do_commit(self, project_path: Path, session_file: Path):
437
751
  """
438
- Perform the actual commit for a specific project.
752
+ Async wrapper for committing a turn to the shadow git repository.
439
753
 
440
754
  Args:
441
755
  project_path: Path to the project directory
442
756
  session_file: Session file that triggered the commit
443
757
  """
444
758
  try:
445
- # Use empty message to let prepare-commit-msg hook generate it with LLM summary
446
- message = ""
447
-
448
- # Use realign commit command
759
+ # Delegate to synchronous commit method (runs in executor to avoid blocking)
449
760
  result = await asyncio.get_event_loop().run_in_executor(
450
761
  None,
451
762
  self._run_realign_commit,
452
- project_path,
453
- message
763
+ project_path
454
764
  )
455
765
 
456
766
  if result:
@@ -458,7 +768,13 @@ class DialogueWatcher:
458
768
  print(f"[MCP Watcher] ✓ Auto-committed to {project_path.name}", file=sys.stderr)
459
769
  # Update last commit time for this project
460
770
  self.last_commit_times[str(project_path)] = time.time()
461
- # Baseline counts already updated in _check_if_turn_complete()
771
+
772
+ # Update turn count baseline ONLY after successful commit
773
+ # This prevents double-counting if commit fails
774
+ session_path = str(session_file)
775
+ current_count = self._count_complete_turns(session_file)
776
+ self.last_stop_reason_counts[session_path] = current_count
777
+ logger.debug(f"Updated turn count baseline for {session_file.name}: {current_count}")
462
778
  else:
463
779
  logger.warning(f"Commit failed for {project_path.name}")
464
780
 
@@ -466,20 +782,22 @@ class DialogueWatcher:
466
782
  logger.error(f"Error during commit for {project_path}: {e}", exc_info=True)
467
783
  print(f"[MCP Watcher] Error during commit for {project_path}: {e}", file=sys.stderr)
468
784
 
469
- def _run_realign_commit(self, project_path: Path, message: str) -> bool:
785
+ def _run_realign_commit(self, project_path: Path) -> bool:
470
786
  """
471
- Run realign commit command with file locking to prevent race conditions.
787
+ Execute commit with file locking to prevent race conditions.
472
788
 
473
789
  Args:
474
790
  project_path: Path to the project directory
475
- message: Commit message
476
791
 
477
- The command will:
792
+ Returns:
793
+ True if commit was created, False otherwise
794
+
795
+ The method will:
478
796
  - Acquire a file lock to prevent concurrent commits from multiple watchers
479
- - Auto-initialize git and ReAlign if needed
480
- - Check for session changes (modified within last 5 minutes)
481
- - Create empty commit if only sessions changed
482
- - Return True if commit was created, False otherwise
797
+ - Auto-initialize shadow git repository if needed
798
+ - Generate LLM-powered commit message
799
+ - Mirror project files to shadow repository
800
+ - Create commit with semantic message
483
801
  """
484
802
  from .file_lock import commit_lock
485
803
 
@@ -490,7 +808,7 @@ class DialogueWatcher:
490
808
  print(f"[MCP Watcher] Another watcher is committing to {project_path.name}, skipping", file=sys.stderr)
491
809
  return False
492
810
 
493
- return self._do_commit_locked(project_path, message)
811
+ return self._do_commit_locked(project_path)
494
812
 
495
813
  except TimeoutError:
496
814
  print("[MCP Watcher] Could not acquire commit lock (timeout)", file=sys.stderr)
@@ -499,119 +817,659 @@ class DialogueWatcher:
499
817
  print(f"[MCP Watcher] Commit error: {e}", file=sys.stderr)
500
818
  return False
501
819
 
502
- def _do_commit_locked(self, project_path: Path, message: str) -> bool:
820
+ def _do_commit_locked(self, project_path: Path) -> bool:
503
821
  """
504
- Perform the actual commit operation (must be called with lock held).
822
+ Perform the actual commit operation using ReAlignGitTracker.
823
+
824
+ This method:
825
+ 1. Finds the latest session file for the project
826
+ 2. Redacts sensitive information from the session
827
+ 3. Generates LLM-powered semantic commit message
828
+ 4. Mirrors project files to shadow repository
829
+ 5. Creates commit with structured metadata
505
830
 
506
831
  Args:
507
832
  project_path: Path to the project directory
508
- message: Commit message
509
833
 
510
834
  Returns:
511
835
  True if commit was created, False otherwise
512
836
  """
513
837
  try:
514
- # Check if ReAlign is initialized
515
- realign_dir = project_path / ".realign"
516
-
517
- if not realign_dir.exists():
518
- print(f"[MCP Watcher] ReAlign not initialized in {project_path.name}, initializing...", file=sys.stderr)
519
-
520
- # Auto-initialize ReAlign (which also inits git if needed)
521
- init_result = subprocess.run(
522
- ["python3", "-m", "realign.cli", "init", "--yes"],
523
- cwd=project_path,
524
- capture_output=True,
525
- text=True,
526
- timeout=30,
527
- )
838
+ # Initialize git tracker if not already done
839
+ if not self.git_tracker or self.git_tracker.project_root != project_path:
840
+ self.git_tracker = ReAlignGitTracker(project_path)
841
+ if not self.git_tracker.is_initialized():
842
+ self.git_tracker.init_repo()
843
+
844
+ # Find the latest session file for this project
845
+ session_file = self._find_latest_session(project_path)
846
+ if not session_file:
847
+ logger.warning("No session file found for commit")
848
+ return False
528
849
 
529
- if init_result.returncode != 0:
530
- print(f"[MCP Watcher] Failed to initialize ReAlign in {project_path.name}: {init_result.stderr}", file=sys.stderr)
850
+ # Redact sensitive information from session file before committing
851
+ session_file = self._handle_session_redaction(session_file, project_path)
852
+
853
+ # Extract session information
854
+ session_id = session_file.stem # e.g., "minhao_claude_abc123"
855
+ turn_number = self._get_current_turn_number(session_file)
856
+ user_message = self._extract_last_user_message(session_file)
857
+ modified_files = self._extract_modified_files(session_file)
858
+
859
+ # Check if we've already committed this exact turn content to avoid duplicates
860
+ # Compute hash of current turn content (not the whole session file)
861
+ turn_content = self._extract_current_turn_content(session_file)
862
+ turn_hash = None
863
+ if turn_content:
864
+ turn_hash = hashlib.md5(turn_content.encode('utf-8')).hexdigest()
865
+
866
+ # Get hash registry for this project
867
+ hash_registry = self._get_hash_registry(project_path)
868
+ last_hash = hash_registry.get_last_hash(session_file)
869
+
870
+ if last_hash == turn_hash:
871
+ logger.info(f"Turn content unchanged since last commit (hash: {turn_hash[:8]}), skipping duplicate")
872
+ print(f"[MCP Watcher] ⓘ Turn content unchanged, skipping duplicate commit", file=sys.stderr)
531
873
  return False
532
874
 
533
- print(f"[MCP Watcher] ReAlign initialized successfully in {project_path.name}", file=sys.stderr)
875
+ logger.debug(f"Turn content hash: {turn_hash[:8]}")
876
+
877
+ # Generate LLM summary (required, no fallback)
878
+ llm_result = self._generate_llm_summary(session_file)
879
+ if not llm_result:
880
+ logger.error("LLM summary generation failed - cannot commit without summary")
881
+ print("[MCP Watcher] ✗ LLM summary generation failed - cannot commit", file=sys.stderr)
882
+ return False
883
+
884
+ title, model_name, description = llm_result
534
885
 
535
- # Use direct function call instead of subprocess
536
- # This allows environment variables (like OPENAI_API_KEY) to be inherited
537
- from realign.commands.commit import commit_internal
886
+ # Validate title - reject if it's empty, too short, or looks like truncated JSON
887
+ if not title or len(title.strip()) < 2:
888
+ logger.error(f"Invalid LLM title generated: '{title}' - skipping commit")
889
+ print(f"[MCP Watcher] ✗ Invalid commit message title: '{title}'", file=sys.stderr)
890
+ return False
891
+
892
+ if title.strip() in ["{", "}", "[", "]"] or title.startswith("{") and not title.endswith("}"):
893
+ logger.error(f"Title appears to be truncated JSON: '{title}' - skipping commit")
894
+ print(f"[MCP Watcher] ✗ Truncated JSON in title: '{title}'", file=sys.stderr)
895
+ return False
538
896
 
539
- success, commit_hash, status_msg = commit_internal(
540
- repo_root=project_path,
541
- message=message,
542
- all_files=True,
543
- amend=False,
544
- no_edit=False,
897
+ logger.info(f"Committing turn {turn_number} for session {session_id}")
898
+ logger.debug(f"Modified files: {[str(f) for f in modified_files]}")
899
+
900
+ # Commit the turn to .realign/.git
901
+ commit_hash = self.git_tracker.commit_turn(
902
+ session_id=session_id,
903
+ turn_number=turn_number,
904
+ user_message=user_message,
905
+ llm_title=title,
906
+ llm_description=description,
907
+ model_name=model_name,
908
+ modified_files=modified_files,
909
+ session_file=session_file
545
910
  )
546
911
 
547
- if success:
548
- print(f"[MCP Watcher] {status_msg}", file=sys.stderr)
912
+ if commit_hash:
913
+ logger.info(f" Committed turn {turn_number} to .realign/.git: {commit_hash[:8]}")
914
+ print(f"[MCP Watcher] ✓ Committed turn {turn_number} to .realign/.git: {commit_hash[:8]}", file=sys.stderr)
915
+
916
+ # Store hash in persistent registry (if we computed one)
917
+ if turn_hash:
918
+ hash_registry = self._get_hash_registry(project_path)
919
+ hash_registry.set_last_hash(
920
+ session_file=session_file,
921
+ hash_value=turn_hash,
922
+ commit_sha=commit_hash,
923
+ turn_number=turn_number
924
+ )
925
+
549
926
  return True
550
- elif "No changes detected" in status_msg:
551
- # No changes - this is expected, not an error
552
- return False
553
927
  else:
554
- # Log the error for debugging
555
- print(f"[MCP Watcher] Commit failed for {project_path.name}: {status_msg}", file=sys.stderr)
928
+ logger.info("No changes to commit")
556
929
  return False
557
930
 
558
931
  except Exception as e:
932
+ logger.error(f"Commit error for {project_path.name}: {e}", exc_info=True)
559
933
  print(f"[MCP Watcher] Commit error for {project_path.name}: {e}", file=sys.stderr)
560
934
  return False
561
935
 
562
- def _detect_project_path(self) -> Optional[Path]:
936
+ def _get_hash_registry(self, project_path: Path) -> HashRegistry:
563
937
  """
564
- Attempt to detect the git repository root for the current process.
565
- Returns None if not inside a git repo.
938
+ Get or create hash registry for a project (lazy initialization).
939
+
940
+ Args:
941
+ project_path: Path to the user's project root
942
+
943
+ Returns:
944
+ HashRegistry instance for this project
566
945
  """
946
+ key = str(project_path)
947
+ if key not in self.hash_registries:
948
+ # Get .aline directory for this project
949
+ from realign import get_realign_dir
950
+ realign_dir = get_realign_dir(project_path)
951
+ self.hash_registries[key] = HashRegistry(realign_dir)
952
+ logger.debug(f"Initialized HashRegistry for {project_path.name}")
953
+ return self.hash_registries[key]
954
+
955
+ def _find_latest_session(self, project_path: Path) -> Optional[Path]:
956
+ """Find the most recently modified session file for this project."""
567
957
  try:
568
- result = subprocess.run(
569
- ["git", "rev-parse", "--show-toplevel"],
570
- capture_output=True,
571
- text=True,
572
- check=True,
958
+ session_files = find_all_active_sessions(self.config, project_path)
959
+ if not session_files:
960
+ return None
961
+
962
+ # Return most recently modified session
963
+ return max(session_files, key=lambda f: f.stat().st_mtime)
964
+ except Exception as e:
965
+ logger.error(f"Failed to find latest session: {e}")
966
+ return None
967
+
968
+ def _handle_session_redaction(self, session_file: Path, project_path: Path) -> Path:
969
+ """Check and redact sensitive information from session file.
970
+
971
+ Args:
972
+ session_file: Path to the session file
973
+ project_path: Path to the project directory
974
+
975
+ Returns:
976
+ Path to the (possibly modified) session file
977
+ """
978
+ if not self.config.redact_on_match:
979
+ return session_file
980
+
981
+ try:
982
+ from .redactor import check_and_redact_session, save_original_session
983
+
984
+ content = session_file.read_text(encoding='utf-8')
985
+ redacted_content, has_secrets, secrets = check_and_redact_session(
986
+ content, redact_mode="auto"
573
987
  )
574
- repo_path = Path(result.stdout.strip())
575
- if repo_path.exists():
576
- return repo_path
577
- except subprocess.CalledProcessError:
578
- current_dir = Path.cwd()
579
- if (current_dir / ".git").exists():
580
- return current_dir
988
+
989
+ if has_secrets:
990
+ logger.warning(f"Secrets detected: {len(secrets)} secret(s)")
991
+ from realign import get_realign_dir
992
+ realign_dir = get_realign_dir(project_path)
993
+ backup_path = save_original_session(session_file, realign_dir)
994
+ session_file.write_text(redacted_content, encoding='utf-8')
995
+ logger.info(f"Session redacted, original saved to {backup_path}")
996
+
997
+ return session_file
998
+
581
999
  except Exception as e:
582
- print(f"[MCP Watcher] Could not detect project path: {e}", file=sys.stderr)
583
- return None
1000
+ logger.error(f"Failed to redact session: {e}")
1001
+ # Return original session file on error
1002
+ return session_file
584
1003
 
1004
+ def _get_current_turn_number(self, session_file: Path) -> int:
1005
+ """Get the current turn number from a session file."""
1006
+ # Count the number of complete turns in the session
1007
+ return self._count_complete_turns(session_file)
585
1008
 
586
- # Global watcher instance
587
- _watcher: Optional[DialogueWatcher] = None
1009
+ def _extract_last_user_message(self, session_file: Path) -> str:
1010
+ """
1011
+ Extract the user message for the current turn being committed.
588
1012
 
1013
+ This is called AFTER a new user message arrives (which triggers the commit),
1014
+ so we need to extract the SECOND-TO-LAST valid user message, not the last one.
1015
+ The last user message belongs to the next turn that hasn't been processed yet.
1016
+ """
1017
+ from .hooks import clean_user_message
589
1018
 
590
- async def start_watcher():
591
- """
592
- Start the global session watcher for auto-commit on user request completion.
1019
+ try:
1020
+ user_messages = []
593
1021
 
594
- No longer requires a repo_path - the watcher will dynamically extract project paths
595
- from session files and commit to the appropriate repositories.
596
- """
597
- global _watcher
1022
+ with open(session_file, 'r', encoding='utf-8') as f:
1023
+ for line in f:
1024
+ try:
1025
+ data = json.loads(line.strip())
1026
+
1027
+ # Check for user message
1028
+ if data.get("type") == "user":
1029
+ message = data.get("message", {})
1030
+ content = message.get("content", "")
1031
+
1032
+ extracted_text = None
1033
+
1034
+ if isinstance(content, str):
1035
+ extracted_text = content
1036
+ elif isinstance(content, list):
1037
+ # Extract text from content blocks
1038
+ text_parts = []
1039
+ for item in content:
1040
+ if isinstance(item, dict) and item.get("type") == "text":
1041
+ text_parts.append(item.get("text", ""))
1042
+
1043
+ # Only add if we found actual text content
1044
+ # Skip entries that only contain tool_result items
1045
+ if text_parts:
1046
+ extracted_text = "\n".join(text_parts)
1047
+
1048
+ if extracted_text:
1049
+ # Clean the message (remove IDE tags, etc.)
1050
+ cleaned_text = clean_user_message(extracted_text)
1051
+
1052
+ # Skip empty messages after cleaning
1053
+ if not cleaned_text.strip():
1054
+ continue
1055
+
1056
+ # Skip continuation messages
1057
+ if cleaned_text.startswith("This session is being continued"):
1058
+ continue
1059
+
1060
+ user_messages.append(cleaned_text)
1061
+
1062
+ except json.JSONDecodeError:
1063
+ continue
1064
+
1065
+ # Return second-to-last message if available, otherwise last message
1066
+ # This is because the commit is triggered by a new user message,
1067
+ # so the last message is for the NEXT turn, not the current one being committed
1068
+ if len(user_messages) >= 2:
1069
+ return user_messages[-2]
1070
+ elif len(user_messages) == 1:
1071
+ return user_messages[0]
1072
+ else:
1073
+ return "No user message found"
1074
+
1075
+ except Exception as e:
1076
+ logger.error(f"Failed to extract user message: {e}")
1077
+ return "Error extracting message"
1078
+
1079
+ def _extract_assistant_summary(self, session_file: Path) -> str:
1080
+ """Extract a summary of the assistant's response from session file."""
1081
+ try:
1082
+ # Extract last assistant response text
1083
+ assistant_text = ""
1084
+
1085
+ with open(session_file, 'r', encoding='utf-8') as f:
1086
+ for line in f:
1087
+ try:
1088
+ data = json.loads(line.strip())
1089
+
1090
+ if data.get("type") == "assistant":
1091
+ message = data.get("message", {})
1092
+ content = message.get("content", [])
1093
+
1094
+ if isinstance(content, list):
1095
+ for item in content:
1096
+ if isinstance(item, dict) and item.get("type") == "text":
1097
+ assistant_text = item.get("text", "")
1098
+
1099
+ except json.JSONDecodeError:
1100
+ continue
1101
+
1102
+ # Truncate to reasonable length
1103
+ if assistant_text:
1104
+ # Take first 300 characters as summary
1105
+ summary = assistant_text[:300]
1106
+ if len(assistant_text) > 300:
1107
+ summary += "..."
1108
+ return summary
1109
+ else:
1110
+ return "Assistant response"
1111
+
1112
+ except Exception as e:
1113
+ logger.error(f"Failed to extract assistant summary: {e}")
1114
+ return "Error extracting summary"
598
1115
 
599
- if _watcher and _watcher.running:
600
- logger.info("Watcher already running, skipping start")
601
- print("[MCP Watcher] Already running", file=sys.stderr)
602
- return
1116
+ def _extract_current_turn_content(self, session_file: Path) -> str:
1117
+ """
1118
+ Extract only the content for the current turn being committed.
603
1119
 
604
- logger.info("Initializing global watcher instance")
605
- _watcher = DialogueWatcher()
606
- logger.info("Starting watcher task")
607
- asyncio.create_task(_watcher.start())
608
- logger.info("Watcher task created")
1120
+ Since commit is triggered by a new user message (Turn N+1), we need to extract
1121
+ the content from the PREVIOUS turn (Turn N), which includes:
1122
+ - The second-to-last user message
1123
+ - All assistant responses after that user message
1124
+ - But BEFORE the last user message (which belongs to Turn N+1)
609
1125
 
1126
+ Returns:
1127
+ JSONL content for the current turn only
1128
+ """
1129
+ try:
1130
+ lines = []
1131
+ user_message_indices = []
610
1132
 
611
- async def stop_watcher():
612
- """Stop the global session watcher."""
613
- global _watcher
1133
+ # Read all lines and track user message positions
1134
+ with open(session_file, 'r', encoding='utf-8') as f:
1135
+ for idx, line in enumerate(f):
1136
+ lines.append(line)
1137
+ try:
1138
+ data = json.loads(line.strip())
1139
+ if data.get("type") == "user":
1140
+ message = data.get("message", {})
1141
+ content = message.get("content", "")
1142
+
1143
+ # Check if this is a real user message (not tool result, IDE notification, etc.)
1144
+ is_real_message = False
1145
+ if isinstance(content, str):
1146
+ if not content.startswith("This session is being continued") and \
1147
+ not content.startswith("<ide_opened_file>"):
1148
+ is_real_message = True
1149
+ elif isinstance(content, list):
1150
+ text_parts = [item.get("text", "") for item in content
1151
+ if isinstance(item, dict) and item.get("type") == "text"]
1152
+ if text_parts:
1153
+ combined_text = "\n".join(text_parts)
1154
+ if not combined_text.startswith("This session is being continued") and \
1155
+ not combined_text.startswith("<ide_opened_file>"):
1156
+ is_real_message = True
1157
+
1158
+ if is_real_message:
1159
+ user_message_indices.append(idx)
1160
+ except json.JSONDecodeError:
1161
+ continue
1162
+
1163
+ # Determine the range for current turn
1164
+ if len(user_message_indices) >= 2:
1165
+ # Extract from second-to-last user message up to (but not including) last user message
1166
+ start_idx = user_message_indices[-2]
1167
+ end_idx = user_message_indices[-1]
1168
+ turn_lines = lines[start_idx:end_idx]
1169
+ elif len(user_message_indices) == 1:
1170
+ # First turn: from first user message to end
1171
+ start_idx = user_message_indices[0]
1172
+ turn_lines = lines[start_idx:]
1173
+ else:
1174
+ # No valid user messages
1175
+ return ""
1176
+
1177
+ return "".join(turn_lines)
1178
+
1179
+ except Exception as e:
1180
+ logger.error(f"Failed to extract current turn content: {e}", exc_info=True)
1181
+ return ""
1182
+
1183
+ def _generate_llm_summary(self, session_file: Path) -> Optional[tuple[str, str, str]]:
1184
+ """
1185
+ Generate LLM-powered summary for the CURRENT TURN only.
1186
+
1187
+ Priority:
1188
+ 1. MCP Sampling API (if enabled and available)
1189
+ 2. Direct Claude/OpenAI API calls (existing fallback)
1190
+
1191
+ Returns:
1192
+ Tuple of (title, model_name, description), or None if LLM is disabled or fails
1193
+ """
1194
+ try:
1195
+ if not self.config.use_LLM:
1196
+ logger.debug("LLM summary disabled in config")
1197
+ return None
1198
+
1199
+ # Extract only the current turn's content
1200
+ turn_content = self._extract_current_turn_content(session_file)
1201
+ if not turn_content:
1202
+ logger.warning("No content found for current turn")
1203
+ return None
1204
+
1205
+ # NEW: Try MCP Sampling first (if enabled)
1206
+ if self.config.use_mcp_sampling:
1207
+ logger.info("Attempting LLM summary via MCP Sampling")
1208
+ print("[MCP Watcher] → Requesting summary via MCP Sampling (user approval required)...", file=sys.stderr)
1209
+
1210
+ try:
1211
+ # Import here to avoid circular dependency
1212
+ from .mcp_server import request_llm_summary_via_sampling
1213
+
1214
+ # Get current event loop (we're in async context via watcher)
1215
+ import asyncio
1216
+ loop = asyncio.get_event_loop()
1217
+
1218
+ # Run the sampling request with 30s timeout
1219
+ result = loop.run_until_complete(
1220
+ asyncio.wait_for(
1221
+ request_llm_summary_via_sampling(turn_content),
1222
+ timeout=30.0
1223
+ )
1224
+ )
1225
+
1226
+ if result:
1227
+ title, model, description = result
1228
+ logger.info(f"✓ MCP Sampling success using {model}")
1229
+ print(f"[MCP Watcher] ✓ Generated summary via MCP Sampling ({model})", file=sys.stderr)
1230
+ return result
1231
+ else:
1232
+ logger.warning("MCP Sampling returned None (not in MCP mode or user denied)")
1233
+ print("[MCP Watcher] ⚠ MCP Sampling unavailable, falling back to direct API", file=sys.stderr)
1234
+
1235
+ except asyncio.TimeoutError:
1236
+ logger.warning("MCP Sampling timeout (30s), falling back to direct API")
1237
+ print("[MCP Watcher] ⚠ MCP Sampling timeout, falling back to direct API", file=sys.stderr)
1238
+ except Exception as e:
1239
+ logger.warning(f"MCP Sampling error: {e}, falling back to direct API")
1240
+ print(f"[MCP Watcher] ⚠ MCP Sampling error: {e}", file=sys.stderr)
1241
+
1242
+ # EXISTING: Fallback to direct API calls
1243
+ from .hooks import generate_summary_with_llm
1244
+
1245
+ title, model_name, description = generate_summary_with_llm(
1246
+ turn_content,
1247
+ max_chars=500,
1248
+ provider=self.config.llm_provider
1249
+ )
1250
+
1251
+ if title:
1252
+ if model_name:
1253
+ logger.info(f"Generated LLM summary using {model_name}")
1254
+ print(f"[MCP Watcher] ✓ Generated LLM summary using {model_name}", file=sys.stderr)
1255
+ return (title, model_name or "unknown", description or "")
1256
+ else:
1257
+ logger.warning("LLM summary generation returned empty result")
1258
+ return None
1259
+
1260
+ except Exception as e:
1261
+ logger.error(f"Failed to generate LLM summary: {e}", exc_info=True)
1262
+ print(f"[MCP Watcher] Failed to generate LLM summary: {e}", file=sys.stderr)
1263
+ return None
1264
+
1265
+ def _extract_modified_files(self, session_file: Path) -> list[Path]:
1266
+ """
1267
+ Extract all project files for mirroring.
1268
+
1269
+ This method uses the shared mirror_utils.collect_project_files() logic
1270
+ to find all files that should be mirrored to the shadow git repository.
1271
+ It respects .gitignore patterns and excludes .git directory.
1272
+
1273
+ Args:
1274
+ session_file: Path to the session file
1275
+
1276
+ Returns:
1277
+ List of absolute paths to all project files
1278
+ """
1279
+ try:
1280
+ # Get project path
1281
+ project_path = self._extract_project_path(session_file)
1282
+ if not project_path:
1283
+ logger.warning("Could not determine project path")
1284
+ return []
1285
+
1286
+ # Use shared logic to collect all project files
1287
+ all_files = collect_project_files(project_path, logger=logger)
1288
+ return all_files
1289
+
1290
+ except Exception as e:
1291
+ logger.error(f"Failed to extract project files: {e}", exc_info=True)
1292
+ return []
1293
+
1294
+ def _get_session_start_time(self, session_file: Path) -> Optional[float]:
1295
+ """
1296
+ Get the session start time from the first message timestamp.
1297
+
1298
+ Returns:
1299
+ Unix timestamp (float) or None if not found
1300
+ """
1301
+ try:
1302
+ with open(session_file, 'r', encoding='utf-8') as f:
1303
+ for line in f:
1304
+ try:
1305
+ data = json.loads(line.strip())
1306
+
1307
+ # Look for timestamp field in various formats
1308
+ timestamp_str = data.get("timestamp")
1309
+ if timestamp_str:
1310
+ # Parse ISO 8601 timestamp
1311
+ from datetime import datetime
1312
+ dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
1313
+ return dt.timestamp()
1314
+
1315
+ except (json.JSONDecodeError, ValueError, KeyError):
1316
+ continue
1317
+
1318
+ # Fallback: use session file's creation time
1319
+ return session_file.stat().st_ctime
1320
+
1321
+ except Exception as e:
1322
+ logger.error(f"Failed to get session start time: {e}")
1323
+ return None
1324
+
1325
+ async def auto_init_projects(self):
1326
+ """
1327
+ Discover and auto-initialize uninitialized projects from ~/.claude/projects/.
1328
+
1329
+ This method:
1330
+ 1. Scans all Claude Code project directories
1331
+ 2. Decodes project paths
1332
+ 3. Checks blacklist
1333
+ 4. Checks if already initialized
1334
+ 5. Attempts initialization for new projects
1335
+ 6. Logs all operations (no user notifications)
1336
+ """
1337
+ try:
1338
+ claude_projects = Path.home() / ".claude" / "projects"
1339
+
1340
+ if not claude_projects.exists():
1341
+ logger.debug("No ~/.claude/projects directory found")
1342
+ return
1343
+
1344
+ logger.info("Starting auto-init scan of Claude projects")
1345
+
1346
+ initialized_count = 0
1347
+ skipped_count = 0
1348
+ failed_count = 0
1349
+
1350
+ for project_dir in claude_projects.iterdir():
1351
+ if not project_dir.is_dir():
1352
+ continue
1353
+
1354
+ # Skip system directories
1355
+ if project_dir.name.startswith('.'):
1356
+ continue
1357
+
1358
+ # Decode project path
1359
+ project_path = decode_claude_project_path(project_dir.name)
1360
+ if not project_path:
1361
+ logger.debug(f"Could not decode project: {project_dir.name}")
1362
+ skipped_count += 1
1363
+ continue
1364
+
1365
+ project_key = str(project_path)
1366
+
1367
+ # Check if already initialized - if yes, skip (even if blacklisted)
1368
+ # User has explicitly initialized this project, so respect their choice
1369
+ if is_project_initialized(project_path):
1370
+ logger.debug(f"Already initialized: {project_path}")
1371
+ skipped_count += 1
1372
+ continue
1373
+
1374
+ # Skip if previously failed auto-init
1375
+ if project_key in self.failed_init_projects:
1376
+ logger.debug(f"Skipping previously failed project: {project_path}")
1377
+ skipped_count += 1
1378
+ continue
1379
+
1380
+ # Check blacklist - only for auto-init, not for already initialized projects
1381
+ if is_path_blacklisted(project_path):
1382
+ logger.info(f"Skipping blacklisted project (auto-init): {project_path}")
1383
+ skipped_count += 1
1384
+ continue
1385
+
1386
+ # Attempt initialization
1387
+ logger.info(f"Auto-initializing project: {project_path}")
1388
+
1389
+ try:
1390
+ from .commands.init import init_repository
1391
+ from .commands.mirror import mirror_project
1392
+
1393
+ result = await asyncio.get_event_loop().run_in_executor(
1394
+ None,
1395
+ init_repository,
1396
+ str(project_path),
1397
+ False # force=False
1398
+ )
1399
+
1400
+ if result["success"]:
1401
+ logger.info(f"✓ Auto-initialized: {project_path.name}")
1402
+
1403
+ # Mirror project files after successful initialization
1404
+ logger.info(f"Mirroring project files for {project_path.name}")
1405
+ mirror_success = await asyncio.get_event_loop().run_in_executor(
1406
+ None,
1407
+ mirror_project,
1408
+ project_path,
1409
+ False # verbose=False
1410
+ )
1411
+
1412
+ if mirror_success:
1413
+ logger.info(f"✓ Mirrored project files for {project_path.name}")
1414
+
1415
+ # Create initial commit with mirrored files
1416
+ realign_dir = Path(result["realign_dir"])
1417
+ try:
1418
+ subprocess.run(
1419
+ ["git", "add", "-A"],
1420
+ cwd=realign_dir,
1421
+ check=True,
1422
+ capture_output=True
1423
+ )
1424
+ subprocess.run(
1425
+ ["git", "commit", "-m", "Initial commit: Mirror project files"],
1426
+ cwd=realign_dir,
1427
+ check=True,
1428
+ capture_output=True
1429
+ )
1430
+ logger.info(f"✓ Created initial commit for {project_path.name}")
1431
+ except subprocess.CalledProcessError as e:
1432
+ logger.warning(f"Failed to create initial commit for {project_path.name}: {e}")
1433
+ else:
1434
+ logger.warning(f"Failed to mirror project files for {project_path.name}")
1435
+
1436
+ initialized_count += 1
1437
+ else:
1438
+ logger.error(f"✗ Auto-init failed for {project_path.name}: {result.get('message')}")
1439
+ # Mark as failed, never retry
1440
+ self.failed_init_projects.add(project_key)
1441
+ failed_count += 1
1442
+
1443
+ except Exception as e:
1444
+ logger.error(f"✗ Auto-init exception for {project_path.name}: {e}", exc_info=True)
1445
+ # Mark as failed, never retry
1446
+ self.failed_init_projects.add(project_key)
1447
+ failed_count += 1
1448
+
1449
+ logger.info(f"Auto-init complete: {initialized_count} initialized, {skipped_count} skipped, {failed_count} failed")
1450
+
1451
+ except Exception as e:
1452
+ logger.error(f"Error in auto_init_projects: {e}", exc_info=True)
1453
+
1454
+ async def run_periodic_auto_init(self):
1455
+ """
1456
+ Run auto-init periodically while watcher is running.
1457
+
1458
+ Runs every self.auto_init_interval seconds (default 5s).
1459
+ """
1460
+ try:
1461
+ while self.running:
1462
+ current_time = time.time()
1463
+
1464
+ # Check if it's time to run auto-init
1465
+ if current_time - self.last_auto_init_time >= self.auto_init_interval:
1466
+ logger.info("Running periodic auto-init check")
1467
+ await self.auto_init_projects()
1468
+ self.last_auto_init_time = current_time
1469
+
1470
+ # Sleep for 1 second before checking again
1471
+ await asyncio.sleep(1.0)
1472
+
1473
+ except Exception as e:
1474
+ logger.error(f"Error in periodic auto-init: {e}", exc_info=True)
614
1475
 
615
- if _watcher:
616
- await _watcher.stop()
617
- _watcher = None