aline-ai 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,492 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Review command - Display unpushed commits with session summaries.
4
+
5
+ This allows users to review what will be pushed before making it public.
6
+ """
7
+
8
+ import re
9
+ import subprocess
10
+ import sys
11
+ from dataclasses import dataclass
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import List, Dict, Tuple, Optional
15
+
16
+ from ..logging_config import setup_logger
17
+
18
+ logger = setup_logger('realign.commands.review', 'review.log')
19
+
20
+
21
@dataclass
class UnpushedCommit:
    """One local commit that is not yet on the remote, with its session data.

    Instances are built by ``get_unpushed_commits``; ``has_sensitive`` starts
    out False and may be switched on later by the secret-detection pass.
    """

    # Position shown to the user in the listing (1-based).
    index: int
    # Abbreviated commit hash.
    hash: str
    # Full commit hash.
    full_hash: str
    # Subject (first line) of the commit message.
    message: str
    # When the commit was made.
    timestamp: datetime
    # Summary text pulled out of the commit message body.
    llm_summary: str
    # Session files touched by the commit.
    session_files: List[str]
    # Added line ranges per session file: {file: [(start, end), ...]}.
    session_additions: Dict[str, List[Tuple[int, int]]]
    # True once sensitive content has been detected for this commit.
    has_sensitive: bool = False
33
+
34
+
35
def get_unpushed_commits(repo_root: Path) -> List[UnpushedCommit]:
    """
    Get all unpushed commits from the current branch.

    Strategy (option C):
    1. Try to use the upstream branch (@{u})
    2. Fall back to origin/<branch> as reported by detect_main_branch()
    3. If that remote ref does not exist, show all commits reachable from HEAD

    Args:
        repo_root: Path to repository root

    Returns:
        List of UnpushedCommit objects in the order ``git log`` prints them
        (newest first). An empty list is returned when ``git log`` fails or
        reports no commits. Lines that cannot be parsed are skipped with a
        warning; their index is not reused, so indices may have gaps.
    """
    logger.info("Getting unpushed commits")

    # Try to get upstream branch
    upstream_result = subprocess.run(
        ["git", "rev-parse", "--abbrev-ref", "@{u}"],
        cwd=repo_root,
        capture_output=True,
        text=True
    )

    if upstream_result.returncode == 0:
        base = "@{u}"
        logger.debug(f"Using upstream branch: {upstream_result.stdout.strip()}")
    else:
        # Fallback to origin/main or origin/master
        main_branch = detect_main_branch(repo_root)
        base = f"origin/{main_branch}"
        logger.debug(f"No upstream found, using fallback: {base}")

        # Verify that the remote branch exists
        verify_result = subprocess.run(
            ["git", "rev-parse", "--verify", base],
            cwd=repo_root,
            capture_output=True,
            text=True
        )

        if verify_result.returncode != 0:
            # Remote branch doesn't exist, show all commits on current branch
            base = None
            logger.info("No remote branch found, will show all commits on current branch")

    # Get commit list
    # Format: full_hash|short_hash|subject|timestamp
    if base:
        log_cmd = ["git", "log", f"{base}..HEAD", "--format=%H|%h|%s|%at"]
    else:
        # No remote, show all commits
        log_cmd = ["git", "log", "HEAD", "--format=%H|%h|%s|%at"]

    log_result = subprocess.run(
        log_cmd,
        cwd=repo_root,
        capture_output=True,
        text=True
    )

    if log_result.returncode != 0:
        logger.error(f"Failed to get commit list: {log_result.stderr}")
        return []

    commit_lines = [line for line in log_result.stdout.strip().split('\n') if line]

    if not commit_lines:
        logger.info("No unpushed commits found")
        return []

    logger.info(f"Found {len(commit_lines)} unpushed commit(s)")

    # Parse commits
    commits = []
    for idx, line in enumerate(commit_lines, 1):
        # The two hashes and the timestamp never contain '|', but the subject
        # may. Peel the fixed fields off both ends so that a '|' inside the
        # subject does not make the line look malformed.
        try:
            full_hash, short_hash, rest = line.split('|', 2)
            subject, timestamp_str = rest.rsplit('|', 1)
            timestamp = datetime.fromtimestamp(int(timestamp_str))
        except (ValueError, OverflowError, OSError):
            logger.warning(f"Skipping malformed commit line: {line}")
            continue

        # Get full commit message
        full_message = subprocess.run(
            ["git", "log", "-1", "--format=%B", full_hash],
            cwd=repo_root,
            capture_output=True,
            text=True
        ).stdout

        # Extract LLM summary
        llm_summary = extract_llm_summary(full_message)

        # Get session file additions
        session_files, session_additions = get_session_additions(full_hash, repo_root)

        commit = UnpushedCommit(
            index=idx,
            hash=short_hash,
            full_hash=full_hash,
            message=subject,
            timestamp=timestamp,
            llm_summary=llm_summary,
            session_files=session_files,
            session_additions=session_additions,
            has_sensitive=False  # Will be set by --detect-secrets flag
        )

        commits.append(commit)
        logger.debug(f"Parsed commit [{idx}] {short_hash}: {subject}")

    return commits
150
+
151
+
152
def detect_main_branch(repo_root: Path) -> str:
    """
    Work out which branch name to treat as the main branch.

    ``origin/main`` is probed first, then ``origin/master``. If neither ref
    resolves, the name reported for the current HEAD is used; if even that
    lookup fails, "main" is returned.

    Args:
        repo_root: Path to repository root

    Returns:
        "main", "master", or the current local branch name
    """
    # Probe the conventional remote branches in priority order.
    for candidate in ("main", "master"):
        probe = subprocess.run(
            ["git", "rev-parse", "--verify", f"origin/{candidate}"],
            cwd=repo_root,
            capture_output=True,
            text=True
        )
        if probe.returncode == 0:
            return candidate

    # Neither remote ref exists: fall back to whatever HEAD points at.
    head_lookup = subprocess.run(
        ["git", "rev-parse", "--abbrev-ref", "HEAD"],
        cwd=repo_root,
        capture_output=True,
        text=True
    )

    if head_lookup.returncode != 0:
        # Last resort when git cannot even name the current branch.
        logger.warning("Could not detect main branch, defaulting to 'main'")
        return "main"

    branch_name = head_lookup.stdout.strip()
    logger.info(f"No remote found, using current branch: {branch_name}")
    return branch_name
200
+
201
+
202
def extract_llm_summary(commit_message: str) -> str:
    """
    Extract LLM summary from commit message.

    Expected format:
        chore: Auto-commit MCP session (2025-11-22 19:24:29)

        --- LLM-Summary (claude-3-5-haiku) ---
        * [Claude] Discussed implementing JWT authentication
        * [Codex] Fixed bug in payment module

        Agent-Redacted: false

    Only bullet lines (starting with ``*``) between the ``--- LLM-Summary``
    header and the next ``---`` / ``Agent-`` line are collected. A leading
    ``[Agent]`` tag is removed from each bullet; brackets that appear later
    in the text are left untouched.

    Args:
        commit_message: Full commit message

    Returns:
        Bullet texts joined with " | ", or "(No summary)" when none are found
    """
    in_summary = False
    summary_lines = []

    for line in commit_message.split('\n'):
        # Start of summary section
        if '--- LLM-Summary' in line:
            in_summary = True
            continue

        if not in_summary:
            continue

        stripped = line.strip()

        # End of summary section
        if stripped.startswith(('---', 'Agent-')):
            break

        # Extract summary content
        if stripped.startswith('*'):
            # Remove leading "* "
            content = stripped[1:].strip()

            # Remove the [Agent] prefix only when the bullet actually starts
            # with one; otherwise a bracket inside the text (e.g. "items[0]")
            # would cause everything before it to be thrown away.
            if content.startswith('[') and ']' in content:
                # "[Claude] Text here" -> "Text here"
                content = content.split(']', 1)[1].strip()

            summary_lines.append(content)

    if summary_lines:
        return ' | '.join(summary_lines)
    return "(No summary)"
253
+
254
+
255
def get_session_additions(commit_hash: str, repo_root: Path) -> Tuple[List[str], Dict[str, List[Tuple[int, int]]]]:
    """
    Get session files modified in this commit and their line additions.

    A session file is any path under ``.realign/sessions/`` ending in
    ``.jsonl``.

    Args:
        commit_hash: Commit hash
        repo_root: Path to repository root

    Returns:
        Tuple of:
        - List of session file paths (relative to repo root)
        - Dict mapping file paths to line ranges: {file: [(start, end), ...]}

        Files whose diff could not be read, or whose diff contains no added
        lines, appear in the list but have no entry in the dict. Both values
        are empty when the file list cannot be obtained or contains no
        session files.
    """
    logger.debug(f"Getting session additions for commit {commit_hash}")

    # Get files modified in this commit.
    # --root makes diff-tree report the files of a parentless (initial)
    # commit as well; without it such a commit yields no output at all.
    files_result = subprocess.run(
        ["git", "diff-tree", "--root", "--no-commit-id", "--name-only", "-r", commit_hash],
        cwd=repo_root,
        capture_output=True,
        text=True
    )

    if files_result.returncode != 0:
        logger.warning(f"Failed to get files for commit {commit_hash}")
        return [], {}

    all_files = files_result.stdout.strip().split('\n')

    # Filter session files
    session_files = [
        f for f in all_files
        if f.startswith('.realign/sessions/') and f.endswith('.jsonl')
    ]

    if not session_files:
        logger.debug(f"No session files in commit {commit_hash}")
        return [], {}

    logger.debug(f"Found {len(session_files)} session file(s) in commit {commit_hash}")

    # Get line additions for each session file
    additions = {}

    for session_file in session_files:
        # Get diff for this file
        diff_result = subprocess.run(
            ["git", "show", commit_hash, "--", session_file],
            cwd=repo_root,
            capture_output=True,
            text=True
        )

        if diff_result.returncode != 0:
            logger.warning(f"Failed to get diff for {session_file}")
            continue

        # Parse diff to extract line ranges
        line_ranges = parse_diff_additions(diff_result.stdout)

        if line_ranges:
            additions[session_file] = line_ranges
            total_lines = sum(end - start + 1 for start, end in line_ranges)
            logger.debug(f" {session_file}: +{total_lines} lines in {len(line_ranges)} range(s)")

    return session_files, additions
321
+
322
+
323
def parse_diff_additions(diff_output: str) -> List[Tuple[int, int]]:
    """
    Parse git diff output to extract line ranges of additions.

    Diff format:
        @@ -10,5 +10,8 @@
         existing line
        +new line 1
        +new line 2
        +new line 3
         existing line

    Only lines that follow a hunk header (``@@ ... @@``) are interpreted, so
    file headers such as ``+++ b/path`` and any commit header printed by
    ``git show`` are ignored, while an added line whose own text begins with
    ``++`` is still counted.

    Args:
        diff_output: Output from git show or git diff

    Returns:
        List of (start_line, end_line) tuples (1-based, inclusive)
        Line numbers are based on the NEW file (after commit)
    """
    ranges = []
    current_line = 0
    range_start = None
    in_hunk = False

    def _close_range():
        # Record the run of added lines that ends just before current_line.
        nonlocal range_start
        if range_start is not None:
            ranges.append((range_start, current_line - 1))
            logger.debug(f" Range: {range_start}-{current_line - 1}")
            range_start = None

    for line in diff_output.split('\n'):
        # Parse hunk header: @@ -old_start,old_count +new_start,new_count @@
        if line.startswith('@@'):
            match = re.search(r'\+(\d+),?(\d+)?', line)
            if match:
                # A hunk can end on added lines; keep that range before the
                # line counter jumps to the next hunk's start.
                _close_range()
                current_line = int(match.group(1))
                in_hunk = True
                logger.debug(f" Hunk starts at line {current_line}")

        # Start of another file's diff: header lines follow, not hunk content
        elif line.startswith('diff --git'):
            _close_range()
            in_hunk = False

        # Anything before the first hunk is header text, not file content
        elif not in_hunk:
            continue

        # Added line (inside a hunk every '+' line is content, even '+++...')
        elif line.startswith('+'):
            if range_start is None:
                range_start = current_line
            current_line += 1

        # Context line (unchanged)
        elif line.startswith(' '):
            _close_range()
            current_line += 1

        # Deleted lines and "\ No newline at end of file" markers do not
        # affect new-file line numbers, so they are intentionally ignored.

    # Handle last range if still open
    _close_range()

    return ranges
380
+
381
+
382
def display_unpushed_commits(commits: List[UnpushedCommit], verbose: bool = False):
    """
    Print the unpushed commits to stdout, one entry per commit.

    Each entry shows the index, short hash, subject and time; a summary line
    when one was extracted; one line per session file with its added-line
    count; and a warning when the commit is flagged as sensitive.

    Args:
        commits: List of UnpushedCommit objects
        verbose: Accepted for interface compatibility; not used here
    """
    # Nothing to list: say so and stop.
    if not commits:
        print("\n✓ No unpushed commits found.\n")
        return

    print(f"\n📋 Unpushed commits ({len(commits)}):\n")

    for entry in commits:
        # Header line plus commit time
        when = entry.timestamp.strftime('%Y-%m-%d %H:%M:%S')
        print(f" [{entry.index}] {entry.hash} - {entry.message}")
        print(f" Time: {when}")

        # Summary is shown only when a real one was extracted
        summary = entry.llm_summary
        if summary and summary != "(No summary)":
            print(f" Summary: {summary}")

        # One line per session file, with the number of added lines
        for session_file in (entry.session_files or []):
            spans = entry.session_additions.get(session_file, [])
            total_lines = 0
            for start, end in spans:
                total_lines += end - start + 1
            print(f" Session: {session_file} (+{total_lines} lines)")

        # Sensitive content warning
        if entry.has_sensitive:
            print(f" ⚠️ WARNING: Potential sensitive content detected")

        print()  # Blank line separator
417
+
418
+
419
def review_command(
    repo_root: Optional[Path] = None,
    verbose: bool = False,
    detect_secrets: bool = False
) -> int:
    """
    Main entry point for review command.

    Collects the unpushed commits, optionally flags those whose session files
    contain sensitive content, and prints the listing.

    Args:
        repo_root: Path to repository root (auto-detected if None)
        verbose: Show detailed information
        detect_secrets: Run sensitive content detection

    Returns:
        0 on success, 1 on error (git missing, not inside a repository, or
        the commit list could not be built)
    """
    logger.info("======== Review command started ========")

    # Auto-detect repo root if not provided
    if repo_root is None:
        try:
            result = subprocess.run(
                ["git", "rev-parse", "--show-toplevel"],
                capture_output=True,
                text=True,
                check=True
            )
        except FileNotFoundError:
            # subprocess raises this when the git executable itself is absent;
            # report it instead of letting a traceback escape.
            print("Error: git executable not found", file=sys.stderr)
            logger.error("git executable not found")
            return 1
        except subprocess.CalledProcessError:
            print("Error: Not in a git repository", file=sys.stderr)
            logger.error("Not in a git repository")
            return 1

        repo_root = Path(result.stdout.strip())
        logger.debug(f"Detected repo root: {repo_root}")

    # Get unpushed commits
    try:
        commits = get_unpushed_commits(repo_root)
    except Exception as e:
        print(f"Error: Failed to get unpushed commits: {e}", file=sys.stderr)
        logger.error(f"Failed to get unpushed commits: {e}", exc_info=True)
        return 1

    # Detect sensitive content if requested
    if detect_secrets:
        try:
            from ..redactor import check_and_redact_session

            for commit in commits:
                for session_file in commit.session_files:
                    # The check reads the file as it currently exists in the
                    # working tree, not the blob stored in the commit.
                    file_path = repo_root / session_file
                    if not file_path.exists():
                        continue

                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()
                    except (OSError, UnicodeDecodeError) as e:
                        # One unreadable file should not abort the review.
                        logger.warning(f"Could not read session file {file_path}: {e}")
                        continue

                    _, has_secrets, _ = check_and_redact_session(
                        content,
                        redact_mode="auto"
                    )

                    if has_secrets:
                        commit.has_sensitive = True
                        logger.warning(f"Detected sensitive content in commit {commit.hash}")
                        # One hit is enough to flag the commit
                        break
        except ImportError:
            print("Warning: detect-secrets not available, skipping sensitive content detection", file=sys.stderr)
            logger.warning("detect-secrets not available")

    # Display commits
    display_unpushed_commits(commits, verbose=verbose)

    logger.info("======== Review command completed ========")
    return 0
realign/redactor.py CHANGED
@@ -142,17 +142,55 @@ def redact_content(content: str, secrets: List[SecretMatch]) -> str:
142
142
  for line_num, line_secrets in secrets_by_line.items():
143
143
  secret_types = [s.type for s in line_secrets]
144
144
  # Keep the JSON structure but redact the sensitive value
145
- # This is a simple approach - we mark the line as redacted
146
145
  original_line = lines[line_num]
147
- # Try to preserve JSON structure by finding quotes and redacting content
148
- if '"' in original_line:
149
- # Simple redaction: find the value part and replace it
150
- lines[line_num] = original_line.replace(
151
- original_line[original_line.find(':'):] if ':' in original_line else original_line,
152
- f': "[REDACTED: {", ".join(set(secret_types))}]"'
153
- )
154
- else:
155
- lines[line_num] = f"[REDACTED LINE - {', '.join(set(secret_types))}]"
146
+
147
+ # Try to parse as JSON and redact only values
148
+ import json
149
+ import re
150
+
151
+ try:
152
+ # Try to parse the line as JSON
153
+ json_obj = json.loads(original_line)
154
+
155
+ # Redact all string values that might contain secrets
156
+ def redact_json_values(obj):
157
+ """Recursively redact values in JSON object."""
158
+ if isinstance(obj, dict):
159
+ return {k: redact_json_values(v) for k, v in obj.items()}
160
+ elif isinstance(obj, list):
161
+ return [redact_json_values(item) for item in obj]
162
+ elif isinstance(obj, str):
163
+ # Check if this value might be sensitive (heuristic: not too short)
164
+ # This is a simple approach - we redact string values on lines with secrets
165
+ return f"[REDACTED: {', '.join(set(secret_types))}]"
166
+ else:
167
+ return obj
168
+
169
+ redacted_obj = redact_json_values(json_obj)
170
+ lines[line_num] = json.dumps(redacted_obj, ensure_ascii=False)
171
+
172
+ except (json.JSONDecodeError, Exception):
173
+ # If JSON parsing fails, fall back to simple replacement
174
+ # Try to preserve structure by using regex to find and replace values
175
+ if ':' in original_line:
176
+ # Find the value part after the colon, preserving the closing braces/brackets
177
+ # Match pattern: : "value" or : value, capture trailing punctuation
178
+ pattern = r':\s*"[^"]*"(\s*[,}\]])'
179
+ if re.search(pattern, original_line):
180
+ lines[line_num] = re.sub(
181
+ pattern,
182
+ rf': "[REDACTED: {", ".join(set(secret_types))}]"\1',
183
+ original_line
184
+ )
185
+ else:
186
+ # Fallback: replace from colon onwards but keep trailing punctuation
187
+ match = re.search(r'(.*?:\s*)(.+?)(\s*[}\]]*\s*)$', original_line)
188
+ if match:
189
+ lines[line_num] = f'{match.group(1)}"[REDACTED: {", ".join(set(secret_types))}]"{match.group(3)}'
190
+ else:
191
+ lines[line_num] = f'{original_line}: "[REDACTED: {", ".join(set(secret_types))}]"'
192
+ else:
193
+ lines[line_num] = f"[REDACTED LINE - {', '.join(set(secret_types))}]"
156
194
 
157
195
  redacted_content = '\n'.join(lines)
158
196
  redacted_size = len(redacted_content)