aline-ai 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. {aline_ai-0.2.5.dist-info → aline_ai-0.3.0.dist-info}/METADATA +3 -1
  2. aline_ai-0.3.0.dist-info/RECORD +41 -0
  3. aline_ai-0.3.0.dist-info/entry_points.txt +3 -0
  4. realign/__init__.py +32 -1
  5. realign/cli.py +203 -19
  6. realign/commands/__init__.py +2 -2
  7. realign/commands/clean.py +149 -0
  8. realign/commands/config.py +1 -1
  9. realign/commands/export_shares.py +1785 -0
  10. realign/commands/hide.py +112 -24
  11. realign/commands/import_history.py +873 -0
  12. realign/commands/init.py +104 -217
  13. realign/commands/mirror.py +131 -0
  14. realign/commands/pull.py +101 -0
  15. realign/commands/push.py +155 -245
  16. realign/commands/review.py +216 -54
  17. realign/commands/session_utils.py +139 -4
  18. realign/commands/share.py +965 -0
  19. realign/commands/status.py +559 -0
  20. realign/commands/sync.py +91 -0
  21. realign/commands/undo.py +423 -0
  22. realign/commands/watcher.py +805 -0
  23. realign/config.py +21 -10
  24. realign/file_lock.py +3 -1
  25. realign/hash_registry.py +310 -0
  26. realign/hooks.py +368 -384
  27. realign/logging_config.py +2 -2
  28. realign/mcp_server.py +263 -549
  29. realign/mcp_watcher.py +999 -142
  30. realign/mirror_utils.py +322 -0
  31. realign/prompts/__init__.py +21 -0
  32. realign/prompts/presets.py +238 -0
  33. realign/redactor.py +168 -16
  34. realign/tracker/__init__.py +9 -0
  35. realign/tracker/git_tracker.py +1123 -0
  36. realign/watcher_daemon.py +115 -0
  37. aline_ai-0.2.5.dist-info/RECORD +0 -28
  38. aline_ai-0.2.5.dist-info/entry_points.txt +0 -5
  39. realign/commands/auto_commit.py +0 -231
  40. realign/commands/commit.py +0 -379
  41. realign/commands/search.py +0 -449
  42. realign/commands/show.py +0 -416
  43. {aline_ai-0.2.5.dist-info → aline_ai-0.3.0.dist-info}/WHEEL +0 -0
  44. {aline_ai-0.2.5.dist-info → aline_ai-0.3.0.dist-info}/licenses/LICENSE +0 -0
  45. {aline_ai-0.2.5.dist-info → aline_ai-0.3.0.dist-info}/top_level.txt +0 -0
realign/redactor.py CHANGED
@@ -29,9 +29,83 @@ class SecretMatch:
29
29
  return f"SecretMatch(type={self.type}, line={self.line})"
30
30
 
31
31
 
32
def _detect_custom_api_keys(content: str) -> List[SecretMatch]:
    """
    Detect API keys using custom regex patterns.

    This catches common API key formats that detect-secrets might miss
    (OpenAI, Anthropic, GitHub, Slack, bearer tokens, generic keys, and
    long mixed-case strings that look secret-like).

    Args:
        content: The text content to scan

    Returns:
        List of SecretMatch objects for detected API keys
    """
    # Hoisted out of the match loop: importing hashlib per detected secret
    # (as the previous version did) is wasted work on secret-dense input.
    import hashlib
    import re

    # Compile each pattern once per call instead of re-matching raw strings
    # per line.  IGNORECASE preserves the original matching behavior
    # (e.g. "Bearer" vs "bearer", "SK-" vs "sk-").
    patterns = [
        # OpenAI API keys (sk-..., sk-proj-...).  The optional "proj-"
        # segment is needed because the key-body class excludes hyphens,
        # so a bare \bsk-[a-zA-Z0-9]{20,} would never match project-scoped
        # keys ("proj" is only 4 chars before the next hyphen).
        (re.compile(r'\bsk-(?:proj-)?[a-zA-Z0-9]{20,}', re.IGNORECASE), 'OpenAI API Key'),
        # Anthropic API keys (sk-ant-api03-...)
        (re.compile(r'\bsk-ant-[a-zA-Z0-9\-]{50,}', re.IGNORECASE), 'Anthropic API Key'),
        # Generic API keys with common prefixes (api_key=..., apikey: "...")
        (re.compile(r'\b(?:api[_-]?key|apikey|api[_-]?secret)[\s:=]+["\']?([a-zA-Z0-9_\-]{32,})["\']?', re.IGNORECASE), 'Generic API Key'),
        # Bearer tokens
        (re.compile(r'\bBearer\s+[a-zA-Z0-9\-._~+/]+=*', re.IGNORECASE), 'Bearer Token'),
        # GitHub tokens (ghp_/ghs_)
        (re.compile(r'\bgh[ps]_[a-zA-Z0-9]{36,}', re.IGNORECASE), 'GitHub Token'),
        # Slack tokens (xoxb-/xoxa-/xoxp-/xoxr-/xoxs-)
        (re.compile(r'\bxox[baprs]-[a-zA-Z0-9\-]{10,}', re.IGNORECASE), 'Slack Token'),
        # Generic long alphanumeric strings that look like secrets
        # (60+ chars; mixed-case/digit check applied below)
        (re.compile(r'\b[a-zA-Z0-9]{60,}\b', re.IGNORECASE), 'Potential Secret (Long String)'),
    ]

    # Compiled once: UUIDs are identifiers, not secrets, and must be skipped.
    uuid_re = re.compile(
        r'^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$',
        re.IGNORECASE,
    )
    # Common documentation placeholders that are never real credentials.
    false_positives = {'example', 'placeholder', 'your_api_key_here', 'your-api-key'}

    secrets: List[SecretMatch] = []
    for line_num, line in enumerate(content.split('\n'), start=1):
        for pattern, secret_type in patterns:
            for match in pattern.finditer(line):
                matched_text = match.group(0)

                # Skip UUIDs and documentation placeholders.
                if uuid_re.match(matched_text):
                    continue
                if matched_text.lower() in false_positives:
                    continue

                # For the broad long-string heuristic, require mixed case
                # (upper + lower) or letters + digits to cut false positives.
                if secret_type == 'Potential Secret (Long String)':
                    has_upper = any(c.isupper() for c in matched_text)
                    has_lower = any(c.islower() for c in matched_text)
                    has_digit = any(c.isdigit() for c in matched_text)
                    if not ((has_upper and has_lower) or (has_digit and (has_upper or has_lower))):
                        continue

                # Short hash identifies the secret without storing its value.
                secret_hash = hashlib.sha256(matched_text.encode()).hexdigest()[:16]

                secrets.append(
                    SecretMatch(
                        secret_type=secret_type,
                        line_number=line_num,
                        secret_hash=secret_hash,
                    )
                )
                logger.debug(f"Custom pattern detected: {secret_type} at line {line_num}")

    return secrets
104
+
105
+
32
106
  def detect_secrets(content: str) -> Tuple[List[SecretMatch], bool]:
33
107
  """
34
- Detect secrets in the given content using detect-secrets library.
108
+ Detect secrets in the given content using detect-secrets library plus custom patterns.
35
109
 
36
110
  Args:
37
111
  content: The text content to scan for secrets
@@ -69,14 +143,20 @@ def detect_secrets(content: str) -> Tuple[List[SecretMatch], bool]:
69
143
 
70
144
  logger.debug(f"Created temporary file for scanning: {temp_path}")
71
145
 
72
- # Scan the file
146
+ # Scan the file with default settings
73
147
  collection = SecretsCollection()
74
148
  with default_settings():
75
149
  collection.scan_file(temp_path)
76
150
 
77
- # Extract detected secrets
151
+ # Extract detected secrets, filtering out high-entropy false positives
78
152
  for filename, secret_list in collection.data.items():
79
153
  for secret in secret_list:
154
+ # Filter out high-entropy detectors that cause false positives with UUIDs
155
+ # Note: detect-secrets uses "High Entropy" (with space) in type names like "Base64 High Entropy String"
156
+ if 'High Entropy' in secret.type or 'HighEntropy' in secret.type:
157
+ logger.debug(f"Filtering out high-entropy detection: {secret.type} at line {secret.line_number}")
158
+ continue
159
+
80
160
  secrets.append(
81
161
  SecretMatch(
82
162
  secret_type=secret.type,
@@ -85,6 +165,22 @@ def detect_secrets(content: str) -> Tuple[List[SecretMatch], bool]:
85
165
  )
86
166
  )
87
167
 
168
+ # Additional custom pattern-based detection for common API key formats
169
+ custom_secrets = _detect_custom_api_keys(content)
170
+ secrets.extend(custom_secrets)
171
+
172
+ # Deduplicate secrets by line number and type
173
+ # Prefer custom detector results over high-entropy detections
174
+ seen = set()
175
+ deduped_secrets = []
176
+ for secret in secrets:
177
+ key = (secret.line, secret.type)
178
+ if key not in seen:
179
+ seen.add(key)
180
+ deduped_secrets.append(secret)
181
+
182
+ secrets = deduped_secrets
183
+
88
184
  if secrets:
89
185
  logger.warning(f"Detected {len(secrets)} potential secret(s)")
90
186
  for secret in secrets:
@@ -107,6 +203,38 @@ def detect_secrets(content: str) -> Tuple[List[SecretMatch], bool]:
107
203
  return secrets, True
108
204
 
109
205
 
206
+ # Fields that should NOT be redacted (metadata and non-sensitive data)
207
+ NON_SENSITIVE_FIELDS = {
208
+ # Message structure
209
+ 'type', 'role', 'stop_reason', 'stop_sequence',
210
+ # Model metadata
211
+ 'model', 'id', 'service_tier',
212
+ # Session metadata
213
+ 'isSidechain', 'userType', 'version', 'gitBranch', 'cwd', 'slug',
214
+ # Identifiers (UUIDs, timestamps - not actual secrets)
215
+ 'parentUuid', 'uuid', 'sessionId', 'requestId', 'timestamp',
216
+ # Token usage (not sensitive)
217
+ 'usage', 'input_tokens', 'output_tokens',
218
+ 'cache_read_input_tokens', 'cache_creation_input_tokens',
219
+ 'cache_creation', 'ephemeral_5m_input_tokens', 'ephemeral_1h_input_tokens',
220
+ # Tool metadata
221
+ 'tool_use_id', 'name', 'is_error', 'interrupted', 'isImage',
222
+ # File/process info
223
+ 'filenames', 'durationMs', 'numFiles', 'truncated',
224
+ 'stdout', 'stderr', 'returnCodeInterpretation',
225
+ # Other metadata
226
+ 'todos', 'oldTodos', 'newTodos', 'toolUseResult',
227
+ 'context_management', 'applied_edits', 'operation',
228
+ }
229
+
230
+ # Fields that contain potentially sensitive content (user input, file contents, etc.)
231
+ SENSITIVE_CONTENT_FIELDS = {
232
+ # These fields may contain actual secrets and should be redacted if secrets detected
233
+ 'content', # Main content field
234
+ 'text', # Text content in messages
235
+ }
236
+
237
+
110
238
  def redact_content(content: str, secrets: List[SecretMatch]) -> str:
111
239
  """
112
240
  Redact detected secrets from content.
@@ -138,13 +266,12 @@ def redact_content(content: str, secrets: List[SecretMatch]) -> str:
138
266
 
139
267
  logger.debug(f"Redacting {len(secrets_by_line)} line(s)")
140
268
 
141
- # Redact secrets (simple approach: replace entire line with redaction notice)
269
+ # Redact secrets (selective approach: only redact content fields)
142
270
  for line_num, line_secrets in secrets_by_line.items():
143
271
  secret_types = [s.type for s in line_secrets]
144
- # Keep the JSON structure but redact the sensitive value
145
272
  original_line = lines[line_num]
146
273
 
147
- # Try to parse as JSON and redact only values
274
+ # Try to parse as JSON and redact selectively
148
275
  import json
149
276
  import re
150
277
 
@@ -152,25 +279,48 @@ def redact_content(content: str, secrets: List[SecretMatch]) -> str:
152
279
  # Try to parse the line as JSON
153
280
  json_obj = json.loads(original_line)
154
281
 
155
- # Redact all string values that might contain secrets
156
- def redact_json_values(obj):
157
- """Recursively redact values in JSON object."""
282
+ # Selectively redact only sensitive content fields
283
+ def redact_json_values(obj, parent_key=None):
284
+ """
285
+ Recursively redact values in JSON object.
286
+ Only redacts fields that are in SENSITIVE_CONTENT_FIELDS.
287
+ Preserves all metadata and non-sensitive fields.
288
+ """
158
289
  if isinstance(obj, dict):
159
- return {k: redact_json_values(v) for k, v in obj.items()}
290
+ result = {}
291
+ for k, v in obj.items():
292
+ # Only redact if the current key is sensitive
293
+ if k in SENSITIVE_CONTENT_FIELDS:
294
+ # This field contains potentially sensitive content
295
+ result[k] = redact_json_values(v, k)
296
+ elif k in NON_SENSITIVE_FIELDS:
297
+ # Preserve non-sensitive fields as-is
298
+ result[k] = v
299
+ else:
300
+ # For unknown fields, recursively process but don't redact metadata
301
+ result[k] = redact_json_values(v, k)
302
+ return result
160
303
  elif isinstance(obj, list):
161
- return [redact_json_values(item) for item in obj]
304
+ # Process list items
305
+ return [redact_json_values(item, parent_key) for item in obj]
162
306
  elif isinstance(obj, str):
163
- # Check if this value might be sensitive (heuristic: not too short)
164
- # This is a simple approach - we redact string values on lines with secrets
165
- return f"[REDACTED: {', '.join(set(secret_types))}]"
307
+ # Only redact if we're inside a sensitive content field
308
+ if parent_key in SENSITIVE_CONTENT_FIELDS:
309
+ return f"[REDACTED: {', '.join(set(secret_types))}]"
310
+ # Otherwise preserve the string value
311
+ return obj
166
312
  else:
313
+ # Preserve non-string values (numbers, booleans, null)
167
314
  return obj
168
315
 
169
316
  redacted_obj = redact_json_values(json_obj)
170
317
  lines[line_num] = json.dumps(redacted_obj, ensure_ascii=False)
171
318
 
172
319
  except (json.JSONDecodeError, Exception):
173
- # If JSON parsing fails, fall back to simple replacement
320
+ # If JSON parsing fails, fall back to targeted regex replacement
321
+ # This tries to preserve as much structure as possible
322
+ logger.warning(f"Failed to parse line {line_num + 1} as JSON, using regex redaction")
323
+
174
324
  # Try to preserve structure by using regex to find and replace values
175
325
  if ':' in original_line:
176
326
  # Find the value part after the colon, preserving the closing braces/brackets
@@ -268,7 +418,9 @@ def save_original_session(
268
418
  logger.info(f"Saving original session backup: {session_path.name}")
269
419
 
270
420
  try:
271
- backup_dir = repo_root / ".realign" / "sessions-original"
421
+ from realign import get_realign_dir
422
+ realign_dir = get_realign_dir(repo_root)
423
+ backup_dir = realign_dir / "sessions-original"
272
424
  backup_dir.mkdir(parents=True, exist_ok=True)
273
425
 
274
426
  backup_path = backup_dir / session_path.name
@@ -0,0 +1,9 @@
1
+ """Git-based tracking system for AI work history.
2
+
3
+ This module implements Plan A: an independent Git repository in .realign/
4
+ that mirrors project file structure and tracks AI work history using standard Git.
5
+ """
6
+
7
+ from .git_tracker import ReAlignGitTracker
8
+
9
+ __all__ = ["ReAlignGitTracker"]