aline-ai 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. {aline_ai-0.2.5.dist-info → aline_ai-0.3.0.dist-info}/METADATA +3 -1
  2. aline_ai-0.3.0.dist-info/RECORD +41 -0
  3. aline_ai-0.3.0.dist-info/entry_points.txt +3 -0
  4. realign/__init__.py +32 -1
  5. realign/cli.py +203 -19
  6. realign/commands/__init__.py +2 -2
  7. realign/commands/clean.py +149 -0
  8. realign/commands/config.py +1 -1
  9. realign/commands/export_shares.py +1785 -0
  10. realign/commands/hide.py +112 -24
  11. realign/commands/import_history.py +873 -0
  12. realign/commands/init.py +104 -217
  13. realign/commands/mirror.py +131 -0
  14. realign/commands/pull.py +101 -0
  15. realign/commands/push.py +155 -245
  16. realign/commands/review.py +216 -54
  17. realign/commands/session_utils.py +139 -4
  18. realign/commands/share.py +965 -0
  19. realign/commands/status.py +559 -0
  20. realign/commands/sync.py +91 -0
  21. realign/commands/undo.py +423 -0
  22. realign/commands/watcher.py +805 -0
  23. realign/config.py +21 -10
  24. realign/file_lock.py +3 -1
  25. realign/hash_registry.py +310 -0
  26. realign/hooks.py +368 -384
  27. realign/logging_config.py +2 -2
  28. realign/mcp_server.py +263 -549
  29. realign/mcp_watcher.py +999 -142
  30. realign/mirror_utils.py +322 -0
  31. realign/prompts/__init__.py +21 -0
  32. realign/prompts/presets.py +238 -0
  33. realign/redactor.py +168 -16
  34. realign/tracker/__init__.py +9 -0
  35. realign/tracker/git_tracker.py +1123 -0
  36. realign/watcher_daemon.py +115 -0
  37. aline_ai-0.2.5.dist-info/RECORD +0 -28
  38. aline_ai-0.2.5.dist-info/entry_points.txt +0 -5
  39. realign/commands/auto_commit.py +0 -231
  40. realign/commands/commit.py +0 -379
  41. realign/commands/search.py +0 -449
  42. realign/commands/show.py +0 -416
  43. {aline_ai-0.2.5.dist-info → aline_ai-0.3.0.dist-info}/WHEEL +0 -0
  44. {aline_ai-0.2.5.dist-info → aline_ai-0.3.0.dist-info}/licenses/LICENSE +0 -0
  45. {aline_ai-0.2.5.dist-info → aline_ai-0.3.0.dist-info}/top_level.txt +0 -0
realign/redactor.py CHANGED
@@ -29,9 +29,83 @@ class SecretMatch:
29
29
  return f"SecretMatch(type={self.type}, line={self.line})"
30
30
 
31
31
 
32
def _detect_custom_api_keys(content: str) -> List[SecretMatch]:
    """
    Detect API keys using custom regex patterns.

    This catches common API key formats that detect-secrets might miss
    (OpenAI, Anthropic, GitHub, Slack, bearer tokens, generic keys, and
    long mixed-case strings that look secret-like).

    Args:
        content: The text content to scan

    Returns:
        List of SecretMatch objects for detected API keys
    """
    # Hoisted out of the match loop: importing hashlib per detected secret
    # (as the previous version did) is wasted work on secret-dense input.
    import hashlib
    import re

    # Compile each pattern once per call instead of re-matching raw strings
    # per line.  IGNORECASE preserves the original matching behavior
    # (e.g. "Bearer" vs "bearer", "SK-" vs "sk-").
    patterns = [
        # OpenAI API keys (sk-..., sk-proj-...).  The optional "proj-"
        # segment is needed because the key-body class excludes hyphens,
        # so a bare \bsk-[a-zA-Z0-9]{20,} would never match project-scoped
        # keys ("proj" is only 4 chars before the next hyphen).
        (re.compile(r'\bsk-(?:proj-)?[a-zA-Z0-9]{20,}', re.IGNORECASE), 'OpenAI API Key'),
        # Anthropic API keys (sk-ant-api03-...)
        (re.compile(r'\bsk-ant-[a-zA-Z0-9\-]{50,}', re.IGNORECASE), 'Anthropic API Key'),
        # Generic API keys with common prefixes (api_key=..., apikey: "...")
        (re.compile(r'\b(?:api[_-]?key|apikey|api[_-]?secret)[\s:=]+["\']?([a-zA-Z0-9_\-]{32,})["\']?', re.IGNORECASE), 'Generic API Key'),
        # Bearer tokens
        (re.compile(r'\bBearer\s+[a-zA-Z0-9\-._~+/]+=*', re.IGNORECASE), 'Bearer Token'),
        # GitHub tokens (ghp_/ghs_)
        (re.compile(r'\bgh[ps]_[a-zA-Z0-9]{36,}', re.IGNORECASE), 'GitHub Token'),
        # Slack tokens (xoxb-/xoxa-/xoxp-/xoxr-/xoxs-)
        (re.compile(r'\bxox[baprs]-[a-zA-Z0-9\-]{10,}', re.IGNORECASE), 'Slack Token'),
        # Generic long alphanumeric strings that look like secrets
        # (60+ chars; mixed-case/digit check applied below)
        (re.compile(r'\b[a-zA-Z0-9]{60,}\b', re.IGNORECASE), 'Potential Secret (Long String)'),
    ]

    # Compiled once: UUIDs are identifiers, not secrets, and must be skipped.
    uuid_re = re.compile(
        r'^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$',
        re.IGNORECASE,
    )
    # Common documentation placeholders that are never real credentials.
    false_positives = {'example', 'placeholder', 'your_api_key_here', 'your-api-key'}

    secrets: List[SecretMatch] = []
    for line_num, line in enumerate(content.split('\n'), start=1):
        for pattern, secret_type in patterns:
            for match in pattern.finditer(line):
                matched_text = match.group(0)

                # Skip UUIDs and documentation placeholders.
                if uuid_re.match(matched_text):
                    continue
                if matched_text.lower() in false_positives:
                    continue

                # For the broad long-string heuristic, require mixed case
                # (upper + lower) or letters + digits to cut false positives.
                if secret_type == 'Potential Secret (Long String)':
                    has_upper = any(c.isupper() for c in matched_text)
                    has_lower = any(c.islower() for c in matched_text)
                    has_digit = any(c.isdigit() for c in matched_text)
                    if not ((has_upper and has_lower) or (has_digit and (has_upper or has_lower))):
                        continue

                # Short hash identifies the secret without storing its value.
                secret_hash = hashlib.sha256(matched_text.encode()).hexdigest()[:16]

                secrets.append(
                    SecretMatch(
                        secret_type=secret_type,
                        line_number=line_num,
                        secret_hash=secret_hash,
                    )
                )
                logger.debug(f"Custom pattern detected: {secret_type} at line {line_num}")

    return secrets
104
+
105
+
32
106
  def detect_secrets(content: str) -> Tuple[List[SecretMatch], bool]:
33
107
  """
34
- Detect secrets in the given content using detect-secrets library.
108
+ Detect secrets in the given content using detect-secrets library plus custom patterns.
35
109
 
36
110
  Args:
37
111
  content: The text content to scan for secrets
@@ -69,14 +143,20 @@ def detect_secrets(content: str) -> Tuple[List[SecretMatch], bool]:
69
143
 
70
144
  logger.debug(f"Created temporary file for scanning: {temp_path}")
71
145
 
72
- # Scan the file
146
+ # Scan the file with default settings
73
147
  collection = SecretsCollection()
74
148
  with default_settings():
75
149
  collection.scan_file(temp_path)
76
150
 
77
- # Extract detected secrets
151
+ # Extract detected secrets, filtering out high-entropy false positives
78
152
  for filename, secret_list in collection.data.items():
79
153
  for secret in secret_list:
154
+ # Filter out high-entropy detectors that cause false positives with UUIDs
155
+ # Note: detect-secrets uses "High Entropy" (with space) in type names like "Base64 High Entropy String"
156
+ if 'High Entropy' in secret.type or 'HighEntropy' in secret.type:
157
+ logger.debug(f"Filtering out high-entropy detection: {secret.type} at line {secret.line_number}")
158
+ continue
159
+
80
160
  secrets.append(
81
161
  SecretMatch(
82
162
  secret_type=secret.type,
@@ -85,6 +165,22 @@ def detect_secrets(content: str) -> Tuple[List[SecretMatch], bool]:
85
165
  )
86
166
  )
87
167
 
168
+ # Additional custom pattern-based detection for common API key formats
169
+ custom_secrets = _detect_custom_api_keys(content)
170
+ secrets.extend(custom_secrets)
171
+
172
+ # Deduplicate secrets by line number and type
173
+ # Prefer custom detector results over high-entropy detections
174
+ seen = set()
175
+ deduped_secrets = []
176
+ for secret in secrets:
177
+ key = (secret.line, secret.type)
178
+ if key not in seen:
179
+ seen.add(key)
180
+ deduped_secrets.append(secret)
181
+
182
+ secrets = deduped_secrets
183
+
88
184
  if secrets:
89
185
  logger.warning(f"Detected {len(secrets)} potential secret(s)")
90
186
  for secret in secrets:
@@ -107,6 +203,38 @@ def detect_secrets(content: str) -> Tuple[List[SecretMatch], bool]:
107
203
  return secrets, True
108
204
 
109
205
 
206
+ # Fields that should NOT be redacted (metadata and non-sensitive data)
207
+ NON_SENSITIVE_FIELDS = {
208
+ # Message structure
209
+ 'type', 'role', 'stop_reason', 'stop_sequence',
210
+ # Model metadata
211
+ 'model', 'id', 'service_tier',
212
+ # Session metadata
213
+ 'isSidechain', 'userType', 'version', 'gitBranch', 'cwd', 'slug',
214
+ # Identifiers (UUIDs, timestamps - not actual secrets)
215
+ 'parentUuid', 'uuid', 'sessionId', 'requestId', 'timestamp',
216
+ # Token usage (not sensitive)
217
+ 'usage', 'input_tokens', 'output_tokens',
218
+ 'cache_read_input_tokens', 'cache_creation_input_tokens',
219
+ 'cache_creation', 'ephemeral_5m_input_tokens', 'ephemeral_1h_input_tokens',
220
+ # Tool metadata
221
+ 'tool_use_id', 'name', 'is_error', 'interrupted', 'isImage',
222
+ # File/process info
223
+ 'filenames', 'durationMs', 'numFiles', 'truncated',
224
+ 'stdout', 'stderr', 'returnCodeInterpretation',
225
+ # Other metadata
226
+ 'todos', 'oldTodos', 'newTodos', 'toolUseResult',
227
+ 'context_management', 'applied_edits', 'operation',
228
+ }
229
+
230
+ # Fields that contain potentially sensitive content (user input, file contents, etc.)
231
+ SENSITIVE_CONTENT_FIELDS = {
232
+ # These fields may contain actual secrets and should be redacted if secrets detected
233
+ 'content', # Main content field
234
+ 'text', # Text content in messages
235
+ }
236
+
237
+
110
238
  def redact_content(content: str, secrets: List[SecretMatch]) -> str:
111
239
  """
112
240
  Redact detected secrets from content.
@@ -138,13 +266,12 @@ def redact_content(content: str, secrets: List[SecretMatch]) -> str:
138
266
 
139
267
  logger.debug(f"Redacting {len(secrets_by_line)} line(s)")
140
268
 
141
- # Redact secrets (simple approach: replace entire line with redaction notice)
269
+ # Redact secrets (selective approach: only redact content fields)
142
270
  for line_num, line_secrets in secrets_by_line.items():
143
271
  secret_types = [s.type for s in line_secrets]
144
- # Keep the JSON structure but redact the sensitive value
145
272
  original_line = lines[line_num]
146
273
 
147
- # Try to parse as JSON and redact only values
274
+ # Try to parse as JSON and redact selectively
148
275
  import json
149
276
  import re
150
277
 
@@ -152,25 +279,48 @@ def redact_content(content: str, secrets: List[SecretMatch]) -> str:
152
279
  # Try to parse the line as JSON
153
280
  json_obj = json.loads(original_line)
154
281
 
155
- # Redact all string values that might contain secrets
156
- def redact_json_values(obj):
157
- """Recursively redact values in JSON object."""
282
+ # Selectively redact only sensitive content fields
283
+ def redact_json_values(obj, parent_key=None):
284
+ """
285
+ Recursively redact values in JSON object.
286
+ Only redacts fields that are in SENSITIVE_CONTENT_FIELDS.
287
+ Preserves all metadata and non-sensitive fields.
288
+ """
158
289
  if isinstance(obj, dict):
159
- return {k: redact_json_values(v) for k, v in obj.items()}
290
+ result = {}
291
+ for k, v in obj.items():
292
+ # Only redact if the current key is sensitive
293
+ if k in SENSITIVE_CONTENT_FIELDS:
294
+ # This field contains potentially sensitive content
295
+ result[k] = redact_json_values(v, k)
296
+ elif k in NON_SENSITIVE_FIELDS:
297
+ # Preserve non-sensitive fields as-is
298
+ result[k] = v
299
+ else:
300
+ # For unknown fields, recursively process but don't redact metadata
301
+ result[k] = redact_json_values(v, k)
302
+ return result
160
303
  elif isinstance(obj, list):
161
- return [redact_json_values(item) for item in obj]
304
+ # Process list items
305
+ return [redact_json_values(item, parent_key) for item in obj]
162
306
  elif isinstance(obj, str):
163
- # Check if this value might be sensitive (heuristic: not too short)
164
- # This is a simple approach - we redact string values on lines with secrets
165
- return f"[REDACTED: {', '.join(set(secret_types))}]"
307
+ # Only redact if we're inside a sensitive content field
308
+ if parent_key in SENSITIVE_CONTENT_FIELDS:
309
+ return f"[REDACTED: {', '.join(set(secret_types))}]"
310
+ # Otherwise preserve the string value
311
+ return obj
166
312
  else:
313
+ # Preserve non-string values (numbers, booleans, null)
167
314
  return obj
168
315
 
169
316
  redacted_obj = redact_json_values(json_obj)
170
317
  lines[line_num] = json.dumps(redacted_obj, ensure_ascii=False)
171
318
 
172
319
  except (json.JSONDecodeError, Exception):
173
- # If JSON parsing fails, fall back to simple replacement
320
+ # If JSON parsing fails, fall back to targeted regex replacement
321
+ # This tries to preserve as much structure as possible
322
+ logger.warning(f"Failed to parse line {line_num + 1} as JSON, using regex redaction")
323
+
174
324
  # Try to preserve structure by using regex to find and replace values
175
325
  if ':' in original_line:
176
326
  # Find the value part after the colon, preserving the closing braces/brackets
@@ -268,7 +418,9 @@ def save_original_session(
268
418
  logger.info(f"Saving original session backup: {session_path.name}")
269
419
 
270
420
  try:
271
- backup_dir = repo_root / ".realign" / "sessions-original"
421
+ from realign import get_realign_dir
422
+ realign_dir = get_realign_dir(repo_root)
423
+ backup_dir = realign_dir / "sessions-original"
272
424
  backup_dir.mkdir(parents=True, exist_ok=True)
273
425
 
274
426
  backup_path = backup_dir / session_path.name
@@ -0,0 +1,9 @@
1
+ """Git-based tracking system for AI work history.
2
+
3
+ This module implements Plan A: an independent Git repository in .realign/
4
+ that mirrors project file structure and tracks AI work history using standard Git.
5
+ """
6
+
7
+ from .git_tracker import ReAlignGitTracker
8
+
9
+ __all__ = ["ReAlignGitTracker"]