luckyd-code 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. luckyd_code/__init__.py +54 -0
  2. luckyd_code/__main__.py +5 -0
  3. luckyd_code/_agent_loop.py +551 -0
  4. luckyd_code/_data_dir.py +73 -0
  5. luckyd_code/agent.py +38 -0
  6. luckyd_code/analytics/__init__.py +18 -0
  7. luckyd_code/analytics/reporter.py +195 -0
  8. luckyd_code/analytics/scanner.py +443 -0
  9. luckyd_code/analytics/smells.py +316 -0
  10. luckyd_code/analytics/trends.py +303 -0
  11. luckyd_code/api.py +473 -0
  12. luckyd_code/audit_daemon.py +845 -0
  13. luckyd_code/autonomous_fixer.py +473 -0
  14. luckyd_code/background.py +159 -0
  15. luckyd_code/backup.py +237 -0
  16. luckyd_code/brain/__init__.py +84 -0
  17. luckyd_code/brain/assembler.py +100 -0
  18. luckyd_code/brain/chunker.py +345 -0
  19. luckyd_code/brain/constants.py +73 -0
  20. luckyd_code/brain/embedder.py +163 -0
  21. luckyd_code/brain/graph.py +311 -0
  22. luckyd_code/brain/indexer.py +316 -0
  23. luckyd_code/brain/parser.py +140 -0
  24. luckyd_code/brain/retriever.py +234 -0
  25. luckyd_code/cli.py +894 -0
  26. luckyd_code/cli_commands/__init__.py +1 -0
  27. luckyd_code/cli_commands/audit.py +120 -0
  28. luckyd_code/cli_commands/background.py +83 -0
  29. luckyd_code/cli_commands/brain.py +87 -0
  30. luckyd_code/cli_commands/config.py +75 -0
  31. luckyd_code/cli_commands/dispatcher.py +695 -0
  32. luckyd_code/cli_commands/sessions.py +41 -0
  33. luckyd_code/cli_entry.py +147 -0
  34. luckyd_code/cli_utils.py +112 -0
  35. luckyd_code/config.py +205 -0
  36. luckyd_code/context.py +214 -0
  37. luckyd_code/cost_tracker.py +209 -0
  38. luckyd_code/error_reporter.py +508 -0
  39. luckyd_code/exceptions.py +39 -0
  40. luckyd_code/export.py +126 -0
  41. luckyd_code/feedback_analyzer.py +290 -0
  42. luckyd_code/file_watcher.py +258 -0
  43. luckyd_code/git/__init__.py +11 -0
  44. luckyd_code/git/auto_commit.py +157 -0
  45. luckyd_code/git/tools.py +85 -0
  46. luckyd_code/hooks.py +236 -0
  47. luckyd_code/indexer.py +280 -0
  48. luckyd_code/init.py +39 -0
  49. luckyd_code/keybindings.py +77 -0
  50. luckyd_code/log.py +55 -0
  51. luckyd_code/mcp/__init__.py +6 -0
  52. luckyd_code/mcp/client.py +184 -0
  53. luckyd_code/memory/__init__.py +19 -0
  54. luckyd_code/memory/manager.py +339 -0
  55. luckyd_code/metrics/__init__.py +5 -0
  56. luckyd_code/model_registry.py +131 -0
  57. luckyd_code/orchestrator.py +204 -0
  58. luckyd_code/permissions/__init__.py +1 -0
  59. luckyd_code/permissions/manager.py +103 -0
  60. luckyd_code/planner.py +361 -0
  61. luckyd_code/plugins.py +91 -0
  62. luckyd_code/py.typed +0 -0
  63. luckyd_code/retry.py +57 -0
  64. luckyd_code/router.py +417 -0
  65. luckyd_code/sandbox.py +156 -0
  66. luckyd_code/self_critique.py +2 -0
  67. luckyd_code/self_improve.py +274 -0
  68. luckyd_code/sessions.py +114 -0
  69. luckyd_code/settings.py +72 -0
  70. luckyd_code/skills/__init__.py +8 -0
  71. luckyd_code/skills/review.py +22 -0
  72. luckyd_code/skills/security.py +17 -0
  73. luckyd_code/tasks/__init__.py +1 -0
  74. luckyd_code/tasks/manager.py +102 -0
  75. luckyd_code/templates/icon-192.png +0 -0
  76. luckyd_code/templates/icon-512.png +0 -0
  77. luckyd_code/templates/index.html +1965 -0
  78. luckyd_code/templates/manifest.json +14 -0
  79. luckyd_code/templates/src/app.js +694 -0
  80. luckyd_code/templates/src/body.html +767 -0
  81. luckyd_code/templates/src/cdn.txt +2 -0
  82. luckyd_code/templates/src/style.css +474 -0
  83. luckyd_code/templates/sw.js +31 -0
  84. luckyd_code/templates/test.html +6 -0
  85. luckyd_code/themes.py +48 -0
  86. luckyd_code/tools/__init__.py +97 -0
  87. luckyd_code/tools/agent_tools.py +65 -0
  88. luckyd_code/tools/bash.py +360 -0
  89. luckyd_code/tools/brain_tools.py +137 -0
  90. luckyd_code/tools/browser.py +369 -0
  91. luckyd_code/tools/datetime_tool.py +34 -0
  92. luckyd_code/tools/dockerfile_gen.py +212 -0
  93. luckyd_code/tools/file_ops.py +381 -0
  94. luckyd_code/tools/game_gen.py +360 -0
  95. luckyd_code/tools/git_tools.py +130 -0
  96. luckyd_code/tools/git_worktree.py +63 -0
  97. luckyd_code/tools/path_validate.py +64 -0
  98. luckyd_code/tools/project_gen.py +187 -0
  99. luckyd_code/tools/readme_gen.py +227 -0
  100. luckyd_code/tools/registry.py +157 -0
  101. luckyd_code/tools/shell_detect.py +109 -0
  102. luckyd_code/tools/web.py +89 -0
  103. luckyd_code/tools/youtube.py +187 -0
  104. luckyd_code/tools_bridge.py +144 -0
  105. luckyd_code/undo.py +126 -0
  106. luckyd_code/update.py +60 -0
  107. luckyd_code/verify.py +360 -0
  108. luckyd_code/web_app.py +176 -0
  109. luckyd_code/web_routes/__init__.py +23 -0
  110. luckyd_code/web_routes/background.py +73 -0
  111. luckyd_code/web_routes/brain.py +109 -0
  112. luckyd_code/web_routes/cost.py +12 -0
  113. luckyd_code/web_routes/files.py +133 -0
  114. luckyd_code/web_routes/memories.py +94 -0
  115. luckyd_code/web_routes/misc.py +67 -0
  116. luckyd_code/web_routes/project.py +48 -0
  117. luckyd_code/web_routes/review.py +20 -0
  118. luckyd_code/web_routes/sessions.py +44 -0
  119. luckyd_code/web_routes/settings.py +43 -0
  120. luckyd_code/web_routes/static.py +70 -0
  121. luckyd_code/web_routes/update.py +19 -0
  122. luckyd_code/web_routes/ws.py +237 -0
  123. luckyd_code-1.2.2.dist-info/METADATA +297 -0
  124. luckyd_code-1.2.2.dist-info/RECORD +127 -0
  125. luckyd_code-1.2.2.dist-info/WHEEL +4 -0
  126. luckyd_code-1.2.2.dist-info/entry_points.txt +3 -0
  127. luckyd_code-1.2.2.dist-info/licenses/LICENSE +21 -0
luckyd_code/router.py ADDED
@@ -0,0 +1,417 @@
1
+ """Auto-router — classify prompt complexity and pick the right model tier.
2
+
3
+ Uses a 4-tier classification system:
4
+ Tier 1 — Ultra Fast / Cheap: simple chat, quick Q&A
5
+ Tier 2 — Balanced: general purpose coding and chat
6
+ Tier 3 — Reasoner: debugging, architecture, complex analysis
7
+ Tier 4 — Code-Specialized: large refactors, code generation, reviews
8
+
9
+ The router escalates up tiers as task complexity increases.
10
+ """
11
+
12
+ import hashlib
13
+ import os as _os_router
14
+ import re
15
+ import atexit
16
+ import threading
17
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
18
+ from dataclasses import dataclass
19
+ from typing import Optional
20
+
21
+ from .model_registry import (
22
+ get_models_by_tier,
23
+ ALL_MODELS_FLAT,
24
+ format_model_list,
25
+ TIER_MODEL_MAP,
26
+ )
27
+
28
# DeepSeek tier → model mapping (single source of truth lives in model_registry)
DEEPSEEK_TIER_MODELS: dict[int, str] = TIER_MODEL_MAP

# DeepSeek model fallback order when a model is not found
DEEPSEEK_FALLBACK_MODELS: list[str] = [m.id for m in ALL_MODELS_FLAT]

# Prompts that trigger reasoner (ordered by strength).
# Matched as plain substrings against the lowercased prompt in classify_tier().
_REASONER_KEYWORDS = [
    "debug this", "fix this bug", "why is this broken", "what's wrong with",
    "optimize", "refactor", "redesign", "migrate",
    "security vulnerability", "race condition", "memory leak",
    "architecture decision", "design pattern", "trade-off",
    "complex", "complicated", "difficult", "hard problem",
    "review this code", "code review",
]

# Regex patterns catch paraphrased queries that keyword matches miss
_REASONER_PATTERNS = [
    r'\b(debug|broke|broken|crash|crashed|crashing)\b',
    r'\bfix\s+(this|the|bug|issue|problem)\b',
    r'\bwhy\s+(is|does|did|can\'t|won\'t|would)\b',
    r'\b(not\s+working|doesn\'t\s+work|won\'t\s+run|fails?\s+to)\b',
    r'\b(can\'t\s+figure|can\'t\s+understand)\b',
]

# Keywords that indicate heavy reasoning needed (tier 4)
_HEAVY_KEYWORDS = [
    "large refactor", "major redesign", "complex architecture",
    "security audit", "performance optimization",
    "migration plan", "full rewrite",
]

# Tool names that indicate the prompt is part of a complex workflow.
# NOTE(review): not referenced anywhere in this module — presumably consumed
# by another module, or vestigial; confirm before removing.
_COMPLEX_TOOLS = {"Write", "Edit", "GitCommit", "GitPush", "GitPR", "Bash"}

# Thresholds (characters / tool-call counts) used by the heuristic classifier
LONG_PROMPT_CHARS = 300
VERY_LONG_PROMPT_CHARS = 800
TOOL_CALL_THRESHOLD = 2  # After N tool calls, escalate to tier 3
HEAVY_TOOL_CALL_THRESHOLD = 8  # After N tool calls, escalate to tier 4

# LLM classifier timeout — set to near-zero so we always use the fast
# heuristic result immediately. The background thread still runs and caches
# its result for future identical prompts, but the main thread never waits.
_LLM_CLASSIFY_TIMEOUT = 0.01

# Shared thread pool for background LLM classification calls (daemon so it
# doesn't block process exit).
_classify_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="router-llm")
# Ensure the pool is cleanly shut down at process exit (non-blocking so the
# main thread is never held waiting for in-flight classification calls).
atexit.register(_classify_executor.shutdown, wait=False)
80
+
81
+
82
+ def _file_size_tier(text: str) -> int:
83
+ """Check if the prompt references a local file; escalate tier based on line count.
84
+
85
+ Only files that resolve to a path *within the current working directory*
86
+ are opened — this prevents an adversarial user from tricking the router
87
+ into reading arbitrary paths (e.g. ``../../.env``) embedded in their prompt.
88
+ """
89
+ cwd = _os_router.path.realpath(_os_router.getcwd())
90
+ paths = re.findall(r'[\w./\\-]+\.\w{1,5}', text)
91
+ max_tier = 1
92
+ for p in paths:
93
+ try:
94
+ # Resolve the candidate path and confirm it stays inside cwd
95
+ resolved = _os_router.path.realpath(p)
96
+ if not resolved.startswith(cwd + _os_router.sep) and resolved != cwd:
97
+ continue # path escapes the project root — skip
98
+ if _os_router.path.isfile(resolved):
99
+ with open(resolved, errors='ignore') as fh:
100
+ lines = sum(1 for _ in fh)
101
+ if lines > 500:
102
+ max_tier = max(max_tier, 4)
103
+ elif lines > 200:
104
+ max_tier = max(max_tier, 3)
105
+ elif lines > 80:
106
+ max_tier = max(max_tier, 2)
107
+ except OSError:
108
+ pass
109
+ return max_tier
110
+
111
+
112
# In-process cache: prompt_hash → tier int. Avoids a blocking API call on
# repeated or similar prompts. Capped at 512 entries to bound memory use.
# Protected by _tier_cache_lock — the cache is written from a background
# thread (the LLM classifier) and read from the main thread concurrently.
_tier_cache: dict[str, int] = {}
_tier_cache_lock = threading.Lock()
# When the cache reaches this size, the background classifier evicts a batch
# of the oldest entries before inserting (see classify_tier_llm).
_TIER_CACHE_MAX = 512
119
+
120
+
121
def _llm_classify_worker(prompt_snippet: str, config) -> int:
    """Blocking worker that calls the LLM to classify a prompt (runs in thread pool).

    Args:
        prompt_snippet: First ~600 chars of the user prompt.
        config: Object exposing ``api_key`` and ``base_url`` for the API client.

    Returns:
        A tier clamped to [1, 4].

    Raises:
        Exception: on any API failure, or ValueError when the reply contains
        no digit — classify_tier_llm() catches these and falls back to the
        heuristic classifier.
    """
    _CLASSIFY_PROMPT = (
        "Rate this coding task 1-4:\n"
        "1 = simple Q&A or single-line change\n"
        "2 = general coding, explanation, or small feature\n"
        "3 = debugging, architecture, complex analysis, or multi-file reasoning\n"
        "4 = large refactor, full rewrite, security audit, or migration\n"
        "Reply with ONLY the single digit, nothing else.\n"
        f"Task: {prompt_snippet}"
    )
    from openai import OpenAI
    import httpx
    # Fix: the httpx.Client was previously never closed, leaking a connection
    # pool on every classification call. Close it deterministically.
    http_client = httpx.Client(timeout=8)
    try:
        client = OpenAI(
            api_key=config.api_key,
            base_url=config.base_url,
            http_client=http_client,
        )
        resp = client.chat.completions.create(
            model="deepseek-v4-flash",
            messages=[{"role": "user", "content": _CLASSIFY_PROMPT}],
            max_tokens=2,
            temperature=0.0,
        )
    finally:
        http_client.close()
    reply = (resp.choices[0].message.content or "").strip()
    # Robustness: tolerate replies like "Tier 3" by extracting the first digit
    # rather than int()-ing the whole string.
    match = re.search(r"\d", reply)
    if match is None:
        raise ValueError(f"classifier returned no digit: {reply!r}")
    return max(1, min(4, int(match.group())))
147
+
148
+
149
def classify_tier_llm(user_text: str, config) -> int:
    """Classify a prompt using the LLM, without blocking the caller.

    Strategy:
      1. Compute heuristic tier immediately (< 1ms).
      2. Check the cache — if we've seen this prompt before, return cached result.
      3. Submit the LLM call to a background thread pool.
      4. Wait up to ``_LLM_CLASSIFY_TIMEOUT`` seconds for the result.
      5. If it arrives in time, cache it and return it.
      6. If it times out, return the heuristic result and let the thread keep
         running — the result will be written to the cache for future identical
         queries (zero extra cost on repeated prompts).
    """
    prompt_snippet = user_text[:600]
    # md5 is used purely as a cache key here, not for anything security-sensitive.
    cache_key = hashlib.md5(prompt_snippet.encode("utf-8", errors="replace")).hexdigest()

    # Cache hit — no API call needed (lock guards compound check+get)
    with _tier_cache_lock:
        if cache_key in _tier_cache:
            return _tier_cache[cache_key]

    # Compute heuristic immediately as the fallback
    heuristic = classify_tier(user_text)

    def _background_classify() -> int:
        try:
            result = _llm_classify_worker(prompt_snippet, config)
        except Exception:
            result = heuristic
        # Always write to cache (even if we timed out below, future calls benefit)
        with _tier_cache_lock:
            if len(_tier_cache) >= _TIER_CACHE_MAX:
                # Dicts preserve insertion order, so slicing the key list
                # evicts the 64 oldest entries.
                for stale in list(_tier_cache.keys())[:64]:
                    del _tier_cache[stale]
            _tier_cache[cache_key] = result
        return result

    future = _classify_executor.submit(_background_classify)
    try:
        return future.result(timeout=_LLM_CLASSIFY_TIMEOUT)
    except Exception:
        # Covers the pool's TimeoutError and any other failure. (The original
        # `except (FutureTimeoutError, Exception)` was redundant — Exception
        # already subsumes the timeout.) The background thread keeps running
        # and caches its result for future identical prompts.
        return heuristic
193
+
194
+
195
def classify_tier(user_text: str, recent_tool_count: int = 0) -> int:
    """Classify a prompt into a model tier (1-4) using pure heuristics (no API call).

    Returns:
        1 = fast/cheap (simple chat)
        2 = balanced (general purpose)
        3 = reasoner (debugging, architecture)
        4 = code-specialist (heavy refactoring)
    """
    lowered = user_text.lower()

    # File-size signal: referenced local files are a strong complexity indicator
    file_tier = _file_size_tier(user_text)

    # Keyword/regex signals are always checked, even for very short prompts.
    if any(kw in lowered for kw in _HEAVY_KEYWORDS):
        return 4
    if any(kw in lowered for kw in _REASONER_KEYWORDS):
        return 3
    # Regex fallback catches paraphrased queries the keyword pass misses.
    if any(re.search(pattern, lowered) for pattern in _REASONER_PATTERNS):
        return 3

    # Very short prompts: fall back to the file-size floor.
    if len(user_text) < 20:
        return file_tier

    # Very long prompts: tier 3 when code-heavy, otherwise tier 2.
    if len(user_text) > VERY_LONG_PROMPT_CHARS:
        looks_like_code = ("```" in user_text
                           or re.search(r'\b(def|function|class|import|const)\b', lowered))
        return 3 if looks_like_code else 2

    # Count independent "this prompt contains code" signals.
    signals = (
        "```" in user_text,
        bool(re.search(r'[\\/][\w.]+\.\w{1,4}', user_text)),
        bool(re.search(r'\b(function|class|def|import|const|let|var)\b', lowered)),
        bool(re.search(r'error|exception|fail|crash|stack.trace', lowered)),
    )
    score = sum(signals)
    if score >= 3:
        return 3
    if score >= 1:
        return 2

    # Long prompts with details → tier 2 (never below the file-size floor).
    if len(user_text) > LONG_PROMPT_CHARS:
        return max(2, file_tier)

    # Default: tier 1 for simple chat, but respect the file-size floor.
    return max(1, file_tier)
256
+
257
+
258
def select_model(user_text: str, recent_tool_count: int = 0,
                 preferred_model: Optional[str] = None,
                 tier_override: Optional[int] = None) -> str:
    """Select the best model based on task complexity and tool usage.

    Args:
        user_text: Raw user prompt to classify.
        recent_tool_count: Tool calls so far; high counts escalate the tier.
        preferred_model: If set, it always wins (see NOTE below).
        tier_override: Skip classification entirely and use this tier.

    Returns:
        A model id string.
    """
    if tier_override is not None:
        tier = tier_override
    else:
        base_tier = classify_tier(user_text, recent_tool_count)
        if recent_tool_count >= HEAVY_TOOL_CALL_THRESHOLD:
            tier = min(base_tier + 2, 4)
        elif recent_tool_count >= TOOL_CALL_THRESHOLD:
            tier = min(base_tier + 1, 4)
        else:
            tier = base_tier

    tier_models = get_models_by_tier(tier)

    if not tier_models:
        return preferred_model or ALL_MODELS_FLAT[0].id

    # NOTE(review): the original scanned tier_models for preferred_model but
    # returned preferred_model whether or not it was found — the loop was dead
    # code. Behavior ("an explicit preference always wins") is preserved here;
    # if the intent was to fall back to a tier model when the preference lies
    # outside the tier, that would be a behavior change — confirm with callers.
    if preferred_model:
        return preferred_model

    return tier_models[0].id
285
+
286
+
287
def should_use_reasoner(user_text: str, recent_tool_count: int = 0,
                        auto_route_enabled: bool = True) -> bool:
    """Return True when a tier 3+ (reasoner) model should handle this prompt."""
    if not auto_route_enabled:
        return False
    # Escalate by tool usage: +2 tiers past the heavy threshold, +1 past the
    # normal threshold, capped at tier 4.
    if recent_tool_count >= HEAVY_TOOL_CALL_THRESHOLD:
        bump = 2
    elif recent_tool_count >= TOOL_CALL_THRESHOLD:
        bump = 1
    else:
        bump = 0
    effective = min(classify_tier(user_text, recent_tool_count) + bump, 4)
    return effective >= 3
299
+
300
+
301
def get_tier_description(tier: int) -> str:
    """Return a human-readable description of *tier* (generic for unknown tiers)."""
    if tier == 1:
        return "Fast/Cheap (simple chat, quick queries)"
    if tier == 2:
        return "Balanced (general purpose coding & chat)"
    if tier == 3:
        return "Reasoner (debugging, architecture, complex analysis)"
    if tier == 4:
        return "Code-Specialist (large refactors, code generation)"
    return f"Tier {tier}"
310
+
311
+
312
def show_model_info() -> str:
    """Return a formatted string of all available models and tiers.

    Thin wrapper around model_registry.format_model_list() so callers don't
    need to import the registry module directly.
    """
    return format_model_list()
315
+
316
+
317
def show_current_routing(user_text: str, recent_tool_count: int = 0,
                         preferred_model: Optional[str] = None) -> str:
    """Render the routing decision for *user_text* as a multi-line summary."""
    base_tier = classify_tier(user_text, recent_tool_count)

    # Same escalation ladder used by select_model().
    if recent_tool_count >= HEAVY_TOOL_CALL_THRESHOLD:
        effective_tier = min(base_tier + 2, 4)
    elif recent_tool_count >= TOOL_CALL_THRESHOLD:
        effective_tier = min(base_tier + 1, 4)
    else:
        effective_tier = base_tier

    chosen = select_model(user_text, recent_tool_count, preferred_model)

    report_lines = [
        f"Classification: Tier {base_tier} → Effective Tier {effective_tier}",
        f"Selected Model: {chosen}",
        f"Description: {get_tier_description(effective_tier)}",
        f"Tool Calls: {recent_tool_count}",
    ]
    return "\n".join(report_lines)
337
+
338
+
339
+ # ------------------------------------------------------------------ #
340
+ # Shared routing helpers (used by both CLI and Web UI)
341
+ # ------------------------------------------------------------------ #
342
+
343
@dataclass
class RoutingResult:
    """Result of a model routing decision."""
    # Model id chosen for this turn.
    model: str
    # Tier (1-4) that produced the selection.
    tier: int
    # Human-readable tier label (from get_tier_description()).
    tier_description: str
    # True when the chosen model differs from the caller's current/preferred model.
    tier_changed: bool = False
350
+
351
+
352
def resolve_initial_route(
    user_text: str,
    tool_call_count: int,
    provider: str,
    preferred_model: str,
    auto_route: bool = True,
    config=None,
) -> RoutingResult:
    """Pick the starting model tier for a new user message.

    With *config* supplied, the LLM-backed classifier is used (it has a short
    internal timeout so it never blocks); otherwise the pure heuristic runs.
    When auto-routing is off, the caller's preferred model is kept at tier 2.
    """
    if not auto_route:
        return RoutingResult(model=preferred_model, tier=2,
                             tier_description=get_tier_description(2))

    tier = (classify_tier_llm(user_text, config)
            if config is not None
            else classify_tier(user_text, tool_call_count))

    chosen = DEEPSEEK_TIER_MODELS.get(tier, "deepseek-v4-flash")

    return RoutingResult(
        model=chosen,
        tier=tier,
        tier_description=get_tier_description(tier),
        tier_changed=(chosen != preferred_model),
    )
384
+
385
+
386
def escalate_tier(
    user_text: str,
    tool_call_count: int,
    provider: str,
    preferred_model: str,
    current_model: str,
    current_tier: int,
    auto_route: bool = True,
) -> RoutingResult:
    """Re-evaluate and possibly escalate the model tier mid-conversation.

    NOTE(review): past the heavy tool-call threshold this pins tier 4
    outright (unlike select_model's min(base + 2, 4)) — preserved as-is.
    """
    if not auto_route:
        return RoutingResult(model=current_model, tier=current_tier,
                             tier_description=get_tier_description(current_tier))

    base = classify_tier(user_text, tool_call_count)

    if tool_call_count >= HEAVY_TOOL_CALL_THRESHOLD:
        effective = 4
    elif tool_call_count >= TOOL_CALL_THRESHOLD:
        effective = min(base + 1, 4)
    else:
        effective = base

    chosen = DEEPSEEK_TIER_MODELS.get(effective, "deepseek-v4-flash")

    return RoutingResult(
        model=chosen,
        tier=effective,
        tier_description=get_tier_description(effective),
        tier_changed=(chosen != current_model),
    )
luckyd_code/sandbox.py ADDED
@@ -0,0 +1,156 @@
1
+ """Docker sandbox for secure command execution."""
2
+
3
+ import os
4
+ import subprocess
5
+ import threading
6
+
7
+
8
# Container image and resource caps applied to every sandboxed run.
SANDBOX_IMAGE = "python:3.10-slim"
SANDBOX_MEM_LIMIT = "512m"  # passed as docker --memory
SANDBOX_CPU_LIMIT = "1.0"   # passed as docker --cpus
11
+
12
+
13
def check_docker() -> tuple[bool, str]:
    """Probe for a usable Docker CLI. Returns (available, version_string)."""
    try:
        proc = subprocess.run(
            ["docker", "--version"],
            capture_output=True, text=True, timeout=10,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        # Binary missing, hung, or otherwise unlaunchable.
        return False, "Docker not found"
    if proc.returncode != 0:
        return False, "Docker not available"
    return True, proc.stdout.strip()
26
+
27
+
28
class Sandbox:
    """Run commands in a Docker container for isolation.

    When Docker is unavailable, falls back to direct (unsandboxed) execution
    via the shell — callers should treat the fallback as best-effort only.
    """

    def __init__(self, image: str = SANDBOX_IMAGE):
        self.image = image
        self.available = False
        self._check()

    def _check(self):
        # Probe Docker once at construction; cached in self.available.
        available, _ = check_docker()
        self.available = available

    def run(self, command: str, cwd: str | None = None, timeout: int = 120) -> tuple[str, str, int]:
        """Run a command, sandboxed in Docker when possible.

        Args:
            command: Shell command string (executed via `sh -c` / shell=True).
            cwd: Working directory for the command (defaults to os.getcwd()
                 in the Docker path, to the process cwd in the fallback).
            timeout: Seconds before the command is killed.

        Returns:
            (stdout, stderr, returncode); returncode is -1 on timeout/error.
        """
        if not self.available:
            # Fallback to direct execution. Fix: cwd is now forwarded — the
            # original dropped it here, so only the Docker path honored it.
            return self._run_direct(command, cwd, timeout)

        return self._run_docker(command, cwd, timeout)

    def _run_docker(self, command: str, cwd: str | None, timeout: int) -> tuple[str, str, int]:
        """Run command inside a locked-down, network-less Docker container."""
        work_dir = cwd or os.getcwd()

        # The command is passed as a single argv element to `sh -c`, so no
        # extra shell escaping is needed on the host side.
        docker_cmd = [
            "docker", "run", "--rm",
            "--network", "none",              # no network access
            "--memory", SANDBOX_MEM_LIMIT,
            "--cpus", SANDBOX_CPU_LIMIT,
            "--read-only",                    # read-only root filesystem
            "-v", f"{work_dir}:/workspace",   # project dir is still writable
            "-w", "/workspace",
            "--tmpfs", "/tmp:rw,noexec,nosuid,size=64m",
            self.image,
            "sh", "-c", command,
        ]

        try:
            result = subprocess.run(
                docker_cmd,
                capture_output=True, text=True,
                timeout=timeout,
            )
            return result.stdout, result.stderr, result.returncode
        except subprocess.TimeoutExpired:
            return "", f"Sandbox: command timed out after {timeout}s", -1
        except OSError as e:
            return "", f"Sandbox error: {e}", -1

    def _run_direct(self, command: str, cwd: str | None, timeout: int) -> tuple[str, str, int]:
        """Fallback: run directly on the host without any sandbox.

        shell=True is deliberate — `command` is a full shell line (same
        contract as the Docker path's `sh -c`), so callers must not pass
        untrusted input here.
        """
        try:
            result = subprocess.run(
                command, shell=True,
                cwd=cwd,
                capture_output=True, text=True,
                timeout=timeout,
            )
            return result.stdout, result.stderr, result.returncode
        except subprocess.TimeoutExpired:
            return "", f"Command timed out after {timeout}s", -1
        except Exception as e:
            return "", f"Error: {e}", -1

    def pull_image(self) -> str:
        """Pull the sandbox Docker image. Returns status message."""
        if not self.available:
            return "Docker not available"
        try:
            result = subprocess.run(
                ["docker", "pull", self.image],
                capture_output=True, text=True, timeout=120,
            )
            if result.returncode == 0:
                return f"Pulled {self.image}"
            # Truncate stderr so a huge daemon error doesn't flood the caller.
            return f"Failed to pull image: {result.stderr.strip()[:200]}"
        except subprocess.TimeoutExpired:
            return "Pull timed out"
        except Exception as e:
            return f"Error: {e}"

    def ensure_image(self) -> bool:
        """Ensure the sandbox image is available locally. Returns True if ready."""
        if not self.available:
            return False
        try:
            result = subprocess.run(
                ["docker", "image", "inspect", self.image],
                capture_output=True, text=True, timeout=10,
            )
            if result.returncode == 0:
                return True
            # Image not present locally — try to pull, then re-check.
            self.pull_image()
            result = subprocess.run(
                ["docker", "image", "inspect", self.image],
                capture_output=True, text=True, timeout=10,
            )
            return result.returncode == 0
        except Exception:
            return False
133
+
134
+
135
# Global singleton — lock guards against simultaneous creation from multiple
# threads (possible in the Web UI where requests run concurrently).
_sandbox: Sandbox | None = None  # created lazily by get_sandbox()
_sandbox_lock = threading.Lock()  # serializes first-time construction only
139
+
140
+
141
def get_sandbox() -> Sandbox:
    """Get or create the global sandbox instance (thread-safe).

    Uses double-checked locking: the unlocked fast path avoids lock traffic
    once the singleton exists; the locked re-check prevents two threads from
    both constructing it.
    """
    global _sandbox
    if _sandbox is not None:
        return _sandbox
    with _sandbox_lock:
        if _sandbox is None:
            candidate = Sandbox()
            if candidate.available:
                candidate.ensure_image()
            _sandbox = candidate
    return _sandbox
152
+
153
+
154
def is_sandbox_available() -> bool:
    """Check if Docker sandbox is available.

    NOTE: delegates to get_sandbox(), so the first call may construct the
    singleton and (when Docker is present) attempt an image pull.
    """
    return get_sandbox().available
@@ -0,0 +1,2 @@
1
+ # Removed — self-critique was circular LLM fluff (same model reviewing itself).
2
+ # Replaced by the existing verify.py pipeline and its static analysis.