luckyd-code 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. luckyd_code/__init__.py +54 -0
  2. luckyd_code/__main__.py +5 -0
  3. luckyd_code/_agent_loop.py +551 -0
  4. luckyd_code/_data_dir.py +73 -0
  5. luckyd_code/agent.py +38 -0
  6. luckyd_code/analytics/__init__.py +18 -0
  7. luckyd_code/analytics/reporter.py +195 -0
  8. luckyd_code/analytics/scanner.py +443 -0
  9. luckyd_code/analytics/smells.py +316 -0
  10. luckyd_code/analytics/trends.py +303 -0
  11. luckyd_code/api.py +473 -0
  12. luckyd_code/audit_daemon.py +845 -0
  13. luckyd_code/autonomous_fixer.py +473 -0
  14. luckyd_code/background.py +159 -0
  15. luckyd_code/backup.py +237 -0
  16. luckyd_code/brain/__init__.py +84 -0
  17. luckyd_code/brain/assembler.py +100 -0
  18. luckyd_code/brain/chunker.py +345 -0
  19. luckyd_code/brain/constants.py +73 -0
  20. luckyd_code/brain/embedder.py +163 -0
  21. luckyd_code/brain/graph.py +311 -0
  22. luckyd_code/brain/indexer.py +316 -0
  23. luckyd_code/brain/parser.py +140 -0
  24. luckyd_code/brain/retriever.py +234 -0
  25. luckyd_code/cli.py +894 -0
  26. luckyd_code/cli_commands/__init__.py +1 -0
  27. luckyd_code/cli_commands/audit.py +120 -0
  28. luckyd_code/cli_commands/background.py +83 -0
  29. luckyd_code/cli_commands/brain.py +87 -0
  30. luckyd_code/cli_commands/config.py +75 -0
  31. luckyd_code/cli_commands/dispatcher.py +695 -0
  32. luckyd_code/cli_commands/sessions.py +41 -0
  33. luckyd_code/cli_entry.py +147 -0
  34. luckyd_code/cli_utils.py +112 -0
  35. luckyd_code/config.py +205 -0
  36. luckyd_code/context.py +214 -0
  37. luckyd_code/cost_tracker.py +209 -0
  38. luckyd_code/error_reporter.py +508 -0
  39. luckyd_code/exceptions.py +39 -0
  40. luckyd_code/export.py +126 -0
  41. luckyd_code/feedback_analyzer.py +290 -0
  42. luckyd_code/file_watcher.py +258 -0
  43. luckyd_code/git/__init__.py +11 -0
  44. luckyd_code/git/auto_commit.py +157 -0
  45. luckyd_code/git/tools.py +85 -0
  46. luckyd_code/hooks.py +236 -0
  47. luckyd_code/indexer.py +280 -0
  48. luckyd_code/init.py +39 -0
  49. luckyd_code/keybindings.py +77 -0
  50. luckyd_code/log.py +55 -0
  51. luckyd_code/mcp/__init__.py +6 -0
  52. luckyd_code/mcp/client.py +184 -0
  53. luckyd_code/memory/__init__.py +19 -0
  54. luckyd_code/memory/manager.py +339 -0
  55. luckyd_code/metrics/__init__.py +5 -0
  56. luckyd_code/model_registry.py +131 -0
  57. luckyd_code/orchestrator.py +204 -0
  58. luckyd_code/permissions/__init__.py +1 -0
  59. luckyd_code/permissions/manager.py +103 -0
  60. luckyd_code/planner.py +361 -0
  61. luckyd_code/plugins.py +91 -0
  62. luckyd_code/py.typed +0 -0
  63. luckyd_code/retry.py +57 -0
  64. luckyd_code/router.py +417 -0
  65. luckyd_code/sandbox.py +156 -0
  66. luckyd_code/self_critique.py +2 -0
  67. luckyd_code/self_improve.py +274 -0
  68. luckyd_code/sessions.py +114 -0
  69. luckyd_code/settings.py +72 -0
  70. luckyd_code/skills/__init__.py +8 -0
  71. luckyd_code/skills/review.py +22 -0
  72. luckyd_code/skills/security.py +17 -0
  73. luckyd_code/tasks/__init__.py +1 -0
  74. luckyd_code/tasks/manager.py +102 -0
  75. luckyd_code/templates/icon-192.png +0 -0
  76. luckyd_code/templates/icon-512.png +0 -0
  77. luckyd_code/templates/index.html +1965 -0
  78. luckyd_code/templates/manifest.json +14 -0
  79. luckyd_code/templates/src/app.js +694 -0
  80. luckyd_code/templates/src/body.html +767 -0
  81. luckyd_code/templates/src/cdn.txt +2 -0
  82. luckyd_code/templates/src/style.css +474 -0
  83. luckyd_code/templates/sw.js +31 -0
  84. luckyd_code/templates/test.html +6 -0
  85. luckyd_code/themes.py +48 -0
  86. luckyd_code/tools/__init__.py +97 -0
  87. luckyd_code/tools/agent_tools.py +65 -0
  88. luckyd_code/tools/bash.py +360 -0
  89. luckyd_code/tools/brain_tools.py +137 -0
  90. luckyd_code/tools/browser.py +369 -0
  91. luckyd_code/tools/datetime_tool.py +34 -0
  92. luckyd_code/tools/dockerfile_gen.py +212 -0
  93. luckyd_code/tools/file_ops.py +381 -0
  94. luckyd_code/tools/game_gen.py +360 -0
  95. luckyd_code/tools/git_tools.py +130 -0
  96. luckyd_code/tools/git_worktree.py +63 -0
  97. luckyd_code/tools/path_validate.py +64 -0
  98. luckyd_code/tools/project_gen.py +187 -0
  99. luckyd_code/tools/readme_gen.py +227 -0
  100. luckyd_code/tools/registry.py +157 -0
  101. luckyd_code/tools/shell_detect.py +109 -0
  102. luckyd_code/tools/web.py +89 -0
  103. luckyd_code/tools/youtube.py +187 -0
  104. luckyd_code/tools_bridge.py +144 -0
  105. luckyd_code/undo.py +126 -0
  106. luckyd_code/update.py +60 -0
  107. luckyd_code/verify.py +360 -0
  108. luckyd_code/web_app.py +176 -0
  109. luckyd_code/web_routes/__init__.py +23 -0
  110. luckyd_code/web_routes/background.py +73 -0
  111. luckyd_code/web_routes/brain.py +109 -0
  112. luckyd_code/web_routes/cost.py +12 -0
  113. luckyd_code/web_routes/files.py +133 -0
  114. luckyd_code/web_routes/memories.py +94 -0
  115. luckyd_code/web_routes/misc.py +67 -0
  116. luckyd_code/web_routes/project.py +48 -0
  117. luckyd_code/web_routes/review.py +20 -0
  118. luckyd_code/web_routes/sessions.py +44 -0
  119. luckyd_code/web_routes/settings.py +43 -0
  120. luckyd_code/web_routes/static.py +70 -0
  121. luckyd_code/web_routes/update.py +19 -0
  122. luckyd_code/web_routes/ws.py +237 -0
  123. luckyd_code-1.2.2.dist-info/METADATA +297 -0
  124. luckyd_code-1.2.2.dist-info/RECORD +127 -0
  125. luckyd_code-1.2.2.dist-info/WHEEL +4 -0
  126. luckyd_code-1.2.2.dist-info/entry_points.txt +3 -0
  127. luckyd_code-1.2.2.dist-info/licenses/LICENSE +21 -0
luckyd_code/router.py ADDED
@@ -0,0 +1,417 @@
1
+ """Auto-router — classify prompt complexity and pick the right model tier.
2
+
3
+ Uses a 4-tier classification system:
4
+ Tier 1 — Ultra Fast / Cheap: simple chat, quick Q&A
5
+ Tier 2 — Balanced: general purpose coding and chat
6
+ Tier 3 — Reasoner: debugging, architecture, complex analysis
7
+ Tier 4 — Code-Specialized: large refactors, code generation, reviews
8
+
9
+ The router escalates up tiers as task complexity increases.
10
+ """
11
+
12
+ import hashlib
13
+ import os as _os_router
14
+ import re
15
+ import atexit
16
+ import threading
17
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
18
+ from dataclasses import dataclass
19
+ from typing import Optional
20
+
21
+ from .model_registry import (
22
+ get_models_by_tier,
23
+ ALL_MODELS_FLAT,
24
+ format_model_list,
25
+ TIER_MODEL_MAP,
26
+ )
27
+
28
# DeepSeek tier → model mapping (single source of truth lives in model_registry)
DEEPSEEK_TIER_MODELS: dict[int, str] = TIER_MODEL_MAP

# DeepSeek model fallback order when a model is not found
DEEPSEEK_FALLBACK_MODELS: list[str] = [m.id for m in ALL_MODELS_FLAT]

# Prompts that trigger reasoner (ordered by strength).
# Matched as plain substrings against the lowercased prompt in classify_tier().
_REASONER_KEYWORDS = [
    "debug this", "fix this bug", "why is this broken", "what's wrong with",
    "optimize", "refactor", "redesign", "migrate",
    "security vulnerability", "race condition", "memory leak",
    "architecture decision", "design pattern", "trade-off",
    "complex", "complicated", "difficult", "hard problem",
    "review this code", "code review",
]

# Regex patterns catch paraphrased queries that keyword matches miss
_REASONER_PATTERNS = [
    r'\b(debug|broke|broken|crash|crashed|crashing)\b',
    r'\bfix\s+(this|the|bug|issue|problem)\b',
    r'\bwhy\s+(is|does|did|can\'t|won\'t|would)\b',
    r'\b(not\s+working|doesn\'t\s+work|won\'t\s+run|fails?\s+to)\b',
    r'\b(can\'t\s+figure|can\'t\s+understand)\b',
]

# Keywords that indicate heavy reasoning needed (tier 4)
_HEAVY_KEYWORDS = [
    "large refactor", "major redesign", "complex architecture",
    "security audit", "performance optimization",
    "migration plan", "full rewrite",
]

# Tool names that indicate the prompt is part of a complex workflow.
# NOTE(review): not referenced anywhere in this module — presumably consumed
# by another module, or vestigial; confirm before removing.
_COMPLEX_TOOLS = {"Write", "Edit", "GitCommit", "GitPush", "GitPR", "Bash"}

# Thresholds (characters / tool-call counts) used by the heuristic classifier
LONG_PROMPT_CHARS = 300
VERY_LONG_PROMPT_CHARS = 800
TOOL_CALL_THRESHOLD = 2  # After N tool calls, escalate to tier 3
HEAVY_TOOL_CALL_THRESHOLD = 8  # After N tool calls, escalate to tier 4

# LLM classifier timeout — set to near-zero so we always use the fast
# heuristic result immediately. The background thread still runs and caches
# its result for future identical prompts, but the main thread never waits.
_LLM_CLASSIFY_TIMEOUT = 0.01

# Shared thread pool for background LLM classification calls (daemon so it
# doesn't block process exit).
_classify_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="router-llm")
# Ensure the pool is cleanly shut down at process exit (non-blocking so the
# main thread is never held waiting for in-flight classification calls).
atexit.register(_classify_executor.shutdown, wait=False)
80
+
81
+
82
+ def _file_size_tier(text: str) -> int:
83
+ """Check if the prompt references a local file; escalate tier based on line count.
84
+
85
+ Only files that resolve to a path *within the current working directory*
86
+ are opened — this prevents an adversarial user from tricking the router
87
+ into reading arbitrary paths (e.g. ``../../.env``) embedded in their prompt.
88
+ """
89
+ cwd = _os_router.path.realpath(_os_router.getcwd())
90
+ paths = re.findall(r'[\w./\\-]+\.\w{1,5}', text)
91
+ max_tier = 1
92
+ for p in paths:
93
+ try:
94
+ # Resolve the candidate path and confirm it stays inside cwd
95
+ resolved = _os_router.path.realpath(p)
96
+ if not resolved.startswith(cwd + _os_router.sep) and resolved != cwd:
97
+ continue # path escapes the project root — skip
98
+ if _os_router.path.isfile(resolved):
99
+ with open(resolved, errors='ignore') as fh:
100
+ lines = sum(1 for _ in fh)
101
+ if lines > 500:
102
+ max_tier = max(max_tier, 4)
103
+ elif lines > 200:
104
+ max_tier = max(max_tier, 3)
105
+ elif lines > 80:
106
+ max_tier = max(max_tier, 2)
107
+ except OSError:
108
+ pass
109
+ return max_tier
110
+
111
+
112
# In-process cache: prompt_hash → tier int. Avoids a blocking API call on
# repeated or similar prompts. Capped at 512 entries to bound memory use.
# Protected by _tier_cache_lock — the cache is written from a background
# thread (the LLM classifier) and read from the main thread concurrently.
_tier_cache: dict[str, int] = {}
_tier_cache_lock = threading.Lock()
# When the cache reaches this size, the background classifier evicts a batch
# of the oldest entries before inserting (see classify_tier_llm).
_TIER_CACHE_MAX = 512
119
+
120
+
121
def _llm_classify_worker(prompt_snippet: str, config) -> int:
    """Blocking worker that calls the LLM to classify a prompt (runs in thread pool).

    Args:
        prompt_snippet: First ~600 chars of the user prompt.
        config: Object exposing ``api_key`` and ``base_url`` for the API client.

    Returns:
        A tier clamped to [1, 4].

    Raises:
        Exception: on any API failure, or ValueError when the reply contains
        no digit — classify_tier_llm() catches these and falls back to the
        heuristic classifier.
    """
    _CLASSIFY_PROMPT = (
        "Rate this coding task 1-4:\n"
        "1 = simple Q&A or single-line change\n"
        "2 = general coding, explanation, or small feature\n"
        "3 = debugging, architecture, complex analysis, or multi-file reasoning\n"
        "4 = large refactor, full rewrite, security audit, or migration\n"
        "Reply with ONLY the single digit, nothing else.\n"
        f"Task: {prompt_snippet}"
    )
    from openai import OpenAI
    import httpx
    # Fix: the httpx.Client was previously never closed, leaking a connection
    # pool on every classification call. Close it deterministically.
    http_client = httpx.Client(timeout=8)
    try:
        client = OpenAI(
            api_key=config.api_key,
            base_url=config.base_url,
            http_client=http_client,
        )
        resp = client.chat.completions.create(
            model="deepseek-v4-flash",
            messages=[{"role": "user", "content": _CLASSIFY_PROMPT}],
            max_tokens=2,
            temperature=0.0,
        )
    finally:
        http_client.close()
    reply = (resp.choices[0].message.content or "").strip()
    # Robustness: tolerate replies like "Tier 3" by extracting the first digit
    # rather than int()-ing the whole string.
    match = re.search(r"\d", reply)
    if match is None:
        raise ValueError(f"classifier returned no digit: {reply!r}")
    return max(1, min(4, int(match.group())))
147
+
148
+
149
def classify_tier_llm(user_text: str, config) -> int:
    """Classify a prompt using the LLM, without blocking the caller.

    Strategy:
      1. Compute heuristic tier immediately (< 1ms).
      2. Check the cache — if we've seen this prompt before, return cached result.
      3. Submit the LLM call to a background thread pool.
      4. Wait up to ``_LLM_CLASSIFY_TIMEOUT`` seconds for the result.
      5. If it arrives in time, cache it and return it.
      6. If it times out, return the heuristic result and let the thread keep
         running — the result will be written to the cache for future identical
         queries (zero extra cost on repeated prompts).
    """
    prompt_snippet = user_text[:600]
    # md5 is used purely as a cache key here, not for anything security-sensitive.
    cache_key = hashlib.md5(prompt_snippet.encode("utf-8", errors="replace")).hexdigest()

    # Cache hit — no API call needed (lock guards compound check+get)
    with _tier_cache_lock:
        if cache_key in _tier_cache:
            return _tier_cache[cache_key]

    # Compute heuristic immediately as the fallback
    heuristic = classify_tier(user_text)

    def _background_classify() -> int:
        try:
            result = _llm_classify_worker(prompt_snippet, config)
        except Exception:
            result = heuristic
        # Always write to cache (even if we timed out below, future calls benefit)
        with _tier_cache_lock:
            if len(_tier_cache) >= _TIER_CACHE_MAX:
                # Dicts preserve insertion order, so slicing the key list
                # evicts the 64 oldest entries.
                for stale in list(_tier_cache.keys())[:64]:
                    del _tier_cache[stale]
            _tier_cache[cache_key] = result
        return result

    future = _classify_executor.submit(_background_classify)
    try:
        return future.result(timeout=_LLM_CLASSIFY_TIMEOUT)
    except Exception:
        # Covers the pool's TimeoutError and any other failure. (The original
        # `except (FutureTimeoutError, Exception)` was redundant — Exception
        # already subsumes the timeout.) The background thread keeps running
        # and caches its result for future identical prompts.
        return heuristic
193
+
194
+
195
def classify_tier(user_text: str, recent_tool_count: int = 0) -> int:
    """Classify a prompt into a model tier (1-4) using pure heuristics (no API call).

    Returns:
        1 = fast/cheap (simple chat)
        2 = balanced (general purpose)
        3 = reasoner (debugging, architecture)
        4 = code-specialist (heavy refactoring)
    """
    lowered = user_text.lower()

    # File-size signal: referenced local files are a strong complexity indicator
    file_tier = _file_size_tier(user_text)

    # Keyword/regex signals are always checked, even for very short prompts.
    if any(kw in lowered for kw in _HEAVY_KEYWORDS):
        return 4
    if any(kw in lowered for kw in _REASONER_KEYWORDS):
        return 3
    # Regex fallback catches paraphrased queries the keyword pass misses.
    if any(re.search(pattern, lowered) for pattern in _REASONER_PATTERNS):
        return 3

    # Very short prompts: fall back to the file-size floor.
    if len(user_text) < 20:
        return file_tier

    # Very long prompts: tier 3 when code-heavy, otherwise tier 2.
    if len(user_text) > VERY_LONG_PROMPT_CHARS:
        looks_like_code = ("```" in user_text
                           or re.search(r'\b(def|function|class|import|const)\b', lowered))
        return 3 if looks_like_code else 2

    # Count independent "this prompt contains code" signals.
    signals = (
        "```" in user_text,
        bool(re.search(r'[\\/][\w.]+\.\w{1,4}', user_text)),
        bool(re.search(r'\b(function|class|def|import|const|let|var)\b', lowered)),
        bool(re.search(r'error|exception|fail|crash|stack.trace', lowered)),
    )
    score = sum(signals)
    if score >= 3:
        return 3
    if score >= 1:
        return 2

    # Long prompts with details → tier 2 (never below the file-size floor).
    if len(user_text) > LONG_PROMPT_CHARS:
        return max(2, file_tier)

    # Default: tier 1 for simple chat, but respect the file-size floor.
    return max(1, file_tier)
256
+
257
+
258
def select_model(user_text: str, recent_tool_count: int = 0,
                 preferred_model: Optional[str] = None,
                 tier_override: Optional[int] = None) -> str:
    """Select the best model based on task complexity and tool usage.

    Args:
        user_text: Raw user prompt to classify.
        recent_tool_count: Tool calls so far; high counts escalate the tier.
        preferred_model: If set, it always wins (see NOTE below).
        tier_override: Skip classification entirely and use this tier.

    Returns:
        A model id string.
    """
    if tier_override is not None:
        tier = tier_override
    else:
        base_tier = classify_tier(user_text, recent_tool_count)
        if recent_tool_count >= HEAVY_TOOL_CALL_THRESHOLD:
            tier = min(base_tier + 2, 4)
        elif recent_tool_count >= TOOL_CALL_THRESHOLD:
            tier = min(base_tier + 1, 4)
        else:
            tier = base_tier

    tier_models = get_models_by_tier(tier)

    if not tier_models:
        return preferred_model or ALL_MODELS_FLAT[0].id

    # NOTE(review): the original scanned tier_models for preferred_model but
    # returned preferred_model whether or not it was found — the loop was dead
    # code. Behavior ("an explicit preference always wins") is preserved here;
    # if the intent was to fall back to a tier model when the preference lies
    # outside the tier, that would be a behavior change — confirm with callers.
    if preferred_model:
        return preferred_model

    return tier_models[0].id
285
+
286
+
287
def should_use_reasoner(user_text: str, recent_tool_count: int = 0,
                        auto_route_enabled: bool = True) -> bool:
    """Return True when a tier 3+ (reasoner) model should handle this prompt."""
    if not auto_route_enabled:
        return False
    # Escalate by tool usage: +2 tiers past the heavy threshold, +1 past the
    # normal threshold, capped at tier 4.
    if recent_tool_count >= HEAVY_TOOL_CALL_THRESHOLD:
        bump = 2
    elif recent_tool_count >= TOOL_CALL_THRESHOLD:
        bump = 1
    else:
        bump = 0
    effective = min(classify_tier(user_text, recent_tool_count) + bump, 4)
    return effective >= 3
299
+
300
+
301
def get_tier_description(tier: int) -> str:
    """Return a human-readable description of *tier* (generic for unknown tiers)."""
    if tier == 1:
        return "Fast/Cheap (simple chat, quick queries)"
    if tier == 2:
        return "Balanced (general purpose coding & chat)"
    if tier == 3:
        return "Reasoner (debugging, architecture, complex analysis)"
    if tier == 4:
        return "Code-Specialist (large refactors, code generation)"
    return f"Tier {tier}"
310
+
311
+
312
def show_model_info() -> str:
    """Return a formatted string of all available models and tiers.

    Thin wrapper around model_registry.format_model_list() so callers don't
    need to import the registry module directly.
    """
    return format_model_list()
315
+
316
+
317
def show_current_routing(user_text: str, recent_tool_count: int = 0,
                         preferred_model: Optional[str] = None) -> str:
    """Render the routing decision for *user_text* as a multi-line summary."""
    base_tier = classify_tier(user_text, recent_tool_count)

    # Same escalation ladder used by select_model().
    if recent_tool_count >= HEAVY_TOOL_CALL_THRESHOLD:
        effective_tier = min(base_tier + 2, 4)
    elif recent_tool_count >= TOOL_CALL_THRESHOLD:
        effective_tier = min(base_tier + 1, 4)
    else:
        effective_tier = base_tier

    chosen = select_model(user_text, recent_tool_count, preferred_model)

    report_lines = [
        f"Classification: Tier {base_tier} → Effective Tier {effective_tier}",
        f"Selected Model: {chosen}",
        f"Description: {get_tier_description(effective_tier)}",
        f"Tool Calls: {recent_tool_count}",
    ]
    return "\n".join(report_lines)
337
+
338
+
339
+ # ------------------------------------------------------------------ #
340
+ # Shared routing helpers (used by both CLI and Web UI)
341
+ # ------------------------------------------------------------------ #
342
+
343
@dataclass
class RoutingResult:
    """Result of a model routing decision."""
    # Model id chosen for this turn.
    model: str
    # Tier (1-4) that produced the selection.
    tier: int
    # Human-readable tier label (from get_tier_description()).
    tier_description: str
    # True when the chosen model differs from the caller's current/preferred model.
    tier_changed: bool = False
350
+
351
+
352
def resolve_initial_route(
    user_text: str,
    tool_call_count: int,
    provider: str,
    preferred_model: str,
    auto_route: bool = True,
    config=None,
) -> RoutingResult:
    """Pick the starting model tier for a new user message.

    With *config* supplied, the LLM-backed classifier is used (it has a short
    internal timeout so it never blocks); otherwise the pure heuristic runs.
    When auto-routing is off, the caller's preferred model is kept at tier 2.
    """
    if not auto_route:
        return RoutingResult(model=preferred_model, tier=2,
                             tier_description=get_tier_description(2))

    tier = (classify_tier_llm(user_text, config)
            if config is not None
            else classify_tier(user_text, tool_call_count))

    chosen = DEEPSEEK_TIER_MODELS.get(tier, "deepseek-v4-flash")

    return RoutingResult(
        model=chosen,
        tier=tier,
        tier_description=get_tier_description(tier),
        tier_changed=(chosen != preferred_model),
    )
384
+
385
+
386
def escalate_tier(
    user_text: str,
    tool_call_count: int,
    provider: str,
    preferred_model: str,
    current_model: str,
    current_tier: int,
    auto_route: bool = True,
) -> RoutingResult:
    """Re-evaluate and possibly escalate the model tier mid-conversation.

    NOTE(review): past the heavy tool-call threshold this pins tier 4
    outright (unlike select_model's min(base + 2, 4)) — preserved as-is.
    """
    if not auto_route:
        return RoutingResult(model=current_model, tier=current_tier,
                             tier_description=get_tier_description(current_tier))

    base = classify_tier(user_text, tool_call_count)

    if tool_call_count >= HEAVY_TOOL_CALL_THRESHOLD:
        effective = 4
    elif tool_call_count >= TOOL_CALL_THRESHOLD:
        effective = min(base + 1, 4)
    else:
        effective = base

    chosen = DEEPSEEK_TIER_MODELS.get(effective, "deepseek-v4-flash")

    return RoutingResult(
        model=chosen,
        tier=effective,
        tier_description=get_tier_description(effective),
        tier_changed=(chosen != current_model),
    )
luckyd_code/sandbox.py ADDED
@@ -0,0 +1,156 @@
1
+ """Docker sandbox for secure command execution."""
2
+
3
+ import os
4
+ import subprocess
5
+ import threading
6
+
7
+
8
# Container image and resource caps applied to every sandboxed run.
SANDBOX_IMAGE = "python:3.10-slim"
SANDBOX_MEM_LIMIT = "512m"  # passed as docker --memory
SANDBOX_CPU_LIMIT = "1.0"   # passed as docker --cpus
11
+
12
+
13
def check_docker() -> tuple[bool, str]:
    """Probe for a usable Docker CLI. Returns (available, version_string)."""
    try:
        proc = subprocess.run(
            ["docker", "--version"],
            capture_output=True, text=True, timeout=10,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        # Binary missing, hung, or otherwise unlaunchable.
        return False, "Docker not found"
    if proc.returncode != 0:
        return False, "Docker not available"
    return True, proc.stdout.strip()
26
+
27
+
28
class Sandbox:
    """Run commands in a Docker container for isolation.

    When Docker is unavailable, falls back to direct (unsandboxed) execution
    via the shell — callers should treat the fallback as best-effort only.
    """

    def __init__(self, image: str = SANDBOX_IMAGE):
        self.image = image
        self.available = False
        self._check()

    def _check(self):
        # Probe Docker once at construction; cached in self.available.
        available, _ = check_docker()
        self.available = available

    def run(self, command: str, cwd: str | None = None, timeout: int = 120) -> tuple[str, str, int]:
        """Run a command, sandboxed in Docker when possible.

        Args:
            command: Shell command string (executed via `sh -c` / shell=True).
            cwd: Working directory for the command (defaults to os.getcwd()
                 in the Docker path, to the process cwd in the fallback).
            timeout: Seconds before the command is killed.

        Returns:
            (stdout, stderr, returncode); returncode is -1 on timeout/error.
        """
        if not self.available:
            # Fallback to direct execution. Fix: cwd is now forwarded — the
            # original dropped it here, so only the Docker path honored it.
            return self._run_direct(command, cwd, timeout)

        return self._run_docker(command, cwd, timeout)

    def _run_docker(self, command: str, cwd: str | None, timeout: int) -> tuple[str, str, int]:
        """Run command inside a locked-down, network-less Docker container."""
        work_dir = cwd or os.getcwd()

        # The command is passed as a single argv element to `sh -c`, so no
        # extra shell escaping is needed on the host side.
        docker_cmd = [
            "docker", "run", "--rm",
            "--network", "none",              # no network access
            "--memory", SANDBOX_MEM_LIMIT,
            "--cpus", SANDBOX_CPU_LIMIT,
            "--read-only",                    # read-only root filesystem
            "-v", f"{work_dir}:/workspace",   # project dir is still writable
            "-w", "/workspace",
            "--tmpfs", "/tmp:rw,noexec,nosuid,size=64m",
            self.image,
            "sh", "-c", command,
        ]

        try:
            result = subprocess.run(
                docker_cmd,
                capture_output=True, text=True,
                timeout=timeout,
            )
            return result.stdout, result.stderr, result.returncode
        except subprocess.TimeoutExpired:
            return "", f"Sandbox: command timed out after {timeout}s", -1
        except OSError as e:
            return "", f"Sandbox error: {e}", -1

    def _run_direct(self, command: str, cwd: str | None, timeout: int) -> tuple[str, str, int]:
        """Fallback: run directly on the host without any sandbox.

        shell=True is deliberate — `command` is a full shell line (same
        contract as the Docker path's `sh -c`), so callers must not pass
        untrusted input here.
        """
        try:
            result = subprocess.run(
                command, shell=True,
                cwd=cwd,
                capture_output=True, text=True,
                timeout=timeout,
            )
            return result.stdout, result.stderr, result.returncode
        except subprocess.TimeoutExpired:
            return "", f"Command timed out after {timeout}s", -1
        except Exception as e:
            return "", f"Error: {e}", -1

    def pull_image(self) -> str:
        """Pull the sandbox Docker image. Returns status message."""
        if not self.available:
            return "Docker not available"
        try:
            result = subprocess.run(
                ["docker", "pull", self.image],
                capture_output=True, text=True, timeout=120,
            )
            if result.returncode == 0:
                return f"Pulled {self.image}"
            # Truncate stderr so a huge daemon error doesn't flood the caller.
            return f"Failed to pull image: {result.stderr.strip()[:200]}"
        except subprocess.TimeoutExpired:
            return "Pull timed out"
        except Exception as e:
            return f"Error: {e}"

    def ensure_image(self) -> bool:
        """Ensure the sandbox image is available locally. Returns True if ready."""
        if not self.available:
            return False
        try:
            result = subprocess.run(
                ["docker", "image", "inspect", self.image],
                capture_output=True, text=True, timeout=10,
            )
            if result.returncode == 0:
                return True
            # Image not present locally — try to pull, then re-check.
            self.pull_image()
            result = subprocess.run(
                ["docker", "image", "inspect", self.image],
                capture_output=True, text=True, timeout=10,
            )
            return result.returncode == 0
        except Exception:
            return False
133
+
134
+
135
# Global singleton — lock guards against simultaneous creation from multiple
# threads (possible in the Web UI where requests run concurrently).
_sandbox: Sandbox | None = None  # created lazily by get_sandbox()
_sandbox_lock = threading.Lock()  # serializes first-time construction only
139
+
140
+
141
def get_sandbox() -> Sandbox:
    """Get or create the global sandbox instance (thread-safe).

    Uses double-checked locking: the unlocked fast path avoids lock traffic
    once the singleton exists; the locked re-check prevents two threads from
    both constructing it.
    """
    global _sandbox
    if _sandbox is not None:
        return _sandbox
    with _sandbox_lock:
        if _sandbox is None:
            candidate = Sandbox()
            if candidate.available:
                candidate.ensure_image()
            _sandbox = candidate
    return _sandbox
152
+
153
+
154
def is_sandbox_available() -> bool:
    """Check if Docker sandbox is available.

    NOTE: delegates to get_sandbox(), so the first call may construct the
    singleton and (when Docker is present) attempt an image pull.
    """
    return get_sandbox().available
@@ -0,0 +1,2 @@
1
+ # Removed — self-critique was circular LLM fluff (same model reviewing itself).
2
+ # Replaced by the existing verify.py pipeline and its static analysis.