llmcode-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. llm_code/__init__.py +2 -0
  2. llm_code/analysis/__init__.py +6 -0
  3. llm_code/analysis/cache.py +33 -0
  4. llm_code/analysis/engine.py +256 -0
  5. llm_code/analysis/go_rules.py +114 -0
  6. llm_code/analysis/js_rules.py +84 -0
  7. llm_code/analysis/python_rules.py +311 -0
  8. llm_code/analysis/rules.py +140 -0
  9. llm_code/analysis/rust_rules.py +108 -0
  10. llm_code/analysis/universal_rules.py +111 -0
  11. llm_code/api/__init__.py +0 -0
  12. llm_code/api/client.py +90 -0
  13. llm_code/api/errors.py +73 -0
  14. llm_code/api/openai_compat.py +390 -0
  15. llm_code/api/provider.py +35 -0
  16. llm_code/api/sse.py +52 -0
  17. llm_code/api/types.py +140 -0
  18. llm_code/cli/__init__.py +0 -0
  19. llm_code/cli/commands.py +70 -0
  20. llm_code/cli/image.py +122 -0
  21. llm_code/cli/render.py +214 -0
  22. llm_code/cli/status_line.py +79 -0
  23. llm_code/cli/streaming.py +92 -0
  24. llm_code/cli/tui_main.py +220 -0
  25. llm_code/computer_use/__init__.py +11 -0
  26. llm_code/computer_use/app_detect.py +49 -0
  27. llm_code/computer_use/app_tier.py +57 -0
  28. llm_code/computer_use/coordinator.py +99 -0
  29. llm_code/computer_use/input_control.py +71 -0
  30. llm_code/computer_use/screenshot.py +93 -0
  31. llm_code/cron/__init__.py +13 -0
  32. llm_code/cron/parser.py +145 -0
  33. llm_code/cron/scheduler.py +135 -0
  34. llm_code/cron/storage.py +126 -0
  35. llm_code/enterprise/__init__.py +1 -0
  36. llm_code/enterprise/audit.py +59 -0
  37. llm_code/enterprise/auth.py +26 -0
  38. llm_code/enterprise/oidc.py +95 -0
  39. llm_code/enterprise/rbac.py +65 -0
  40. llm_code/harness/__init__.py +5 -0
  41. llm_code/harness/config.py +33 -0
  42. llm_code/harness/engine.py +129 -0
  43. llm_code/harness/guides.py +41 -0
  44. llm_code/harness/sensors.py +68 -0
  45. llm_code/harness/templates.py +84 -0
  46. llm_code/hida/__init__.py +1 -0
  47. llm_code/hida/classifier.py +187 -0
  48. llm_code/hida/engine.py +49 -0
  49. llm_code/hida/profiles.py +95 -0
  50. llm_code/hida/types.py +28 -0
  51. llm_code/ide/__init__.py +1 -0
  52. llm_code/ide/bridge.py +80 -0
  53. llm_code/ide/detector.py +76 -0
  54. llm_code/ide/server.py +169 -0
  55. llm_code/logging.py +29 -0
  56. llm_code/lsp/__init__.py +0 -0
  57. llm_code/lsp/client.py +298 -0
  58. llm_code/lsp/detector.py +42 -0
  59. llm_code/lsp/manager.py +56 -0
  60. llm_code/lsp/tools.py +288 -0
  61. llm_code/marketplace/__init__.py +0 -0
  62. llm_code/marketplace/builtin_registry.py +102 -0
  63. llm_code/marketplace/installer.py +162 -0
  64. llm_code/marketplace/plugin.py +78 -0
  65. llm_code/marketplace/registry.py +360 -0
  66. llm_code/mcp/__init__.py +0 -0
  67. llm_code/mcp/bridge.py +87 -0
  68. llm_code/mcp/client.py +117 -0
  69. llm_code/mcp/health.py +120 -0
  70. llm_code/mcp/manager.py +214 -0
  71. llm_code/mcp/oauth.py +219 -0
  72. llm_code/mcp/transport.py +254 -0
  73. llm_code/mcp/types.py +53 -0
  74. llm_code/remote/__init__.py +0 -0
  75. llm_code/remote/client.py +136 -0
  76. llm_code/remote/protocol.py +22 -0
  77. llm_code/remote/server.py +275 -0
  78. llm_code/remote/ssh_proxy.py +56 -0
  79. llm_code/runtime/__init__.py +0 -0
  80. llm_code/runtime/auto_commit.py +56 -0
  81. llm_code/runtime/auto_diagnose.py +62 -0
  82. llm_code/runtime/checkpoint.py +70 -0
  83. llm_code/runtime/checkpoint_recovery.py +142 -0
  84. llm_code/runtime/compaction.py +35 -0
  85. llm_code/runtime/compressor.py +415 -0
  86. llm_code/runtime/config.py +533 -0
  87. llm_code/runtime/context.py +49 -0
  88. llm_code/runtime/conversation.py +921 -0
  89. llm_code/runtime/cost_tracker.py +126 -0
  90. llm_code/runtime/dream.py +127 -0
  91. llm_code/runtime/file_protection.py +150 -0
  92. llm_code/runtime/hardware.py +85 -0
  93. llm_code/runtime/hooks.py +223 -0
  94. llm_code/runtime/indexer.py +230 -0
  95. llm_code/runtime/knowledge_compiler.py +232 -0
  96. llm_code/runtime/memory.py +132 -0
  97. llm_code/runtime/memory_layers.py +467 -0
  98. llm_code/runtime/memory_lint.py +252 -0
  99. llm_code/runtime/model_aliases.py +37 -0
  100. llm_code/runtime/ollama.py +93 -0
  101. llm_code/runtime/overlay.py +124 -0
  102. llm_code/runtime/permissions.py +200 -0
  103. llm_code/runtime/plan.py +45 -0
  104. llm_code/runtime/prompt.py +238 -0
  105. llm_code/runtime/repo_map.py +174 -0
  106. llm_code/runtime/sandbox.py +116 -0
  107. llm_code/runtime/session.py +268 -0
  108. llm_code/runtime/skill_resolver.py +61 -0
  109. llm_code/runtime/skills.py +133 -0
  110. llm_code/runtime/speculative.py +75 -0
  111. llm_code/runtime/streaming_executor.py +216 -0
  112. llm_code/runtime/telemetry.py +196 -0
  113. llm_code/runtime/token_budget.py +26 -0
  114. llm_code/runtime/vcr.py +142 -0
  115. llm_code/runtime/vision.py +102 -0
  116. llm_code/swarm/__init__.py +1 -0
  117. llm_code/swarm/backend_subprocess.py +108 -0
  118. llm_code/swarm/backend_tmux.py +103 -0
  119. llm_code/swarm/backend_worktree.py +306 -0
  120. llm_code/swarm/checkpoint.py +74 -0
  121. llm_code/swarm/coordinator.py +236 -0
  122. llm_code/swarm/mailbox.py +88 -0
  123. llm_code/swarm/manager.py +202 -0
  124. llm_code/swarm/memory_sync.py +80 -0
  125. llm_code/swarm/recovery.py +21 -0
  126. llm_code/swarm/team.py +67 -0
  127. llm_code/swarm/types.py +31 -0
  128. llm_code/task/__init__.py +16 -0
  129. llm_code/task/diagnostics.py +93 -0
  130. llm_code/task/manager.py +162 -0
  131. llm_code/task/types.py +112 -0
  132. llm_code/task/verifier.py +104 -0
  133. llm_code/tools/__init__.py +0 -0
  134. llm_code/tools/agent.py +145 -0
  135. llm_code/tools/agent_roles.py +82 -0
  136. llm_code/tools/base.py +94 -0
  137. llm_code/tools/bash.py +565 -0
  138. llm_code/tools/computer_use_tools.py +278 -0
  139. llm_code/tools/coordinator_tool.py +75 -0
  140. llm_code/tools/cron_create.py +90 -0
  141. llm_code/tools/cron_delete.py +49 -0
  142. llm_code/tools/cron_list.py +51 -0
  143. llm_code/tools/deferred.py +92 -0
  144. llm_code/tools/dump.py +116 -0
  145. llm_code/tools/edit_file.py +282 -0
  146. llm_code/tools/git_tools.py +531 -0
  147. llm_code/tools/glob_search.py +112 -0
  148. llm_code/tools/grep_search.py +144 -0
  149. llm_code/tools/ide_diagnostics.py +59 -0
  150. llm_code/tools/ide_open.py +58 -0
  151. llm_code/tools/ide_selection.py +52 -0
  152. llm_code/tools/memory_tools.py +138 -0
  153. llm_code/tools/multi_edit.py +143 -0
  154. llm_code/tools/notebook_edit.py +107 -0
  155. llm_code/tools/notebook_read.py +81 -0
  156. llm_code/tools/parsing.py +63 -0
  157. llm_code/tools/read_file.py +154 -0
  158. llm_code/tools/registry.py +58 -0
  159. llm_code/tools/search_backends/__init__.py +56 -0
  160. llm_code/tools/search_backends/brave.py +56 -0
  161. llm_code/tools/search_backends/duckduckgo.py +129 -0
  162. llm_code/tools/search_backends/searxng.py +71 -0
  163. llm_code/tools/search_backends/tavily.py +73 -0
  164. llm_code/tools/swarm_create.py +109 -0
  165. llm_code/tools/swarm_delete.py +95 -0
  166. llm_code/tools/swarm_list.py +44 -0
  167. llm_code/tools/swarm_message.py +109 -0
  168. llm_code/tools/task_close.py +79 -0
  169. llm_code/tools/task_plan.py +79 -0
  170. llm_code/tools/task_verify.py +90 -0
  171. llm_code/tools/tool_search.py +65 -0
  172. llm_code/tools/web_common.py +258 -0
  173. llm_code/tools/web_fetch.py +223 -0
  174. llm_code/tools/web_search.py +280 -0
  175. llm_code/tools/write_file.py +118 -0
  176. llm_code/tui/__init__.py +1 -0
  177. llm_code/tui/app.py +2432 -0
  178. llm_code/tui/chat_view.py +82 -0
  179. llm_code/tui/chat_widgets.py +309 -0
  180. llm_code/tui/header_bar.py +46 -0
  181. llm_code/tui/input_bar.py +349 -0
  182. llm_code/tui/keybindings.py +142 -0
  183. llm_code/tui/marketplace.py +210 -0
  184. llm_code/tui/status_bar.py +72 -0
  185. llm_code/tui/theme.py +96 -0
  186. llm_code/utils/__init__.py +0 -0
  187. llm_code/utils/diff.py +111 -0
  188. llm_code/utils/errors.py +70 -0
  189. llm_code/utils/hyperlink.py +73 -0
  190. llm_code/utils/notebook.py +179 -0
  191. llm_code/utils/search.py +69 -0
  192. llm_code/utils/text_normalize.py +28 -0
  193. llm_code/utils/version_check.py +62 -0
  194. llm_code/vim/__init__.py +4 -0
  195. llm_code/vim/engine.py +51 -0
  196. llm_code/vim/motions.py +172 -0
  197. llm_code/vim/operators.py +183 -0
  198. llm_code/vim/text_objects.py +139 -0
  199. llm_code/vim/transitions.py +279 -0
  200. llm_code/vim/types.py +68 -0
  201. llm_code/voice/__init__.py +1 -0
  202. llm_code/voice/languages.py +43 -0
  203. llm_code/voice/recorder.py +136 -0
  204. llm_code/voice/stt.py +36 -0
  205. llm_code/voice/stt_anthropic.py +66 -0
  206. llm_code/voice/stt_google.py +32 -0
  207. llm_code/voice/stt_whisper.py +52 -0
  208. llmcode_cli-1.0.0.dist-info/METADATA +524 -0
  209. llmcode_cli-1.0.0.dist-info/RECORD +212 -0
  210. llmcode_cli-1.0.0.dist-info/WHEEL +4 -0
  211. llmcode_cli-1.0.0.dist-info/entry_points.txt +2 -0
  212. llmcode_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,79 @@
1
+ """TaskCloseTool: finalize a task, write summary, transition to DONE."""
2
+ from __future__ import annotations
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from llm_code.task.manager import TaskLifecycleManager
7
+ from llm_code.task.types import TaskStatus
8
+ from llm_code.tools.base import PermissionLevel, Tool, ToolResult
9
+
10
+
11
class TaskCloseInput(BaseModel):
    """Arguments accepted by the ``task_close`` tool."""

    # Identifier of the task to close (required).
    task_id: str
    # Optional completion summary; defaults to an empty string.
    summary: str = ""
14
+
15
+
16
class TaskCloseTool(Tool):
    """Tool that moves a task to DONE and optionally records a summary."""

    def __init__(self, manager: TaskLifecycleManager) -> None:
        """Store the lifecycle manager used to look up and update tasks."""
        self._manager = manager

    @property
    def name(self) -> str:
        """Name under which the tool is registered."""
        return "task_close"

    @property
    def description(self) -> str:
        """Human/LLM-readable description of the tool."""
        return (
            "Close a completed task. Transitions from CLOSE to DONE and writes "
            "a completion summary. The task must be in CLOSE status."
        )

    @property
    def input_schema(self) -> dict:
        """JSON schema of the accepted arguments (only ``task_id`` is required)."""
        properties = {
            "task_id": {"type": "string", "description": "The task ID to close"},
            "summary": {"type": "string", "description": "Completion summary"},
        }
        return {
            "type": "object",
            "properties": properties,
            "required": ["task_id"],
        }

    @property
    def required_permission(self) -> PermissionLevel:
        """Closing a task needs workspace-write permission."""
        return PermissionLevel.WORKSPACE_WRITE

    @property
    def input_model(self) -> type[TaskCloseInput]:
        """Model class describing the tool arguments."""
        return TaskCloseInput

    def execute(self, args: dict) -> ToolResult:
        """Transition the task to DONE, append the summary, and report.

        Args:
            args: Mapping with ``task_id`` and an optional ``summary``.

        Returns:
            An error result when the task is unknown or the manager rejects
            the transition with ``ValueError``; otherwise a textual report
            with the title, modified files and summary.
        """
        task_id = args["task_id"]
        summary = args.get("summary", "")

        task = self._manager.get_task(task_id)
        if task is None:
            return ToolResult(output=f"Task not found: {task_id}", is_error=True)

        try:
            self._manager.transition(task_id, TaskStatus.DONE)
        except ValueError as exc:
            # The manager signals a rejected transition via ValueError.
            return ToolResult(output=str(exc), is_error=True)

        if summary:
            # The summary is stored by appending a section to the task plan.
            plan_with_summary = f"{task.plan}\n\n## Summary\n{summary}"
            self._manager.update_task(task_id, plan=plan_with_summary)

        # Re-read the task so the report uses the state after the transition.
        closed = self._manager.get_task(task_id)
        if closed and closed.files_modified:
            files = ", ".join(closed.files_modified)
        else:
            files = "(none)"

        report_lines = [
            f"Task {task_id} closed successfully.",
            f"Title: {task.title}",
            f"Files modified: {files}",
            f"Summary: {summary or '(no summary)'}",
        ]
        return ToolResult(output="\n".join(report_lines))
@@ -0,0 +1,79 @@
1
+ """TaskPlanTool: create a task with title, plan, and goals."""
2
+ from __future__ import annotations
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from llm_code.task.manager import TaskLifecycleManager
7
+ from llm_code.tools.base import PermissionLevel, Tool, ToolResult
8
+
9
+
10
class TaskPlanInput(BaseModel):
    """Arguments accepted by the ``task_plan`` tool."""

    # Short task title (required).
    title: str
    # Implementation plan text; defaults to an empty string.
    plan: str = ""
    # Completion goals; defaults to an empty list.
    goals: list[str] = []
14
+
15
+
16
class TaskPlanTool(Tool):
    """Tool that creates a new task from a title, a plan and goals."""

    def __init__(self, manager: TaskLifecycleManager, session_id: str = "") -> None:
        """Store the manager and the session id passed to every created task."""
        self._manager = manager
        self._session_id = session_id

    @property
    def name(self) -> str:
        """Name under which the tool is registered."""
        return "task_plan"

    @property
    def description(self) -> str:
        """Human/LLM-readable description of the tool."""
        return (
            "Create a new structured task. Provide a title, an implementation plan, "
            "and measurable goals. The task starts in PLAN status."
        )

    @property
    def input_schema(self) -> dict:
        """JSON schema of the accepted arguments (only ``title`` is required)."""
        goals_schema = {
            "type": "array",
            "items": {"type": "string"},
            "description": "Measurable completion goals",
        }
        properties = {
            "title": {"type": "string", "description": "Short task title"},
            "plan": {"type": "string", "description": "Step-by-step implementation plan"},
            "goals": goals_schema,
        }
        return {
            "type": "object",
            "properties": properties,
            "required": ["title"],
        }

    @property
    def required_permission(self) -> PermissionLevel:
        """Creating a task needs workspace-write permission."""
        return PermissionLevel.WORKSPACE_WRITE

    @property
    def input_model(self) -> type[TaskPlanInput]:
        """Model class describing the tool arguments."""
        return TaskPlanInput

    def execute(self, args: dict) -> ToolResult:
        """Create the task and return a textual description of it.

        Args:
            args: Mapping with ``title`` and optional ``plan`` / ``goals``.

        Returns:
            An error result when the title is missing or blank; otherwise a
            report with the new task's id, title, status, goals and plan.
        """
        title = args.get("title", "").strip()
        if not title:
            return ToolResult(output="Error: title is required", is_error=True)

        task = self._manager.create_task(
            title=title,
            plan=args.get("plan", ""),
            # Goals are handed to the manager as a tuple.
            goals=tuple(args.get("goals", [])),
            session_id=self._session_id,
        )

        goals_text = ", ".join(task.goals) if task.goals else "(none)"
        plan_text = task.plan or "(no plan set)"
        report_lines = [
            f"Created task {task.id}: {task.title}",
            f"Status: {task.status.value}",
            f"Goals: {goals_text}",
            f"Plan:\n{plan_text}",
        ]
        return ToolResult(output="\n".join(report_lines))
@@ -0,0 +1,90 @@
1
+ """TaskVerifyTool: run verification checks on a task."""
2
+ from __future__ import annotations
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from llm_code.task.diagnostics import DiagnosticsEngine
7
+ from llm_code.task.manager import TaskLifecycleManager
8
+ from llm_code.task.verifier import Verifier
9
+ from llm_code.tools.base import PermissionLevel, Tool, ToolResult
10
+
11
+
12
class TaskVerifyInput(BaseModel):
    """Arguments accepted by the ``task_verify`` tool."""

    # Identifier of the task to verify (required).
    task_id: str
14
+
15
+
16
class TaskVerifyTool(Tool):
    """Tool that verifies a task and reports a diagnostic recommendation."""

    def __init__(
        self,
        manager: TaskLifecycleManager,
        verifier: Verifier,
        diagnostics: DiagnosticsEngine,
    ) -> None:
        """Store the collaborators used to look up, verify and diagnose tasks."""
        self._manager = manager
        self._verifier = verifier
        self._diagnostics = diagnostics

    @property
    def name(self) -> str:
        """Name under which the tool is registered."""
        return "task_verify"

    @property
    def description(self) -> str:
        """Human/LLM-readable description of the tool."""
        return (
            "Run automated verification checks on a task: pytest, ruff, and file_exists. "
            "Returns check results and a recommended action (continue/replan/escalate)."
        )

    @property
    def input_schema(self) -> dict:
        """JSON schema of the accepted arguments (``task_id`` is required)."""
        properties = {
            "task_id": {"type": "string", "description": "The task ID to verify"},
        }
        return {
            "type": "object",
            "properties": properties,
            "required": ["task_id"],
        }

    @property
    def required_permission(self) -> PermissionLevel:
        """Permission level declared for this tool."""
        # NOTE(review): declared read-only although execute() records the
        # result through append_verify_result -- confirm this is intended.
        return PermissionLevel.READ_ONLY

    @property
    def input_model(self) -> type[TaskVerifyInput]:
        """Model class describing the tool arguments."""
        return TaskVerifyInput

    def is_read_only(self, args: dict) -> bool:
        """Report the tool as read-only for every argument set."""
        return True

    def execute(self, args: dict) -> ToolResult:
        """Verify the task, record the result, and format a report.

        Args:
            args: Mapping containing ``task_id``.

        Returns:
            An error result when the task is unknown; otherwise a report with
            one line per check followed by the diagnostic recommendation.
        """
        task_id = args["task_id"]
        task = self._manager.get_task(task_id)
        if task is None:
            return ToolResult(output=f"Task not found: {task_id}", is_error=True)

        verify_result = self._verifier.verify(task)
        # The result is stored on the task before diagnostics run.
        self._manager.append_verify_result(task_id, verify_result)
        report = self._diagnostics.analyze(task, verify_result)

        overall = "PASSED" if verify_result.all_passed else "FAILED"
        lines = [
            f"Verification for task {task_id}: {task.title}",
            f"Overall: {overall}",
            "",
        ]
        # Each check's output is capped at 200 characters in the report.
        lines.extend(
            f"  [{'PASS' if check.passed else 'FAIL'}] {check.check_name}: {check.output[:200]}"
            for check in verify_result.checks
        )
        lines.append("")
        lines.append(f"Recommendation: {report.recommendation}")
        if report.summary:
            lines.append(f"Diagnostic: {report.summary}")
        if report.report_path:
            lines.append(f"Full report: {report.report_path}")

        return ToolResult(output="\n".join(lines))
@@ -0,0 +1,65 @@
1
+ """ToolSearchTool — lets the LLM discover and unlock deferred tools."""
2
+ from __future__ import annotations
3
+
4
+ from llm_code.tools.base import PermissionLevel, Tool, ToolResult
5
+
6
+ if True:
7
+ # Avoid circular imports; DeferredToolManager is a pure data class
8
+ from llm_code.tools.deferred import DeferredToolManager
9
+
10
+
11
class ToolSearchTool(Tool):
    """Tool that finds deferred tools matching a query and unlocks them."""

    def __init__(self, manager: "DeferredToolManager") -> None:
        """Store the deferred-tool manager that is searched and updated."""
        self._manager = manager

    @property
    def name(self) -> str:
        """Name under which the tool is registered."""
        return "tool_search"

    @property
    def description(self) -> str:
        """Human/LLM-readable description of the tool."""
        return (
            "Search for additional tools that are not currently visible. "
            "Provide a query string to find tools by name or description. "
            "Matching tools will be unlocked and available in subsequent turns."
        )

    @property
    def input_schema(self) -> dict:
        """JSON schema of the accepted arguments (``query`` is required)."""
        query_schema = {
            "type": "string",
            "description": "Search query to match against tool names and descriptions.",
        }
        return {
            "type": "object",
            "properties": {"query": query_schema},
            "required": ["query"],
        }

    @property
    def required_permission(self) -> PermissionLevel:
        """Permission level declared for this tool."""
        return PermissionLevel.READ_ONLY

    def execute(self, args: dict) -> ToolResult:
        """Search the deferred tools and unlock every match.

        Args:
            args: Mapping with the ``query`` string (defaults to ``""``).

        Returns:
            A non-error result that either lists the unlocked tools or says
            that nothing matched.
        """
        query = args.get("query", "")
        manager = self._manager
        # NOTE(review): reads the manager's private ``_deferred`` attribute.
        matches = manager.search_tools(query, manager._deferred)

        if not matches:
            return ToolResult(
                output=f"No tools found matching '{query}'. "
                "Try a different search term or use a broader query.",
            )

        # Unlock everything first, then build the listing.
        for tool in matches:
            manager.unlock_tool(tool.name)

        header = f"Found {len(matches)} tool(s) matching '{query}' (now unlocked):"
        listing = [f"  - {tool.name}: {tool.description}" for tool in matches]
        return ToolResult(output="\n".join([header, *listing]))
@@ -0,0 +1,258 @@
1
+ """Shared utilities for web tools (URL safety, caching, extraction)."""
2
+ from __future__ import annotations
3
+
4
+ import dataclasses
5
+ import ipaddress
6
+ import json
7
+ import re
8
+ import time
9
+ from collections import OrderedDict
10
+ from urllib.parse import urlparse
11
+
12
+
13
@dataclasses.dataclass(frozen=True)
class UrlSafetyResult:
    """Result of URL safety classification.

    ``classification`` is one of ``"safe"``, ``"needs_confirm"`` or
    ``"blocked"``; ``reasons`` holds short tags explaining the verdict.
    """

    classification: str
    reasons: tuple[str, ...] = ()

    @property
    def is_safe(self) -> bool:
        """Return True if URL is safe to fetch."""
        return self.classification == "safe"

    @property
    def is_blocked(self) -> bool:
        """Return True if URL should be blocked."""
        return self.classification == "blocked"

    @property
    def needs_confirm(self) -> bool:
        """Return True if URL needs user confirmation."""
        return self.classification == "needs_confirm"


# Hosts that serve cloud instance metadata; always blocked.
_METADATA_HOSTS = frozenset(
    {"169.254.169.254", "metadata.google.internal", "metadata.azure.com"}
)
# Ports that do not trigger the "non-standard port" confirmation.
_STANDARD_PORTS = frozenset({80, 443})


def classify_url(url: str) -> UrlSafetyResult:
    """Classify URL as safe, needs_confirm, or blocked.

    The check is purely lexical: the host is never resolved, so a DNS name
    that points at an internal address is not detected here.

    Rules:
    - blocked: file://, private IPs, IPv6 loopback, cloud metadata hosts,
      invalid URLs or ports, unsupported schemes
    - needs_confirm: localhost, IPv4 loopback, IP-literal hosts,
      non-standard ports
    - safe: standard HTTP/HTTPS URLs to regular hosts

    Args:
        url: The URL to classify.

    Returns:
        A :class:`UrlSafetyResult` carrying the verdict and a reason tag.
    """

    def verdict(classification: str, reason: str) -> UrlSafetyResult:
        return UrlSafetyResult(classification=classification, reasons=(reason,))

    try:
        parsed = urlparse(url)
    except Exception:
        # Fail closed: whatever the parser rejects is never fetched.
        return verdict("blocked", "invalid URL")

    if not parsed.scheme:
        return verdict("blocked", "missing scheme")
    if parsed.scheme == "file":
        return verdict("blocked", "file scheme")
    if parsed.scheme not in ("http", "https"):
        return verdict("blocked", "unsupported scheme")

    # A trailing dot denotes the same (fully qualified) host; strip it so
    # "localhost." or "127.0.0.1." cannot slip past the checks below.
    host = (parsed.hostname or "").rstrip(".")
    if not host:
        return verdict("blocked", "missing host")

    # Metadata hosts must be tested before the generic IP rules.
    if host in _METADATA_HOSTS:
        return verdict("blocked", "metadata")

    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        ip = None  # not an IP literal; treated as a host name

    if ip is not None:
        if ip.is_loopback:
            # IPv6 loopback is blocked; IPv4 loopback only needs confirmation.
            if ip.version == 6:
                return verdict("blocked", "loopback")
            return verdict("needs_confirm", "127.0.0.1")
        if ip.is_private:
            return verdict("blocked", "private IP")

    if host == "localhost":
        return verdict("needs_confirm", "localhost")

    try:
        port = parsed.port
    except ValueError:
        # urlparse defers port validation to attribute access; a malformed
        # or out-of-range port raises here instead of at parse time.
        return verdict("blocked", "invalid port")

    # An IP literal needs confirmation whether the default port is implied
    # or spelled out, so ":80" / ":443" cannot be used to skip the prompt.
    if ip is not None and (port is None or port in _STANDARD_PORTS):
        return verdict("needs_confirm", "IP-only")

    if port is not None and port not in _STANDARD_PORTS:
        return verdict("needs_confirm", "non-standard port")

    return UrlSafetyResult(classification="safe", reasons=())
126
+
127
+
128
@dataclasses.dataclass(frozen=True)
class CacheEntry:
    """Immutable cached payload together with its fetch time and TTL."""

    content: str
    fetched_at: float
    ttl: float = 900.0

    @property
    def is_expired(self) -> bool:
        """Return True once more than ``ttl`` seconds have passed since fetch."""
        age = time.time() - self.fetched_at
        return age > self.ttl


class UrlCache:
    """Bounded URL-content cache with least-recently-used eviction and TTL."""

    def __init__(self, max_entries: int = 50, ttl: float = 900.0) -> None:
        """Create an empty cache.

        Args:
            max_entries: Number of entries kept before the oldest is evicted.
            ttl: Lifetime, in seconds, given to every stored entry.
        """
        self.max_entries = max_entries
        self.ttl = ttl
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()

    def get(self, url: str) -> str | None:
        """Return the cached content for ``url``, or None if absent or expired."""
        entry = self._cache.get(url)
        if entry is None:
            return None

        if entry.is_expired:
            # Expired entries are dropped on access.
            del self._cache[url]
            return None

        # A hit makes the entry the most recently used one.
        self._cache.move_to_end(url)
        return entry.content

    def put(self, url: str, content: str) -> None:
        """Store ``content`` for ``url``, evicting the oldest entry when full."""
        # Drop any previous entry so the new one lands at the recent end.
        self._cache.pop(url, None)
        self._cache[url] = CacheEntry(
            content=content,
            fetched_at=time.time(),
            ttl=self.ttl,
        )

        if len(self._cache) > self.max_entries:
            self._cache.popitem(last=False)

    def clear(self) -> None:
        """Remove every cached entry."""
        self._cache.clear()
187
+
188
+
189
def _html_to_markdown(html: str, use_readability: bool = True) -> str:
    """Convert HTML to markdown using readability and html2text.

    Both libraries are optional. Without html2text the function falls back
    to plain text: script/style blocks and tags are removed, character
    references are decoded and whitespace is collapsed.

    Args:
        html: The HTML document to convert.
        use_readability: Reduce the document to its main content first
            (only when readability is installed).

    Returns:
        Markdown, or plain text when html2text is unavailable.
    """
    content = html

    if use_readability:
        try:
            from readability import Document
        except ImportError:
            Document = None
        if Document is not None:
            try:
                content = Document(html).summary()
            except Exception:
                # Best effort: if readability cannot parse the document,
                # convert the full HTML instead of failing the request.
                content = html

    try:
        import html2text
    except ImportError:
        html2text = None
    if html2text is not None:
        converter = html2text.HTML2Text()
        converter.ignore_links = False
        return converter.handle(content)

    # Fallback without html2text. The parameter shadows the ``html`` module
    # name, so import the one function that is needed.
    from html import unescape

    # Remove script/style elements including their bodies, which would
    # otherwise remain as text once the tags are stripped.
    content = re.sub(r"(?is)<(script|style)\b.*?</\1\s*>", "", content)
    content = re.sub(r"<[^>]+>", "", content)
    # Decode entities after stripping tags so "&lt;b&gt;" stays literal text.
    content = unescape(content)
    return re.sub(r"\s+", " ", content).strip()


def extract_content(
    body: str,
    content_type: str,
    raw: bool = False,
    max_length: int = 50000,
) -> str:
    """Extract and format content based on content type.

    Args:
        body: Raw content body.
        content_type: MIME type of content, matched case-insensitively.
        raw: If True, skip readability for HTML.
        max_length: Maximum length of the returned text.

    Returns:
        Formatted content. Over-long content is cut to ``max_length``
        characters and ends with a "[truncated]" marker; when ``max_length``
        is shorter than the marker the text is cut without a marker.
    """
    if not body:
        return ""

    # MIME types are case-insensitive; also tolerate a missing header value.
    mime = (content_type or "").strip().lower()

    if mime.startswith("application/json"):
        try:
            result = json.dumps(json.loads(body), indent=2)
        except json.JSONDecodeError:
            # Not valid JSON despite the header: pass the body through.
            result = body
    elif "html" in mime:
        result = _html_to_markdown(body, use_readability=not raw)
    else:
        result = body

    if len(result) > max_length:
        truncated_marker = "\n\n[truncated]"
        if max_length < len(truncated_marker):
            # No room for the marker. A negative slice bound would cut from
            # the end and return more than max_length characters.
            return result[: max(max_length, 0)]
        result = result[: max_length - len(truncated_marker)] + truncated_marker

    return result