llmcode-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. llm_code/__init__.py +2 -0
  2. llm_code/analysis/__init__.py +6 -0
  3. llm_code/analysis/cache.py +33 -0
  4. llm_code/analysis/engine.py +256 -0
  5. llm_code/analysis/go_rules.py +114 -0
  6. llm_code/analysis/js_rules.py +84 -0
  7. llm_code/analysis/python_rules.py +311 -0
  8. llm_code/analysis/rules.py +140 -0
  9. llm_code/analysis/rust_rules.py +108 -0
  10. llm_code/analysis/universal_rules.py +111 -0
  11. llm_code/api/__init__.py +0 -0
  12. llm_code/api/client.py +90 -0
  13. llm_code/api/errors.py +73 -0
  14. llm_code/api/openai_compat.py +390 -0
  15. llm_code/api/provider.py +35 -0
  16. llm_code/api/sse.py +52 -0
  17. llm_code/api/types.py +140 -0
  18. llm_code/cli/__init__.py +0 -0
  19. llm_code/cli/commands.py +70 -0
  20. llm_code/cli/image.py +122 -0
  21. llm_code/cli/render.py +214 -0
  22. llm_code/cli/status_line.py +79 -0
  23. llm_code/cli/streaming.py +92 -0
  24. llm_code/cli/tui_main.py +220 -0
  25. llm_code/computer_use/__init__.py +11 -0
  26. llm_code/computer_use/app_detect.py +49 -0
  27. llm_code/computer_use/app_tier.py +57 -0
  28. llm_code/computer_use/coordinator.py +99 -0
  29. llm_code/computer_use/input_control.py +71 -0
  30. llm_code/computer_use/screenshot.py +93 -0
  31. llm_code/cron/__init__.py +13 -0
  32. llm_code/cron/parser.py +145 -0
  33. llm_code/cron/scheduler.py +135 -0
  34. llm_code/cron/storage.py +126 -0
  35. llm_code/enterprise/__init__.py +1 -0
  36. llm_code/enterprise/audit.py +59 -0
  37. llm_code/enterprise/auth.py +26 -0
  38. llm_code/enterprise/oidc.py +95 -0
  39. llm_code/enterprise/rbac.py +65 -0
  40. llm_code/harness/__init__.py +5 -0
  41. llm_code/harness/config.py +33 -0
  42. llm_code/harness/engine.py +129 -0
  43. llm_code/harness/guides.py +41 -0
  44. llm_code/harness/sensors.py +68 -0
  45. llm_code/harness/templates.py +84 -0
  46. llm_code/hida/__init__.py +1 -0
  47. llm_code/hida/classifier.py +187 -0
  48. llm_code/hida/engine.py +49 -0
  49. llm_code/hida/profiles.py +95 -0
  50. llm_code/hida/types.py +28 -0
  51. llm_code/ide/__init__.py +1 -0
  52. llm_code/ide/bridge.py +80 -0
  53. llm_code/ide/detector.py +76 -0
  54. llm_code/ide/server.py +169 -0
  55. llm_code/logging.py +29 -0
  56. llm_code/lsp/__init__.py +0 -0
  57. llm_code/lsp/client.py +298 -0
  58. llm_code/lsp/detector.py +42 -0
  59. llm_code/lsp/manager.py +56 -0
  60. llm_code/lsp/tools.py +288 -0
  61. llm_code/marketplace/__init__.py +0 -0
  62. llm_code/marketplace/builtin_registry.py +102 -0
  63. llm_code/marketplace/installer.py +162 -0
  64. llm_code/marketplace/plugin.py +78 -0
  65. llm_code/marketplace/registry.py +360 -0
  66. llm_code/mcp/__init__.py +0 -0
  67. llm_code/mcp/bridge.py +87 -0
  68. llm_code/mcp/client.py +117 -0
  69. llm_code/mcp/health.py +120 -0
  70. llm_code/mcp/manager.py +214 -0
  71. llm_code/mcp/oauth.py +219 -0
  72. llm_code/mcp/transport.py +254 -0
  73. llm_code/mcp/types.py +53 -0
  74. llm_code/remote/__init__.py +0 -0
  75. llm_code/remote/client.py +136 -0
  76. llm_code/remote/protocol.py +22 -0
  77. llm_code/remote/server.py +275 -0
  78. llm_code/remote/ssh_proxy.py +56 -0
  79. llm_code/runtime/__init__.py +0 -0
  80. llm_code/runtime/auto_commit.py +56 -0
  81. llm_code/runtime/auto_diagnose.py +62 -0
  82. llm_code/runtime/checkpoint.py +70 -0
  83. llm_code/runtime/checkpoint_recovery.py +142 -0
  84. llm_code/runtime/compaction.py +35 -0
  85. llm_code/runtime/compressor.py +415 -0
  86. llm_code/runtime/config.py +533 -0
  87. llm_code/runtime/context.py +49 -0
  88. llm_code/runtime/conversation.py +921 -0
  89. llm_code/runtime/cost_tracker.py +126 -0
  90. llm_code/runtime/dream.py +127 -0
  91. llm_code/runtime/file_protection.py +150 -0
  92. llm_code/runtime/hardware.py +85 -0
  93. llm_code/runtime/hooks.py +223 -0
  94. llm_code/runtime/indexer.py +230 -0
  95. llm_code/runtime/knowledge_compiler.py +232 -0
  96. llm_code/runtime/memory.py +132 -0
  97. llm_code/runtime/memory_layers.py +467 -0
  98. llm_code/runtime/memory_lint.py +252 -0
  99. llm_code/runtime/model_aliases.py +37 -0
  100. llm_code/runtime/ollama.py +93 -0
  101. llm_code/runtime/overlay.py +124 -0
  102. llm_code/runtime/permissions.py +200 -0
  103. llm_code/runtime/plan.py +45 -0
  104. llm_code/runtime/prompt.py +238 -0
  105. llm_code/runtime/repo_map.py +174 -0
  106. llm_code/runtime/sandbox.py +116 -0
  107. llm_code/runtime/session.py +268 -0
  108. llm_code/runtime/skill_resolver.py +61 -0
  109. llm_code/runtime/skills.py +133 -0
  110. llm_code/runtime/speculative.py +75 -0
  111. llm_code/runtime/streaming_executor.py +216 -0
  112. llm_code/runtime/telemetry.py +196 -0
  113. llm_code/runtime/token_budget.py +26 -0
  114. llm_code/runtime/vcr.py +142 -0
  115. llm_code/runtime/vision.py +102 -0
  116. llm_code/swarm/__init__.py +1 -0
  117. llm_code/swarm/backend_subprocess.py +108 -0
  118. llm_code/swarm/backend_tmux.py +103 -0
  119. llm_code/swarm/backend_worktree.py +306 -0
  120. llm_code/swarm/checkpoint.py +74 -0
  121. llm_code/swarm/coordinator.py +236 -0
  122. llm_code/swarm/mailbox.py +88 -0
  123. llm_code/swarm/manager.py +202 -0
  124. llm_code/swarm/memory_sync.py +80 -0
  125. llm_code/swarm/recovery.py +21 -0
  126. llm_code/swarm/team.py +67 -0
  127. llm_code/swarm/types.py +31 -0
  128. llm_code/task/__init__.py +16 -0
  129. llm_code/task/diagnostics.py +93 -0
  130. llm_code/task/manager.py +162 -0
  131. llm_code/task/types.py +112 -0
  132. llm_code/task/verifier.py +104 -0
  133. llm_code/tools/__init__.py +0 -0
  134. llm_code/tools/agent.py +145 -0
  135. llm_code/tools/agent_roles.py +82 -0
  136. llm_code/tools/base.py +94 -0
  137. llm_code/tools/bash.py +565 -0
  138. llm_code/tools/computer_use_tools.py +278 -0
  139. llm_code/tools/coordinator_tool.py +75 -0
  140. llm_code/tools/cron_create.py +90 -0
  141. llm_code/tools/cron_delete.py +49 -0
  142. llm_code/tools/cron_list.py +51 -0
  143. llm_code/tools/deferred.py +92 -0
  144. llm_code/tools/dump.py +116 -0
  145. llm_code/tools/edit_file.py +282 -0
  146. llm_code/tools/git_tools.py +531 -0
  147. llm_code/tools/glob_search.py +112 -0
  148. llm_code/tools/grep_search.py +144 -0
  149. llm_code/tools/ide_diagnostics.py +59 -0
  150. llm_code/tools/ide_open.py +58 -0
  151. llm_code/tools/ide_selection.py +52 -0
  152. llm_code/tools/memory_tools.py +138 -0
  153. llm_code/tools/multi_edit.py +143 -0
  154. llm_code/tools/notebook_edit.py +107 -0
  155. llm_code/tools/notebook_read.py +81 -0
  156. llm_code/tools/parsing.py +63 -0
  157. llm_code/tools/read_file.py +154 -0
  158. llm_code/tools/registry.py +58 -0
  159. llm_code/tools/search_backends/__init__.py +56 -0
  160. llm_code/tools/search_backends/brave.py +56 -0
  161. llm_code/tools/search_backends/duckduckgo.py +129 -0
  162. llm_code/tools/search_backends/searxng.py +71 -0
  163. llm_code/tools/search_backends/tavily.py +73 -0
  164. llm_code/tools/swarm_create.py +109 -0
  165. llm_code/tools/swarm_delete.py +95 -0
  166. llm_code/tools/swarm_list.py +44 -0
  167. llm_code/tools/swarm_message.py +109 -0
  168. llm_code/tools/task_close.py +79 -0
  169. llm_code/tools/task_plan.py +79 -0
  170. llm_code/tools/task_verify.py +90 -0
  171. llm_code/tools/tool_search.py +65 -0
  172. llm_code/tools/web_common.py +258 -0
  173. llm_code/tools/web_fetch.py +223 -0
  174. llm_code/tools/web_search.py +280 -0
  175. llm_code/tools/write_file.py +118 -0
  176. llm_code/tui/__init__.py +1 -0
  177. llm_code/tui/app.py +2432 -0
  178. llm_code/tui/chat_view.py +82 -0
  179. llm_code/tui/chat_widgets.py +309 -0
  180. llm_code/tui/header_bar.py +46 -0
  181. llm_code/tui/input_bar.py +349 -0
  182. llm_code/tui/keybindings.py +142 -0
  183. llm_code/tui/marketplace.py +210 -0
  184. llm_code/tui/status_bar.py +72 -0
  185. llm_code/tui/theme.py +96 -0
  186. llm_code/utils/__init__.py +0 -0
  187. llm_code/utils/diff.py +111 -0
  188. llm_code/utils/errors.py +70 -0
  189. llm_code/utils/hyperlink.py +73 -0
  190. llm_code/utils/notebook.py +179 -0
  191. llm_code/utils/search.py +69 -0
  192. llm_code/utils/text_normalize.py +28 -0
  193. llm_code/utils/version_check.py +62 -0
  194. llm_code/vim/__init__.py +4 -0
  195. llm_code/vim/engine.py +51 -0
  196. llm_code/vim/motions.py +172 -0
  197. llm_code/vim/operators.py +183 -0
  198. llm_code/vim/text_objects.py +139 -0
  199. llm_code/vim/transitions.py +279 -0
  200. llm_code/vim/types.py +68 -0
  201. llm_code/voice/__init__.py +1 -0
  202. llm_code/voice/languages.py +43 -0
  203. llm_code/voice/recorder.py +136 -0
  204. llm_code/voice/stt.py +36 -0
  205. llm_code/voice/stt_anthropic.py +66 -0
  206. llm_code/voice/stt_google.py +32 -0
  207. llm_code/voice/stt_whisper.py +52 -0
  208. llmcode_cli-1.0.0.dist-info/METADATA +524 -0
  209. llmcode_cli-1.0.0.dist-info/RECORD +212 -0
  210. llmcode_cli-1.0.0.dist-info/WHEEL +4 -0
  211. llmcode_cli-1.0.0.dist-info/entry_points.txt +2 -0
  212. llmcode_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,223 @@
1
+ """Hook runner supporting 6 types, 24 events, glob event matching, per-hook timeout/on_error."""
2
+ from __future__ import annotations
3
+
4
+ import fnmatch
5
+ import json
6
+ import os
7
+ import subprocess
8
+ from dataclasses import dataclass, field
9
+
10
+ from llm_code.runtime.config import HookConfig
11
+ from llm_code.tools.base import ToolResult
12
+
13
# Legacy global fallback timeout, in seconds (used only when hook.timeout is
# not set; kept for backwards compatibility).
HOOK_TIMEOUT = 10.0

# Canonical event table: every supported event name mapped to its dot-prefixed
# group form ("group.event", e.g. "pre_tool_use" -> "tool.pre_tool_use") so
# glob patterns such as "tool.*", "session.*" or "*" can be matched with
# fnmatch in _event_matches().
# NOTE(review): the module docstring says 24 events, but 25 are defined here.
_EVENT_GROUP: dict[str, str] = {
    event: f"{group}.{event}"
    for group, events in {
        "tool": ("pre_tool_use", "post_tool_use", "tool_error", "tool_denied"),
        "command": ("pre_command", "post_command", "command_error"),
        "prompt": ("prompt_submit", "prompt_compile", "prompt_cache_hit", "prompt_cache_miss"),
        "agent": ("agent_spawn", "agent_complete", "agent_error", "agent_message"),
        "session": ("session_start", "session_end", "session_save", "session_compact", "session_dream"),
        "http": ("http_request", "http_response", "http_error", "http_retry", "http_fallback"),
    }.items()
    for event in events
}
64
+
65
+
66
def _event_matches(pattern: str, event: str) -> bool:
    """Decide whether the hook *pattern* applies to *event*.

    Rules, checked in order:
      1. A bare "*" matches every event.
      2. A pattern containing "." is globbed against the dotted form
         "group.event" (e.g. "tool.*" matches "tool.pre_tool_use").
      3. Otherwise the pattern must equal the raw event name exactly.
    """
    if pattern == "*":
        return True
    dotted_name = _EVENT_GROUP.get(event, event)
    if "." in pattern:
        return fnmatch.fnmatch(dotted_name, pattern)
    return pattern == event
81
+
82
+
83
def _build_env(event: str, context: dict) -> dict[str, str]:
    """Build the environment mapping to pass to a hook process.

    The process environment is the parent environment plus HOOK_* variables
    derived from *context*.  All values are coerced to ``str`` (and ``None``
    becomes ""): ``subprocess.run`` requires string env values, and callers
    may put non-string data (e.g. an integer HTTP status) into the context.
    """
    env = {**os.environ}
    env["HOOK_EVENT"] = event
    # env var name -> context key it is populated from
    mapping = {
        "HOOK_TOOL_NAME": "tool_name",
        "HOOK_TOOL_INPUT": "tool_input",
        "HOOK_TOOL_OUTPUT": "tool_output",
        "HOOK_SESSION_ID": "session_id",
        "HOOK_AGENT_ID": "agent_id",
        "HOOK_HTTP_URL": "url",
        "HOOK_HTTP_STATUS": "status",
        "HOOK_COMMAND": "command",
    }
    for env_key, ctx_key in mapping.items():
        value = context.get(ctx_key, "")
        env[env_key] = "" if value is None else str(value)
    return env
96
+
97
+
98
@dataclass
class HookOutcome:
    """Aggregated result of running one or more hooks for an event."""

    # True when a hook denied the action (exit code 2, or a failure while the
    # hook's on_error policy is "deny"); callers stop on the first denial.
    denied: bool = False
    # Hook stdout/stderr collected as warning messages for the caller.
    messages: list[str] = field(default_factory=list)
102
+
103
+
104
class HookRunner:
    """Executes user-configured shell hooks in response to runtime events.

    Hooks are matched against event names via glob patterns (_event_matches)
    and run as shell commands with a HOOK_* environment (_build_env).
    Exit code 2 denies the action; other failures follow the hook's
    ``on_error`` policy ("deny" / "ignore" / "warn").
    """

    def __init__(self, hooks: tuple[HookConfig, ...] = ()) -> None:
        self._hooks = hooks

    # ------------------------------------------------------------------
    # Public generic entry point
    # ------------------------------------------------------------------

    def fire(self, event: str, context: dict) -> HookOutcome:
        """Fire all hooks whose event pattern matches *event*.

        *context* is a plain dict carrying optional keys:
            tool_name, tool_input, tool_output,
            session_id, agent_id, url, status, command
        """
        env = _build_env(event, context)
        outcome = HookOutcome()

        for hook in self._hooks:
            if not _event_matches(hook.event, event):
                continue

            hook_outcome = self._run_single_hook(hook, env)
            if hook_outcome.denied:
                return hook_outcome  # stop on first deny
            outcome.messages.extend(hook_outcome.messages)

        return outcome

    # ------------------------------------------------------------------
    # Legacy helpers (backwards compat)
    # ------------------------------------------------------------------

    def pre_tool_use(self, tool_name: str, args: dict) -> HookOutcome:
        """Run all pre_tool_use hooks that match tool_name."""
        context = {
            "tool_name": tool_name,
            "tool_input": json.dumps(args),
            "tool_output": "",
        }
        # Build env with legacy keys too
        env = _build_env("pre_tool_use", context)
        env["HOOK_TOOL_IS_ERROR"] = "false"
        return self._run_hooks_with_env("pre_tool_use", tool_name, env)

    def post_tool_use(self, tool_name: str, args: dict, result: ToolResult) -> HookOutcome:
        """Run all post_tool_use hooks that match tool_name."""
        context = {
            "tool_name": tool_name,
            "tool_input": json.dumps(args),
            "tool_output": result.output,
        }
        env = _build_env("post_tool_use", context)
        env["HOOK_TOOL_IS_ERROR"] = "true" if result.is_error else "false"
        return self._run_hooks_with_env("post_tool_use", tool_name, env)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _run_hooks_with_env(
        self, event: str, tool_name: str, env: dict[str, str]
    ) -> HookOutcome:
        """Execute matching hooks using a pre-built env (legacy path).

        Unlike fire(), this additionally filters hooks by tool_pattern
        (globbed against *tool_name*).
        """
        outcome = HookOutcome()

        for hook in self._hooks:
            if not _event_matches(hook.event, event):
                continue
            if not fnmatch.fnmatch(tool_name, hook.tool_pattern):
                continue

            hook_outcome = self._run_single_hook(hook, env)
            if hook_outcome.denied:
                return hook_outcome
            outcome.messages.extend(hook_outcome.messages)

        return outcome

    def _run_single_hook(self, hook: HookConfig, env: dict[str, str]) -> HookOutcome:
        """Run one shell command and interpret its exit code.

        Exit code semantics:
            0      -> success, no messages
            2      -> explicit deny (stdout/stderr becomes the deny message)
            other, timeout, or spawn failure -> apply the on_error policy
        """
        # getattr defaults keep compatibility with HookConfig objects that
        # predate the per-hook timeout/on_error fields.
        timeout = getattr(hook, "timeout", HOOK_TIMEOUT)
        on_error = getattr(hook, "on_error", "warn")

        try:
            proc = subprocess.run(
                hook.command,
                shell=True,
                env=env,
                timeout=timeout,
                capture_output=True,
                text=True,
            )
        except subprocess.TimeoutExpired:
            # BUG FIX: use the defaulted local `on_error` — reading
            # hook.on_error directly raised AttributeError on legacy
            # HookConfig objects and bypassed the "warn" default.
            msg = f"Hook timed out after {timeout}s: {hook.command}"
            return HookOutcome(denied=(on_error == "deny"), messages=[msg])
        except Exception as exc:
            # Same fix as above: respect the defaulted on_error policy.
            return HookOutcome(denied=(on_error == "deny"), messages=[f"Hook error: {exc}"])

        if proc.returncode == 0:
            return HookOutcome(denied=False)

        if proc.returncode == 2:
            msg = proc.stdout.strip() or proc.stderr.strip() or "Hook denied tool use"
            return HookOutcome(denied=True, messages=[msg])

        # Non-zero, non-2: apply on_error policy
        msg = (
            proc.stdout.strip()
            or proc.stderr.strip()
            or f"Hook exited with code {proc.returncode}: {hook.command}"
        )
        if on_error == "deny":
            return HookOutcome(denied=True, messages=[msg])
        if on_error == "ignore":
            return HookOutcome(denied=False, messages=[])
        # "warn" (default)
        return HookOutcome(denied=False, messages=[msg])
@@ -0,0 +1,230 @@
1
+ """Project file and symbol indexer."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import re
6
+ from dataclasses import asdict, dataclass
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+
10
+ # ---------------------------------------------------------------------------
11
+ # Types
12
+ # ---------------------------------------------------------------------------
13
+
14
+
15
@dataclass(frozen=True)
class FileEntry:
    """One file discovered during a project scan (immutable record)."""

    path: str  # relative path from project root, POSIX-style (see _scan_files)
    size: int  # size in bytes, taken from Path.stat().st_size
    language: str  # language name from _EXT_TO_LANG, or "unknown"
20
+
21
+
22
@dataclass(frozen=True)
class SymbolEntry:
    """One symbol found by regex scanning (see _SYMBOL_PATTERNS)."""

    name: str  # identifier captured by the pattern's first group
    kind: str  # "class" | "function" | "method" | "variable" | "export"
    file: str  # relative path
    line: int  # 1-based line number within the file
28
+
29
+
30
@dataclass(frozen=True)
class ProjectIndex:
    """Immutable snapshot of all indexed files and symbols."""

    files: tuple[FileEntry, ...]
    symbols: tuple[SymbolEntry, ...]
    generated_at: str  # UTC ISO-8601 timestamp (datetime.now(timezone.utc))
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Language detection
39
+ # ---------------------------------------------------------------------------
40
+
41
# File-extension -> language table used by _detect_language().
# Extensions are compared lowercase; anything absent maps to "unknown".
_EXT_TO_LANG: dict[str, str] = {
    ext: lang
    for lang, exts in {
        "python": (".py", ".pyi"),
        "typescript": (".ts", ".tsx"),
        "javascript": (".js", ".jsx"),
        "go": (".go",),
        "rust": (".rs",),
        "java": (".java",),
        "ruby": (".rb",),
        "php": (".php",),
        "swift": (".swift",),
        "kotlin": (".kt",),
    }.items()
    for ext in exts
}
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Directories to skip
59
+ # ---------------------------------------------------------------------------
60
+
61
# Directory names never descended into while scanning a project: VCS
# metadata, virtualenvs, dependency trees, and build/cache output.
_SKIP_DIRS: frozenset[str] = frozenset(
    (
        ".git",
        "node_modules",
        ".venv",
        "venv",
        "__pycache__",
        "dist",
        "build",
        ".next",
        ".nuxt",
        "target",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
    )
)
78
+
79
+ # ---------------------------------------------------------------------------
80
+ # Symbol regex patterns
81
+ # ---------------------------------------------------------------------------
82
+
83
# Per-language regex tables for lightweight symbol extraction.  Each entry is
# (line-anchored compiled pattern, symbol kind); _extract_symbols tries them
# in order and the first match wins for a given line.
_SYMBOL_PATTERNS: dict[str, list[tuple[re.Pattern[str], str]]] = {
    lang: [(re.compile(raw), kind) for raw, kind in table]
    for lang, table in {
        "python": [
            (r"^class\s+(\w+)", "class"),
            (r"^def\s+(\w+)", "function"),
            (r"^(\w+)\s*(?::\s*\w+)?\s*=", "variable"),
        ],
        "typescript": [
            (r"^export\s+(?:class|interface)\s+(\w+)", "class"),
            (r"^export\s+(?:function|const|let|var)\s+(\w+)", "export"),
            (r"^class\s+(\w+)", "class"),
            (r"^function\s+(\w+)", "function"),
        ],
        "javascript": [
            (r"^export\s+(?:class|function|const|let|var)\s+(\w+)", "export"),
            (r"^class\s+(\w+)", "class"),
            (r"^function\s+(\w+)", "function"),
        ],
        "go": [
            (r"^func\s+(\w+)", "function"),
            (r"^type\s+(\w+)\s+struct", "class"),
            (r"^type\s+(\w+)\s+interface", "class"),
        ],
        "rust": [
            (r"^(?:pub\s+)?fn\s+(\w+)", "function"),
            (r"^(?:pub\s+)?struct\s+(\w+)", "class"),
            (r"^(?:pub\s+)?enum\s+(\w+)", "class"),
            (r"^(?:pub\s+)?trait\s+(\w+)", "class"),
        ],
    }.items()
}
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # Helpers
116
+ # ---------------------------------------------------------------------------
117
+
118
+
119
def _should_skip(name: str) -> bool:
    """Decide whether a directory entry is excluded from the scan.

    A directory is skipped when it is in the explicit _SKIP_DIRS set, or
    when it looks like generated packaging metadata (*.egg-info).
    """
    return name in _SKIP_DIRS or name.endswith(".egg-info")
127
+
128
+
129
def _detect_language(path: Path) -> str:
    """Map a file's extension (lowercased) to a language name; "unknown" otherwise."""
    suffix = path.suffix.lower()
    return _EXT_TO_LANG.get(suffix, "unknown")
131
+
132
+
133
+ # ---------------------------------------------------------------------------
134
+ # ProjectIndexer
135
+ # ---------------------------------------------------------------------------
136
+
137
+
138
+ class ProjectIndexer:
139
+ def __init__(self, cwd: Path) -> None:
140
+ self._cwd = cwd
141
+
142
+ # ------------------------------------------------------------------
143
+ # Public API
144
+ # ------------------------------------------------------------------
145
+
146
+ def build_index(self) -> ProjectIndex:
147
+ files = self._scan_files()
148
+ symbols: list[SymbolEntry] = []
149
+ for f in files:
150
+ symbols.extend(self._extract_symbols(f))
151
+ now = datetime.now(timezone.utc).isoformat()
152
+ return ProjectIndex(
153
+ files=tuple(files),
154
+ symbols=tuple(symbols),
155
+ generated_at=now,
156
+ )
157
+
158
+ def save(self, index: ProjectIndex, path: Path) -> None:
159
+ data = {
160
+ "files": [asdict(f) for f in index.files],
161
+ "symbols": [asdict(s) for s in index.symbols],
162
+ "generated_at": index.generated_at,
163
+ }
164
+ path.write_text(json.dumps(data, indent=2), encoding="utf-8")
165
+
166
+ @staticmethod
167
+ def load(path: Path) -> ProjectIndex | None:
168
+ try:
169
+ raw = json.loads(path.read_text(encoding="utf-8"))
170
+ files = tuple(FileEntry(**f) for f in raw["files"])
171
+ symbols = tuple(SymbolEntry(**s) for s in raw["symbols"])
172
+ return ProjectIndex(
173
+ files=files,
174
+ symbols=symbols,
175
+ generated_at=raw["generated_at"],
176
+ )
177
+ except (FileNotFoundError, KeyError, TypeError, json.JSONDecodeError):
178
+ return None
179
+
180
+ # ------------------------------------------------------------------
181
+ # Private helpers
182
+ # ------------------------------------------------------------------
183
+
184
+ def _scan_files(self) -> list[FileEntry]:
185
+ entries: list[FileEntry] = []
186
+ for item in self._walk(self._cwd):
187
+ rel = item.relative_to(self._cwd).as_posix()
188
+ entries.append(
189
+ FileEntry(
190
+ path=rel,
191
+ size=item.stat().st_size,
192
+ language=_detect_language(item),
193
+ )
194
+ )
195
+ entries.sort(key=lambda e: e.path)
196
+ return entries
197
+
198
+ def _walk(self, root: Path):
199
+ """Yield all files under *root*, skipping ignored directories."""
200
+ for child in sorted(root.iterdir()):
201
+ if child.is_dir():
202
+ if not _should_skip(child.name):
203
+ yield from self._walk(child)
204
+ elif child.is_file():
205
+ yield child
206
+
207
+ def _extract_symbols(self, file: FileEntry) -> list[SymbolEntry]:
208
+ patterns = _SYMBOL_PATTERNS.get(file.language)
209
+ if not patterns:
210
+ return []
211
+ abs_path = self._cwd / file.path
212
+ try:
213
+ text = abs_path.read_text(encoding="utf-8", errors="replace")
214
+ except OSError:
215
+ return []
216
+ results: list[SymbolEntry] = []
217
+ for lineno, line in enumerate(text.splitlines(), start=1):
218
+ for pattern, kind in patterns:
219
+ m = pattern.match(line)
220
+ if m:
221
+ results.append(
222
+ SymbolEntry(
223
+ name=m.group(1),
224
+ kind=kind,
225
+ file=file.path,
226
+ line=lineno,
227
+ )
228
+ )
229
+ break # first matching pattern wins for this line
230
+ return results
@@ -0,0 +1,232 @@
1
+ """Knowledge Compiler — incrementally builds a structured project knowledge base."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import re
6
+ import subprocess
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from llm_code.api.types import Message, MessageRequest, TextBlock
12
+
13
logger = logging.getLogger(__name__)

# Parses one index.md line of the form "- [Title](path) — summary" into
# groups (title, path, summary); used by KnowledgeCompiler.get_index().
_INDEX_LINE_RE = re.compile(r"^- \[(.+?)\]\((.+?)\)\s*—\s*(.+)$")

# System prompt given to the LLM when (re)compiling a module article.
_COMPILE_SYSTEM_PROMPT = """\
You are a knowledge compiler for a software project. Given a list of changed files \
and session facts, produce a concise Markdown knowledge article about the affected \
module or area.

Format:
# [Module Name]

[2-3 sentence description of what this module does]

## Key Types
- [Important classes, dataclasses, types]

## Patterns
- [Recurring patterns or conventions in this area]

## Dependencies
- [Key imports or integrations]

Be concise and factual. Focus on architecture, not implementation details.
"""
38
+
39
+
40
@dataclass(frozen=True)
class KnowledgeEntry:
    """A single entry in the knowledge index.

    Entries parsed back from index.md carry only path/title/summary;
    get_index() leaves last_compiled and source_files empty.
    """

    path: str  # relative to .llm-code/knowledge/
    title: str
    summary: str  # one-line for index
    last_compiled: str  # ISO timestamp
    source_files: tuple[str, ...]  # which source files this knowledge covers
49
+
50
+
51
@dataclass(frozen=True)
class IngestResult:
    """Result of the ingest phase."""

    changed_files: tuple[str, ...]  # paths from `git diff --name-only` (empty without a commit)
    facts: tuple[str, ...]  # caller-supplied session facts (may be empty)
57
+
58
+
59
+ class KnowledgeCompiler:
60
+ """Incrementally builds and maintains a structured project knowledge base."""
61
+
62
+ def __init__(self, cwd: Path, llm_provider: Any | None, compile_model: str = "") -> None:
63
+ self._cwd = cwd
64
+ self._provider = llm_provider
65
+ self._compile_model = compile_model
66
+ self._knowledge_dir = cwd / ".llm-code" / "knowledge"
67
+ self._knowledge_dir.mkdir(parents=True, exist_ok=True)
68
+ (self._knowledge_dir / "modules").mkdir(exist_ok=True)
69
+
70
+ @property
71
+ def knowledge_dir(self) -> Path:
72
+ return self._knowledge_dir
73
+
74
+ def get_index(self) -> list[KnowledgeEntry]:
75
+ """Parse index.md and return all knowledge entries."""
76
+ index_path = self._knowledge_dir / "index.md"
77
+ if not index_path.exists():
78
+ return []
79
+
80
+ entries: list[KnowledgeEntry] = []
81
+ for line in index_path.read_text(encoding="utf-8").splitlines():
82
+ m = _INDEX_LINE_RE.match(line.strip())
83
+ if m:
84
+ title, path, summary = m.group(1), m.group(2), m.group(3).strip()
85
+ entries.append(
86
+ KnowledgeEntry(
87
+ path=path,
88
+ title=title,
89
+ summary=summary,
90
+ last_compiled="",
91
+ source_files=(),
92
+ )
93
+ )
94
+ return entries
95
+
96
+ def ingest(
97
+ self,
98
+ facts: list[str] | None = None,
99
+ since_commit: str | None = None,
100
+ ) -> IngestResult:
101
+ """Phase 1: Gather changed files and session facts."""
102
+ changed: list[str] = []
103
+ if since_commit:
104
+ try:
105
+ result = subprocess.run(
106
+ ["git", "diff", "--name-only", since_commit, "HEAD"],
107
+ cwd=self._cwd,
108
+ capture_output=True,
109
+ text=True,
110
+ timeout=10,
111
+ )
112
+ if result.returncode == 0:
113
+ changed = [f.strip() for f in result.stdout.strip().splitlines() if f.strip()]
114
+ except Exception:
115
+ pass
116
+ return IngestResult(
117
+ changed_files=tuple(changed),
118
+ facts=tuple(facts or []),
119
+ )
120
+
121
+ async def compile(self, ingest_data: IngestResult) -> None:
122
+ """Phase 2: Use LLM to compile knowledge from ingested data."""
123
+ if self._provider is None:
124
+ return
125
+ if not ingest_data.changed_files and not ingest_data.facts:
126
+ return
127
+
128
+ modules = self._group_by_module(ingest_data.changed_files)
129
+
130
+ for module_name, files in modules.items():
131
+ try:
132
+ article = await self._compile_module(module_name, files, ingest_data.facts)
133
+ if article:
134
+ self._write_module(module_name, article)
135
+ except Exception:
136
+ logger.debug("Knowledge compile failed for module %s", module_name, exc_info=True)
137
+
138
+ self._rebuild_index()
139
+
140
+ def _group_by_module(self, files: tuple[str, ...]) -> dict[str, list[str]]:
141
+ """Group files by their top-level package directory."""
142
+ modules: dict[str, list[str]] = {}
143
+ for f in files:
144
+ parts = Path(f).parts
145
+ if len(parts) >= 2:
146
+ module = parts[1] if parts[0] in ("llm_code", "src", "lib") else parts[0]
147
+ else:
148
+ module = Path(f).stem
149
+ modules.setdefault(module, []).append(f)
150
+ return modules
151
+
152
+ async def _compile_module(
153
+ self, module_name: str, files: list[str], facts: tuple[str, ...]
154
+ ) -> str:
155
+ """Call LLM to generate a knowledge article for a module."""
156
+ existing = ""
157
+ article_path = self._knowledge_dir / "modules" / f"{module_name}.md"
158
+ if article_path.exists():
159
+ existing = article_path.read_text(encoding="utf-8")
160
+
161
+ facts_str = "\n".join(f"- {fact}" for fact in facts) if facts else "None"
162
+ files_str = "\n".join(f"- {f}" for f in files)
163
+
164
+ user_msg = (
165
+ f"Module: {module_name}\n\n"
166
+ f"Changed files:\n{files_str}\n\n"
167
+ f"Session facts:\n{facts_str}\n\n"
168
+ )
169
+ if existing:
170
+ user_msg += f"Existing article (merge new information, don't overwrite):\n\n{existing}\n"
171
+
172
+ request = MessageRequest(
173
+ model=self._compile_model or "",
174
+ messages=(Message(role="user", content=(TextBlock(text=user_msg),)),),
175
+ system=_COMPILE_SYSTEM_PROMPT,
176
+ tools=(),
177
+ max_tokens=1024,
178
+ temperature=0.3,
179
+ )
180
+
181
+ response = await self._provider.send_message(request)
182
+ parts: list[str] = []
183
+ for block in response.content:
184
+ if hasattr(block, "text"):
185
+ parts.append(block.text)
186
+ return "\n".join(parts)
187
+
188
+ def _write_module(self, module_name: str, content: str) -> None:
189
+ """Write a module article to the knowledge directory."""
190
+ path = self._knowledge_dir / "modules" / f"{module_name}.md"
191
+ path.write_text(content, encoding="utf-8")
192
+
193
+ def _rebuild_index(self) -> None:
194
+ """Regenerate index.md from existing module files."""
195
+ modules_dir = self._knowledge_dir / "modules"
196
+ lines = ["# Knowledge Index\n"]
197
+ for md_file in sorted(modules_dir.glob("*.md")):
198
+ title = md_file.stem.replace("_", " ").title()
199
+ summary = ""
200
+ for file_line in md_file.read_text(encoding="utf-8").splitlines():
201
+ stripped = file_line.strip()
202
+ if stripped and not stripped.startswith("#"):
203
+ summary = stripped
204
+ break
205
+ lines.append(f"- [{title}]({md_file.relative_to(self._knowledge_dir)}) — {summary}")
206
+ (self._knowledge_dir / "index.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
207
+
208
+ def query(self, max_tokens: int = 3000) -> str:
209
+ """Phase 3: Return relevant knowledge for system prompt injection."""
210
+ entries = self.get_index()
211
+ if not entries:
212
+ return ""
213
+
214
+ max_chars = max_tokens * 4
215
+ parts: list[str] = ["# Project Knowledge\n"]
216
+ char_count = len(parts[0])
217
+
218
+ for entry in entries:
219
+ article_path = self._knowledge_dir / entry.path
220
+ if not article_path.exists():
221
+ continue
222
+ content = article_path.read_text(encoding="utf-8").strip()
223
+ if char_count + len(content) + 2 > max_chars:
224
+ summary_line = f"- **{entry.title}**: {entry.summary}"
225
+ if char_count + len(summary_line) + 1 <= max_chars:
226
+ parts.append(summary_line)
227
+ char_count += len(summary_line) + 1
228
+ break
229
+ parts.append(content)
230
+ char_count += len(content) + 2
231
+
232
+ return "\n\n".join(parts) if len(parts) > 1 else ""