llmcode-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_code/__init__.py +2 -0
- llm_code/analysis/__init__.py +6 -0
- llm_code/analysis/cache.py +33 -0
- llm_code/analysis/engine.py +256 -0
- llm_code/analysis/go_rules.py +114 -0
- llm_code/analysis/js_rules.py +84 -0
- llm_code/analysis/python_rules.py +311 -0
- llm_code/analysis/rules.py +140 -0
- llm_code/analysis/rust_rules.py +108 -0
- llm_code/analysis/universal_rules.py +111 -0
- llm_code/api/__init__.py +0 -0
- llm_code/api/client.py +90 -0
- llm_code/api/errors.py +73 -0
- llm_code/api/openai_compat.py +390 -0
- llm_code/api/provider.py +35 -0
- llm_code/api/sse.py +52 -0
- llm_code/api/types.py +140 -0
- llm_code/cli/__init__.py +0 -0
- llm_code/cli/commands.py +70 -0
- llm_code/cli/image.py +122 -0
- llm_code/cli/render.py +214 -0
- llm_code/cli/status_line.py +79 -0
- llm_code/cli/streaming.py +92 -0
- llm_code/cli/tui_main.py +220 -0
- llm_code/computer_use/__init__.py +11 -0
- llm_code/computer_use/app_detect.py +49 -0
- llm_code/computer_use/app_tier.py +57 -0
- llm_code/computer_use/coordinator.py +99 -0
- llm_code/computer_use/input_control.py +71 -0
- llm_code/computer_use/screenshot.py +93 -0
- llm_code/cron/__init__.py +13 -0
- llm_code/cron/parser.py +145 -0
- llm_code/cron/scheduler.py +135 -0
- llm_code/cron/storage.py +126 -0
- llm_code/enterprise/__init__.py +1 -0
- llm_code/enterprise/audit.py +59 -0
- llm_code/enterprise/auth.py +26 -0
- llm_code/enterprise/oidc.py +95 -0
- llm_code/enterprise/rbac.py +65 -0
- llm_code/harness/__init__.py +5 -0
- llm_code/harness/config.py +33 -0
- llm_code/harness/engine.py +129 -0
- llm_code/harness/guides.py +41 -0
- llm_code/harness/sensors.py +68 -0
- llm_code/harness/templates.py +84 -0
- llm_code/hida/__init__.py +1 -0
- llm_code/hida/classifier.py +187 -0
- llm_code/hida/engine.py +49 -0
- llm_code/hida/profiles.py +95 -0
- llm_code/hida/types.py +28 -0
- llm_code/ide/__init__.py +1 -0
- llm_code/ide/bridge.py +80 -0
- llm_code/ide/detector.py +76 -0
- llm_code/ide/server.py +169 -0
- llm_code/logging.py +29 -0
- llm_code/lsp/__init__.py +0 -0
- llm_code/lsp/client.py +298 -0
- llm_code/lsp/detector.py +42 -0
- llm_code/lsp/manager.py +56 -0
- llm_code/lsp/tools.py +288 -0
- llm_code/marketplace/__init__.py +0 -0
- llm_code/marketplace/builtin_registry.py +102 -0
- llm_code/marketplace/installer.py +162 -0
- llm_code/marketplace/plugin.py +78 -0
- llm_code/marketplace/registry.py +360 -0
- llm_code/mcp/__init__.py +0 -0
- llm_code/mcp/bridge.py +87 -0
- llm_code/mcp/client.py +117 -0
- llm_code/mcp/health.py +120 -0
- llm_code/mcp/manager.py +214 -0
- llm_code/mcp/oauth.py +219 -0
- llm_code/mcp/transport.py +254 -0
- llm_code/mcp/types.py +53 -0
- llm_code/remote/__init__.py +0 -0
- llm_code/remote/client.py +136 -0
- llm_code/remote/protocol.py +22 -0
- llm_code/remote/server.py +275 -0
- llm_code/remote/ssh_proxy.py +56 -0
- llm_code/runtime/__init__.py +0 -0
- llm_code/runtime/auto_commit.py +56 -0
- llm_code/runtime/auto_diagnose.py +62 -0
- llm_code/runtime/checkpoint.py +70 -0
- llm_code/runtime/checkpoint_recovery.py +142 -0
- llm_code/runtime/compaction.py +35 -0
- llm_code/runtime/compressor.py +415 -0
- llm_code/runtime/config.py +533 -0
- llm_code/runtime/context.py +49 -0
- llm_code/runtime/conversation.py +921 -0
- llm_code/runtime/cost_tracker.py +126 -0
- llm_code/runtime/dream.py +127 -0
- llm_code/runtime/file_protection.py +150 -0
- llm_code/runtime/hardware.py +85 -0
- llm_code/runtime/hooks.py +223 -0
- llm_code/runtime/indexer.py +230 -0
- llm_code/runtime/knowledge_compiler.py +232 -0
- llm_code/runtime/memory.py +132 -0
- llm_code/runtime/memory_layers.py +467 -0
- llm_code/runtime/memory_lint.py +252 -0
- llm_code/runtime/model_aliases.py +37 -0
- llm_code/runtime/ollama.py +93 -0
- llm_code/runtime/overlay.py +124 -0
- llm_code/runtime/permissions.py +200 -0
- llm_code/runtime/plan.py +45 -0
- llm_code/runtime/prompt.py +238 -0
- llm_code/runtime/repo_map.py +174 -0
- llm_code/runtime/sandbox.py +116 -0
- llm_code/runtime/session.py +268 -0
- llm_code/runtime/skill_resolver.py +61 -0
- llm_code/runtime/skills.py +133 -0
- llm_code/runtime/speculative.py +75 -0
- llm_code/runtime/streaming_executor.py +216 -0
- llm_code/runtime/telemetry.py +196 -0
- llm_code/runtime/token_budget.py +26 -0
- llm_code/runtime/vcr.py +142 -0
- llm_code/runtime/vision.py +102 -0
- llm_code/swarm/__init__.py +1 -0
- llm_code/swarm/backend_subprocess.py +108 -0
- llm_code/swarm/backend_tmux.py +103 -0
- llm_code/swarm/backend_worktree.py +306 -0
- llm_code/swarm/checkpoint.py +74 -0
- llm_code/swarm/coordinator.py +236 -0
- llm_code/swarm/mailbox.py +88 -0
- llm_code/swarm/manager.py +202 -0
- llm_code/swarm/memory_sync.py +80 -0
- llm_code/swarm/recovery.py +21 -0
- llm_code/swarm/team.py +67 -0
- llm_code/swarm/types.py +31 -0
- llm_code/task/__init__.py +16 -0
- llm_code/task/diagnostics.py +93 -0
- llm_code/task/manager.py +162 -0
- llm_code/task/types.py +112 -0
- llm_code/task/verifier.py +104 -0
- llm_code/tools/__init__.py +0 -0
- llm_code/tools/agent.py +145 -0
- llm_code/tools/agent_roles.py +82 -0
- llm_code/tools/base.py +94 -0
- llm_code/tools/bash.py +565 -0
- llm_code/tools/computer_use_tools.py +278 -0
- llm_code/tools/coordinator_tool.py +75 -0
- llm_code/tools/cron_create.py +90 -0
- llm_code/tools/cron_delete.py +49 -0
- llm_code/tools/cron_list.py +51 -0
- llm_code/tools/deferred.py +92 -0
- llm_code/tools/dump.py +116 -0
- llm_code/tools/edit_file.py +282 -0
- llm_code/tools/git_tools.py +531 -0
- llm_code/tools/glob_search.py +112 -0
- llm_code/tools/grep_search.py +144 -0
- llm_code/tools/ide_diagnostics.py +59 -0
- llm_code/tools/ide_open.py +58 -0
- llm_code/tools/ide_selection.py +52 -0
- llm_code/tools/memory_tools.py +138 -0
- llm_code/tools/multi_edit.py +143 -0
- llm_code/tools/notebook_edit.py +107 -0
- llm_code/tools/notebook_read.py +81 -0
- llm_code/tools/parsing.py +63 -0
- llm_code/tools/read_file.py +154 -0
- llm_code/tools/registry.py +58 -0
- llm_code/tools/search_backends/__init__.py +56 -0
- llm_code/tools/search_backends/brave.py +56 -0
- llm_code/tools/search_backends/duckduckgo.py +129 -0
- llm_code/tools/search_backends/searxng.py +71 -0
- llm_code/tools/search_backends/tavily.py +73 -0
- llm_code/tools/swarm_create.py +109 -0
- llm_code/tools/swarm_delete.py +95 -0
- llm_code/tools/swarm_list.py +44 -0
- llm_code/tools/swarm_message.py +109 -0
- llm_code/tools/task_close.py +79 -0
- llm_code/tools/task_plan.py +79 -0
- llm_code/tools/task_verify.py +90 -0
- llm_code/tools/tool_search.py +65 -0
- llm_code/tools/web_common.py +258 -0
- llm_code/tools/web_fetch.py +223 -0
- llm_code/tools/web_search.py +280 -0
- llm_code/tools/write_file.py +118 -0
- llm_code/tui/__init__.py +1 -0
- llm_code/tui/app.py +2432 -0
- llm_code/tui/chat_view.py +82 -0
- llm_code/tui/chat_widgets.py +309 -0
- llm_code/tui/header_bar.py +46 -0
- llm_code/tui/input_bar.py +349 -0
- llm_code/tui/keybindings.py +142 -0
- llm_code/tui/marketplace.py +210 -0
- llm_code/tui/status_bar.py +72 -0
- llm_code/tui/theme.py +96 -0
- llm_code/utils/__init__.py +0 -0
- llm_code/utils/diff.py +111 -0
- llm_code/utils/errors.py +70 -0
- llm_code/utils/hyperlink.py +73 -0
- llm_code/utils/notebook.py +179 -0
- llm_code/utils/search.py +69 -0
- llm_code/utils/text_normalize.py +28 -0
- llm_code/utils/version_check.py +62 -0
- llm_code/vim/__init__.py +4 -0
- llm_code/vim/engine.py +51 -0
- llm_code/vim/motions.py +172 -0
- llm_code/vim/operators.py +183 -0
- llm_code/vim/text_objects.py +139 -0
- llm_code/vim/transitions.py +279 -0
- llm_code/vim/types.py +68 -0
- llm_code/voice/__init__.py +1 -0
- llm_code/voice/languages.py +43 -0
- llm_code/voice/recorder.py +136 -0
- llm_code/voice/stt.py +36 -0
- llm_code/voice/stt_anthropic.py +66 -0
- llm_code/voice/stt_google.py +32 -0
- llm_code/voice/stt_whisper.py +52 -0
- llmcode_cli-1.0.0.dist-info/METADATA +524 -0
- llmcode_cli-1.0.0.dist-info/RECORD +212 -0
- llmcode_cli-1.0.0.dist-info/WHEEL +4 -0
- llmcode_cli-1.0.0.dist-info/entry_points.txt +2 -0
- llmcode_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Hook runner supporting 6 types, 24 events, glob event matching, per-hook timeout/on_error."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import fnmatch
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import subprocess
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
|
|
10
|
+
from llm_code.runtime.config import HookConfig
|
|
11
|
+
from llm_code.tools.base import ToolResult
|
|
12
|
+
|
|
13
|
+
# Legacy global fallback timeout (used only when hook.timeout is not set, kept for compat)
# Seconds before a hook subprocess is killed (see HookRunner._run_single_hook).
HOOK_TIMEOUT = 10.0
|
|
15
|
+
|
|
16
|
+
# All 25 supported event names across 6 types.
|
|
17
|
+
# Canonical names used for glob matching with event patterns like "tool.*", "session.*", "*".
|
|
18
|
+
#
|
|
19
|
+
# Group prefixes are derived from the event name by taking the segment before "_" or the
|
|
20
|
+
# shorthand group word that appears in the category comment below.
|
|
21
|
+
#
|
|
22
|
+
# tool -> pre_tool_use, post_tool_use, tool_error, tool_denied
|
|
23
|
+
# command -> pre_command, post_command, command_error
|
|
24
|
+
# prompt -> prompt_submit, prompt_compile, prompt_cache_hit, prompt_cache_miss
|
|
25
|
+
# agent -> agent_spawn, agent_complete, agent_error, agent_message
|
|
26
|
+
# session -> session_start, session_end, session_save, session_compact, session_dream
|
|
27
|
+
# http -> http_request, http_response, http_error, http_retry, http_fallback
|
|
28
|
+
|
|
29
|
+
# Mapping from event name to its dot-prefixed canonical group name used for glob
# matching, e.g. "pre_tool_use" -> "tool.pre_tool_use" so pattern "tool.*" matches.
# Built from the group -> events spec below; insertion order mirrors the spec.
_EVENT_GROUP: dict[str, str] = {
    event: f"{group}.{event}"
    for group, events in {
        "tool": ("pre_tool_use", "post_tool_use", "tool_error", "tool_denied"),
        "command": ("pre_command", "post_command", "command_error"),
        "prompt": ("prompt_submit", "prompt_compile", "prompt_cache_hit", "prompt_cache_miss"),
        "agent": ("agent_spawn", "agent_complete", "agent_error", "agent_message"),
        "session": ("session_start", "session_end", "session_save", "session_compact", "session_dream"),
        "http": ("http_request", "http_response", "http_error", "http_retry", "http_fallback"),
    }.items()
    for event in events
}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _event_matches(pattern: str, event: str) -> bool:
    """Return True if *pattern* matches *event*.

    Matching rules (in order):
    1. "*" matches any event.
    2. Pattern containing "." is matched against the dotted form "group.event"
       (e.g. "tool.*" matches "tool.pre_tool_use").
    3. Exact match (original event name, no dots).
    """
    if pattern == "*":
        return True
    if "." in pattern:
        # Translate the bare event into its canonical "group.event" form
        # (events outside the table fall back to themselves).
        canonical = _EVENT_GROUP.get(event, event)
        return fnmatch.fnmatch(canonical, pattern)
    return pattern == event
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _build_env(event: str, context: dict) -> dict[str, str]:
    """Build the environment mapping to pass to a hook process.

    Starts from a copy of the current process environment and layers the
    HOOK_* variables on top. Every value is coerced to str (None becomes "")
    so the result is always a valid subprocess env — previously a non-string
    context value (e.g. an int HTTP status) would leak through unchanged.
    """
    env = {**os.environ}
    env["HOOK_EVENT"] = event
    for env_key, ctx_key in (
        ("HOOK_TOOL_NAME", "tool_name"),
        ("HOOK_TOOL_INPUT", "tool_input"),
        ("HOOK_TOOL_OUTPUT", "tool_output"),
        ("HOOK_SESSION_ID", "session_id"),
        ("HOOK_AGENT_ID", "agent_id"),
        ("HOOK_HTTP_URL", "url"),
        ("HOOK_HTTP_STATUS", "status"),
        ("HOOK_COMMAND", "command"),
    ):
        value = context.get(ctx_key, "")
        env[env_key] = "" if value is None else str(value)
    return env
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclass
class HookOutcome:
    """Aggregated result of running one or more hooks."""

    # True when a hook vetoed the action (exit code 2, or a failure under
    # the "deny" on_error policy).
    denied: bool = False
    # Informational/warning messages surfaced by hooks (stdout/stderr or
    # synthesized error text).
    messages: list[str] = field(default_factory=list)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class HookRunner:
    """Runs configured shell hooks in response to runtime events.

    Hooks are matched by event pattern (and, on the legacy tool paths, by a
    tool-name glob) and executed via the shell. Exit code 0 is success; exit
    code 2 always denies; any other failure (non-zero exit, timeout, launch
    error) is mapped through the hook's on_error policy
    ("deny" | "ignore" | "warn", default "warn").
    """

    def __init__(self, hooks: tuple[HookConfig, ...] = ()) -> None:
        self._hooks = hooks

    # ------------------------------------------------------------------
    # Public generic entry point
    # ------------------------------------------------------------------

    def fire(self, event: str, context: dict) -> HookOutcome:
        """Fire all hooks whose event pattern matches *event*.

        *context* is a plain dict carrying optional keys:
            tool_name, tool_input, tool_output,
            session_id, agent_id, url, status, command
        """
        env = _build_env(event, context)
        outcome = HookOutcome()

        for hook in self._hooks:
            if not _event_matches(hook.event, event):
                continue

            hook_outcome = self._run_single_hook(hook, env)
            if hook_outcome.denied:
                return hook_outcome  # stop on first deny
            outcome.messages.extend(hook_outcome.messages)

        return outcome

    # ------------------------------------------------------------------
    # Legacy helpers (backwards compat)
    # ------------------------------------------------------------------

    def pre_tool_use(self, tool_name: str, args: dict) -> HookOutcome:
        """Run all pre_tool_use hooks that match tool_name."""
        context = {
            "tool_name": tool_name,
            "tool_input": json.dumps(args),
            "tool_output": "",
        }
        # Build env with legacy keys too
        env = _build_env("pre_tool_use", context)
        env["HOOK_TOOL_IS_ERROR"] = "false"
        return self._run_hooks_with_env("pre_tool_use", tool_name, env)

    def post_tool_use(self, tool_name: str, args: dict, result: ToolResult) -> HookOutcome:
        """Run all post_tool_use hooks that match tool_name."""
        context = {
            "tool_name": tool_name,
            "tool_input": json.dumps(args),
            "tool_output": result.output,
        }
        env = _build_env("post_tool_use", context)
        env["HOOK_TOOL_IS_ERROR"] = "true" if result.is_error else "false"
        return self._run_hooks_with_env("post_tool_use", tool_name, env)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _run_hooks_with_env(
        self, event: str, tool_name: str, env: dict[str, str]
    ) -> HookOutcome:
        """Execute matching hooks using a pre-built env (legacy path).

        In addition to the event match, each hook's tool_pattern glob must
        match *tool_name*.
        """
        outcome = HookOutcome()

        for hook in self._hooks:
            if not _event_matches(hook.event, event):
                continue
            if not fnmatch.fnmatch(tool_name, hook.tool_pattern):
                continue

            hook_outcome = self._run_single_hook(hook, env)
            if hook_outcome.denied:
                return hook_outcome
            outcome.messages.extend(hook_outcome.messages)

        return outcome

    def _run_single_hook(self, hook: HookConfig, env: dict[str, str]) -> HookOutcome:
        """Run one shell command and interpret its exit code respecting hook.on_error."""
        timeout = getattr(hook, "timeout", HOOK_TIMEOUT)
        on_error = getattr(hook, "on_error", "warn")

        try:
            proc = subprocess.run(
                hook.command,
                shell=True,
                env=env,
                timeout=timeout,
                capture_output=True,
                text=True,
            )
        except subprocess.TimeoutExpired:
            # BUGFIX: these handlers previously read hook.on_error directly,
            # raising AttributeError when the attribute was absent (despite the
            # getattr fallback above) and never honoring the "ignore" policy.
            return self._apply_on_error(
                on_error, f"Hook timed out after {timeout}s: {hook.command}"
            )
        except Exception as exc:
            return self._apply_on_error(on_error, f"Hook error: {exc}")

        if proc.returncode == 0:
            return HookOutcome(denied=False)

        if proc.returncode == 2:
            # Exit code 2 is the explicit deny protocol, regardless of on_error.
            msg = proc.stdout.strip() or proc.stderr.strip() or "Hook denied tool use"
            return HookOutcome(denied=True, messages=[msg])

        # Non-zero, non-2: apply on_error policy
        msg = (
            proc.stdout.strip()
            or proc.stderr.strip()
            or f"Hook exited with code {proc.returncode}: {hook.command}"
        )
        return self._apply_on_error(on_error, msg)

    @staticmethod
    def _apply_on_error(on_error: str, msg: str) -> HookOutcome:
        """Map a failed hook's on_error policy to a HookOutcome."""
        if on_error == "deny":
            return HookOutcome(denied=True, messages=[msg])
        if on_error == "ignore":
            return HookOutcome(denied=False, messages=[])
        # "warn" (default): surface the message but do not block.
        return HookOutcome(denied=False, messages=[msg])
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Project file and symbol indexer."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import asdict, dataclass
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
# Types
|
|
12
|
+
# ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class FileEntry:
    """One indexed file, identified by its project-relative path."""

    path: str  # relative path from project root
    size: int  # size in bytes (from stat)
    language: str  # "python", "typescript", "go", "rust", "javascript", "unknown"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class SymbolEntry:
    """A single named symbol found by regex scanning a source file."""

    name: str  # identifier captured by the symbol regex
    kind: str  # "class" | "function" | "method" | "variable" | "export"
    file: str  # relative path
    line: int  # 1-based line number of the definition
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class ProjectIndex:
    """Immutable snapshot of a project's files and their symbols."""

    files: tuple[FileEntry, ...]
    symbols: tuple[SymbolEntry, ...]
    generated_at: str  # ISO-8601 UTC timestamp of when the index was built
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Language detection
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
# Lowercased file extension -> language name. Extensions not listed here are
# reported as "unknown" by _detect_language.
_EXT_TO_LANG: dict[str, str] = {
    ".py": "python",
    ".pyi": "python",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".js": "javascript",
    ".jsx": "javascript",
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".rb": "ruby",
    ".php": "php",
    ".swift": "swift",
    ".kt": "kotlin",
}
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Directories to skip
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
# Directory names never descended into while scanning (VCS metadata,
# virtualenvs, caches, build output). The glob-like "*.egg-info" case is
# handled separately in _should_skip.
_SKIP_DIRS: frozenset[str] = frozenset(
    {
        ".git",
        "node_modules",
        ".venv",
        "venv",
        "__pycache__",
        "dist",
        "build",
        ".next",
        ".nuxt",
        "target",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
    }
)
|
|
78
|
+
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
# Symbol regex patterns
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
# Per-language, line-anchored regexes used for symbol extraction. Each entry
# pairs a compiled pattern with the SymbolEntry kind it yields; the indexer
# tries them in order per line and the first match wins.
_SYMBOL_PATTERNS: dict[str, list[tuple[re.Pattern[str], str]]] = {
    "python": [
        (re.compile(r"^class\s+(\w+)"), "class"),
        # Also captures "async def" at top level.
        (re.compile(r"^(?:async\s+)?def\s+(\w+)"), "function"),
        # Indented defs are methods — SymbolEntry declares a "method" kind,
        # which previously nothing ever produced.
        (re.compile(r"^[ \t]+(?:async\s+)?def\s+(\w+)"), "method"),
        (re.compile(r"^(\w+)\s*(?::\s*\w+)?\s*="), "variable"),
    ],
    "typescript": [
        (re.compile(r"^export\s+(?:class|interface)\s+(\w+)"), "class"),
        (re.compile(r"^export\s+(?:function|const|let|var)\s+(\w+)"), "export"),
        (re.compile(r"^class\s+(\w+)"), "class"),
        (re.compile(r"^function\s+(\w+)"), "function"),
    ],
    "javascript": [
        (re.compile(r"^export\s+(?:class|function|const|let|var)\s+(\w+)"), "export"),
        (re.compile(r"^class\s+(\w+)"), "class"),
        (re.compile(r"^function\s+(\w+)"), "function"),
    ],
    "go": [
        (re.compile(r"^func\s+(\w+)"), "function"),
        (re.compile(r"^type\s+(\w+)\s+struct"), "class"),
        (re.compile(r"^type\s+(\w+)\s+interface"), "class"),
    ],
    "rust": [
        (re.compile(r"^(?:pub\s+)?fn\s+(\w+)"), "function"),
        (re.compile(r"^(?:pub\s+)?struct\s+(\w+)"), "class"),
        (re.compile(r"^(?:pub\s+)?enum\s+(\w+)"), "class"),
        (re.compile(r"^(?:pub\s+)?trait\s+(\w+)"), "class"),
    ],
}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
# Helpers
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _should_skip(name: str) -> bool:
    """Return True if directory *name* should be excluded from the scan.

    Checks the exact-name skip set first, then the glob-style
    "*.egg-info" case.
    """
    return name in _SKIP_DIRS or name.endswith(".egg-info")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _detect_language(path: Path) -> str:
    """Map *path*'s extension to a language name ("unknown" when unmapped)."""
    suffix = path.suffix.lower()
    return _EXT_TO_LANG.get(suffix, "unknown")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ---------------------------------------------------------------------------
|
|
134
|
+
# ProjectIndexer
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class ProjectIndexer:
    """Builds, saves, and loads a lightweight file/symbol index of a project tree."""

    def __init__(self, cwd: Path) -> None:
        self._cwd = cwd

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def build_index(self) -> ProjectIndex:
        """Scan the project tree and return a fresh ProjectIndex."""
        files = self._scan_files()
        symbols: list[SymbolEntry] = []
        for f in files:
            symbols.extend(self._extract_symbols(f))
        now = datetime.now(timezone.utc).isoformat()
        return ProjectIndex(
            files=tuple(files),
            symbols=tuple(symbols),
            generated_at=now,
        )

    def save(self, index: ProjectIndex, path: Path) -> None:
        """Serialize *index* as pretty-printed JSON to *path*."""
        data = {
            "files": [asdict(f) for f in index.files],
            "symbols": [asdict(s) for s in index.symbols],
            "generated_at": index.generated_at,
        }
        path.write_text(json.dumps(data, indent=2), encoding="utf-8")

    @staticmethod
    def load(path: Path) -> ProjectIndex | None:
        """Load a ProjectIndex from *path*; None when missing or malformed."""
        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
            files = tuple(FileEntry(**f) for f in raw["files"])
            symbols = tuple(SymbolEntry(**s) for s in raw["symbols"])
            return ProjectIndex(
                files=files,
                symbols=symbols,
                generated_at=raw["generated_at"],
            )
        except (FileNotFoundError, KeyError, TypeError, json.JSONDecodeError):
            return None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _scan_files(self) -> list[FileEntry]:
        """Collect a path-sorted FileEntry for every indexable file."""
        entries: list[FileEntry] = []
        for item in self._walk(self._cwd):
            try:
                # BUGFIX: stat() can raise on broken symlinks / races; previously
                # a single bad entry crashed the whole index build.
                size = item.stat().st_size
            except OSError:
                continue
            rel = item.relative_to(self._cwd).as_posix()
            entries.append(
                FileEntry(
                    path=rel,
                    size=size,
                    language=_detect_language(item),
                )
            )
        entries.sort(key=lambda e: e.path)
        return entries

    def _walk(self, root: Path):
        """Yield all files under *root*, skipping ignored directories."""
        try:
            # BUGFIX: iterdir() raises on unreadable/vanished directories;
            # treat such directories as empty instead of aborting the walk.
            children = sorted(root.iterdir())
        except OSError:
            return
        for child in children:
            if child.is_dir():
                if not _should_skip(child.name):
                    yield from self._walk(child)
            elif child.is_file():
                yield child

    def _extract_symbols(self, file: FileEntry) -> list[SymbolEntry]:
        """Regex-scan one file line-by-line for symbol definitions."""
        patterns = _SYMBOL_PATTERNS.get(file.language)
        if not patterns:
            return []
        abs_path = self._cwd / file.path
        try:
            text = abs_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return []
        results: list[SymbolEntry] = []
        for lineno, line in enumerate(text.splitlines(), start=1):
            for pattern, kind in patterns:
                m = pattern.match(line)
                if m:
                    results.append(
                        SymbolEntry(
                            name=m.group(1),
                            kind=kind,
                            file=file.path,
                            line=lineno,
                        )
                    )
                    break  # first matching pattern wins for this line
        return results
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Knowledge Compiler — incrementally builds a structured project knowledge base."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
import re
|
|
6
|
+
import subprocess
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from llm_code.api.types import Message, MessageRequest, TextBlock
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)

# Matches one index.md bullet of the form "- [Title](path) — summary".
# Groups: 1 = title, 2 = relative path, 3 = one-line summary.
_INDEX_LINE_RE = re.compile(r"^- \[(.+?)\]\((.+?)\)\s*—\s*(.+)$")

# System prompt handed to the LLM when (re)compiling a module's knowledge article.
_COMPILE_SYSTEM_PROMPT = """\
You are a knowledge compiler for a software project. Given a list of changed files \
and session facts, produce a concise Markdown knowledge article about the affected \
module or area.

Format:
# [Module Name]

[2-3 sentence description of what this module does]

## Key Types
- [Important classes, dataclasses, types]

## Patterns
- [Recurring patterns or conventions in this area]

## Dependencies
- [Key imports or integrations]

Be concise and factual. Focus on architecture, not implementation details.
"""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class KnowledgeEntry:
    """A single entry in the knowledge index."""

    path: str  # relative to .llm-code/knowledge/
    title: str  # article title (the link text in index.md)
    summary: str  # one-line for index
    last_compiled: str  # ISO timestamp
    source_files: tuple[str, ...]  # which source files this knowledge covers
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class IngestResult:
    """Result of the ingest phase."""

    changed_files: tuple[str, ...]  # paths from `git diff --name-only <since> HEAD`
    facts: tuple[str, ...]  # caller-supplied session facts, passed through verbatim
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class KnowledgeCompiler:
|
|
60
|
+
"""Incrementally builds and maintains a structured project knowledge base."""
|
|
61
|
+
|
|
62
|
+
def __init__(self, cwd: Path, llm_provider: Any | None, compile_model: str = "") -> None:
|
|
63
|
+
self._cwd = cwd
|
|
64
|
+
self._provider = llm_provider
|
|
65
|
+
self._compile_model = compile_model
|
|
66
|
+
self._knowledge_dir = cwd / ".llm-code" / "knowledge"
|
|
67
|
+
self._knowledge_dir.mkdir(parents=True, exist_ok=True)
|
|
68
|
+
(self._knowledge_dir / "modules").mkdir(exist_ok=True)
|
|
69
|
+
|
|
70
|
+
    @property
    def knowledge_dir(self) -> Path:
        """Path to this project's knowledge base directory (.llm-code/knowledge)."""
        return self._knowledge_dir
|
|
73
|
+
|
|
74
|
+
def get_index(self) -> list[KnowledgeEntry]:
|
|
75
|
+
"""Parse index.md and return all knowledge entries."""
|
|
76
|
+
index_path = self._knowledge_dir / "index.md"
|
|
77
|
+
if not index_path.exists():
|
|
78
|
+
return []
|
|
79
|
+
|
|
80
|
+
entries: list[KnowledgeEntry] = []
|
|
81
|
+
for line in index_path.read_text(encoding="utf-8").splitlines():
|
|
82
|
+
m = _INDEX_LINE_RE.match(line.strip())
|
|
83
|
+
if m:
|
|
84
|
+
title, path, summary = m.group(1), m.group(2), m.group(3).strip()
|
|
85
|
+
entries.append(
|
|
86
|
+
KnowledgeEntry(
|
|
87
|
+
path=path,
|
|
88
|
+
title=title,
|
|
89
|
+
summary=summary,
|
|
90
|
+
last_compiled="",
|
|
91
|
+
source_files=(),
|
|
92
|
+
)
|
|
93
|
+
)
|
|
94
|
+
return entries
|
|
95
|
+
|
|
96
|
+
def ingest(
|
|
97
|
+
self,
|
|
98
|
+
facts: list[str] | None = None,
|
|
99
|
+
since_commit: str | None = None,
|
|
100
|
+
) -> IngestResult:
|
|
101
|
+
"""Phase 1: Gather changed files and session facts."""
|
|
102
|
+
changed: list[str] = []
|
|
103
|
+
if since_commit:
|
|
104
|
+
try:
|
|
105
|
+
result = subprocess.run(
|
|
106
|
+
["git", "diff", "--name-only", since_commit, "HEAD"],
|
|
107
|
+
cwd=self._cwd,
|
|
108
|
+
capture_output=True,
|
|
109
|
+
text=True,
|
|
110
|
+
timeout=10,
|
|
111
|
+
)
|
|
112
|
+
if result.returncode == 0:
|
|
113
|
+
changed = [f.strip() for f in result.stdout.strip().splitlines() if f.strip()]
|
|
114
|
+
except Exception:
|
|
115
|
+
pass
|
|
116
|
+
return IngestResult(
|
|
117
|
+
changed_files=tuple(changed),
|
|
118
|
+
facts=tuple(facts or []),
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
async def compile(self, ingest_data: IngestResult) -> None:
|
|
122
|
+
"""Phase 2: Use LLM to compile knowledge from ingested data."""
|
|
123
|
+
if self._provider is None:
|
|
124
|
+
return
|
|
125
|
+
if not ingest_data.changed_files and not ingest_data.facts:
|
|
126
|
+
return
|
|
127
|
+
|
|
128
|
+
modules = self._group_by_module(ingest_data.changed_files)
|
|
129
|
+
|
|
130
|
+
for module_name, files in modules.items():
|
|
131
|
+
try:
|
|
132
|
+
article = await self._compile_module(module_name, files, ingest_data.facts)
|
|
133
|
+
if article:
|
|
134
|
+
self._write_module(module_name, article)
|
|
135
|
+
except Exception:
|
|
136
|
+
logger.debug("Knowledge compile failed for module %s", module_name, exc_info=True)
|
|
137
|
+
|
|
138
|
+
self._rebuild_index()
|
|
139
|
+
|
|
140
|
+
def _group_by_module(self, files: tuple[str, ...]) -> dict[str, list[str]]:
|
|
141
|
+
"""Group files by their top-level package directory."""
|
|
142
|
+
modules: dict[str, list[str]] = {}
|
|
143
|
+
for f in files:
|
|
144
|
+
parts = Path(f).parts
|
|
145
|
+
if len(parts) >= 2:
|
|
146
|
+
module = parts[1] if parts[0] in ("llm_code", "src", "lib") else parts[0]
|
|
147
|
+
else:
|
|
148
|
+
module = Path(f).stem
|
|
149
|
+
modules.setdefault(module, []).append(f)
|
|
150
|
+
return modules
|
|
151
|
+
|
|
152
|
+
async def _compile_module(
    self, module_name: str, files: list[str], facts: tuple[str, ...]
) -> str:
    """Call LLM to generate a knowledge article for a module."""
    # Load any previously compiled article so the model can merge into it
    # rather than start from scratch.
    article_path = self._knowledge_dir / "modules" / f"{module_name}.md"
    existing = article_path.read_text(encoding="utf-8") if article_path.exists() else ""

    bullet_facts = "\n".join(f"- {fact}" for fact in facts) if facts else "None"
    bullet_files = "\n".join(f"- {f}" for f in files)

    user_msg = (
        f"Module: {module_name}\n\n"
        f"Changed files:\n{bullet_files}\n\n"
        f"Session facts:\n{bullet_facts}\n\n"
    )
    if existing:
        user_msg += f"Existing article (merge new information, don't overwrite):\n\n{existing}\n"

    request = MessageRequest(
        model=self._compile_model or "",
        messages=(Message(role="user", content=(TextBlock(text=user_msg),)),),
        system=_COMPILE_SYSTEM_PROMPT,
        tools=(),
        max_tokens=1024,
        temperature=0.3,
    )

    response = await self._provider.send_message(request)
    # Concatenate every text-bearing block of the reply into one article.
    return "\n".join(block.text for block in response.content if hasattr(block, "text"))
|
|
187
|
+
|
|
188
|
+
def _write_module(self, module_name: str, content: str) -> None:
|
|
189
|
+
"""Write a module article to the knowledge directory."""
|
|
190
|
+
path = self._knowledge_dir / "modules" / f"{module_name}.md"
|
|
191
|
+
path.write_text(content, encoding="utf-8")
|
|
192
|
+
|
|
193
|
+
def _rebuild_index(self) -> None:
|
|
194
|
+
"""Regenerate index.md from existing module files."""
|
|
195
|
+
modules_dir = self._knowledge_dir / "modules"
|
|
196
|
+
lines = ["# Knowledge Index\n"]
|
|
197
|
+
for md_file in sorted(modules_dir.glob("*.md")):
|
|
198
|
+
title = md_file.stem.replace("_", " ").title()
|
|
199
|
+
summary = ""
|
|
200
|
+
for file_line in md_file.read_text(encoding="utf-8").splitlines():
|
|
201
|
+
stripped = file_line.strip()
|
|
202
|
+
if stripped and not stripped.startswith("#"):
|
|
203
|
+
summary = stripped
|
|
204
|
+
break
|
|
205
|
+
lines.append(f"- [{title}]({md_file.relative_to(self._knowledge_dir)}) — {summary}")
|
|
206
|
+
(self._knowledge_dir / "index.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
207
|
+
|
|
208
|
+
def query(self, max_tokens: int = 3000) -> str:
    """Phase 3: Return relevant knowledge for system prompt injection."""
    entries = self.get_index()
    if not entries:
        return ""

    # Rough 4-chars-per-token heuristic bounds the injected block size.
    budget = max_tokens * 4
    sections: list[str] = ["# Project Knowledge\n"]
    used = len(sections[0])

    for entry in entries:
        article_path = self._knowledge_dir / entry.path
        if not article_path.exists():
            continue
        body = article_path.read_text(encoding="utf-8").strip()
        if used + len(body) + 2 > budget:
            # The full article no longer fits: fall back to a one-line
            # summary if even that fits, then stop scanning entries.
            fallback = f"- **{entry.title}**: {entry.summary}"
            if used + len(fallback) + 1 <= budget:
                sections.append(fallback)
                used += len(fallback) + 1
            break
        sections.append(body)
        used += len(body) + 2

    return "\n\n".join(sections) if len(sections) > 1 else ""
|