luckyd-code 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- luckyd_code/__init__.py +54 -0
- luckyd_code/__main__.py +5 -0
- luckyd_code/_agent_loop.py +551 -0
- luckyd_code/_data_dir.py +73 -0
- luckyd_code/agent.py +38 -0
- luckyd_code/analytics/__init__.py +18 -0
- luckyd_code/analytics/reporter.py +195 -0
- luckyd_code/analytics/scanner.py +443 -0
- luckyd_code/analytics/smells.py +316 -0
- luckyd_code/analytics/trends.py +303 -0
- luckyd_code/api.py +473 -0
- luckyd_code/audit_daemon.py +845 -0
- luckyd_code/autonomous_fixer.py +473 -0
- luckyd_code/background.py +159 -0
- luckyd_code/backup.py +237 -0
- luckyd_code/brain/__init__.py +84 -0
- luckyd_code/brain/assembler.py +100 -0
- luckyd_code/brain/chunker.py +345 -0
- luckyd_code/brain/constants.py +73 -0
- luckyd_code/brain/embedder.py +163 -0
- luckyd_code/brain/graph.py +311 -0
- luckyd_code/brain/indexer.py +316 -0
- luckyd_code/brain/parser.py +140 -0
- luckyd_code/brain/retriever.py +234 -0
- luckyd_code/cli.py +894 -0
- luckyd_code/cli_commands/__init__.py +1 -0
- luckyd_code/cli_commands/audit.py +120 -0
- luckyd_code/cli_commands/background.py +83 -0
- luckyd_code/cli_commands/brain.py +87 -0
- luckyd_code/cli_commands/config.py +75 -0
- luckyd_code/cli_commands/dispatcher.py +695 -0
- luckyd_code/cli_commands/sessions.py +41 -0
- luckyd_code/cli_entry.py +147 -0
- luckyd_code/cli_utils.py +112 -0
- luckyd_code/config.py +205 -0
- luckyd_code/context.py +214 -0
- luckyd_code/cost_tracker.py +209 -0
- luckyd_code/error_reporter.py +508 -0
- luckyd_code/exceptions.py +39 -0
- luckyd_code/export.py +126 -0
- luckyd_code/feedback_analyzer.py +290 -0
- luckyd_code/file_watcher.py +258 -0
- luckyd_code/git/__init__.py +11 -0
- luckyd_code/git/auto_commit.py +157 -0
- luckyd_code/git/tools.py +85 -0
- luckyd_code/hooks.py +236 -0
- luckyd_code/indexer.py +280 -0
- luckyd_code/init.py +39 -0
- luckyd_code/keybindings.py +77 -0
- luckyd_code/log.py +55 -0
- luckyd_code/mcp/__init__.py +6 -0
- luckyd_code/mcp/client.py +184 -0
- luckyd_code/memory/__init__.py +19 -0
- luckyd_code/memory/manager.py +339 -0
- luckyd_code/metrics/__init__.py +5 -0
- luckyd_code/model_registry.py +131 -0
- luckyd_code/orchestrator.py +204 -0
- luckyd_code/permissions/__init__.py +1 -0
- luckyd_code/permissions/manager.py +103 -0
- luckyd_code/planner.py +361 -0
- luckyd_code/plugins.py +91 -0
- luckyd_code/py.typed +0 -0
- luckyd_code/retry.py +57 -0
- luckyd_code/router.py +417 -0
- luckyd_code/sandbox.py +156 -0
- luckyd_code/self_critique.py +2 -0
- luckyd_code/self_improve.py +274 -0
- luckyd_code/sessions.py +114 -0
- luckyd_code/settings.py +72 -0
- luckyd_code/skills/__init__.py +8 -0
- luckyd_code/skills/review.py +22 -0
- luckyd_code/skills/security.py +17 -0
- luckyd_code/tasks/__init__.py +1 -0
- luckyd_code/tasks/manager.py +102 -0
- luckyd_code/templates/icon-192.png +0 -0
- luckyd_code/templates/icon-512.png +0 -0
- luckyd_code/templates/index.html +1965 -0
- luckyd_code/templates/manifest.json +14 -0
- luckyd_code/templates/src/app.js +694 -0
- luckyd_code/templates/src/body.html +767 -0
- luckyd_code/templates/src/cdn.txt +2 -0
- luckyd_code/templates/src/style.css +474 -0
- luckyd_code/templates/sw.js +31 -0
- luckyd_code/templates/test.html +6 -0
- luckyd_code/themes.py +48 -0
- luckyd_code/tools/__init__.py +97 -0
- luckyd_code/tools/agent_tools.py +65 -0
- luckyd_code/tools/bash.py +360 -0
- luckyd_code/tools/brain_tools.py +137 -0
- luckyd_code/tools/browser.py +369 -0
- luckyd_code/tools/datetime_tool.py +34 -0
- luckyd_code/tools/dockerfile_gen.py +212 -0
- luckyd_code/tools/file_ops.py +381 -0
- luckyd_code/tools/game_gen.py +360 -0
- luckyd_code/tools/git_tools.py +130 -0
- luckyd_code/tools/git_worktree.py +63 -0
- luckyd_code/tools/path_validate.py +64 -0
- luckyd_code/tools/project_gen.py +187 -0
- luckyd_code/tools/readme_gen.py +227 -0
- luckyd_code/tools/registry.py +157 -0
- luckyd_code/tools/shell_detect.py +109 -0
- luckyd_code/tools/web.py +89 -0
- luckyd_code/tools/youtube.py +187 -0
- luckyd_code/tools_bridge.py +144 -0
- luckyd_code/undo.py +126 -0
- luckyd_code/update.py +60 -0
- luckyd_code/verify.py +360 -0
- luckyd_code/web_app.py +176 -0
- luckyd_code/web_routes/__init__.py +23 -0
- luckyd_code/web_routes/background.py +73 -0
- luckyd_code/web_routes/brain.py +109 -0
- luckyd_code/web_routes/cost.py +12 -0
- luckyd_code/web_routes/files.py +133 -0
- luckyd_code/web_routes/memories.py +94 -0
- luckyd_code/web_routes/misc.py +67 -0
- luckyd_code/web_routes/project.py +48 -0
- luckyd_code/web_routes/review.py +20 -0
- luckyd_code/web_routes/sessions.py +44 -0
- luckyd_code/web_routes/settings.py +43 -0
- luckyd_code/web_routes/static.py +70 -0
- luckyd_code/web_routes/update.py +19 -0
- luckyd_code/web_routes/ws.py +237 -0
- luckyd_code-1.2.2.dist-info/METADATA +297 -0
- luckyd_code-1.2.2.dist-info/RECORD +127 -0
- luckyd_code-1.2.2.dist-info/WHEEL +4 -0
- luckyd_code-1.2.2.dist-info/entry_points.txt +3 -0
- luckyd_code-1.2.2.dist-info/licenses/LICENSE +21 -0
luckyd_code/backup.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Backup system — git-based snapshots before destructive operations.
|
|
2
|
+
|
|
3
|
+
Creates a timestamped git commit (or stash if git commits aren't desired)
|
|
4
|
+
so that /self-improve and /debug can always be fully reverted.
|
|
5
|
+
|
|
6
|
+
Usage from CLI:
|
|
7
|
+
/backup — snapshot now with auto message
|
|
8
|
+
/backup <message> — snapshot with custom message
|
|
9
|
+
/backup list — show recent backup snapshots
|
|
10
|
+
/backup restore — restore the most recent backup snapshot
|
|
11
|
+
/backup restore <n> — restore backup snapshot N (from /backup list)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import subprocess
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Tag prefix used to identify backup commits so we can list/restore them
|
|
21
|
+
BACKUP_TAG_PREFIX = "dsc-backup/"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _git(*args: str, cwd: Optional[str] = None) -> tuple[int, str, str]:
    """Invoke git with *args* and capture its output.

    Returns:
        A ``(returncode, stdout, stderr)`` triple with stdout/stderr
        stripped. Failure to launch git at all (missing binary, timeout,
        permission error) is reported as code 1 with the error text in
        the stderr slot — callers never see an exception.
    """
    command = ["git", *args]
    workdir = cwd or str(Path.cwd())
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
            cwd=workdir,
        )
    except FileNotFoundError:
        return 1, "", "git not found in PATH"
    except Exception as exc:  # timeout, permission errors, etc.
        return 1, "", str(exc)
    return proc.returncode, proc.stdout.strip(), proc.stderr.strip()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _is_git_repo(cwd: Optional[str] = None) -> bool:
    """Return True when *cwd* (or the process cwd) is inside a git work tree."""
    returncode, _stdout, _stderr = _git("rev-parse", "--is-inside-work-tree", cwd=cwd)
    return returncode == 0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _has_changes(cwd: Optional[str] = None) -> bool:
    """Report whether the repo has any tracked or untracked modifications."""
    status = _git("status", "--porcelain", cwd=cwd)[1]
    return bool(status.strip())
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _current_branch(cwd: Optional[str] = None) -> str:
    """Name of the checked-out branch, or "unknown" when git gives nothing."""
    branch = _git("rev-parse", "--abbrev-ref", "HEAD", cwd=cwd)[1]
    return branch if branch else "unknown"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _short_hash(cwd: Optional[str] = None) -> str:
    """Abbreviated HEAD commit hash, or "unknown" when git gives nothing."""
    commit = _git("rev-parse", "--short", "HEAD", cwd=cwd)[1]
    return commit if commit else "unknown"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def create_backup(message: str = "", cwd: Optional[str] = None) -> dict:
    """Create a git backup snapshot of the current working tree.

    Stages everything (`git add -A`), commits with a timestamped
    "[dsc-backup]" message, then tags the commit dsc-backup/<timestamp>
    so list_backups() can find it later. A clean working tree counts as
    success — HEAD already is the snapshot.

    Args:
        message: Optional human-readable label for the snapshot.
        cwd: Repository directory; defaults to the current working directory.

    Returns:
        Dict with keys: ok, message, tag, hash, error.
    """
    outcome = {"ok": False, "message": "", "tag": "", "hash": "", "error": ""}

    # Guard: backups only make sense inside a git work tree.
    if not _is_git_repo(cwd):
        outcome["error"] = (
            "No git repository found. Run `git init` in your project root to enable backups."
        )
        return outcome

    # Clean tree — nothing to snapshot; report HEAD as the backup point.
    if not _has_changes(cwd):
        head = _short_hash(cwd)
        outcome["ok"] = True
        outcome["hash"] = head
        outcome["message"] = f"Nothing to commit — working tree is clean (HEAD is {head})"
        return outcome

    # Stage every change: new, modified, and deleted files alike.
    rc, _, stderr = _git("add", "-A", cwd=cwd)
    if rc != 0:
        outcome["error"] = f"git add failed: {stderr}"
        return outcome

    # Commit with a timestamped, recognizable message.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    label = message.strip() or "pre-operation snapshot"
    commit_msg = f"[dsc-backup] {label} ({stamp})"
    rc, _, stderr = _git("commit", "-m", commit_msg, cwd=cwd)
    if rc != 0:
        outcome["error"] = f"git commit failed: {stderr}"
        return outcome

    # Tag for later lookup; best-effort, so a tagging failure is ignored.
    tag_name = BACKUP_TAG_PREFIX + datetime.now().strftime("%Y%m%d_%H%M%S")
    _git("tag", tag_name, cwd=cwd)

    head = _short_hash(cwd)
    outcome["ok"] = True
    outcome["hash"] = head
    outcome["tag"] = tag_name
    outcome["message"] = f"Backup created: {head} tag: {tag_name}"
    return outcome
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def list_backups(limit: int = 10, cwd: Optional[str] = None) -> list[dict]:
    """Return recent backup snapshots, newest first.

    Primary source is the dsc-backup/* tags created by create_backup().
    If no such tags exist (e.g. they were deleted), falls back to scanning
    the commit log for "[dsc-backup]" subjects.

    Args:
        limit: Maximum number of entries to return.
        cwd: Repository directory; defaults to the current working directory.

    Returns:
        List of dicts with keys: n (1-based index), hash, tag, date, subject.
    """
    # List tags matching our prefix, sorted by creation date descending.
    _, tag_out, _ = _git(
        "tag", "--list", f"{BACKUP_TAG_PREFIX}*",
        "--sort=-creatordate",
        "--format=%(refname:short)|%(objectname:short)|%(creatordate:short)",
        cwd=cwd,
    )

    entries: list[dict] = []
    for i, line in enumerate(tag_out.splitlines()[:limit]):
        parts = line.split("|")
        if len(parts) < 2:
            continue  # malformed line — skip rather than crash
        entries.append({
            "n": i + 1,
            "tag": parts[0],
            "hash": parts[1],
            # The date field may be absent on malformed/older format output.
            "date": parts[2] if len(parts) >= 3 else "",
            "subject": parts[0].replace(BACKUP_TAG_PREFIX, ""),
        })

    # Fallback: search commit log for [dsc-backup] messages.
    if not entries:
        # BUGFIX: --grep takes a regex, so the square brackets must be
        # escaped — unescaped "[dsc-backup]" is a character class that
        # matches almost any commit subject.
        _, log_out, _ = _git(
            "log", f"--max-count={limit}",
            "--pretty=format:%h|%ad|%s",
            "--date=short",
            r"--grep=\[dsc-backup\]",
            cwd=cwd,
        )
        for i, line in enumerate(log_out.splitlines()):
            parts = line.split("|", 2)
            if len(parts) == 3:
                entries.append({
                    "n": i + 1,
                    "tag": "",
                    "hash": parts[0],
                    "date": parts[1],
                    "subject": parts[2],
                })

    return entries
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def restore_backup(ref: str, cwd: Optional[str] = None) -> dict:
    """Restore the working tree to a backup snapshot.

    Uses `git checkout <ref> -- .` so only the working tree is touched
    (HEAD does not move) and you stay on the current branch. Existing
    staged/unstaged changes are overwritten, but they are stashed first
    so they remain recoverable.

    Args:
        ref: A tag name, commit hash, or index number (as string) from
            list_backups().
        cwd: Repository directory; defaults to the current working directory.

    Returns:
        Dict with keys: ok, message, error.
    """
    outcome = {"ok": False, "message": "", "error": ""}

    if not _is_git_repo(cwd):
        outcome["error"] = "No git repository found."
        return outcome

    # A bare number refers to an entry from /backup list — resolve it.
    if ref.isdigit():
        wanted = int(ref)
        chosen = None
        for entry in list_backups(cwd=cwd):
            if entry["n"] == wanted:
                chosen = entry
                break
        if chosen is None:
            outcome["error"] = f"No backup #{wanted} found. Run /backup list to see options."
            return outcome
        ref = chosen["tag"] or chosen["hash"]

    # Stash any dirty state first so the user's current work isn't lost.
    had_local_edits = _has_changes(cwd)
    stash_note = ""
    if had_local_edits:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        rc, _, _ = _git("stash", "push", "-m", f"pre-restore-{stamp}", cwd=cwd)
        if rc == 0:
            stash_note = " (current changes stashed — run `git stash pop` to recover them)"

    # Materialize the backup's files into the working tree.
    rc, _, stderr = _git("checkout", ref, "--", ".", cwd=cwd)
    if rc != 0:
        # Undo the stash so the user's edits come back on failure.
        if had_local_edits:
            _git("stash", "pop", cwd=cwd)
        outcome["error"] = f"git checkout failed: {stderr}"
        return outcome

    outcome["ok"] = True
    outcome["message"] = f"Restored to {ref}{stash_note}"
    return outcome
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def format_backup_list(backups: list[dict]) -> str:
    """Render the backup listing as Rich-markup text for terminal display."""
    if not backups:
        return "No backups found. Run /backup to create one."

    rendered: list[str] = ["[bold]Recent backups:[/bold]\n"]
    for entry in backups:
        date_part = f" {entry['date']}" if entry["date"] else ""
        tag_part = f" [{entry['tag']}]" if entry["tag"] else ""
        rendered.append(f" [cyan]#{entry['n']}[/cyan] {entry['hash']}{date_part}{tag_part}")
        # Tag-derived subjects duplicate the tag name — only show real ones.
        subject = entry["subject"]
        if subject and not subject.startswith("dsc-backup/"):
            rendered.append(f" [dim]{subject[:80]}[/dim]")
    rendered.append("\n[dim]Use /backup restore <#> to restore any of these[/dim]")
    return "\n".join(rendered)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Persistent Codebase Brain — knowledge graph and RAG system for code understanding."""
|
|
2
|
+
import os
|
|
3
|
+
from .graph import KnowledgeGraph
|
|
4
|
+
from .parser import parse_project
|
|
5
|
+
from .chunker import chunk_file, chunk_project
|
|
6
|
+
from .embedder import Embedder, get_embedder
|
|
7
|
+
from .indexer import VectorIndexer
|
|
8
|
+
from .retriever import Retriever
|
|
9
|
+
from .assembler import ContextAssembler
|
|
10
|
+
|
|
11
|
+
# Public API of the brain package.
__all__ = [
    "KnowledgeGraph",
    "parse_project",
    "chunk_file",
    "chunk_project",
    "Embedder",
    "get_embedder",
    "VectorIndexer",
    "Retriever",
    "ContextAssembler",
    "rebuild_project",
    "find_dependents",
]

# Module-level alias for KnowledgeGraph.find_dependents. NOTE(review):
# this is the plain function off the class, so callers must pass a
# KnowledgeGraph instance as the first argument — confirm call sites.
find_dependents = KnowledgeGraph.find_dependents
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def rebuild_project(project_root: str | None = None) -> dict:
    """Rebuild both the vector index and the knowledge graph for a project.

    Args:
        project_root: Root directory to index. Defaults to current working directory.

    Returns:
        Dict with keys: chunks, files, node_count, files_parsed, languages
    """
    if project_root is None:
        project_root = os.getcwd()

    result = {"chunks": 0, "files": 0, "node_count": 0, "files_parsed": 0, "languages": {}}

    # Build the vector index from semantic chunks of the project's sources.
    chunks = chunk_project(project_root)
    if chunks:
        indexer = VectorIndexer()
        stats = indexer.build(chunks)  # type: ignore[arg-type]

        # Record (mtime, size) per indexed file so later runs can detect
        # which files changed. (Removed unused imports: SKIP_DIRS,
        # BRAIN_DIR, MTIMES_FILE, and json were never referenced.)
        from pathlib import Path

        from .chunker import LANGUAGE_MAP
        from .constants import should_skip

        mtimes: dict = {}
        for dirpath, dirnames, filenames in os.walk(Path(project_root).resolve()):
            # Prune skipped directories in place so os.walk won't descend.
            dirnames[:] = [d for d in dirnames if not should_skip(d)]
            for fname in filenames:
                # Only files whose extension we actually chunk/index.
                if Path(fname).suffix.lower() not in LANGUAGE_MAP:
                    continue
                fpath = Path(dirpath) / fname
                try:
                    st = fpath.stat()
                except OSError:
                    continue  # file vanished or unreadable — skip it
                mtimes[str(fpath)] = (st.st_mtime, st.st_size)
        indexer.file_mtimes = mtimes
        indexer.save()

        result["chunks"] = stats.get("chunks", 0)
        result["files"] = stats.get("files", 0)
        result["languages"] = stats.get("languages", {})

    # Build the legacy knowledge graph (kept for backward compatibility).
    parsed, _ = parse_project(project_root)
    if parsed:
        brain = KnowledgeGraph()
        brain.build(project_root, parsed)
        brain.save()
        result["node_count"] = brain.stats.get("node_count", 0)
        result["files_parsed"] = brain.stats.get("files_parsed", 0)

    return result
|
|
84
|
+
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Context assembler — takes ranked chunks and produces prompt-ready context blocks."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _token_count(text: str) -> int:
|
|
8
|
+
return len(text) // 4
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ContextAssembler:
    """Assembles ranked chunks into a prompt-ready XML context block."""

    def assemble(
        self,
        chunks: list[dict[str, Any]],
        max_tokens: int = 8000,
        max_chunks: int = 20,
    ) -> str:
        """Build a context string from ranked retrieval chunks.

        Args:
            chunks: Chunk dicts with file_path, start_line, end_line,
                content, and optional score keys.
            max_tokens: Approximate token budget for the whole output.
            max_chunks: Hard cap on the number of chunks included.

        Returns:
            Newline-separated <context> elements, or "" when chunks is empty.
        """
        if not chunks:
            return ""

        deduped = self._deduplicate(chunks)
        return self._format_chunks(deduped, max_tokens, max_chunks)

    def _deduplicate(self, chunks: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Drop line-overlapping chunks within a file, keeping the best-scored.

        Chunks are grouped per file; within each file, a chunk whose line
        range overlaps an already-kept chunk replaces it only when it has a
        higher score. The final list is sorted by score descending.
        """
        if not chunks:
            return []

        by_file: dict[str, list[dict[str, Any]]] = {}
        for c in chunks:
            by_file.setdefault(c["file_path"], []).append(c)

        result: list[dict[str, Any]] = []
        # FIX: iterate values() — the file-path key was never used (PERF102).
        for file_chunks in by_file.values():
            file_chunks.sort(key=lambda c: c.get("score", 0), reverse=True)

            keep: list[dict[str, Any]] = []
            for c in file_chunks:
                c_start = c.get("start_line", 0)
                c_end = c.get("end_line", 0)
                overlapping = False
                for kept in keep:
                    k_start = kept.get("start_line", 0)
                    k_end = kept.get("end_line", 0)
                    # Ranges overlap unless one ends before the other starts.
                    if not (c_end < k_start or c_start > k_end):
                        if c.get("score", 0) > kept.get("score", 0):
                            keep.remove(kept)
                            keep.append(c)
                        overlapping = True
                        break
                if not overlapping:
                    keep.append(c)

            result.extend(keep)

        result.sort(key=lambda c: c.get("score", 0), reverse=True)
        return result

    def _format_chunks(
        self,
        chunks: list[dict[str, Any]],
        max_tokens: int,
        max_chunks: int,
    ) -> str:
        """Render chunks as <context> tags within the token budget.

        Empty-content chunks are skipped; a chunk that would exceed the
        remaining budget is truncated (approximating 4 chars per token),
        and emission stops once the budget is exhausted.
        """
        parts: list[str] = []
        total_tokens = 0

        for chunk in chunks[:max_chunks]:
            score = chunk.get("score", 0)
            file_path = chunk.get("file_path", "")
            start_line = chunk.get("start_line", 0)
            end_line = chunk.get("end_line", 0)
            content = chunk.get("content", "").strip()

            if not content:
                continue

            chunk_tokens = _token_count(content)
            # Reserve room for the surrounding tag markup.
            remaining = max_tokens - total_tokens - _token_count("<context></context>")

            if remaining <= 0:
                break

            if chunk_tokens > remaining:
                truncated_chars = remaining * 4
                content = content[:truncated_chars] + "..."

            # Guard against a falsy/None score, which would break :.2f.
            score_str = f'{score:.2f}' if score else "0.00"
            context_tag = (
                f'<context file="{file_path}" lines="{start_line}-{end_line}" '
                f'relevance="{score_str}">\n'
                f"{content}\n"
                f"</context>"
            )

            parts.append(context_tag)
            total_tokens += _token_count(context_tag)

        return "\n\n".join(parts)
|