opencode-diane 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +180 -0
- package/LICENSE +21 -0
- package/README.md +206 -0
- package/WIKI.md +1430 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +1632 -0
- package/dist/ingest/adaptive.d.ts +47 -0
- package/dist/ingest/adaptive.js +182 -0
- package/dist/ingest/code-health.d.ts +58 -0
- package/dist/ingest/code-health.js +202 -0
- package/dist/ingest/code-map.d.ts +71 -0
- package/dist/ingest/code-map.js +670 -0
- package/dist/ingest/cross-refs.d.ts +59 -0
- package/dist/ingest/cross-refs.js +1207 -0
- package/dist/ingest/docs.d.ts +49 -0
- package/dist/ingest/docs.js +325 -0
- package/dist/ingest/git.d.ts +77 -0
- package/dist/ingest/git.js +390 -0
- package/dist/ingest/live-session.d.ts +101 -0
- package/dist/ingest/live-session.js +173 -0
- package/dist/ingest/project-notes.d.ts +28 -0
- package/dist/ingest/project-notes.js +102 -0
- package/dist/ingest/project.d.ts +35 -0
- package/dist/ingest/project.js +430 -0
- package/dist/ingest/session-snapshot.d.ts +63 -0
- package/dist/ingest/session-snapshot.js +94 -0
- package/dist/ingest/sessions.d.ts +29 -0
- package/dist/ingest/sessions.js +164 -0
- package/dist/ingest/tables.d.ts +52 -0
- package/dist/ingest/tables.js +360 -0
- package/dist/mining/skill-miner.d.ts +53 -0
- package/dist/mining/skill-miner.js +234 -0
- package/dist/search/bm25.d.ts +81 -0
- package/dist/search/bm25.js +334 -0
- package/dist/search/e5-embedder.d.ts +30 -0
- package/dist/search/e5-embedder.js +91 -0
- package/dist/search/embed-pass.d.ts +26 -0
- package/dist/search/embed-pass.js +43 -0
- package/dist/search/embedder.d.ts +58 -0
- package/dist/search/embedder.js +85 -0
- package/dist/search/inverted-index.d.ts +51 -0
- package/dist/search/inverted-index.js +139 -0
- package/dist/search/ppr.d.ts +44 -0
- package/dist/search/ppr.js +118 -0
- package/dist/search/tokenize.d.ts +26 -0
- package/dist/search/tokenize.js +98 -0
- package/dist/store/eviction.d.ts +16 -0
- package/dist/store/eviction.js +37 -0
- package/dist/store/repository.d.ts +222 -0
- package/dist/store/repository.js +420 -0
- package/dist/store/sqlite-store.d.ts +89 -0
- package/dist/store/sqlite-store.js +252 -0
- package/dist/store/vector-store.d.ts +66 -0
- package/dist/store/vector-store.js +160 -0
- package/dist/types.d.ts +385 -0
- package/dist/types.js +9 -0
- package/dist/utils/file-log.d.ts +87 -0
- package/dist/utils/file-log.js +215 -0
- package/dist/utils/peer-detection.d.ts +45 -0
- package/dist/utils/peer-detection.js +90 -0
- package/dist/utils/shell.d.ts +43 -0
- package/dist/utils/shell.js +110 -0
- package/dist/utils/usage-skill.d.ts +42 -0
- package/dist/utils/usage-skill.js +129 -0
- package/dist/utils/xlsx.d.ts +36 -0
- package/dist/utils/xlsx.js +270 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-css.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-html.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-json.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +80 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adaptive configuration — scale size-derived settings to the repo.
|
|
3
|
+
*
|
|
4
|
+
* The plugin's fixed defaults (gitHistoryDepth 500, a 4000-file
|
|
5
|
+
* code-map cap, a 50 MB budget) are a reasonable middle. They are
|
|
6
|
+
* wasteful on a 50-commit toy and inadequate on a 100k-commit
|
|
7
|
+
* monorepo. Rather than a pile of per-knob heuristics, this module
|
|
8
|
+
* takes ONE measured signal — commit count, or file count when
|
|
9
|
+
* there's no git — classifies the repo into one named tier, and a
|
|
10
|
+
* lookup table picks the numbers. One input, three tiers, inspectable
|
|
11
|
+
* and logged: that keeps adaptation predictable.
|
|
12
|
+
*
|
|
13
|
+
* Adaptation only fills knobs the user did NOT set explicitly
|
|
14
|
+
* (`ResolvedConfig.explicitKeys`); an explicit value always wins.
|
|
15
|
+
* It is gated by `config.adaptive` (default true).
|
|
16
|
+
*/
|
|
17
|
+
import type { ResolvedConfig } from "../types.js";
|
|
18
|
+
/** Size class a repository is sorted into; each tier selects one set of tuning values. */
export type RepoTier = "small" | "medium" | "large";
/** Result of measuring a repository: what was counted, the count, and the tier it maps to. */
export interface RepoSignal {
    /** What was measured: a git commit count, or a tree file count. */
    basis: "commits" | "files";
    /**
     * The measured count, in units of `basis`. For the `files` basis the
     * walk stops early once past the large threshold, so this can be a
     * lower bound rather than an exact total.
     */
    value: number;
    /** Tier derived from `value`. */
    tier: RepoTier;
}
|
|
25
|
+
/**
 * Measure the repo with one cheap call and classify it.
 *
 * When `hasGit` is true the signal is `git rev-list --count HEAD`.
 * If that yields no usable number, or when `hasGit` is false, the
 * signal is a bounded count of files under `root` (the walk stops
 * once it is past the large threshold, since the exact number beyond
 * that does not change the tier).
 *
 * NOTE(review): a failed git count falls back to the file count, not
 * to a fixed tier, and an unreadable tree counts as zero files —
 * confirm that is the intended failure behaviour.
 *
 * @param root   Directory to measure.
 * @param hasGit Whether git should be tried before counting files.
 * @returns The basis that was measured, the count, and the tier.
 */
export declare function measureRepo(root: string, hasGit: boolean): Promise<RepoSignal>;
|
|
34
|
+
/**
 * Apply size-derived tuning to a resolved config, **mutating it in
 * place**. The config object is shared (the plugin's tools and hooks
 * close over it at startup, before background prefill runs the
 * measurement), so a returned copy wouldn't reach them — a one-time
 * in-place settle does. Only knobs the user did NOT set explicitly
 * (those absent from `config.explicitKeys`) are touched.
 *
 * When `config.adaptive` is false the config is left untouched and
 * only a summary saying so is returned.
 *
 * @param config Resolved config to settle in place.
 * @param signal Measurement produced by `measureRepo`.
 * @returns A short human-readable description of what changed, for
 *          the prefill log.
 */
export declare function applyAdaptiveTuning(config: ResolvedConfig, signal: RepoSignal): {
    summary: string;
};
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adaptive configuration — scale size-derived settings to the repo.
|
|
3
|
+
*
|
|
4
|
+
* The plugin's fixed defaults (gitHistoryDepth 500, a 4000-file
|
|
5
|
+
* code-map cap, a 50 MB budget) are a reasonable middle. They are
|
|
6
|
+
* wasteful on a 50-commit toy and inadequate on a 100k-commit
|
|
7
|
+
* monorepo. Rather than a pile of per-knob heuristics, this module
|
|
8
|
+
* takes ONE measured signal — commit count, or file count when
|
|
9
|
+
* there's no git — classifies the repo into one named tier, and a
|
|
10
|
+
* lookup table picks the numbers. One input, three tiers, inspectable
|
|
11
|
+
* and logged: that keeps adaptation predictable.
|
|
12
|
+
*
|
|
13
|
+
* Adaptation only fills knobs the user did NOT set explicitly
|
|
14
|
+
* (`ResolvedConfig.explicitKeys`); an explicit value always wins.
|
|
15
|
+
* It is gated by `config.adaptive` (default true).
|
|
16
|
+
*/
|
|
17
|
+
import { runGit } from "../utils/shell.js";
|
|
18
|
+
/**
 * Disk budget in MB. It is deliberately the same for every tier:
 * adaptation does not shrink or grow the budget with repo size today.
 */
const TIER_BUDGET_MB = 50;
/**
 * Commit cap for the co-change pass, also shared by every tier.
 * co-change is the one genuinely super-linear pass; on very large
 * histories it is skipped rather than risking a stall.
 */
const TIER_CO_CHANGE_CAP = 5000;
/**
 * Lookup table from repo tier to the size-derived knob values.
 * `medium` carries the plugin's fixed defaults for history depth and
 * code-map file cap; `small` and `large` scale those two down and up.
 */
const TIERS = {
    small: {
        gitHistoryDepth: 250,
        maxMemoryDiskMB: TIER_BUDGET_MB,
        codeMapMaxFiles: 1500,
        coChangeMaxCommits: TIER_CO_CHANGE_CAP,
    },
    medium: {
        gitHistoryDepth: 500,
        maxMemoryDiskMB: TIER_BUDGET_MB,
        codeMapMaxFiles: 4000,
        coChangeMaxCommits: TIER_CO_CHANGE_CAP,
    },
    large: {
        gitHistoryDepth: 1500,
        maxMemoryDiskMB: TIER_BUDGET_MB,
        codeMapMaxFiles: 10000,
        coChangeMaxCommits: TIER_CO_CHANGE_CAP,
    },
};
|
|
40
|
+
/**
 * Commit-count thresholds for the git-available path. Both bounds are
 * inclusive: at most SMALL_MAX_COMMITS commits is "small", at least
 * LARGE_MIN_COMMITS is "large", anything in between is "medium".
 */
const SMALL_MAX_COMMITS = 150;
const LARGE_MIN_COMMITS = 2000;
/**
 * File-count thresholds for the no-git fallback path, applied with
 * the same inclusive rule as the commit thresholds.
 */
const SMALL_MAX_FILES = 400;
const LARGE_MIN_FILES = 5000;
|
|
46
|
+
/**
 * Measure the repo with one cheap call and classify it.
 *
 * When `hasGit` is true the signal is `git rev-list --count HEAD`.
 * If that yields no usable number (for example a repository with no
 * commits yet), or when `hasGit` is false, the signal is a bounded
 * count of files under `root`.
 *
 * Never throws: if the measurement itself blows up, the `medium`
 * tier — i.e. the plugin's existing fixed defaults — is returned
 * with a zero file count.
 *
 * @param root   Directory to measure.
 * @param hasGit Whether git should be tried before counting files.
 * @returns The basis that was measured, the count, and the tier.
 */
export async function measureRepo(root, hasGit) {
    try {
        if (hasGit) {
            const out = await runGit(["rev-list", "--count", "HEAD"], root);
            const commits = out ? parseInt(out.trim(), 10) : NaN;
            if (Number.isFinite(commits)) {
                return { basis: "commits", value: commits, tier: tierForCommits(commits) };
            }
            // git is present but gave no count — fall through to the
            // file-count basis.
        }
        const files = await countFiles(root);
        return { basis: "files", value: files, tier: tierForFiles(files) };
    }
    catch {
        // Honour the "never throws" contract: an unexpected failure
        // leaves the plugin on its fixed (medium) defaults.
        return { basis: "files", value: 0, tier: "medium" };
    }
}
|
|
67
|
+
/**
 * Classify a commit count. Both thresholds are inclusive; a count
 * that satisfies neither comparison is "medium".
 */
function tierForCommits(n) {
    return n <= SMALL_MAX_COMMITS
        ? "small"
        : n >= LARGE_MIN_COMMITS
            ? "large"
            : "medium";
}
|
|
74
|
+
/**
 * Classify a file count. Starts from "medium" and moves to "small"
 * or "large" when the count reaches the matching inclusive bound.
 */
function tierForFiles(n) {
    let tier = "medium";
    if (n <= SMALL_MAX_FILES) {
        tier = "small";
    }
    else if (n >= LARGE_MIN_FILES) {
        tier = "large";
    }
    return tier;
}
|
|
81
|
+
/**
 * Directories not worth walking when sizing the repo: dependency
 * trees, virtualenvs, caches and build output. The file counter also
 * skips every directory whose name starts with ".", so the dotted
 * entries here (".git", ".venv", ".next") are covered twice.
 */
const SKIP = new Set([
    ".git",
    "node_modules",
    ".venv",
    "venv",
    "__pycache__",
    "dist",
    "build",
    "target",
    "vendor",
    ".next",
    "coverage",
]);
|
|
95
|
+
/**
 * Bounded file count under `root`.
 *
 * Directories are visited from an explicit work list instead of by
 * recursion. Counting stops as soon as the total is one past the
 * "large" threshold — beyond that the tier cannot change. Directories
 * more than eight levels below `root` are not read, directories named
 * in `SKIP` or starting with "." are not entered, and a directory
 * that cannot be read contributes nothing. Only regular files count.
 */
async function countFiles(root) {
    const fs = await import("node:fs/promises");
    const path = await import("node:path");
    const limit = LARGE_MIN_FILES + 1;
    const maxDepth = 8;
    let seen = 0;
    const pending = [{ dir: root, depth: 0 }];
    while (pending.length > 0 && seen < limit) {
        const { dir, depth } = pending.pop();
        let listing;
        try {
            listing = await fs.readdir(dir, { withFileTypes: true });
        }
        catch {
            // unreadable directory — skip it
            continue;
        }
        for (const entry of listing) {
            if (seen >= limit)
                break;
            if (entry.isFile()) {
                seen += 1;
                continue;
            }
            if (!entry.isDirectory())
                continue;
            if (entry.name.startsWith(".") || SKIP.has(entry.name))
                continue;
            // Children of a directory at the depth limit are never read.
            if (depth < maxDepth) {
                pending.push({ dir: path.join(dir, entry.name), depth: depth + 1 });
            }
        }
    }
    return seen;
}
|
|
132
|
+
/**
 * Apply size-derived tuning to a resolved config, **mutating it in
 * place**. The config object is shared (the plugin's tools and hooks
 * close over it at startup, before background prefill runs the
 * measurement), so a returned copy wouldn't reach them — a one-time
 * in-place settle does. Only knobs the user did NOT set explicitly
 * (those absent from `config.explicitKeys`) are touched.
 *
 * When `config.adaptive` is false the config is left untouched.
 *
 * @param config Resolved config to settle in place.
 * @param signal Measurement whose `tier` selects the tuning values.
 * @returns `{ summary }` — a short human-readable description of the
 *          tier and of every knob that changed, for the prefill log.
 */
export function applyAdaptiveTuning(config, signal) {
    if (!config.adaptive) {
        return { summary: "adaptive tuning off — using fixed defaults" };
    }
    const t = TIERS[signal.tier];
    const changes = [];
    if (!config.explicitKeys.has("gitHistoryDepth") && config.gitHistoryDepth !== t.gitHistoryDepth) {
        config.gitHistoryDepth = t.gitHistoryDepth;
        changes.push(`gitHistoryDepth=${t.gitHistoryDepth}`);
    }
    if (!config.explicitKeys.has("maxMemoryDiskMB")) {
        // Every tier carries the same 50 MB budget, so this normally
        // changes nothing. `Math.max` floors the tier value at 50 MB,
        // so a tier table can only raise the budget above that floor.
        const mb = Math.max(50, t.maxMemoryDiskMB);
        const bytes = mb * 1024 * 1024;
        if (config.maxMemoryBytes !== bytes) {
            config.maxMemoryBytes = bytes;
            changes.push(`budget=${mb}MB`);
        }
    }
    // codeMapMaxFiles and coChangeMaxCommits are user-exposable since
    // v0.0.4 — respect an explicit override; otherwise follow the tier.
    if (!config.explicitKeys.has("codeMapMaxFiles") && config.codeMapMaxFiles !== t.codeMapMaxFiles) {
        config.codeMapMaxFiles = t.codeMapMaxFiles;
        changes.push(`codeMapMaxFiles=${t.codeMapMaxFiles}`);
    }
    if (!config.explicitKeys.has("coChangeMaxCommits") && config.coChangeMaxCommits !== t.coChangeMaxCommits) {
        config.coChangeMaxCommits = t.coChangeMaxCommits;
        // Reported like every other knob so the log never says
        // "no changes" after a value was actually rewritten.
        changes.push(`coChangeMaxCommits=${t.coChangeMaxCommits}`);
    }
    // Compare against the cap that is now in effect, not the tier's:
    // an explicit user cap must decide whether the note is shown.
    const effectiveCap = typeof config.coChangeMaxCommits === "number"
        ? config.coChangeMaxCommits
        : t.coChangeMaxCommits;
    const coChangeNote = signal.basis === "commits" && signal.value > effectiveCap
        ? ", co-change skipped (history too large)"
        : "";
    const changeSummary = changes.length > 0 ? changes.join(", ") : "no changes (already at tier defaults)";
    const summary = `repo tier=${signal.tier} (${signal.value} ${signal.basis}) — ` +
        changeSummary +
        coChangeNote;
    return { summary };
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code-health ingestion — turns OpenCode's LSP diagnostics into a
|
|
3
|
+
* live `code-health` memory category.
|
|
4
|
+
*
|
|
5
|
+
* Unlike git/project/session ingestion (one-shot, at startup), this
|
|
6
|
+
* is a LIVE signal: it's driven by the `lsp.client.diagnostics`
|
|
7
|
+
* plugin event, which fires whenever a language server re-analyses a
|
|
8
|
+
* file. Each fire upserts one memory per file — re-reporting REPLACES
|
|
9
|
+
* the prior state, so the store always reflects current diagnostics,
|
|
10
|
+
* never a pile of stale ones.
|
|
11
|
+
*
|
|
12
|
+
* Why this is convention-free and language-agnostic: LSP normalises
|
|
13
|
+
* diagnostics across 40+ servers into the same shape (severity 1-4,
|
|
14
|
+
* a message, a range). We read the compiler's / type-checker's own
|
|
15
|
+
* output — no heuristics, no per-language logic.
|
|
16
|
+
*
|
|
17
|
+
* The event payload shape is not nailed down across OpenCode
|
|
18
|
+
* versions, so extraction is deliberately defensive — it probes
|
|
19
|
+
* several plausible shapes and silently no-ops if none match, exactly
|
|
20
|
+
* like the session ingester does for SDK responses. The extraction
|
|
21
|
+
* logic is a pure function so it can be unit-tested against mock
|
|
22
|
+
* payloads without a running LSP.
|
|
23
|
+
*/
|
|
24
|
+
import type { MemoryRepository } from "../store/repository.js";
|
|
25
|
+
/**
 * Per-file diagnostic rollup extracted from an LSP event: one count
 * per severity plus a handful of example messages.
 */
export interface FileDiagnostics {
    /** File the diagnostics belong to, with any `file://` scheme stripped. */
    path: string;
    /** Number of severity-1 (Error) diagnostics. */
    errors: number;
    /** Number of severity-2 (Warning) diagnostics; unrecognised severities are counted here too. */
    warnings: number;
    /** Number of severity-3 (Information) diagnostics. */
    infos: number;
    /** Number of severity-4 (Hint) diagnostics. */
    hints: number;
    /** A few representative messages, most-severe first. */
    sampleMessages: string[];
}
|
|
35
|
+
/** Tally returned by `ingestCodeHealth` for one event payload. */
export interface CodeHealthIngestResult {
    /** Files whose memory was written with at least one diagnostic. */
    filesUpdated: number;
    /** Files whose memory was rewritten as "clean" because they reported no diagnostics. */
    filesCleared: number;
}
|
|
39
|
+
/**
 * Ingest one `lsp.client.diagnostics` event payload, upserting one
 * `code-health` memory per file found in it.
 *
 * A payload whose shape is not recognised is simply a no-op.
 * NOTE(review): "never throws" holds only as long as
 * `repo.upsertBySubject` does not throw — confirm.
 *
 * @param repo    Store the per-file memories are upserted into.
 * @param payload Raw event payload of unknown shape.
 * @returns How many file memories were updated / cleared.
 */
export declare function ingestCodeHealth(repo: MemoryRepository, payload: unknown): CodeHealthIngestResult;
|
|
45
|
+
/**
 * Pull per-file diagnostic rollups out of an LSP event payload of
 * unknown shape. Handles the shapes seen / plausible across OpenCode
 * versions:
 *
 *   { path|uri, diagnostics: [...] }
 *   { properties: { path|uri, diagnostics: [...] } }
 *   { type, properties: { ... } }                      (raw event)
 *   { path|uri, diagnostics: { [uri]: [...] } }        (grouped)
 *   { diagnostics: { [uri]: [...] } }                  (server-wide map)
 *   { [uri]: [...] }                                   (payload is the map)
 *
 * Anything unrecognised yields an empty array.
 *
 * @param payload Raw event payload; any value is accepted.
 * @returns One rollup per file found, possibly none.
 */
export declare function extractDiagnostics(payload: unknown): FileDiagnostics[];
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code-health ingestion — turns OpenCode's LSP diagnostics into a
|
|
3
|
+
* live `code-health` memory category.
|
|
4
|
+
*
|
|
5
|
+
* Unlike git/project/session ingestion (one-shot, at startup), this
|
|
6
|
+
* is a LIVE signal: it's driven by the `lsp.client.diagnostics`
|
|
7
|
+
* plugin event, which fires whenever a language server re-analyses a
|
|
8
|
+
* file. Each fire upserts one memory per file — re-reporting REPLACES
|
|
9
|
+
* the prior state, so the store always reflects current diagnostics,
|
|
10
|
+
* never a pile of stale ones.
|
|
11
|
+
*
|
|
12
|
+
* Why this is convention-free and language-agnostic: LSP normalises
|
|
13
|
+
* diagnostics across 40+ servers into the same shape (severity 1-4,
|
|
14
|
+
* a message, a range). We read the compiler's / type-checker's own
|
|
15
|
+
* output — no heuristics, no per-language logic.
|
|
16
|
+
*
|
|
17
|
+
* The event payload shape is not nailed down across OpenCode
|
|
18
|
+
* versions, so extraction is deliberately defensive — it probes
|
|
19
|
+
* several plausible shapes and silently no-ops if none match, exactly
|
|
20
|
+
* like the session ingester does for SDK responses. The extraction
|
|
21
|
+
* logic is a pure function so it can be unit-tested against mock
|
|
22
|
+
* payloads without a running LSP.
|
|
23
|
+
*/
|
|
24
|
+
/** Memory category every code-health entry is stored under. */
const CATEGORY = "code-health";
/**
 * LSP DiagnosticSeverity. 1=Error 2=Warning 3=Information 4=Hint.
 * Only the first two are named; 3 and 4 appear as literals where the
 * severities are counted.
 */
const SEVERITY_ERROR = 1;
const SEVERITY_WARNING = 2;
|
|
28
|
+
/**
 * Ingest one `lsp.client.diagnostics` event payload.
 *
 * Every file found in the payload gets exactly one memory, upserted
 * by subject so a later event for the same file replaces the earlier
 * one. A file with zero diagnostics is not deleted: it is rewritten
 * as a tiny "clean" memory, so the agent can positively learn that
 * the file currently has no diagnostics. A payload that yields no
 * files changes nothing.
 *
 * @param repo    Store providing `upsertBySubject`.
 * @param payload Raw event payload of unknown shape.
 * @returns How many file memories were updated / cleared.
 */
export function ingestCodeHealth(repo, payload) {
    // "<n> <word>" with a plural "s" unless n is exactly 1.
    const counted = (n, word) => `${n} ${word}${n === 1 ? "" : "s"}`;
    let filesUpdated = 0;
    let filesCleared = 0;
    for (const fd of extractDiagnostics(payload)) {
        const { path, errors, warnings, infos, hints } = fd;
        if (errors + warnings + infos + hints === 0) {
            repo.upsertBySubject({
                category: CATEGORY,
                subject: path,
                content: `${path} currently has no LSP diagnostics (clean).`,
                tags: ["code-health", "clean", path],
                source: "lsp:diagnostics",
            });
            filesCleared += 1;
            continue;
        }
        const counts = [];
        if (errors > 0)
            counts.push(counted(errors, "error"));
        if (warnings > 0)
            counts.push(counted(warnings, "warning"));
        if (infos > 0)
            counts.push(`${infos} info`); // "info" is never pluralised
        if (hints > 0)
            counts.push(counted(hints, "hint"));
        // Quote at most three messages, each cut to 100 characters.
        const quoted = fd.sampleMessages.slice(0, 3).map((m) => `"${truncate(m, 100)}"`);
        const sample = fd.sampleMessages.length > 0 ? ` Top: ${quoted.join("; ")}.` : "";
        // One status tag at most: errors take precedence over warnings.
        const tags = ["code-health", path];
        if (errors > 0) {
            tags.push("has-errors");
        }
        else if (warnings > 0) {
            tags.push("has-warnings");
        }
        repo.upsertBySubject({
            category: CATEGORY,
            subject: path,
            content: `${path} currently has ${counts.join(", ")} reported by the language server.${sample}`,
            tags,
            source: "lsp:diagnostics",
        });
        filesUpdated += 1;
    }
    return { filesUpdated, filesCleared };
}
|
|
83
|
+
/* ─── defensive extraction ──────────────────────────────────────────── */
|
|
84
|
+
/**
 * Pull per-file diagnostic rollups out of an LSP event payload of
 * unknown shape. Handles the shapes seen / plausible across OpenCode
 * versions:
 *
 *   { path|uri, diagnostics: [...] }
 *   { properties: { path|uri, diagnostics: [...] } }
 *   { type, properties: { ... } }                      (raw event)
 *   { path|uri, diagnostics: { [uri]: [...] } }        (grouped)
 *   { diagnostics: { [uri]: [...] } }                  (server-wide map)
 *   { [uri]: [...] }                                   (payload is the map)
 *
 * Anything unrecognised yields an empty array. In particular a
 * diagnostics array with no file to attribute it to, and a payload
 * that is itself an array, are both treated as unrecognised rather
 * than being keyed under a made-up path.
 *
 * @param payload Raw event payload; any value is accepted.
 * @returns One rollup per file found, possibly none.
 */
export function extractDiagnostics(payload) {
    if (!payload || typeof payload !== "object")
        return [];
    // Unwrap a raw event envelope: { type, properties }
    let p = payload;
    if (p.properties && typeof p.properties === "object") {
        p = p.properties;
    }
    // An array has no path and no uri keys — indices are not files.
    if (Array.isArray(p))
        return [];
    const singlePath = pickPath(p);
    const diagField = p.diagnostics;
    // Single file + diagnostics array. Without a path the array cannot
    // be attributed to any file, so it is dropped.
    if (Array.isArray(diagField)) {
        return singlePath ? [rollup(singlePath, diagField)] : [];
    }
    // diagnostics is a map of uri/path -> array.
    if (diagField && typeof diagField === "object") {
        return rollupEntries(diagField);
    }
    // The payload itself is a uri -> array map; a payload that names a
    // single file is not read that way.
    return singlePath ? [] : rollupEntries(p);
}
/** Roll up every array-valued entry of a uri/path -> diagnostics map. */
function rollupEntries(map) {
    const out = [];
    for (const [key, val] of Object.entries(map)) {
        if (Array.isArray(val))
            out.push(rollup(normalisePath(key), val));
    }
    return out;
}
|
|
137
|
+
/**
 * Find the file a payload object refers to.
 *
 * Probes `path`, `uri`, `file`, `filePath`, `fileName` in that order
 * and uses the first one holding a non-empty string, so a malformed
 * earlier field (a number, an object, "") cannot hide a usable later
 * one.
 *
 * @param obj Payload object to probe.
 * @returns The normalised path, or null when no field holds one.
 */
function pickPath(obj) {
    const candidateKeys = ["path", "uri", "file", "filePath", "fileName"];
    for (const key of candidateKeys) {
        const value = obj[key];
        if (typeof value === "string" && value !== "") {
            return normalisePath(value);
        }
    }
    return null;
}
|
|
141
|
+
/**
 * Turn a `file://` URI into a plain path so memories key on a path,
 * not a URI. Anything without the `file://` prefix is returned
 * unchanged.
 *
 * After the scheme is stripped the rest is percent-decoded; text
 * that is not valid percent-encoding is kept as it is. A Windows
 * drive URI such as `file:///C:/dir/x.ts` would otherwise come out as
 * `/C:/dir/x.ts`, so the slash in front of a drive letter is removed.
 *
 * @param p Path or `file://` URI.
 * @returns The plain path.
 */
function normalisePath(p) {
    const scheme = "file://";
    if (!p.startsWith(scheme))
        return p;
    let s = p.slice(scheme.length);
    try {
        s = decodeURIComponent(s);
    }
    catch {
        // leave as-is if it isn't valid percent-encoding
    }
    // "/C:/…" or "/c:" — drop the slash that precedes the drive letter.
    if (/^\/[A-Za-z]:(?:[\\/]|$)/.test(s)) {
        s = s.slice(1);
    }
    return s;
}
|
|
155
|
+
/**
 * Roll a raw diagnostics array up into severity counts + samples.
 *
 * Entries that are not objects are ignored. The message is read from
 * `message`, else `msg`. A severity that is not one of the four LSP
 * values (missing, 0, 7, NaN, a string, …) is treated as a warning
 * for counting AND for ranking, so an odd value can neither outrank
 * real errors in the samples nor break the sort comparator.
 *
 * @param path        File the diagnostics belong to.
 * @param diagnostics Raw diagnostic entries of unknown shape.
 * @returns The per-file rollup, with up to five sample messages
 *          ordered most-severe first (input order within a severity).
 */
function rollup(path, diagnostics) {
    const fd = {
        path,
        errors: 0,
        warnings: 0,
        infos: 0,
        hints: 0,
        sampleMessages: [],
    };
    // Messages paired with their bucketed severity, for ranking.
    const ranked = [];
    for (const d of diagnostics) {
        if (!d || typeof d !== "object")
            continue;
        const raw = d.severity;
        const severity = raw === SEVERITY_ERROR || raw === 3 || raw === 4 ? raw : SEVERITY_WARNING;
        const message = typeof d.message === "string"
            ? d.message
            : typeof d.msg === "string"
                ? d.msg
                : "";
        switch (severity) {
            case SEVERITY_ERROR:
                fd.errors += 1;
                break;
            case 3:
                fd.infos += 1;
                break;
            case 4:
                fd.hints += 1;
                break;
            default:
                fd.warnings += 1;
        }
        if (message)
            ranked.push({ severity, message });
    }
    ranked.sort((a, b) => a.severity - b.severity); // 1=error first
    fd.sampleMessages = ranked.slice(0, 5).map((x) => x.message);
    return fd;
}
|
|
200
|
+
/**
 * Cap `s` at `n` characters. A string that already fits is returned
 * untouched; a longer one keeps its first `n - 1` characters and gets
 * an ellipsis appended.
 */
function truncate(s, n) {
    if (s.length > n) {
        return `${s.slice(0, n - 1)}…`;
    }
    return s;
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code-map ingestion — an Aider-style "repo map": for each source
|
|
3
|
+
* file, the *signatures* of its top-level definitions (functions,
|
|
4
|
+
* classes, methods, types) with the bodies stripped. The agent gets
|
|
5
|
+
* the shape of the codebase without reading every file.
|
|
6
|
+
*
|
|
7
|
+
* This is the one part of the plugin that is NOT convention-free or
|
|
8
|
+
* dependency-light, and that is a deliberate, opt-in trade:
|
|
9
|
+
*
|
|
10
|
+
* - It needs `web-tree-sitter` (~290 KB) plus a vendored `.wasm`
|
|
11
|
+
* grammar per supported language (~10.3 MB for the eleven below —
|
|
12
|
+
* C++ alone is 4.7 MB and TypeScript 2.3 MB). The rest of the
|
|
13
|
+
* plugin is a ~77 KB source drop with one tiny dependency; this
|
|
14
|
+
* feature is most of the install weight.
|
|
15
|
+
* - It is inherently language-aware: each grammar needs to know
|
|
16
|
+
* which node types are "definitions" (or selectors / keys /
|
|
17
|
+
* elements). That per-language table is the `LANG_SPECS` map
|
|
18
|
+
* below — contained, declarative, and the only place language
|
|
19
|
+
* knowledge lives.
|
|
20
|
+
*
|
|
21
|
+
* Because of that, code-map is gated behind `config.enableCodeMap`
|
|
22
|
+
* and defaults OFF. When disabled, none of this loads — `import()` of
|
|
23
|
+
* `web-tree-sitter` only happens inside `ingestCodeMap`. Languages we
|
|
24
|
+
* have no grammar for are simply skipped; the rest of the plugin is
|
|
25
|
+
* unaffected.
|
|
26
|
+
*
|
|
27
|
+
* Signatures are stored one `code-map` memory per file via
|
|
28
|
+
* `upsertBySubject`, so they're recallable, co-change-boosted, and
|
|
29
|
+
* token-budgeted like every other memory, and a re-scan replaces
|
|
30
|
+
* rather than accumulates.
|
|
31
|
+
*/
|
|
32
|
+
import type { MemoryRepository } from "../store/repository.js";
|
|
33
|
+
/**
 * Outcome of a code-map ingestion run.
 * NOTE(review): the field meanings below are read off the names; the
 * implementation is not visible from this declaration file — confirm.
 */
export interface CodeMapIngestResult {
    /** Presumably the number of source files that were parsed. */
    filesParsed: number;
    /** Presumably the number of files skipped because no grammar covers their language. */
    filesSkippedUnsupported: number;
    /** Presumably the total number of signature lines collected. */
    signaturesExtracted: number;
    /** Presumably the languages encountered during the run. */
    languagesSeen: string[];
    /** Set when the feature couldn't run at all (e.g. web-tree-sitter missing). */
    unavailableReason?: string;
}
|
|
41
|
+
/**
 * Re-index the code-map for a SINGLE file. This is what keeps the index
 * honest when the agent edits code mid-session: the edited file's stale
 * signature memory is replaced (via `upsertBySubject`) with one parsed
 * from the file as it is now. Reuses the cached engine, so after the
 * initial prefill a refresh is just a one-file parse. Never throws.
 *
 * @param repo       Store the file's signature memory is upserted into.
 * @param root       Project root. NOTE(review): presumably used to key
 *                   the memory on a root-relative path — confirm.
 * @param absPath    Absolute path of the file to re-index.
 * @param packageDir Optional. NOTE(review): presumably where the
 *                   vendored grammars are looked up — confirm.
 * @returns "updated" (re-indexed, incl. a newly created file),
 *          "unsupported" (extension has no grammar — nothing to do),
 *          "unavailable" (tree-sitter could not load), or "error".
 */
export declare function ingestCodeMapForFile(repo: MemoryRepository, root: string, absPath: string, packageDir?: string): Promise<"updated" | "unsupported" | "unavailable" | "error">;
|
|
53
|
+
/**
 * Build the code-map for the project under `root`, storing one
 * `code-map` memory per file in `repo`. `web-tree-sitter` is imported
 * only inside this function, and files in a language without a
 * grammar are skipped.
 *
 * NOTE(review): the implementation is not visible from this
 * declaration file. `packageDir` presumably locates the vendored
 * grammars and `maxFiles` presumably caps how many files are parsed —
 * confirm both.
 *
 * @returns Counts for the run; `unavailableReason` is set when the
 *          feature could not run at all.
 */
export declare function ingestCodeMap(repo: MemoryRepository, root: string, packageDir?: string, maxFiles?: number): Promise<CodeMapIngestResult>;
|
|
54
|
+
/**
 * Depth-first walk collecting one signature line per definition node.
 * A "signature" is the node's source text up to (but not including)
 * its body — i.e. up to the first `{` or the first newline, whichever
 * comes first — trimmed and length-capped. That captures
 * `func (s *Server) Start() error`, `def parse(self, text):`,
 * `interface Config`, etc. without any of the body. Pure function of
 * (tree, source) so it is unit-testable without a repo.
 *
 * @param rootNode Root node of the parsed syntax tree.
 * @param src      Source text the tree was parsed from.
 * @param defNodes Node type names that count as definitions.
 * @returns The collected signature lines.
 */
export declare function extractSignatures(rootNode: any, src: string, defNodes: Set<string>): string[];
|
|
64
|
+
/**
 * JSON "shape": the TOP-LEVEL keys only (or a marker if the root is an
 * array / scalar). A whole-tree walk would emit every nested key,
 * which is noise — so this descends exactly one object level. Pure
 * function of (tree, source); unit-testable without a repo.
 *
 * @param rootNode Root node of the parsed JSON syntax tree.
 * @param src      Source text the tree was parsed from.
 * @returns One line per top-level key, or the root marker.
 */
export declare function extractJsonShape(rootNode: any, src: string): string[];
|
|
71
|
+
/**
 * NOTE(review): undocumented in the source and the implementation is
 * not visible here. By analogy with `extractJsonShape` this presumably
 * returns summary lines describing the structure of an HTML document
 * from its parsed tree and source text — confirm before relying on it.
 */
export declare function extractHtmlSkeleton(rootNode: any, src: string): string[];
|