opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +180 -0
  2. package/LICENSE +21 -0
  3. package/README.md +206 -0
  4. package/WIKI.md +1430 -0
  5. package/dist/index.d.ts +28 -0
  6. package/dist/index.js +1632 -0
  7. package/dist/ingest/adaptive.d.ts +47 -0
  8. package/dist/ingest/adaptive.js +182 -0
  9. package/dist/ingest/code-health.d.ts +58 -0
  10. package/dist/ingest/code-health.js +202 -0
  11. package/dist/ingest/code-map.d.ts +71 -0
  12. package/dist/ingest/code-map.js +670 -0
  13. package/dist/ingest/cross-refs.d.ts +59 -0
  14. package/dist/ingest/cross-refs.js +1207 -0
  15. package/dist/ingest/docs.d.ts +49 -0
  16. package/dist/ingest/docs.js +325 -0
  17. package/dist/ingest/git.d.ts +77 -0
  18. package/dist/ingest/git.js +390 -0
  19. package/dist/ingest/live-session.d.ts +101 -0
  20. package/dist/ingest/live-session.js +173 -0
  21. package/dist/ingest/project-notes.d.ts +28 -0
  22. package/dist/ingest/project-notes.js +102 -0
  23. package/dist/ingest/project.d.ts +35 -0
  24. package/dist/ingest/project.js +430 -0
  25. package/dist/ingest/session-snapshot.d.ts +63 -0
  26. package/dist/ingest/session-snapshot.js +94 -0
  27. package/dist/ingest/sessions.d.ts +29 -0
  28. package/dist/ingest/sessions.js +164 -0
  29. package/dist/ingest/tables.d.ts +52 -0
  30. package/dist/ingest/tables.js +360 -0
  31. package/dist/mining/skill-miner.d.ts +53 -0
  32. package/dist/mining/skill-miner.js +234 -0
  33. package/dist/search/bm25.d.ts +81 -0
  34. package/dist/search/bm25.js +334 -0
  35. package/dist/search/e5-embedder.d.ts +30 -0
  36. package/dist/search/e5-embedder.js +91 -0
  37. package/dist/search/embed-pass.d.ts +26 -0
  38. package/dist/search/embed-pass.js +43 -0
  39. package/dist/search/embedder.d.ts +58 -0
  40. package/dist/search/embedder.js +85 -0
  41. package/dist/search/inverted-index.d.ts +51 -0
  42. package/dist/search/inverted-index.js +139 -0
  43. package/dist/search/ppr.d.ts +44 -0
  44. package/dist/search/ppr.js +118 -0
  45. package/dist/search/tokenize.d.ts +26 -0
  46. package/dist/search/tokenize.js +98 -0
  47. package/dist/store/eviction.d.ts +16 -0
  48. package/dist/store/eviction.js +37 -0
  49. package/dist/store/repository.d.ts +222 -0
  50. package/dist/store/repository.js +420 -0
  51. package/dist/store/sqlite-store.d.ts +89 -0
  52. package/dist/store/sqlite-store.js +252 -0
  53. package/dist/store/vector-store.d.ts +66 -0
  54. package/dist/store/vector-store.js +160 -0
  55. package/dist/types.d.ts +385 -0
  56. package/dist/types.js +9 -0
  57. package/dist/utils/file-log.d.ts +87 -0
  58. package/dist/utils/file-log.js +215 -0
  59. package/dist/utils/peer-detection.d.ts +45 -0
  60. package/dist/utils/peer-detection.js +90 -0
  61. package/dist/utils/shell.d.ts +43 -0
  62. package/dist/utils/shell.js +110 -0
  63. package/dist/utils/usage-skill.d.ts +42 -0
  64. package/dist/utils/usage-skill.js +129 -0
  65. package/dist/utils/xlsx.d.ts +36 -0
  66. package/dist/utils/xlsx.js +270 -0
  67. package/grammars/tree-sitter-c.wasm +0 -0
  68. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  69. package/grammars/tree-sitter-cpp.wasm +0 -0
  70. package/grammars/tree-sitter-css.wasm +0 -0
  71. package/grammars/tree-sitter-go.wasm +0 -0
  72. package/grammars/tree-sitter-html.wasm +0 -0
  73. package/grammars/tree-sitter-java.wasm +0 -0
  74. package/grammars/tree-sitter-javascript.wasm +0 -0
  75. package/grammars/tree-sitter-json.wasm +0 -0
  76. package/grammars/tree-sitter-php.wasm +0 -0
  77. package/grammars/tree-sitter-python.wasm +0 -0
  78. package/grammars/tree-sitter-rust.wasm +0 -0
  79. package/grammars/tree-sitter-typescript.wasm +0 -0
  80. package/package.json +80 -0
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Skill miner — turns clusters of related memories into
3
+ * OpenCode-compatible SKILL.md files.
4
+ *
5
+ * Clustering is deterministic and cheap: group memories by `subject`,
6
+ * keep groups with at least `minCluster` entries (default 3), and
7
+ * write one skill per such group. The skill description is built
8
+ * from the subject + the most-used tags across the cluster, so it
9
+ * triggers when the agent's task mentions the same area.
10
+ *
11
+ * Output: `<root>/<skillsOutputDir>/<slug>/SKILL.md`. Each file has
12
+ * YAML frontmatter (name, description, license, compatibility,
13
+ * metadata) followed by a bullet-list body summarising the cluster.
14
+ */
15
+ import type { MemoryRepository } from "../store/repository.js";
16
/**
 * One skill file discovered on disk, parsed just far enough to be
 * surfaced by the `memory_skill` tool.
 */
export interface MinedSkillInfo {
  /** Name of the skill's subdirectory under the skills output directory. */
  slug: string;
  /** `name` taken from the frontmatter. */
  name: string;
  /** `description` taken from the frontmatter. */
  description: string;
  /** Location of the skill's `SKILL.md` file. */
  path: string;
  /**
   * SKILL.md content with the YAML frontmatter stripped: the
   * instructional part an agent actually wants injected into context.
   */
  body: string;
  /** Whether the file carries the `generated_by: opencode-diane` marker. */
  generatedByPlugin: boolean;
}
30
/**
 * List the skill files that exist right now under
 * `<root>/<skillsOutputDir>`.
 *
 * The directory is re-read on every call. That is what allows the
 * `memory_skill` tool to surface skills written *after* OpenCode
 * started (for example by `memory_mine_skills` mid-session), which
 * OpenCode's own startup-time skill discovery cannot do.
 *
 * Tolerant by construction and never throws: a missing directory gives
 * an empty list, and a skill folder that is unreadable or has no
 * frontmatter is skipped.
 *
 * @param root Project root the skills directory is resolved against.
 * @param skillsOutputDir Skills directory, relative to `root`.
 * @param slugPrefix When non-empty, only subdirectories whose name
 *   starts with this prefix are returned. Used when a coexisting plugin
 *   (caveman, oh-my-opencode) writes its own skills into the shared
 *   `.opencode/skills/` directory and only ours should be surfaced.
 *   The default `""` returns everything (standalone behaviour).
 * @returns The skills found on disk.
 */
export declare function readMinedSkills(
  root: string,
  skillsOutputDir: string,
  slugPrefix?: string,
): Promise<MinedSkillInfo[]>;
48
/** Summary returned by one `mineSkills` run. */
export interface MineResult {
  /** Number of subject clusters that met the minimum cluster size. */
  clustersConsidered: number;
  /** Number of SKILL.md files written during the run. */
  skillsWritten: number;
  /** Paths of the SKILL.md files written during the run. */
  writtenPaths: string[];
}
53
/**
 * Group the repository's memories by `subject` and write one
 * `<root>/<skillsOutputDir>/<slug>/SKILL.md` per group that has at
 * least `minCluster` entries.
 *
 * @param repo Memory repository to mine.
 * @param root Project root the skills directory is resolved against.
 * @param skillsOutputDir Skills directory, relative to `root`.
 * @param minCluster Minimum number of memories a subject needs to become a skill.
 * @param slugPrefix Optional prefix for the on-disk skill directory names.
 * @returns Counts and paths describing what was written.
 */
export declare function mineSkills(
  repo: MemoryRepository,
  root: string,
  skillsOutputDir: string,
  minCluster: number,
  slugPrefix?: string,
): Promise<MineResult>;
@@ -0,0 +1,234 @@
1
+ /**
2
+ * Skill miner — turns clusters of related memories into
3
+ * OpenCode-compatible SKILL.md files.
4
+ *
5
+ * Clustering is deterministic and cheap: group memories by `subject`,
6
+ * keep groups with at least `minCluster` entries (default 3), and
7
+ * write one skill per such group. The skill description is built
8
+ * from the subject + the most-used tags across the cluster, so it
9
+ * triggers when the agent's task mentions the same area.
10
+ *
11
+ * Output: `<root>/<skillsOutputDir>/<slug>/SKILL.md`. Each file has
12
+ * YAML frontmatter (name, description, license, compatibility,
13
+ * metadata) followed by a bullet-list body summarising the cluster.
14
+ */
15
+ import { mkdir, writeFile, readdir, readFile } from "node:fs/promises";
16
+ import { join } from "node:path";
17
/** Upper bound on the number of memory bullets rendered into one skill body. */
const MAX_BODY_BULLETS = 12;
/** Upper bound on the number of skill files written by a single mining run. */
const MAX_SKILLS_PER_RUN = 30;
19
/**
 * List the skill files that exist right now under
 * `<root>/<skillsOutputDir>`. The directory is re-read on every call,
 * which is what lets the `memory_skill` tool surface skills written
 * *after* OpenCode started (e.g. by `memory_mine_skills` mid-session);
 * OpenCode's own startup-time skill discovery cannot do that.
 *
 * Failure-tolerant: a directory that cannot be listed yields `[]`, and
 * any subdirectory whose `SKILL.md` cannot be read is skipped.
 *
 * A non-empty `slugPrefix` restricts the result to subdirectories whose
 * name starts with it, for the case where a coexisting plugin
 * (caveman, oh-my-opencode) writes its own skills into the shared
 * `.opencode/skills/` directory. The default `""` returns everything.
 *
 * @returns Skill descriptors sorted by slug.
 */
export async function readMinedSkills(root, skillsOutputDir, slugPrefix = "") {
  const skillsDir = join(root, skillsOutputDir);

  let dirNames;
  try {
    dirNames = await readdir(skillsDir);
  } catch {
    // No skills directory yet, so nothing has been mined.
    return [];
  }

  // Peer plugins' subdirectories (e.g. caveman's `caveman-commit/`) are
  // theirs to list, so with a prefix configured keep only matching names.
  const wanted =
    slugPrefix.length > 0
      ? dirNames.filter((dirName) => dirName.startsWith(slugPrefix))
      : dirNames;

  const skills = [];
  for (const slug of wanted) {
    const path = join(skillsDir, slug, "SKILL.md");

    let raw;
    try {
      raw = await readFile(path, "utf-8");
    } catch {
      // Not a skill directory, or the file is unreadable: skip it.
      continue;
    }

    const { name, description, body, generatedByPlugin } = parseSkillFile(raw);
    skills.push({
      slug,
      name: name || slug,
      description: description || "(no description)",
      path,
      body,
      generatedByPlugin,
    });
  }

  return skills.sort((left, right) => left.slug.localeCompare(right.slug));
}
74
/**
 * Split a SKILL.md into its frontmatter-derived fields and its body.
 * Frontmatter is the block between the first two `---` lines; the body
 * is everything after. Deliberately a small hand parser with no YAML
 * dependency, because only `name` and `description` are needed.
 *
 * Both LF and CRLF line endings are accepted, and trailing whitespace
 * on the closing `---` fence is tolerated. When no complete
 * frontmatter block is found, the whole input is returned as `body`
 * and the other fields keep their empty defaults.
 *
 * @param raw Full text of a SKILL.md file.
 * @returns `{ name, description, body, generatedByPlugin }`, where
 *   `generatedByPlugin` is true only when the frontmatter contains the
 *   `generated_by: opencode-diane` marker.
 */
function parseSkillFile(raw) {
  // Split on LF or CRLF: a stray "\r" would otherwise hide the closing
  // fence and stop the key/value regex below from matching.
  const lines = raw.split(/\r?\n/);
  let name = "";
  let description = "";
  let generatedByPlugin = false;
  let body = raw;
  if (lines[0]?.trim() === "---") {
    const end = lines.findIndex((line, i) => i > 0 && line.trimEnd() === "---");
    if (end > 0) {
      const frontmatter = lines.slice(1, end);
      for (const line of frontmatter) {
        // Only top-level `key: value` lines match; indented metadata is ignored.
        const m = /^([A-Za-z_]+):\s*(.*)$/.exec(line);
        if (!m)
          continue;
        if (m[1] === "name")
          name = m[2].trim();
        else if (m[1] === "description")
          description = m[2].trim();
      }
      // Look for the marker in the frontmatter only, so a body that
      // merely mentions it does not mark the skill as plugin-generated.
      generatedByPlugin = frontmatter.some((line) => line.includes("generated_by: opencode-diane"));
      body = lines
        .slice(end + 1)
        .join("\n")
        .trim();
    }
  }
  return { name, description, body, generatedByPlugin };
}
108
/**
 * Cluster the repository's memories by `subject` and write one
 * `<root>/<skillsOutputDir>/<slugPrefix><slug>/SKILL.md` per cluster
 * that has at least `minCluster` entries. Larger clusters are written
 * first and at most `MAX_SKILLS_PER_RUN` files are written per call.
 * After each file is written, a `skill-mined` pointer memory is
 * recorded through `repo.insertIfMissing`.
 *
 * Pointer memories from earlier runs are excluded from clustering.
 * With an empty `slugPrefix` a pointer's subject can equal a real
 * subject, which would otherwise inflate that cluster and render the
 * pointer as one of the skill's bullets.
 *
 * Filesystem errors from `mkdir` / `writeFile` are not caught here and
 * reject the returned promise.
 *
 * @param repo Repository providing `allMemories()` and `insertIfMissing()`.
 * @param root Project root the skills directory is resolved against.
 * @param skillsOutputDir Skills directory, relative to `root`.
 * @param minCluster Minimum number of memories a subject needs.
 * @param slugPrefix Optional prefix for the on-disk directory name and
 *   the pointer memory's subject.
 * @returns `{ clustersConsidered, skillsWritten, writtenPaths }`.
 */
export async function mineSkills(repo, root, skillsOutputDir, minCluster, slugPrefix = "") {
  // ── Cluster by subject ─────────────────────────────────────────────
  const groups = new Map();
  for (const m of repo.allMemories()) {
    // Skip our own bookkeeping records; they are not source knowledge.
    if (m.category === "skill-mined")
      continue;
    const list = groups.get(m.subject);
    if (list)
      list.push(m);
    else
      groups.set(m.subject, [m]);
  }
  const candidates = Array.from(groups.entries()).filter(([, ms]) => ms.length >= minCluster);
  // Order so the most signal-rich clusters get written first when we
  // hit the per-run cap.
  candidates.sort((a, b) => b[1].length - a[1].length);
  const writtenPaths = [];
  let skillsWritten = 0;
  const outputBase = join(root, skillsOutputDir);
  for (const [subject, members] of candidates) {
    if (skillsWritten >= MAX_SKILLS_PER_RUN)
      break;
    const skill = buildSkill(subject, members);
    if (!skill)
      continue; // subject produced an empty slug, so there is nothing to name the skill
    // Prefix the on-disk subdirectory name AND the memory-store subject
    // so peer plugins (caveman, oh-my-opencode) writing into the shared
    // `.opencode/skills/` directory don't collide with us, and the
    // subsequent `readMinedSkills(prefix)` round-trip finds the same
    // entries. An empty prefix leaves the path unchanged.
    const namespacedSlug = `${slugPrefix}${skill.slug}`;
    const dir = join(outputBase, namespacedSlug);
    await mkdir(dir, { recursive: true });
    const path = join(dir, "SKILL.md");
    await writeFile(path, skill.content, "utf-8");
    // Build the root-relative path from its parts rather than stripping
    // `root + "/"` from the absolute path: the strip silently fails when
    // `root` ends with a slash or the platform separator is not "/".
    const relativePath = join(skillsOutputDir, namespacedSlug, "SKILL.md");
    // Record a pointer memory so the agent can find the skill via recall.
    repo.insertIfMissing({
      category: "skill-mined",
      subject: namespacedSlug,
      content: `Mined skill "${skill.name}" (description: ${skill.description}). ` +
        `Backed by ${members.length} memories on subject "${subject}". ` +
        `File: ${relativePath}`,
      tags: ["skill", skill.slug],
      source: `skill-miner:${skill.slug}`,
    });
    writtenPaths.push(path);
    skillsWritten += 1;
  }
  return {
    clustersConsidered: candidates.length,
    skillsWritten,
    writtenPaths,
  };
}
164
/**
 * Render one subject cluster as a SKILL.md document.
 *
 * The skill name is the slug of `subject`. The description combines
 * the subject with up to four of the cluster's most frequent tags.
 * The body lists up to `MAX_BODY_BULLETS` memories, highest `useCount`
 * first, plus a note counting any that were left out.
 *
 * @param subject Subject shared by every memory in the cluster.
 * @param members Memories in the cluster; `tags`, `useCount`,
 *   `category`, `source` and `content` are read from each.
 * @returns `{ slug, name, description, content }`, or `null` when the
 *   subject slugifies to an empty string.
 */
function buildSkill(subject, members) {
  const slug = toSlug(subject);
  if (!slug) {
    return null;
  }
  const name = slug;

  // Count how often each tag occurs across the cluster.
  const tagFrequency = new Map();
  for (const member of members) {
    for (const tag of member.tags) {
      tagFrequency.set(tag, (tagFrequency.get(tag) ?? 0) + 1);
    }
  }
  const topTags = Array.from(tagFrequency.entries())
    .sort(([, countA], [, countB]) => countB - countA)
    .slice(0, 6)
    .map(([tag]) => tag);

  // Description: 20+ char minimum required by OpenCode skill spec.
  const tagHint = topTags.length > 0 ? ` or any of: ${topTags.slice(0, 4).join(", ")}.` : ".";
  const description = padDescription(
    `Recurring patterns and past actions associated with "${subject}". ` +
      `Use when the user's task mentions ${subject}` +
      tagHint,
  );

  // Most-used memories first, capped at MAX_BODY_BULLETS.
  const bullets = members
    .slice()
    .sort((a, b) => b.useCount - a.useCount)
    .slice(0, MAX_BODY_BULLETS)
    .map((member) => `- (${member.category}, source ${member.source}): ${oneLine(member.content)}`);
  const omitted = Math.max(0, members.length - MAX_BODY_BULLETS);

  const quotedTags = topTags.map((tag) => `"${escapeYaml(tag)}"`).join(", ");
  const frontmatterLines = [
    "---",
    `name: ${name}`,
    `description: ${description}`,
    "license: MIT",
    "compatibility: opencode",
    "metadata:",
    " generated_by: opencode-diane",
    ` subject: "${escapeYaml(subject)}"`,
    ` cluster_size: ${members.length}`,
    ` top_tags: [${quotedTags}]`,
    "---",
    "",
  ];
  const frontmatter = frontmatterLines.join("\n");

  const heading = `# ${name}\n\n`;
  const intro = `This skill was mined automatically from project memory: a cluster of ${members.length} entries on subject "${subject}".\n\n`;
  const whenToUse = `## When to use\n\n${description}\n\n`;
  const overflowNote = omitted > 0 ? `\n- … and ${omitted} more entries\n` : "\n";
  const patterns = `## Known patterns\n\n${bullets.join("\n")}${overflowNote}`;
  const provenance =
    `\n## Source\n\n` +
    `Generated by \`opencode-diane\` skill miner. ` +
    `Backing memories live in \`.opencode/diane.json\` under subject ` +
    `\`${escapeYaml(subject)}\`. Re-run \`memory_mine_skills\` to refresh.\n`;

  const content = frontmatter + heading + intro + whenToUse + patterns + provenance;
  return { slug, name, description, content };
}
217
/**
 * Turn a free-form subject into a skill slug: lowercase, every run of
 * characters outside `[a-z0-9]` collapsed to a single `-`, no leading
 * or trailing `-`, at most 64 characters.
 *
 * Trailing dashes are stripped *after* truncation. Cutting at 64 can
 * land right after a separator, and a slug ending in `-` is not a
 * valid skill name.
 *
 * @param subject Memory subject to slugify.
 * @returns The slug, or `""` when the subject has no ASCII letters or digits.
 */
function toSlug(subject) {
  return subject
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+/, "")
    .slice(0, 64)
    .replace(/-+$/, "");
}
224
/**
 * Right-pad a description with spaces until it is at least 20
 * characters long; longer input is returned unchanged.
 *
 * @param s Description text.
 * @returns `s`, space-padded to a minimum length of 20.
 */
function padDescription(s) {
  return s.padEnd(20, " ");
}
229
/**
 * Flatten text to a single line for use as a bullet: collapse every
 * whitespace run to one space, trim, and cap at 240 UTF-16 code units.
 *
 * If the cap falls in the middle of a surrogate pair, the dangling
 * high surrogate is dropped so the result never ends in half a
 * character (which would be written to disk as U+FFFD).
 *
 * @param s Text to flatten.
 * @returns The flattened, length-capped text.
 */
function oneLine(s) {
  const limit = 240;
  const flat = s.replace(/\s+/g, " ").trim();
  if (flat.length <= limit)
    return flat;
  const cut = flat.slice(0, limit);
  const last = cut.charCodeAt(limit - 1);
  // 0xD800–0xDBFF is the high-surrogate range; its partner was cut off.
  return last >= 0xd800 && last <= 0xdbff ? cut.slice(0, limit - 1) : cut;
}
232
/**
 * Escape text for use inside a YAML double-quoted scalar.
 *
 * Backslash is YAML's escape character inside double quotes, so it is
 * escaped along with `"`. Otherwise a subject such as `C:\temp` would
 * be read as an escape sequence. Line breaks and tabs are written as
 * `\n`, `\r`, `\t` so the value always stays on one frontmatter line.
 *
 * @param s Raw text.
 * @returns Text that is safe to place between double quotes in YAML.
 */
function escapeYaml(s) {
  const escapes = {
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
  };
  return s.replace(/[\\"\n\r\t]/g, (ch) => escapes[ch]);
}
@@ -0,0 +1,81 @@
1
+ /**
2
+ * BM25 retrieval over the in-memory inverted index.
3
+ *
4
+ * Hierarchical filtering: callers can narrow candidates by category
5
+ * and/or subject before scoring. If neither filter is provided, all
6
+ * docs that contain any query term are considered.
7
+ *
8
+ * k1=1.2, b=0.75 — standard defaults that work well on short docs.
9
+ */
10
+ import type { Category, Memory, RecallHit } from "../types.js";
11
+ import { InvertedIndex } from "./inverted-index.js";
12
/** Options accepted by the recall / search path. */
export interface SearchOptions {
  /** Query text. */
  query: string;
  /** Narrow candidates to this category before scoring. */
  category?: Category;
  /** Narrow candidates to this subject before scoring. */
  subject?: string;
  /** Cap on returned hits (count). Default 10. */
  limit?: number;
  /**
   * Pre-computed embedding of `query`, supplied only when semantic
   * search is enabled. The caller performs the async embedding so the
   * recall path itself stays synchronous. With a vector present,
   * `recallDetailed` fuses vector similarity with the BM25 ranking;
   * without one, retrieval is purely lexical. `search()` itself ignores
   * this field, because fusion happens one level up in the repository.
   */
  queryVector?: Float32Array;
  /**
   * Use Personalized PageRank for the co-change boost instead of the
   * default single-hop propagation. Off by default (undefined / false).
   *
   * On: the co-change graph contribution is a random walk with restart
   * personalized on the query's textual hits, so relevance spreads
   * multi-hop and is graded by graph distance.
   * Off: retrieval uses the cheaper one-hop boost. See ppr.ts.
   */
  personalizedPageRank?: boolean;
  /**
   * Ceiling on the *formatted* size of the result, in estimated tokens.
   * When set, ranked hits are packed until the next hit would overflow,
   * and the remainder is reported as omitted. This is the Aider-style
   * "the budget is the API" idea: recall output never balloons
   * unpredictably. Estimated at ~4 chars/token, consistent with the
   * rest of the codebase.
   */
  tokenBudget?: number;
  /**
   * Agent-supplied intent lean. The agent calling recall has already
   * understood the user's request, in whatever natural language, so
   * `prefer` passes that understanding through and makes ranking
   * query-dependent:
   *   - "code": lean toward implementation; gently de-weight memories
   *     whose path looks test-related
   *   - "tests": lean toward test files (exactly what is wanted when
   *     the user really is asking about tests)
   *   - "history": lean toward change-history memories
   *   - "any" / omitted: neutral; ranking is unchanged
   * The lean is a mild score multiplier and deliberately never a
   * filter: a strongly-matching test file still surfaces under "code",
   * only lower. Test de-emphasis therefore stays query-dependent and
   * reversible rather than a blunt exclusion.
   */
  prefer?: "code" | "tests" | "history" | "any";
}
65
/**
 * Estimate the token count of `s` using the plugin-wide rough
 * heuristic of ~4 characters per token.
 */
export declare function estimateTokens(s: string): number;
67
/**
 * Pack ranked hits into a token budget.
 *
 * `format` renders one hit to the string the agent will actually see,
 * so the estimate matches the real output. At least one hit (the
 * top-ranked) is always returned, even if it alone exceeds the budget,
 * because an empty result would be worse than a slightly-over one. In
 * that case the hit's `content` is truncated so the budget stays a
 * real ceiling rather than a wish.
 *
 * @param hits Ranked hits to pack.
 * @param budget Token ceiling for the formatted output.
 * @param format Renders a hit to its agent-visible string.
 * @returns The kept (possibly content-truncated) hits and the number dropped.
 */
export declare function packToTokenBudget(
  hits: RecallHit[],
  budget: number,
  format: (h: RecallHit) => string,
): {
  kept: RecallHit[];
  omitted: number;
};
81
/**
 * BM25 retrieval over the in-memory inverted index. Candidates can be
 * narrowed by `opts.category` and/or `opts.subject` before scoring;
 * with neither filter, every doc containing any query term is
 * considered. `opts.queryVector` is ignored here.
 *
 * @param index Inverted index to search.
 * @param byId Memories keyed by id.
 * @param opts Query and ranking options.
 * @returns The ranked hits.
 */
export declare function search(
  index: InvertedIndex,
  byId: Map<string, Memory>,
  opts: SearchOptions,
): RecallHit[];