npm - opencode-diane - Versions diffs - 0.0.5 - Mend

opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

package/CHANGELOG.md +180 -0
package/LICENSE +21 -0
package/README.md +206 -0
package/WIKI.md +1430 -0
package/dist/index.d.ts +28 -0
package/dist/index.js +1632 -0
package/dist/ingest/adaptive.d.ts +47 -0
package/dist/ingest/adaptive.js +182 -0
package/dist/ingest/code-health.d.ts +58 -0
package/dist/ingest/code-health.js +202 -0
package/dist/ingest/code-map.d.ts +71 -0
package/dist/ingest/code-map.js +670 -0
package/dist/ingest/cross-refs.d.ts +59 -0
package/dist/ingest/cross-refs.js +1207 -0
package/dist/ingest/docs.d.ts +49 -0
package/dist/ingest/docs.js +325 -0
package/dist/ingest/git.d.ts +77 -0
package/dist/ingest/git.js +390 -0
package/dist/ingest/live-session.d.ts +101 -0
package/dist/ingest/live-session.js +173 -0
package/dist/ingest/project-notes.d.ts +28 -0
package/dist/ingest/project-notes.js +102 -0
package/dist/ingest/project.d.ts +35 -0
package/dist/ingest/project.js +430 -0
package/dist/ingest/session-snapshot.d.ts +63 -0
package/dist/ingest/session-snapshot.js +94 -0
package/dist/ingest/sessions.d.ts +29 -0
package/dist/ingest/sessions.js +164 -0
package/dist/ingest/tables.d.ts +52 -0
package/dist/ingest/tables.js +360 -0
package/dist/mining/skill-miner.d.ts +53 -0
package/dist/mining/skill-miner.js +234 -0
package/dist/search/bm25.d.ts +81 -0
package/dist/search/bm25.js +334 -0
package/dist/search/e5-embedder.d.ts +30 -0
package/dist/search/e5-embedder.js +91 -0
package/dist/search/embed-pass.d.ts +26 -0
package/dist/search/embed-pass.js +43 -0
package/dist/search/embedder.d.ts +58 -0
package/dist/search/embedder.js +85 -0
package/dist/search/inverted-index.d.ts +51 -0
package/dist/search/inverted-index.js +139 -0
package/dist/search/ppr.d.ts +44 -0
package/dist/search/ppr.js +118 -0
package/dist/search/tokenize.d.ts +26 -0
package/dist/search/tokenize.js +98 -0
package/dist/store/eviction.d.ts +16 -0
package/dist/store/eviction.js +37 -0
package/dist/store/repository.d.ts +222 -0
package/dist/store/repository.js +420 -0
package/dist/store/sqlite-store.d.ts +89 -0
package/dist/store/sqlite-store.js +252 -0
package/dist/store/vector-store.d.ts +66 -0
package/dist/store/vector-store.js +160 -0
package/dist/types.d.ts +385 -0
package/dist/types.js +9 -0
package/dist/utils/file-log.d.ts +87 -0
package/dist/utils/file-log.js +215 -0
package/dist/utils/peer-detection.d.ts +45 -0
package/dist/utils/peer-detection.js +90 -0
package/dist/utils/shell.d.ts +43 -0
package/dist/utils/shell.js +110 -0
package/dist/utils/usage-skill.d.ts +42 -0
package/dist/utils/usage-skill.js +129 -0
package/dist/utils/xlsx.d.ts +36 -0
package/dist/utils/xlsx.js +270 -0
package/grammars/tree-sitter-c.wasm +0 -0
package/grammars/tree-sitter-c_sharp.wasm +0 -0
package/grammars/tree-sitter-cpp.wasm +0 -0
package/grammars/tree-sitter-css.wasm +0 -0
package/grammars/tree-sitter-go.wasm +0 -0
package/grammars/tree-sitter-html.wasm +0 -0
package/grammars/tree-sitter-java.wasm +0 -0
package/grammars/tree-sitter-javascript.wasm +0 -0
package/grammars/tree-sitter-json.wasm +0 -0
package/grammars/tree-sitter-php.wasm +0 -0
package/grammars/tree-sitter-python.wasm +0 -0
package/grammars/tree-sitter-rust.wasm +0 -0
package/grammars/tree-sitter-typescript.wasm +0 -0
package/package.json +80 -0

package/dist/mining/skill-miner.d.ts ADDED Viewed

@@ -0,0 +1,53 @@
+/**
+ * Skill miner — turns clusters of related memories into
+ * OpenCode-compatible SKILL.md files.
+ *
+ * Clustering is deterministic and cheap: group memories by `subject`,
+ * keep groups with at least `minCluster` entries (default 3), and
+ * write one skill per such group. The skill description is built
+ * from the subject + the most-used tags across the cluster, so it
+ * triggers when the agent's task mentions the same area.
+ *
+ * Output: `<root>/<skillsOutputDir>/<slug>/SKILL.md`. Each file has
+ * YAML frontmatter (name, description, license, compatibility,
+ * metadata) followed by a bullet-list body summarising the cluster.
+ */
+import type { MemoryRepository } from "../store/repository.js";
+/**
+ * A skill file found on disk, parsed enough to surface in the
+ * `memory_skill` tool. `body` is the SKILL.md content with the YAML
+ * frontmatter stripped — the instructional part an agent actually
+ * wants injected into context.
+ */
+export interface MinedSkillInfo {
+    /** Name of the skill's subdirectory under the skills output directory. */
+    slug: string;
+    /** Frontmatter `name`; falls back to `slug` when missing or empty. */
+    name: string;
+    /** Frontmatter `description`; falls back to "(no description)" when missing or empty. */
+    description: string;
+    /** Location of the file that was read: `<root>/<skillsOutputDir>/<slug>/SKILL.md`. */
+    path: string;
+    /** SKILL.md text after the frontmatter block, or the whole file when no frontmatter block was found. */
+    body: string;
+    /** True when the file carries the `generated_by: opencode-diane` marker this plugin writes into its skills. */
+    generatedByPlugin: boolean;
+}
+/**
+ * Read the skill files currently on disk under
+ * `<root>/<skillsOutputDir>`. This is read FRESH on every call — it's
+ * what lets the `memory_skill` tool surface skills written *after*
+ * OpenCode started (e.g. by `memory_mine_skills` mid-session), which
+ * OpenCode's own startup-time skill discovery cannot do.
+ *
+ * Tolerant by construction: a missing directory yields an empty list,
+ * and an entry without a readable `SKILL.md` is skipped rather than
+ * throwing. A `SKILL.md` without frontmatter is still listed: its
+ * name falls back to the slug, its description to "(no description)",
+ * and its body is the whole file. Read failures never propagate.
+ *
+ * `slugPrefix`, if non-empty, filters the results to subdirectories
+ * whose name starts with the prefix — used when a coexisting plugin
+ * (caveman, oh-my-opencode) writes its own skills into the shared
+ * `.opencode/skills/` directory and we want to surface only ours.
+ * Default `""` returns everything, matching the standalone behaviour.
+ *
+ * @param root Base directory that `skillsOutputDir` is joined onto.
+ * @param skillsOutputDir Directory, relative to `root`, that holds one subdirectory per skill.
+ * @param slugPrefix Optional subdirectory-name prefix filter; `""` disables filtering.
+ * @returns The skills found, sorted by slug.
+ */
+export declare function readMinedSkills(root: string, skillsOutputDir: string, slugPrefix?: string): Promise<MinedSkillInfo[]>;
+/** Summary of one `mineSkills` run. */
+export interface MineResult {
+    /** Number of subject clusters that met the `minCluster` size, whether or not a skill was written for them. */
+    clustersConsidered: number;
+    /** Number of SKILL.md files written during the run. */
+    skillsWritten: number;
+    /** Paths of the SKILL.md files written, in the order they were written. */
+    writtenPaths: string[];
+}
+/**
+ * Group the repository's memories by `subject` and write one
+ * `<root>/<skillsOutputDir>/<slugPrefix><slug>/SKILL.md` for each
+ * group that has at least `minCluster` members, largest groups first.
+ * A `skill-mined` memory pointing at each written file is recorded
+ * in `repo`.
+ *
+ * @param repo Memory repository to read clusters from and record mined skills into.
+ * @param root Base directory that `skillsOutputDir` is joined onto.
+ * @param skillsOutputDir Directory, relative to `root`, that receives the skill subdirectories.
+ * @param minCluster Minimum number of memories a subject needs to become a skill.
+ * @param slugPrefix Optional prefix for the on-disk subdirectory name; defaults to `""`.
+ * @returns Counts and paths describing what the run wrote.
+ */
+export declare function mineSkills(repo: MemoryRepository, root: string, skillsOutputDir: string, minCluster: number, slugPrefix?: string): Promise<MineResult>;

package/dist/mining/skill-miner.js ADDED Viewed

@@ -0,0 +1,234 @@
+/**
+ * Skill miner — turns clusters of related memories into
+ * OpenCode-compatible SKILL.md files.
+ *
+ * Clustering is deterministic and cheap: group memories by `subject`,
+ * keep groups with at least `minCluster` entries (default 3), and
+ * write one skill per such group. The skill description is built
+ * from the subject + the most-used tags across the cluster, so it
+ * triggers when the agent's task mentions the same area.
+ *
+ * Output: `<root>/<skillsOutputDir>/<slug>/SKILL.md`. Each file has
+ * YAML frontmatter (name, description, license, compatibility,
+ * metadata) followed by a bullet-list body summarising the cluster.
+ */
+import { mkdir, writeFile, readdir, readFile } from "node:fs/promises";
+import { join } from "node:path";
+/** Upper bound on the memory bullets listed in one skill body; further cluster members are only counted. */
+const MAX_BODY_BULLETS = 12;
+/** Upper bound on the number of skill files a single `mineSkills` run writes. */
+const MAX_SKILLS_PER_RUN = 30;
+/**
+ * Read the skill files currently on disk under
+ * `<root>/<skillsOutputDir>`. This is read FRESH on every call — it's
+ * what lets the `memory_skill` tool surface skills written *after*
+ * OpenCode started (e.g. by `memory_mine_skills` mid-session), which
+ * OpenCode's own startup-time skill discovery cannot do.
+ *
+ * Tolerant by construction: a missing directory yields an empty list,
+ * and an unreadable or frontmatter-less skill folder is skipped
+ * rather than throwing. Never throws.
+ *
+ * `slugPrefix`, if non-empty, filters the results to subdirectories
+ * whose name starts with the prefix — used when a coexisting plugin
+ * (caveman, oh-my-opencode) writes its own skills into the shared
+ * `.opencode/skills/` directory and we want to surface only ours.
+ * Default `""` returns everything, matching the standalone behaviour.
+ */
+export async function readMinedSkills(root, skillsOutputDir, slugPrefix = "") {
+    const base = join(root, skillsOutputDir);
+    let entries;
+    try {
+        entries = await readdir(base);
+    }
+    catch {
+        return []; // no skills directory yet — nothing mined
+    }
+    // When a prefix is configured we only surface subdirectories matching
+    // it — peer plugins' subdirs (e.g. caveman's `caveman-commit/`) are
+    // theirs to list, not ours.
+    if (slugPrefix.length > 0) {
+        entries = entries.filter((e) => e.startsWith(slugPrefix));
+    }
+    const out = [];
+    for (const slug of entries) {
+        const path = join(base, slug, "SKILL.md");
+        let raw;
+        try {
+            raw = await readFile(path, "utf-8");
+        }
+        catch {
+            continue; // not a skill directory, or unreadable — skip
+        }
+        const parsed = parseSkillFile(raw);
+        out.push({
+            slug,
+            name: parsed.name || slug,
+            description: parsed.description || "(no description)",
+            path,
+            body: parsed.body,
+            generatedByPlugin: parsed.generatedByPlugin,
+        });
+    }
+    out.sort((a, b) => a.slug.localeCompare(b.slug));
+    return out;
+}
+/**
+ * Split a SKILL.md into its frontmatter-derived fields and its body.
+ * Frontmatter is the block between the first two `---` lines; the body
+ * is everything after. Deliberately a small hand parser — no YAML
+ * dependency — because we only need `name` and `description`.
+ */
+function parseSkillFile(raw) {
+    const lines = raw.split("\n");
+    let name = "";
+    let description = "";
+    let generatedByPlugin = false;
+    let body = raw;
+    if (lines[0]?.trim() === "---") {
+        const end = lines.indexOf("---", 1);
+        if (end > 0) {
+            for (const line of lines.slice(1, end)) {
+                const m = /^([A-Za-z_]+):\s*(.*)$/.exec(line);
+                if (!m)
+                    continue;
+                if (m[1] === "name")
+                    name = m[2].trim();
+                else if (m[1] === "description")
+                    description = m[2].trim();
+            }
+            if (raw.includes("generated_by: opencode-diane"))
+                generatedByPlugin = true;
+            body = lines
+                .slice(end + 1)
+                .join("\n")
+                .trim();
+        }
+    }
+    return { name, description, body, generatedByPlugin };
+}
+export async function mineSkills(repo, root, skillsOutputDir, minCluster, slugPrefix = "") {
+    const all = repo.allMemories();
+    // ── Cluster by subject ─────────────────────────────────────────────
+    const groups = new Map();
+    for (const m of all) {
+        let list = groups.get(m.subject);
+        if (!list) {
+            list = [];
+            groups.set(m.subject, list);
+        }
+        list.push(m);
+    }
+    const candidates = Array.from(groups.entries()).filter(([, ms]) => ms.length >= minCluster);
+    // Order so the most signal-rich clusters get written first when we
+    // hit the per-run cap.
+    candidates.sort((a, b) => b[1].length - a[1].length);
+    const writtenPaths = [];
+    let skillsWritten = 0;
+    const outputBase = join(root, skillsOutputDir);
+    for (const [subject, members] of candidates) {
+        if (skillsWritten >= MAX_SKILLS_PER_RUN)
+            break;
+        const skill = buildSkill(subject, members);
+        if (!skill)
+            continue;
+        // Prefix the on-disk subdirectory name AND the memory-store subject
+        // so peer plugins (caveman, oh-my-opencode) writing into the shared
+        // `.opencode/skills/` directory don't collide with us, and the
+        // subsequent `readMinedSkills(prefix)` round-trip finds the same
+        // entries. Empty prefix is the standalone behaviour and the path
+        // is byte-for-byte unchanged.
+        const namespacedSlug = `${slugPrefix}${skill.slug}`;
+        const dir = join(outputBase, namespacedSlug);
+        await mkdir(dir, { recursive: true });
+        const path = join(dir, "SKILL.md");
+        await writeFile(path, skill.content, "utf-8");
+        // Record a memory pointing at the skill so future mining doesn't
+        // re-emit the same one and the agent can find it via recall.
+        repo.insertIfMissing({
+            category: "skill-mined",
+            subject: namespacedSlug,
+            content: `Mined skill "${skill.name}" (description: ${skill.description}). ` +
+                `Backed by ${members.length} memories on subject "${subject}". ` +
+                `File: ${path.replace(root + "/", "")}`,
+            tags: ["skill", skill.slug],
+            source: `skill-miner:${skill.slug}`,
+        });
+        writtenPaths.push(path);
+        skillsWritten += 1;
+    }
+    return {
+        clustersConsidered: candidates.length,
+        skillsWritten,
+        writtenPaths,
+    };
+}
+function buildSkill(subject, members) {
+    const slug = toSlug(subject);
+    if (!slug)
+        return null;
+    const name = slug;
+    // Tag frequency
+    const tagCount = new Map();
+    for (const m of members) {
+        for (const t of m.tags)
+            tagCount.set(t, (tagCount.get(t) ?? 0) + 1);
+    }
+    const topTags = Array.from(tagCount.entries())
+        .sort((a, b) => b[1] - a[1])
+        .map(([t]) => t)
+        .slice(0, 6);
+    // Description: 20+ char minimum required by OpenCode skill spec.
+    const description = padDescription(`Recurring patterns and past actions associated with "${subject}". ` +
+        `Use when the user's task mentions ${subject}` +
+        (topTags.length > 0 ? ` or any of: ${topTags.slice(0, 4).join(", ")}.` : "."));
+    const bullets = [];
+    // Sort members by useCount desc so the most-relevant memories appear first.
+    const sorted = members.slice().sort((a, b) => b.useCount - a.useCount);
+    for (const m of sorted.slice(0, MAX_BODY_BULLETS)) {
+        bullets.push(`- (${m.category}, source ${m.source}): ${oneLine(m.content)}`);
+    }
+    const omitted = members.length > MAX_BODY_BULLETS ? members.length - MAX_BODY_BULLETS : 0;
+    const frontmatter = [
+        "---",
+        `name: ${name}`,
+        `description: ${description}`,
+        "license: MIT",
+        "compatibility: opencode",
+        "metadata:",
+        "  generated_by: opencode-diane",
+        `  subject: "${escapeYaml(subject)}"`,
+        `  cluster_size: ${members.length}`,
+        `  top_tags: [${topTags.map((t) => `"${escapeYaml(t)}"`).join(", ")}]`,
+        "---",
+        "",
+    ].join("\n");
+    const body = `# ${name}\n\n` +
+        `This skill was mined automatically from project memory: a cluster of ${members.length} entries on subject "${subject}".\n\n` +
+        `## When to use\n\n` +
+        `${description}\n\n` +
+        `## Known patterns\n\n` +
+        bullets.join("\n") +
+        (omitted > 0 ? `\n- … and ${omitted} more entries\n` : "\n") +
+        `\n## Source\n\n` +
+        `Generated by \`opencode-diane\` skill miner. ` +
+        `Backing memories live in \`.opencode/diane.json\` under subject ` +
+        `\`${escapeYaml(subject)}\`. Re-run \`memory_mine_skills\` to refresh.\n`;
+    return { slug, name, description, content: frontmatter + body };
+}
+function toSlug(subject) {
+    return subject
+        .toLowerCase()
+        .replace(/[^a-z0-9]+/g, "-")
+        .replace(/^-+|-+$/g, "")
+        .slice(0, 64);
+}
+function padDescription(s) {
+    if (s.length >= 20)
+        return s;
+    return s + " ".repeat(20 - s.length);
+}
+function oneLine(s) {
+    return s.replace(/\s+/g, " ").trim().slice(0, 240);
+}
+function escapeYaml(s) {
+    return s.replace(/"/g, '\\"');
+}

package/dist/search/bm25.d.ts ADDED Viewed

@@ -0,0 +1,81 @@
+/**
+ * BM25 retrieval over the in-memory inverted index.
+ *
+ * Hierarchical filtering: callers can narrow candidates by category
+ * and/or subject before scoring. If neither filter is provided, all
+ * docs that contain any query term are considered.
+ *
+ * k1=1.2, b=0.75 — standard defaults that work well on short docs.
+ */
+import type { Category, Memory, RecallHit } from "../types.js";
+import { InvertedIndex } from "./inverted-index.js";
+/** Options accepted by `search` and by the recall path built on top of it. */
+export interface SearchOptions {
+    /** The query text to retrieve memories for. */
+    query: string;
+    /** Optional filter: narrow the candidates to this category before scoring. */
+    category?: Category;
+    /** Optional filter: narrow the candidates to this subject before scoring. */
+    subject?: string;
+    /** Cap on returned hits (count). Default 10. */
+    limit?: number;
+    /**
+     * Optional pre-computed embedding of `query`. Supplied only when
+     * semantic search is enabled — the async embedding is done by the
+     * caller so the recall path itself stays synchronous. When present,
+     * `recallDetailed` fuses vector similarity with the BM25 ranking;
+     * when absent, retrieval is the pure lexical path. `search()` itself
+     * ignores this field — fusion happens one level up, in the
+     * repository.
+     */
+    queryVector?: Float32Array;
+    /**
+     * Use Personalized PageRank for the co-change boost instead of the
+     * default single-hop propagation. Default off (undefined / false).
+     *
+     * When on, the co-change graph contribution is computed as a
+     * random-walk-with-restart personalized on the query's textual hits
+     * — relevance spreads multi-hop and is graded by graph distance.
+     * When off, retrieval uses the cheaper one-hop boost. See ppr.ts.
+     */
+    personalizedPageRank?: boolean;
+    /**
+     * Optional ceiling on the *formatted* size of the result, in
+     * estimated tokens. When set, ranked hits are packed until the next
+     * hit would overflow; the rest are reported as omitted. This is the
+     * Aider-style "the budget is the API" idea — recall output never
+     * balloons unpredictably. ~4 chars/token, consistent with the rest
+     * of the codebase.
+     */
+    tokenBudget?: number;
+    /**
+     * Optional, agent-supplied intent lean. The agent calling recall has
+     * already understood the user's request — in whatever natural
+     * language — so `prefer` lets it pass that understanding through and
+     * make ranking query-dependent:
+     *   - "code"    — lean toward implementation; gently de-weight
+     *                 memories whose path looks test-related
+     *   - "tests"   — lean toward test files (when the user really is
+     *                 asking about tests, that's exactly what's wanted)
+     *   - "history" — lean toward change-history memories
+     *   - "any" / omitted — neutral; ranking is unchanged
+     * The lean is a mild score multiplier, deliberately never a filter:
+     * a strongly-matching test file still surfaces under "code", only
+     * lower. This keeps test de-emphasis query-dependent and reversible
+     * rather than a blunt exclusion.
+     */
+    prefer?: "code" | "tests" | "history" | "any";
+}
+/**
+ * ~4 chars per token — the rough heuristic used throughout the plugin.
+ *
+ * @param s Text whose size is being estimated.
+ * @returns The estimated token count for `s`.
+ */
+export declare function estimateTokens(s: string): number;
+/**
+ * Pack ranked hits into a token budget. `format` renders one hit to
+ * the string the agent will actually see, so the estimate matches the
+ * real output. Always returns at least one hit (the top-ranked) even
+ * if it alone exceeds the budget — an empty result would be worse
+ * than a slightly-over one — but in that case the hit's `content` is
+ * truncated so the budget stays a real ceiling rather than a wish.
+ * Returns the (possibly content-truncated) kept hits and how many
+ * were dropped.
+ *
+ * @param hits Hits in ranked order, best first.
+ * @param budget Ceiling on the formatted output, in estimated tokens.
+ * @param format Renders a single hit exactly as it will be shown to the agent.
+ * @returns `kept` — the hits that fit (the first possibly truncated);
+ *   `omitted` — the number of hits that were dropped.
+ */
+export declare function packToTokenBudget(hits: RecallHit[], budget: number, format: (h: RecallHit) => string): {
+    kept: RecallHit[];
+    omitted: number;
+};
+/**
+ * Run a BM25 search over the in-memory inverted index.
+ *
+ * Candidates can be narrowed by `opts.category` and/or `opts.subject`
+ * before scoring; with neither filter, all docs that contain any
+ * query term are considered. `opts.queryVector` is ignored here —
+ * fusion with vector similarity happens in the repository.
+ *
+ * @param index Inverted index to search.
+ * @param byId Memories keyed by string — presumably the memory id; confirm against the caller.
+ * @param opts Query text, filters and ranking options.
+ * @returns The matching hits.
+ */
+export declare function search(index: InvertedIndex, byId: Map<string, Memory>, opts: SearchOptions): RecallHit[];