opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +180 -0
  2. package/LICENSE +21 -0
  3. package/README.md +206 -0
  4. package/WIKI.md +1430 -0
  5. package/dist/index.d.ts +28 -0
  6. package/dist/index.js +1632 -0
  7. package/dist/ingest/adaptive.d.ts +47 -0
  8. package/dist/ingest/adaptive.js +182 -0
  9. package/dist/ingest/code-health.d.ts +58 -0
  10. package/dist/ingest/code-health.js +202 -0
  11. package/dist/ingest/code-map.d.ts +71 -0
  12. package/dist/ingest/code-map.js +670 -0
  13. package/dist/ingest/cross-refs.d.ts +59 -0
  14. package/dist/ingest/cross-refs.js +1207 -0
  15. package/dist/ingest/docs.d.ts +49 -0
  16. package/dist/ingest/docs.js +325 -0
  17. package/dist/ingest/git.d.ts +77 -0
  18. package/dist/ingest/git.js +390 -0
  19. package/dist/ingest/live-session.d.ts +101 -0
  20. package/dist/ingest/live-session.js +173 -0
  21. package/dist/ingest/project-notes.d.ts +28 -0
  22. package/dist/ingest/project-notes.js +102 -0
  23. package/dist/ingest/project.d.ts +35 -0
  24. package/dist/ingest/project.js +430 -0
  25. package/dist/ingest/session-snapshot.d.ts +63 -0
  26. package/dist/ingest/session-snapshot.js +94 -0
  27. package/dist/ingest/sessions.d.ts +29 -0
  28. package/dist/ingest/sessions.js +164 -0
  29. package/dist/ingest/tables.d.ts +52 -0
  30. package/dist/ingest/tables.js +360 -0
  31. package/dist/mining/skill-miner.d.ts +53 -0
  32. package/dist/mining/skill-miner.js +234 -0
  33. package/dist/search/bm25.d.ts +81 -0
  34. package/dist/search/bm25.js +334 -0
  35. package/dist/search/e5-embedder.d.ts +30 -0
  36. package/dist/search/e5-embedder.js +91 -0
  37. package/dist/search/embed-pass.d.ts +26 -0
  38. package/dist/search/embed-pass.js +43 -0
  39. package/dist/search/embedder.d.ts +58 -0
  40. package/dist/search/embedder.js +85 -0
  41. package/dist/search/inverted-index.d.ts +51 -0
  42. package/dist/search/inverted-index.js +139 -0
  43. package/dist/search/ppr.d.ts +44 -0
  44. package/dist/search/ppr.js +118 -0
  45. package/dist/search/tokenize.d.ts +26 -0
  46. package/dist/search/tokenize.js +98 -0
  47. package/dist/store/eviction.d.ts +16 -0
  48. package/dist/store/eviction.js +37 -0
  49. package/dist/store/repository.d.ts +222 -0
  50. package/dist/store/repository.js +420 -0
  51. package/dist/store/sqlite-store.d.ts +89 -0
  52. package/dist/store/sqlite-store.js +252 -0
  53. package/dist/store/vector-store.d.ts +66 -0
  54. package/dist/store/vector-store.js +160 -0
  55. package/dist/types.d.ts +385 -0
  56. package/dist/types.js +9 -0
  57. package/dist/utils/file-log.d.ts +87 -0
  58. package/dist/utils/file-log.js +215 -0
  59. package/dist/utils/peer-detection.d.ts +45 -0
  60. package/dist/utils/peer-detection.js +90 -0
  61. package/dist/utils/shell.d.ts +43 -0
  62. package/dist/utils/shell.js +110 -0
  63. package/dist/utils/usage-skill.d.ts +42 -0
  64. package/dist/utils/usage-skill.js +129 -0
  65. package/dist/utils/xlsx.d.ts +36 -0
  66. package/dist/utils/xlsx.js +270 -0
  67. package/grammars/tree-sitter-c.wasm +0 -0
  68. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  69. package/grammars/tree-sitter-cpp.wasm +0 -0
  70. package/grammars/tree-sitter-css.wasm +0 -0
  71. package/grammars/tree-sitter-go.wasm +0 -0
  72. package/grammars/tree-sitter-html.wasm +0 -0
  73. package/grammars/tree-sitter-java.wasm +0 -0
  74. package/grammars/tree-sitter-javascript.wasm +0 -0
  75. package/grammars/tree-sitter-json.wasm +0 -0
  76. package/grammars/tree-sitter-php.wasm +0 -0
  77. package/grammars/tree-sitter-python.wasm +0 -0
  78. package/grammars/tree-sitter-rust.wasm +0 -0
  79. package/grammars/tree-sitter-typescript.wasm +0 -0
  80. package/package.json +80 -0
@@ -0,0 +1,49 @@
1
+ /**
2
+ * docs.ts — ingest project documentation as recallable section
3
+ * pointers.
4
+ *
5
+ * The premise: long-form docs are something the agent can already
6
+ * `read`; what it can't do is *find* the right section to read in a
7
+ * 30-file `docs/` tree without grepping the whole thing first. This
8
+ * ingester emits one memory per heading (H1/H2/H3) with the heading
9
+ * text, the first paragraph of body, and `path:line` so a
10
+ * `memory_recall { query: "installation" }` returns
11
+ *
12
+ * docs/install.md:15 ## Installation
13
+ * This project uses bun. Run `bun install` …
14
+ *
15
+ * — a direct pointer the agent can act on without any directory walk.
16
+ *
17
+ * **Scope** (deliberately conservative — see the design notes at the
18
+ * bottom for what was considered and rejected):
19
+ *
20
+ * - Walks `<root>/docs/` recursively for `*.md` and `*.markdown`.
21
+ * - Adds a fixed set of conventional root-level docs files (the
22
+ * `ROOT_DOCS` list — CHANGELOG, CONTRIBUTING, ARCHITECTURE, …).
23
+ * - Skips README.md — that's handled by the project ingester for
24
+ * the headline paragraph, and full-file ingestion of READMEs
25
+ * would create duplicate entries for the same content.
26
+ * - Skips dotfiles, `node_modules`, `.git`, and the standard
27
+ * SKIP_DIRS used by every other ingester.
28
+ * - Caps headings per file to prevent a runaway TOC, and caps
29
+ * files walked to prevent a runaway monorepo. Both caps are
30
+ * intentionally generous; the goal is "doesn't blow up on a 500-
31
+ * file vendored docs tree" not "hits a tight budget."
32
+ *
33
+ * **Granularity choice — one memory per heading, not per file.**
34
+ * Per-file would mean recall returns "docs/api.md mentions install"
35
+ * — true but useless, the agent still has to read the file. Per-
36
+ * heading returns the SECTION, which is the actionable unit. Costs
37
+ * a small memory-table inflation that's bounded by the cap.
38
+ */
39
+ import type { MemoryRepository } from "../store/repository.js";
40
+ /**
+ * Optional limits for `ingestDocs`. Each value is rounded to an integer
+ * and clamped by the implementation; an omitted field uses the default.
+ */
+ export interface DocsIngestOptions {
41
+ /** Cap on Markdown files processed per ingest pass. Minimum 1; default 200. */
+ maxFiles?: number;
42
+ /** Character budget for the body excerpt stored with each heading. Minimum 40; default 240. */
+ bodyChars?: number;
43
+ /** Deepest heading level indexed (1 = H1). Clamped to 1–6; default 3. */
+ maxHeadingLevel?: number;
44
+ }
45
+ /** Counters returned by `ingestDocs` for one ingest pass. */
+ export interface DocsIngestResult {
46
+ /** Markdown files counted during the pass (the `docs/` tree plus root-level docs). */
+ filesWalked: number;
47
+ /** Heading memories submitted to the repository (one per emitted heading). */
+ headingsIndexed: number;
48
+ }
49
+ /**
+ * Walk `<root>/docs/` and the conventional root-level docs files, storing
+ * one `project-facts` memory per Markdown heading in `repo`.
+ * Resolves with the file and heading counters for the pass.
+ */
+ export declare function ingestDocs(repo: MemoryRepository, root: string, opts?: DocsIngestOptions): Promise<DocsIngestResult>;
@@ -0,0 +1,325 @@
1
+ /**
2
+ * docs.ts — ingest project documentation as recallable section
3
+ * pointers.
4
+ *
5
+ * The premise: long-form docs are something the agent can already
6
+ * `read`; what it can't do is *find* the right section to read in a
7
+ * 30-file `docs/` tree without grepping the whole thing first. This
8
+ * ingester emits one memory per heading (H1/H2/H3) with the heading
9
+ * text, the first paragraph of body, and `path:line` so a
10
+ * `memory_recall { query: "installation" }` returns
11
+ *
12
+ * docs/install.md:15 ## Installation
13
+ * This project uses bun. Run `bun install` …
14
+ *
15
+ * — a direct pointer the agent can act on without any directory walk.
16
+ *
17
+ * **Scope** (deliberately conservative — see the design notes at the
18
+ * bottom for what was considered and rejected):
19
+ *
20
+ * - Walks `<root>/docs/` recursively for `*.md` and `*.markdown`.
21
+ * - Adds a fixed set of conventional root-level docs files (the
22
+ * `ROOT_DOCS` list — CHANGELOG, CONTRIBUTING, ARCHITECTURE, …).
23
+ * - Skips README.md — that's handled by the project ingester for
24
+ * the headline paragraph, and full-file ingestion of READMEs
25
+ * would create duplicate entries for the same content.
26
+ * - Skips dotfiles, `node_modules`, `.git`, and the standard
27
+ * SKIP_DIRS used by every other ingester.
28
+ * - Caps headings per file to prevent a runaway TOC, and caps
29
+ * files walked to prevent a runaway monorepo. Both caps are
30
+ * intentionally generous; the goal is "doesn't blow up on a 500-
31
+ * file vendored docs tree" not "hits a tight budget."
32
+ *
33
+ * **Granularity choice — one memory per heading, not per file.**
34
+ * Per-file would mean recall returns "docs/api.md mentions install"
35
+ * — true but useless, the agent still has to read the file. Per-
36
+ * heading returns the SECTION, which is the actionable unit. Costs
37
+ * a small memory-table inflation that's bounded by the cap.
38
+ */
39
+ import { readdir, readFile, stat } from "node:fs/promises";
40
+ import { join, relative, sep } from "node:path";
41
// ── Where memories land ─────────────────────────────────────────────

/** Memory category for every docs entry. Deliberately the existing
 *  `project-facts` bucket rather than a new one, so the same `category`
 *  filter that finds "facts about this repo" also finds doc sections. */
const CATEGORY = "project-facts";

// ── What gets walked ────────────────────────────────────────────────

/** Directory names the `docs/` walk never descends into (build output,
 *  caches, vendored trees such as `docs/node_modules`). */
const SKIP_DIRS = new Set(
    "node_modules .git dist build out target .next coverage .cache vendor".split(" "),
);

/** Conventional root-level Markdown files, excluding the README and
 *  agent-instruction files. Looked up directly under the project root
 *  only; there is no recursion for these. */
const ROOT_DOCS = [
    "CHANGELOG.md", "CONTRIBUTING.md", "ARCHITECTURE.md", "ROADMAP.md",
    "TODO.md", "HISTORY.md", "NOTES.md", "SECURITY.md",
    "GOVERNANCE.md", "MAINTAINERS.md", "AUTHORS.md", "CODE_OF_CONDUCT.md",
];

// ── Limits ──────────────────────────────────────────────────────────

/** Deepest heading level indexed by default (H1–H3). Deeper headings
 *  are usually internal markers that rarely deserve their own pointer. */
const MAX_HEADING_LEVEL = 3;

/** Maximum headings emitted per file; a file with more than this is
 *  almost certainly auto-generated. */
const MAX_HEADINGS_PER_FILE = 50;

/** Default ceiling on files processed in one ingest pass, so a huge
 *  vendored docs tree cannot stall startup. */
const MAX_FILES = 200;

/** Files larger than this many bytes (256 KiB) are skipped as likely
 *  auto-generated catalogs. */
const MAX_FILE_BYTES = 256 * 1024;

/** Default number of characters of body text captured after a heading:
 *  roughly one paragraph, short enough that 50 headings do not turn
 *  into a wall of duplicated prose. */
const BODY_CHARS = 240;
95
/**
 * Index a project's Markdown documentation as one memory per heading.
 *
 * Two sources are visited, in this order, sharing one file budget:
 *   1. every `*.md` / `*.markdown` file under `<root>/docs/` (recursive,
 *      skipping dot-entries and the names in `SKIP_DIRS`);
 *   2. the conventional root-level files listed in `ROOT_DOCS`.
 *
 * @param repo  Memory store; only `insertIfMissing` is called on it.
 * @param root  Project root directory.
 * @param opts  Optional limits. Each value is rounded and clamped:
 *              `maxFiles` ≥ 1, `bodyChars` ≥ 40, `maxHeadingLevel` in 1..6.
 *              A missing or NaN value falls back to the module default.
 * @returns `filesWalked` — files handed to the per-file ingester, never
 *          more than the file budget; `headingsIndexed` — heading memories
 *          submitted to the repository (the return value of
 *          `insertIfMissing` is not consulted, so re-submitted headings
 *          are counted too).
 */
export async function ingestDocs(repo, root, opts = {}) {
    // NaN would make every budget comparison false and silently disable
    // the whole walk, so treat it the same as "not provided".
    const intOption = (value, fallback) => {
        const rounded = Math.round(value ?? fallback);
        return Number.isNaN(rounded) ? fallback : rounded;
    };
    const maxFilesLimit = Math.max(1, intOption(opts.maxFiles, MAX_FILES));
    const bodyCharsLimit = Math.max(40, intOption(opts.bodyChars, BODY_CHARS));
    const maxLevel = Math.min(6, Math.max(1, intOption(opts.maxHeadingLevel, MAX_HEADING_LEVEL)));

    let filesWalked = 0;
    let headingsIndexed = 0;
    const add = (subject, content, tags) => {
        repo.insertIfMissing({
            category: CATEGORY,
            subject,
            content,
            tags,
            source: "docs-ingest",
        });
        headingsIndexed += 1;
    };

    const seen = new Set();
    const budgetSpent = () => filesWalked >= maxFilesLimit;
    // Count a file only at the moment it is actually ingested, so the
    // reported total can never exceed the budget.
    const ingest = async (abs) => {
        seen.add(abs);
        filesWalked += 1;
        await ingestOneFile(abs, root, add, MAX_HEADINGS_PER_FILE, bodyCharsLimit, maxLevel);
    };

    // ── 1. <root>/docs/ recursive walk ────────────────────────────────
    const docsDir = join(root, "docs");
    if (await isDirectory(docsDir)) {
        const pending = [docsDir];
        while (pending.length > 0 && !budgetSpent()) {
            const dir = pending.pop();
            let entries;
            try {
                entries = await readdir(dir, { withFileTypes: true });
            }
            catch {
                // Unreadable directory: ingest is best-effort, move on.
                continue;
            }
            for (const entry of entries) {
                if (budgetSpent())
                    break;
                if (entry.name.startsWith("."))
                    continue;
                const abs = join(dir, entry.name);
                if (entry.isDirectory()) {
                    if (!SKIP_DIRS.has(entry.name))
                        pending.push(abs);
                    continue;
                }
                if (!entry.isFile() || !isMarkdown(entry.name) || seen.has(abs))
                    continue;
                await ingest(abs);
            }
        }
    }

    // ── 2. Root-level conventional docs ───────────────────────────────
    for (const name of ROOT_DOCS) {
        if (budgetSpent())
            break;
        const abs = join(root, name);
        if (seen.has(abs) || !(await isFile(abs)))
            continue;
        await ingest(abs);
    }

    return { filesWalked, headingsIndexed };
}
163
/**
 * Read one Markdown file and emit one memory per indexed heading.
 *
 * Unreadable files, files larger than `MAX_FILE_BYTES`, and files with
 * no indexed heading are skipped silently (ingest is best-effort).
 *
 * @param abs          Absolute path of the file.
 * @param root         Project root; memories carry the path relative to
 *                     it, normalised to "/" separators.
 * @param add          Sink `(subject, content, tags)`, called once per
 *                     emitted heading.
 * @param maxHeadings  Per-file cap on emitted headings.
 * @param bodyChars    Character budget for the body excerpt.
 * @param maxLevel     Deepest heading level to index.
 */
async function ingestOneFile(abs, root, add, maxHeadings, bodyChars, maxLevel) {
    let raw;
    try {
        const info = await stat(abs);
        if (info.size > MAX_FILE_BYTES)
            return;
        raw = await readFile(abs, "utf-8");
    }
    catch {
        return;
    }
    const rel = relative(root, abs).split(sep).join("/");
    const lines = raw.split("\n");
    const headings = extractHeadings(lines, maxLevel);
    if (headings.length === 0)
        return;
    // Relative path minus the Markdown extension, flattened into one tag
    // (e.g. "docs/install.md" → "docs-install").
    const fileTag = rel.replace(/\.md$|\.markdown$/i, "").replace(/[^a-zA-Z0-9]+/g, "-").toLowerCase();
    // Anchors already used in this file. Repeated heading text (every
    // release in a changelog has its own "Added") would otherwise yield
    // identical subjects, so later sections get a numeric suffix. The
    // first occurrence keeps the plain anchor.
    // NOTE(review): this assumes `insertIfMissing` de-duplicates on
    // subject — confirm against the repository implementation.
    const usedAnchors = new Set();
    for (const h of headings.slice(0, maxHeadings)) {
        const body = readFollowingBody(lines, h.lineIdx, headings, bodyChars);
        // Headings with no ASCII alphanumerics slug to "", so fall back
        // to the line number to keep the subject non-empty.
        const base = toSlug(h.text) || `line-${h.line}`;
        let anchor = base;
        for (let n = 1; usedAnchors.has(anchor); n += 1)
            anchor = `${base}-${n}`;
        usedAnchors.add(anchor);
        const subject = `docs:${rel}#${anchor}`;
        const headingTags = h.text
            .toLowerCase()
            .replace(/[^a-z0-9 ]/g, " ")
            .split(/\s+/)
            .filter((w) => w.length > 2)
            .slice(0, 5);
        // `path:line` gives the agent a precise pointer for its `read`
        // tool; the heading plus first paragraph lets a recall snippet
        // often answer the question by itself.
        const pointer = `${rel}:${h.line} ${"#".repeat(h.level)} ${h.text}`;
        add(subject, body ? `${pointer}\n${body}` : pointer, ["docs", "section", fileTag, ...headingTags]);
    }
}
204
/**
 * Collect ATX headings (`# `, `## `, …) from a file's lines.
 *
 * Setext-style underline headings (`Foo\n===`) are not parsed — rare in
 * modern projects and not worth the parser complexity.
 *
 * @param lines            File content split into lines.
 * @param maxHeadingLevel  Deepest level to keep; deeper headings are dropped.
 * @returns One `{ level, text, line, lineIdx }` per kept heading, in file
 *          order. `line` is 1-based, `lineIdx` is the 0-based array index.
 */
function extractHeadings(lines, maxHeadingLevel = MAX_HEADING_LEVEL) {
    const out = [];
    // Marker character ("`" or "~") of the fence currently open, or null.
    // A fence is closed only by its own marker, so a "~~~" line inside a
    // backtick fence (or the reverse) does not flip the state.
    let openFence = null;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const fence = /^\s*(`{3,}|~{3,})/.exec(line);
        if (fence) {
            const marker = fence[1][0];
            if (openFence === null)
                openFence = marker;
            else if (openFence === marker)
                openFence = null;
            continue;
        }
        // `# comment` inside a code block is not a heading.
        if (openFence !== null)
            continue;
        // An optional closing run of "#" counts only when preceded by
        // whitespace; otherwise the hashes are heading text ("# C#").
        const m = /^(#{1,6})\s+(.+?)(?:\s+#+)?\s*$/.exec(line);
        if (!m)
            continue;
        const level = m[1].length;
        if (level > maxHeadingLevel)
            continue;
        const text = m[2].trim();
        if (text.length === 0)
            continue;
        out.push({ level, text, line: i + 1, lineIdx: i });
    }
    return out;
}
233
/**
 * Capture the first paragraph of text after a heading, as a short
 * excerpt for the memory content.
 *
 * Scanning starts on the line after the heading and stops at the next
 * indexed heading, at the first blank line after some text has been
 * collected, or once roughly `bodyChars` characters are gathered.
 * Fenced code blocks (backtick or tilde fences) are skipped: their
 * contents add noise to search without helping recall.
 *
 * @param lines       File content split into lines.
 * @param headingIdx  0-based index of the heading's line.
 * @param headings    All indexed headings of the file (objects with `lineIdx`).
 * @param bodyChars   Maximum length of the returned excerpt.
 * @returns The excerpt joined with single spaces, cut to `bodyChars`
 *          characters with a trailing "…" when truncated; "" if the
 *          section has no body text.
 */
function readFollowingBody(lines, headingIdx, headings, bodyChars = BODY_CHARS) {
    const sectionEnd = headings.find((h) => h.lineIdx > headingIdx)?.lineIdx ?? lines.length;
    const paragraph = [];
    let collected = 0;
    // Marker character of the fence currently open, or null. A fence is
    // closed only by a line starting with the same marker character.
    let openFence = null;
    for (let i = headingIdx + 1; i < sectionEnd; i++) {
        const fence = /^\s*(`{3,}|~{3,})/.exec(lines[i]);
        if (fence) {
            const marker = fence[1][0];
            if (openFence === null)
                openFence = marker;
            else if (openFence === marker)
                openFence = null;
            continue;
        }
        if (openFence !== null)
            continue;
        const text = lines[i].trim();
        if (text.length === 0) {
            if (paragraph.length > 0)
                break; // one full paragraph collected
            continue; // still skipping blank lines before the first text
        }
        paragraph.push(text);
        collected += text.length + 1; // +1 for the joining space
        if (collected >= bodyChars)
            break;
    }
    const joined = paragraph.join(" ");
    return joined.length > bodyChars ? `${joined.slice(0, bodyChars - 1)}…` : joined;
}
264
/**
 * Turn heading text into the anchor part of a memory subject.
 *
 * The text is lower-cased, every run of characters outside `a-z0-9`
 * becomes a single "-", leading and trailing dashes are dropped, and
 * the result is cut to 40 characters. The same heading text always maps
 * to the same slug, so subjects stay stable across re-runs; an edited
 * heading produces a new subject.
 */
function toSlug(s) {
    const words = s
        .toLowerCase()
        .split(/[^a-z0-9]+/)
        .filter((word) => word !== "");
    return words.join("-").slice(0, 40);
}
274
/**
 * True when a file name ends in `.md` or `.markdown` (case-insensitive).
 *
 * This is an extension check only. The root README is kept out by the
 * callers rather than here: the recursive walk starts at `<root>/docs/`
 * and `ROOT_DOCS` does not list README. A README inside `docs/` (the
 * usual docs-index file) is accepted.
 */
function isMarkdown(name) {
    return name.match(/\.(?:markdown|md)$/i) !== null;
}
282
/**
 * Resolve to true when `p` exists and is a directory. Any `stat`
 * failure (missing path, permission error, …) resolves to false
 * instead of rejecting.
 */
async function isDirectory(p) {
    return stat(p)
        .then((info) => info.isDirectory())
        .catch(() => false);
}
291
/**
 * Resolve to true when `p` exists and is a regular file. Any `stat`
 * failure (missing path, permission error, …) resolves to false
 * instead of rejecting.
 */
async function isFile(p) {
    return stat(p)
        .then((info) => info.isFile())
        .catch(() => false);
}
300
+ /* ── Design notes (kept here on purpose) ─────────────────────────────
301
+ *
302
+ * Things considered and rejected:
303
+ *
304
+ * - Walking *all* .md files under `<root>/`: too noisy in repos
305
+ * that vendor docs (e.g. translations of foreign-language READMEs,
306
+ * license texts under packages/). The convention is docs/, and
307
+ * ROOT_DOCS handles the rest.
308
+ *
309
+ * - Indexing the FULL prose of each section: blows up the memory
310
+ * store, and recall already has the path:line pointer for the
311
+ * agent to read the rest on demand. First paragraph is the right
312
+ * balance between "snippet alone often answers" and "doesn't
313
+ * duplicate the whole file."
314
+ *
315
+ * - Tracking heading hierarchy (e.g. "H2 Install > H3 Linux"):
316
+ * would be valuable but doubles the snippet length and changes
317
+ * the recall ranking in ways I don't want to assess without
318
+ * measurement. Punted.
319
+ *
320
+ * - Honouring Markdown links / TOCs: a real outline parser is
321
+ * ~200 lines and the cost-benefit is poor — the heading itself
322
+ * is the actionable token; the link target the agent can grep
323
+ * for if needed.
324
+ * ────────────────────────────────────────────────────────────────────
325
+ */
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Git history ingestion — fully convention-agnostic.
3
+ *
4
+ * The earlier version classified commits by parsing the *subject
5
+ * line* (conventional commits, bracket tags, gitmoji, English
6
+ * keywords). That is unreliable on real repositories, many of which
7
+ * have no commit-message culture at all ("wip", "fix", ".", "update",
8
+ * non-English text, empty subjects). Message-derived "flavor" was
9
+ * noise dressed up as signal.
10
+ *
11
+ * This version derives everything from STRUCTURE — facts about what
12
+ * the commit physically did, which are true regardless of how (or
13
+ * whether) the author described it:
14
+ *
15
+ * - diff shape : files touched, lines +/-, files created/deleted,
16
+ * net direction. From `git log --numstat --summary`.
17
+ * - co-change : pairs of files modified in the same commit,
18
+ * counted across history (mechanical/huge commits
19
+ * skipped).
20
+ * - churn : how often each file changes — a stability signal.
21
+ * - recency : which files were touched in the most recent
22
+ * commits.
23
+ *
24
+ * The commit subject is still STORED, verbatim, inside the memory
25
+ * content — it is text the agent may legitimately search for — but it
26
+ * never drives tags or categorisation. It is data, not signal.
27
+ *
28
+ * Output is hard-capped by `gitHistoryDepth`. Re-running ingest is
29
+ * idempotent thanks to insertIfMissing on the repository.
30
+ */
31
+ import type { MemoryRepository } from "../store/repository.js";
32
+ /** How a commit affected a single file. */
+ type FileStatus = "created" | "deleted" | "modified";
33
+ /**
+ * One file's entry in a commit's diff.
+ * NOTE(review): field meanings are inferred from the module header
+ * (`git log --numstat --summary`); the implementation is not visible
+ * from this declaration file — confirm against git.js.
+ */
+ interface FileChange {
34
+ /** Path of the touched file. */
+ path: string;
35
+ /** Presumably the number of lines the commit added to the file — TODO confirm. */
+ added: number;
36
+ /** Presumably the number of lines the commit deleted from the file — TODO confirm. */
+ deleted: number;
37
+ /** Whether the commit created, deleted, or modified the file. */
+ status: FileStatus;
38
+ }
39
+ /**
+ * Counters returned by `ingestGitHistory`.
+ * NOTE(review): the undocumented fields below are described from their
+ * names and the module header only; the implementation is not visible
+ * from this declaration file — confirm against git.js.
+ */
+ export interface GitIngestResult {
40
+ /** Presumably the number of commits read from history — TODO confirm. */
+ scanned: number;
41
+ /** Presumably the number of per-commit memories produced — TODO confirm. */
+ commitMemories: number;
42
+ /** Presumably the number of co-change (file pair) memories produced — TODO confirm. */
+ coChangeMemories: number;
43
+ /** Presumably the number of churn (change frequency) memories produced — TODO confirm. */
+ churnMemories: number;
44
+ /** Presumably the number of recency memories produced — TODO confirm. */
+ recencyMemories: number;
45
+ /**
46
+ * Commits skipped for their own per-commit memory because they are
47
+ * balanced churn (additions ≈ deletions — content moved/reformatted,
48
+ * not created). They still feed co-change and churn signals.
49
+ */
50
+ balancedChurnSkipped: number;
51
+ /** Distribution of structural shape tags across commit memories. */
52
+ shapeTagCounts: Record<string, number>;
53
+ }
54
+ /**
55
+ * True when a commit is "balanced churn": substantial, and its added
56
+ * line count is within ~8 % of its deleted count. That near-equality
57
+ * is the convention-free fingerprint of content being *moved or
58
+ * reformatted* rather than written — a file rename (with `--no-renames`
59
+ * a rename shows as +N to the new path, -N from the old), a `.rst`→
60
+ * `.md` doc migration, a reformat. Such commits flood keyword recall
61
+ * (they touch keyword-named files) while carrying no logic signal, so
62
+ * they get no per-commit memory — exactly as merge commits don't.
63
+ *
64
+ * Deliberately conservative: the ≥ 25-line floor spares small commits,
65
+ * and 92 % balance is tight enough that a genuine logic change (which
66
+ * almost never lands added ≈ deleted to within 8 %) is not caught.
67
+ * Pure arithmetic on the diff stat — no message parsing, no language
68
+ * or commit-convention assumptions.
69
+ */
70
+ export declare function isBalancedChurnCommit(files: FileChange[]): boolean;
71
+ /**
+ * Ingest the git history of the repository at `root` into `repo` and
+ * resolve with the counters for the run.
+ * NOTE(review): `depth` presumably is the `gitHistoryDepth` cap named in
+ * the module header; the meaning and defaults of `coChangeMaxCommits`
+ * and `coChangeMinOccurrences` are not visible from this declaration —
+ * confirm against git.js.
+ */
+ export declare function ingestGitHistory(repo: MemoryRepository, root: string, depth: number, coChangeMaxCommits?: number, coChangeMinOccurrences?: number): Promise<GitIngestResult>;
72
+ /**
73
+ * Derive tags purely from what the commit physically did. Every tag
74
+ * here is a fact about the diff, not an interpretation of intent.
75
+ */
76
+ export declare function deriveShapeTags(files: FileChange[]): string[];
77
+ export {};