@rarusoft/dendrite-wiki 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/README.md +79 -0
  2. package/dist/api-extractor/extract.js +269 -0
  3. package/dist/api-extractor/language-extractor.js +15 -0
  4. package/dist/api-extractor/python-extractor.js +358 -0
  5. package/dist/api-extractor/render.js +195 -0
  6. package/dist/api-extractor/tree-sitter-extractor.js +1079 -0
  7. package/dist/api-extractor/types.js +11 -0
  8. package/dist/api-extractor/typescript-extractor.js +50 -0
  9. package/dist/api-extractor/walk.js +178 -0
  10. package/dist/api-reference.js +438 -0
  11. package/dist/benchmark-events.js +129 -0
  12. package/dist/benchmark.js +270 -0
  13. package/dist/binder-export.js +381 -0
  14. package/dist/canonical-target.js +168 -0
  15. package/dist/chart-insert.js +377 -0
  16. package/dist/chart-prompts.js +414 -0
  17. package/dist/context-cache.js +98 -0
  18. package/dist/contradicts-shipped-memory.js +232 -0
  19. package/dist/diff-context.js +142 -0
  20. package/dist/doctor.js +220 -0
  21. package/dist/generated-docs.js +219 -0
  22. package/dist/i18n.js +71 -0
  23. package/dist/index.js +49 -0
  24. package/dist/librarian.js +255 -0
  25. package/dist/maintenance-actions.js +244 -0
  26. package/dist/maintenance-inbox.js +842 -0
  27. package/dist/maintenance-runner.js +62 -0
  28. package/dist/page-drift.js +225 -0
  29. package/dist/page-inbox.js +168 -0
  30. package/dist/report-export.js +339 -0
  31. package/dist/review-bridge.js +1386 -0
  32. package/dist/search-index.js +199 -0
  33. package/dist/store.js +1617 -0
  34. package/dist/telemetry-defaults.js +44 -0
  35. package/dist/telemetry-report.js +263 -0
  36. package/dist/telemetry.js +544 -0
  37. package/dist/wiki-synthesis.js +901 -0
  38. package/package.json +35 -0
  39. package/src/api-extractor/extract.ts +333 -0
  40. package/src/api-extractor/language-extractor.ts +37 -0
  41. package/src/api-extractor/python-extractor.ts +380 -0
  42. package/src/api-extractor/render.ts +267 -0
  43. package/src/api-extractor/tree-sitter-extractor.ts +1210 -0
  44. package/src/api-extractor/types.ts +41 -0
  45. package/src/api-extractor/typescript-extractor.ts +56 -0
  46. package/src/api-extractor/walk.ts +209 -0
  47. package/src/api-reference.ts +552 -0
  48. package/src/benchmark-events.ts +216 -0
  49. package/src/benchmark.ts +376 -0
  50. package/src/binder-export.ts +437 -0
  51. package/src/canonical-target.ts +192 -0
  52. package/src/chart-insert.ts +478 -0
  53. package/src/chart-prompts.ts +417 -0
  54. package/src/context-cache.ts +129 -0
  55. package/src/contradicts-shipped-memory.ts +311 -0
  56. package/src/diff-context.ts +187 -0
  57. package/src/doctor.ts +260 -0
  58. package/src/generated-docs.ts +316 -0
  59. package/src/i18n.ts +106 -0
  60. package/src/index.ts +59 -0
  61. package/src/librarian.ts +331 -0
  62. package/src/maintenance-actions.ts +314 -0
  63. package/src/maintenance-inbox.ts +1132 -0
  64. package/src/maintenance-runner.ts +85 -0
  65. package/src/page-drift.ts +292 -0
  66. package/src/page-inbox.ts +254 -0
  67. package/src/report-export.ts +392 -0
  68. package/src/review-bridge.ts +1729 -0
  69. package/src/search-index.ts +266 -0
  70. package/src/store.ts +2171 -0
  71. package/src/telemetry-defaults.ts +50 -0
  72. package/src/telemetry-report.ts +365 -0
  73. package/src/telemetry.ts +757 -0
  74. package/src/wiki-synthesis.ts +1307 -0
@@ -0,0 +1,901 @@
1
+ /**
2
+ * Synthesis providers — deterministic prompt builders for LLM-assisted wiki work.
3
+ *
4
+ * Builds structured prompts for three distinct tasks: claim synthesis (turn a page's prose
5
+ * into source-backed `[planned]`/`[current]` claims), guidance synthesis (suggest where a
6
+ * piece of agent guidance should live based on existing patterns), and proposal synthesis
7
+ * (draft a `WikiMergeGuidanceProposal` or `WikiRouteGuidanceProposal` for the maintenance
8
+ * inbox). Drift-resolution prompts assist when a page-drift finding needs an LLM to
9
+ * suggest whether to update the page, the project log, or both.
10
+ *
11
+ * No LLM is called from this module — every function returns a structured prompt the
12
+ * operator pastes into Claude/GPT/local-Ollama, then feeds the result back through the
13
+ * normal `wiki_apply_proposal` or `memory_remember` paths. This is the "agent provider"
14
+ * pattern: provider-agnostic, no API keys required by default, no opaque dependencies.
15
+ * `listOllamaModels` exists for the optional local-model path.
16
+ */
17
+ import { promises as fs } from 'node:fs';
18
+ import path from 'node:path';
19
+ import { extractWikiClaims, listProjectGuidanceFiles, listWikiPages, listWikiProposals, readWikiPage } from './store.js';
20
+ import { buildChartPrompt, parseChartResponse } from './chart-prompts.js';
21
+ const defaultOllamaUrl = 'http://localhost:11434';
22
+ // Per-provider default timeouts. Local Ollama generations on slow hardware can take
23
+ // 30-90s for the first call (cold-start of a freshly-loaded model is the worst case).
24
+ // Cloud APIs reliably respond well under 30s. The agent provider doesn't actually call
25
+ // out — it just returns a handoff prompt — so its timeout is only here for symmetry.
26
+ // All values are an upper bound; the request will return as soon as the provider does.
27
+ //
28
+ // Ollama default at 5 minutes: chart synthesis (M4 of the AI-mermaid-charts roadmap)
29
+ // regularly exceeds the previous 2-minute default for small models on CPU producing
30
+ // flowcharts with many nodes. The env var DENDRITE_WIKI_SYNTHESIS_TIMEOUT_MS overrides
31
+ // for operators with bigger workloads or beefier hardware.
32
+ const defaultSynthesisTimeoutMsByKind = {
33
+ none: 8_000,
34
+ agent: 5_000,
35
+ ollama: 300_000,
36
+ cloud: 30_000
37
+ };
38
+ const fallbackSynthesisTimeoutMs = 8_000;
39
+ const maxSynthesizedSummaryLength = 280;
40
+ const maxSynthesizedExplanationLength = 360;
41
+ const maxSynthesizedDistillationLength = 600;
42
+ const maxPromptContentLength = 4_000;
43
+ const repoRoot = path.resolve(process.cwd());
44
+ export function resolveWikiSynthesisProvider(options = {}) {
45
+ const env = options.env ?? process.env;
46
+ const kind = options.requestedKind ?? parseProviderKind(env.DENDRITE_WIKI_SYNTHESIS_PROVIDER);
47
+ // Default timeout depends on provider kind. The env var still wins for explicit overrides.
48
+ const timeoutMs = parseTimeoutMs(env.DENDRITE_WIKI_SYNTHESIS_TIMEOUT_MS, defaultSynthesisTimeoutMsByKind[kind]);
49
+ switch (kind) {
50
+ case 'none':
51
+ return {
52
+ kind,
53
+ status: 'disabled',
54
+ reason: 'Optional synthesis is disabled. Set DENDRITE_WIKI_SYNTHESIS_PROVIDER=ollama or pass provider "ollama" to this tool.',
55
+ timeoutMs
56
+ };
57
+ case 'agent':
58
+ return {
59
+ kind,
60
+ status: 'ready',
61
+ reason: 'The agent provider returns a bounded handoff prompt for the active coding agent instead of running server-side inference.',
62
+ timeoutMs
63
+ };
64
+ case 'cloud':
65
+ return resolveCloudProvider(env, timeoutMs);
66
+ case 'ollama': {
67
+ // Per-call override (e.g., from the review board model picker) wins over env.
68
+ const overrideModel = options.requestedOllamaModel?.trim() ?? '';
69
+ const model = overrideModel || env.OLLAMA_MODEL?.trim() || '';
70
+ const endpoint = env.OLLAMA_URL?.trim() || defaultOllamaUrl;
71
+ if (model.length === 0) {
72
+ return {
73
+ kind,
74
+ status: 'misconfigured',
75
+ reason: 'OLLAMA_MODEL must be set (or a model passed in the request) before the ollama provider can run.',
76
+ endpoint,
77
+ timeoutMs
78
+ };
79
+ }
80
+ return {
81
+ kind,
82
+ status: 'ready',
83
+ model,
84
+ endpoint,
85
+ timeoutMs
86
+ };
87
+ }
88
+ }
89
+ }
90
+ function resolveCloudProvider(env, timeoutMs) {
91
+ const endpoint = env.DENDRITE_WIKI_CLOUD_URL?.trim() ?? '';
92
+ const model = env.DENDRITE_WIKI_CLOUD_MODEL?.trim() ?? '';
93
+ const apiKey = env.DENDRITE_WIKI_CLOUD_API_KEY?.trim() ?? '';
94
+ if (!endpoint || !model || !apiKey) {
95
+ const missing = [
96
+ endpoint ? '' : 'DENDRITE_WIKI_CLOUD_URL',
97
+ model ? '' : 'DENDRITE_WIKI_CLOUD_MODEL',
98
+ apiKey ? '' : 'DENDRITE_WIKI_CLOUD_API_KEY'
99
+ ].filter(Boolean).join(', ');
100
+ return {
101
+ kind: 'cloud',
102
+ status: 'misconfigured',
103
+ reason: `Cloud synthesis requires ${missing}.`,
104
+ endpoint: endpoint || undefined,
105
+ model: model || undefined,
106
+ timeoutMs
107
+ };
108
+ }
109
+ return {
110
+ kind: 'cloud',
111
+ status: 'ready',
112
+ endpoint,
113
+ model,
114
+ timeoutMs
115
+ };
116
+ }
117
+ export async function synthesizeWikiProposals(options = {}) {
118
+ const provider = resolveWikiSynthesisProvider(options);
119
+ const proposals = options.proposals ?? (await listWikiProposals());
120
+ const selected = selectProposals(proposals, options.reviewSlug, options.maxItems);
121
+ return {
122
+ provider,
123
+ proposals: await Promise.all(selected.map((proposal) => synthesizeProposalSummary(proposal, provider, { fetcher: options.fetcher })))
124
+ };
125
+ }
126
+ export async function synthesizeWikiClaims(options = {}) {
127
+ const provider = resolveWikiSynthesisProvider(options);
128
+ const claims = options.claims ?? (await listStaleClaims());
129
+ const selected = selectClaims(claims, options.pageSlug, options.maxItems);
130
+ return {
131
+ provider,
132
+ claims: await Promise.all(selected.map((claim) => synthesizeStaleClaimExplanation(claim, provider, { fetcher: options.fetcher })))
133
+ };
134
+ }
135
+ export async function synthesizeWikiGuidance(options = {}) {
136
+ const provider = resolveWikiSynthesisProvider(options);
137
+ const guidanceFiles = options.guidanceFiles ?? (await listProjectGuidanceFiles());
138
+ const selected = selectGuidanceFiles(guidanceFiles, options.guidancePath, options.maxItems);
139
+ return {
140
+ provider,
141
+ guidanceFiles: await Promise.all(selected.map((guidance) => synthesizeGuidanceDistillation(guidance, provider, { fetcher: options.fetcher })))
142
+ };
143
+ }
144
+ export async function synthesizeProposalSummary(proposal, provider, options = {}) {
145
+ const synthesis = await synthesizeText(buildProposalSummaryPrompt(proposal), provider, {
146
+ fetcher: options.fetcher,
147
+ maxLength: maxSynthesizedSummaryLength,
148
+ emptyMessage: 'Synthesis provider returned an empty proposal summary.'
149
+ });
150
+ return {
151
+ reviewSlug: proposal.reviewSlug,
152
+ kind: proposal.kind,
153
+ summary: proposal.summary,
154
+ currentStateSummary: proposal.currentStateSummary,
155
+ afterApplySummary: proposal.afterApplySummary,
156
+ rationale: proposal.rationale,
157
+ synthesisStatus: synthesis.status,
158
+ synthesizedSummary: synthesis.text,
159
+ handoffPrompt: synthesis.handoffPrompt,
160
+ failureReason: synthesis.failureReason
161
+ };
162
+ }
163
+ export async function synthesizeStaleClaimExplanation(claim, provider, options = {}) {
164
+ const synthesis = await synthesizeText(buildClaimExplanationPrompt(claim), provider, {
165
+ fetcher: options.fetcher,
166
+ maxLength: maxSynthesizedExplanationLength,
167
+ emptyMessage: 'Synthesis provider returned an empty stale-claim explanation.'
168
+ });
169
+ return {
170
+ pageSlug: claim.pageSlug,
171
+ text: claim.text,
172
+ status: claim.status,
173
+ sources: claim.sources,
174
+ synthesisStatus: synthesis.status,
175
+ synthesizedExplanation: synthesis.text,
176
+ handoffPrompt: synthesis.handoffPrompt,
177
+ failureReason: synthesis.failureReason
178
+ };
179
+ }
180
+ export async function synthesizeGuidanceDistillation(guidance, provider, options = {}) {
181
+ const content = await fs.readFile(path.join(repoRoot, guidance.path), 'utf8').catch(() => '');
182
+ const synthesis = await synthesizeText(buildGuidanceDistillationPrompt(guidance, content), provider, {
183
+ fetcher: options.fetcher,
184
+ maxLength: maxSynthesizedDistillationLength,
185
+ emptyMessage: 'Synthesis provider returned an empty guidance distillation.'
186
+ });
187
+ return {
188
+ path: guidance.path,
189
+ kind: guidance.kind,
190
+ summary: guidance.summary,
191
+ synthesisStatus: synthesis.status,
192
+ synthesizedDistillation: synthesis.text,
193
+ handoffPrompt: synthesis.handoffPrompt,
194
+ failureReason: synthesis.failureReason
195
+ };
196
+ }
197
+ function selectProposals(proposals, reviewSlug, maxItems = 3) {
198
+ if (reviewSlug) {
199
+ const proposal = proposals.find((candidate) => candidate.reviewSlug === reviewSlug);
200
+ if (!proposal) {
201
+ throw new Error(`Unknown active proposal: ${reviewSlug}`);
202
+ }
203
+ return [proposal];
204
+ }
205
+ return proposals.slice(0, maxItems);
206
+ }
207
+ function selectClaims(claims, pageSlug, maxItems = 5) {
208
+ return (pageSlug ? claims.filter((claim) => claim.pageSlug === pageSlug) : claims).slice(0, maxItems);
209
+ }
210
+ function selectGuidanceFiles(guidanceFiles, guidancePath, maxItems = 3) {
211
+ return (guidancePath ? guidanceFiles.filter((guidance) => guidance.path === guidancePath) : guidanceFiles).slice(0, maxItems);
212
+ }
213
+ async function listStaleClaims() {
214
+ const pages = await listWikiPages();
215
+ const pageByPath = new Map(pages.map((page) => [page.path, page.slug]));
216
+ const claims = [];
217
+ for (const page of pages) {
218
+ const content = await readWikiPage(page.slug);
219
+ claims.push(...extractWikiClaims(page.slug, content, pageByPath).filter((claim) => claim.status !== 'current'));
220
+ }
221
+ return claims.sort((left, right) => `${left.pageSlug}:${left.text}`.localeCompare(`${right.pageSlug}:${right.text}`));
222
+ }
223
+ function parseProviderKind(value) {
224
+ switch (value?.trim()) {
225
+ case 'agent':
226
+ return 'agent';
227
+ case 'ollama':
228
+ return 'ollama';
229
+ case 'cloud':
230
+ return 'cloud';
231
+ default:
232
+ return 'none';
233
+ }
234
+ }
235
+ function parseTimeoutMs(value, fallback = fallbackSynthesisTimeoutMs) {
236
+ if (!value) {
237
+ return fallback;
238
+ }
239
+ const parsed = Number.parseInt(value, 10);
240
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
241
+ }
242
+ async function synthesizeText(prompt, provider, options) {
243
+ if (provider.status === 'disabled') {
244
+ return {
245
+ status: 'disabled',
246
+ failureReason: provider.reason
247
+ };
248
+ }
249
+ if (provider.status !== 'ready') {
250
+ return {
251
+ status: 'unavailable',
252
+ failureReason: provider.reason
253
+ };
254
+ }
255
+ if (provider.kind === 'agent') {
256
+ return {
257
+ status: 'handoff',
258
+ handoffPrompt: prompt
259
+ };
260
+ }
261
+ try {
262
+ if (provider.kind === 'cloud') {
263
+ return {
264
+ status: 'generated',
265
+ text: await requestCloudSynthesis(prompt, provider, options.fetcher ?? fetch, options.maxLength, options.emptyMessage)
266
+ };
267
+ }
268
+ return {
269
+ status: 'generated',
270
+ text: await requestOllamaSynthesis(prompt, provider, options.fetcher ?? fetch, options.maxLength, options.emptyMessage)
271
+ };
272
+ }
273
+ catch (error) {
274
+ return {
275
+ status: 'failed',
276
+ failureReason: error instanceof Error ? error.message : 'Unknown synthesis error.'
277
+ };
278
+ }
279
+ }
280
+ async function requestCloudSynthesis(prompt, provider, fetcher, maxLength, emptyMessage) {
281
+ const apiKey = process.env.DENDRITE_WIKI_CLOUD_API_KEY?.trim() ?? '';
282
+ const controller = new AbortController();
283
+ const timeoutHandle = setTimeout(() => controller.abort(), provider.timeoutMs);
284
+ try {
285
+ const response = await fetcher(provider.endpoint ?? '', {
286
+ method: 'POST',
287
+ headers: {
288
+ authorization: `Bearer ${apiKey}`,
289
+ 'content-type': 'application/json'
290
+ },
291
+ body: JSON.stringify({
292
+ model: provider.model,
293
+ messages: [
294
+ { role: 'system', content: 'You produce bounded, read-only synthesis for a local project wiki.' },
295
+ { role: 'user', content: prompt }
296
+ ],
297
+ temperature: 0
298
+ }),
299
+ signal: controller.signal
300
+ });
301
+ if (!response.ok) {
302
+ throw new Error(`Cloud synthesis request failed with status ${response.status}.`);
303
+ }
304
+ const payload = (await response.json());
305
+ const content = typeof payload.output_text === 'string'
306
+ ? payload.output_text
307
+ : typeof payload.choices?.[0]?.message?.content === 'string'
308
+ ? payload.choices[0].message.content
309
+ : '';
310
+ return normalizeSynthesizedText(content, maxLength, emptyMessage);
311
+ }
312
+ catch (error) {
313
+ if (error instanceof Error && error.name === 'AbortError') {
314
+ throw new Error(`Cloud synthesis timed out after ${provider.timeoutMs}ms.`);
315
+ }
316
+ throw error;
317
+ }
318
+ finally {
319
+ clearTimeout(timeoutHandle);
320
+ }
321
+ }
322
+ async function requestOllamaSynthesis(prompt, provider, fetcher, maxLength, emptyMessage) {
323
+ const controller = new AbortController();
324
+ const timeoutHandle = setTimeout(() => controller.abort(), provider.timeoutMs);
325
+ try {
326
+ const response = await fetcher(new URL('/api/generate', provider.endpoint ?? defaultOllamaUrl), {
327
+ method: 'POST',
328
+ headers: {
329
+ 'content-type': 'application/json'
330
+ },
331
+ body: JSON.stringify({
332
+ model: provider.model,
333
+ stream: false,
334
+ prompt
335
+ }),
336
+ signal: controller.signal
337
+ });
338
+ if (!response.ok) {
339
+ throw new Error(`Ollama request failed with status ${response.status}.`);
340
+ }
341
+ const payload = (await response.json());
342
+ return normalizeSynthesizedText(typeof payload.response === 'string' ? payload.response : '', maxLength, emptyMessage);
343
+ }
344
+ catch (error) {
345
+ if (error instanceof Error && error.name === 'AbortError') {
346
+ throw new Error(`Ollama synthesis timed out after ${provider.timeoutMs}ms.`);
347
+ }
348
+ throw error;
349
+ }
350
+ finally {
351
+ clearTimeout(timeoutHandle);
352
+ }
353
+ }
354
+ function buildProposalSummaryPrompt(proposal) {
355
+ return [
356
+ 'You are summarizing a deterministic wiki maintenance proposal for a cautious reviewer.',
357
+ `Return exactly one sentence under ${maxSynthesizedSummaryLength} characters.`,
358
+ 'Mention the cleanup being suggested and the main safety boundary.',
359
+ 'Do not use markdown bullets, code fences, or extra commentary.',
360
+ '',
361
+ `Proposal kind: ${proposal.kind}`,
362
+ `Summary: ${proposal.summary}`,
363
+ `Current state: ${proposal.currentStateSummary}`,
364
+ `After apply: ${proposal.afterApplySummary}`,
365
+ `Rationale: ${proposal.rationale}`
366
+ ].join('\n');
367
+ }
368
+ function buildClaimExplanationPrompt(claim) {
369
+ const sources = claim.sources.length > 0 ? claim.sources.map((source) => `${source.label} (${source.slug})`).join(', ') : 'No linked sources.';
370
+ return [
371
+ 'You are explaining a stale or non-current wiki claim for a cautious project maintainer.',
372
+ `Return exactly one sentence under ${maxSynthesizedExplanationLength} characters.`,
373
+ 'Explain why this claim should be reviewed before it is trusted, using only the evidence below.',
374
+ 'Do not mark the claim current and do not propose a write.',
375
+ '',
376
+ `Page: ${claim.pageSlug}`,
377
+ `Status: ${claim.status}`,
378
+ `Claim: ${claim.text}`,
379
+ `Sources: ${sources}`
380
+ ].join('\n');
381
+ }
382
+ function buildGuidanceDistillationPrompt(guidance, content) {
383
+ return [
384
+ 'You are distilling an agent guidance file into concise candidate notes for review.',
385
+ `Return at most three short bullets under ${maxSynthesizedDistillationLength} characters total.`,
386
+ 'Preserve only durable operating guidance and mention if details should stay in linked wiki pages.',
387
+ 'Do not output replacement file content and do not propose an automatic edit.',
388
+ '',
389
+ `Guidance path: ${guidance.path}`,
390
+ `Guidance kind: ${guidance.kind}`,
391
+ `Existing summary: ${guidance.summary}`,
392
+ '',
393
+ 'Guidance content excerpt:',
394
+ truncateForPrompt(content)
395
+ ].join('\n');
396
+ }
397
+ function normalizeSynthesizedText(value, maxLength, emptyMessage) {
398
+ const normalized = value.replace(/\s+/g, ' ').trim();
399
+ if (normalized.length === 0) {
400
+ throw new Error(emptyMessage);
401
+ }
402
+ if (normalized.length > maxLength) {
403
+ throw new Error(`Synthesis provider returned ${normalized.length} characters, which exceeds the ${maxLength} character limit.`);
404
+ }
405
+ return normalized;
406
+ }
407
+ function truncateForPrompt(value) {
408
+ const normalized = value.trim();
409
+ if (normalized.length <= maxPromptContentLength) {
410
+ return normalized;
411
+ }
412
+ return `${normalized.slice(0, maxPromptContentLength)}\n[truncated]`;
413
+ }
414
+ // =============================================================================
415
+ // PAGE-DRIFT RESOLUTION SYNTHESIS
416
+ // =============================================================================
417
+ //
418
+ // The maintenance review board's drift findings ask the operator to either
419
+ // rewrite a page's first paragraph or snooze the finding. Asking the operator
420
+ // to draft prose from scratch is hostile UX — they don't know what the page
421
+ // currently says, what recent activity has been about, or what new wording
422
+ // would close the vocabulary gap. This synthesizer flips the workflow:
423
+ // the system gathers the evidence, asks the configured AI provider for a
424
+ // proposed replacement, and the operator just approves / regenerates / snoozes.
425
+ //
426
+ // The synthesizer also recognizes a "this is session noise, recommend snooze"
427
+ // outcome — if the AI examines the evidence and concludes the drift signal
428
+ // shouldn't be acted on, it returns a snooze recommendation with reasoning
429
+ // instead of a replacement paragraph.
430
+ import { extractPageIntent, extractRecentEntriesMentioningPage } from './page-drift.js';
431
+ import { pagePathFromSlug } from './store.js';
432
+ const maxSynthesizedFirstParagraphLength = 800;
433
+ const maxRecentActivityEntriesShown = 6;
434
+ export async function synthesizeWikiDriftResolution(slug, options = {}) {
435
+ // The review board's model picker passes ollamaModel as a UX-friendly shortcut.
436
+ // It implies requestedKind='ollama' (the picker only makes sense for ollama).
437
+ const resolverOptions = options.ollamaModel?.trim()
438
+ ? {
439
+ ...options,
440
+ requestedKind: 'ollama',
441
+ requestedOllamaModel: options.ollamaModel
442
+ }
443
+ : options;
444
+ const provider = resolveWikiSynthesisProvider(resolverOptions);
445
+ const evidence = await gatherDriftEvidence(slug);
446
+ // If we couldn't even gather the evidence (page missing, no activity), return early
447
+ // with a snooze recommendation — there's nothing for the AI to chew on.
448
+ if (!evidence.currentIntent) {
449
+ return {
450
+ provider,
451
+ evidence,
452
+ suggestion: {
453
+ outcome: 'unavailable',
454
+ status: 'failed',
455
+ failureReason: `Could not read page intent for ${slug}.`
456
+ }
457
+ };
458
+ }
459
+ if (evidence.recentActivityEntries.length === 0) {
460
+ return {
461
+ provider,
462
+ evidence,
463
+ suggestion: {
464
+ outcome: 'snooze-recommended',
465
+ status: 'generated',
466
+ reasoning: 'No recent project-log activity mentions this page right now. The drift signal has nothing to compare against — snoozing is safer than guessing at a rewrite.'
467
+ }
468
+ };
469
+ }
470
+ const prompt = buildDriftResolutionPrompt(evidence);
471
+ const result = await synthesizeText(prompt, provider, {
472
+ fetcher: options.fetcher,
473
+ maxLength: maxSynthesizedFirstParagraphLength,
474
+ emptyMessage: 'Synthesis provider returned no text for the drift resolution.'
475
+ });
476
+ if (result.status === 'handoff') {
477
+ return {
478
+ provider,
479
+ evidence,
480
+ suggestion: {
481
+ outcome: 'replacement',
482
+ status: 'handoff',
483
+ handoffPrompt: result.handoffPrompt
484
+ }
485
+ };
486
+ }
487
+ if (result.status === 'generated' && result.text) {
488
+ const parsed = parseDriftResolutionResponse(result.text);
489
+ return {
490
+ provider,
491
+ evidence,
492
+ suggestion: { ...parsed, status: 'generated' }
493
+ };
494
+ }
495
+ return {
496
+ provider,
497
+ evidence,
498
+ suggestion: {
499
+ outcome: 'unavailable',
500
+ status: result.status,
501
+ failureReason: result.failureReason
502
+ }
503
+ };
504
+ }
505
+ async function gatherDriftEvidence(slug) {
506
+ const pageContent = await fs.readFile(pagePathFromSlug(slug), 'utf8').catch(() => '');
507
+ const projectLog = await fs.readFile(pagePathFromSlug('project-log'), 'utf8').catch(() => '');
508
+ const intent = pageContent ? extractPageIntent(pageContent) : '';
509
+ const match = projectLog
510
+ ? extractRecentEntriesMentioningPage(projectLog, slug, maxRecentActivityEntriesShown, 7)
511
+ : { entries: [], distinctDays: 0 };
512
+ return {
513
+ slug,
514
+ currentIntent: intent,
515
+ recentActivityEntries: match.entries,
516
+ matchedDistinctDays: match.distinctDays
517
+ };
518
+ }
519
+ function buildDriftResolutionPrompt(evidence) {
520
+ const activityBlock = evidence.recentActivityEntries
521
+ .map((entry, idx) => `${idx + 1}. ${entry}`)
522
+ .join('\n');
523
+ return [
524
+ `You are helping resolve a "page drift" finding on a project wiki page (slug: ${evidence.slug}).`,
525
+ '',
526
+ 'Page drift fires when the page\'s first paragraph (its stated intent) does not share much vocabulary with recent project-log entries that mention the page. The hypothesis is that the page may have outgrown its summary.',
527
+ '',
528
+ 'CURRENT FIRST PARAGRAPH (the page\'s stated intent — title + first paragraph):',
529
+ `"""${truncateForPrompt(evidence.currentIntent)}"""`,
530
+ '',
531
+ `RECENT PROJECT-LOG ENTRIES MENTIONING THIS PAGE (last 7 days, ${evidence.matchedDistinctDays} distinct day${evidence.matchedDistinctDays === 1 ? '' : 's'}):`,
532
+ activityBlock,
533
+ '',
534
+ 'Decide ONE of two outcomes:',
535
+ '',
536
+ '1. The activity reflects a real shift in what the page is about. Generate a replacement first paragraph (1-3 sentences, plain prose, no markdown headings) that better describes what the page is now actually about. The replacement should keep the same level of abstraction as the current intent — it summarizes the page, it does not list every recent change.',
537
+ '',
538
+ '2. The activity is just session noise (a temporary burst of unrelated work, or implementation detail that doesn\'t belong in the page summary). Recommend snooze instead.',
539
+ '',
540
+ 'Respond in EXACTLY one of these two formats and nothing else:',
541
+ '',
542
+ 'REPLACEMENT: <one to three sentence replacement first paragraph>',
543
+ 'REASONING: <one sentence explaining why this rewrite captures the page better>',
544
+ '',
545
+ 'OR',
546
+ '',
547
+ 'SNOOZE: <one sentence reason — what makes this look like noise rather than real drift>'
548
+ ].join('\n');
549
+ }
550
+ export async function listOllamaModels(options = {}) {
551
+ const env = options.env ?? process.env;
552
+ const endpoint = env.OLLAMA_URL?.trim() || defaultOllamaUrl;
553
+ const fetcher = options.fetcher ?? fetch;
554
+ const timeoutMs = options.timeoutMs ?? parseTimeoutMs(env.DENDRITE_WIKI_SYNTHESIS_TIMEOUT_MS);
555
+ const controller = new AbortController();
556
+ const timeoutHandle = setTimeout(() => controller.abort(), Math.min(timeoutMs, 5_000));
557
+ try {
558
+ const response = await fetcher(new URL('/api/tags', endpoint), {
559
+ signal: controller.signal
560
+ });
561
+ if (!response.ok) {
562
+ return {
563
+ endpoint,
564
+ status: 'error',
565
+ models: [],
566
+ failureReason: `Ollama returned HTTP ${response.status}`
567
+ };
568
+ }
569
+ const payload = (await response.json());
570
+ const rawModels = Array.isArray(payload.models) ? payload.models : [];
571
+ const models = rawModels.flatMap((entry) => {
572
+ if (!entry || typeof entry !== 'object')
573
+ return [];
574
+ const e = entry;
575
+ if (typeof e.name !== 'string' || !e.name.trim())
576
+ return [];
577
+ const details = (e.details && typeof e.details === 'object') ? e.details : {};
578
+ return [{
579
+ name: e.name,
580
+ size: typeof e.size === 'number' ? e.size : undefined,
581
+ modifiedAt: typeof e.modified_at === 'string'
582
+ ? e.modified_at
583
+ : typeof e.modifiedAt === 'string' ? e.modifiedAt : undefined,
584
+ details: {
585
+ family: typeof details.family === 'string' ? details.family : undefined,
586
+ parameterSize: typeof details.parameter_size === 'string'
587
+ ? details.parameter_size
588
+ : typeof details.parameterSize === 'string' ? details.parameterSize : undefined
589
+ }
590
+ }];
591
+ });
592
+ // Sort alphabetical for stable UX in the picker.
593
+ models.sort((left, right) => left.name.localeCompare(right.name));
594
+ return { endpoint, status: 'ok', models };
595
+ }
596
+ catch (error) {
597
+ const failureReason = error instanceof Error
598
+ ? (error.name === 'AbortError' ? `Ollama did not respond within ${Math.min(timeoutMs, 5_000)}ms — is the server running on ${endpoint}?` : error.message)
599
+ : String(error);
600
+ return {
601
+ endpoint,
602
+ status: 'unreachable',
603
+ models: [],
604
+ failureReason
605
+ };
606
+ }
607
+ finally {
608
+ clearTimeout(timeoutHandle);
609
+ }
610
+ }
611
+ function parseDriftResolutionResponse(text) {
612
+ const normalized = text.replace(/\r\n/g, '\n').trim();
613
+ // Snooze recommendation
614
+ const snoozeMatch = normalized.match(/^SNOOZE:\s*(.+?)$/im);
615
+ if (snoozeMatch) {
616
+ return {
617
+ outcome: 'snooze-recommended',
618
+ reasoning: snoozeMatch[1].trim()
619
+ };
620
+ }
621
+ // Replacement (with optional REASONING line)
622
+ const replacementMatch = normalized.match(/^REPLACEMENT:\s*([\s\S]+?)(?=\n\s*REASONING:|$)/im);
623
+ if (replacementMatch) {
624
+ const replacementText = replacementMatch[1].trim();
625
+ const reasoningMatch = normalized.match(/^REASONING:\s*(.+?)$/im);
626
+ return {
627
+ outcome: 'replacement',
628
+ text: replacementText,
629
+ reasoning: reasoningMatch?.[1].trim()
630
+ };
631
+ }
632
+ // Provider didn't follow the format — treat the whole response as a candidate
633
+ // replacement so the operator can still see it. They can edit before applying.
634
+ return {
635
+ outcome: 'replacement',
636
+ text: normalized,
637
+ reasoning: 'Provider did not follow the structured format; using full response as the candidate replacement.'
638
+ };
639
+ }
640
+ const CHART_SYNTHESIS_MAX_LENGTH = 4_096;
641
/**
 * Synthesize a Mermaid chart for a wiki page via the configured provider.
 *
 * @param {object} input - Carries `chartKind`, `context`, and `intent`.
 * @param {object} [options] - Optional `ollamaModel` override and `fetcher`.
 * @returns {Promise<object>} `{ provider, status, durationMs, ... }` where the
 *   extra fields depend on status: `handoffPrompt` for 'handoff',
 *   `mermaidSource`/`rawResponse` for 'generated', `failureReason` otherwise.
 */
export async function synthesizeWikiChart(input, options = {}) {
    // An explicit Ollama model override forces provider resolution onto Ollama.
    let effectiveOptions = options;
    if (options.ollamaModel?.trim()) {
        effectiveOptions = { ...options, requestedKind: 'ollama', requestedOllamaModel: options.ollamaModel };
    }
    const provider = resolveWikiSynthesisProvider(effectiveOptions);
    const prompt = buildChartPrompt({ kind: input.chartKind, context: input.context, intent: input.intent });
    const begunAt = Date.now();
    const result = await synthesizeText(prompt, provider, {
        fetcher: options.fetcher,
        maxLength: CHART_SYNTHESIS_MAX_LENGTH,
        emptyMessage: 'Synthesis provider returned no text for the chart request.'
    });
    const durationMs = Date.now() - begunAt;
    if (result.status === 'handoff') {
        return { provider, status: 'handoff', handoffPrompt: result.handoffPrompt, durationMs };
    }
    const generatedText = result.status === 'generated' ? result.text : undefined;
    if (generatedText) {
        return {
            provider,
            status: 'generated',
            mermaidSource: parseChartResponse(generatedText),
            rawResponse: generatedText,
            durationMs
        };
    }
    return { provider, status: result.status, failureReason: result.failureReason, durationMs };
}
// Upper bound (in characters) on a provider's auto-clean response; both the
// Ollama and cloud request helpers throw when the returned text exceeds it.
const memoryAutoCleanResponseMaxLength = 32_000;
/**
 * Decide archive / keep-and-watch verdicts for a batch of auto-clean memory
 * candidates via the configured synthesis provider.
 *
 * @param {Array<object>} candidates - Auto-clean candidates (memoryId, text, kind, ...).
 * @param {object} [options] - Optional `ollamaModel` override and `fetcher`.
 * @returns {Promise<object>} `{ provider, status, ... }` where status is one of
 *   'generated' | 'handoff' | 'disabled' | 'unavailable' | 'failed' | 'parse-failed'.
 */
export async function synthesizeMemoryAutoCleanDecisions(candidates, options = {}) {
    // Mirror synthesizeWikiChart: an explicit Ollama model forces the Ollama provider.
    const resolverOptions = options.ollamaModel?.trim()
        ? { ...options, requestedKind: 'ollama', requestedOllamaModel: options.ollamaModel }
        : options;
    const provider = resolveWikiSynthesisProvider(resolverOptions);
    if (candidates.length === 0) {
        return { provider, status: 'generated', decisions: [] };
    }
    if (provider.status === 'disabled') {
        return { provider, status: 'disabled', failureReason: provider.reason };
    }
    if (provider.status !== 'ready') {
        return { provider, status: 'unavailable', failureReason: provider.reason };
    }
    // Build the (potentially large) prompt only after the provider guards pass,
    // so disabled/unavailable providers don't pay for prompt construction.
    const prompt = buildMemoryAutoCleanPrompt(candidates);
    if (provider.kind === 'agent') {
        return { provider, status: 'handoff', handoffPrompt: prompt };
    }
    let rawResponse;
    try {
        rawResponse = provider.kind === 'cloud'
            ? await requestCloudMemoryAutoCleanResponse(prompt, provider, options.fetcher ?? fetch)
            : await requestOllamaMemoryAutoCleanResponse(prompt, provider, options.fetcher ?? fetch);
    }
    catch (error) {
        return {
            provider,
            status: 'failed',
            failureReason: error instanceof Error ? error.message : String(error)
        };
    }
    const parsed = parseMemoryAutoCleanResponse(rawResponse, candidates);
    if (!parsed.ok) {
        return { provider, status: 'parse-failed', rawResponse, failureReason: parsed.failureReason };
    }
    return { provider, status: 'generated', decisions: parsed.decisions, rawResponse };
}
/**
 * Build the archivist prompt for a batch of auto-clean candidates.
 *
 * @param {Array<{memoryId: string, text: string, kind: string, reviewFindingKind: string,
 *   ageInDays: number, recallCount: number, lastRecalledAt?: string|null, sources: number}>} candidates
 * @returns {string} Instructions, output-format rules, then one numbered entry per candidate.
 */
function buildMemoryAutoCleanPrompt(candidates) {
    const lines = [
        'You are an expert memory archivist for an AI coding agent\'s project-local memory store.',
        'For each candidate memory below, decide one of two verbs:',
        ' - "archive": junk, vague, restates the obvious, no actionable content, or a weaker duplicate of another memory.',
        ' - "keep-and-watch": concrete lessons, specific facts, decisions with context — still has signal. When uncertain, prefer this.',
        '',
        'Output format:',
        ' Return a JSON object with exactly this shape:',
        ' { "decisions": [ { "memoryId": "...", "verb": "...", "reason": "...", "confidence": 0.0 }, ... ] }',
        ' Field rules per decision:',
        ' - memoryId: string, exactly the id given in the input (e.g. "mem_abc-123").',
        ' - verb: either "archive" or "keep-and-watch" (literal string, no other values).',
        ' - reason: one short sentence (under 200 chars) explaining the choice.',
        ' - confidence: a number from 0.0 to 1.0.',
        ' Cover EVERY candidate memoryId below exactly once. No code fences. No prose outside the JSON.',
        '',
        'Guidelines:',
        ' - Memories with recallCount > 0 almost always keep-and-watch (recall proves usefulness).',
        ' - Memories under 14 days old default to keep-and-watch unless the text is clearly junk.',
        ' - High-confidence (>=0.8) for clearly junk or clearly valuable. Lower for genuinely uncertain.',
        '',
        'Candidates:'
    ];
    candidates.forEach((candidate, index) => {
        // Truncate long memory text so one verbose memory cannot blow the prompt budget.
        const text = candidate.text.length > 600 ? `${candidate.text.slice(0, 597)}...` : candidate.text;
        const lastRecalled = candidate.lastRecalledAt ? `, last recalled ${candidate.lastRecalledAt.slice(0, 10)}` : ', never recalled';
        lines.push('', `[${index + 1}] memoryId: ${candidate.memoryId}`, ` kind: ${candidate.kind}, finding: ${candidate.reviewFindingKind}, age: ${candidate.ageInDays}d, recallCount: ${candidate.recallCount}${lastRecalled}, sources: ${candidate.sources}`, ` text: ${text.replace(/\n+/g, ' ').trim()}`);
    });
    // Bug fix: the format rules above demand a JSON *object* ({ "decisions": [...] }),
    // but the closing instruction previously said "array", contradicting them and
    // nudging models toward a bare-array shape. Say "object" to match the rules.
    lines.push('', 'Return the JSON object now.');
    return lines.join('\n');
}
/**
 * Request auto-clean decisions from a local Ollama instance.
 *
 * @param {string} prompt - The archivist prompt to send.
 * @param {object} provider - Resolved provider (`model`, `timeoutMs`, optional `endpoint`).
 * @param {Function} fetcher - fetch-compatible function, injectable for testing.
 * @returns {Promise<string>} The raw JSON text the model produced.
 * @throws {Error} On non-2xx status, empty/oversized response, or timeout.
 */
async function requestOllamaMemoryAutoCleanResponse(prompt, provider, fetcher) {
    const abort = new AbortController();
    const timer = setTimeout(() => abort.abort(), provider.timeoutMs);
    try {
        const target = new URL('/api/generate', provider.endpoint ?? defaultOllamaUrl);
        const requestBody = JSON.stringify({
            model: provider.model,
            stream: false,
            prompt,
            format: 'json',
            // Keep the model resident across batches. Without this, Ollama unloads
            // after each request and the next batch pays a cold-load penalty (often
            // 30-60s on slow boxes), compounding across N batches. 15 minutes covers
            // the full auto-clean sweep with margin for slow generation.
            keep_alive: '15m'
        });
        const response = await fetcher(target, {
            method: 'POST',
            headers: { 'content-type': 'application/json' },
            body: requestBody,
            signal: abort.signal
        });
        if (!response.ok) {
            throw new Error(`Ollama request failed with status ${response.status}.`);
        }
        const payload = (await response.json());
        const raw = typeof payload.response === 'string' ? payload.response : '';
        if (raw.length === 0) {
            throw new Error('Ollama returned an empty response.');
        }
        if (raw.length > memoryAutoCleanResponseMaxLength) {
            throw new Error(`Ollama returned ${raw.length} characters, which exceeds the ${memoryAutoCleanResponseMaxLength} character limit for auto-clean decisions.`);
        }
        return raw;
    }
    catch (error) {
        // Translate the abort into an actionable timeout message; rethrow the rest.
        if (error instanceof Error && error.name === 'AbortError') {
            throw new Error(`Ollama auto-clean synthesis timed out after ${provider.timeoutMs}ms.`);
        }
        throw error;
    }
    finally {
        clearTimeout(timer);
    }
}
/**
 * Request auto-clean decisions from the cloud (OpenAI-style chat) provider.
 *
 * @param {string} prompt - The archivist prompt to send as the user message.
 * @param {object} provider - Resolved provider (`model`, `timeoutMs`, optional `endpoint`).
 * @param {Function} fetcher - fetch-compatible function, injectable for testing.
 * @returns {Promise<string>} The raw JSON text the model produced.
 * @throws {Error} On non-2xx status, empty/oversized response, or timeout.
 */
async function requestCloudMemoryAutoCleanResponse(prompt, provider, fetcher) {
    const apiKey = process.env.DENDRITE_WIKI_CLOUD_API_KEY?.trim() ?? '';
    const abort = new AbortController();
    const timer = setTimeout(() => abort.abort(), provider.timeoutMs);
    try {
        const requestBody = JSON.stringify({
            model: provider.model,
            messages: [
                { role: 'system', content: 'You produce strict JSON output for an AI memory archivist task. No prose. No fences.' },
                { role: 'user', content: prompt }
            ],
            temperature: 0
        });
        const response = await fetcher(provider.endpoint ?? '', {
            method: 'POST',
            headers: {
                authorization: `Bearer ${apiKey}`,
                'content-type': 'application/json'
            },
            body: requestBody,
            signal: abort.signal
        });
        if (!response.ok) {
            throw new Error(`Cloud auto-clean request failed with status ${response.status}.`);
        }
        const payload = (await response.json());
        // Accept both response shapes: flat `output_text` and chat `choices[0].message.content`.
        let content = '';
        if (typeof payload.output_text === 'string') {
            content = payload.output_text;
        }
        else if (typeof payload.choices?.[0]?.message?.content === 'string') {
            content = payload.choices[0].message.content;
        }
        if (!content) {
            throw new Error('Cloud provider returned an empty response.');
        }
        if (content.length > memoryAutoCleanResponseMaxLength) {
            throw new Error(`Cloud provider returned ${content.length} characters, which exceeds the ${memoryAutoCleanResponseMaxLength} character limit.`);
        }
        return content;
    }
    catch (error) {
        // Translate the abort into an actionable timeout message; rethrow the rest.
        if (error instanceof Error && error.name === 'AbortError') {
            throw new Error(`Cloud auto-clean synthesis timed out after ${provider.timeoutMs}ms.`);
        }
        throw error;
    }
    finally {
        clearTimeout(timer);
    }
}
/**
 * Parse a provider's auto-clean response into validated decisions.
 *
 * @param {string} text - Raw provider output (may carry fences/preamble).
 * @param {Array<{memoryId: string}>} candidates - The candidates sent in the prompt;
 *   only decisions whose memoryId matches one of these are accepted.
 * @returns {{ok: true, decisions: Array<object>} | {ok: false, failureReason: string}}
 */
function parseMemoryAutoCleanResponse(text, candidates) {
    const validIds = new Set(candidates.map(({ memoryId }) => memoryId));
    let root;
    try {
        root = JSON.parse(stripJsonWrapping(text));
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        return { ok: false, failureReason: `Response was not valid JSON: ${detail}` };
    }
    // Collect anything decision-shaped from the tree. This tolerates every shape
    // local models tend to emit under `format: 'json'`: a bare array, a wrapped
    // object ({ decisions: [...] } / { results: [...] }), a keyed map
    // ({ mem_abc: { verb, reason } }), or arbitrary nesting of the above.
    const accepted = [];
    const seen = new Set();
    for (const entry of collectDecisionLikeObjects(root)) {
        const memoryId = typeof entry.memoryId === 'string' ? entry.memoryId : '';
        if (!memoryId || !validIds.has(memoryId) || seen.has(memoryId)) {
            continue;
        }
        const verb = typeof entry.verb === 'string' ? entry.verb : '';
        if (verb !== 'archive' && verb !== 'keep-and-watch') {
            continue;
        }
        const reason = typeof entry.reason === 'string' ? entry.reason : '';
        if (!reason) {
            continue;
        }
        // Clamp confidence into [0, 1]; anything non-numeric defaults to 0.5.
        const rawConfidence = typeof entry.confidence === 'number' ? entry.confidence : Number.NaN;
        const confidence = Number.isFinite(rawConfidence) ? Math.min(1, Math.max(0, rawConfidence)) : 0.5;
        seen.add(memoryId);
        accepted.push({ memoryId, verb, reason, confidence });
    }
    if (accepted.length === 0) {
        return { ok: false, failureReason: 'No decisions in the response matched the candidate IDs.' };
    }
    return { ok: true, decisions: accepted };
}
// Recursively scan a parsed JSON value for objects that look like auto-clean
// decisions (anything carrying both `memoryId` and `verb` strings). Tolerates
// arbitrary nesting/wrapping, plus the "keyed map" shape where memory ids are
// the object keys: `{ mem_abc: { verb, reason } }`.
function collectDecisionLikeObjects(value) {
    // Falsy roots (null, undefined, '', 0, ...) carry no decisions.
    if (!value) {
        return [];
    }
    if (Array.isArray(value)) {
        const found = [];
        for (const item of value) {
            found.push(...collectDecisionLikeObjects(item));
        }
        return found;
    }
    if (typeof value !== 'object') {
        return [];
    }
    const record = value;
    if (typeof record.memoryId === 'string' && typeof record.verb === 'string') {
        return [record];
    }
    const pairs = Object.entries(record);
    // Keyed-map fallback: when every key looks like a memory id (`mem_*`) and
    // every value is a verb-bearing object, lift the keys into memoryIds.
    const isKeyedMap = pairs.length > 0 && pairs.every(([id, decision]) => id.startsWith('mem_') &&
        decision !== null && typeof decision === 'object' &&
        typeof decision.verb === 'string');
    if (isKeyedMap) {
        return pairs.map(([id, decision]) => ({ memoryId: id, ...decision }));
    }
    // Otherwise recurse into each value looking for nested decision containers.
    const found = [];
    for (const [, nested] of pairs) {
        found.push(...collectDecisionLikeObjects(nested));
    }
    return found;
}
/**
 * Leniently peel common wrappers off model output before JSON.parse.
 *
 * Some local models wrap JSON in code fences or add a prose preamble; strip
 * the most common offenders and trim. The parser downstream tolerates object
 * and array roots alike, so no surgical `[...]` extraction is needed.
 *
 * @param {string} text - Raw model output.
 * @returns {string} Text trimmed down to (hopefully) a parseable JSON root.
 */
function stripJsonWrapping(text) {
    // Drop any prose preamble before the first `{` or `[`.
    const withoutPreamble = text.replace(/^[^{[]*([{[])/, '$1');
    // Remove a leading ``` / ```json fence if one survived the preamble strip.
    const withoutOpenFence = withoutPreamble.replace(/^```(?:json)?\s*/i, '');
    // Remove a trailing ``` fence.
    const withoutCloseFence = withoutOpenFence.replace(/\s*```\s*$/i, '');
    return withoutCloseFence.trim();
}