@pella-labs/pinakes 0.3.14 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/README.md +16 -7
  2. package/dist/cli/audit-wiki.d.ts +45 -1
  3. package/dist/cli/audit-wiki.d.ts.map +1 -1
  4. package/dist/cli/audit-wiki.js +348 -80
  5. package/dist/cli/audit-wiki.js.map +1 -1
  6. package/dist/cli/claims.d.ts +49 -0
  7. package/dist/cli/claims.d.ts.map +1 -0
  8. package/dist/cli/claims.js +169 -0
  9. package/dist/cli/claims.js.map +1 -0
  10. package/dist/cli/contradiction.d.ts +46 -28
  11. package/dist/cli/contradiction.d.ts.map +1 -1
  12. package/dist/cli/contradiction.js +182 -115
  13. package/dist/cli/contradiction.js.map +1 -1
  14. package/dist/cli/index.js +4 -2
  15. package/dist/cli/index.js.map +1 -1
  16. package/dist/cli/progress.d.ts +19 -0
  17. package/dist/cli/progress.d.ts.map +1 -0
  18. package/dist/cli/progress.js +44 -0
  19. package/dist/cli/progress.js.map +1 -0
  20. package/dist/cli/serve.js +1 -1
  21. package/dist/db/client.js +1 -1
  22. package/dist/db/migrations/0003_add_pinakes_claims.sql +13 -0
  23. package/dist/db/migrations/0004_add_confidence_score.sql +12 -0
  24. package/dist/db/migrations/meta/_journal.json +14 -0
  25. package/dist/db/schema.d.ts +161 -1
  26. package/dist/db/schema.d.ts.map +1 -1
  27. package/dist/db/schema.js +24 -1
  28. package/dist/db/schema.js.map +1 -1
  29. package/dist/gate/confidence.d.ts +82 -0
  30. package/dist/gate/confidence.d.ts.map +1 -0
  31. package/dist/gate/confidence.js +190 -0
  32. package/dist/gate/confidence.js.map +1 -0
  33. package/dist/ingest/ingester.d.ts.map +1 -1
  34. package/dist/ingest/ingester.js +4 -3
  35. package/dist/ingest/ingester.js.map +1 -1
  36. package/dist/ingest/repo-mirror.d.ts.map +1 -1
  37. package/dist/ingest/repo-mirror.js +5 -1
  38. package/dist/ingest/repo-mirror.js.map +1 -1
  39. package/dist/init/copy.d.ts.map +1 -1
  40. package/dist/init/copy.js +9 -0
  41. package/dist/init/copy.js.map +1 -1
  42. package/dist/init/scanner.js +7 -0
  43. package/dist/init/scanner.js.map +1 -1
  44. package/dist/llm/provider.d.ts.map +1 -1
  45. package/dist/llm/provider.js +19 -5
  46. package/dist/llm/provider.js.map +1 -1
  47. package/dist/mcp/tools/search.d.ts.map +1 -1
  48. package/dist/mcp/tools/search.js +2 -2
  49. package/dist/mcp/tools/search.js.map +1 -1
  50. package/dist/retrieval/fts.d.ts +1 -0
  51. package/dist/retrieval/fts.d.ts.map +1 -1
  52. package/dist/retrieval/fts.js +18 -2
  53. package/dist/retrieval/fts.js.map +1 -1
  54. package/dist/retrieval/hybrid.d.ts +1 -0
  55. package/dist/retrieval/hybrid.d.ts.map +1 -1
  56. package/dist/retrieval/hybrid.js +5 -0
  57. package/dist/retrieval/hybrid.js.map +1 -1
  58. package/dist/retrieval/vec.d.ts +1 -0
  59. package/dist/retrieval/vec.d.ts.map +1 -1
  60. package/dist/retrieval/vec.js +17 -2
  61. package/dist/retrieval/vec.js.map +1 -1
  62. package/dist/sandbox/bindings/pinakes.d.ts.map +1 -1
  63. package/dist/sandbox/bindings/pinakes.js +9 -2
  64. package/dist/sandbox/bindings/pinakes.js.map +1 -1
  65. package/package.json +30 -19
package/README.md CHANGED
@@ -145,13 +145,14 @@ knowledge_query({
145
145
  All data is stored under `~/.pinakes/` (override with `PINAKES_ROOT`). Project data lives at `~/.pinakes/projects/<mangled-path>/`.
146
146
 
147
147
  ```bash
148
- npx @pella-labs/pinakes serve [--wiki-path <dir>] # Start the stdio MCP server
149
- npx @pella-labs/pinakes rebuild [--wiki-path <dir>] # Full rebuild from markdown
150
- npx @pella-labs/pinakes status # Health check + row counts
151
- npx @pella-labs/pinakes audit [--n 20] # Tail the audit log
152
- npx @pella-labs/pinakes purge --scope <s> --confirm # Delete a scope's DB
153
- npx @pella-labs/pinakes export --scope <s> [--out f] # Dump nodes + edges as JSON
154
- npx @pella-labs/pinakes import --scope <s> --in f # Restore from dump
148
+ npx @pella-labs/pinakes serve [--wiki-path <dir>] # Start the stdio MCP server
149
+ npx @pella-labs/pinakes rebuild [--wiki-path <dir>] # Full rebuild from markdown
150
+ npx @pella-labs/pinakes status # Health check + row counts
151
+ npx @pella-labs/pinakes audit [--n 20] # Tail the audit log
152
+ npx @pella-labs/pinakes audit-wiki # Wiki audit (contradictions, gaps)
153
+ npx @pella-labs/pinakes purge --scope <s> --confirm # Delete a scope's DB
154
+ npx @pella-labs/pinakes export --scope <s> [--out f] # Dump nodes + edges as JSON
155
+ npx @pella-labs/pinakes import --scope <s> --in f # Restore from dump
155
156
  ```
156
157
 
157
158
  ## Embedder configuration
@@ -192,6 +193,14 @@ Changing the embedder requires a full rebuild (`pinakes rebuild`) since the vect
192
193
  - **Deterministic IDs**: `sha1(scope + ':' + source_uri + ':' + section_path)` means re-indexing is idempotent
193
194
  - **Centralized storage**: all data under `~/.pinakes/`, project paths mirrored as `~/.pinakes/projects/<mangled-path>/`
194
195
 
196
+ ## Wiki auditing
197
+
198
+ Two paths for auditing your knowledge base:
199
+
200
+ **Claude Code users** — run `/audit-wiki` for a deep agent-powered audit. This runs the pipeline first, then has Claude read through wiki files to find cross-file contradictions, broken references, terminology inconsistencies, and stale info that the pipeline can't catch.
201
+
202
+ **All users** — run `npx @pella-labs/pinakes audit-wiki` (or `pnpm run pinakes -- audit-wiki` from source) for the deterministic pipeline audit. Produces `_audit-report.md` in the wiki directory with contradictions, documentation gaps, and health metrics. Requires an LLM provider (Ollama, API key, or `claude` CLI).
203
+
195
204
  ## Development
196
205
 
197
206
  ```bash
package/dist/cli/audit-wiki.d.ts CHANGED
@@ -1,14 +1,58 @@
1
+ import type { Database as BetterSqliteDatabase } from 'better-sqlite3';
2
+ import { type GapRow } from '../gaps/detector.js';
3
+ import { type LlmProvider } from '../llm/provider.js';
1
4
  import { type ContradictionResult } from './contradiction.js';
5
+ import { createProgressReporter } from './progress.js';
2
6
  export interface WikiAuditOptions {
3
7
  projectRoot?: string;
4
8
  dbPath?: string;
5
9
  scope?: 'project' | 'personal';
10
+ quiet?: boolean;
11
+ generateStubs?: boolean;
6
12
  }
7
13
  export interface WikiAuditResult {
8
14
  contradictions: ContradictionResult;
9
15
  gaps_found: number;
10
- stub_pages_created: string[];
16
+ topology_gaps: number;
17
+ stubs_generated: number;
11
18
  audit_report_path: string;
12
19
  }
13
20
  export declare function auditWikiCommand(opts: WikiAuditOptions): Promise<WikiAuditResult>;
21
+ export declare function llmFilterGaps(gaps: GapRow[], llmProvider: LlmProvider, progress?: {
22
+ tick: (label: string, detail?: string) => void;
23
+ }): Promise<GapRow[]>;
24
+ export declare function parseLlmFilterResponse(response: string): string[];
25
+ export interface TopologyGap {
26
+ topic: string;
27
+ in_degree: number;
28
+ source: 'graph-topology';
29
+ }
30
+ export declare function findTopologyGaps(reader: BetterSqliteDatabase, scope: string): TopologyGap[];
31
+ export interface GapContext {
32
+ topic: string;
33
+ mentions: Array<{
34
+ source_uri: string;
35
+ excerpt: string;
36
+ }>;
37
+ }
38
+ export declare function gatherGapContexts(reader: BetterSqliteDatabase, scope: string, gaps: GapRow[]): GapContext[];
39
+ export declare function generateSynthesisStubs(gaps: GapRow[], contexts: GapContext[], wikiRoot: string, llmProvider: LlmProvider, progress?: ReturnType<typeof createProgressReporter>): Promise<number>;
40
+ export interface HealthMetrics {
41
+ file_count: number;
42
+ chunk_count: number;
43
+ node_count: number;
44
+ edge_count: number;
45
+ }
46
+ export declare function getHealthMetrics(reader: BetterSqliteDatabase, scope: string): HealthMetrics;
47
+ /**
48
+ * Filter out noise from the gap detector (D42 Tier 1 tightening).
49
+ *
50
+ * Rejects topics that are:
51
+ * - Too short
52
+ * - URLs, file paths, qualified names
53
+ * - Code fragments (snake_case, camelCase, SCREAMING_SNAKE)
54
+ * - Common stopwords (English + technical)
55
+ * - Single-word generic terms that aren't proper nouns/acronyms
56
+ */
57
+ export declare function isRealGap(topic: string): boolean;
14
58
  //# sourceMappingURL=audit-wiki.d.ts.map
package/dist/cli/audit-wiki.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"audit-wiki.d.ts","sourceRoot":"","sources":["../../src/cli/audit-wiki.ts"],"names":[],"mappings":"AAaA,OAAO,EAAqB,KAAK,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAgBjF,MAAM,WAAW,gBAAgB;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,SAAS,GAAG,UAAU,CAAC;CAChC;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,EAAE,mBAAmB,CAAC;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,kBAAkB,EAAE,MAAM,EAAE,CAAC;IAC7B,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC,CA4EvF"}
1
+ {"version":3,"file":"audit-wiki.d.ts","sourceRoot":"","sources":["../../src/cli/audit-wiki.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,QAAQ,IAAI,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAGvE,OAAO,EAAa,KAAK,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAC7D,OAAO,EAAqB,KAAK,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAQzE,OAAO,EAAqB,KAAK,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACjF,OAAO,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAgBvD,MAAM,WAAW,gBAAgB;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,SAAS,GAAG,UAAU,CAAC;IAC/B,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,EAAE,mBAAmB,CAAC;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC,CAkGvF;AAkBD,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EAAE,EACd,WAAW,EAAE,WAAW,EACxB,QAAQ,CAAC,EAAE;IAAE,IAAI,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,KAAK,IAAI,CAAA;CAAE,GAC5D,OAAO,CAAC,MAAM,EAAE,CAAC,CAgCnB;AAED,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,CAYjE;AAMD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,gBAAgB,CAAC;CAC1B;AAED,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,GACZ,WAAW,EAAE,CA0Bf;AAMD,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,KAAK,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC1D;AAED,wBAAgB,iBAAiB,CAC/B,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EAAE,GACb,UAAU,EAAE,CA4Bd;AAeD,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,MAAM,EAAE,EACd,QAAQ,EAAE,UAAU,EAAE,EACtB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,QAAQ,CAAC,EAAE,UAAU,CAAC,OAAO,sBAAsB,CAAC,GACnD,OAAO,CAAC,MAAM,CAAC,CAoDjB;AAmBD,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,GACZ,aAAa,CAoBf;AA6GD;;;;;;;;;GASG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAgChD"}
package/dist/cli/audit-wiki.js CHANGED
@@ -1,10 +1,11 @@
1
- import { writeFileSync } from 'node:fs';
2
- import { join } from 'node:path';
1
+ import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
2
+ import { join, resolve } from 'node:path';
3
3
  import { closeDb, openDb } from '../db/client.js';
4
4
  import { queryGaps } from '../gaps/detector.js';
5
5
  import { createLlmProvider } from '../llm/provider.js';
6
6
  import { resolveAbs, projectWikiPath as defaultProjectWikiPath, projectDbPath as defaultProjectDbPath, personalWikiPath as defaultPersonalWikiPath, personalDbPath as defaultPersonalDbPath, } from '../paths.js';
7
7
  import { contradictionScan } from './contradiction.js';
8
+ import { createProgressReporter } from './progress.js';
8
9
  /**
9
10
  * `pinakes audit-wiki` — LLM-powered wiki audit command.
10
11
  *
@@ -16,7 +17,7 @@ import { contradictionScan } from './contradiction.js';
16
17
  * error message if no provider is available.
17
18
  */
18
19
  const GAP_MENTION_THRESHOLD = 10;
19
- const MIN_TOPIC_LENGTH = 4; // filter out "or", "no", "id", etc.
20
+ const MIN_TOPIC_LENGTH = 5; // filter out short tokens (D42 tightened from 4)
20
21
  export async function auditWikiCommand(opts) {
21
22
  const scope = opts.scope ?? 'project';
22
23
  const projectRoot = resolveAbs(opts.projectRoot ?? process.cwd());
@@ -31,13 +32,13 @@ export async function auditWikiCommand(opts) {
31
32
  const bundle = openDb(dbPath);
32
33
  try {
33
34
  const llmProvider = createLlmProvider();
35
+ const progress = createProgressReporter({ quiet: opts.quiet });
34
36
  // eslint-disable-next-line no-console
35
37
  console.log(`Running wiki audit (LLM provider: ${llmProvider.name})...`);
36
38
  // 1. Contradiction scan (requires LLM provider)
37
39
  let contradictions;
38
40
  if (llmProvider.available()) {
39
- // eslint-disable-next-line no-console
40
- console.log(' Scanning for contradictions...');
41
+ progress.startPhase('Phase 1/3: Scanning for contradictions', 1);
41
42
  contradictions = await contradictionScan({
42
43
  bundle,
43
44
  scope,
@@ -45,41 +46,51 @@ export async function auditWikiCommand(opts) {
45
46
  wikiRoot: wikiPath,
46
47
  });
47
48
  if (contradictions.rate_limited) {
48
- // eslint-disable-next-line no-console
49
- console.log(' Contradiction scan rate-limited (last scan < 1h ago)');
49
+ progress.endPhase('Rate-limited (last scan < 1h ago)');
50
50
  }
51
51
  else {
52
- // eslint-disable-next-line no-console
53
- console.log(` Scanned ${contradictions.scanned_pairs} pairs, found ${contradictions.contradictions.length} contradictions`);
52
+ progress.endPhase(`Scanned ${contradictions.scanned_pairs} pairs, found ${contradictions.contradictions.length} contradictions`);
54
53
  }
55
54
  }
56
55
  else {
57
- // eslint-disable-next-line no-console
58
- console.log(' Skipping contradiction scan (no LLM provider available)');
59
- contradictions = { scanned_pairs: 0, contradictions: [], rate_limited: false };
56
+ progress.startPhase('Phase 1/3: Contradiction scan', 0);
57
+ progress.endPhase('Skipped (no LLM provider available)');
58
+ contradictions = { scanned_pairs: 0, topics_scanned: 0, claims_extracted: 0, contradictions: [], rate_limited: false };
60
59
  }
61
- // 2. Gap detection — filter out noise (short tokens, common words, code fragments)
62
- // eslint-disable-next-line no-console
63
- console.log(' Checking for documentation gaps...');
60
+ // 2. Gap detection — syntactic filter + LLM filter + graph topology (D42)
64
61
  const allGaps = queryGaps(bundle.writer, scope);
65
- const gaps = allGaps.filter((g) => isRealGap(g.topic));
66
- const significantGaps = gaps.filter((g) => g.mentions_count >= GAP_MENTION_THRESHOLD);
67
- // eslint-disable-next-line no-console
68
- console.log(` Found ${gaps.length} gaps (${significantGaps.length} significant)`);
69
- // 3. Stub page generation is deferred to the Pharos integration (Tier 2).
70
- // The claude subprocess provider is too unreliable for batch generation,
71
- // and the gap detector still produces too many false positives for
72
- // automated page creation. The audit report lists gaps for manual review.
73
- const stubPages = [];
74
- // 4. Generate audit report
62
+ const syntacticGaps = allGaps.filter((g) => isRealGap(g.topic));
63
+ const significantGaps = syntacticGaps.filter((g) => g.mentions_count >= GAP_MENTION_THRESHOLD);
64
+ progress.startPhase('Phase 2/3: Filtering documentation gaps', allGaps.length);
65
+ // LLM batch filter (Tier 2)
66
+ let filteredGaps;
67
+ if (llmProvider.available() && significantGaps.length > 0) {
68
+ filteredGaps = await llmFilterGaps(significantGaps, llmProvider, progress);
69
+ }
70
+ else {
71
+ filteredGaps = significantGaps;
72
+ }
73
+ // Add graph topology gaps (high in-degree, no dedicated page)
74
+ const topoGaps = findTopologyGaps(bundle.writer, scope);
75
+ progress.endPhase(`${allGaps.length} raw → ${syntacticGaps.length} syntactic → ${filteredGaps.length} LLM-filtered, ${topoGaps.length} topology gaps`);
76
+ // 3. Gather context for each confirmed gap
77
+ const gapContexts = gatherGapContexts(bundle.writer, scope, filteredGaps);
78
+ // 4. Opt-in synthesis stubs (D43 — --generate-stubs flag)
79
+ let stubsGenerated = 0;
80
+ if (opts.generateStubs && filteredGaps.length > 0 && llmProvider.available()) {
81
+ stubsGenerated = await generateSynthesisStubs(filteredGaps, gapContexts, wikiPath, llmProvider, progress);
82
+ }
83
+ // 5. Generate audit report (D46 restructured: contradictions, gaps, health)
84
+ const healthMetrics = getHealthMetrics(bundle.writer, scope);
75
85
  const reportPath = join(wikiPath, '_audit-report.md');
76
- writeAuditReport(reportPath, contradictions, gaps, significantGaps, stubPages);
86
+ writeAuditReport(reportPath, contradictions, filteredGaps, topoGaps, gapContexts, healthMetrics, stubsGenerated);
77
87
  // eslint-disable-next-line no-console
78
88
  console.log(`\nAudit report written to: ${reportPath}`);
79
89
  return {
80
90
  contradictions,
81
- gaps_found: gaps.length,
82
- stub_pages_created: stubPages,
91
+ gaps_found: filteredGaps.length,
92
+ topology_gaps: topoGaps.length,
93
+ stubs_generated: stubsGenerated,
83
94
  audit_report_path: reportPath,
84
95
  };
85
96
  }
@@ -88,58 +99,273 @@ export async function auditWikiCommand(opts) {
88
99
  }
89
100
  }
90
101
  // ---------------------------------------------------------------------------
91
- // Internals
102
+ // LLM gap filtering (D42 Tier 2)
103
+ // ---------------------------------------------------------------------------
104
+ const GAP_FILTER_SYSTEM = `You are a documentation quality analyst. Given a list of terms extracted from a technical wiki, identify which represent real documentation topics that would benefit from a dedicated wiki page.
105
+
106
+ Return ONLY a JSON array of the real topics: ["topic1", "topic2", ...]
107
+
108
+ Filter out:
109
+ - Common words and generic technical terms
110
+ - Code syntax, variable names, file extensions
111
+ - Terms too specific or too vague to be standalone pages
112
+ - Terms that are part of larger concepts already documented`;
113
+ const LLM_FILTER_BATCH_SIZE = 50;
114
+ export async function llmFilterGaps(gaps, llmProvider, progress) {
115
+ const result = [];
116
+ for (let i = 0; i < gaps.length; i += LLM_FILTER_BATCH_SIZE) {
117
+ const batch = gaps.slice(i, i + LLM_FILTER_BATCH_SIZE);
118
+ const topics = batch.map((g) => g.topic);
119
+ try {
120
+ const response = await llmProvider.complete({
121
+ system: GAP_FILTER_SYSTEM,
122
+ prompt: `Filter these ${topics.length} terms:\n${JSON.stringify(topics)}`,
123
+ maxTokens: 1000,
124
+ });
125
+ const kept = parseLlmFilterResponse(response);
126
+ const keptSet = new Set(kept.map((t) => t.toLowerCase()));
127
+ for (const gap of batch) {
128
+ if (keptSet.has(gap.topic.toLowerCase())) {
129
+ result.push(gap);
130
+ }
131
+ }
132
+ progress?.tick(`batch ${Math.floor(i / LLM_FILTER_BATCH_SIZE) + 1}`, `${kept.length}/${batch.length} kept`);
133
+ }
134
+ catch {
135
+ // LLM filter failed — keep all gaps in this batch (graceful degradation)
136
+ result.push(...batch);
137
+ progress?.tick(`batch ${Math.floor(i / LLM_FILTER_BATCH_SIZE) + 1}`, 'LLM filter failed, keeping all');
138
+ }
139
+ }
140
+ return result;
141
+ }
142
+ export function parseLlmFilterResponse(response) {
143
+ try {
144
+ const fenceMatch = response.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
145
+ const jsonStr = fenceMatch ? fenceMatch[1] : response;
146
+ const arrMatch = jsonStr.match(/\[[\s\S]*\]/);
147
+ if (!arrMatch)
148
+ return [];
149
+ const parsed = JSON.parse(arrMatch[0]);
150
+ if (!Array.isArray(parsed))
151
+ return [];
152
+ return parsed.filter((t) => typeof t === 'string');
153
+ }
154
+ catch {
155
+ return [];
156
+ }
157
+ }
158
+ export function findTopologyGaps(reader, scope) {
159
+ // Find nodes referenced by wikilink edges that don't have their own page
160
+ // We look for edge targets (dst_id) that appear frequently but
161
+ // whose corresponding node titles don't exist as dedicated pages
162
+ try {
163
+ const rows = reader
164
+ .prepare(`SELECT n.title, COUNT(*) as cnt
165
+ FROM pinakes_edges e
166
+ JOIN pinakes_nodes n ON e.dst_id = n.id
167
+ WHERE n.scope = ? AND e.edge_kind = ?
168
+ GROUP BY n.title
169
+ HAVING cnt >= 3
170
+ ORDER BY cnt DESC
171
+ LIMIT 20`)
172
+ .all(scope, 'wikilink');
173
+ return rows.map((r) => ({
174
+ topic: r.title ?? 'untitled',
175
+ in_degree: r.cnt,
176
+ source: 'graph-topology',
177
+ }));
178
+ }
179
+ catch {
180
+ return []; // Table might not have edges yet
181
+ }
182
+ }
183
+ export function gatherGapContexts(reader, scope, gaps) {
184
+ const contexts = [];
185
+ for (const gap of gaps.slice(0, 20)) {
186
+ try {
187
+ const mentions = reader
188
+ .prepare(`SELECT n.source_uri, c.text
189
+ FROM pinakes_chunks c
190
+ JOIN pinakes_nodes n ON c.node_id = n.id
191
+ WHERE n.scope = ? AND c.text LIKE '%' || ? || '%' COLLATE NOCASE
192
+ LIMIT 5`)
193
+ .all(scope, gap.topic);
194
+ contexts.push({
195
+ topic: gap.topic,
196
+ mentions: mentions.map((m) => ({
197
+ source_uri: m.source_uri,
198
+ excerpt: truncate(m.text, 200),
199
+ })),
200
+ });
201
+ }
202
+ catch {
203
+ // Non-fatal
204
+ }
205
+ }
206
+ return contexts;
207
+ }
208
+ // ---------------------------------------------------------------------------
209
+ // Synthesis stubs (D43 — opt-in via --generate-stubs)
92
210
  // ---------------------------------------------------------------------------
93
- // Stub page generation deferred to Pharos Tier 2 integration.
94
- function writeAuditReport(reportPath, contradictions, allGaps, significantGaps, stubPages) {
211
+ const SYNTHESIS_SYSTEM = `You are a technical documentation writer. Based on the following excerpts from a knowledge wiki, write a concise wiki page about the given topic.
212
+
213
+ Rules:
214
+ - Include ONLY facts present in the excerpts
215
+ - Mark any inferences with "(inferred)"
216
+ - Format as markdown with a title (H1), summary paragraph, and relevant details
217
+ - Keep it under 500 words
218
+ - Output only the markdown content`;
219
+ export async function generateSynthesisStubs(gaps, contexts, wikiRoot, llmProvider, progress) {
220
+ const draftsDir = resolve(wikiRoot, '_audit-drafts');
221
+ mkdirSync(draftsDir, { recursive: true });
222
+ // Ensure _audit-drafts is gitignored
223
+ ensureGitignored(wikiRoot, '_audit-drafts/');
224
+ const MAX_STUBS = 20;
225
+ const toGenerate = gaps.slice(0, MAX_STUBS);
226
+ progress?.startPhase('Phase 3/3: Generating synthesis drafts', toGenerate.length);
227
+ let generated = 0;
228
+ for (const gap of toGenerate) {
229
+ const ctx = contexts.find((c) => c.topic === gap.topic);
230
+ if (!ctx || ctx.mentions.length === 0) {
231
+ progress?.tick(gap.topic, 'skipped (no context)');
232
+ continue;
233
+ }
234
+ const slug = gap.topic
235
+ .toLowerCase()
236
+ .replace(/[^a-z0-9]+/g, '-')
237
+ .replace(/(^-|-$)/g, '');
238
+ if (!slug) {
239
+ progress?.tick(gap.topic, 'skipped (invalid slug)');
240
+ continue;
241
+ }
242
+ const filePath = join(draftsDir, `${slug}.md`);
243
+ try {
244
+ const excerpts = ctx.mentions
245
+ .map((m) => `From ${m.source_uri}:\n${m.excerpt}`)
246
+ .join('\n\n');
247
+ const content = await llmProvider.complete({
248
+ system: SYNTHESIS_SYSTEM,
249
+ prompt: `Write a wiki page about "${gap.topic}" based on these excerpts:\n\n${excerpts}`,
250
+ maxTokens: 1000,
251
+ });
252
+ writeFileSync(filePath, content, 'utf-8');
253
+ generated++;
254
+ progress?.tick(gap.topic, 'draft created');
255
+ }
256
+ catch (err) {
257
+ progress?.tick(gap.topic, `failed: ${err instanceof Error ? err.message.slice(0, 60) : err}`);
258
+ }
259
+ }
260
+ progress?.endPhase(`${generated} drafts written to _audit-drafts/`);
261
+ return generated;
262
+ }
263
+ function ensureGitignored(wikiRoot, entry) {
264
+ // Look for .gitignore in the .pinakes parent directory
265
+ const pinakesDir = resolve(wikiRoot, '..');
266
+ const gitignorePath = join(pinakesDir, '.gitignore');
267
+ if (!existsSync(gitignorePath))
268
+ return;
269
+ const content = readFileSync(gitignorePath, 'utf-8');
270
+ if (content.includes(entry))
271
+ return;
272
+ appendFileSync(gitignorePath, `\n${entry}\n`, 'utf-8');
273
+ }
274
+ export function getHealthMetrics(reader, scope) {
275
+ const fileCount = reader.prepare(`SELECT COUNT(DISTINCT source_uri) as c FROM pinakes_nodes WHERE scope = ?`).get(scope)?.c ?? 0;
276
+ const chunkCount = reader.prepare(`SELECT COUNT(*) as c FROM pinakes_chunks ch
277
+ JOIN pinakes_nodes n ON ch.node_id = n.id WHERE n.scope = ?`).get(scope)?.c ?? 0;
278
+ const nodeCount = reader.prepare(`SELECT COUNT(*) as c FROM pinakes_nodes WHERE scope = ?`).get(scope)?.c ?? 0;
279
+ const edgeCount = reader.prepare(`SELECT COUNT(*) as c FROM pinakes_edges e
280
+ JOIN pinakes_nodes n ON e.src_id = n.id WHERE n.scope = ?`).get(scope)?.c ?? 0;
281
+ return { file_count: fileCount, chunk_count: chunkCount, node_count: nodeCount, edge_count: edgeCount };
282
+ }
283
+ // ---------------------------------------------------------------------------
284
+ // Audit report (D46 restructured)
285
+ // ---------------------------------------------------------------------------
286
+ function writeAuditReport(reportPath, contradictions, filteredGaps, topoGaps, gapContexts, health, stubsGenerated = 0) {
95
287
  const lines = [
96
288
  '# Wiki Audit Report',
97
289
  '',
98
290
  `*Generated: ${new Date().toISOString()}*`,
99
291
  '',
100
- '## Summary',
101
- '',
102
292
  ];
293
+ // Section 1: Contradictions
294
+ lines.push('## Contradictions');
295
+ lines.push('');
103
296
  if (contradictions.rate_limited) {
104
- lines.push('- **Contradictions**: scan rate-limited (last scan < 1h ago)');
297
+ lines.push('*Scan rate-limited (last scan < 1h ago)*');
105
298
  }
106
- else {
107
- lines.push(`- **Contradictions**: ${contradictions.contradictions.length} found (${contradictions.scanned_pairs} pairs scanned)`);
299
+ else if (contradictions.contradictions.length === 0) {
300
+ lines.push(`*No contradictions found (${contradictions.topics_scanned} topics, ${contradictions.claims_extracted} claims scanned)*`);
108
301
  }
109
- lines.push(`- **Documentation gaps**: ${allGaps.length} total, ${significantGaps.length} significant (${GAP_MENTION_THRESHOLD}+ mentions)`);
110
- lines.push(`- **Stub pages generated**: ${stubPages.length}`);
111
- lines.push('');
112
- if (contradictions.contradictions.length > 0) {
113
- lines.push('## Contradictions');
302
+ else {
303
+ lines.push(`**${contradictions.contradictions.length} contradictions found** (${contradictions.topics_scanned} topics scanned)`);
114
304
  lines.push('');
115
305
  for (const c of contradictions.contradictions) {
116
- lines.push(`### ${c.chunkA.source_uri} vs ${c.chunkB.source_uri}`);
306
+ lines.push(`### ${c.topic}`);
117
307
  lines.push('');
118
- lines.push(`- **Confidence**: ${c.confidence}`);
119
- lines.push(`- **Explanation**: ${c.explanation}`);
120
- lines.push(`- Chunk A: *"${truncate(c.chunkA.text, 150)}"*`);
121
- lines.push(`- Chunk B: *"${truncate(c.chunkB.text, 150)}"*`);
308
+ lines.push(`- **${c.claimA.source_uri}**: "${truncate(c.claimA.claim, 150)}"`);
309
+ lines.push(`- **${c.claimB.source_uri}**: "${truncate(c.claimB.claim, 150)}"`);
310
+ lines.push(`- **Why**: ${c.explanation} *(${c.confidence} confidence)*`);
122
311
  lines.push('');
123
312
  }
124
313
  }
125
- if (significantGaps.length > 0) {
126
- lines.push('## Documentation Gaps');
127
- lines.push('');
128
- lines.push('| Topic | Mentions | Status |');
129
- lines.push('|---|---|---|');
130
- for (const g of significantGaps) {
131
- const status = g.resolved_at ? 'Resolved' : 'Open';
132
- lines.push(`| ${g.topic} | ${g.mentions_count} | ${status} |`);
314
+ lines.push('');
315
+ // Section 2: Documentation Gaps
316
+ lines.push('## Documentation Gaps');
317
+ lines.push('');
318
+ if (filteredGaps.length === 0 && topoGaps.length === 0) {
319
+ lines.push('*No significant gaps found*');
320
+ }
321
+ else {
322
+ if (filteredGaps.length > 0) {
323
+ lines.push(`### By mention frequency (${filteredGaps.length} topics)`);
324
+ lines.push('');
325
+ lines.push('| Topic | Mentions | Context |');
326
+ lines.push('|---|---|---|');
327
+ for (const g of filteredGaps) {
328
+ const ctx = gapContexts.find((c) => c.topic === g.topic);
329
+ const ctxSummary = ctx?.mentions.length
330
+ ? `Referenced in ${ctx.mentions.map((m) => m.source_uri).join(', ')}`
331
+ : '';
332
+ lines.push(`| ${g.topic} | ${g.mentions_count} | ${ctxSummary} |`);
333
+ }
334
+ lines.push('');
133
335
  }
336
+ if (topoGaps.length > 0) {
337
+ lines.push(`### By link topology (${topoGaps.length} topics)`);
338
+ lines.push('');
339
+ lines.push('| Topic | In-degree |');
340
+ lines.push('|---|---|');
341
+ for (const g of topoGaps) {
342
+ lines.push(`| ${g.topic} | ${g.in_degree} |`);
343
+ }
344
+ lines.push('');
345
+ }
346
+ }
347
+ lines.push('');
348
+ // Section 3: Health Metrics
349
+ lines.push('## Health Metrics');
350
+ lines.push('');
351
+ lines.push(`| Metric | Value |`);
352
+ lines.push(`|---|---|`);
353
+ lines.push(`| Files | ${health.file_count} |`);
354
+ lines.push(`| Nodes | ${health.node_count} |`);
355
+ lines.push(`| Chunks | ${health.chunk_count} |`);
356
+ lines.push(`| Edges | ${health.edge_count} |`);
357
+ lines.push('');
358
+ // Generated drafts section
359
+ if (stubsGenerated > 0) {
360
+ lines.push('## Generated Drafts');
361
+ lines.push('');
362
+ lines.push(`${stubsGenerated} synthesis drafts written to \`_audit-drafts/\`. Review before promoting to wiki.`);
134
363
  lines.push('');
135
364
  }
136
- if (stubPages.length > 0) {
137
- lines.push('## Generated Stub Pages');
365
+ else {
366
+ lines.push('---');
138
367
  lines.push('');
139
- for (const p of stubPages) {
140
- const name = p.split('/').pop() ?? p;
141
- lines.push(`- [[${name.replace('.md', '')}]]`);
142
- }
368
+ lines.push('*Run with `--generate-stubs` to auto-generate draft pages for gaps.*');
143
369
  lines.push('');
144
370
  }
145
371
  writeFileSync(reportPath, lines.join('\n'), 'utf-8');
@@ -150,12 +376,16 @@ function truncate(s, maxLen) {
150
376
  return s.slice(0, maxLen) + '...';
151
377
  }
152
378
  /**
153
- * Filter out noise from the gap detector. Rejects topics that are:
154
- * - Too short (common tokens like "or", "no", "id")
155
- * - Pure code fragments (paths, URLs, variable names with underscores)
156
- * - Common stopwords
379
+ * Filter out noise from the gap detector (D42 Tier 1 tightening).
380
+ *
381
+ * Rejects topics that are:
382
+ * - Too short
383
+ * - URLs, file paths, qualified names
384
+ * - Code fragments (snake_case, camelCase, SCREAMING_SNAKE)
385
+ * - Common stopwords (English + technical)
386
+ * - Single-word generic terms that aren't proper nouns/acronyms
157
387
  */
158
- function isRealGap(topic) {
388
+ export function isRealGap(topic) {
159
389
  if (topic.length < MIN_TOPIC_LENGTH)
160
390
  return false;
161
391
  // Skip URLs, file paths, code fragments
@@ -163,24 +393,62 @@ function isRealGap(topic) {
163
393
  return false;
164
394
  if (topic.includes('://'))
165
395
  return false;
166
- // Skip things that look like code (snake_case with no spaces, camelCase identifiers)
167
- if (/^[a-z_]+$/.test(topic) && topic.includes('_') && !topic.includes(' '))
396
+ // Qualified names (e.g., "fs.readFileSync", "path.join")
397
+ if (topic.includes('.') && !topic.includes(' '))
168
398
  return false;
169
- // Skip common stopwords that aren't real topics
170
- const stopwords = new Set([
171
- 'the', 'and', 'for', 'with', 'that', 'this', 'from', 'are', 'was', 'will',
172
- 'can', 'not', 'but', 'all', 'has', 'have', 'had', 'been', 'would', 'could',
173
- 'should', 'may', 'might', 'must', 'shall', 'into', 'than', 'then', 'when',
174
- 'where', 'which', 'while', 'about', 'after', 'before', 'between', 'under',
175
- 'over', 'only', 'also', 'just', 'like', 'more', 'most', 'some', 'such',
176
- 'each', 'every', 'both', 'either', 'neither', 'other', 'another',
177
- 'true', 'false', 'null', 'none', 'yes', 'done',
178
- ]);
179
- if (stopwords.has(topic.toLowerCase()))
399
+ // Skip code-like patterns
400
+ // snake_case: all lowercase with underscores
401
+ if (/^[a-z_]+$/.test(topic) && topic.includes('_'))
180
402
  return false;
181
- // Skip single-word topics that are too generic
182
- if (!topic.includes(' ') && topic.length < 6)
403
+ // camelCase: starts lowercase then has uppercase
404
+ if (/^[a-z]+[A-Z]/.test(topic) && !topic.includes(' '))
183
405
  return false;
406
+ // SCREAMING_SNAKE_CASE
407
+ if (/^[A-Z][A-Z0-9_]+$/.test(topic))
408
+ return false;
409
+ const lower = topic.toLowerCase();
410
+ // Skip common English stopwords
411
+ if (STOPWORDS.has(lower))
412
+ return false;
413
+ // Skip common technical terms that aren't real topics
414
+ if (TECH_STOPWORDS.has(lower))
415
+ return false;
416
+ // Single-word topics: only keep proper nouns/acronyms (starts with uppercase
417
+ // or is all-caps like "OAuth2", "Docker", "PostgreSQL")
418
+ if (!topic.includes(' ')) {
419
+ const looksProper = /^[A-Z]/.test(topic);
420
+ if (!looksProper)
421
+ return false;
422
+ }
184
423
  return true;
185
424
  }
425
+ const STOPWORDS = new Set([
426
+ 'the', 'and', 'for', 'with', 'that', 'this', 'from', 'are', 'was', 'will',
427
+ 'can', 'not', 'but', 'all', 'has', 'have', 'had', 'been', 'would', 'could',
428
+ 'should', 'may', 'might', 'must', 'shall', 'into', 'than', 'then', 'when',
429
+ 'where', 'which', 'while', 'about', 'after', 'before', 'between', 'under',
430
+ 'over', 'only', 'also', 'just', 'like', 'more', 'most', 'some', 'such',
431
+ 'each', 'every', 'both', 'either', 'neither', 'other', 'another',
432
+ 'true', 'false', 'null', 'none', 'yes', 'done', 'note', 'using',
433
+ 'first', 'still', 'instead', 'enable', 'default', 'since', 'based',
434
+ 'here', 'there', 'these', 'those', 'above', 'below', 'through',
435
+ ]);
436
+ const TECH_STOPWORDS = new Set([
437
+ 'example', 'section', 'configuration', 'implementation', 'method',
438
+ 'function', 'parameter', 'argument', 'option', 'value', 'result',
439
+ 'output', 'input', 'error', 'warning', 'status', 'type', 'string',
440
+ 'number', 'boolean', 'object', 'array', 'list', 'file', 'path',
441
+ 'name', 'version', 'update', 'change', 'create', 'delete', 'read',
442
+ 'write', 'server', 'client', 'request', 'response', 'source', 'model',
443
+ 'command', 'description', 'detail', 'content', 'window', 'provider',
444
+ 'module', 'package', 'import', 'export', 'return', 'class', 'interface',
445
+ 'property', 'field', 'table', 'column', 'index', 'query', 'schema',
446
+ 'handler', 'callback', 'promise', 'async', 'await', 'event', 'action',
447
+ 'state', 'props', 'component', 'render', 'route', 'endpoint', 'context',
448
+ 'scope', 'token', 'session', 'header', 'body', 'payload', 'message',
449
+ 'process', 'service', 'manager', 'factory', 'builder', 'helper',
450
+ 'utility', 'config', 'setting', 'feature', 'support', 'format',
451
+ 'connection', 'database', 'storage', 'cache', 'buffer', 'stream',
452
+ 'directory', 'folder', 'entry', 'record', 'document', 'resource',
453
+ ]);
186
454
  //# sourceMappingURL=audit-wiki.js.map