@pella-labs/pinakes 0.3.14 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -7
- package/dist/cli/audit-wiki.d.ts +45 -1
- package/dist/cli/audit-wiki.d.ts.map +1 -1
- package/dist/cli/audit-wiki.js +348 -80
- package/dist/cli/audit-wiki.js.map +1 -1
- package/dist/cli/claims.d.ts +49 -0
- package/dist/cli/claims.d.ts.map +1 -0
- package/dist/cli/claims.js +169 -0
- package/dist/cli/claims.js.map +1 -0
- package/dist/cli/contradiction.d.ts +46 -28
- package/dist/cli/contradiction.d.ts.map +1 -1
- package/dist/cli/contradiction.js +182 -115
- package/dist/cli/contradiction.js.map +1 -1
- package/dist/cli/index.js +4 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/progress.d.ts +19 -0
- package/dist/cli/progress.d.ts.map +1 -0
- package/dist/cli/progress.js +44 -0
- package/dist/cli/progress.js.map +1 -0
- package/dist/cli/serve.js +1 -1
- package/dist/db/client.js +1 -1
- package/dist/db/migrations/0003_add_pinakes_claims.sql +13 -0
- package/dist/db/migrations/0004_add_confidence_score.sql +12 -0
- package/dist/db/migrations/meta/_journal.json +14 -0
- package/dist/db/schema.d.ts +161 -1
- package/dist/db/schema.d.ts.map +1 -1
- package/dist/db/schema.js +24 -1
- package/dist/db/schema.js.map +1 -1
- package/dist/gate/confidence.d.ts +82 -0
- package/dist/gate/confidence.d.ts.map +1 -0
- package/dist/gate/confidence.js +190 -0
- package/dist/gate/confidence.js.map +1 -0
- package/dist/ingest/ingester.d.ts.map +1 -1
- package/dist/ingest/ingester.js +4 -3
- package/dist/ingest/ingester.js.map +1 -1
- package/dist/ingest/repo-mirror.d.ts.map +1 -1
- package/dist/ingest/repo-mirror.js +5 -1
- package/dist/ingest/repo-mirror.js.map +1 -1
- package/dist/init/copy.d.ts.map +1 -1
- package/dist/init/copy.js +9 -0
- package/dist/init/copy.js.map +1 -1
- package/dist/init/scanner.js +7 -0
- package/dist/init/scanner.js.map +1 -1
- package/dist/llm/provider.d.ts.map +1 -1
- package/dist/llm/provider.js +19 -5
- package/dist/llm/provider.js.map +1 -1
- package/dist/mcp/tools/search.d.ts.map +1 -1
- package/dist/mcp/tools/search.js +2 -2
- package/dist/mcp/tools/search.js.map +1 -1
- package/dist/retrieval/fts.d.ts +1 -0
- package/dist/retrieval/fts.d.ts.map +1 -1
- package/dist/retrieval/fts.js +18 -2
- package/dist/retrieval/fts.js.map +1 -1
- package/dist/retrieval/hybrid.d.ts +1 -0
- package/dist/retrieval/hybrid.d.ts.map +1 -1
- package/dist/retrieval/hybrid.js +5 -0
- package/dist/retrieval/hybrid.js.map +1 -1
- package/dist/retrieval/vec.d.ts +1 -0
- package/dist/retrieval/vec.d.ts.map +1 -1
- package/dist/retrieval/vec.js +17 -2
- package/dist/retrieval/vec.js.map +1 -1
- package/dist/sandbox/bindings/pinakes.d.ts.map +1 -1
- package/dist/sandbox/bindings/pinakes.js +9 -2
- package/dist/sandbox/bindings/pinakes.js.map +1 -1
- package/package.json +30 -19
package/README.md
CHANGED
|
@@ -145,13 +145,14 @@ knowledge_query({
|
|
|
145
145
|
All data is stored under `~/.pinakes/` (override with `PINAKES_ROOT`). Project data lives at `~/.pinakes/projects/<mangled-path>/`.
|
|
146
146
|
|
|
147
147
|
```bash
|
|
148
|
-
npx @pella-labs/pinakes serve
|
|
149
|
-
npx @pella-labs/pinakes rebuild
|
|
150
|
-
npx @pella-labs/pinakes status
|
|
151
|
-
npx @pella-labs/pinakes audit
|
|
152
|
-
npx @pella-labs/pinakes
|
|
153
|
-
npx @pella-labs/pinakes
|
|
154
|
-
npx @pella-labs/pinakes
|
|
148
|
+
npx @pella-labs/pinakes serve [--wiki-path <dir>] # Start the stdio MCP server
|
|
149
|
+
npx @pella-labs/pinakes rebuild [--wiki-path <dir>] # Full rebuild from markdown
|
|
150
|
+
npx @pella-labs/pinakes status # Health check + row counts
|
|
151
|
+
npx @pella-labs/pinakes audit [--n 20] # Tail the audit log
|
|
152
|
+
npx @pella-labs/pinakes audit-wiki # Wiki audit (contradictions, gaps)
|
|
153
|
+
npx @pella-labs/pinakes purge --scope <s> --confirm # Delete a scope's DB
|
|
154
|
+
npx @pella-labs/pinakes export --scope <s> [--out f] # Dump nodes + edges as JSON
|
|
155
|
+
npx @pella-labs/pinakes import --scope <s> --in f # Restore from dump
|
|
155
156
|
```
|
|
156
157
|
|
|
157
158
|
## Embedder configuration
|
|
@@ -192,6 +193,14 @@ Changing the embedder requires a full rebuild (`pinakes rebuild`) since the vect
|
|
|
192
193
|
- **Deterministic IDs**: `sha1(scope + ':' + source_uri + ':' + section_path)` means re-indexing is idempotent
|
|
193
194
|
- **Centralized storage**: all data under `~/.pinakes/`, project paths mirrored as `~/.pinakes/projects/<mangled-path>/`
|
|
194
195
|
|
|
196
|
+
## Wiki auditing
|
|
197
|
+
|
|
198
|
+
Two paths for auditing your knowledge base:
|
|
199
|
+
|
|
200
|
+
**Claude Code users** — run `/audit-wiki` for a deep agent-powered audit. This runs the pipeline first, then has Claude read through wiki files to find cross-file contradictions, broken references, terminology inconsistencies, and stale info that the pipeline can't catch.
|
|
201
|
+
|
|
202
|
+
**All users** — run `npx @pella-labs/pinakes audit-wiki` (or `pnpm run pinakes -- audit-wiki` from source) for the deterministic pipeline audit. It writes `_audit-report.md` to the wiki directory, covering contradictions, documentation gaps, and health metrics. An LLM provider (Ollama, an API key, or the `claude` CLI) is required.
|
|
203
|
+
|
|
195
204
|
## Development
|
|
196
205
|
|
|
197
206
|
```bash
|
package/dist/cli/audit-wiki.d.ts
CHANGED
|
@@ -1,14 +1,58 @@
|
|
|
1
|
+
import type { Database as BetterSqliteDatabase } from 'better-sqlite3';
|
|
2
|
+
import { type GapRow } from '../gaps/detector.js';
|
|
3
|
+
import { type LlmProvider } from '../llm/provider.js';
|
|
1
4
|
import { type ContradictionResult } from './contradiction.js';
|
|
5
|
+
import { createProgressReporter } from './progress.js';
|
|
2
6
|
export interface WikiAuditOptions {
|
|
3
7
|
projectRoot?: string;
|
|
4
8
|
dbPath?: string;
|
|
5
9
|
scope?: 'project' | 'personal';
|
|
10
|
+
quiet?: boolean;
|
|
11
|
+
generateStubs?: boolean;
|
|
6
12
|
}
|
|
7
13
|
export interface WikiAuditResult {
|
|
8
14
|
contradictions: ContradictionResult;
|
|
9
15
|
gaps_found: number;
|
|
10
|
-
|
|
16
|
+
topology_gaps: number;
|
|
17
|
+
stubs_generated: number;
|
|
11
18
|
audit_report_path: string;
|
|
12
19
|
}
|
|
13
20
|
export declare function auditWikiCommand(opts: WikiAuditOptions): Promise<WikiAuditResult>;
|
|
21
|
+
export declare function llmFilterGaps(gaps: GapRow[], llmProvider: LlmProvider, progress?: {
|
|
22
|
+
tick: (label: string, detail?: string) => void;
|
|
23
|
+
}): Promise<GapRow[]>;
|
|
24
|
+
export declare function parseLlmFilterResponse(response: string): string[];
|
|
25
|
+
export interface TopologyGap {
|
|
26
|
+
topic: string;
|
|
27
|
+
in_degree: number;
|
|
28
|
+
source: 'graph-topology';
|
|
29
|
+
}
|
|
30
|
+
export declare function findTopologyGaps(reader: BetterSqliteDatabase, scope: string): TopologyGap[];
|
|
31
|
+
export interface GapContext {
|
|
32
|
+
topic: string;
|
|
33
|
+
mentions: Array<{
|
|
34
|
+
source_uri: string;
|
|
35
|
+
excerpt: string;
|
|
36
|
+
}>;
|
|
37
|
+
}
|
|
38
|
+
export declare function gatherGapContexts(reader: BetterSqliteDatabase, scope: string, gaps: GapRow[]): GapContext[];
|
|
39
|
+
export declare function generateSynthesisStubs(gaps: GapRow[], contexts: GapContext[], wikiRoot: string, llmProvider: LlmProvider, progress?: ReturnType<typeof createProgressReporter>): Promise<number>;
|
|
40
|
+
export interface HealthMetrics {
|
|
41
|
+
file_count: number;
|
|
42
|
+
chunk_count: number;
|
|
43
|
+
node_count: number;
|
|
44
|
+
edge_count: number;
|
|
45
|
+
}
|
|
46
|
+
export declare function getHealthMetrics(reader: BetterSqliteDatabase, scope: string): HealthMetrics;
|
|
47
|
+
/**
|
|
48
|
+
* Filter out noise from the gap detector (D42 Tier 1 tightening).
|
|
49
|
+
*
|
|
50
|
+
* Rejects topics that are:
|
|
51
|
+
* - Too short
|
|
52
|
+
* - URLs, file paths, qualified names
|
|
53
|
+
* - Code fragments (snake_case, camelCase, SCREAMING_SNAKE)
|
|
54
|
+
* - Common stopwords (English + technical)
|
|
55
|
+
* - Single-word generic terms that aren't proper nouns/acronyms
|
|
56
|
+
*/
|
|
57
|
+
export declare function isRealGap(topic: string): boolean;
|
|
14
58
|
//# sourceMappingURL=audit-wiki.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audit-wiki.d.ts","sourceRoot":"","sources":["../../src/cli/audit-wiki.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"audit-wiki.d.ts","sourceRoot":"","sources":["../../src/cli/audit-wiki.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,QAAQ,IAAI,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAGvE,OAAO,EAAa,KAAK,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAC7D,OAAO,EAAqB,KAAK,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAQzE,OAAO,EAAqB,KAAK,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACjF,OAAO,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAgBvD,MAAM,WAAW,gBAAgB;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,SAAS,GAAG,UAAU,CAAC;IAC/B,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,EAAE,mBAAmB,CAAC;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC,CAkGvF;AAkBD,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EAAE,EACd,WAAW,EAAE,WAAW,EACxB,QAAQ,CAAC,EAAE;IAAE,IAAI,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,KAAK,IAAI,CAAA;CAAE,GAC5D,OAAO,CAAC,MAAM,EAAE,CAAC,CAgCnB;AAED,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,CAYjE;AAMD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,gBAAgB,CAAC;CAC1B;AAED,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,GACZ,WAAW,EAAE,CA0Bf;AAMD,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,KAAK,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC1D;AAED,wBAAgB,iBAAiB,CAC/B,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EAAE,GACb,UAAU,EAAE,CA4Bd;AAeD,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,MAAM,EAAE,EACd,QAAQ,EAAE,UAAU,EAAE,EACtB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,QAAQ,CAAC,EAAE,UAAU,CAAC,OAAO,sBAAsB,CAAC,GACnD,OAAO,CAAC,MAAM,CAAC,CAoDjB;AAmBD,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,GACZ,aAAa,CAoBf;AA6GD;;;;;;;;;GASG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAgChD"}
|
package/dist/cli/audit-wiki.js
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import { writeFileSync } from 'node:fs';
|
|
2
|
-
import { join } from 'node:path';
|
|
1
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { join, resolve } from 'node:path';
|
|
3
3
|
import { closeDb, openDb } from '../db/client.js';
|
|
4
4
|
import { queryGaps } from '../gaps/detector.js';
|
|
5
5
|
import { createLlmProvider } from '../llm/provider.js';
|
|
6
6
|
import { resolveAbs, projectWikiPath as defaultProjectWikiPath, projectDbPath as defaultProjectDbPath, personalWikiPath as defaultPersonalWikiPath, personalDbPath as defaultPersonalDbPath, } from '../paths.js';
|
|
7
7
|
import { contradictionScan } from './contradiction.js';
|
|
8
|
+
import { createProgressReporter } from './progress.js';
|
|
8
9
|
/**
|
|
9
10
|
* `pinakes audit-wiki` — LLM-powered wiki audit command.
|
|
10
11
|
*
|
|
@@ -16,7 +17,7 @@ import { contradictionScan } from './contradiction.js';
|
|
|
16
17
|
* error message if no provider is available.
|
|
17
18
|
*/
|
|
18
19
|
const GAP_MENTION_THRESHOLD = 10;
|
|
19
|
-
const MIN_TOPIC_LENGTH =
|
|
20
|
+
const MIN_TOPIC_LENGTH = 5; // filter out short tokens (D42 tightened from 4)
|
|
20
21
|
export async function auditWikiCommand(opts) {
|
|
21
22
|
const scope = opts.scope ?? 'project';
|
|
22
23
|
const projectRoot = resolveAbs(opts.projectRoot ?? process.cwd());
|
|
@@ -31,13 +32,13 @@ export async function auditWikiCommand(opts) {
|
|
|
31
32
|
const bundle = openDb(dbPath);
|
|
32
33
|
try {
|
|
33
34
|
const llmProvider = createLlmProvider();
|
|
35
|
+
const progress = createProgressReporter({ quiet: opts.quiet });
|
|
34
36
|
// eslint-disable-next-line no-console
|
|
35
37
|
console.log(`Running wiki audit (LLM provider: ${llmProvider.name})...`);
|
|
36
38
|
// 1. Contradiction scan (requires LLM provider)
|
|
37
39
|
let contradictions;
|
|
38
40
|
if (llmProvider.available()) {
|
|
39
|
-
|
|
40
|
-
console.log(' Scanning for contradictions...');
|
|
41
|
+
progress.startPhase('Phase 1/3: Scanning for contradictions', 1);
|
|
41
42
|
contradictions = await contradictionScan({
|
|
42
43
|
bundle,
|
|
43
44
|
scope,
|
|
@@ -45,41 +46,51 @@ export async function auditWikiCommand(opts) {
|
|
|
45
46
|
wikiRoot: wikiPath,
|
|
46
47
|
});
|
|
47
48
|
if (contradictions.rate_limited) {
|
|
48
|
-
|
|
49
|
-
console.log(' Contradiction scan rate-limited (last scan < 1h ago)');
|
|
49
|
+
progress.endPhase('Rate-limited (last scan < 1h ago)');
|
|
50
50
|
}
|
|
51
51
|
else {
|
|
52
|
-
|
|
53
|
-
console.log(` Scanned ${contradictions.scanned_pairs} pairs, found ${contradictions.contradictions.length} contradictions`);
|
|
52
|
+
progress.endPhase(`Scanned ${contradictions.scanned_pairs} pairs, found ${contradictions.contradictions.length} contradictions`);
|
|
54
53
|
}
|
|
55
54
|
}
|
|
56
55
|
else {
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
contradictions = { scanned_pairs: 0, contradictions: [], rate_limited: false };
|
|
56
|
+
progress.startPhase('Phase 1/3: Contradiction scan', 0);
|
|
57
|
+
progress.endPhase('Skipped (no LLM provider available)');
|
|
58
|
+
contradictions = { scanned_pairs: 0, topics_scanned: 0, claims_extracted: 0, contradictions: [], rate_limited: false };
|
|
60
59
|
}
|
|
61
|
-
// 2. Gap detection — filter
|
|
62
|
-
// eslint-disable-next-line no-console
|
|
63
|
-
console.log(' Checking for documentation gaps...');
|
|
60
|
+
// 2. Gap detection — syntactic filter + LLM filter + graph topology (D42)
|
|
64
61
|
const allGaps = queryGaps(bundle.writer, scope);
|
|
65
|
-
const
|
|
66
|
-
const significantGaps =
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
62
|
+
const syntacticGaps = allGaps.filter((g) => isRealGap(g.topic));
|
|
63
|
+
const significantGaps = syntacticGaps.filter((g) => g.mentions_count >= GAP_MENTION_THRESHOLD);
|
|
64
|
+
progress.startPhase('Phase 2/3: Filtering documentation gaps', allGaps.length);
|
|
65
|
+
// LLM batch filter (Tier 2)
|
|
66
|
+
let filteredGaps;
|
|
67
|
+
if (llmProvider.available() && significantGaps.length > 0) {
|
|
68
|
+
filteredGaps = await llmFilterGaps(significantGaps, llmProvider, progress);
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
filteredGaps = significantGaps;
|
|
72
|
+
}
|
|
73
|
+
// Add graph topology gaps (high in-degree, no dedicated page)
|
|
74
|
+
const topoGaps = findTopologyGaps(bundle.writer, scope);
|
|
75
|
+
progress.endPhase(`${allGaps.length} raw → ${syntacticGaps.length} syntactic → ${filteredGaps.length} LLM-filtered, ${topoGaps.length} topology gaps`);
|
|
76
|
+
// 3. Gather context for each confirmed gap
|
|
77
|
+
const gapContexts = gatherGapContexts(bundle.writer, scope, filteredGaps);
|
|
78
|
+
// 4. Opt-in synthesis stubs (D43 — --generate-stubs flag)
|
|
79
|
+
let stubsGenerated = 0;
|
|
80
|
+
if (opts.generateStubs && filteredGaps.length > 0 && llmProvider.available()) {
|
|
81
|
+
stubsGenerated = await generateSynthesisStubs(filteredGaps, gapContexts, wikiPath, llmProvider, progress);
|
|
82
|
+
}
|
|
83
|
+
// 5. Generate audit report (D46 restructured: contradictions, gaps, health)
|
|
84
|
+
const healthMetrics = getHealthMetrics(bundle.writer, scope);
|
|
75
85
|
const reportPath = join(wikiPath, '_audit-report.md');
|
|
76
|
-
writeAuditReport(reportPath, contradictions,
|
|
86
|
+
writeAuditReport(reportPath, contradictions, filteredGaps, topoGaps, gapContexts, healthMetrics, stubsGenerated);
|
|
77
87
|
// eslint-disable-next-line no-console
|
|
78
88
|
console.log(`\nAudit report written to: ${reportPath}`);
|
|
79
89
|
return {
|
|
80
90
|
contradictions,
|
|
81
|
-
gaps_found:
|
|
82
|
-
|
|
91
|
+
gaps_found: filteredGaps.length,
|
|
92
|
+
topology_gaps: topoGaps.length,
|
|
93
|
+
stubs_generated: stubsGenerated,
|
|
83
94
|
audit_report_path: reportPath,
|
|
84
95
|
};
|
|
85
96
|
}
|
|
@@ -88,58 +99,273 @@ export async function auditWikiCommand(opts) {
|
|
|
88
99
|
}
|
|
89
100
|
}
|
|
90
101
|
// ---------------------------------------------------------------------------
|
|
91
|
-
//
|
|
102
|
+
// LLM gap filtering (D42 Tier 2)
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
const GAP_FILTER_SYSTEM = `You are a documentation quality analyst. Given a list of terms extracted from a technical wiki, identify which represent real documentation topics that would benefit from a dedicated wiki page.
|
|
105
|
+
|
|
106
|
+
Return ONLY a JSON array of the real topics: ["topic1", "topic2", ...]
|
|
107
|
+
|
|
108
|
+
Filter out:
|
|
109
|
+
- Common words and generic technical terms
|
|
110
|
+
- Code syntax, variable names, file extensions
|
|
111
|
+
- Terms too specific or too vague to be standalone pages
|
|
112
|
+
- Terms that are part of larger concepts already documented`;
|
|
113
|
+
const LLM_FILTER_BATCH_SIZE = 50;
|
|
114
|
+
/**
 * Ask the LLM which gap topics deserve a dedicated wiki page (D42 Tier 2).
 *
 * Gaps are sent in batches of LLM_FILTER_BATCH_SIZE topics. A gap survives
 * when its topic (trimmed, case-insensitive) appears in the array the LLM
 * returns. If a batch fails for any reason it is kept in full, so a flaky
 * provider can only add noise, never hide gaps.
 *
 * @param {Array<{topic: string}>} gaps - candidate gaps, already syntactically filtered
 * @param {{complete: Function}} llmProvider - provider exposing `complete({ system, prompt, maxTokens })`
 * @param {{tick: (label: string, detail?: string) => void}} [progress] - optional progress sink
 * @returns {Promise<Array<{topic: string}>>} the gaps that were kept, in input order
 */
export async function llmFilterGaps(gaps, llmProvider, progress) {
    const result = [];
    for (let i = 0; i < gaps.length; i += LLM_FILTER_BATCH_SIZE) {
        const batch = gaps.slice(i, i + LLM_FILTER_BATCH_SIZE);
        const topics = batch.map((g) => g.topic);
        const label = `batch ${Math.floor(i / LLM_FILTER_BATCH_SIZE) + 1}`;
        let survivors;
        let detail;
        try {
            const response = await llmProvider.complete({
                system: GAP_FILTER_SYSTEM,
                prompt: `Filter these ${topics.length} terms:\n${JSON.stringify(topics)}`,
                maxTokens: 1000,
            });
            const kept = parseLlmFilterResponse(response);
            // An empty keep-list is only trusted when the reply really contains an
            // empty JSON array; otherwise the reply was unparseable and dropping the
            // whole batch would silently discard real gaps.
            if (kept.length === 0 && !/\[\s*\]/.test(String(response))) {
                throw new Error('unparseable LLM filter response');
            }
            const keptSet = new Set(kept.map((t) => t.trim().toLowerCase()));
            survivors = batch.filter((gap) => keptSet.has(gap.topic.trim().toLowerCase()));
            detail = `${survivors.length}/${batch.length} kept`;
        }
        catch {
            // LLM filter failed — keep all gaps in this batch (graceful degradation)
            survivors = batch;
            detail = 'LLM filter failed, keeping all';
        }
        // Push and report outside the try so a throwing progress sink cannot
        // cause the batch to be appended a second time by the catch branch.
        result.push(...survivors);
        progress?.tick(label, detail);
    }
    return result;
}
|
|
142
|
+
/**
 * Extract the JSON array of topic strings from an LLM filter reply.
 *
 * The reply may be bare JSON, JSON inside a ``` fence, or JSON surrounded by
 * prose. Candidate `[ ... ]` spans are tried from the earliest opening bracket
 * and, for each, from the widest span inward, so a reply that is just an
 * array still parses on the first attempt. The first span that parses to an
 * array containing at least one string wins; non-string elements are dropped.
 *
 * @param {string} response - raw LLM output
 * @returns {string[]} the topics the LLM kept, or [] when none can be recovered
 */
export function parseLlmFilterResponse(response) {
    if (typeof response !== 'string') {
        return [];
    }
    const fenceMatch = response.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
    const jsonStr = fenceMatch ? fenceMatch[1] : response;
    const opens = [];
    const closes = [];
    for (let i = 0; i < jsonStr.length; i++) {
        if (jsonStr[i] === '[') {
            opens.push(i);
        }
        else if (jsonStr[i] === ']') {
            closes.push(i);
        }
    }
    // Bound the work on pathological, bracket-heavy replies.
    const MAX_ATTEMPTS = 500;
    let attempts = 0;
    for (const start of opens) {
        for (let c = closes.length - 1; c >= 0; c--) {
            const end = closes[c];
            if (end <= start) {
                break;
            }
            if (++attempts > MAX_ATTEMPTS) {
                return [];
            }
            let parsed;
            try {
                parsed = JSON.parse(jsonStr.slice(start, end + 1));
            }
            catch {
                continue; // not valid JSON — try a narrower span
            }
            if (!Array.isArray(parsed)) {
                continue;
            }
            const strings = parsed.filter((t) => typeof t === 'string');
            if (strings.length > 0) {
                return strings;
            }
        }
    }
    return [];
}
|
|
158
|
+
/**
 * List frequently wikilinked node titles as topology-based gap candidates.
 *
 * Counts `wikilink` edges per destination-node title within `scope`, keeps
 * titles with at least 3 incoming edges, and returns the 20 most-linked.
 * NOTE(review): the query itself does not check that the title lacks a
 * dedicated page — confirm whether that filtering is expected elsewhere.
 *
 * @param {{prepare: Function}} reader - database handle exposing `prepare(sql).all(...params)`
 * @param {string} scope - scope value matched against `pinakes_nodes.scope`
 * @returns {Array<{topic: string, in_degree: number, source: 'graph-topology'}>}
 *   candidates ordered by in-degree descending; [] if the query fails
 */
export function findTopologyGaps(reader, scope) {
    const inDegreeSql = `SELECT n.title, COUNT(*) as cnt
         FROM pinakes_edges e
         JOIN pinakes_nodes n ON e.dst_id = n.id
         WHERE n.scope = ? AND e.edge_kind = ?
         GROUP BY n.title
         HAVING cnt >= 3
         ORDER BY cnt DESC
         LIMIT 20`;
    try {
        const statement = reader.prepare(inDegreeSql);
        const found = [];
        for (const row of statement.all(scope, 'wikilink')) {
            found.push({
                topic: row.title ?? 'untitled',
                in_degree: row.cnt,
                source: 'graph-topology',
            });
        }
        return found;
    }
    catch {
        // Table might not have edges yet
        return [];
    }
}
|
|
183
|
+
/**
 * Collect up to 5 chunk excerpts mentioning each gap topic (first 20 gaps only).
 *
 * The topic is matched as a literal substring: LIKE wildcards (`%`, `_`) and
 * the escape character itself are escaped before binding, so a topic such as
 * "50%_off" cannot match unrelated text. The statement is prepared once and
 * reused for every gap. A failure on one gap skips that gap only.
 *
 * @param {{prepare: Function}} reader - database handle exposing `prepare(sql).all(...params)`
 * @param {string} scope - scope value matched against `pinakes_nodes.scope`
 * @param {Array<{topic: string}>} gaps - confirmed gaps; only the first 20 are looked up
 * @returns {Array<{topic: string, mentions: Array<{source_uri: string, excerpt: string}>}>}
 */
export function gatherGapContexts(reader, scope, gaps) {
    const selected = gaps.slice(0, 20);
    if (selected.length === 0) {
        return [];
    }
    let statement;
    try {
        statement = reader.prepare(`SELECT n.source_uri, c.text
           FROM pinakes_chunks c
           JOIN pinakes_nodes n ON c.node_id = n.id
           WHERE n.scope = ? AND c.text LIKE ? COLLATE NOCASE ESCAPE '\\'
           LIMIT 5`);
    }
    catch {
        // Non-fatal: without the statement no context can be gathered at all.
        return [];
    }
    const contexts = [];
    for (const gap of selected) {
        try {
            // Escape \, % and _ so the topic is matched literally.
            const pattern = `%${gap.topic.replace(/[\\%_]/g, '\\$&')}%`;
            const mentions = statement.all(scope, pattern);
            contexts.push({
                topic: gap.topic,
                mentions: mentions.map((m) => ({
                    source_uri: m.source_uri,
                    excerpt: truncate(m.text, 200),
                })),
            });
        }
        catch {
            // Non-fatal
        }
    }
    return contexts;
}
|
|
208
|
+
// ---------------------------------------------------------------------------
|
|
209
|
+
// Synthesis stubs (D43 — opt-in via --generate-stubs)
|
|
92
210
|
// ---------------------------------------------------------------------------
|
|
93
|
-
|
|
94
|
-
|
|
211
|
+
const SYNTHESIS_SYSTEM = `You are a technical documentation writer. Based on the following excerpts from a knowledge wiki, write a concise wiki page about the given topic.
|
|
212
|
+
|
|
213
|
+
Rules:
|
|
214
|
+
- Include ONLY facts present in the excerpts
|
|
215
|
+
- Mark any inferences with "(inferred)"
|
|
216
|
+
- Format as markdown with a title (H1), summary paragraph, and relevant details
|
|
217
|
+
- Keep it under 500 words
|
|
218
|
+
- Output only the markdown content`;
|
|
219
|
+
/**
 * Write LLM-drafted stub pages for gaps into `<wikiRoot>/_audit-drafts/`
 * (D43 — opt-in via --generate-stubs).
 *
 * At most 20 gaps are processed. A gap is skipped when it has no gathered
 * context, when its topic yields an empty slug, or when an earlier gap in this
 * run already wrote a draft with the same slug (so drafts are not overwritten
 * and double-counted). An empty LLM reply is reported as a failure rather
 * than written as an empty draft. Per-gap failures are reported through
 * `progress` and do not abort the run.
 *
 * @param {Array<{topic: string}>} gaps - confirmed gaps
 * @param {Array<{topic: string, mentions: Array<{source_uri: string, excerpt: string}>}>} contexts
 *   excerpts per topic; the first entry for a topic is used
 * @param {string} wikiRoot - wiki directory that receives `_audit-drafts/`
 * @param {{complete: Function}} llmProvider - provider exposing `complete({ system, prompt, maxTokens })`
 * @param {{startPhase: Function, tick: Function, endPhase: Function}} [progress] - optional progress reporter
 * @returns {Promise<number>} number of draft files written
 */
export async function generateSynthesisStubs(gaps, contexts, wikiRoot, llmProvider, progress) {
    const draftsDir = resolve(wikiRoot, '_audit-drafts');
    mkdirSync(draftsDir, { recursive: true });
    // Ensure _audit-drafts is gitignored
    ensureGitignored(wikiRoot, '_audit-drafts/');
    const MAX_STUBS = 20;
    const toGenerate = gaps.slice(0, MAX_STUBS);
    progress?.startPhase('Phase 3/3: Generating synthesis drafts', toGenerate.length);
    // Index contexts once; keep the first entry per topic.
    const contextByTopic = new Map();
    for (const ctx of contexts) {
        if (!contextByTopic.has(ctx.topic)) {
            contextByTopic.set(ctx.topic, ctx);
        }
    }
    const writtenSlugs = new Set();
    let generated = 0;
    for (const gap of toGenerate) {
        const ctx = contextByTopic.get(gap.topic);
        if (!ctx || ctx.mentions.length === 0) {
            progress?.tick(gap.topic, 'skipped (no context)');
            continue;
        }
        const slug = gap.topic
            .toLowerCase()
            .replace(/[^a-z0-9]+/g, '-')
            .replace(/(^-|-$)/g, '');
        if (!slug) {
            progress?.tick(gap.topic, 'skipped (invalid slug)');
            continue;
        }
        if (writtenSlugs.has(slug)) {
            // Different topics can collapse to one slug ("Foo Bar" / "foo-bar").
            progress?.tick(gap.topic, 'skipped (duplicate slug)');
            continue;
        }
        const filePath = join(draftsDir, `${slug}.md`);
        try {
            const excerpts = ctx.mentions
                .map((m) => `From ${m.source_uri}:\n${m.excerpt}`)
                .join('\n\n');
            const content = await llmProvider.complete({
                system: SYNTHESIS_SYSTEM,
                prompt: `Write a wiki page about "${gap.topic}" based on these excerpts:\n\n${excerpts}`,
                maxTokens: 1000,
            });
            if (typeof content !== 'string' || content.trim() === '') {
                throw new Error('empty LLM response');
            }
            writeFileSync(filePath, content, 'utf-8');
            writtenSlugs.add(slug);
            generated++;
            progress?.tick(gap.topic, 'draft created');
        }
        catch (err) {
            progress?.tick(gap.topic, `failed: ${err instanceof Error ? err.message.slice(0, 60) : err}`);
        }
    }
    progress?.endPhase(`${generated} drafts written to _audit-drafts/`);
    return generated;
}
|
|
263
|
+
/**
 * Make sure `entry` is listed in the `.gitignore` that sits in the parent
 * directory of `wikiRoot`. Does nothing when that `.gitignore` does not exist.
 *
 * The entry is compared against whole lines (trimmed, trailing slashes
 * ignored), so a comment or a longer path that merely contains the entry does
 * not count as already listed. A newline is inserted before the entry only
 * when the file does not already end with one.
 *
 * @param {string} wikiRoot - wiki directory; its parent is searched for `.gitignore`
 * @param {string} entry - ignore pattern to add, e.g. `_audit-drafts/`
 * @returns {void}
 */
function ensureGitignored(wikiRoot, entry) {
    // Look for .gitignore in the .pinakes parent directory
    const gitignorePath = join(resolve(wikiRoot, '..'), '.gitignore');
    if (!existsSync(gitignorePath)) {
        return;
    }
    const content = readFileSync(gitignorePath, 'utf-8');
    const stripTrailingSlash = (s) => s.replace(/\/+$/, '');
    const wanted = stripTrailingSlash(entry.trim());
    const alreadyListed = content.split(/\r?\n/).some((line) => {
        const trimmed = line.trim();
        return trimmed !== '' && stripTrailingSlash(trimmed) === wanted;
    });
    if (alreadyListed) {
        return;
    }
    const separator = content === '' || content.endsWith('\n') ? '' : '\n';
    appendFileSync(gitignorePath, `${separator}${entry}\n`, 'utf-8');
}
|
|
274
|
+
/**
 * Count files, chunks, nodes and edges recorded for `scope`.
 *
 * Runs four COUNT queries, each bound to `scope`; a query that yields no row
 * (or a row without `c`) contributes 0. Files are distinct `source_uri`
 * values in `pinakes_nodes`; chunks and edges are counted through their join
 * to `pinakes_nodes` (edges via `src_id`).
 *
 * @param {{prepare: Function}} reader - database handle exposing `prepare(sql).get(...params)`
 * @param {string} scope - scope value matched against `pinakes_nodes.scope`
 * @returns {{file_count: number, chunk_count: number, node_count: number, edge_count: number}}
 */
export function getHealthMetrics(reader, scope) {
    // Every metric is a single-row COUNT aliased as `c`.
    const countFor = (sql) => {
        const row = reader.prepare(sql).get(scope);
        return row?.c ?? 0;
    };
    const file_count = countFor(`SELECT COUNT(DISTINCT source_uri) as c FROM pinakes_nodes WHERE scope = ?`);
    const chunk_count = countFor(`SELECT COUNT(*) as c FROM pinakes_chunks ch
       JOIN pinakes_nodes n ON ch.node_id = n.id WHERE n.scope = ?`);
    const node_count = countFor(`SELECT COUNT(*) as c FROM pinakes_nodes WHERE scope = ?`);
    const edge_count = countFor(`SELECT COUNT(*) as c FROM pinakes_edges e
       JOIN pinakes_nodes n ON e.src_id = n.id WHERE n.scope = ?`);
    return { file_count, chunk_count, node_count, edge_count };
}
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
// Audit report (D46 restructured)
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
function writeAuditReport(reportPath, contradictions, filteredGaps, topoGaps, gapContexts, health, stubsGenerated = 0) {
|
|
95
287
|
const lines = [
|
|
96
288
|
'# Wiki Audit Report',
|
|
97
289
|
'',
|
|
98
290
|
`*Generated: ${new Date().toISOString()}*`,
|
|
99
291
|
'',
|
|
100
|
-
'## Summary',
|
|
101
|
-
'',
|
|
102
292
|
];
|
|
293
|
+
// Section 1: Contradictions
|
|
294
|
+
lines.push('## Contradictions');
|
|
295
|
+
lines.push('');
|
|
103
296
|
if (contradictions.rate_limited) {
|
|
104
|
-
lines.push('
|
|
297
|
+
lines.push('*Scan rate-limited (last scan < 1h ago)*');
|
|
105
298
|
}
|
|
106
|
-
else {
|
|
107
|
-
lines.push(
|
|
299
|
+
else if (contradictions.contradictions.length === 0) {
|
|
300
|
+
lines.push(`*No contradictions found (${contradictions.topics_scanned} topics, ${contradictions.claims_extracted} claims scanned)*`);
|
|
108
301
|
}
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
lines.push('');
|
|
112
|
-
if (contradictions.contradictions.length > 0) {
|
|
113
|
-
lines.push('## Contradictions');
|
|
302
|
+
else {
|
|
303
|
+
lines.push(`**${contradictions.contradictions.length} contradictions found** (${contradictions.topics_scanned} topics scanned)`);
|
|
114
304
|
lines.push('');
|
|
115
305
|
for (const c of contradictions.contradictions) {
|
|
116
|
-
lines.push(`### ${c.
|
|
306
|
+
lines.push(`### ${c.topic}`);
|
|
117
307
|
lines.push('');
|
|
118
|
-
lines.push(`-
|
|
119
|
-
lines.push(`-
|
|
120
|
-
lines.push(`-
|
|
121
|
-
lines.push(`- Chunk B: *"${truncate(c.chunkB.text, 150)}"*`);
|
|
308
|
+
lines.push(`- **${c.claimA.source_uri}**: "${truncate(c.claimA.claim, 150)}"`);
|
|
309
|
+
lines.push(`- **${c.claimB.source_uri}**: "${truncate(c.claimB.claim, 150)}"`);
|
|
310
|
+
lines.push(`- **Why**: ${c.explanation} *(${c.confidence} confidence)*`);
|
|
122
311
|
lines.push('');
|
|
123
312
|
}
|
|
124
313
|
}
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
314
|
+
lines.push('');
|
|
315
|
+
// Section 2: Documentation Gaps
|
|
316
|
+
lines.push('## Documentation Gaps');
|
|
317
|
+
lines.push('');
|
|
318
|
+
if (filteredGaps.length === 0 && topoGaps.length === 0) {
|
|
319
|
+
lines.push('*No significant gaps found*');
|
|
320
|
+
}
|
|
321
|
+
else {
|
|
322
|
+
if (filteredGaps.length > 0) {
|
|
323
|
+
lines.push(`### By mention frequency (${filteredGaps.length} topics)`);
|
|
324
|
+
lines.push('');
|
|
325
|
+
lines.push('| Topic | Mentions | Context |');
|
|
326
|
+
lines.push('|---|---|---|');
|
|
327
|
+
for (const g of filteredGaps) {
|
|
328
|
+
const ctx = gapContexts.find((c) => c.topic === g.topic);
|
|
329
|
+
const ctxSummary = ctx?.mentions.length
|
|
330
|
+
? `Referenced in ${ctx.mentions.map((m) => m.source_uri).join(', ')}`
|
|
331
|
+
: '';
|
|
332
|
+
lines.push(`| ${g.topic} | ${g.mentions_count} | ${ctxSummary} |`);
|
|
333
|
+
}
|
|
334
|
+
lines.push('');
|
|
133
335
|
}
|
|
336
|
+
if (topoGaps.length > 0) {
|
|
337
|
+
lines.push(`### By link topology (${topoGaps.length} topics)`);
|
|
338
|
+
lines.push('');
|
|
339
|
+
lines.push('| Topic | In-degree |');
|
|
340
|
+
lines.push('|---|---|');
|
|
341
|
+
for (const g of topoGaps) {
|
|
342
|
+
lines.push(`| ${g.topic} | ${g.in_degree} |`);
|
|
343
|
+
}
|
|
344
|
+
lines.push('');
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
lines.push('');
|
|
348
|
+
// Section 3: Health Metrics
|
|
349
|
+
lines.push('## Health Metrics');
|
|
350
|
+
lines.push('');
|
|
351
|
+
lines.push(`| Metric | Value |`);
|
|
352
|
+
lines.push(`|---|---|`);
|
|
353
|
+
lines.push(`| Files | ${health.file_count} |`);
|
|
354
|
+
lines.push(`| Nodes | ${health.node_count} |`);
|
|
355
|
+
lines.push(`| Chunks | ${health.chunk_count} |`);
|
|
356
|
+
lines.push(`| Edges | ${health.edge_count} |`);
|
|
357
|
+
lines.push('');
|
|
358
|
+
// Generated drafts section
|
|
359
|
+
if (stubsGenerated > 0) {
|
|
360
|
+
lines.push('## Generated Drafts');
|
|
361
|
+
lines.push('');
|
|
362
|
+
lines.push(`${stubsGenerated} synthesis drafts written to \`_audit-drafts/\`. Review before promoting to wiki.`);
|
|
134
363
|
lines.push('');
|
|
135
364
|
}
|
|
136
|
-
|
|
137
|
-
lines.push('
|
|
365
|
+
else {
|
|
366
|
+
lines.push('---');
|
|
138
367
|
lines.push('');
|
|
139
|
-
|
|
140
|
-
const name = p.split('/').pop() ?? p;
|
|
141
|
-
lines.push(`- [[${name.replace('.md', '')}]]`);
|
|
142
|
-
}
|
|
368
|
+
lines.push('*Run with `--generate-stubs` to auto-generate draft pages for gaps.*');
|
|
143
369
|
lines.push('');
|
|
144
370
|
}
|
|
145
371
|
writeFileSync(reportPath, lines.join('\n'), 'utf-8');
|
|
@@ -150,12 +376,16 @@ function truncate(s, maxLen) {
|
|
|
150
376
|
return s.slice(0, maxLen) + '...';
|
|
151
377
|
}
|
|
152
378
|
/**
|
|
153
|
-
* Filter out noise from the gap detector
|
|
154
|
-
*
|
|
155
|
-
*
|
|
156
|
-
* -
|
|
379
|
+
* Filter out noise from the gap detector (D42 Tier 1 tightening).
|
|
380
|
+
*
|
|
381
|
+
* Rejects topics that are:
|
|
382
|
+
* - Too short
|
|
383
|
+
* - URLs, file paths, qualified names
|
|
384
|
+
* - Code fragments (snake_case, camelCase, SCREAMING_SNAKE)
|
|
385
|
+
* - Common stopwords (English + technical)
|
|
386
|
+
* - Single-word generic terms that aren't proper nouns/acronyms
|
|
157
387
|
*/
|
|
158
|
-
function isRealGap(topic) {
|
|
388
|
+
export function isRealGap(topic) {
|
|
159
389
|
if (topic.length < MIN_TOPIC_LENGTH)
|
|
160
390
|
return false;
|
|
161
391
|
// Skip URLs, file paths, code fragments
|
|
@@ -163,24 +393,62 @@ function isRealGap(topic) {
|
|
|
163
393
|
return false;
|
|
164
394
|
if (topic.includes('://'))
|
|
165
395
|
return false;
|
|
166
|
-
//
|
|
167
|
-
if (
|
|
396
|
+
// Qualified names (e.g., "fs.readFileSync", "path.join")
|
|
397
|
+
if (topic.includes('.') && !topic.includes(' '))
|
|
168
398
|
return false;
|
|
169
|
-
// Skip
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
'can', 'not', 'but', 'all', 'has', 'have', 'had', 'been', 'would', 'could',
|
|
173
|
-
'should', 'may', 'might', 'must', 'shall', 'into', 'than', 'then', 'when',
|
|
174
|
-
'where', 'which', 'while', 'about', 'after', 'before', 'between', 'under',
|
|
175
|
-
'over', 'only', 'also', 'just', 'like', 'more', 'most', 'some', 'such',
|
|
176
|
-
'each', 'every', 'both', 'either', 'neither', 'other', 'another',
|
|
177
|
-
'true', 'false', 'null', 'none', 'yes', 'done',
|
|
178
|
-
]);
|
|
179
|
-
if (stopwords.has(topic.toLowerCase()))
|
|
399
|
+
// Skip code-like patterns
|
|
400
|
+
// snake_case: all lowercase with underscores
|
|
401
|
+
if (/^[a-z_]+$/.test(topic) && topic.includes('_'))
|
|
180
402
|
return false;
|
|
181
|
-
//
|
|
182
|
-
if (!topic.includes(' ')
|
|
403
|
+
// camelCase: starts lowercase then has uppercase
|
|
404
|
+
if (/^[a-z]+[A-Z]/.test(topic) && !topic.includes(' '))
|
|
183
405
|
return false;
|
|
406
|
+
// SCREAMING_SNAKE_CASE
|
|
407
|
+
if (/^[A-Z][A-Z0-9_]+$/.test(topic))
|
|
408
|
+
return false;
|
|
409
|
+
const lower = topic.toLowerCase();
|
|
410
|
+
// Skip common English stopwords
|
|
411
|
+
if (STOPWORDS.has(lower))
|
|
412
|
+
return false;
|
|
413
|
+
// Skip common technical terms that aren't real topics
|
|
414
|
+
if (TECH_STOPWORDS.has(lower))
|
|
415
|
+
return false;
|
|
416
|
+
// Single-word topics: only keep proper nouns/acronyms (starts with uppercase
|
|
417
|
+
// or is all-caps like "OAuth2", "Docker", "PostgreSQL")
|
|
418
|
+
if (!topic.includes(' ')) {
|
|
419
|
+
const looksProper = /^[A-Z]/.test(topic);
|
|
420
|
+
if (!looksProper)
|
|
421
|
+
return false;
|
|
422
|
+
}
|
|
184
423
|
return true;
|
|
185
424
|
}
|
|
425
|
+
const STOPWORDS = new Set([
|
|
426
|
+
'the', 'and', 'for', 'with', 'that', 'this', 'from', 'are', 'was', 'will',
|
|
427
|
+
'can', 'not', 'but', 'all', 'has', 'have', 'had', 'been', 'would', 'could',
|
|
428
|
+
'should', 'may', 'might', 'must', 'shall', 'into', 'than', 'then', 'when',
|
|
429
|
+
'where', 'which', 'while', 'about', 'after', 'before', 'between', 'under',
|
|
430
|
+
'over', 'only', 'also', 'just', 'like', 'more', 'most', 'some', 'such',
|
|
431
|
+
'each', 'every', 'both', 'either', 'neither', 'other', 'another',
|
|
432
|
+
'true', 'false', 'null', 'none', 'yes', 'done', 'note', 'using',
|
|
433
|
+
'first', 'still', 'instead', 'enable', 'default', 'since', 'based',
|
|
434
|
+
'here', 'there', 'these', 'those', 'above', 'below', 'through',
|
|
435
|
+
]);
|
|
436
|
+
const TECH_STOPWORDS = new Set([
|
|
437
|
+
'example', 'section', 'configuration', 'implementation', 'method',
|
|
438
|
+
'function', 'parameter', 'argument', 'option', 'value', 'result',
|
|
439
|
+
'output', 'input', 'error', 'warning', 'status', 'type', 'string',
|
|
440
|
+
'number', 'boolean', 'object', 'array', 'list', 'file', 'path',
|
|
441
|
+
'name', 'version', 'update', 'change', 'create', 'delete', 'read',
|
|
442
|
+
'write', 'server', 'client', 'request', 'response', 'source', 'model',
|
|
443
|
+
'command', 'description', 'detail', 'content', 'window', 'provider',
|
|
444
|
+
'module', 'package', 'import', 'export', 'return', 'class', 'interface',
|
|
445
|
+
'property', 'field', 'table', 'column', 'index', 'query', 'schema',
|
|
446
|
+
'handler', 'callback', 'promise', 'async', 'await', 'event', 'action',
|
|
447
|
+
'state', 'props', 'component', 'render', 'route', 'endpoint', 'context',
|
|
448
|
+
'scope', 'token', 'session', 'header', 'body', 'payload', 'message',
|
|
449
|
+
'process', 'service', 'manager', 'factory', 'builder', 'helper',
|
|
450
|
+
'utility', 'config', 'setting', 'feature', 'support', 'format',
|
|
451
|
+
'connection', 'database', 'storage', 'cache', 'buffer', 'stream',
|
|
452
|
+
'directory', 'folder', 'entry', 'record', 'document', 'resource',
|
|
453
|
+
]);
|
|
186
454
|
//# sourceMappingURL=audit-wiki.js.map
|