crewly 1.8.8 → 1.8.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/roles/_common/memory-instructions.md +6 -5
- package/config/roles/_common/wiki-instructions.md +49 -0
- package/config/roles/architect/prompt.md +2 -2
- package/config/roles/backend-developer/prompt.md +2 -2
- package/config/roles/designer/prompt.md +2 -2
- package/config/roles/developer/prompt.md +2 -2
- package/config/roles/frontend-developer/prompt.md +2 -2
- package/config/roles/fullstack-dev/prompt.md +2 -2
- package/config/roles/generalist/prompt.md +2 -2
- package/config/roles/ops/prompt.md +2 -2
- package/config/roles/orchestrator/prompt.md +135 -11
- package/config/roles/product-manager/prompt.md +2 -2
- package/config/roles/qa/prompt.md +2 -2
- package/config/roles/qa-engineer/prompt.md +2 -2
- package/config/roles/researcher/prompt.md +15 -6
- package/config/roles/sales/prompt.md +2 -2
- package/config/roles/support/prompt.md +2 -2
- package/config/roles/team-leader/prompt.md +17 -2
- package/config/roles/tpm/prompt.md +2 -2
- package/config/roles/ux-designer/prompt.md +2 -2
- package/config/skills/orchestrator/wiki-cleanup/SKILL.md +89 -0
- package/config/skills/orchestrator/wiki-cleanup/execute.sh +139 -0
- package/config/skills/orchestrator/wiki-lint/SKILL.md +75 -0
- package/config/skills/orchestrator/wiki-lint/execute.sh +66 -0
- package/config/skills/orchestrator/wiki-migrate/SKILL.md +103 -0
- package/config/skills/orchestrator/wiki-migrate/execute.sh +82 -0
- package/config/skills/orchestrator/wiki-process-queue/SKILL.md +9 -1
- package/dist/backend/backend/src/constants.d.ts +12 -0
- package/dist/backend/backend/src/constants.d.ts.map +1 -1
- package/dist/backend/backend/src/constants.js +12 -0
- package/dist/backend/backend/src/constants.js.map +1 -1
- package/dist/backend/backend/src/controllers/browser/browser.controller.d.ts.map +1 -1
- package/dist/backend/backend/src/controllers/browser/browser.controller.js +17 -0
- package/dist/backend/backend/src/controllers/browser/browser.controller.js.map +1 -1
- package/dist/backend/backend/src/controllers/cloud/cloud.controller.d.ts.map +1 -1
- package/dist/backend/backend/src/controllers/cloud/cloud.controller.js +8 -1
- package/dist/backend/backend/src/controllers/cloud/cloud.controller.js.map +1 -1
- package/dist/backend/backend/src/controllers/task-pool/task-pool.controller.d.ts +18 -0
- package/dist/backend/backend/src/controllers/task-pool/task-pool.controller.d.ts.map +1 -1
- package/dist/backend/backend/src/controllers/task-pool/task-pool.controller.js +63 -0
- package/dist/backend/backend/src/controllers/task-pool/task-pool.controller.js.map +1 -1
- package/dist/backend/backend/src/controllers/task-pool/task-pool.routes.d.ts.map +1 -1
- package/dist/backend/backend/src/controllers/task-pool/task-pool.routes.js +5 -1
- package/dist/backend/backend/src/controllers/task-pool/task-pool.routes.js.map +1 -1
- package/dist/backend/backend/src/controllers/wiki/wiki.controller.d.ts +109 -0
- package/dist/backend/backend/src/controllers/wiki/wiki.controller.d.ts.map +1 -1
- package/dist/backend/backend/src/controllers/wiki/wiki.controller.js +418 -4
- package/dist/backend/backend/src/controllers/wiki/wiki.controller.js.map +1 -1
- package/dist/backend/backend/src/controllers/wiki/wiki.routes.d.ts.map +1 -1
- package/dist/backend/backend/src/controllers/wiki/wiki.routes.js +11 -1
- package/dist/backend/backend/src/controllers/wiki/wiki.routes.js.map +1 -1
- package/dist/backend/backend/src/index.d.ts.map +1 -1
- package/dist/backend/backend/src/index.js +79 -7
- package/dist/backend/backend/src/index.js.map +1 -1
- package/dist/backend/backend/src/index.js.orc-bak-20260529 +3130 -0
- package/dist/backend/backend/src/services/ai/prompt-builder.service.js +1 -1
- package/dist/backend/backend/src/services/browser/browser-bridge.service.d.ts.map +1 -1
- package/dist/backend/backend/src/services/browser/browser-bridge.service.js +15 -29
- package/dist/backend/backend/src/services/browser/browser-bridge.service.js.map +1 -1
- package/dist/backend/backend/src/services/browser/browser-proxy.service.d.ts +97 -1
- package/dist/backend/backend/src/services/browser/browser-proxy.service.d.ts.map +1 -1
- package/dist/backend/backend/src/services/browser/browser-proxy.service.js +174 -15
- package/dist/backend/backend/src/services/browser/browser-proxy.service.js.map +1 -1
- package/dist/backend/backend/src/services/browser/browser-relay-adapter.service.d.ts +12 -4
- package/dist/backend/backend/src/services/browser/browser-relay-adapter.service.d.ts.map +1 -1
- package/dist/backend/backend/src/services/browser/browser-relay-adapter.service.js +17 -5
- package/dist/backend/backend/src/services/browser/browser-relay-adapter.service.js.map +1 -1
- package/dist/backend/backend/src/services/cloud/cloud-client.service.d.ts +75 -0
- package/dist/backend/backend/src/services/cloud/cloud-client.service.d.ts.map +1 -1
- package/dist/backend/backend/src/services/cloud/cloud-client.service.js +164 -12
- package/dist/backend/backend/src/services/cloud/cloud-client.service.js.map +1 -1
- package/dist/backend/backend/src/services/reconciler/reconciler-data-provider.d.ts.map +1 -1
- package/dist/backend/backend/src/services/reconciler/reconciler-data-provider.js +50 -0
- package/dist/backend/backend/src/services/reconciler/reconciler-data-provider.js.map +1 -1
- package/dist/backend/backend/src/services/task-pool/task-pool.service.d.ts +19 -0
- package/dist/backend/backend/src/services/task-pool/task-pool.service.d.ts.map +1 -1
- package/dist/backend/backend/src/services/task-pool/task-pool.service.js +45 -0
- package/dist/backend/backend/src/services/task-pool/task-pool.service.js.map +1 -1
- package/dist/backend/backend/src/services/v3/agent-auto-claim.service.d.ts.map +1 -1
- package/dist/backend/backend/src/services/v3/agent-auto-claim.service.js +34 -1
- package/dist/backend/backend/src/services/v3/agent-auto-claim.service.js.map +1 -1
- package/dist/backend/backend/src/services/wiki/wiki-backlinks.service.d.ts +72 -0
- package/dist/backend/backend/src/services/wiki/wiki-backlinks.service.d.ts.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-backlinks.service.js +186 -0
- package/dist/backend/backend/src/services/wiki/wiki-backlinks.service.js.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-bookkeep-trigger.service.d.ts +4 -1
- package/dist/backend/backend/src/services/wiki/wiki-bookkeep-trigger.service.d.ts.map +1 -1
- package/dist/backend/backend/src/services/wiki/wiki-bookkeep-trigger.service.js +24 -1
- package/dist/backend/backend/src/services/wiki/wiki-bookkeep-trigger.service.js.map +1 -1
- package/dist/backend/backend/src/services/wiki/wiki-cleanup.service.d.ts +160 -0
- package/dist/backend/backend/src/services/wiki/wiki-cleanup.service.d.ts.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-cleanup.service.js +399 -0
- package/dist/backend/backend/src/services/wiki/wiki-cleanup.service.js.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-lint.service.d.ts +182 -0
- package/dist/backend/backend/src/services/wiki/wiki-lint.service.d.ts.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-lint.service.js +505 -0
- package/dist/backend/backend/src/services/wiki/wiki-lint.service.js.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-migrate.service.d.ts +232 -0
- package/dist/backend/backend/src/services/wiki/wiki-migrate.service.d.ts.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-migrate.service.js +1416 -0
- package/dist/backend/backend/src/services/wiki/wiki-migrate.service.js.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-recent.service.d.ts +51 -0
- package/dist/backend/backend/src/services/wiki/wiki-recent.service.d.ts.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-recent.service.js +102 -0
- package/dist/backend/backend/src/services/wiki/wiki-recent.service.js.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-reflect-trigger.service.d.ts +84 -0
- package/dist/backend/backend/src/services/wiki/wiki-reflect-trigger.service.d.ts.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-reflect-trigger.service.js +156 -0
- package/dist/backend/backend/src/services/wiki/wiki-reflect-trigger.service.js.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-search.service.d.ts +90 -0
- package/dist/backend/backend/src/services/wiki/wiki-search.service.d.ts.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-search.service.js +190 -0
- package/dist/backend/backend/src/services/wiki/wiki-search.service.js.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-workitem-bridge.service.d.ts +164 -0
- package/dist/backend/backend/src/services/wiki/wiki-workitem-bridge.service.d.ts.map +1 -0
- package/dist/backend/backend/src/services/wiki/wiki-workitem-bridge.service.js +675 -0
- package/dist/backend/backend/src/services/wiki/wiki-workitem-bridge.service.js.map +1 -0
- package/dist/backend/backend/src/services/workflow/cron-task.service.d.ts.map +1 -1
- package/dist/backend/backend/src/services/workflow/cron-task.service.js +65 -0
- package/dist/backend/backend/src/services/workflow/cron-task.service.js.map +1 -1
- package/dist/backend/backend/src/types/cron-task.types.d.ts +16 -1
- package/dist/backend/backend/src/types/cron-task.types.d.ts.map +1 -1
- package/dist/cli/backend/src/constants.d.ts +12 -0
- package/dist/cli/backend/src/constants.d.ts.map +1 -1
- package/dist/cli/backend/src/constants.js +12 -0
- package/dist/cli/backend/src/constants.js.map +1 -1
- package/dist/cli/backend/src/services/task-pool/task-pool.service.d.ts +19 -0
- package/dist/cli/backend/src/services/task-pool/task-pool.service.d.ts.map +1 -1
- package/dist/cli/backend/src/services/task-pool/task-pool.service.js +45 -0
- package/dist/cli/backend/src/services/task-pool/task-pool.service.js.map +1 -1
- package/frontend/dist/assets/{index-db3f5041.css → index-068bb4f6.css} +10 -1
- package/frontend/dist/assets/index-c24ceb15.js +4960 -0
- package/frontend/dist/index.html +2 -2
- package/package.json +1 -1
- package/config/skills/agent/core/query-knowledge/SKILL.md +0 -87
- package/config/skills/agent/core/query-knowledge/execute.sh +0 -30
- package/config/skills/orchestrator/query-knowledge/SKILL.md +0 -75
- package/config/skills/orchestrator/query-knowledge/execute.sh +0 -30
- package/frontend/dist/assets/index-cc115bb4.js +0 -4926
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WikiLintService — deterministic validation pass over a vault.
|
|
3
|
+
*
|
|
4
|
+
* Per v2.1 spec §3, the third Phase 1 skill (alongside `wiki-ingest` and
|
|
5
|
+
* `wiki-query`). Unlike `wiki-bookkeep` (vault HEALTH metrics — counts,
|
|
6
|
+
* recent activity, duplicate clusters), lint focuses on CORRECTNESS:
|
|
7
|
+
*
|
|
8
|
+
* - **frozenPathRespected** — no markdown content in folders flagged
|
|
9
|
+
* `frozen: true` in SCHEMA.md (other than the SCHEMA.md itself /
|
|
10
|
+
* legitimate frozen content). Lint refuses to alter frozen paths.
|
|
11
|
+
* - **missingEntities** — `[[wikilinks]]` that don't resolve to any
|
|
12
|
+
* page in the vault. Either the target was renamed, deleted, or the
|
|
13
|
+
* wikilink was made up. The lint flags them; the agent decides.
|
|
14
|
+
* - **orphanPages** — pages with zero incoming wikilinks, excluding
|
|
15
|
+
* seed pages (log.md, index.md, README.md, SCHEMA.md). These are candidates for
|
|
16
|
+
* either deletion or new linking.
|
|
17
|
+
* - **staleClaims** — files un-touched for `staleDays` (default 90).
|
|
18
|
+
* - **restructureProposals** — heuristics for llm-curated/ only: large
|
|
19
|
+
* un-indexed folders, near-duplicate filenames.
|
|
20
|
+
*
|
|
21
|
+
* The service never writes. The agent's LLM reads the report and decides
|
|
22
|
+
* whether to ingest a consolidation, archive stale pages, etc.
|
|
23
|
+
*
|
|
24
|
+
* @module services/wiki/wiki-lint.service
|
|
25
|
+
*/
|
|
26
|
+
/** Default age threshold (days) for marking a page stale. */
|
|
27
|
+
export declare const WIKI_LINT_DEFAULT_STALE_DAYS = 90;
|
|
28
|
+
/** Folder size threshold above which lint proposes an `index.md` rollup. */
|
|
29
|
+
export declare const WIKI_LINT_ROLLUP_THRESHOLD = 20;
|
|
30
|
+
/** Max number of pages walked per lint pass. */
|
|
31
|
+
export declare const WIKI_LINT_MAX_PAGES = 1000;
|
|
32
|
+
/** Cap per category to keep the payload bounded. */
|
|
33
|
+
export declare const WIKI_LINT_MAX_ROWS_PER_SECTION = 50;
|
|
34
|
+
/** Pages with these basenames never count as "orphans". */
|
|
35
|
+
export declare const WIKI_LINT_SEED_BASENAMES: Set<string>;
|
|
36
|
+
/**
|
|
37
|
+
* A `[[wikilink]]` target is "concept-shaped" (vs a dangling typo) when
|
|
38
|
+
* it appears at least this many times across the vault. Below this, the
|
|
39
|
+
* unresolved target is just noise; at or above this, the concept is
|
|
40
|
+
* load-bearing and deserves its own page.
|
|
41
|
+
*/
|
|
42
|
+
export declare const WIKI_LINT_MISSING_CONCEPT_THRESHOLD = 3;
|
|
43
|
+
export interface WikiLintInput {
|
|
44
|
+
vaultPath: string;
|
|
45
|
+
staleDays?: number;
|
|
46
|
+
}
|
|
47
|
+
export interface WikiLintFrozenViolation {
|
|
48
|
+
/** Relative path of the offending file. */
|
|
49
|
+
path: string;
|
|
50
|
+
/** The frozen folder it lives in. */
|
|
51
|
+
frozenFolder: string;
|
|
52
|
+
}
|
|
53
|
+
export interface WikiLintMissingEntity {
|
|
54
|
+
/** Page that contains the dangling wikilink. */
|
|
55
|
+
sourcePath: string;
|
|
56
|
+
/** The unresolved wikilink target. */
|
|
57
|
+
target: string;
|
|
58
|
+
/** Line number in the source page (1-based). */
|
|
59
|
+
lineNumber: number;
|
|
60
|
+
}
|
|
61
|
+
export interface WikiLintRestructureProposal {
|
|
62
|
+
/** Plain-English description of the proposal, e.g. "folder X has 31 pages without an index.md — propose llm-curated/X/index.md". */
|
|
63
|
+
description: string;
|
|
64
|
+
/** Optional related path (folder or page). */
|
|
65
|
+
path?: string;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* A wikilink target that appears repeatedly across the vault but has no
|
|
69
|
+
* dedicated page. Karpathy's lint contract names this explicitly:
|
|
70
|
+
* *"important concepts mentioned but lacking their own page."* Different
|
|
71
|
+
* from `missingEntities` (which is one-off dangling refs / typos):
|
|
72
|
+
* `missingConcepts` is **frequency-weighted** signal that the team keeps
|
|
73
|
+
* mentioning a thing → write a page for it.
|
|
74
|
+
*/
|
|
75
|
+
export interface WikiLintMissingConcept {
|
|
76
|
+
/** The wikilink target referenced (e.g. `verify-output`). */
|
|
77
|
+
target: string;
|
|
78
|
+
/** How many distinct wikilink occurrences resolved to nothing. */
|
|
79
|
+
referenceCount: number;
|
|
80
|
+
/** Sample sources (capped) that reference this concept. */
|
|
81
|
+
sources: string[];
|
|
82
|
+
}
|
|
83
|
+
export interface WikiLintReport {
|
|
84
|
+
vault: {
|
|
85
|
+
scope: string;
|
|
86
|
+
id: string;
|
|
87
|
+
path: string;
|
|
88
|
+
};
|
|
89
|
+
generatedAt: string;
|
|
90
|
+
staleDays: number;
|
|
91
|
+
frozenPathRespected: boolean;
|
|
92
|
+
frozenViolations: WikiLintFrozenViolation[];
|
|
93
|
+
missingEntities: WikiLintMissingEntity[];
|
|
94
|
+
/** New 2026-05-26: frequency-weighted "missing concept" signals. */
|
|
95
|
+
missingConcepts: WikiLintMissingConcept[];
|
|
96
|
+
orphanPages: string[];
|
|
97
|
+
staleClaims: string[];
|
|
98
|
+
restructureProposals: WikiLintRestructureProposal[];
|
|
99
|
+
truncated: boolean;
|
|
100
|
+
}
|
|
101
|
+
export type WikiLintOutcome = {
|
|
102
|
+
ok: true;
|
|
103
|
+
report: WikiLintReport;
|
|
104
|
+
} | {
|
|
105
|
+
ok: false;
|
|
106
|
+
reason: 'vault_missing' | 'schema_missing' | 'invalid_input';
|
|
107
|
+
message: string;
|
|
108
|
+
};
|
|
109
|
+
/**
|
|
110
|
+
* Singleton lint service. Stateless — pattern mirrors the other wiki
|
|
111
|
+
* services so the controller wiring is uniform.
|
|
112
|
+
*/
|
|
113
|
+
export declare class WikiLintService {
|
|
114
|
+
private static instance;
|
|
115
|
+
private readonly logger;
|
|
116
|
+
private readonly schemaLoader;
|
|
117
|
+
private constructor();
|
|
118
|
+
static getInstance(): WikiLintService;
|
|
119
|
+
static resetInstance(): void;
|
|
120
|
+
/**
|
|
121
|
+
* Generate a lint report for the given vault.
|
|
122
|
+
*/
|
|
123
|
+
generate(input: WikiLintInput): Promise<WikiLintOutcome>;
|
|
124
|
+
/**
|
|
125
|
+
* Recursive walk collecting `.md` files. Honors `WIKI_LINT_MAX_PAGES`.
|
|
126
|
+
*/
|
|
127
|
+
private collectFiles;
|
|
128
|
+
/**
|
|
129
|
+
* Find `.md` files that live inside any frozen folder. SCHEMA.md and
|
|
130
|
+
* legitimate sibling files inside frozen dirs (e.g. `sop/<role>.md`)
|
|
131
|
+
* are excluded — frozen folders ARE allowed to contain content; what's
|
|
132
|
+
* NOT allowed is `wiki-ingest` writing INTO them. That's a runtime
|
|
133
|
+
* check elsewhere. Lint surfaces files only when they look like they
|
|
134
|
+
* were created by ingest patterns (timestamp-prefixed slugs).
|
|
135
|
+
*/
|
|
136
|
+
private detectFrozenViolations;
|
|
137
|
+
/**
|
|
138
|
+
* One read pass per file: pulls `[[wikilink]]` references, builds the
|
|
139
|
+
* inverse "incoming" map for orphan detection, and emits missing-entity
|
|
140
|
+
* rows for any link that resolves to nothing.
|
|
141
|
+
*/
|
|
142
|
+
private scanWikilinks;
|
|
143
|
+
/**
|
|
144
|
+
* A page is an orphan when no other page wikilinks to it AND it is
|
|
145
|
+
* not a seed page (log.md / index.md / README.md / SCHEMA.md).
|
|
146
|
+
*/
|
|
147
|
+
private detectOrphans;
|
|
148
|
+
/**
|
|
149
|
+
* Find load-bearing concepts referenced repeatedly across the vault
|
|
150
|
+
* but lacking a dedicated page. Karpathy's lint contract names this:
|
|
151
|
+
* *"important concepts mentioned but lacking their own page."*
|
|
152
|
+
*
|
|
153
|
+
* Frequency-filtered (`WIKI_LINT_MISSING_CONCEPT_THRESHOLD`, default 3)
|
|
154
|
+
* so we ignore typos and inline-code-style `[[X]]` brackets that aren't
|
|
155
|
+
* meant as wikilinks. Distinct from `missingEntities`, which flags every
|
|
156
|
+
* dangling ref individually.
|
|
157
|
+
*
|
|
158
|
+
* Results sort by referenceCount desc — highest-leverage concepts first.
|
|
159
|
+
*/
|
|
160
|
+
private detectMissingConcepts;
|
|
161
|
+
/**
|
|
162
|
+
* Files whose content is older than `staleDays`.
|
|
163
|
+
*
|
|
164
|
+
* Migrated pages may have an `original_date` frontmatter field
|
|
165
|
+
* (e.g. the legacy decision was made 2026-02-01 but we wrote the
|
|
166
|
+
* markdown TODAY). Stale detection should respect the original date
|
|
167
|
+
* when present — otherwise every freshly-migrated old page would look
|
|
168
|
+
* "new" by mtime alone. Falls back to mtime when frontmatter is absent.
|
|
169
|
+
*/
|
|
170
|
+
private detectStale;
|
|
171
|
+
/**
|
|
172
|
+
* Heuristic restructure proposals — only for `llm-curated/` since
|
|
173
|
+
* frozen folders cannot be restructured.
|
|
174
|
+
*
|
|
175
|
+
* (1) A subfolder with > WIKI_LINT_ROLLUP_THRESHOLD pages and NO `index.md`
|
|
176
|
+
* gets an "add an index.md rollup" proposal.
|
|
177
|
+
* (2) Near-duplicate filename prefixes (e.g. `pricing-v1.md`, `pricing-v2.md`)
|
|
178
|
+
* get a "consider merging" proposal.
|
|
179
|
+
*/
|
|
180
|
+
private proposeRestructures;
|
|
181
|
+
}
|
|
182
|
+
//# sourceMappingURL=wiki-lint.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wiki-lint.service.d.ts","sourceRoot":"","sources":["../../../../../../backend/src/services/wiki/wiki-lint.service.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AASH,6DAA6D;AAC7D,eAAO,MAAM,4BAA4B,KAAK,CAAC;AAC/C,4EAA4E;AAC5E,eAAO,MAAM,0BAA0B,KAAK,CAAC;AAC7C,gDAAgD;AAChD,eAAO,MAAM,mBAAmB,OAAO,CAAC;AACxC,oDAAoD;AACpD,eAAO,MAAM,8BAA8B,KAAK,CAAC;AACjD,2DAA2D;AAC3D,eAAO,MAAM,wBAAwB,aAKnC,CAAC;AAEH;;;;;GAKG;AACH,eAAO,MAAM,mCAAmC,IAAI,CAAC;AAKrD,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,uBAAuB;IACtC,2CAA2C;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,qCAAqC;IACrC,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,qBAAqB;IACpC,gDAAgD;IAChD,UAAU,EAAE,MAAM,CAAC;IACnB,sCAAsC;IACtC,MAAM,EAAE,MAAM,CAAC;IACf,gDAAgD;IAChD,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,2BAA2B;IAC1C,oIAAoI;IACpI,WAAW,EAAE,MAAM,CAAC;IACpB,8CAA8C;IAC9C,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,sBAAsB;IACrC,6DAA6D;IAC7D,MAAM,EAAE,MAAM,CAAC;IACf,kEAAkE;IAClE,cAAc,EAAE,MAAM,CAAC;IACvB,2DAA2D;IAC3D,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACnD,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB,EAAE,OAAO,CAAC;IAC7B,gBAAgB,EAAE,uBAAuB,EAAE,CAAC;IAC5C,eAAe,EAAE,qBAAqB,EAAE,CAAC;IACzC,oEAAoE;IACpE,eAAe,EAAE,sBAAsB,EAAE,CAAC;IAC1C,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,oBAAoB,EAAE,2BAA2B,EAAE,CAAC;IACpD,SAAS,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,MAAM,eAAe,GACvB;IAAE,EAAE,EAAE,IAAI,CAAC;IAAC,MAAM,EAAE,cAAc,CAAA;CAAE,GACpC;IACE,EAAE,EAAE,KAAK,CAAC;IACV,MAAM,EAAE,eAAe,GAAG,gBAAgB,GAAG,eAAe,CAAC;IAC7D,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEN;;;GAGG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAgC;IACvD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAkB;IACzC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAsB;IAEnD,OAAO;IAKP,MAAM,CAAC,WAAW,IAAI,eAAe;IAKrC,MAAM,CAAC,aAAa,IAAI,IAAI;IAI5B;;OAEG;IACG,QAAQ,CAAC,KAAK,EAAE,aAAa,GAAG,OAAO,CAAC,eAAe,CAAC;I
AqF9D;;OAEG;YACW,YAAY;IAoC1B;;;;;;;OAOG;IACH,OAAO,CAAC,sBAAsB;IA2B9B;;;;OAIG;YACW,aAAa;IAoE3B;;;OAGG;IACH,OAAO,CAAC,aAAa;IAUrB;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,qBAAqB;IAkB7B;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;IAUnB;;;;;;;;OAQG;IACH,OAAO,CAAC,mBAAmB;CA6D5B"}
|
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WikiLintService — deterministic validation pass over a vault.
|
|
3
|
+
*
|
|
4
|
+
* Per v2.1 spec §3, the third Phase 1 skill (alongside `wiki-ingest` and
|
|
5
|
+
* `wiki-query`). Unlike `wiki-bookkeep` (vault HEALTH metrics — counts,
|
|
6
|
+
* recent activity, duplicate clusters), lint focuses on CORRECTNESS:
|
|
7
|
+
*
|
|
8
|
+
* - **frozenPathRespected** — no markdown content in folders flagged
|
|
9
|
+
* `frozen: true` in SCHEMA.md (other than the SCHEMA.md itself /
|
|
10
|
+
* legitimate frozen content). Lint refuses to alter frozen paths.
|
|
11
|
+
* - **missingEntities** — `[[wikilinks]]` that don't resolve to any
|
|
12
|
+
* page in the vault. Either the target was renamed, deleted, or the
|
|
13
|
+
* wikilink was made up. The lint flags them; the agent decides.
|
|
14
|
+
* - **orphanPages** — pages with zero incoming wikilinks, excluding
|
|
15
|
+
* seed pages (log.md, index.md, README.md, SCHEMA.md). These are candidates for
|
|
16
|
+
* either deletion or new linking.
|
|
17
|
+
* - **staleClaims** — files un-touched for `staleDays` (default 90).
|
|
18
|
+
* - **restructureProposals** — heuristics for llm-curated/ only: large
|
|
19
|
+
* un-indexed folders, near-duplicate filenames.
|
|
20
|
+
*
|
|
21
|
+
* The service never writes. The agent's LLM reads the report and decides
|
|
22
|
+
* whether to ingest a consolidation, archive stale pages, etc.
|
|
23
|
+
*
|
|
24
|
+
* @module services/wiki/wiki-lint.service
|
|
25
|
+
*/
|
|
26
|
+
import * as path from 'path';
|
|
27
|
+
import * as fs from 'fs/promises';
|
|
28
|
+
import { existsSync } from 'fs';
|
|
29
|
+
import { LoggerService } from '../core/logger.service.js';
|
|
30
|
+
import { SchemaLoaderService } from './schema-loader.service.js';
|
|
31
|
+
/** Default age threshold (days) for marking a page stale. */
|
|
32
|
+
export const WIKI_LINT_DEFAULT_STALE_DAYS = 90;
|
|
33
|
+
/** Folder size threshold above which lint proposes an `index.md` rollup. */
|
|
34
|
+
export const WIKI_LINT_ROLLUP_THRESHOLD = 20;
|
|
35
|
+
/** Max number of pages walked per lint pass. */
|
|
36
|
+
export const WIKI_LINT_MAX_PAGES = 1000;
|
|
37
|
+
/** Cap per category to keep the payload bounded. */
|
|
38
|
+
export const WIKI_LINT_MAX_ROWS_PER_SECTION = 50;
|
|
39
|
+
/** Pages with these basenames never count as "orphans". */
|
|
40
|
+
export const WIKI_LINT_SEED_BASENAMES = new Set([
|
|
41
|
+
'log.md',
|
|
42
|
+
'index.md',
|
|
43
|
+
'readme.md',
|
|
44
|
+
'schema.md',
|
|
45
|
+
]);
|
|
46
|
+
/**
|
|
47
|
+
* A `[[wikilink]]` target is "concept-shaped" (vs a dangling typo) when
|
|
48
|
+
* it appears at least this many times across the vault. Below this, the
|
|
49
|
+
* unresolved target is just noise; at or above this, the concept is
|
|
50
|
+
* load-bearing and deserves its own page.
|
|
51
|
+
*/
|
|
52
|
+
export const WIKI_LINT_MISSING_CONCEPT_THRESHOLD = 3;
|
|
53
|
+
/** `[[target]]` or `[[target|alias]]`. */
|
|
54
|
+
const WIKILINK_RE = /\[\[([^\]|]+?)(?:\|[^\]]+?)?\]\]/g;
|
|
55
|
+
/**
|
|
56
|
+
* Singleton lint service. Stateless — pattern mirrors the other wiki
|
|
57
|
+
* services so the controller wiring is uniform.
|
|
58
|
+
*/
|
|
59
|
+
export class WikiLintService {
|
|
60
|
+
static instance = null;
|
|
61
|
+
logger;
|
|
62
|
+
schemaLoader;
|
|
63
|
+
constructor() {
|
|
64
|
+
this.logger = LoggerService.getInstance().createComponentLogger('WikiLintService');
|
|
65
|
+
this.schemaLoader = new SchemaLoaderService();
|
|
66
|
+
}
|
|
67
|
+
static getInstance() {
|
|
68
|
+
if (!this.instance)
|
|
69
|
+
this.instance = new WikiLintService();
|
|
70
|
+
return this.instance;
|
|
71
|
+
}
|
|
72
|
+
static resetInstance() {
|
|
73
|
+
this.instance = null;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Generate a lint report for the given vault.
|
|
77
|
+
*/
|
|
78
|
+
async generate(input) {
|
|
79
|
+
const { vaultPath } = input;
|
|
80
|
+
const staleDays = input.staleDays ?? WIKI_LINT_DEFAULT_STALE_DAYS;
|
|
81
|
+
if (!vaultPath || !path.isAbsolute(vaultPath)) {
|
|
82
|
+
return {
|
|
83
|
+
ok: false,
|
|
84
|
+
reason: 'invalid_input',
|
|
85
|
+
message: 'vaultPath must be an absolute path',
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
if (staleDays <= 0) {
|
|
89
|
+
return {
|
|
90
|
+
ok: false,
|
|
91
|
+
reason: 'invalid_input',
|
|
92
|
+
message: 'staleDays must be positive',
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
if (!existsSync(vaultPath)) {
|
|
96
|
+
return { ok: false, reason: 'vault_missing', message: `vault not found: ${vaultPath}` };
|
|
97
|
+
}
|
|
98
|
+
if (!existsSync(path.join(vaultPath, 'SCHEMA.md'))) {
|
|
99
|
+
return {
|
|
100
|
+
ok: false,
|
|
101
|
+
reason: 'schema_missing',
|
|
102
|
+
message: `SCHEMA.md not found inside ${vaultPath}`,
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
let schema;
|
|
106
|
+
try {
|
|
107
|
+
schema = await this.schemaLoader.load(vaultPath);
|
|
108
|
+
}
|
|
109
|
+
catch (err) {
|
|
110
|
+
this.logger.warn('WikiLintService: schema load failed', {
|
|
111
|
+
vault: vaultPath,
|
|
112
|
+
error: err.message,
|
|
113
|
+
});
|
|
114
|
+
return {
|
|
115
|
+
ok: false,
|
|
116
|
+
reason: 'schema_missing',
|
|
117
|
+
message: `SCHEMA.md unparseable: ${err.message}`,
|
|
118
|
+
};
|
|
119
|
+
}
|
|
120
|
+
const frozenFolders = this.schemaLoader
|
|
121
|
+
.getFrozenPaths(schema)
|
|
122
|
+
.map((p) => p.replace(/[/\\]+$/, ''));
|
|
123
|
+
const allFiles = [];
|
|
124
|
+
await this.collectFiles(vaultPath, vaultPath, allFiles);
|
|
125
|
+
const truncated = allFiles.length >= WIKI_LINT_MAX_PAGES;
|
|
126
|
+
const files = allFiles.slice(0, WIKI_LINT_MAX_PAGES);
|
|
127
|
+
const frozenViolations = this.detectFrozenViolations(files, frozenFolders);
|
|
128
|
+
const { missingEntities, incomingMap, unresolvedFrequency } = await this.scanWikilinks(vaultPath, files);
|
|
129
|
+
const orphanPages = this.detectOrphans(files, incomingMap);
|
|
130
|
+
const staleClaims = this.detectStale(files, staleDays);
|
|
131
|
+
const restructureProposals = this.proposeRestructures(files);
|
|
132
|
+
const missingConcepts = this.detectMissingConcepts(unresolvedFrequency);
|
|
133
|
+
const report = {
|
|
134
|
+
vault: {
|
|
135
|
+
scope: schema.vault_scope,
|
|
136
|
+
id: schema.vault_id,
|
|
137
|
+
path: vaultPath,
|
|
138
|
+
},
|
|
139
|
+
generatedAt: new Date().toISOString(),
|
|
140
|
+
staleDays,
|
|
141
|
+
frozenPathRespected: frozenViolations.length === 0,
|
|
142
|
+
frozenViolations: frozenViolations.slice(0, WIKI_LINT_MAX_ROWS_PER_SECTION),
|
|
143
|
+
missingEntities: missingEntities.slice(0, WIKI_LINT_MAX_ROWS_PER_SECTION),
|
|
144
|
+
missingConcepts: missingConcepts.slice(0, WIKI_LINT_MAX_ROWS_PER_SECTION),
|
|
145
|
+
orphanPages: orphanPages.slice(0, WIKI_LINT_MAX_ROWS_PER_SECTION),
|
|
146
|
+
staleClaims: staleClaims.slice(0, WIKI_LINT_MAX_ROWS_PER_SECTION),
|
|
147
|
+
restructureProposals: restructureProposals.slice(0, WIKI_LINT_MAX_ROWS_PER_SECTION),
|
|
148
|
+
truncated,
|
|
149
|
+
};
|
|
150
|
+
return { ok: true, report };
|
|
151
|
+
}
|
|
152
|
+
// ---------------------------------------------------------------------------
|
|
153
|
+
// Internals
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
/**
|
|
156
|
+
* Recursive walk collecting `.md` files. Honors `WIKI_LINT_MAX_PAGES`.
|
|
157
|
+
*/
|
|
158
|
+
async collectFiles(rootDir, dir, acc) {
|
|
159
|
+
if (acc.length >= WIKI_LINT_MAX_PAGES)
|
|
160
|
+
return;
|
|
161
|
+
let entries;
|
|
162
|
+
try {
|
|
163
|
+
entries = await fs.readdir(dir, { withFileTypes: true });
|
|
164
|
+
}
|
|
165
|
+
catch {
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
for (const entry of entries) {
|
|
169
|
+
if (acc.length >= WIKI_LINT_MAX_PAGES)
|
|
170
|
+
return;
|
|
171
|
+
if (entry.name.startsWith('.'))
|
|
172
|
+
continue;
|
|
173
|
+
const abs = path.join(dir, entry.name);
|
|
174
|
+
if (entry.isDirectory()) {
|
|
175
|
+
await this.collectFiles(rootDir, abs, acc);
|
|
176
|
+
}
|
|
177
|
+
else if (entry.isFile() && entry.name.endsWith('.md')) {
|
|
178
|
+
const rel = path.relative(rootDir, abs).replace(/\\/g, '/');
|
|
179
|
+
try {
|
|
180
|
+
const stat = await fs.stat(abs);
|
|
181
|
+
// Peek at the first few hundred bytes only to extract a possible
|
|
182
|
+
// `original_date:` field from a YAML frontmatter block. We avoid
|
|
183
|
+
// reading the whole file in this pass — scanWikilinks() does the
|
|
184
|
+
// full read separately.
|
|
185
|
+
const originalDateMs = await peekOriginalDate(abs);
|
|
186
|
+
acc.push({
|
|
187
|
+
relativePath: rel,
|
|
188
|
+
basename: entry.name,
|
|
189
|
+
modifiedMs: stat.mtimeMs,
|
|
190
|
+
originalDateMs,
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
catch {
|
|
194
|
+
// unreadable file — skip
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
/**
|
|
200
|
+
* Find `.md` files that live inside any frozen folder. SCHEMA.md and
|
|
201
|
+
* legitimate sibling files inside frozen dirs (e.g. `sop/<role>.md`)
|
|
202
|
+
* are excluded — frozen folders ARE allowed to contain content; what's
|
|
203
|
+
* NOT allowed is `wiki-ingest` writing INTO them. That's a runtime
|
|
204
|
+
* check elsewhere. Lint surfaces files only when they look like they
|
|
205
|
+
* were created by ingest patterns (timestamp-prefixed slugs).
|
|
206
|
+
*/
|
|
207
|
+
detectFrozenViolations(files, frozenFolders) {
|
|
208
|
+
const out = [];
|
|
209
|
+
if (frozenFolders.length === 0)
|
|
210
|
+
return out;
|
|
211
|
+
const ingestNamePattern = /^\d{4}-\d{2}-\d{2}-/;
|
|
212
|
+
for (const f of files) {
|
|
213
|
+
// Match if the file lives strictly inside a frozen folder.
|
|
214
|
+
for (const folder of frozenFolders) {
|
|
215
|
+
if (!folder)
|
|
216
|
+
continue;
|
|
217
|
+
if (f.relativePath === `${folder}/${f.basename}` ||
|
|
218
|
+
f.relativePath.startsWith(`${folder}/`)) {
|
|
219
|
+
// Only flag ingest-shaped names (date-prefixed) — preexisting
|
|
220
|
+
// frozen content is fine.
|
|
221
|
+
if (ingestNamePattern.test(f.basename) || f.basename === 'log.md') {
|
|
222
|
+
out.push({ path: f.relativePath, frozenFolder: folder });
|
|
223
|
+
}
|
|
224
|
+
break;
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
return out;
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* One read pass per file: pulls `[[wikilink]]` references, builds the
|
|
232
|
+
* inverse "incoming" map for orphan detection, and emits missing-entity
|
|
233
|
+
* rows for any link that resolves to nothing.
|
|
234
|
+
*/
|
|
235
|
+
async scanWikilinks(vaultPath, files) {
|
|
236
|
+
const incomingMap = new Map();
|
|
237
|
+
const missingEntities = [];
|
|
238
|
+
const unresolvedFrequency = new Map();
|
|
239
|
+
// Normalize the page set for fast resolution.
|
|
240
|
+
const allPaths = files.map((f) => f.relativePath.toLowerCase());
|
|
241
|
+
for (const file of files) {
|
|
242
|
+
let content;
|
|
243
|
+
try {
|
|
244
|
+
content = await fs.readFile(path.join(vaultPath, file.relativePath), 'utf8');
|
|
245
|
+
}
|
|
246
|
+
catch {
|
|
247
|
+
continue;
|
|
248
|
+
}
|
|
249
|
+
if (!content.includes('[['))
|
|
250
|
+
continue;
|
|
251
|
+
const lines = content.split(/\r?\n/);
|
|
252
|
+
for (let i = 0; i < lines.length; i++) {
|
|
253
|
+
const line = lines[i];
|
|
254
|
+
if (!line.includes('[['))
|
|
255
|
+
continue;
|
|
256
|
+
WIKILINK_RE.lastIndex = 0;
|
|
257
|
+
let m;
|
|
258
|
+
while ((m = WIKILINK_RE.exec(line)) !== null) {
|
|
259
|
+
const rawTarget = m[1]?.trim();
|
|
260
|
+
if (!rawTarget)
|
|
261
|
+
continue;
|
|
262
|
+
const resolved = resolveAgainstFiles(rawTarget, allPaths, files);
|
|
263
|
+
if (!resolved) {
|
|
264
|
+
missingEntities.push({
|
|
265
|
+
sourcePath: file.relativePath,
|
|
266
|
+
target: rawTarget,
|
|
267
|
+
lineNumber: i + 1,
|
|
268
|
+
});
|
|
269
|
+
// Accumulate frequency for missing-concepts detection.
|
|
270
|
+
const key = rawTarget.toLowerCase();
|
|
271
|
+
const slot = unresolvedFrequency.get(key) ?? {
|
|
272
|
+
count: 0,
|
|
273
|
+
sources: new Set(),
|
|
274
|
+
};
|
|
275
|
+
slot.count++;
|
|
276
|
+
slot.sources.add(file.relativePath);
|
|
277
|
+
unresolvedFrequency.set(key, slot);
|
|
278
|
+
}
|
|
279
|
+
else {
|
|
280
|
+
const arr = incomingMap.get(resolved) ?? [];
|
|
281
|
+
if (!arr.includes(file.relativePath))
|
|
282
|
+
arr.push(file.relativePath);
|
|
283
|
+
incomingMap.set(resolved, arr);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
return { missingEntities, incomingMap, unresolvedFrequency };
|
|
289
|
+
}
|
|
290
|
+
/**
|
|
291
|
+
* A page is an orphan when no other page wikilinks to it AND it is
|
|
292
|
+
* not a seed page (log.md / index.md / README.md / SCHEMA.md).
|
|
293
|
+
*/
|
|
294
|
+
detectOrphans(files, incomingMap) {
|
|
295
|
+
const out = [];
|
|
296
|
+
for (const f of files) {
|
|
297
|
+
if (WIKI_LINT_SEED_BASENAMES.has(f.basename.toLowerCase()))
|
|
298
|
+
continue;
|
|
299
|
+
const incoming = incomingMap.get(f.relativePath) ?? [];
|
|
300
|
+
if (incoming.length === 0)
|
|
301
|
+
out.push(f.relativePath);
|
|
302
|
+
}
|
|
303
|
+
return out;
|
|
304
|
+
}
|
|
305
|
+
/**
|
|
306
|
+
* Find load-bearing concepts referenced repeatedly across the vault
|
|
307
|
+
* but lacking a dedicated page. Karpathy's lint contract names this:
|
|
308
|
+
* *"important concepts mentioned but lacking their own page."*
|
|
309
|
+
*
|
|
310
|
+
* Frequency-filtered (`WIKI_LINT_MISSING_CONCEPT_THRESHOLD`, default 3)
|
|
311
|
+
* so we ignore typos and inline-code-style `[[X]]` brackets that aren't
|
|
312
|
+
* meant as wikilinks. Distinct from `missingEntities`, which flags every
|
|
313
|
+
* dangling ref individually.
|
|
314
|
+
*
|
|
315
|
+
* Results sort by referenceCount desc — highest-leverage concepts first.
|
|
316
|
+
*/
|
|
317
|
+
detectMissingConcepts(unresolvedFrequency) {
|
|
318
|
+
const out = [];
|
|
319
|
+
for (const [target, { count, sources }] of unresolvedFrequency) {
|
|
320
|
+
if (count < WIKI_LINT_MISSING_CONCEPT_THRESHOLD)
|
|
321
|
+
continue;
|
|
322
|
+
out.push({
|
|
323
|
+
target,
|
|
324
|
+
referenceCount: count,
|
|
325
|
+
// Cap sources at 10 to keep payload bounded; full list lives in
|
|
326
|
+
// `missingEntities` (which lists every individual occurrence).
|
|
327
|
+
sources: [...sources].slice(0, 10),
|
|
328
|
+
});
|
|
329
|
+
}
|
|
330
|
+
out.sort((a, b) => b.referenceCount - a.referenceCount);
|
|
331
|
+
return out;
|
|
332
|
+
}
|
|
333
|
+
/**
|
|
334
|
+
* Files whose content is older than `staleDays`.
|
|
335
|
+
*
|
|
336
|
+
* Migrated pages may have an `original_date` frontmatter field
|
|
337
|
+
* (e.g. the legacy decision was made 2026-02-01 but we wrote the
|
|
338
|
+
* markdown TODAY). Stale detection should respect the original date
|
|
339
|
+
* when present — otherwise every freshly-migrated old page would look
|
|
340
|
+
* "new" by mtime alone. Falls back to mtime when frontmatter is absent.
|
|
341
|
+
*/
|
|
342
|
+
detectStale(files, staleDays) {
|
|
343
|
+
const cutoff = Date.now() - staleDays * 24 * 60 * 60 * 1000;
|
|
344
|
+
return files
|
|
345
|
+
.filter((f) => {
|
|
346
|
+
const ageMarker = f.originalDateMs ?? f.modifiedMs;
|
|
347
|
+
return ageMarker < cutoff;
|
|
348
|
+
})
|
|
349
|
+
.map((f) => f.relativePath);
|
|
350
|
+
}
|
|
351
|
+
/**
|
|
352
|
+
* Heuristic restructure proposals — only for `llm-curated/` since
|
|
353
|
+
* frozen folders cannot be restructured.
|
|
354
|
+
*
|
|
355
|
+
* (1) A subfolder with > WIKI_LINT_ROLLUP_THRESHOLD pages and NO `index.md`
|
|
356
|
+
* gets an "add an index.md rollup" proposal.
|
|
357
|
+
* (2) Near-duplicate filename prefixes (e.g. `pricing-v1.md`, `pricing-v2.md`)
|
|
358
|
+
* get a "consider merging" proposal.
|
|
359
|
+
*/
|
|
360
|
+
proposeRestructures(files) {
|
|
361
|
+
const proposals = [];
|
|
362
|
+
// (1) Index proposals.
|
|
363
|
+
const folderToFiles = new Map();
|
|
364
|
+
for (const f of files) {
|
|
365
|
+
if (!f.relativePath.startsWith('llm-curated/'))
|
|
366
|
+
continue;
|
|
367
|
+
const parts = f.relativePath.split('/');
|
|
368
|
+
if (parts.length < 3)
|
|
369
|
+
continue; // top-level llm-curated/page.md — skip
|
|
370
|
+
const folder = parts.slice(0, -1).join('/');
|
|
371
|
+
const arr = folderToFiles.get(folder) ?? [];
|
|
372
|
+
arr.push(f);
|
|
373
|
+
folderToFiles.set(folder, arr);
|
|
374
|
+
}
|
|
375
|
+
for (const [folder, fs2] of folderToFiles) {
|
|
376
|
+
if (fs2.length < WIKI_LINT_ROLLUP_THRESHOLD)
|
|
377
|
+
continue;
|
|
378
|
+
const hasIndex = fs2.some((f) => f.basename.toLowerCase() === 'index.md');
|
|
379
|
+
if (hasIndex)
|
|
380
|
+
continue;
|
|
381
|
+
proposals.push({
|
|
382
|
+
description: `${folder}/ has ${fs2.length} pages and no index.md — propose adding ${folder}/index.md to summarize + link.`,
|
|
383
|
+
path: folder,
|
|
384
|
+
});
|
|
385
|
+
}
|
|
386
|
+
// (2) Near-duplicate filename prefixes (8+ chars shared, llm-curated only).
|
|
387
|
+
//
|
|
388
|
+
// IMPORTANT: strip the `YYYY-MM-DD-` date prefix before clustering.
|
|
389
|
+
// Migrated pages all start with their `original_date` (e.g.
|
|
390
|
+
// `2026-05-04-...`), so clustering on raw filename would dump every
|
|
391
|
+
// file from that month into one bogus "near-duplicate" cluster.
|
|
392
|
+
// What we actually want is to cluster on the CONTENT slug after the
|
|
393
|
+
// date — that's where real near-duplicates live (`pricing-v1`,
|
|
394
|
+
// `pricing-v2`, etc.).
|
|
395
|
+
const grouped = new Map();
|
|
396
|
+
const prefixLen = 8;
|
|
397
|
+
for (const f of files) {
|
|
398
|
+
if (!f.relativePath.startsWith('llm-curated/'))
|
|
399
|
+
continue;
|
|
400
|
+
const baseNoMd = f.basename.replace(/\.md$/i, '').toLowerCase();
|
|
401
|
+
// Some migrated memory entries have NESTED date prefixes (the
|
|
402
|
+
// original content body started with its own date, which got
|
|
403
|
+
// slugified, then the migrator prepended the entry's createdAt
|
|
404
|
+
// date in front). Strip date prefixes REPEATEDLY until none remain.
|
|
405
|
+
let contentSlug = baseNoMd;
|
|
406
|
+
while (DATE_PREFIX_RE.test(contentSlug)) {
|
|
407
|
+
contentSlug = contentSlug.replace(DATE_PREFIX_RE, '');
|
|
408
|
+
}
|
|
409
|
+
if (contentSlug.length < prefixLen)
|
|
410
|
+
continue;
|
|
411
|
+
const key = path.dirname(f.relativePath) + '/' + contentSlug.slice(0, prefixLen);
|
|
412
|
+
const arr = grouped.get(key) ?? [];
|
|
413
|
+
arr.push(f);
|
|
414
|
+
grouped.set(key, arr);
|
|
415
|
+
}
|
|
416
|
+
for (const [key, group] of grouped) {
|
|
417
|
+
if (group.length < 2)
|
|
418
|
+
continue;
|
|
419
|
+
proposals.push({
|
|
420
|
+
description: `${group.length} pages share the prefix "${path.basename(key)}" — consider merging into one canonical page.`,
|
|
421
|
+
path: group.map((g) => g.relativePath).join(' | '),
|
|
422
|
+
});
|
|
423
|
+
}
|
|
424
|
+
return proposals;
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
/**
 * Read at most the first 1024 bytes of a markdown file and look for an
 * `original_date: <value>` field inside a YAML frontmatter block at the
 * very top of the file.
 *
 * The value may be unquoted, single-quoted or double-quoted and is
 * parsed with `Date.parse`. A leading UTF-8 byte-order mark is
 * ignored, so files saved by BOM-emitting editors are still recognised.
 *
 * @param absPath  Absolute path of the file to inspect.
 * @returns The parsed timestamp in milliseconds, or `undefined` when
 *          the file cannot be read, does not start with `---`, has no
 *          `original_date` line, or the value is not a parseable date.
 */
async function peekOriginalDate(absPath) {
    let head;
    try {
        // Open the file directly so only the leading bytes are read.
        const fh = await fs.open(absPath, 'r');
        try {
            const buf = Buffer.alloc(1024);
            const { bytesRead } = await fh.read(buf, 0, buf.length, 0);
            head = buf.subarray(0, bytesRead).toString('utf8');
        }
        finally {
            await fh.close();
        }
    }
    catch {
        // Best-effort: an unreadable file simply has no original date.
        return undefined;
    }
    // Buffer#toString keeps a BOM as U+FEFF; drop it so the `---` check
    // below still sees the frontmatter fence.
    if (head.charCodeAt(0) === 0xfeff)
        head = head.slice(1);
    if (!head.startsWith('---'))
        return undefined;
    // Frontmatter ends at the next `---` line; if it is not within the
    // bytes read, scan everything after the opening fence.
    const end = head.indexOf('\n---', 3);
    const block = end === -1 ? head.slice(3) : head.slice(3, end);
    const m = block.match(/^\s*original_date:\s*["']?([^"'\n]+)["']?\s*$/m);
    if (!m)
        return undefined;
    const ts = Date.parse(m[1].trim());
    return Number.isFinite(ts) ? ts : undefined;
}
|
|
463
|
+
/**
 * Matches a leading date prefix at the start of a basename (no `.md`).
 *
 * Handles both plain-date `YYYY-MM-DD-` AND ISO-timestamp-as-slug
 * `YYYY-MM-DDtHH-MMz` / `YYYY-MM-DDtHHz-` forms (legacy agent memory
 * often embedded full ISO timestamps as content prefixes; the slugifier
 * replaced the `:` colons with `-` hyphens).
 *
 * Pattern, piece by piece:
 *   - `^\d{4}-\d{2}-\d{2}`  the date, anchored at the start of the string;
 *   - `(?:t\d{2}(?:-\d{2})?z?)?`  optional time: `t`, two digits,
 *     optionally `-` plus two more digits, optionally `z`;
 *   - `-?`  one optional separator hyphen.
 *
 * There is no `i` flag, so only lower-case `t` / `z` match; the visible
 * caller lower-cases the basename before testing. There is no `g` flag,
 * so `test()` / `replace()` keep no `lastIndex` state between calls.
 *
 * NOTE(review): a seconds component is not consumed — for
 * `2026-05-04t10-30-00z-foo` the match ends after `10-30-`, leaving
 * `00z-foo`. Confirm whether slugs with seconds occur in practice.
 */
const DATE_PREFIX_RE = /^\d{4}-\d{2}-\d{2}(?:t\d{2}(?:-\d{2})?z?)?-?/;
|
|
472
|
+
/**
 * Resolve a `[[target]]` to the page it refers to. Per the original
 * note, this mirrors the frontend's wikilink resolver.
 *
 * Matching is case-insensitive and the `.md` extension is optional.
 * Three tiers are tried in order; within a tier the first page in
 * `files` order wins:
 *   1. the whole relative path equals the target;
 *   2. the relative path ends with `/<target>`;
 *   3. the page's basename (without `.md`) equals the target (without `.md`).
 *
 * @param target         Raw link target; trimmed and lower-cased here.
 * @param allPathsLower  Lower-cased relative paths, index-aligned with `files`.
 * @param files          Page records; `relativePath` is read.
 * @returns The matching page's `relativePath` (case preserved), or
 *          `null` when the target is blank or nothing matches.
 */
function resolveAgainstFiles(target, allPathsLower, files) {
    const needle = target.trim().toLowerCase();
    if (needle === '')
        return null;
    const hasExt = needle.endsWith('.md');
    const withExt = hasExt ? needle : `${needle}.md`;
    const withoutExt = hasExt ? needle.slice(0, -3) : needle;
    const dropExt = (name) => (name.endsWith('.md') ? name.slice(0, -3) : name);
    // Ordered from most to least specific; an earlier tier always beats a
    // later one regardless of where the pages sit in the list.
    const tiers = [
        (p) => p === withExt || p === needle,
        (p) => p.endsWith(`/${withExt}`) || p.endsWith(`/${needle}`),
        (p) => dropExt(p.slice(p.lastIndexOf('/') + 1)) === withoutExt,
    ];
    for (const matches of tiers) {
        const idx = allPathsLower.findIndex((p) => matches(p));
        if (idx !== -1)
            return files[idx].relativePath;
    }
    return null;
}
|
|
505
|
+
//# sourceMappingURL=wiki-lint.service.js.map
|