@mfittko/repo-wiki 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.llmwiki/schema.md +107 -0
- package/AGENTS.md +42 -0
- package/CHANGELOG.md +91 -0
- package/LICENSE +21 -0
- package/README.md +254 -0
- package/dist/bin/repo-wiki.d.ts +2 -0
- package/dist/bin/repo-wiki.js +7 -0
- package/dist/bin/repo-wiki.js.map +1 -0
- package/dist/src/cli.d.ts +1 -0
- package/dist/src/cli.js +404 -0
- package/dist/src/cli.js.map +1 -0
- package/dist/src/compiler.d.ts +55 -0
- package/dist/src/compiler.js +2046 -0
- package/dist/src/compiler.js.map +1 -0
- package/dist/src/config.d.ts +63 -0
- package/dist/src/config.js +86 -0
- package/dist/src/config.js.map +1 -0
- package/dist/src/context-assembler.d.ts +68 -0
- package/dist/src/context-assembler.js +378 -0
- package/dist/src/context-assembler.js.map +1 -0
- package/dist/src/data-model-signals.d.ts +1 -0
- package/dist/src/data-model-signals.js +13 -0
- package/dist/src/data-model-signals.js.map +1 -0
- package/dist/src/docs-ingestor.d.ts +138 -0
- package/dist/src/docs-ingestor.js +844 -0
- package/dist/src/docs-ingestor.js.map +1 -0
- package/dist/src/docs-linter.d.ts +14 -0
- package/dist/src/docs-linter.js +164 -0
- package/dist/src/docs-linter.js.map +1 -0
- package/dist/src/docs-validation.d.ts +36 -0
- package/dist/src/docs-validation.js +297 -0
- package/dist/src/docs-validation.js.map +1 -0
- package/dist/src/extractors.d.ts +50 -0
- package/dist/src/extractors.js +2275 -0
- package/dist/src/extractors.js.map +1 -0
- package/dist/src/frontmatter.d.ts +46 -0
- package/dist/src/frontmatter.js +377 -0
- package/dist/src/frontmatter.js.map +1 -0
- package/dist/src/index.d.ts +26 -0
- package/dist/src/index.js +18 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/init.d.ts +12 -0
- package/dist/src/init.js +121 -0
- package/dist/src/init.js.map +1 -0
- package/dist/src/language.d.ts +2 -0
- package/dist/src/language.js +62 -0
- package/dist/src/language.js.map +1 -0
- package/dist/src/linter.d.ts +33 -0
- package/dist/src/linter.js +398 -0
- package/dist/src/linter.js.map +1 -0
- package/dist/src/llm-provider.d.ts +267 -0
- package/dist/src/llm-provider.js +474 -0
- package/dist/src/llm-provider.js.map +1 -0
- package/dist/src/page-ownership.d.ts +38 -0
- package/dist/src/page-ownership.js +96 -0
- package/dist/src/page-ownership.js.map +1 -0
- package/dist/src/planner.d.ts +55 -0
- package/dist/src/planner.js +422 -0
- package/dist/src/planner.js.map +1 -0
- package/dist/src/prompts.d.ts +103 -0
- package/dist/src/prompts.js +344 -0
- package/dist/src/prompts.js.map +1 -0
- package/dist/src/publisher.d.ts +68 -0
- package/dist/src/publisher.js +662 -0
- package/dist/src/publisher.js.map +1 -0
- package/dist/src/repository-analysis.d.ts +88 -0
- package/dist/src/repository-analysis.js +485 -0
- package/dist/src/repository-analysis.js.map +1 -0
- package/dist/src/scanner.d.ts +122 -0
- package/dist/src/scanner.js +309 -0
- package/dist/src/scanner.js.map +1 -0
- package/dist/src/search.d.ts +71 -0
- package/dist/src/search.js +410 -0
- package/dist/src/search.js.map +1 -0
- package/dist/src/secret-patterns.d.ts +3 -0
- package/dist/src/secret-patterns.js +14 -0
- package/dist/src/secret-patterns.js.map +1 -0
- package/dist/src/utils/args.d.ts +2 -0
- package/dist/src/utils/args.js +19 -0
- package/dist/src/utils/args.js.map +1 -0
- package/dist/src/utils/dotenv.d.ts +7 -0
- package/dist/src/utils/dotenv.js +73 -0
- package/dist/src/utils/dotenv.js.map +1 -0
- package/dist/src/utils/fs.d.ts +22 -0
- package/dist/src/utils/fs.js +83 -0
- package/dist/src/utils/fs.js.map +1 -0
- package/dist/src/utils/git.d.ts +13 -0
- package/dist/src/utils/git.js +39 -0
- package/dist/src/utils/git.js.map +1 -0
- package/dist/src/wiki-graph.d.ts +74 -0
- package/dist/src/wiki-graph.js +335 -0
- package/dist/src/wiki-graph.js.map +1 -0
- package/dist/src/wiki-patch.d.ts +152 -0
- package/dist/src/wiki-patch.js +489 -0
- package/dist/src/wiki-patch.js.map +1 -0
- package/dist/src/wiki-query.d.ts +63 -0
- package/dist/src/wiki-query.js +255 -0
- package/dist/src/wiki-query.js.map +1 -0
- package/dist/test/cli.test.d.ts +1 -0
- package/dist/test/cli.test.js +514 -0
- package/dist/test/cli.test.js.map +1 -0
- package/dist/test/compiler-eval.test.d.ts +1 -0
- package/dist/test/compiler-eval.test.js +234 -0
- package/dist/test/compiler-eval.test.js.map +1 -0
- package/dist/test/compiler.test.d.ts +1 -0
- package/dist/test/compiler.test.js +2537 -0
- package/dist/test/compiler.test.js.map +1 -0
- package/dist/test/context-assembler.test.d.ts +1 -0
- package/dist/test/context-assembler.test.js +379 -0
- package/dist/test/context-assembler.test.js.map +1 -0
- package/dist/test/docs-linter.test.d.ts +1 -0
- package/dist/test/docs-linter.test.js +900 -0
- package/dist/test/docs-linter.test.js.map +1 -0
- package/dist/test/dotenv.test.d.ts +1 -0
- package/dist/test/dotenv.test.js +77 -0
- package/dist/test/dotenv.test.js.map +1 -0
- package/dist/test/extractors-go.test.d.ts +1 -0
- package/dist/test/extractors-go.test.js +393 -0
- package/dist/test/extractors-go.test.js.map +1 -0
- package/dist/test/extractors-rust.test.d.ts +1 -0
- package/dist/test/extractors-rust.test.js +219 -0
- package/dist/test/extractors-rust.test.js.map +1 -0
- package/dist/test/extractors-utils.test.d.ts +1 -0
- package/dist/test/extractors-utils.test.js +786 -0
- package/dist/test/extractors-utils.test.js.map +1 -0
- package/dist/test/fixtures/compiler-e2e/basic-node-service/repo/infra/deploy.d.ts +1 -0
- package/dist/test/fixtures/compiler-e2e/basic-node-service/repo/infra/deploy.js +4 -0
- package/dist/test/fixtures/compiler-e2e/basic-node-service/repo/infra/deploy.js.map +1 -0
- package/dist/test/frontmatter.test.d.ts +1 -0
- package/dist/test/frontmatter.test.js +287 -0
- package/dist/test/frontmatter.test.js.map +1 -0
- package/dist/test/init-planner.test.d.ts +1 -0
- package/dist/test/init-planner.test.js +688 -0
- package/dist/test/init-planner.test.js.map +1 -0
- package/dist/test/linter.test.d.ts +1 -0
- package/dist/test/linter.test.js +426 -0
- package/dist/test/linter.test.js.map +1 -0
- package/dist/test/llm-provider.test.d.ts +1 -0
- package/dist/test/llm-provider.test.js +783 -0
- package/dist/test/llm-provider.test.js.map +1 -0
- package/dist/test/page-ownership.test.d.ts +1 -0
- package/dist/test/page-ownership.test.js +247 -0
- package/dist/test/page-ownership.test.js.map +1 -0
- package/dist/test/publisher.test.d.ts +1 -0
- package/dist/test/publisher.test.js +1297 -0
- package/dist/test/publisher.test.js.map +1 -0
- package/dist/test/repository-analysis.test.d.ts +1 -0
- package/dist/test/repository-analysis.test.js +182 -0
- package/dist/test/repository-analysis.test.js.map +1 -0
- package/dist/test/run-compiled-tests.d.ts +1 -0
- package/dist/test/run-compiled-tests.js +48 -0
- package/dist/test/run-compiled-tests.js.map +1 -0
- package/dist/test/scanner.test.d.ts +1 -0
- package/dist/test/scanner.test.js +551 -0
- package/dist/test/scanner.test.js.map +1 -0
- package/dist/test/search.test.d.ts +1 -0
- package/dist/test/search.test.js +92 -0
- package/dist/test/search.test.js.map +1 -0
- package/dist/test/update-changelog.test.d.ts +1 -0
- package/dist/test/update-changelog.test.js +125 -0
- package/dist/test/update-changelog.test.js.map +1 -0
- package/dist/test/wiki-graph.test.d.ts +1 -0
- package/dist/test/wiki-graph.test.js +164 -0
- package/dist/test/wiki-graph.test.js.map +1 -0
- package/dist/test/wiki-patch.test.d.ts +1 -0
- package/dist/test/wiki-patch.test.js +610 -0
- package/dist/test/wiki-patch.test.js.map +1 -0
- package/dist/test/wiki-query.test.d.ts +1 -0
- package/dist/test/wiki-query.test.js +163 -0
- package/dist/test/wiki-query.test.js.map +1 -0
- package/docs/PLAN.md +993 -0
- package/docs/WHY.md +61 -0
- package/docs/plans/agent-integration.md +85 -0
- package/docs/plans/ci-publishing.md +111 -0
- package/docs/plans/doc-validation.md +92 -0
- package/docs/plans/github-action.md +113 -0
- package/docs/plans/incremental-mode.md +98 -0
- package/docs/plans/karpathy-llm-wiki-alignment.md +84 -0
- package/docs/plans/llm-compiler.md +160 -0
- package/docs/plans/production-scanner.md +104 -0
- package/docs/plans/query-and-file-back.md +103 -0
- package/docs/plans/search-index.md +118 -0
- package/docs/plans/trust-hardening.md +74 -0
- package/docs/plans/wiki-graph.md +183 -0
- package/docs/plans/wiki-health.md +76 -0
- package/package.json +83 -0
- package/prompts/compiler.md +16 -0
- package/prompts/lint.md +18 -0
- package/prompts/page-templates.md +25 -0
- package/skills/repo-wiki-cli/SKILL.md +139 -0
|
@@ -0,0 +1,844 @@
|
|
|
1
|
+
import { promises as fs } from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { cleanDocumentedPathTarget, hasParentDirectorySegment, isGeneratedOutputReference, normalizeRoutePath } from './docs-validation.js';
|
|
4
|
+
// Lower-case file extensions that mark a file as Markdown documentation.
const DOC_EXTENSIONS = ['.md', '.mdx', '.markdown'];

// Regex source for a single route path segment (no slashes).
const ROUTE_PATH_PART_PATTERN = "[A-Za-z0-9._~:@!$&'()*+,;=%\\-[\\]{}]+";

// Regex source for a whole route path: a leading slash, optional segments
// separated by one or more slashes, and an optional trailing slash.
const ROUTE_PATH_PATTERN = `(?:\\/(?:${ROUTE_PATH_PART_PATTERN}(?:\\/+${ROUTE_PATH_PART_PATTERN})*)?\\/?)`;

// "<HTTP METHOD> <path>" mentions; group 1 is the method, group 2 the path.
const ROUTE_CLAIM_PATTERN = new RegExp(
    `\\b(GET|POST|PUT|PATCH|DELETE|OPTIONS|HEAD|ALL)\\b\\s+(${ROUTE_PATH_PATTERN})`,
    'gi'
);

// Stand-alone paths delimited by whitespace, pipes, backticks or parentheses;
// group 1 is the path.
const ROUTE_TABLE_PATH_PATTERN = new RegExp(
    `(?:^|[\\s|\`(])(${ROUTE_PATH_PATTERN})(?=$|[\\s|\`),.;:!?])`,
    'g'
);

// Npm lifecycle commands that map directly to package.json scripts
const NPM_LIFECYCLE_SCRIPTS = new Set([
    'test',
    'start',
    'stop',
    'restart'
]);

// Shell keywords and grouping tokens looked up by first command word.
const SHELL_RESERVED_WORDS = new Set([
    'if', 'then', 'else', 'elif', 'fi',
    'for', 'select', 'while', 'until', 'do', 'done',
    'case', 'esac',
    '{', '}'
]);

// Upper-case names accepted as environment variables even without an underscore.
const COMMON_ENV_VAR_NAMES = new Set([
    'CI',
    'HOME',
    'PATH',
    'PORT',
    'SHELL',
    'TERM',
    'USER'
]);

// Leading executables that make a documented shell line count as a command.
const RECOGNIZED_DOC_COMMAND_PREFIX = /^(npm|pnpm|yarn|node|npx|make|just|task|docker|git)\b/;
|
|
14
|
+
/**
 * Extract npm/shell commands from CI workflow YAML content.
 *
 * Thin wrapper over `extractCiCommandSources` that drops the source line
 * information and removes duplicate command strings, keeping first-seen order.
 *
 * @param {string} content - Raw workflow YAML text.
 * @returns {string[]} Distinct command strings.
 */
export function extractCiCommands(content) {
    const uniqueCommands = new Set();
    for (const { command } of extractCiCommandSources(content)) {
        uniqueCommands.add(command);
    }
    return Array.from(uniqueCommands);
}
|
|
21
|
+
/**
 * Collect commands from indented `run:` and `command:` entries of a CI workflow,
 * together with the line information attached by `extractWorkflowCommandValue`.
 *
 * @param {string} content - Raw workflow YAML text.
 * @returns {Array<object>} Command source entries in document order.
 */
export function extractCiCommandSources(content) {
    // Tried in order; group 1 is the leading indentation, group 2 the raw value.
    const commandKeyPatterns = [
        // Match both `- run: <cmd>` (list item) and ` run: <cmd>` (property)
        /^(\s+)(?:-\s+)?run:\s+(.+)$/,
        // Match `command: <cmd>` matrix fields
        /^(\s+)command:\s+(.+)$/
    ];
    const collected = [];
    const emittedKeys = new Set();
    const lines = content.split('\n');
    let cursor = 0;
    while (cursor < lines.length) {
        const text = lines[cursor];
        let hit = null;
        for (const pattern of commandKeyPatterns) {
            hit = pattern.exec(text);
            if (hit) {
                break;
            }
        }
        if (hit) {
            const [, indentText, rawValue] = hit;
            const { parts, lastLineIndex } = extractWorkflowCommandValue(rawValue, lines, cursor, indentText.length);
            parts.forEach((part) => pushCiWorkflowCommandSource(collected, emittedKeys, part));
            // Resume after whatever lines the value consumed (block scalars span several).
            cursor = lastLineIndex;
        }
        cursor += 1;
    }
    return collected;
}
|
|
49
|
+
/**
 * List the distinct target names found in Makefile content.
 *
 * @param {string} content - Raw Makefile text.
 * @returns {string[]} Target names, de-duplicated in first-seen order.
 */
export function extractMakeTargets(content) {
    const names = new Set();
    for (const { target } of extractMakeTargetSources(content)) {
        names.add(target);
    }
    return Array.from(names);
}
|
|
52
|
+
/**
 * Find Makefile rule targets and the 1-based line each was declared on.
 *
 * Comment lines and tab-indented lines are skipped. A rule header is the text
 * before the first `:` that is not followed by `=`; it may list several
 * whitespace-separated targets. Names rejected by `isDeterministicTargetName`
 * are dropped.
 *
 * @param {string} content - Raw Makefile text.
 * @returns {Array<{target: string, line: number}>} Targets in document order.
 */
export function extractMakeTargetSources(content) {
    const found = [];
    const emitted = new Set();
    content.split('\n').forEach((rawLine, offset) => {
        const isComment = /^\s*#/.test(rawLine);
        const isTabIndented = /^\t/.test(rawLine);
        if (isComment || isTabIndented) {
            return;
        }
        const rule = /^([^:#=][^:#=]*?):(?![=])/.exec(rawLine.trimStart());
        if (rule === null) {
            return;
        }
        const lineNumber = offset + 1;
        const names = rule[1].trim().split(/\s+/).filter((name) => isDeterministicTargetName(name));
        for (const target of names) {
            const dedupeKey = `${target}\u0000${lineNumber}`;
            if (emitted.has(dedupeKey)) {
                continue;
            }
            emitted.add(dedupeKey);
            found.push({ target, line: lineNumber });
        }
    });
    return found;
}
|
|
76
|
+
/**
 * List the distinct recipe names found in justfile content.
 *
 * @param {string} content - Raw justfile text.
 * @returns {string[]} Recipe names, de-duplicated in first-seen order.
 */
export function extractJustfileTargets(content) {
    const names = new Set();
    for (const { target } of extractJustfileTargetSources(content)) {
        names.add(target);
    }
    return Array.from(names);
}
|
|
79
|
+
/**
 * Find justfile recipe headers and the 1-based line each was declared on.
 *
 * Only unindented, non-comment lines are considered. A header is a name made of
 * letters, digits, `_`, `.`, `/` or `-`, followed by `:` that is not part of
 * `:=`. Names rejected by `isDeterministicTargetName` are dropped.
 *
 * @param {string} content - Raw justfile text.
 * @returns {Array<{target: string, runner: string, line: number}>} Recipes in document order.
 */
export function extractJustfileTargetSources(content) {
    const recipes = [];
    const emitted = new Set();
    content.split('\n').forEach((rawLine, offset) => {
        const isComment = /^\s*#/.test(rawLine);
        const isIndented = /^\s+/.test(rawLine);
        if (isComment || isIndented) {
            return;
        }
        const header = /^([A-Za-z0-9_][A-Za-z0-9_./-]*)\s*:(?![=])/.exec(rawLine);
        if (header === null) {
            return;
        }
        const target = header[1];
        if (!isDeterministicTargetName(target)) {
            return;
        }
        const lineNumber = offset + 1;
        const dedupeKey = `${target}\u0000${lineNumber}`;
        if (!emitted.has(dedupeKey)) {
            emitted.add(dedupeKey);
            recipes.push({ target, runner: 'just', line: lineNumber });
        }
    });
    return recipes;
}
|
|
101
|
+
/**
 * List the distinct task names found in Taskfile YAML content.
 *
 * @param {string} content - Raw Taskfile text.
 * @returns {string[]} Task names, de-duplicated in first-seen order.
 */
export function extractTaskfileTargets(content) {
    const names = new Set();
    for (const { target } of extractTaskfileTargetSources(content)) {
        names.add(target);
    }
    return Array.from(names);
}
|
|
104
|
+
/**
 * Find task names declared under a `tasks:` key and the 1-based line of each.
 *
 * Blank and comment lines are ignored. A `tasks:` section ends at the first line
 * indented no deeper than the `tasks:` key itself; that line is then examined as
 * a possible new `tasks:` header. Only keys indented exactly two spaces deeper
 * than `tasks:` and carrying no inline value are treated as task names.
 *
 * @param {string} content - Raw Taskfile YAML text.
 * @returns {Array<{target: string, runner: string, line: number}>} Tasks in document order.
 */
export function extractTaskfileTargetSources(content) {
    const discovered = [];
    const emitted = new Set();
    let tasksIndent = null;
    content.split('\n').forEach((rawLine, offset) => {
        const text = rawLine.trim();
        if (text === '' || text.startsWith('#')) {
            return;
        }
        // Dedenting to the level of `tasks:` (or shallower) closes the section.
        if (tasksIndent !== null && leadingSpaces(rawLine) <= tasksIndent) {
            tasksIndent = null;
        }
        if (tasksIndent === null) {
            const header = /^(\s*)tasks\s*:\s*(?:#.*)?$/.exec(rawLine);
            if (header) {
                tasksIndent = header[1].length;
            }
            return;
        }
        // Task names are direct children under `tasks:` in canonical Taskfile YAML.
        if (leadingSpaces(rawLine) !== tasksIndent + 2) {
            return;
        }
        // Taskfile keys may be quoted. `\1` matches the same quote char captured in group 1.
        const keyMatch = /^\s*(["']?)([A-Za-z0-9_][A-Za-z0-9_./-]*)\1\s*:\s*(?:$|#)/.exec(rawLine);
        if (keyMatch === null) {
            return;
        }
        const target = keyMatch[2];
        if (!isDeterministicTargetName(target)) {
            return;
        }
        const lineNumber = offset + 1;
        const dedupeKey = `${target}\u0000${lineNumber}`;
        if (!emitted.has(dedupeKey)) {
            emitted.add(dedupeKey);
            discovered.push({ target, runner: 'taskfile', line: lineNumber });
        }
    });
    return discovered;
}
|
|
145
|
+
/**
 * Merge package scripts from all package.json entries in a manifest's analysis.
 *
 * Entries are applied in the order of `manifest.analysis.package_scripts`, so a
 * script name defined by a later entry replaces the one from an earlier entry.
 *
 * @param {object} manifest - Manifest whose `analysis.package_scripts` may be absent.
 * @returns {object} A new object mapping script names to their definitions.
 */
export function mergePackageScripts(manifest) {
    const packages = manifest.analysis?.package_scripts || [];
    return packages.reduce((merged, pkg) => Object.assign(merged, pkg.scripts || {}), {});
}
|
|
157
|
+
/**
 * Classify documented commands against known package scripts and CI commands.
 *
 * Each documented command is first split with `splitShellCommand`; every
 * resulting part is classified on its own, so the result may contain more
 * entries than `commands`.
 *
 * @param {string[]} commands - Commands as written in the documentation.
 * @param {object} packageScripts - Script name to definition map.
 * @param {string[]} ciCommands - Commands seen in CI workflows.
 * @param {object} [options] - Raw classification options; normalised before use.
 * @returns {Array<object>} One classification record per command part.
 */
export function classifyDocumentedCommands(commands, packageScripts, ciCommands, options = {}) {
    const normalizedOptions = normalizeCommandClassificationOptions(options);
    const classified = [];
    for (const command of commands) {
        for (const part of splitShellCommand(command)) {
            classified.push(classifyCommand(part, packageScripts, ciCommands, normalizedOptions));
        }
    }
    return classified;
}
|
|
165
|
+
/**
 * Classify a single command part.
 *
 * Checks, in order: exact (trimmed) match against a CI command, npm workspace
 * selector, `npm run <script>`, npm lifecycle command, make target, task-runner
 * target. The first check that applies decides the result.
 *
 * Script names are looked up as OWN properties of `packageScripts`. The `in`
 * operator would also find inherited members, so `npm run constructor` or
 * `npm run toString` would be reported as validated for any plain object.
 *
 * @param {string} command - One command part.
 * @param {object} packageScripts - Script name to definition map.
 * @param {string[]} ciCommands - Commands seen in CI workflows.
 * @param {object} options - Normalised options providing `makeTargets`,
 *   `taskRunnerTargets` and `taskRunnerTargetsByRunner`.
 * @returns {object} Record with `command`, `status`, `source` and, where known,
 *   `script_name` or `target_name`.
 */
function classifyCommand(command, packageScripts, ciCommands, options) {
    const hasScript = (scriptName) => packageScripts != null
        && Object.prototype.hasOwnProperty.call(packageScripts, scriptName);
    // A verbatim CI workflow match is authoritative for any supported command form,
    // including npm workspace invocations this best-effort parser cannot map safely.
    const normalized = command.trim();
    if (ciCommands.some((ci) => ci.trim() === normalized)) {
        return { command, status: 'validated', source: 'ci_workflow' };
    }
    // npm workspace selectors require package-to-workspace resolution. Keep those
    // conservative unless CI validated the exact documented command above.
    if (hasNpmWorkspaceSelector(command)) {
        return { command, status: 'unvalidated', source: 'unknown' };
    }
    // npm run <scriptName>: an unknown script is a definite miss.
    const npmRunScript = parseNpmRunScript(command);
    if (npmRunScript) {
        return {
            command,
            status: hasScript(npmRunScript) ? 'validated' : 'missing',
            source: 'package_scripts',
            script_name: npmRunScript
        };
    }
    // npm test / npm start / npm stop / npm restart (lifecycle commands): an
    // absent script is reported as unvalidated rather than missing.
    const lifecycleScript = parseNpmLifecycleScript(command);
    if (lifecycleScript) {
        return {
            command,
            status: hasScript(lifecycleScript) ? 'validated' : 'unvalidated',
            source: 'package_scripts',
            script_name: lifecycleScript
        };
    }
    const makeTarget = parseMakeTarget(command);
    if (makeTarget) {
        return {
            command,
            status: options.makeTargets.has(makeTarget) ? 'validated' : 'missing',
            source: 'makefile',
            target_name: makeTarget
        };
    }
    const taskRunnerTarget = parseTaskRunnerTarget(command);
    if (taskRunnerTarget) {
        // Prefer the target set of the specific runner; otherwise use the combined set.
        const known = options.taskRunnerTargetsByRunner[taskRunnerTarget.runner] || options.taskRunnerTargets;
        return {
            command,
            status: known.has(taskRunnerTarget.target) ? 'validated' : 'missing',
            source: 'task_runner',
            target_name: taskRunnerTarget.target
        };
    }
    return { command, status: 'unvalidated', source: 'unknown' };
}
|
|
220
|
+
/**
 * Decide whether a file should be ingested as documentation.
 *
 * The file needs a documentation extension (compared case-insensitively),
 * ingestion must not be switched off with `documentation.ingest === false`, no
 * `documentation.exclude` pattern may match, and at least one
 * `documentation.include` pattern must match.
 *
 * @param {string} filePath - Path tested against the configured patterns.
 * @param {object} config - Configuration with an optional `documentation` section.
 * @returns {boolean} True when the file qualifies.
 */
export function isDocumentationFile(filePath, config) {
    const lowerCased = filePath.toLowerCase();
    const hasDocExtension = DOC_EXTENSIONS.some((ext) => lowerCased.endsWith(ext));
    if (!hasDocExtension) {
        return false;
    }
    const settings = config.documentation || {};
    if (settings.ingest === false) {
        return false;
    }
    const matchesAny = (patterns) => (patterns || []).some((pattern) => globLikeMatch(filePath, pattern));
    // Exclusion wins over inclusion.
    return !matchesAny(settings.exclude) && matchesAny(settings.include);
}
|
|
231
|
+
/**
 * Build the documentation card for one documentation file.
 *
 * Reads the file's modification time from disk and combines it with headings,
 * links, code blocks, file paths, claims, claim validation and ADR metadata
 * extracted from `content`.
 *
 * A card is stale when the file is older than `documentation.stale_after_days`
 * (default 180) or the content mentions deprecated/obsolete/archived/outdated/
 * "legacy only". `status` is, in priority order: `stale`, `contradicted`,
 * `partially_validated`, `unvalidated`.
 *
 * @param {object} args
 * @param {object} args.file - File entry; `file.relative` is joined onto `repoPath`.
 * @param {string} args.content - File content.
 * @param {object} args.config - Configuration with an optional `documentation` section.
 * @param {string} args.repoPath - Repository root directory.
 * @returns {Promise<object>} The documentation card.
 */
export async function createDocumentationCard({ file, content, config, repoPath }) {
    const relativePath = file.relative;
    const stats = await fs.stat(path.join(repoPath, relativePath));

    const headings = extractHeadings(content);
    const links = extractMarkdownLinks(content);
    const codeBlocks = extractCodeBlocks(content);
    const filePaths = extractDocumentedFilePaths(content);
    const claims = extractDocumentationClaims(content);
    const validation = validateDocClaims({ claims, content, filePath: relativePath });
    const adr = detectAdrMetadata(relativePath, content);

    const millisecondsPerDay = 86_400_000;
    const ageDays = Math.floor((Date.now() - stats.mtimeMs) / millisecondsPerDay);
    const staleAfterDays = config.documentation?.stale_after_days ?? 180;
    const tooOld = ageDays > staleAfterDays;
    const stale = tooOld || /\b(deprecated|obsolete|archived|outdated|legacy only)\b/i.test(content);

    let status = 'unvalidated';
    if (stale) {
        status = 'stale';
    }
    else if (validation.contradictions.length) {
        status = 'contradicted';
    }
    else if (validation.validated.length) {
        status = 'partially_validated';
    }

    return {
        kind: 'documentation_card',
        path: relativePath,
        authority: config.documentation?.authority || 'secondary',
        modified_at: stats.mtime.toISOString(),
        age_days: ageDays,
        stale_after_days: staleAfterDays,
        stale,
        headings,
        links,
        code_blocks: codeBlocks,
        file_paths: filePaths,
        claims,
        validation,
        adr,
        status
    };
}
|
|
261
|
+
/**
 * Detect whether a document is an Architecture Decision Record and read its
 * status metadata.
 *
 * Detection uses two signals:
 *  - path: the normalised, lower-cased path starts with `adr/`, `docs/adr/` or
 *    `docs/adrs/`;
 *  - marker: an ADR heading/title line, a "Superseded by:" or "Replaces:" line,
 *    or a `superseded_by` / `superseded-by` / `replaces` front matter key.
 *
 * Front matter values take precedence over labelled lines in the body.
 *
 * NOTE(review): a "Status" marker on its own has never counted as a detection
 * signal. The earlier code combined it with the ADR heading, which is already a
 * marker, so that term could not change the outcome; a `docs/architecture/` path
 * hint was computed but never used. Both were removed as dead code. Confirm
 * whether either was meant to take part in detection.
 *
 * @param {string} filePath - Repository-relative path; backslashes are accepted.
 * @param {string} content - Markdown content.
 * @returns {{detected: boolean, detection_source: string, status: (string|null),
 *   superseded_by: (string|null), replaces: (string|null),
 *   has_status_metadata: boolean, superseded: boolean}}
 */
function detectAdrMetadata(filePath, content) {
    const lowerPath = String(filePath || '').replaceAll('\\', '/').toLowerCase();
    const frontmatter = parseMarkdownFrontmatter(content);
    const frontmatterKeys = new Set(Object.keys(frontmatter));
    const frontmatterStatus = readFrontmatterValue(frontmatter, ['status']);
    const frontmatterSupersededBy = readFrontmatterValue(frontmatter, ['superseded_by', 'superseded-by']);
    const frontmatterReplaces = readFrontmatterValue(frontmatter, ['replaces']);
    const statusLine = readLabeledLine(content, 'Status');
    const supersededByLine = readLabeledLine(content, 'Superseded by');
    const replacesLine = readLabeledLine(content, 'Replaces');
    // Either a Markdown heading such as "# ADR: ..." / "## ADR-12 ..." or a bare "ADR-12" line.
    const adrHeading = /^\s*#{1,6}\s*(?:ADR\s*:|ADR-\d+)\b/im.test(content) || /^\s*ADR-\d+\b/im.test(content);
    const detectedByMarker = Boolean(adrHeading
        || supersededByLine
        || replacesLine
        || frontmatterKeys.has('superseded_by')
        || frontmatterKeys.has('superseded-by')
        || frontmatterKeys.has('replaces'));
    const pathHint = lowerPath.startsWith('adr/') || lowerPath.startsWith('docs/adr/') || lowerPath.startsWith('docs/adrs/');
    const detected = pathHint || detectedByMarker;
    const detection_source = pathHint && detectedByMarker ? 'path+marker' : pathHint ? 'path' : detectedByMarker ? 'marker' : 'none';
    const status = firstDefined(frontmatterStatus, statusLine);
    const supersededBy = firstDefined(frontmatterSupersededBy, supersededByLine);
    const replaces = firstDefined(frontmatterReplaces, replacesLine);
    const normalizedStatus = status ? status.toLowerCase() : '';
    // Superseded when a successor is named or the status text says so.
    const superseded = Boolean(supersededBy || /\bsupersed(?:ed|ing)\b/.test(normalizedStatus) || /\breplaced\b/.test(normalizedStatus));
    const hasStatusMetadata = Boolean(status || supersededBy || replaces);
    return {
        detected,
        detection_source,
        status: status || null,
        superseded_by: supersededBy || null,
        replaces: replaces || null,
        has_status_metadata: hasStatusMetadata,
        superseded
    };
}
|
|
301
|
+
/**
 * Parse flat `key: value` pairs from a leading YAML front matter block.
 *
 * The block must open with `---` as the very first line and close with a later
 * line consisting of `---` (trailing whitespace allowed). Both LF and CRLF line
 * endings are accepted. An empty block (`---` directly followed by `---`) yields
 * no values, and body text after it is not read as metadata.
 *
 * Keys are lower-cased. One leading and one trailing quote character are removed
 * from each value. Lines that are not `key: value`, or whose value ends up
 * empty, are ignored. This is a best-effort scalar reader, not a YAML parser.
 *
 * @param {string} content - Markdown content; non-strings are coerced, falsy values count as empty.
 * @returns {Object<string, string>} Parsed pairs, or `{}` when there is no complete front matter block.
 */
function parseMarkdownFrontmatter(content) {
    const lines = String(content || '').split(/\r?\n/);
    if (lines[0] !== '---') {
        return {};
    }
    // Start at index 1 so that an immediately following fence closes an empty block.
    const closingIndex = lines.findIndex((line, index) => index > 0 && /^---\s*$/.test(line));
    if (closingIndex < 0) {
        return {};
    }
    const values = {};
    for (const line of lines.slice(1, closingIndex)) {
        const match = /^\s*([A-Za-z0-9_-]+)\s*:\s*(.+?)\s*$/.exec(line);
        if (!match) {
            continue;
        }
        const key = match[1].toLowerCase();
        const value = match[2].replace(/^['"]|['"]$/g, '').trim();
        if (!value) {
            continue;
        }
        values[key] = value;
    }
    return values;
}
|
|
322
|
+
/**
 * Return the value of the first listed key that holds a truthy value.
 *
 * @param {object} frontmatter - Parsed front matter values.
 * @param {string[]} keys - Candidate keys in priority order.
 * @returns {*} The first truthy value, or `''` when none of the keys has one.
 */
function readFrontmatterValue(frontmatter, keys) {
    const winner = keys.find((key) => Boolean(frontmatter[key]));
    return winner === undefined ? '' : frontmatter[winner];
}
|
|
330
|
+
/**
 * Read the value of the first `<label>: <value>` line in the content.
 *
 * The label is matched literally and case-insensitively at the start of a line
 * (leading whitespace allowed); any run of whitespace inside the label matches
 * one or more whitespace characters. One leading and one trailing quote
 * character are removed from the captured value.
 *
 * @param {string} content - Text to search.
 * @param {string} label - Label to look for, e.g. "Superseded by".
 * @returns {string} The value, or `''` when no such line exists.
 */
function readLabeledLine(content, label) {
    const labelSource = label
        .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
        .replace(/\s+/g, '\\s+');
    const linePattern = new RegExp(`^\\s*${labelSource}\\s*:\\s*(.+?)\\s*$`, 'im');
    const found = linePattern.exec(content);
    return found ? found[1].trim().replace(/^['"]|['"]$/g, '') : '';
}
|
|
337
|
+
/**
 * Return the first truthy argument.
 *
 * @param {...*} values - Candidates in priority order.
 * @returns {*} The first truthy candidate, or `''` when there is none.
 */
function firstDefined(...values) {
    const winner = values.find((candidate) => Boolean(candidate));
    return winner || '';
}
|
|
344
|
+
/**
 * Pick out lines that read like statements about the project.
 *
 * A trimmed line is a claim when it is non-empty, does not start with `#` or a
 * code fence marker, and opens (case-insensitively) with one of a fixed list of
 * words such as "the ", "we ", "run ", "use ", "deploy" or "test". Claim text is
 * cut to 280 characters and at most 100 claims are returned.
 *
 * @param {string} content - Markdown content.
 * @returns {Array<{line: number, text: string, status: string}>} Claims with 1-based line numbers.
 */
export function extractDocumentationClaims(content) {
    const claimOpener = /^(the |this |we |our |users |developers |run |use |requires |supports |deploy|build|test|configure)/i;
    return content
        .split('\n')
        .map((rawLine, offset) => ({ line: offset + 1, text: rawLine.trim() }))
        .filter(({ text }) => text !== '' && !text.startsWith('#') && !text.startsWith('```'))
        .filter(({ text }) => claimOpener.test(text))
        .slice(0, 100)
        .map(({ line, text }) => ({ line, text: text.slice(0, 280), status: 'unvalidated' }));
}
|
|
357
|
+
/**
 * Triage extracted claims and gather commands, environment variable mentions
 * and route claims from a documentation file.
 *
 * - Commands come from code blocks whose language is empty or one of
 *   bash/sh/shell/zsh/console, after removing a leading `$` or `>` prompt; only
 *   lines starting with a recognised executable are kept.
 * - Environment variables are upper-case tokens accepted by
 *   `isEnvironmentVariableMention`.
 * - A claim containing deprecation/removal wording goes to `contradictions`;
 *   otherwise a claim containing an operational keyword goes to `validated`
 *   (with status `needs-code-validation`).
 *
 * The returned `commands` and `env_vars` lists are de-duplicated and capped at
 * 50 entries, while `summary.commands` and `summary.env_vars` count every
 * occurrence found.
 *
 * @param {object} args
 * @param {Array<object>} args.claims - Claims with a `text` property.
 * @param {string} args.content - Markdown content of the file.
 * @param {string} args.filePath - Path recorded in the summary.
 * @returns {object} Validation result with a `summary` section.
 */
export function validateDocClaims({ claims, content, filePath }) {
    const routeClaims = extractRouteClaims(content);

    const commands = [];
    for (const block of extractCodeBlocks(content)) {
        const isShellBlock = /^(bash|sh|shell|zsh|console)?$/i.test(block.language || '');
        if (!isShellBlock) {
            continue;
        }
        for (const rawLine of block.content.split('\n')) {
            const candidate = rawLine.trim().replace(/^[$>]\s*/, '');
            if (RECOGNIZED_DOC_COMMAND_PREFIX.test(candidate)) {
                commands.push(...splitShellCommand(candidate));
            }
        }
    }

    const envVars = Array.from(content.matchAll(/\b[A-Z][A-Z0-9_]{2,}\b/g), (hit) => hit[0])
        .filter((name) => isEnvironmentVariableMention(name));

    const validated = [];
    const contradictions = [];
    for (const claim of claims) {
        const mentionsRemoval = /deprecated|obsolete|no longer|removed/i.test(claim.text);
        if (mentionsRemoval) {
            contradictions.push({ ...claim, status: 'needs-review', reason: 'documentation contains deprecation or removal language' });
            continue;
        }
        const isOperational = /run|command|npm|pnpm|yarn|make|docker|env|config|route|api|test/i.test(claim.text);
        if (isOperational) {
            validated.push({ ...claim, status: 'needs-code-validation', reason: 'claim is operational and should be checked against code/config' });
        }
    }

    const summary = {
        claims: claims.length,
        needs_code_validation: validated.length,
        contradictions: contradictions.length,
        route_claims: routeClaims.length,
        commands: commands.length,
        env_vars: envVars.length,
        file: filePath
    };
    return {
        validated,
        contradictions,
        route_claims: routeClaims,
        commands: Array.from(new Set(commands)).slice(0, 50),
        env_vars: Array.from(new Set(envVars)).slice(0, 50),
        summary
    };
}
|
|
402
|
+
/**
 * Collect route claims from every meaningful line of the content.
 *
 * Blank lines and lines made only of `-`, `|`, `:` and whitespace (such as table
 * separator rows) are skipped. At most 100 routes are returned.
 *
 * @param {string} content - Markdown content; falsy values count as empty.
 * @returns {Array<object>} Route claims in document order.
 */
export function extractRouteClaims(content) {
    const collected = [];
    const emitted = new Set();
    String(content || '').split('\n').forEach((line, offset) => {
        const trimmed = line.trim();
        const isBlank = trimmed === '';
        const isRuleRow = /^[-|\s:]+$/.test(trimmed);
        if (isBlank || isRuleRow) {
            return;
        }
        pushRouteMatches(collected, emitted, line, offset + 1);
    });
    return collected.slice(0, 100);
}
|
|
415
|
+
/**
 * Record the route claims found on one line.
 *
 * First every "<METHOD> <path>" mention is recorded. Then, when the line
 * contains a `|`, the HTTP methods and the stand-alone paths on the line are
 * paired by position (first with first, and so on) and recorded as well.
 * `pushRoute` is responsible for dropping duplicates and empty paths.
 *
 * @param {Array<object>} routes - Output list, appended to in place.
 * @param {Set<string>} seen - De-duplication keys shared across lines.
 * @param {string} line - Raw line text.
 * @param {number} lineNumber - 1-based line number stored on each route.
 * @returns {void}
 */
function pushRouteMatches(routes, seen, line, lineNumber) {
    const snippet = line.trim().slice(0, 280);
    if (snippet === '') {
        return;
    }
    const record = (method, routePath) => pushRoute(routes, seen, {
        line: lineNumber,
        text: snippet,
        snippet,
        path: routePath,
        method
    });

    // The patterns are shared, global regexes; start each scan from the beginning.
    ROUTE_CLAIM_PATTERN.lastIndex = 0;
    for (const [, verb, rawPath] of line.matchAll(ROUTE_CLAIM_PATTERN)) {
        record(verb.toUpperCase(), normalizeRouteClaimPath(rawPath));
    }

    if (!line.includes('|')) {
        return;
    }
    const methods = Array.from(
        line.matchAll(/\b(GET|POST|PUT|PATCH|DELETE|OPTIONS|HEAD|ALL)\b/gi),
        (hit) => hit[1].toUpperCase()
    );
    ROUTE_TABLE_PATH_PATTERN.lastIndex = 0;
    const paths = Array.from(
        line.matchAll(ROUTE_TABLE_PATH_PATTERN),
        (hit) => normalizeRouteClaimPath(hit[1])
    );
    const pairCount = Math.min(methods.length, paths.length);
    for (let slot = 0; slot < pairCount; slot += 1) {
        record(methods[slot], paths[slot]);
    }
}
|
|
445
|
+
/**
 * Normalise a documented route path, rejecting slash-only runs such as `//`.
 *
 * Delegates to `normalizeRoutePath`. Only when that yields the root path `/` is
 * the raw text inspected again: after removing surrounding quotes, brackets and
 * trailing punctuation plus any query or fragment, text made of two or more
 * slashes is treated as no path at all.
 *
 * @param {string} routePath - Path text as captured from the document.
 * @returns {string} The normalised path, or `''` when there is none.
 */
function normalizeRouteClaimPath(routePath) {
    const normalized = normalizeRoutePath(routePath);
    if (normalized !== '/') {
        // Covers both "no result" (returned as '') and every non-root path.
        return normalized || '';
    }
    const stripped = String(routePath || '')
        .trim()
        .replace(/^[`'"\[({<]+/, '')
        .replace(/[`'"\]\)}>.,;:!?]+$/, '')
        .trim()
        .replace(/[?#].*$/, '');
    const isSlashRun = /^\/{2,}$/.test(stripped);
    return isSlashRun ? '' : normalized;
}
|
|
459
|
+
/**
 * Append a route unless it lacks a path or method, or an identical
 * line/method/path combination was already recorded.
 *
 * @param {Array<object>} routes - Output list, appended to in place.
 * @param {Set<string>} seen - De-duplication keys, updated in place.
 * @param {object} route - Route with `line`, `method` and `path`.
 * @returns {void}
 */
function pushRoute(routes, seen, route) {
    const { line, method, path: routePath } = route;
    if (!routePath || !method) {
        return;
    }
    const dedupeKey = `${line}\u0000${method}\u0000${routePath}`;
    if (seen.has(dedupeKey)) {
        return;
    }
    seen.add(dedupeKey);
    routes.push(route);
}
|
|
468
|
+
/**
 * Decide whether an upper-case token should be treated as an environment
 * variable mention.
 *
 * Accepted: names listed in `COMMON_ENV_VAR_NAMES`, and any other
 * `[A-Z][A-Z0-9_]+` token that contains an underscore and is not a known
 * non-variable constant. Tokens without an underscore (README, HTTP, APPROVED
 * and the like) are rejected by the underscore rule alone, so the deny-list only
 * needs underscore-containing entries.
 *
 * @param {string} value - Candidate token.
 * @returns {boolean} True when the token counts as an environment variable.
 */
function isEnvironmentVariableMention(value) {
    if (!/^[A-Z][A-Z0-9_]{1,}$/.test(value)) {
        return false;
    }
    if (COMMON_ENV_VAR_NAMES.has(value)) {
        return true;
    }
    if (!value.includes('_')) {
        return false;
    }
    // Exclude known template markers and GitHub review states that look like env vars.
    return !/^(HUMAN_NOTES|CHANGES_REQUESTED|COMMENT_ONLY)$/.test(value);
}
|
|
482
|
+
/**
 * Turn the value of a workflow `run:` / `command:` key into command parts.
 *
 * An inline value is parsed directly. A block scalar header (`|` or `>`, with an
 * optional chomping and/or indentation indicator) makes the following lines the
 * value. The block's indentation is taken from its first non-blank line, and the
 * block ends at the first non-blank line that is
 *  - indented no deeper than `baseIndent`, or
 *  - indented less than the block's own content.
 *
 * The second rule matters for `- run: |` list items: `baseIndent` is measured
 * before the dash, so sibling keys of the step (`env:`, `shell:`, ...) sit
 * deeper than `baseIndent` and would otherwise be read as commands. Blank lines
 * inside the block are consumed but produce no command.
 *
 * NOTE(review): an explicit indentation indicator (e.g. `|2`) is recognised in
 * the header but not used to compute the content indent; the first content line
 * is used instead.
 *
 * @param {string} value - Text after the key's colon.
 * @param {string[]} lines - All lines of the workflow file.
 * @param {number} lineIndex - 0-based index of the line holding the key.
 * @param {number} baseIndent - Length of the leading whitespace captured for the key's line.
 * @returns {{parts: Array<object>, lastLineIndex: number}} Command parts and the
 *   0-based index of the last line consumed.
 */
function extractWorkflowCommandValue(value, lines, lineIndex, baseIndent) {
    const isBlockScalarHeader = /^[|>](?:[+-]?\d*|\d*[+-]?)$/.test(value.trim());
    if (!isBlockScalarHeader) {
        return { parts: extractWorkflowCommandParts(value, lineIndex + 1), lastLineIndex: lineIndex };
    }
    const blockLines = [];
    let lastLineIndex = lineIndex;
    let contentIndent = null;
    for (let index = lineIndex + 1; index < lines.length; index += 1) {
        const line = lines[index];
        const text = line.trim();
        if (text) {
            const indent = leadingSpaces(line);
            if (indent <= baseIndent) {
                break;
            }
            if (contentIndent === null) {
                contentIndent = indent;
            }
            else if (indent < contentIndent) {
                // Dedented below the block's content: a sibling key, not a command.
                break;
            }
        }
        lastLineIndex = index;
        if (text) {
            blockLines.push({ line: text, lineNumber: index + 1 });
        }
    }
    const parts = coalesceMultilineWorkflowCommands(blockLines).flatMap((entry) => extractWorkflowCommandParts(entry.command, entry.start_line, entry.end_line));
    return { parts, lastLineIndex };
}
|
|
500
|
+
/**
 * Splits one workflow command string into individual shell commands.
 *
 * Commands that are empty or contain a `${{` expression are ignored, as are
 * parts whose first word is a shell reserved word.
 *
 * @param {string} command - Raw command text.
 * @param {number} line - One-based line where the command starts.
 * @param {number} [endLine] - One-based last line; emitted as `end_line` only
 *   when it is a number greater than `line`.
 * @returns {Array<{command: string, line: number, end_line?: number}>}
 */
function extractWorkflowCommandParts(command, line, endLine) {
    const trimmed = command.trim();
    const firstChar = trimmed[0];
    // Unwrap only a scalar wrapped in one matching quote pair. Stripping each
    // end independently would corrupt commands such as `echo "hi"`, whose
    // trailing quote belongs to an argument rather than to YAML quoting.
    const isWrapped = trimmed.length >= 2
        && (firstChar === '"' || firstChar === "'")
        && trimmed.endsWith(firstChar);
    const unquoted = isWrapped ? trimmed.slice(1, -1) : trimmed;
    if (!unquoted || unquoted.includes('${{')) {
        return [];
    }
    const spansLines = typeof endLine === 'number' && endLine > line;
    return splitShellCommand(unquoted, false)
        .filter((part) => !isShellReservedCommand(part))
        .map((part) => ({
            command: part,
            line,
            ...(spansLines ? { end_line: endLine } : {})
        }));
}
|
|
512
|
+
/**
 * Reports whether the first word of a command is listed in
 * `SHELL_RESERVED_WORDS`.
 *
 * @param {string} command - Shell command text.
 * @returns {boolean} False when the command has no first word.
 */
function isShellReservedCommand(command) {
    const [leadingWord] = tokenizeShellWords(command);
    if (!leadingWord) {
        return false;
    }
    return Boolean(SHELL_RESERVED_WORDS.has(leadingWord));
}
|
|
516
|
+
/**
 * Counts the space characters at the start of a line (tabs are not counted).
 *
 * @param {string} line - Line of text.
 * @returns {number} Number of leading spaces.
 */
function leadingSpaces(line) {
    const indentMatch = /^ */.exec(line);
    if (!indentMatch) {
        return 0;
    }
    return indentMatch[0].length;
}
|
|
519
|
+
/**
 * Appends a workflow command source unless the same command with the same
 * line span has already been recorded.
 *
 * @param {Array<object>} target - Collected sources; appended to in place.
 * @param {Set<string>} seen - Dedupe keys already recorded; updated in place.
 * @param {{command: string, line?: number, end_line?: number}} value - Candidate.
 * @returns {void}
 */
function pushCiWorkflowCommandSource(target, seen, value) {
    const startLine = value.line ?? '';
    const endLine = value.end_line ?? '';
    const dedupeKey = `${value.command}␟${startLine}␟${endLine}`;
    if (!seen.has(dedupeKey)) {
        seen.add(dedupeKey);
        target.push(value);
    }
}
|
|
527
|
+
/**
 * Joins lines that end in a continuation backslash into single commands.
 *
 * Fragments of one command are joined with a single space. A command still
 * open when the input ends is emitted with the last seen line number as its
 * end line.
 *
 * @param {Array<{line: string, lineNumber: number}>} lines - Block lines.
 * @returns {Array<{command: string, start_line: number, end_line: number}>}
 */
function coalesceMultilineWorkflowCommands(lines) {
    const commands = [];
    let fragments = [];
    let startLine = 0;
    let lastLineNumber = 0;
    const reset = () => {
        fragments = [];
        startLine = 0;
    };
    for (const entry of lines) {
        const { lineNumber, line } = entry;
        lastLineNumber = lineNumber;
        const continues = hasLineContinuation(line);
        const text = (continues ? stripContinuationBackslash(line) : line).trim();
        if (text) {
            if (fragments.length === 0) {
                startLine = lineNumber;
            }
            fragments.push(text);
        }
        if (continues) {
            continue;
        }
        // A terminating line either completes the command or, when it carries
        // no text of its own, discards whatever was pending.
        if (text) {
            commands.push({ command: fragments.join(' '), start_line: startLine, end_line: lineNumber });
        }
        reset();
    }
    if (fragments.length > 0 && startLine > 0) {
        commands.push({ command: fragments.join(' '), start_line: startLine, end_line: lastLineNumber || startLine });
    }
    return commands;
}
|
|
562
|
+
/**
 * Reports whether a line ends (ignoring trailing whitespace) with an odd
 * number of backslashes, i.e. an unescaped continuation backslash.
 *
 * @param {string} line - Line of text.
 * @returns {boolean}
 */
function hasLineContinuation(line) {
    const trailing = /(\\+)\s*$/.exec(line);
    if (!trailing) {
        return false;
    }
    return trailing[1].length % 2 === 1;
}
|
|
565
|
+
/**
 * Removes one backslash from the run of backslashes at the end of a line,
 * keeping any trailing whitespace that follows the run.
 *
 * @param {string} line - Line of text.
 * @returns {string} The line unchanged when it has no trailing backslash run.
 */
function stripContinuationBackslash(line) {
    const trailing = /(\\+)(\s*)$/.exec(line);
    if (!trailing) {
        return line;
    }
    const [, slashes, whitespace] = trailing;
    const head = line.slice(0, trailing.index);
    return `${head}${slashes.slice(1)}${whitespace}`;
}
|
|
568
|
+
/**
 * Converts command-classification options into lookup sets.
 *
 * Missing lists become empty sets; a missing (`undefined`/`null`) options
 * object is treated as an empty one instead of throwing.
 *
 * @param {object} [options] - Raw options.
 * @param {Iterable<string>} [options.makeTargets]
 * @param {Iterable<string>} [options.taskRunnerTargets]
 * @param {{just?: Iterable<string>, taskfile?: Iterable<string>}} [options.taskRunnerTargetsByRunner]
 * @returns {{makeTargets: Set<string>, taskRunnerTargets: Set<string>,
 *   taskRunnerTargetsByRunner: {just: Set<string>, taskfile: Set<string>}}}
 */
function normalizeCommandClassificationOptions(options) {
    const source = options ?? {};
    const byRunner = source.taskRunnerTargetsByRunner;
    return {
        makeTargets: new Set(source.makeTargets || []),
        taskRunnerTargets: new Set(source.taskRunnerTargets || []),
        taskRunnerTargetsByRunner: {
            just: new Set(byRunner?.just || []),
            taskfile: new Set(byRunner?.taskfile || [])
        }
    };
}
|
|
578
|
+
/**
 * Extracts the script name from an `npm ... run <script>` command.
 *
 * @param {string} command - Shell command text.
 * @returns {string|undefined} The first non-flag word after `run`, or
 *   `undefined` when the command is not an npm `run` invocation.
 */
function parseNpmRunScript(command) {
    const [binary, ...args] = tokenizeShellWords(command);
    if (binary !== 'npm') {
        return undefined;
    }
    const runOffset = args.indexOf('run');
    if (runOffset === -1) {
        return undefined;
    }
    const afterRun = args.slice(runOffset + 1);
    return afterRun.find((word) => word && !word.startsWith('-'));
}
|
|
587
|
+
/**
 * Returns the npm subcommand when it is listed in `NPM_LIFECYCLE_SCRIPTS`.
 *
 * @param {string} command - Shell command text.
 * @returns {string|undefined} The second word of an `npm` command when it is
 *   a lifecycle script name; otherwise `undefined`.
 */
function parseNpmLifecycleScript(command) {
    const [binary, subcommand] = tokenizeShellWords(command);
    if (binary !== 'npm' || !NPM_LIFECYCLE_SCRIPTS.has(subcommand)) {
        return undefined;
    }
    return subcommand;
}
|
|
593
|
+
/**
 * Reports whether an npm command carries a workspace selector flag
 * (`-w`, `--workspace`, `--workspaces`, or `--workspace=<name>`).
 *
 * @param {string} command - Shell command text.
 * @returns {boolean} False for commands whose first word is not `npm`.
 */
function hasNpmWorkspaceSelector(command) {
    const words = tokenizeShellWords(command);
    if (words[0] !== 'npm') {
        return false;
    }
    for (const word of words) {
        const isExactFlag = word === '-w' || word === '--workspace' || word === '--workspaces';
        if (isExactFlag || word.startsWith('--workspace=')) {
            return true;
        }
    }
    return false;
}
|
|
599
|
+
/**
 * Splits a command into whitespace-separated words, keeping single- or
 * double-quoted runs together, and strips one quote character from each end
 * of every word.
 *
 * @param {string} command - Shell command text.
 * @returns {string[]} Words in order; empty when nothing matches.
 */
function tokenizeShellWords(command) {
    const rawWords = command.match(/"[^"]*"|'[^']*'|\S+/g);
    if (!rawWords) {
        return [];
    }
    const words = [];
    for (const rawWord of rawWords) {
        words.push(rawWord.replace(/^["']|["']$/g, ''));
    }
    return words;
}
|
|
602
|
+
/**
 * Extracts the first target named in a `make` command.
 *
 * @param {string} command - Shell command text.
 * @returns {string|undefined} The target, or `undefined` when the command is
 *   not a `make` invocation or names no target.
 */
function parseMakeTarget(command) {
    const [binary, ...args] = tokenizeShellWords(command);
    return binary === 'make'
        ? findFirstTaskToken(args, makeOptionConsumesValue)
        : undefined;
}
|
|
608
|
+
/**
 * Extracts the runner and first target from a `just` or `task` command.
 *
 * @param {string} command - Shell command text.
 * @returns {{runner: 'just'|'taskfile', target: string}|undefined}
 *   `undefined` when the command uses neither runner or names no target.
 */
function parseTaskRunnerTarget(command) {
    const [binary, ...args] = tokenizeShellWords(command);
    const isJust = binary === 'just';
    if (!isJust && binary !== 'task') {
        return undefined;
    }
    const target = findFirstTaskToken(args);
    return target
        ? { runner: isJust ? 'just' : 'taskfile', target }
        : undefined;
}
|
|
620
|
+
/**
 * Finds the first word in an argument list that names a task or target.
 *
 * Empty words, words containing `=`, and flags are skipped. A flag for which
 * `optionConsumesValue` returns true also swallows the word after it. A bare
 * `--` returns whatever word follows it.
 *
 * @param {string[]} tokens - Arguments after the program name.
 * @param {(option: string) => boolean} [optionConsumesValue] - Tells whether
 *   a flag takes the next word as its value; defaults to never.
 * @returns {string|undefined} The task word, or `undefined` when none exists.
 */
function findFirstTaskToken(tokens, optionConsumesValue = () => false) {
    let position = 0;
    while (position < tokens.length) {
        const candidate = tokens[position];
        position += 1;
        if (!candidate) {
            continue;
        }
        if (candidate === '--') {
            return tokens[position];
        }
        // Skip assignment-style flags (`--flag=value`) and variable assignments (`FOO=bar`).
        if (candidate.includes('=')) {
            continue;
        }
        if (!candidate.startsWith('-')) {
            return candidate;
        }
        if (optionConsumesValue(candidate) && position < tokens.length) {
            position += 1;
        }
    }
    return undefined;
}
|
|
640
|
+
/**
 * Reports whether a `make` flag takes the following word as its argument, so
 * that the argument is not mistaken for a target name.
 *
 * Covers the GNU make options that require a separate argument. Options with
 * an optional argument (`-j`, `-l`, `-O`) are deliberately left out because
 * the next word cannot be classified from the flag alone.
 *
 * @param {string} option - A flag word such as `-C` or `--file`.
 * @returns {boolean}
 */
function makeOptionConsumesValue(option) {
    switch (option) {
        case '-C':
        case '--directory':
        case '-f':
        case '--file':
        case '--makefile':
        case '-I':
        case '--include-dir':
        case '-o':
        case '--old-file':
        case '--assume-old':
        case '-W':
        case '--what-if':
        case '--new-file':
        case '--assume-new':
        case '-E':
        case '--eval':
            return true;
        default:
            return false;
    }
}
|
|
647
|
+
/**
 * Reports whether a Make/task target name is a plain, literal name.
 *
 * @param {string} target - Candidate target name.
 * @returns {boolean} False for empty names, names starting with `.`, and
 *   names containing `%`, `$`, `(` or `)`.
 */
function isDeterministicTargetName(target) {
    // Exclude special/pattern/expansion tokens to keep Make/task extraction deterministic.
    if (!target) {
        return false;
    }
    if (target.startsWith('.')) {
        return false;
    }
    for (const marker of ['%', '$', '(', ')']) {
        if (target.includes(marker)) {
            return false;
        }
    }
    return true;
}
|
|
656
|
+
/**
 * Splits a shell command line on `&&`, `||` and `;` that occur outside
 * single or double quotes.
 *
 * @param {string} command - Shell command line.
 * @param {boolean} [recognizedOnly=true] - When true, keeps only parts that
 *   match `RECOGNIZED_DOC_COMMAND_PREFIX`.
 * @returns {string[]} Trimmed, non-empty parts in order.
 */
function splitShellCommand(command, recognizedOnly = true) {
    const segments = [];
    let buffer = '';
    let openQuote = '';
    const flush = () => {
        segments.push(buffer.trim());
        buffer = '';
    };
    let cursor = 0;
    while (cursor < command.length) {
        const char = command[cursor];
        const opensQuote = !openQuote && (char === '"' || char === "'");
        if (opensQuote) {
            openQuote = char;
        }
        else if (char === openQuote) {
            openQuote = '';
        }
        else if (!openQuote) {
            const pair = command.slice(cursor, cursor + 2);
            if (pair === '&&' || pair === '||') {
                flush();
                cursor += 2;
                continue;
            }
            if (char === ';') {
                flush();
                cursor += 1;
                continue;
            }
        }
        buffer += char;
        cursor += 1;
    }
    flush();
    return segments.filter((part) => part && (!recognizedOnly || RECOGNIZED_DOC_COMMAND_PREFIX.test(part)));
}
|
|
689
|
+
/**
 * Collects file-path references from Markdown content.
 *
 * Lines inside fenced code blocks (``` or ~~~) are ignored. On every other
 * line, Markdown link targets are examined first, then inline code spans.
 *
 * @param {string} content - Markdown text.
 * @returns {Array<{path: string, line: number, source: 'link'|'inline_code'}>}
 *   At most 200 entries, each with a one-based line number.
 */
export function extractDocumentedFilePaths(content) {
    const results = [];
    const seen = new Set();
    let openFence = '';
    content.split('\n').forEach((line, index) => {
        const fence = /^\s*(```+|~~~+)/.exec(line);
        if (fence) {
            const fenceChar = fence[1][0];
            if (!openFence) {
                openFence = fenceChar;
                return;
            }
            // Only a fence of the same character closes the open block.
            if (fenceChar === openFence) {
                openFence = '';
                return;
            }
        }
        if (openFence) {
            return;
        }
        const record = (rawTarget, source, fromLink) => {
            const path = cleanDocumentedPathTarget(rawTarget);
            if (isDocumentedPathCandidate(path, fromLink)) {
                pushDocumentedPath(results, seen, { path, line: index + 1, source });
            }
        };
        for (const linkTarget of extractMarkdownLinkTargets(line)) {
            record(linkTarget, 'link', true);
        }
        for (const [, inlineCode] of line.matchAll(/`([^`]+)`/g)) {
            record(inlineCode, 'inline_code', false);
        }
    });
    return results.slice(0, 200);
}
|
|
718
|
+
/**
 * Extracts the raw destination text of inline Markdown links (`[label](dest)`)
 * found on a single line.
 *
 * Two destination forms are handled: `<...>` angle-bracket destinations, and
 * plain destinations where nested parentheses are balanced and parentheses
 * inside quotes are ignored. A plain destination is returned verbatim,
 * including any quoted title text.
 *
 * @param {string} line - One line of Markdown.
 * @returns {string[]} Destinations in order of appearance.
 */
function extractMarkdownLinkTargets(line) {
    const found = [];
    let position = 0;
    while (position < line.length) {
        const labelStart = line.indexOf('[', position);
        if (labelStart === -1) {
            break;
        }
        const labelEnd = line.indexOf(']', labelStart + 1);
        const opensDestination = labelEnd !== -1 && line[labelEnd + 1] === '(';
        if (!opensDestination) {
            position = labelStart + 1;
            continue;
        }
        let scan = labelEnd + 2;
        if (line[scan] === '<') {
            const angleStart = scan + 1;
            const angleEnd = line.indexOf('>', angleStart);
            if (angleEnd === -1) {
                position = angleStart + 1;
                continue;
            }
            scan = angleEnd + 1;
            while (line[scan] && /\s/.test(line[scan])) {
                scan += 1;
            }
            if (line[scan] === ')') {
                found.push(line.slice(angleStart, angleEnd));
            }
            position = scan + 1;
            continue;
        }
        let nesting = 0;
        let activeQuote = '';
        let destination = '';
        let closed = false;
        while (scan < line.length) {
            const char = line[scan];
            if ((char === '"' || char === "'") && !activeQuote) {
                activeQuote = char;
            }
            else if (char === activeQuote) {
                activeQuote = '';
            }
            else if (!activeQuote && char === '(') {
                nesting += 1;
            }
            else if (!activeQuote && char === ')') {
                if (nesting === 0) {
                    closed = true;
                    break;
                }
                nesting -= 1;
            }
            destination += char;
            scan += 1;
        }
        if (closed) {
            found.push(destination);
            position = scan + 1;
        }
        else {
            // Unterminated destination: resume right after the label.
            position = labelEnd + 1;
        }
    }
    return found;
}
|
|
780
|
+
/**
 * Appends a documented path unless the same path, line and source were
 * already recorded.
 *
 * @param {Array<object>} results - Collected entries; appended to in place.
 * @param {Set<string>} seen - Dedupe keys already recorded; updated in place.
 * @param {{path: string, line: number, source: string}} value - Candidate.
 * @returns {void}
 */
function pushDocumentedPath(results, seen, value) {
    const { path, line, source } = value;
    const dedupeKey = `${path}\0${line}\0${source}`;
    if (seen.has(dedupeKey)) {
        return;
    }
    seen.add(dedupeKey);
    results.push(value);
}
|
|
787
|
+
/**
 * Decides whether a cleaned link target or inline-code span should be treated
 * as a reference to a repository file path.
 *
 * Rejected outright: empty values, anchors, URLs and other `scheme:` values,
 * values containing `{`, `}`, `*` or whitespace, absolute paths, values
 * ending in `.git`, and generated-output references. Of the rest, anything
 * with a directory separator is accepted, as is any link target; a bare
 * inline-code name is accepted only when it has a recognised file extension.
 *
 * @param {string} value - Cleaned candidate text.
 * @param {boolean} fromLink - True when the value came from a Markdown link.
 * @returns {boolean}
 */
function isDocumentedPathCandidate(value, fromLink) {
    if (!value) {
        return false;
    }
    const isRejected = value.startsWith('#')
        || /^(https?:|mailto:|tel:)/i.test(value)
        || /[{}*]/.test(value)
        || /\s/.test(value)
        || /^[A-Za-z][A-Za-z0-9+.-]*:/.test(value)
        || value.startsWith('/')
        || value.endsWith('.git') // git remote URL, not a file path
        || isGeneratedOutputReference(value);
    if (isRejected) {
        return false;
    }
    const looksLikePath = hasParentDirectorySegment(value)
        || value.startsWith('./')
        || value.startsWith('../')
        || value.includes('/')
        || fromLink;
    if (looksLikePath) {
        return true;
    }
    return /^(?:[A-Z]+\.)?[^/]+\.(?:md|mdx|markdown|ts|tsx|js|jsx|mjs|cjs|json|ya?ml|toml|rs|go|py|rb|java|kt|cs|php|prisma|sql|sh|bash|env|txt)$/i.test(value);
}
|
|
812
|
+
/**
 * Extracts ATX headings (`#` through `######`) from Markdown content.
 *
 * Lines inside fenced code blocks (``` or ~~~) are skipped so that shell
 * comments and similar `# ...` lines in examples are not reported as
 * headings. Both LF and CRLF line endings are accepted.
 *
 * @param {string} content - Markdown text.
 * @returns {Array<{line: number, level: number, text: string}>} At most 100
 *   headings, each with a one-based line number.
 */
function extractHeadings(content) {
    const headings = [];
    // Splitting on an optional CR keeps `$` in the heading pattern usable for
    // CRLF files; `.` never matches a trailing `\r`.
    const lines = content.split(/\r?\n/);
    let fenceMarker = '';
    for (let index = 0; index < lines.length && headings.length < 100; index += 1) {
        const line = lines[index];
        const fenceMatch = /^\s*(```+|~~~+)/.exec(line);
        if (fenceMatch && (!fenceMarker || fenceMatch[1][0] === fenceMarker)) {
            fenceMarker = fenceMarker ? '' : fenceMatch[1][0];
            continue;
        }
        if (fenceMarker) {
            continue;
        }
        const match = /^(#{1,6})\s+(.+)$/.exec(line);
        if (match) {
            headings.push({ line: index + 1, level: match[1].length, text: match[2].trim() });
        }
    }
    return headings;
}
|
|
819
|
+
/**
 * Lists the distinct, cleaned link targets found in Markdown content.
 *
 * @param {string} content - Markdown text.
 * @returns {string[]} At most 200 unique targets in order of first appearance.
 */
function extractMarkdownLinks(content) {
    const uniqueTargets = new Set();
    content.split('\n').forEach((line) => {
        extractMarkdownLinkTargets(line).forEach((rawTarget) => {
            uniqueTargets.add(cleanDocumentedPathTarget(rawTarget));
        });
    });
    return Array.from(uniqueTargets).slice(0, 200);
}
|
|
828
|
+
/**
 * Extracts backtick-fenced code blocks from Markdown content.
 *
 * @param {string} content - Markdown text.
 * @returns {Array<{language: string, content: string}>} At most 40 blocks;
 *   `language` is the trimmed info string and `content` is the trimmed body
 *   truncated to 4000 characters.
 */
function extractCodeBlocks(content) {
    const fencePattern = /```([^\n`]*)\n([\s\S]*?)```/g;
    const blocks = Array.from(content.matchAll(fencePattern), ([, info, body]) => ({
        language: info.trim(),
        content: body.trim().slice(0, 4000)
    }));
    return blocks.slice(0, 40);
}
|
|
836
|
+
/**
 * Tests a file path against a simple glob-like pattern.
 *
 * Supported wildcards:
 *   - `**` followed by `/` matches zero or more whole directory segments
 *   - `**` elsewhere matches any characters, including `/`
 *   - `*` matches any run of characters within one path segment
 *   - `?` matches exactly one character within one path segment
 * Every other regex metacharacter is matched literally.
 *
 * The pattern is translated in a single pass so that the regex text emitted
 * for one wildcard is never rewritten by the rule for another.
 *
 * @param {string} filePath - Path to test.
 * @param {string} pattern - Glob-like pattern.
 * @returns {boolean} True when the whole path matches the pattern.
 */
function globLikeMatch(filePath, pattern) {
    const source = pattern.replace(/\*\*\/|\*\*|\*|\?|[.+^${}()|[\]\\]/g, (token) => {
        switch (token) {
            case '**/':
                return '(?:.*/)?';
            case '**':
                return '.*';
            case '*':
                return '[^/]*';
            case '?':
                return '[^/]';
            default:
                return `\\${token}`;
        }
    });
    return new RegExp(`^${source}$`).test(filePath);
}
|
|
844
|
+
//# sourceMappingURL=docs-ingestor.js.map
|