cap-pro 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/README.md +26 -0
- package/.claude-plugin/marketplace.json +24 -0
- package/.claude-plugin/plugin.json +24 -0
- package/LICENSE +21 -0
- package/README.ja-JP.md +834 -0
- package/README.ko-KR.md +823 -0
- package/README.md +806 -0
- package/README.pt-BR.md +452 -0
- package/README.zh-CN.md +800 -0
- package/agents/cap-architect.md +269 -0
- package/agents/cap-brainstormer.md +207 -0
- package/agents/cap-curator.md +276 -0
- package/agents/cap-debugger.md +365 -0
- package/agents/cap-designer.md +246 -0
- package/agents/cap-historian.md +464 -0
- package/agents/cap-migrator.md +291 -0
- package/agents/cap-prototyper.md +197 -0
- package/agents/cap-validator.md +308 -0
- package/bin/install.js +5433 -0
- package/cap/bin/cap-tools.cjs +853 -0
- package/cap/bin/lib/arc-scanner.cjs +344 -0
- package/cap/bin/lib/cap-affinity-engine.cjs +862 -0
- package/cap/bin/lib/cap-anchor.cjs +228 -0
- package/cap/bin/lib/cap-annotation-writer.cjs +340 -0
- package/cap/bin/lib/cap-checkpoint.cjs +434 -0
- package/cap/bin/lib/cap-cluster-detect.cjs +945 -0
- package/cap/bin/lib/cap-cluster-display.cjs +52 -0
- package/cap/bin/lib/cap-cluster-format.cjs +245 -0
- package/cap/bin/lib/cap-cluster-helpers.cjs +295 -0
- package/cap/bin/lib/cap-cluster-io.cjs +212 -0
- package/cap/bin/lib/cap-completeness.cjs +540 -0
- package/cap/bin/lib/cap-deps.cjs +583 -0
- package/cap/bin/lib/cap-design-families.cjs +332 -0
- package/cap/bin/lib/cap-design.cjs +966 -0
- package/cap/bin/lib/cap-divergence-detector.cjs +400 -0
- package/cap/bin/lib/cap-doctor.cjs +752 -0
- package/cap/bin/lib/cap-feature-map-internals.cjs +19 -0
- package/cap/bin/lib/cap-feature-map-migrate.cjs +335 -0
- package/cap/bin/lib/cap-feature-map-monorepo.cjs +885 -0
- package/cap/bin/lib/cap-feature-map-shard.cjs +315 -0
- package/cap/bin/lib/cap-feature-map.cjs +1943 -0
- package/cap/bin/lib/cap-fitness-score.cjs +1075 -0
- package/cap/bin/lib/cap-impact-analysis.cjs +652 -0
- package/cap/bin/lib/cap-learn-review.cjs +1072 -0
- package/cap/bin/lib/cap-learning-signals.cjs +627 -0
- package/cap/bin/lib/cap-loader.cjs +227 -0
- package/cap/bin/lib/cap-logger.cjs +57 -0
- package/cap/bin/lib/cap-memory-bridge.cjs +764 -0
- package/cap/bin/lib/cap-memory-confidence.cjs +452 -0
- package/cap/bin/lib/cap-memory-dir.cjs +987 -0
- package/cap/bin/lib/cap-memory-engine.cjs +698 -0
- package/cap/bin/lib/cap-memory-extends.cjs +398 -0
- package/cap/bin/lib/cap-memory-graph.cjs +790 -0
- package/cap/bin/lib/cap-memory-migrate.cjs +2015 -0
- package/cap/bin/lib/cap-memory-pin.cjs +183 -0
- package/cap/bin/lib/cap-memory-platform.cjs +490 -0
- package/cap/bin/lib/cap-memory-prune.cjs +707 -0
- package/cap/bin/lib/cap-memory-schema.cjs +812 -0
- package/cap/bin/lib/cap-migrate-tags.cjs +309 -0
- package/cap/bin/lib/cap-migrate.cjs +540 -0
- package/cap/bin/lib/cap-pattern-apply.cjs +1203 -0
- package/cap/bin/lib/cap-pattern-pipeline.cjs +1034 -0
- package/cap/bin/lib/cap-plugin-manifest.cjs +80 -0
- package/cap/bin/lib/cap-realtime-affinity.cjs +399 -0
- package/cap/bin/lib/cap-reconcile.cjs +570 -0
- package/cap/bin/lib/cap-research-gate.cjs +218 -0
- package/cap/bin/lib/cap-scope-filter.cjs +402 -0
- package/cap/bin/lib/cap-semantic-pipeline.cjs +1038 -0
- package/cap/bin/lib/cap-session-extract.cjs +987 -0
- package/cap/bin/lib/cap-session.cjs +445 -0
- package/cap/bin/lib/cap-snapshot-linkage.cjs +963 -0
- package/cap/bin/lib/cap-stack-docs.cjs +646 -0
- package/cap/bin/lib/cap-tag-observer.cjs +371 -0
- package/cap/bin/lib/cap-tag-scanner.cjs +1766 -0
- package/cap/bin/lib/cap-telemetry.cjs +466 -0
- package/cap/bin/lib/cap-test-audit.cjs +1438 -0
- package/cap/bin/lib/cap-thread-migrator.cjs +307 -0
- package/cap/bin/lib/cap-thread-synthesis.cjs +545 -0
- package/cap/bin/lib/cap-thread-tracker.cjs +519 -0
- package/cap/bin/lib/cap-trace.cjs +399 -0
- package/cap/bin/lib/cap-trust-mode.cjs +336 -0
- package/cap/bin/lib/cap-ui-design-editor.cjs +642 -0
- package/cap/bin/lib/cap-ui-mind-map.cjs +712 -0
- package/cap/bin/lib/cap-ui-thread-nav.cjs +693 -0
- package/cap/bin/lib/cap-ui.cjs +1245 -0
- package/cap/bin/lib/cap-upgrade.cjs +1028 -0
- package/cap/bin/lib/cli/arg-helpers.cjs +49 -0
- package/cap/bin/lib/cli/frontmatter-router.cjs +31 -0
- package/cap/bin/lib/cli/init-router.cjs +68 -0
- package/cap/bin/lib/cli/phase-router.cjs +102 -0
- package/cap/bin/lib/cli/state-router.cjs +61 -0
- package/cap/bin/lib/cli/template-router.cjs +37 -0
- package/cap/bin/lib/cli/uat-router.cjs +29 -0
- package/cap/bin/lib/cli/validation-router.cjs +26 -0
- package/cap/bin/lib/cli/verification-router.cjs +31 -0
- package/cap/bin/lib/cli/workstream-router.cjs +39 -0
- package/cap/bin/lib/commands.cjs +961 -0
- package/cap/bin/lib/config.cjs +467 -0
- package/cap/bin/lib/convention-reader.cjs +258 -0
- package/cap/bin/lib/core.cjs +1241 -0
- package/cap/bin/lib/feature-aggregator.cjs +423 -0
- package/cap/bin/lib/frontmatter.cjs +337 -0
- package/cap/bin/lib/init.cjs +1443 -0
- package/cap/bin/lib/manifest-generator.cjs +383 -0
- package/cap/bin/lib/milestone.cjs +253 -0
- package/cap/bin/lib/model-profiles.cjs +69 -0
- package/cap/bin/lib/monorepo-context.cjs +226 -0
- package/cap/bin/lib/monorepo-migrator.cjs +509 -0
- package/cap/bin/lib/phase.cjs +889 -0
- package/cap/bin/lib/profile-output.cjs +989 -0
- package/cap/bin/lib/profile-pipeline.cjs +540 -0
- package/cap/bin/lib/roadmap.cjs +330 -0
- package/cap/bin/lib/security.cjs +394 -0
- package/cap/bin/lib/session-manager.cjs +292 -0
- package/cap/bin/lib/skeleton-generator.cjs +179 -0
- package/cap/bin/lib/state.cjs +1032 -0
- package/cap/bin/lib/template.cjs +231 -0
- package/cap/bin/lib/test-detector.cjs +62 -0
- package/cap/bin/lib/uat.cjs +283 -0
- package/cap/bin/lib/verify.cjs +889 -0
- package/cap/bin/lib/workspace-detector.cjs +371 -0
- package/cap/bin/lib/workstream.cjs +492 -0
- package/cap/commands/gsd/workstreams.md +63 -0
- package/cap/references/arc-standard.md +315 -0
- package/cap/references/cap-agent-architecture.md +101 -0
- package/cap/references/cap-gitignore-template +9 -0
- package/cap/references/cap-zero-deps.md +158 -0
- package/cap/references/checkpoints.md +778 -0
- package/cap/references/continuation-format.md +249 -0
- package/cap/references/contract-test-templates.md +312 -0
- package/cap/references/feature-map-template.md +25 -0
- package/cap/references/git-integration.md +295 -0
- package/cap/references/git-planning-commit.md +38 -0
- package/cap/references/model-profiles.md +174 -0
- package/cap/references/phase-numbering.md +126 -0
- package/cap/references/planning-config.md +202 -0
- package/cap/references/property-test-templates.md +316 -0
- package/cap/references/security-test-templates.md +347 -0
- package/cap/references/session-template.json +8 -0
- package/cap/references/tdd.md +263 -0
- package/cap/references/user-profiling.md +681 -0
- package/cap/references/verification-patterns.md +612 -0
- package/cap/templates/UAT.md +265 -0
- package/cap/templates/claude-md.md +175 -0
- package/cap/templates/codebase/architecture.md +255 -0
- package/cap/templates/codebase/concerns.md +310 -0
- package/cap/templates/codebase/conventions.md +307 -0
- package/cap/templates/codebase/integrations.md +280 -0
- package/cap/templates/codebase/stack.md +186 -0
- package/cap/templates/codebase/structure.md +285 -0
- package/cap/templates/codebase/testing.md +480 -0
- package/cap/templates/config.json +44 -0
- package/cap/templates/context.md +352 -0
- package/cap/templates/continue-here.md +78 -0
- package/cap/templates/copilot-instructions.md +7 -0
- package/cap/templates/debug-subagent-prompt.md +91 -0
- package/cap/templates/discussion-log.md +63 -0
- package/cap/templates/milestone-archive.md +123 -0
- package/cap/templates/milestone.md +115 -0
- package/cap/templates/phase-prompt.md +610 -0
- package/cap/templates/planner-subagent-prompt.md +117 -0
- package/cap/templates/project.md +186 -0
- package/cap/templates/requirements.md +231 -0
- package/cap/templates/research-project/ARCHITECTURE.md +204 -0
- package/cap/templates/research-project/FEATURES.md +147 -0
- package/cap/templates/research-project/PITFALLS.md +200 -0
- package/cap/templates/research-project/STACK.md +120 -0
- package/cap/templates/research-project/SUMMARY.md +170 -0
- package/cap/templates/research.md +552 -0
- package/cap/templates/roadmap.md +202 -0
- package/cap/templates/state.md +176 -0
- package/cap/templates/summary.md +364 -0
- package/cap/templates/user-preferences.md +498 -0
- package/cap/templates/verification-report.md +322 -0
- package/cap/workflows/add-phase.md +112 -0
- package/cap/workflows/add-tests.md +351 -0
- package/cap/workflows/add-todo.md +158 -0
- package/cap/workflows/audit-milestone.md +340 -0
- package/cap/workflows/audit-uat.md +109 -0
- package/cap/workflows/autonomous.md +891 -0
- package/cap/workflows/check-todos.md +177 -0
- package/cap/workflows/cleanup.md +152 -0
- package/cap/workflows/complete-milestone.md +767 -0
- package/cap/workflows/diagnose-issues.md +231 -0
- package/cap/workflows/discovery-phase.md +289 -0
- package/cap/workflows/discuss-phase-assumptions.md +653 -0
- package/cap/workflows/discuss-phase.md +1049 -0
- package/cap/workflows/do.md +104 -0
- package/cap/workflows/execute-phase.md +846 -0
- package/cap/workflows/execute-plan.md +514 -0
- package/cap/workflows/fast.md +105 -0
- package/cap/workflows/forensics.md +265 -0
- package/cap/workflows/health.md +181 -0
- package/cap/workflows/help.md +660 -0
- package/cap/workflows/insert-phase.md +130 -0
- package/cap/workflows/list-phase-assumptions.md +178 -0
- package/cap/workflows/list-workspaces.md +56 -0
- package/cap/workflows/manager.md +362 -0
- package/cap/workflows/map-codebase.md +377 -0
- package/cap/workflows/milestone-summary.md +223 -0
- package/cap/workflows/new-milestone.md +486 -0
- package/cap/workflows/new-project.md +1250 -0
- package/cap/workflows/new-workspace.md +237 -0
- package/cap/workflows/next.md +97 -0
- package/cap/workflows/node-repair.md +92 -0
- package/cap/workflows/note.md +156 -0
- package/cap/workflows/pause-work.md +176 -0
- package/cap/workflows/plan-milestone-gaps.md +273 -0
- package/cap/workflows/plan-phase.md +857 -0
- package/cap/workflows/plant-seed.md +169 -0
- package/cap/workflows/pr-branch.md +129 -0
- package/cap/workflows/profile-user.md +449 -0
- package/cap/workflows/progress.md +507 -0
- package/cap/workflows/quick.md +757 -0
- package/cap/workflows/remove-phase.md +155 -0
- package/cap/workflows/remove-workspace.md +90 -0
- package/cap/workflows/research-phase.md +82 -0
- package/cap/workflows/resume-project.md +326 -0
- package/cap/workflows/review.md +228 -0
- package/cap/workflows/session-report.md +146 -0
- package/cap/workflows/settings.md +283 -0
- package/cap/workflows/ship.md +228 -0
- package/cap/workflows/stats.md +60 -0
- package/cap/workflows/transition.md +671 -0
- package/cap/workflows/ui-phase.md +298 -0
- package/cap/workflows/ui-review.md +161 -0
- package/cap/workflows/update.md +323 -0
- package/cap/workflows/validate-phase.md +170 -0
- package/cap/workflows/verify-phase.md +254 -0
- package/cap/workflows/verify-work.md +637 -0
- package/commands/cap/annotate.md +165 -0
- package/commands/cap/brainstorm.md +393 -0
- package/commands/cap/checkpoint.md +106 -0
- package/commands/cap/completeness.md +94 -0
- package/commands/cap/continue.md +72 -0
- package/commands/cap/debug.md +588 -0
- package/commands/cap/deps.md +169 -0
- package/commands/cap/design.md +479 -0
- package/commands/cap/init.md +354 -0
- package/commands/cap/iterate.md +249 -0
- package/commands/cap/learn.md +459 -0
- package/commands/cap/memory.md +275 -0
- package/commands/cap/migrate-feature-map.md +91 -0
- package/commands/cap/migrate-memory.md +108 -0
- package/commands/cap/migrate-tags.md +91 -0
- package/commands/cap/migrate.md +131 -0
- package/commands/cap/prototype.md +510 -0
- package/commands/cap/reconcile.md +121 -0
- package/commands/cap/review.md +360 -0
- package/commands/cap/save.md +72 -0
- package/commands/cap/scan.md +404 -0
- package/commands/cap/start.md +356 -0
- package/commands/cap/status.md +118 -0
- package/commands/cap/test-audit.md +262 -0
- package/commands/cap/test.md +394 -0
- package/commands/cap/trace.md +133 -0
- package/commands/cap/ui.md +167 -0
- package/hooks/dist/cap-check-update.js +115 -0
- package/hooks/dist/cap-context-monitor.js +185 -0
- package/hooks/dist/cap-learn-review-hook.js +114 -0
- package/hooks/dist/cap-learning-hook.js +192 -0
- package/hooks/dist/cap-memory.js +299 -0
- package/hooks/dist/cap-prompt-guard.js +97 -0
- package/hooks/dist/cap-statusline.js +157 -0
- package/hooks/dist/cap-tag-observer.js +115 -0
- package/hooks/dist/cap-version-check.js +112 -0
- package/hooks/dist/cap-workflow-guard.js +175 -0
- package/hooks/hooks.json +55 -0
- package/package.json +58 -0
- package/scripts/base64-scan.sh +262 -0
- package/scripts/build-hooks.js +93 -0
- package/scripts/cap-removal-checklist.md +202 -0
- package/scripts/prompt-injection-scan.sh +199 -0
- package/scripts/run-tests.cjs +181 -0
- package/scripts/secret-scan.sh +227 -0
|
@@ -0,0 +1,1766 @@
|
|
|
1
|
+
// @cap-context CAP v2.0 tag scanner -- extracts @cap-feature, @cap-todo, @cap-risk, and @cap-decision tags from source files.
|
|
2
|
+
// @cap-decision Separate module from arc-scanner.cjs -- CAP tags use @cap- prefix (not @gsd-) and have different metadata semantics (feature: key instead of phase: key).
|
|
3
|
+
// @cap-decision Regex-based extraction (not AST) -- language-agnostic, zero dependencies, proven sufficient in GSD arc-scanner.cjs.
|
|
4
|
+
// @cap-constraint Zero external dependencies -- uses only Node.js built-ins (fs, path).
|
|
5
|
+
// @cap-pattern Same comment anchor rule as ARC: tag is only valid when first non-whitespace content on a line is a comment token.
|
|
6
|
+
|
|
7
|
+
'use strict';
|
|
8
|
+
|
|
9
|
+
// @cap-feature(feature:F-001) Tag Scanner — regex-based extraction of @cap-* tags from source files
|
|
10
|
+
// @cap-todo decision: Migrating @gsd-* comment headers in this file to @cap-* format is blocked on F-006 migration completion
|
|
11
|
+
|
|
12
|
+
// @cap-history(sessions:4, edits:17, since:2026-04-20, learned:2026-05-08) Frequently modified — 4 sessions, 17 edits
|
|
13
|
+
const fs = require('node:fs');
|
|
14
|
+
const path = require('node:path');
|
|
15
|
+
// @cap-feature(feature:F-085) Scope filter integration — gitignore + path-pattern + plugin-mirror
|
|
16
|
+
// awareness lives in cap-scope-filter.cjs. Imported here so scanDirectory and friends share the
|
|
17
|
+
// same exclusion semantics with cap-migrate-tags.
|
|
18
|
+
const scopeModule = require('./cap-scope-filter.cjs');
|
|
19
|
+
|
|
20
|
+
// @cap-todo(ref:AC-20) Primary tags are @cap-feature and @cap-todo; risk and decision are optional standalone tags
|
|
21
|
+
// @cap-decision CAP tag types: 2 primary (feature, todo) + 2 optional (risk, decision). Simplified from GSD's 8 types.
|
|
22
|
+
// Tag-type names recognised after the `@cap-` prefix. The same four names form the type
// alternation in CAP_TAG_RE.
const CAP_TAG_TYPES = ['feature', 'todo', 'risk', 'decision'];
|
|
23
|
+
|
|
24
|
+
// @cap-feature(feature:F-047) Opt-in config check for unified anchor block parsing.
|
|
25
|
+
// Returns true when .cap/config.json has { unifiedAnchors: { enabled: true } }.
|
|
26
|
+
// Returns false on any error or when the section is absent. Called once per scanDirectory.
|
|
27
|
+
function isUnifiedAnchorsEnabled(projectRoot) {
  // Opt-in switch: only a literal `true` at unifiedAnchors.enabled in
  // <projectRoot>/.cap/config.json turns the feature on. A missing file, unreadable
  // file, invalid JSON or absent section all resolve to "disabled".
  let config;
  try {
    const configFile = path.join(projectRoot, '.cap', 'config.json');
    config = JSON.parse(fs.readFileSync(configFile, 'utf8'));
  } catch (_e) {
    return false;
  }
  if (!config || !config.unifiedAnchors) return false;
  return config.unifiedAnchors.enabled === true;
}
|
|
37
|
+
|
|
38
|
+
// @cap-feature(feature:F-094, primary:true) Opt-out config check for multi-line @cap-* description capture.
|
|
39
|
+
// Default is ON: continuation-pickup runs unless .cap/config.json explicitly sets
|
|
40
|
+
// { multilineCapture: { enabled: false } }. Missing config or any read error returns true (default).
|
|
41
|
+
function isMultilineCaptureEnabled(projectRoot) {
  // Opt-out switch: the answer is `true` unless <projectRoot>/.cap/config.json explicitly
  // carries multilineCapture.enabled === false. No project root, a missing or unreadable
  // file, and invalid JSON all keep the default (enabled).
  if (!projectRoot) return true;

  let config = null;
  try {
    const configFile = path.join(projectRoot, '.cap', 'config.json');
    config = JSON.parse(fs.readFileSync(configFile, 'utf8'));
  } catch (_e) {
    return true;
  }

  const section = config ? config.multilineCapture : null;
  return !(section && section.enabled === false);
}
|
|
53
|
+
|
|
54
|
+
// @cap-todo(ref:AC-25) Tag scanner uses native RegExp with dotAll flag for multiline extraction
|
|
55
|
+
// @cap-pattern Tag regex anchors to comment tokens at line start -- identical approach to arc-scanner.cjs
|
|
56
|
+
// @cap-decision F-046 leaves CAP_TAG_RE untouched (AC-5 backward compat). New polylingual extension uses extractTagsWithContext + getCommentStyle for richer per-language detection.
|
|
57
|
+
// Capture groups: [1] tag type (feature|todo|risk|decision), [2] optional metadata text found
// between the parentheses (parentheses excluded), [3] the remainder of the line after the tag.
// The pattern has no `g` flag, so .test()/.match() keep no lastIndex state between calls.
const CAP_TAG_RE = /^[ \t]*(?:\/\/|\/\*|\*|#|--|"""|''')[ \t]*@cap-(feature|todo|risk|decision)(?:\(([^)]*)\))?[ \t]*(.*)/;
|
|
58
|
+
|
|
59
|
+
// @cap-feature(feature:F-063) Design-Tag recognition in the tag scanner.
|
|
60
|
+
// @cap-todo(ac:F-063/AC-2) Recognise @cap-design-token(id:DT-NNN) and @cap-design-component(id:DC-NNN) in source comments.
|
|
61
|
+
// @cap-decision Keep the core CAP_TAG_RE / CAP_TAG_TYPES untouched — adding design types there would break F-001's
|
|
62
|
+
// regression tests (CAP_TAG_TYPES.length === 4 is pinned). Design tags get a sibling regex and are merged into
|
|
63
|
+
// extractTags output with type values 'design-token' | 'design-component'. Consumers that filter by tag.type
|
|
64
|
+
// against {'feature','todo','risk','decision'} are unaffected.
|
|
65
|
+
// Same shape as the core tag pattern, with a different type alternation. Capture groups:
// [1] design tag type (design-token|design-component), [2] optional metadata text between the
// parentheses, [3] the remainder of the line after the tag.
const CAP_DESIGN_TAG_RE = /^[ \t]*(?:\/\/|\/\*|\*|#|--|"""|''')[ \t]*@cap-(design-token|design-component)(?:\(([^)]*)\))?[ \t]*(.*)/;
|
|
66
|
+
|
|
67
|
+
// @cap-api CAP_DESIGN_TAG_TYPES -- exported for /cap:deps --design and /cap:trace design-usage.
|
|
68
|
+
// The two design tag-type names; they match the type alternation in CAP_DESIGN_TAG_RE.
const CAP_DESIGN_TAG_TYPES = ['design-token', 'design-component'];
|
|
69
|
+
|
|
70
|
+
// @cap-todo(ref:AC-26) Tag scanner is language-agnostic, operating on comment syntax patterns across JS, TS, Python, Ruby, Shell
|
|
71
|
+
// @cap-decision F-046 leaves SUPPORTED_EXTENSIONS untouched to preserve AC-5 backward compatibility (existing test asserts list length === 18). The new polylingual scanner uses Object.keys(COMMENT_STYLES) as its default extension list, which DOES include HTML/CSS/SCSS/Markdown/YAML/TOML/Shell-zsh.
|
|
72
|
+
// scanDirectory falls back to this list when options.extensions is not provided.
const SUPPORTED_EXTENSIONS = ['.js', '.cjs', '.mjs', '.ts', '.tsx', '.jsx', '.py', '.rb', '.sh', '.bash', '.sql', '.go', '.rs', '.java', '.c', '.cpp', '.h', '.hpp'];
|
|
73
|
+
// @cap-decision DEFAULT_EXCLUDE covers (a) VCS + tooling metadata, (b) JS/TS build outputs, (c) framework
|
|
74
|
+
// caches that emit source-mapped JS the scanner would otherwise mistake for real code.
|
|
75
|
+
// The Next.js / Turbo / Nx caches were the worst offenders — a single GoetzeInvest scan
|
|
76
|
+
// surfaced 344 decisions sourced from `.next/dev/server/chunks/*.js` (~28 % of the
|
|
77
|
+
// decisions.md file). Build artifacts MUST never enter the memory pipeline; pre-existing
|
|
78
|
+
// entries should be pruned via `cap:memory prune` after this constant lands.
|
|
79
|
+
const DEFAULT_EXCLUDE = [
  // Version control plus CAP / planning metadata
  '.git',
  '.cap',
  '.planning',

  // Dependency and build-output directories common to JS/TS projects
  'node_modules',
  'dist',
  'build',
  'coverage',
  'out',

  // Framework and monorepo caches
  '.next',
  '.turbo',
  '.nx',
  '.cache',
  '.parcel-cache',
  '.vercel',
  '.svelte-kit',

  // Python caches and virtual environments
  '__pycache__',
  '.pytest_cache',
  '.mypy_cache',
  '.ruff_cache',
  '.tox',
  'venv',
  '.venv',

  // Rust / Java / iOS / mobile tooling output
  'target',
  '.gradle',
  'Pods',
  '.expo',
];
|
|
90
|
+
|
|
91
|
+
// @cap-todo(ref:AC-22) @cap-todo supports structured subtypes: risk:..., decision:...
|
|
92
|
+
// @cap-decision Subtype detection uses prefix matching on the description text (e.g., "risk: memory leak" -> subtype: "risk")
|
|
93
|
+
// Anchored at the start of the description. Capture groups: [1] subtype keyword
// (risk|decision), [2] the text following the colon and any whitespace after it.
const SUBTYPE_RE = /^(risk|decision):\s*(.*)/;
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* @typedef {Object} CapTag
|
|
97
|
+
* @property {string} type - Tag type without @cap- prefix ('feature', 'todo', 'risk', 'decision')
|
|
98
|
+
* @property {string} file - Relative path from project root
|
|
99
|
+
* @property {number} line - 1-based line number
|
|
100
|
+
* @property {Object<string,string>} metadata - Parsed key-value pairs from parenthesized block
|
|
101
|
+
* @property {string} description - Text after metadata block
|
|
102
|
+
* @property {string} raw - Complete original line
|
|
103
|
+
* @property {string|null} subtype - For @cap-todo: 'risk' or 'decision' if prefixed, else null
|
|
104
|
+
*/
|
|
105
|
+
|
|
106
|
+
// @cap-api parseMetadata(metadataStr) -- Parses parenthesized key:value pairs.
|
|
107
|
+
// Returns: Object<string,string> -- flat key-value object.
|
|
108
|
+
/**
|
|
109
|
+
* @param {string} metadataStr - Raw metadata string without parens (e.g., "feature:auth, ac:AUTH/AC-1")
|
|
110
|
+
* @returns {Object<string,string>}
|
|
111
|
+
*/
|
|
112
|
+
function parseMetadata(metadataStr) {
  // Turns "key:value, key2:value2, flag" into a flat object of strings.
  //   - entries are comma-separated and trimmed; empty entries are skipped
  //   - only the FIRST colon splits key from value, so values may contain colons
  //   - an entry without a colon is stored as the string 'true' (flag form)
  //   - an entry whose key is empty (":value") is dropped
  //   - a repeated key keeps the last value
  if (!metadataStr || !metadataStr.trim()) return {};

  return metadataStr
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0)
    .reduce((collected, entry) => {
      const separatorAt = entry.indexOf(':');
      if (separatorAt < 0) {
        collected[entry] = 'true';
        return collected;
      }
      const key = entry.slice(0, separatorAt).trim();
      if (key) collected[key] = entry.slice(separatorAt + 1).trim();
      return collected;
    }, {});
}
|
|
131
|
+
|
|
132
|
+
// @cap-feature(feature:F-094) Detect the comment-anchor token used to introduce a @cap-* tag.
|
|
133
|
+
// Returns one of: '//', '/*', '*', '#', '--', '"""', "'''", or null when the line does not
|
|
134
|
+
// match the expected leading-token shape. Used to drive continuation-line matching: a Line-comment
|
|
135
|
+
// anchor (//, #, --) only continues with the same token; a Block-comment anchor (/*, """, ''')
|
|
136
|
+
// continues into block-body until the closing token; a `*` anchor (already-inside JSDoc body)
|
|
137
|
+
// continues with `*` body lines.
|
|
138
|
+
// Captures the comment token that opens a line, after optional spaces/tabs.
const ANCHOR_TOKEN_RE = /^[ \t]*(\/\/|\/\*|\*|#|--|"""|''')/;

/**
 * Reports which comment token a line starts with.
 * @param {string} line - A single source line.
 * @returns {string|null} The leading token ('//', '/*', '*', '#', '--', '"""' or "'''"),
 *   or null when the line does not begin with one of them.
 */
function detectAnchorToken(line) {
  const found = line.match(ANCHOR_TOKEN_RE);
  if (found === null) return null;
  return found[1];
}
|
|
143
|
+
|
|
144
|
+
// @cap-feature(feature:F-094) Extract continuation lines for a @cap-* tag.
|
|
145
|
+
// Walks forward from `startIdx` collecting comment-continuation lines until a stop-condition
|
|
146
|
+
// is met. Stop-conditions per AC-2: empty line, code line (no matching opener), new @cap-* tag,
|
|
147
|
+
// block-comment-close token. Returns an array of cleaned text fragments (one per continuation
|
|
148
|
+
// line). Caller concatenates with single-space separator.
|
|
149
|
+
//
|
|
150
|
+
// Block-comment behaviour (anchor `/*`, `"""`, `'''`): body lines accumulate until close token
|
|
151
|
+
// or stop-condition; leading `*` and surrounding whitespace are stripped from each body line.
|
|
152
|
+
// Line-comment behaviour (anchor `//`, `#`, `--`, `*`): each continuation line must start with
|
|
153
|
+
// the same anchor token (with arbitrary indent before/after).
|
|
154
|
+
//
|
|
155
|
+
// NOT covered (deliberate scope): cross-block continuations (a tag in one /* */ block plus
|
|
156
|
+
// prose in a separate /* */ block below); continuations after exactly one blank line; nested
|
|
157
|
+
// block comments. These remain F-094 follow-up scope.
|
|
158
|
+
function captureContinuations(lines, startIdx, anchor) {
  // Collects the comment lines that continue a tag, starting at lines[startIdx].
  //
  //   lines    - file content already split into lines
  //   startIdx - index of the first candidate continuation line
  //   anchor   - comment token of the tag line ('//', '/*', '*', '#', '--', '"""', "'''"),
  //              or a falsy value, in which case nothing is collected
  //
  // Returns the cleaned, non-empty text of each continuation line, in order. Collection stops
  // at a blank line, at a line carrying another tag, at a line that does not start with the
  // same line-comment token, or right after the line that closes a block comment.
  const continuations = [];
  if (!anchor) return continuations;

  const isBlockOpen = (anchor === '/*' || anchor === '"""' || anchor === "'''");
  // For line-comment anchors we precompute a regex matching the same token at line start.
  let lineRe = null;
  if (!isBlockOpen) {
    const escaped = anchor.replace(/[/*\-]/g, '\\$&');
    lineRe = new RegExp('^[ \\t]*' + escaped + '[ \\t]+(.*)$');
  }

  for (let j = startIdx; j < lines.length; j++) {
    // Content split on '\n' keeps a trailing '\r' on every line of a CRLF file. `.` never
    // matches '\r' and `$` (no `m` flag) only matches at end of input, so without this strip
    // lineRe rejects every line-comment continuation in CRLF files.
    const rawLine = lines[j];
    const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;

    if (!line.trim()) break;
    if (CAP_TAG_RE.test(line) || CAP_DESIGN_TAG_RE.test(line)) break;

    let text = null;
    let blockClosed = false;

    if (anchor === '/*') {
      // Keep only what precedes the close token, then drop one leading `*` gutter.
      const closeIdx = line.indexOf('*/');
      const body = closeIdx === -1 ? line : line.slice(0, closeIdx);
      text = body.replace(/^[ \t]*\*?[ \t]?/, '').trim();
      if (closeIdx !== -1) blockClosed = true;
    } else if (anchor === '"""' || anchor === "'''") {
      // Triple-quoted blocks close with the same token that opened them.
      const closeIdx = line.indexOf(anchor);
      if (closeIdx !== -1) {
        text = line.slice(0, closeIdx).trim();
        blockClosed = true;
      } else {
        text = line.trim();
      }
    } else {
      // Line comments: the line must start with the same token followed by whitespace.
      const m = line.match(lineRe);
      if (!m) break;
      text = m[1].trim();
    }

    if (text) continuations.push(text);
    if (blockClosed) break;
  }

  return continuations;
}
|
|
205
|
+
|
|
206
|
+
// @cap-api extractTags(content, filePath, options) -- Regex extraction engine supporting //, #, /* */, """ """ comment styles.
|
|
207
|
+
// Returns: CapTag[] -- array of extracted tags.
|
|
208
|
+
/**
|
|
209
|
+
* @param {string} content - File content to scan
|
|
210
|
+
* @param {string} filePath - Relative file path (for tag metadata)
|
|
211
|
+
* @param {Object} [options={}] - Extraction options. Default-initialised so that
|
|
212
|
+
* `extractTags.length === 2` stays pinned by F-046/AC-5 backward-compat test.
|
|
213
|
+
* @param {boolean} [options.multilineCapture=true] - When true, multi-line continuations are appended to description (F-094)
|
|
214
|
+
* @returns {CapTag[]}
|
|
215
|
+
*/
|
|
216
|
+
function extractTags(content, filePath, options = {}) {
  // Scans `content` line by line and returns one tag record per line that matches
  // CAP_TAG_RE (feature/todo/risk/decision) or, failing that, CAP_DESIGN_TAG_RE
  // (design-token/design-component). Each record carries: type, file (= filePath as given),
  // line (1-based), metadata (parsed parenthesised block), description, raw (the untouched
  // tag line) and subtype ('risk' | 'decision' for prefixed todo descriptions, else null).
  // `options` keeps its default initialiser so the declared arity (extractTags.length) is 2.
  const multilineEnabled = options.multilineCapture !== false; // default ON
  const lines = content.split('\n');
  const tags = [];

  // A block-comment anchor (/*, """, ''') that is also closed on the tag line itself has no
  // body left to continue into. Without this check, the lines below a one-line block comment
  // (typically code) were appended to the description until the next blank line.
  const closesOnAnchorLine = (line, anchor) => {
    if (anchor !== '/*' && anchor !== '"""' && anchor !== "'''") return false;
    const closeToken = anchor === '/*' ? '*/' : anchor;
    const openIdx = line.indexOf(anchor);
    return line.indexOf(closeToken, openIdx + anchor.length) !== -1;
  };

  // @cap-feature(feature:F-094) Continuation-pickup: if multilineCapture is enabled, walk
  // forward from the next line and append continuation-line content to description.
  // The original `raw` and `line` (1-based anchor line) are preserved for migration
  // compatibility (AC-4).
  const withContinuations = (description, line, idx) => {
    if (!multilineEnabled) return description;
    const anchor = detectAnchorToken(line);
    if (anchor && closesOnAnchorLine(line, anchor)) return description;
    const cont = captureContinuations(lines, idx + 1, anchor);
    if (cont.length === 0) return description;
    return (description + ' ' + cont.join(' ')).replace(/\s+/g, ' ').trim();
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const coreMatch = line.match(CAP_TAG_RE);

    // @cap-todo(ac:F-063/AC-2) Fall through to design-tag recognition. Two separate regexes keep the
    // core tag-type set (feature/todo/risk/decision) stable and pinned by F-001's regression tests.
    const match = coreMatch || line.match(CAP_DESIGN_TAG_RE);
    if (!match) continue;

    const type = match[1];
    const metadata = parseMetadata(match[2] || '');
    const description = withContinuations((match[3] || '').trim(), line, i);

    // @cap-todo(ref:AC-22) Detect subtypes in @cap-todo description (risk:..., decision:...)
    let subtype = null;
    if (type === 'todo') {
      const subtypeMatch = description.match(SUBTYPE_RE);
      if (subtypeMatch) subtype = subtypeMatch[1];
    }

    tags.push({
      type,
      file: filePath,
      line: i + 1,
      metadata,
      description,
      raw: line,
      subtype,
    });
  }
  return tags;
}
|
|
292
|
+
|
|
293
|
+
// @cap-api scanFile(filePath, projectRoot, options) -- Scans a single file for @cap-* tags.
|
|
294
|
+
// Returns: CapTag[] -- array of extracted tags with file, line, metadata, description.
|
|
295
|
+
/**
|
|
296
|
+
* @param {string} filePath - Absolute path to file
|
|
297
|
+
* @param {string} projectRoot - Absolute path to project root (for relative path computation)
|
|
298
|
+
* @returns {CapTag[]}
|
|
299
|
+
*/
|
|
300
|
+
// @cap-todo(ac:F-047/AC-1) scanFile shall also expand unified @cap anchor blocks when
|
|
301
|
+
// the caller passes { unifiedAnchors: true }. Backward-compatible default (off).
|
|
302
|
+
function scanFile(filePath, projectRoot, options) {
  // Reads one file and returns the tags found in it, with `file` expressed relative to
  // projectRoot. An unreadable file yields an empty array instead of throwing.
  // Optional `options` keys read here: multilineCapture (forwarded to extractTags when it is
  // neither null nor undefined) and unifiedAnchors (when truthy, results from
  // cap-anchor.cjs's scanAnchorsInContent are appended).

  // @cap-todo(ref:AC-25) Use native RegExp for tag extraction -- no AST parsing
  let source;
  try {
    source = fs.readFileSync(filePath, 'utf8');
  } catch (_e) {
    return [];
  }

  const relPath = path.relative(projectRoot, filePath);
  const opts = options || {};

  // @cap-feature(feature:F-094) Forward the multilineCapture flag to extractTags so callers
  // that resolved it once (scanDirectory) don't re-read .cap/config.json per file.
  const forwarded = opts.multilineCapture != null
    ? { multilineCapture: opts.multilineCapture }
    : {};
  const found = extractTags(source, relPath, forwarded);

  if (!opts.unifiedAnchors) return found;

  // Required on demand so the anchor module is only loaded when the feature is on.
  const anchorModule = require('./cap-anchor.cjs');
  found.push(...anchorModule.scanAnchorsInContent(source, relPath));
  return found;
}
|
|
323
|
+
|
|
324
|
+
// @cap-api scanDirectory(dirPath, options) -- Recursively scans a directory for @cap-* tags.
|
|
325
|
+
// Returns: CapTag[] -- aggregated tags from all matching files.
|
|
326
|
+
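// Options: { extensions?: string[], exclude?: string[], projectRoot?: string, scope?: { isExcluded(path, isDir) }, unifiedAnchors?: boolean, multilineCapture?: boolean }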
|
|
327
|
+
/**
 * Walk a directory tree depth-first and gather the tags of every file that
 * passes the extension and scope filters.
 *
 * Directories that cannot be listed are skipped silently. Directory entries
 * that are neither a directory nor a regular file are ignored.
 *
 * @param {string} dirPath - Absolute path to directory to scan
 * @param {Object} [options]
 * @param {string[]} [options.extensions] - File extensions to include (e.g., ['.js', '.ts', '.py'])
 * @param {string[]} [options.exclude] - Directory names to exclude (e.g., ['node_modules', '.git'])
 * @param {string} [options.projectRoot] - Project root for relative paths (defaults to dirPath)
 * @param {Object} [options.scope] - Pre-built scope filter exposing isExcluded(path, isDir);
 *   when omitted one is built from projectRoot and options.exclude
 * @param {boolean} [options.unifiedAnchors] - Explicit override; otherwise resolved via isUnifiedAnchorsEnabled
 * @param {boolean} [options.multilineCapture] - Explicit override; otherwise resolved via isMultilineCaptureEnabled
 * @returns {CapTag[]}
 */
function scanDirectory(dirPath, options = {}) {
  const allowedExts = options.extensions || SUPPORTED_EXTENSIONS;
  const root = options.projectRoot || dirPath;

  // @cap-todo(ac:F-085/AC-1) The scanner consumes a unified scope filter: gitignore-aware,
  // path-pattern-aware, plugin-mirror-aware. Legacy `options.exclude` (basename list) is
  // forwarded as dirExcludes for backwards compat with single-purpose callers.
  const scopeFilter = options.scope || scopeModule.buildScopeFilter(root, {
    dirExcludes: options.exclude,
  });

  // F-047: honour explicit opt-in via options OR .cap/config.json flag. Config is
  // read once per scan so the overhead stays constant regardless of file count.
  let anchorsOn;
  if (options.unifiedAnchors != null) {
    anchorsOn = !!options.unifiedAnchors;
  } else {
    anchorsOn = isUnifiedAnchorsEnabled(root);
  }

  // @cap-feature(feature:F-094) F-094 multilineCapture is opt-OUT (default ON). Honour explicit
  // options.multilineCapture; otherwise resolve from .cap/config.json once per scan.
  let multilineOn;
  if (options.multilineCapture != null) {
    multilineOn = !!options.multilineCapture;
  } else {
    multilineOn = isMultilineCaptureEnabled(root);
  }

  const collected = [];

  // @cap-constraint Uses readdirSync (not glob) per project zero-dep constraint
  const visit = (currentDir) => {
    let children;
    try {
      children = fs.readdirSync(currentDir, { withFileTypes: true });
    } catch (_e) {
      return;
    }

    for (const child of children) {
      const childPath = path.join(currentDir, child.name);

      if (child.isDirectory()) {
        if (!scopeFilter.isExcluded(childPath, true)) {
          visit(childPath);
        }
        continue;
      }

      if (!child.isFile()) continue;
      // Extension is checked before the scope filter, so the filter only sees candidate files.
      if (!allowedExts.includes(path.extname(child.name))) continue;
      if (scopeFilter.isExcluded(childPath, false)) continue;

      collected.push(
        ...scanFile(childPath, root, { unifiedAnchors: anchorsOn, multilineCapture: multilineOn })
      );
    }
  };

  visit(dirPath);
  return collected;
}
|
|
384
|
+
|
|
385
|
+
// @cap-api groupByFeature(tags) -- Groups tags by their feature: metadata value.
|
|
386
|
+
// Returns: Object<string, CapTag[]> -- map from feature name to tags.
|
|
387
|
+
/**
 * Bucket tags by the value of their `feature` metadata key.
 *
 * Tags without metadata, or without a `feature` value, are collected under
 * the '(unassigned)' key. Buckets are accumulated in a Map and only converted
 * to a plain object at the end, so feature ids that collide with
 * Object.prototype members (e.g. "constructor", "__proto__") are grouped like
 * any other id instead of crashing on the inherited value.
 *
 * @param {CapTag[]} tags - Array of extracted tags
 * @returns {Object<string, CapTag[]>} Map from feature id to its tags, in first-seen order
 */
function groupByFeature(tags) {
  const buckets = new Map();
  for (const tag of tags) {
    const rawId = tag.metadata && tag.metadata.feature;
    // String() keeps the keying identical to plain-object property coercion.
    const featureId = rawId ? String(rawId) : '(unassigned)';
    let bucket = buckets.get(featureId);
    if (!bucket) {
      bucket = [];
      buckets.set(featureId, bucket);
    }
    bucket.push(tag);
  }
  // Object.fromEntries defines own data properties, so "__proto__" is a normal key here.
  return Object.fromEntries(buckets);
}
|
|
400
|
+
|
|
401
|
+
// @cap-feature(feature:F-045) Multi-file AC traceability — aggregates per-AC file references and detects primary file per AC.
|
|
402
|
+
// @cap-decision Place buildAcFileMap alongside groupByFeature in the scanner module (not in cap-trace.cjs) — it is pure tag aggregation, no IO/graph traversal, mirrors the shape of the existing groupByFeature helper. cap-trace.cjs depends on it.
|
|
403
|
+
// @cap-decision The "ac" key in @cap-todo metadata accepts two formats: "F-045/AC-1" (fully qualified) and "AC-1" (relies on the surrounding @cap-feature for the feature ID). buildAcFileMap normalizes both.
|
|
404
|
+
|
|
405
|
+
/**
|
|
406
|
+
* @typedef {Object} AcFileMapEntry
|
|
407
|
+
* @property {string[]} files - All files that contributed at least one tag to this AC (deduped, stable order)
|
|
408
|
+
* @property {string|null} primary - Primary implementation file (designated, inferred, or null when no files)
|
|
409
|
+
* @property {('designated'|'inferred'|null)} primarySource - How `primary` was determined
|
|
410
|
+
* @property {Object<string,number>} tagDensity - Map from file path -> tag count contributing to this AC
|
|
411
|
+
* @property {string[]} warnings - Human-readable warnings (e.g., heuristic primary picked)
|
|
412
|
+
*/
|
|
413
|
+
|
|
414
|
+
// @cap-api buildAcFileMap(tags) -- Aggregate tags into per-AC entries with primary file detection.
|
|
415
|
+
// @cap-todo(ac:F-045/AC-1) Recognize `primary:true` flag on @cap-feature tags as the canonical-file marker.
|
|
416
|
+
// @cap-todo(ac:F-045/AC-2) Emit a structured acFileMap keyed by `<feature-id>/<ac-id>` with all contributing files.
|
|
417
|
+
// @cap-todo(ac:F-045/AC-3) When no `primary:true` is found and the AC spans multiple files, infer primary from highest tag density and emit a warning.
|
|
418
|
+
/**
 * Build a map of AC -> { files, primary, primarySource, tagDensity, warnings }.
 *
 * Key shape: "<feature-id>/<ac-id>" e.g. "F-045/AC-1".
 *
 * A tag (of any type) contributes its file to an AC when:
 *   - metadata.ac contains a "/" (taken as the fully qualified key, e.g. "F-045/AC-1"), or
 *   - metadata.ac is a short id (e.g. "AC-1") and metadata.feature supplies the feature id.
 * Tags with a short metadata.ac and no metadata.feature are skipped.
 *
 * Primary file detection, per AC:
 *   - If a feature-type tag with primary:true exists for the AC's feature AND that
 *     file contributes at least one tag to this AC -> that file, source 'designated'.
 *     A designated file that does not tag the AC is ignored for that AC.
 *   - Else if exactly one file contributes -> that file, source 'inferred'.
 *   - Else (several files) -> the file with the highest tag count, ties broken by
 *     first appearance, source 'inferred', and a warning is recorded.
 *
 * Every entry has at least one file, because entries are only created when a
 * contributing tag is seen.
 *
 * @param {CapTag[]} tags
 * @returns {Object<string, AcFileMapEntry>}
 */
function buildAcFileMap(tags) {
  const map = {};

  // First pass: collect designated-primary files per feature (from @cap-feature primary:true tags).
  // @cap-decision primary:true is a flag on @cap-feature only — putting it on @cap-todo or @cap-risk is meaningless because those tags are AC-level not feature-level.
  // A Map (not a plain object) so feature ids never collide with Object.prototype members.
  const designatedPrimaryByFeature = new Map(); // featureId -> file
  for (const tag of tags) {
    if (tag.type !== 'feature') continue;
    if (!tag.metadata || !tag.metadata.feature) continue;
    // Normalize "true" string flag (parser stores all values as strings) to boolean check.
    const isPrimary = tag.metadata.primary === 'true' || tag.metadata.primary === true;
    if (!isPrimary) continue;
    // First wins — if multiple files claim primary for the same feature, the first encountered wins.
    // @cap-risk Multiple primary:true claims on the same feature are silently ignored after the first; consider warning in a follow-up if this becomes a problem in practice.
    if (!designatedPrimaryByFeature.has(tag.metadata.feature)) {
      designatedPrimaryByFeature.set(tag.metadata.feature, tag.file);
    }
  }

  // Second pass: build per-AC contribution lists.
  // We support two ways a tag references an AC:
  //   1) metadata.ac with full form "F-NNN/AC-M"
  //   2) metadata.ac with short form "AC-M" PLUS metadata.feature giving the feature
  for (const tag of tags) {
    if (!tag.metadata || !tag.metadata.ac) continue;
    const acRaw = tag.metadata.ac;

    let key;
    if (acRaw.includes('/')) {
      key = acRaw;
    } else if (tag.metadata.feature) {
      key = `${tag.metadata.feature}/${acRaw}`;
    } else {
      // Tag references an AC without enough context to qualify it. Skip silently — orphan detection lives elsewhere.
      continue;
    }

    if (!map[key]) {
      map[key] = {
        files: [],
        primary: null,
        primarySource: null,
        tagDensity: {},
        warnings: [],
      };
    }
    const entry = map[key];
    // `files` is the source of truth for "seen before"; the density counter is
    // initialised explicitly rather than read through the prototype chain.
    if (entry.files.includes(tag.file)) {
      entry.tagDensity[tag.file] += 1;
    } else {
      entry.files.push(tag.file);
      entry.tagDensity[tag.file] = 1;
    }
  }

  // Third pass: resolve primary for each AC entry.
  for (const acKey of Object.keys(map)) {
    const entry = map[acKey];
    const featureId = acKey.split('/')[0];

    // Designated primary takes precedence — only if that file actually contributes to this AC.
    // If a feature designates a primary file but the AC isn't tagged in that file, fall back to inference.
    // @cap-decision Designated primary requires the file to actually contain at least one tag for this AC. Otherwise primary:true on an unrelated file (e.g. a barrel index) would mislead the trace.
    const designatedFile = designatedPrimaryByFeature.get(featureId);
    if (designatedFile && entry.files.includes(designatedFile)) {
      entry.primary = designatedFile;
      entry.primarySource = 'designated';
      continue;
    }

    if (entry.files.length === 1) {
      entry.primary = entry.files[0];
      entry.primarySource = 'inferred';
      continue;
    }

    // Multiple files contribute and no designated primary — pick by tag density.
    // @cap-decision Tag density (count of contributing tags per file) is the simplest defensible heuristic. Future signals could include @cap-feature presence, file size, or import graph centrality, but those add complexity for marginal gain in a heuristic-anyway choice.
    let bestFile = null;
    let bestCount = -1;
    // Iterate files in stable order so ties are broken by first-appearance.
    for (const f of entry.files) {
      const count = entry.tagDensity[f];
      if (count > bestCount) {
        bestCount = count;
        bestFile = f;
      }
    }
    entry.primary = bestFile;
    entry.primarySource = 'inferred';
    entry.warnings.push(
      `AC ${acKey} spans ${entry.files.length} files with no @cap-feature(...primary:true) tag — inferred primary: ${bestFile}`
    );
  }

  return map;
}
|
|
535
|
+
|
|
536
|
+
// @cap-api detectOrphans(tags, featureIds) -- Compare tags against Feature Map entries, fuzzy-match hints for orphans.
|
|
537
|
+
// Returns: Array of { tag, hint } where hint is the closest matching feature ID.
|
|
538
|
+
// @cap-todo(ref:AC-15) Orphan tags flagged with fuzzy-match hint suggesting closest existing feature ID
|
|
539
|
+
/**
 * Find tags whose `feature` metadata does not match any known feature id and
 * pair each with the closest known id as a hint.
 *
 * Tags without metadata or without a `feature` value are not orphans; they
 * are skipped, matching how buildAcFileMap treats metadata-less tags.
 *
 * @param {CapTag[]} tags - Array of extracted tags
 * @param {string[]} featureIds - Known feature IDs from Feature Map (e.g., ['F-001', 'F-002'])
 * @returns {{ tag: CapTag, hint: string|null }[]} One entry per orphan tag, in input order;
 *   hint is whatever findClosestMatch returns (null when nothing is close enough)
 */
function detectOrphans(tags, featureIds) {
  const knownIds = new Set(featureIds);
  const orphans = [];

  for (const tag of tags) {
    // Guarded access: a tag object lacking `metadata` must not abort the whole pass.
    const tagFeatureId = tag.metadata && tag.metadata.feature;
    if (!tagFeatureId) continue;
    if (knownIds.has(tagFeatureId)) continue;

    // Fuzzy match: find closest feature ID by edit distance
    const hint = findClosestMatch(tagFeatureId, featureIds);
    orphans.push({ tag, hint });
  }

  return orphans;
}
|
|
560
|
+
|
|
561
|
+
// @cap-decision Simple character-level distance for fuzzy matching -- no external library needed
|
|
562
|
+
/**
 * Levenshtein edit distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions turning `a` into `b`.
 * Characters are compared by string index (UTF-16 code units).
 *
 * Only the previous and current DP rows are kept in memory.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
function editDistance(a, b) {
  const rows = a.length;
  const cols = b.length;

  // Row 0: transforming the empty prefix of `a` into b[0..j) costs j insertions.
  let previous = Array.from({ length: cols + 1 }, (_, j) => j);

  for (let i = 1; i <= rows; i++) {
    // Column 0: deleting the first i characters of `a`.
    const current = [i];
    for (let j = 1; j <= cols; j++) {
      const substitution = previous[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
      const deletion = previous[j] + 1;
      const insertion = current[j - 1] + 1;
      current.push(Math.min(deletion, insertion, substitution));
    }
    previous = current;
  }

  return previous[cols];
}
|
|
582
|
+
|
|
583
|
+
/**
 * Pick the candidate with the smallest case-insensitive edit distance to `target`.
 *
 * When several candidates share the smallest distance, the earliest one wins.
 * A suggestion is only returned when that distance is at most
 * ceil(target.length / 2); otherwise the result is null.
 *
 * @param {string} target
 * @param {string[]} candidates
 * @returns {string|null} The closest candidate (original casing), or null when
 *   there are no candidates or none is close enough
 */
function findClosestMatch(target, candidates) {
  if (candidates.length === 0) return null;

  const needle = target.toLowerCase();
  let winner = null;
  let winnerDistance = Infinity;

  for (let i = 0; i < candidates.length; i++) {
    const distance = editDistance(needle, candidates[i].toLowerCase());
    if (distance < winnerDistance) {
      winnerDistance = distance;
      winner = candidates[i];
    }
  }

  // Only suggest if the distance is reasonable (at most half the target length, rounded up)
  const threshold = Math.ceil(target.length / 2);
  return winnerDistance <= threshold ? winner : null;
}
|
|
605
|
+
|
|
606
|
+
// @cap-todo(ref:AC-78) /cap:scan shall traverse all packages in a monorepo
|
|
607
|
+
// @cap-todo(ref:AC-93) Zero runtime dependencies -- uses only Node.js built-ins
|
|
608
|
+
// @cap-todo(ref:AC-94) Tag scanner uses native RegExp -- no comment-parser or AST parser
|
|
609
|
+
// @cap-todo(ref:AC-95) File discovery uses fs.readdirSync with recursive walk -- no glob library
|
|
610
|
+
// @cap-todo(ref:AC-96) CLI argument parsing uses existing parseNamedArgs() pattern
|
|
611
|
+
|
|
612
|
+
// @cap-api detectWorkspaces(projectRoot) -- Detects monorepo workspaces from package.json, pnpm-workspace.yaml, nx.json, and lerna.json (checked in that order; first match wins).
|
|
613
|
+
// Returns: { isMonorepo: boolean, packages: string[] }
|
|
614
|
+
/**
 * Decide whether a project is a monorepo and list its workspace package directories.
 *
 * Manifests are probed in a fixed order and the first one that marks the
 * project as a monorepo ends the search:
 *   1. package.json with a truthy `workspaces` field (array, or object with `packages`)
 *   2. pnpm-workspace.yaml with a `packages:` list
 *   3. nx.json (any parseable file counts)
 *   4. lerna.json (any parseable file counts; defaults to "packages/*")
 *
 * The monorepo flag is set before patterns are expanded, so a failure while
 * reading or expanding patterns leaves `isMonorepo: true` with whatever
 * `packages` held at that point. Unparseable manifests are ignored.
 *
 * @param {string} projectRoot - Absolute path to project root
 * @returns {{ isMonorepo: boolean, packages: string[] }}
 */
function detectWorkspaces(projectRoot) {
  const result = { isMonorepo: false, packages: [] };

  // 1) package.json workspaces (npm/yarn/pnpm)
  const manifestPath = path.join(projectRoot, 'package.json');
  if (fs.existsSync(manifestPath)) {
    try {
      const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
      if (manifest.workspaces) {
        result.isMonorepo = true;
        let globs;
        if (Array.isArray(manifest.workspaces)) {
          globs = manifest.workspaces;
        } else {
          globs = manifest.workspaces.packages || [];
        }
        result.packages = resolveWorkspaceGlobs(projectRoot, globs);
      }
    } catch (_e) {
      // Malformed package.json
    }
  }
  if (result.isMonorepo) return result;

  // 2) pnpm-workspace.yaml
  const pnpmFile = path.join(projectRoot, 'pnpm-workspace.yaml');
  if (fs.existsSync(pnpmFile)) {
    try {
      const yamlText = fs.readFileSync(pnpmFile, 'utf8');
      // Minimal YAML handling for the packages list, e.g.:
      //   packages:
      //     - "apps/*"
      //     - "packages/*"
      const listMatch = yamlText.match(/packages:\s*\n((?:\s+-\s*.+\n?)*)/);
      if (listMatch) {
        result.isMonorepo = true;
        const globs = [];
        for (const rawLine of listMatch[1].split('\n')) {
          // Strip the "- " bullet and optional surrounding quotes.
          const cleaned = rawLine.replace(/^\s*-\s*['"]?/, '').replace(/['"]?\s*$/, '');
          if (cleaned) globs.push(cleaned);
        }
        result.packages = resolveWorkspaceGlobs(projectRoot, globs);
      }
    } catch (_e) {
      // Malformed pnpm-workspace.yaml
    }
  }
  if (result.isMonorepo) return result;

  // 3) nx.json (NX workspace — configured layout, else conventional apps/packages/libs dirs)
  const nxFile = path.join(projectRoot, 'nx.json');
  if (fs.existsSync(nxFile)) {
    try {
      const nxConfig = JSON.parse(fs.readFileSync(nxFile, 'utf8'));
      result.isMonorepo = true;
      const layout = nxConfig.workspaceLayout || {};
      let globs = [layout.appsDir, layout.libsDir]
        .filter(Boolean)
        .map((dir) => dir + '/*');
      if (globs.length === 0) {
        // Fallback: common NX directories that actually exist
        globs = ['apps', 'packages', 'libs']
          .filter((dir) => fs.existsSync(path.join(projectRoot, dir)))
          .map((dir) => dir + '/*');
      }
      if (globs.length > 0) {
        result.packages = resolveWorkspaceGlobs(projectRoot, globs);
      }
    } catch (_e) {
      // Malformed nx.json
    }
  }
  if (result.isMonorepo) return result;

  // 4) lerna.json
  const lernaFile = path.join(projectRoot, 'lerna.json');
  if (fs.existsSync(lernaFile)) {
    try {
      const lernaConfig = JSON.parse(fs.readFileSync(lernaFile, 'utf8'));
      result.isMonorepo = true;
      result.packages = resolveWorkspaceGlobs(projectRoot, lernaConfig.packages || ['packages/*']);
    } catch (_e) {
      // Malformed lerna.json
    }
  }

  return result;
}
|
|
709
|
+
|
|
710
|
+
// @cap-api resolveWorkspaceGlobs(projectRoot, patterns) -- Expands workspace glob patterns to actual directories.
|
|
711
|
+
// @cap-decision Uses fs.readdirSync instead of glob library for workspace pattern expansion. Handles only simple patterns (dir/* and dir/**).
|
|
712
|
+
/**
 * Expand workspace patterns into package directories that exist on disk.
 *
 * Only two pattern shapes are understood:
 *   - "dir"            a direct package reference, returned as-is when it is a directory
 *   - "dir/*", "dir/**" every non-hidden immediate subdirectory of "dir"
 * Anything else (globs in the middle of a path, negations) resolves to a path
 * that does not exist and is skipped.
 *
 * Non-string entries are ignored so one malformed manifest value cannot abort
 * resolution of the remaining patterns. Paths that are missing, not
 * directories, or unreadable are skipped. Each directory is returned once,
 * in first-seen order, even when patterns overlap.
 *
 * @param {string} projectRoot - Absolute path to project root
 * @param {string[]} patterns - Workspace glob patterns (e.g., ["packages/*", "apps/*"])
 * @returns {string[]} - Array of relative package directory paths
 */
function resolveWorkspaceGlobs(projectRoot, patterns) {
  const packages = [];
  const alreadyListed = new Set();
  const addPackage = (relDir) => {
    if (alreadyListed.has(relDir)) return;
    alreadyListed.add(relDir);
    packages.push(relDir);
  };

  for (const pattern of patterns) {
    if (typeof pattern !== 'string') continue;

    // Strip trailing glob: "packages/*" -> "packages", "apps/**" -> "apps"
    const baseDir = pattern.replace(/\/\*+$/, '');
    const fullPath = path.join(projectRoot, baseDir);

    let entries;
    try {
      // statSync throws for a missing path, which covers the existence check too.
      if (!fs.statSync(fullPath).isDirectory()) continue;

      // If pattern has no glob, it is a direct package reference
      if (!pattern.includes('*')) {
        addPackage(baseDir);
        continue;
      }

      entries = fs.readdirSync(fullPath, { withFileTypes: true });
    } catch (_e) {
      // Missing or unreadable path -- skip this pattern only
      continue;
    }

    // Enumerate subdirectories, ignoring dot-directories
    for (const entry of entries) {
      if (entry.isDirectory() && !entry.name.startsWith('.')) {
        addPackage(path.join(baseDir, entry.name));
      }
    }
  }

  return packages;
}
|
|
751
|
+
|
|
752
|
+
// @cap-api scanMonorepo(projectRoot, options) -- Scans all workspace packages in a monorepo for @cap-* tags.
|
|
753
|
+
// @cap-todo(ref:AC-79) Feature Map entries support cross-package file references (e.g., packages/core/src/auth.ts)
|
|
754
|
+
// @cap-todo(ref:AC-80) Works seamlessly with single-repo projects -- returns regular scanDirectory results if not a monorepo
|
|
755
|
+
/**
 * Scan a project for tags, transparently handling monorepos.
 *
 * - Not a monorepo (per detectWorkspaces): a single scanDirectory pass over the root.
 * - Monorepo: the root is scanned first, then every detected workspace package
 *   that exists on disk. Results are merged in that order; a tag whose
 *   "file:line" location was already collected is dropped, so a file reached by
 *   more than one pass is not reported twice. Tags sharing both file and line
 *   collapse to the first one seen.
 *
 * Every pass uses the monorepo root as `projectRoot`, so tag file paths are
 * relative to the project root regardless of which package they came from.
 *
 * NOTE(review): whether the root pass itself descends into workspace
 * directories depends on the scope filter used by scanDirectory -- confirm there.
 *
 * @param {string} projectRoot - Absolute path to project root
 * @param {Object} [options] - Forwarded to scanDirectory (projectRoot is always overridden)
 * @param {string[]} [options.extensions] - File extensions to include
 * @param {string[]} [options.exclude] - Directory names to exclude
 * @returns {{ tags: CapTag[], isMonorepo: boolean, packages: string[] }}
 */
function scanMonorepo(projectRoot, options = {}) {
  const { isMonorepo, packages } = detectWorkspaces(projectRoot);
  const scan = (dir) => scanDirectory(dir, { ...options, projectRoot });

  if (!isMonorepo) {
    // Single repo -- delegate to base scanner
    return { tags: scan(projectRoot), isMonorepo: false, packages: [] };
  }

  const merged = [];
  const seenLocations = new Set();
  const absorb = (batch) => {
    for (const tag of batch) {
      const location = `${tag.file}:${tag.line}`;
      if (seenLocations.has(location)) continue;
      seenLocations.add(location);
      merged.push(tag);
    }
  };

  // Root first, then each workspace package
  absorb(scan(projectRoot));
  for (const pkg of packages) {
    const pkgDir = path.join(projectRoot, pkg);
    if (fs.existsSync(pkgDir)) {
      absorb(scan(pkgDir));
    }
  }

  return { tags: merged, isMonorepo: true, packages };
}
|
|
817
|
+
|
|
818
|
+
// @cap-api groupByPackage(tags) -- Groups tags by their workspace package based on file path prefix.
|
|
819
|
+
/**
 * Bucket tags by the workspace package that contains their file.
 *
 * A tag belongs to the first package (in `packages` order) whose path,
 * followed by "/" or the platform path separator, is a prefix of `tag.file`.
 * Tags matching no package go to the '(root)' bucket. Every package gets a
 * bucket, even when it ends up empty.
 *
 * @param {CapTag[]} tags - Array of extracted tags
 * @param {string[]} packages - Known workspace package paths
 * @returns {Object<string, CapTag[]>}
 */
function groupByPackage(tags, packages) {
  const groups = { '(root)': [] };
  packages.forEach((pkg) => {
    groups[pkg] = [];
  });

  for (const tag of tags) {
    const ownerIndex = packages.findIndex(
      (pkg) => tag.file.startsWith(pkg + '/') || tag.file.startsWith(pkg + path.sep)
    );
    const bucketKey = ownerIndex === -1 ? '(root)' : packages[ownerIndex];
    groups[bucketKey].push(tag);
  }

  return groups;
}
|
|
846
|
+
|
|
847
|
+
// @cap-api scanApp(projectRoot, appPath, options) -- Scans a single app directory plus referenced shared packages.
|
|
848
|
+
// When activeApp is set, scans only the active app and shared packages it imports.
|
|
849
|
+
/**
 * Scan one app directory plus the workspace packages that detectSharedPackages
 * reports for it.
 *
 * All tags from the app directory are kept as returned. Tags from shared
 * packages are appended only when their "file:line" location has not been
 * collected yet. Shared packages whose directory does not exist are skipped
 * and not listed in `scannedDirs`.
 *
 * @param {string} projectRoot - Absolute path to project root
 * @param {string} appPath - Relative app path (e.g., "apps/flow")
 * @param {Object} [options] - Forwarded to scanDirectory (projectRoot is always overridden)
 * @param {string[]} [options.extensions] - File extensions to include
 * @param {string[]} [options.exclude] - Directory names to exclude
 * @returns {{ tags: CapTag[], scannedDirs: string[] }} Collected tags and the
 *   relative directories that were scanned (app first)
 */
function scanApp(projectRoot, appPath, options = {}) {
  const scannedDirs = [appPath];

  // Scan the app directory itself
  const appTags = scanDirectory(path.join(projectRoot, appPath), {
    ...options,
    projectRoot,
  });

  const collected = appTags.slice();
  const seenLocations = new Set();
  for (const tag of appTags) {
    seenLocations.add(`${tag.file}:${tag.line}`);
  }

  // Shared packages referenced by this app via package.json dependencies
  for (const sharedPkg of detectSharedPackages(projectRoot, appPath)) {
    const sharedDir = path.join(projectRoot, sharedPkg);
    if (!fs.existsSync(sharedDir)) continue;

    scannedDirs.push(sharedPkg);
    const sharedTags = scanDirectory(sharedDir, {
      ...options,
      projectRoot,
    });
    for (const tag of sharedTags) {
      const location = `${tag.file}:${tag.line}`;
      if (seenLocations.has(location)) continue;
      seenLocations.add(location);
      collected.push(tag);
    }
  }

  return { tags: collected, scannedDirs };
}
|
|
891
|
+
|
|
892
|
+
// @cap-api detectSharedPackages(projectRoot, appPath) -- Detects workspace packages referenced by an app's package.json.
|
|
893
|
+
/**
 * List the workspace packages that an app depends on.
 *
 * The app's package.json `dependencies` and `devDependencies` names are
 * compared against the `name` field of each workspace package's package.json;
 * matching workspace directories are returned in workspace order.
 *
 * Returns an empty array when the app has no package.json, when it cannot be
 * parsed, when its top-level JSON value is not an object (e.g. `null`), or
 * when the project is not a monorepo. Workspace packages with a missing or
 * malformed package.json are skipped.
 *
 * @param {string} projectRoot - Absolute path to project root
 * @param {string} appPath - Relative app path
 * @returns {string[]} - Array of relative paths to shared packages
 */
function detectSharedPackages(projectRoot, appPath) {
  const packages = [];
  const appPkgPath = path.join(projectRoot, appPath, 'package.json');
  if (!fs.existsSync(appPkgPath)) return packages;

  let appPkg;
  try {
    appPkg = JSON.parse(fs.readFileSync(appPkgPath, 'utf8'));
  } catch (_e) {
    return packages;
  }
  // Valid JSON is not necessarily an object ("null", "42", ...); treat that like a malformed manifest.
  if (appPkg === null || typeof appPkg !== 'object') return packages;

  // Collect all dependency names (Set: one lookup per workspace package below)
  const depNames = new Set([
    ...Object.keys(appPkg.dependencies || {}),
    ...Object.keys(appPkg.devDependencies || {}),
  ]);

  // Resolve workspace packages -- check if any dep matches a workspace package name
  const workspaces = detectWorkspaces(projectRoot);
  if (!workspaces.isMonorepo) return packages;

  for (const wsPkg of workspaces.packages) {
    const wsPkgJsonPath = path.join(projectRoot, wsPkg, 'package.json');
    if (!fs.existsSync(wsPkgJsonPath)) continue;
    try {
      const wsPkgJson = JSON.parse(fs.readFileSync(wsPkgJsonPath, 'utf8'));
      if (wsPkgJson.name && depNames.has(wsPkgJson.name)) {
        packages.push(wsPkg);
      }
    } catch (_e) {
      // Skip malformed
    }
  }

  return packages;
}
|
|
932
|
+
|
|
933
|
+
// =====================================================================
|
|
934
|
+
// F-046: Polylingual comment-context detection
|
|
935
|
+
// =====================================================================
|
|
936
|
+
//
|
|
937
|
+
// @cap-feature(feature:F-046, primary:true) Strengthen Polylingual Comment-Token Detection in Tag Scanner
|
|
938
|
+
// @cap-decision Comment-style table is extension-driven (per-language) rather than heuristic — extensions are deterministic, low-risk, and match how editors highlight code. A heuristic (e.g., shebang-sniffing) would over-trigger on polyglot files like .md with embedded code blocks.
|
|
939
|
+
// @cap-decision Backward-compat strategy: keep `extractTags(content, file) -> CapTag[]` legacy shape (Option A from spec) and add a new `extractTagsWithContext(content, file) -> { tags, warnings }`. F-046/AC-5 requires JS/TS callsites to be untouched, and this avoids churning ~30 callers.
|
|
940
|
+
// @cap-decision Comment-context detection is implemented as an in-place line-by-line state machine rather than a tokenizer or AST. The scanner has been regex-based since F-001; adopting a tokenizer for one feature would balloon scope and add maintenance burden. The state machine handles 95%+ of real-world cases (line + block comments, multi-line block tracking) with ~80 lines of logic.
|
|
941
|
+
// @cap-risk Edge cases not covered: nested string-quote inside block comment (e.g., `# "@cap-feature" still in code`), here-docs in shell, raw strings in Python (r"@cap..."), C++ raw string literals R"(@cap)". These are extremely rare for tag-bearing files and would require a real lexer to handle correctly. The warning system in AC-3 catches most false positives; AC-4's --strict mode is the safety net for CI.
|
|
942
|
+
// @cap-risk Unrecognized extensions fall back to "treat as JS-style line + block comments" so behavior is at least no worse than today. Documented below at COMMENT_STYLES_DEFAULT.
|
|
943
|
+
// @cap-feature(feature:F-046, ac:F-046/AC-3) String-literal awareness — classifyTagContext now tracks string state alongside comment state. A line like `const x = "// @cap-feature(F-999) fake"` is correctly classified as a string-literal context, the @cap-* token is NOT extracted as a tag, and a structured warning is emitted instead. Implementation: STRING_STYLES per-extension table, _matchStringOpen / _findStringClose helpers, and string-state extension to blockState carried across lines (Python triple-quotes, TOML triple-quotes, Rust raw strings, JS template literals all multi-line capable). See tests/cap-tag-scanner-polylingual-adversarial.test.cjs `'F-046/AC-3 string literal containing comment token is correctly rejected'` for the inverted witness tests that pin the fix.
|
|
944
|
+
|
|
945
|
+
/**
|
|
946
|
+
* @typedef {Object} CommentStyle
|
|
947
|
+
* @property {string[]} line - Line-comment tokens (e.g., ["//"])
|
|
948
|
+
* @property {Array<[string,string]>} block - Block-comment open/close pairs (e.g., [["/*", "*\/"]])
|
|
949
|
+
*/
|
|
950
|
+
|
|
951
|
+
// @cap-todo(ac:F-046/AC-1) Per-extension comment style table covering Python, Ruby, Shell, Go, Rust, HTML, CSS in addition to JS/TS.
|
|
952
|
+
// Order within `line` matters: longer tokens must come first so that `///` matches before `//`.
|
|
953
|
+
/** @type {Object<string, CommentStyle>} */
|
|
954
|
+
const COMMENT_STYLES = (() => {
  // Each factory hands back a brand-new object, so every extension owns its own style record
  // (same as the hand-written table, where no two entries shared a reference).
  const slashStyle = () => ({ line: ['//'], block: [['/*', '*/']] });
  const hashOnly = () => ({ line: ['#'], block: [] });
  const markupOnly = () => ({ line: [], block: [['<!--', '-->']] });

  return {
    // JS / TS family — preserved from existing behavior (AC-5).
    '.js': slashStyle(),
    '.cjs': slashStyle(),
    '.mjs': slashStyle(),
    '.ts': slashStyle(),
    '.tsx': slashStyle(),
    '.jsx': slashStyle(),

    // Python — `#` line comments; triple-quoted strings count as block comments (docstrings).
    '.py': { line: ['#'], block: [['"""', '"""'], ["'''", "'''"]] },

    // Ruby — `#` line comments; =begin/=end blocks.
    '.rb': { line: ['#'], block: [['=begin', '=end']] },

    // Shell family — `#` only, no block form.
    '.sh': hashOnly(),
    '.bash': hashOnly(),
    '.zsh': hashOnly(),

    // Go — identical to the JS family.
    '.go': slashStyle(),

    // Rust — `///` is listed ahead of `//` so the doc-comment token is seen first.
    '.rs': { line: ['///', '//'], block: [['/*', '*/']] },

    // HTML and Markdown — `<!-- -->` block comments only.
    '.html': markupOnly(),
    '.htm': markupOnly(),
    '.md': markupOnly(),

    // CSS has block comments only; SCSS additionally accepts `//`.
    '.css': { line: [], block: [['/*', '*/']] },
    '.scss': slashStyle(),

    // YAML / TOML — `#` only.
    '.yaml': hashOnly(),
    '.yml': hashOnly(),
    '.toml': hashOnly(),

    // SQL — `--` line comments plus C-style blocks.
    '.sql': { line: ['--'], block: [['/*', '*/']] },

    // Java / C / C++ — identical to the JS family.
    '.java': slashStyle(),
    '.c': slashStyle(),
    '.cpp': slashStyle(),
    '.h': slashStyle(),
    '.hpp': slashStyle(),
  };
})();
|
|
994
|
+
|
|
995
|
+
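// @cap-decision Default fallback for unrecognized extensions: accept the union of the comment tokens known to COMMENT_STYLES (`//`, `#`, `--`, `/* */`, triple quotes, `<!-- -->`, `=begin`/`=end`) rather than JS-style only. This is the safest non-breaking default — files we don't know about will behave exactly as they did before F-046 (regex-only).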
|
|
996
|
+
/** @type {CommentStyle} */
|
|
997
|
+
// Fallback style: every line token and every block pair that appears anywhere in the per-extension table.
const COMMENT_STYLES_DEFAULT = {
  line: ['//', '#', '--'],
  block: [
    ['/*', '*/'],
    ['"""', '"""'],
    ["'''", "'''"],
    ['<!--', '-->'],
    ['=begin', '=end'],
  ],
};
|
|
998
|
+
|
|
999
|
+
/**
 * Resolve the comment syntax for a file from its (case-insensitive) extension.
 *
 * A missing/empty `filePath` is treated as an empty string, which has no extension
 * and therefore resolves to the fallback style.
 *
 * @param {string} filePath - Path whose extension selects the style.
 * @returns {CommentStyle} The matching COMMENT_STYLES entry, or COMMENT_STYLES_DEFAULT when the extension is not listed.
 */
function getCommentStyle(filePath) {
  const extension = path.extname(filePath || '').toLowerCase();
  const known = COMMENT_STYLES[extension];
  if (known) {
    return known;
  }
  return COMMENT_STYLES_DEFAULT;
}
|
|
1008
|
+
|
|
1009
|
+
// =====================================================================
|
|
1010
|
+
// F-046/AC-3 — String-literal awareness
|
|
1011
|
+
// =====================================================================
|
|
1012
|
+
//
|
|
1013
|
+
// @cap-feature(feature:F-046) String-state tracker — prevents @cap-* tokens INSIDE string literals from being misclassified as comments. Resolves the AC-3 bug pinned by adversarial tests.
|
|
1014
|
+
// @cap-decision String-state lives in the same blockState object as comment-state, walked synchronously by classifyTagContext. A separate pass would double the asymptotic work and require keeping two parallel cursors in sync; one walker that checks string-open BEFORE comment-open at each position is simpler and provably correct.
|
|
1015
|
+
// @cap-decision Per-language STRING_STYLES table — same shape philosophy as COMMENT_STYLES. Order within the array matters: longer / more-specific tokens (triple-quotes, raw-string prefixes like r" or r#") must be listed before their substring counterparts.
|
|
1016
|
+
// @cap-risk(out-of-scope) Ruby `<<~END` heredocs and Shell `<< EOF` heredocs are NOT tracked. The body of a heredoc is plain text but the scanner sees it as code. Documented limitation; pinned by adversarial tests `'heredocs and multi-line strings (current behaviour)'`. A real fix requires tokenizing the heredoc-introducer syntax, which is non-trivial (delimiter is identifier-defined, can be quoted or unquoted, can be `<<~` for indent-stripping). Out of scope for this iteration.
|
|
1017
|
+
// @cap-risk(out-of-scope) Rust nested `/* /* */ */` block comments still close on the first `*/`. Same documented limitation as before F-046/AC-3 fix — nesting requires a depth counter, separate from string-state.
|
|
1018
|
+
// @cap-risk(out-of-scope) Markdown ```code fences``` are NOT understood as comments-or-strings. A tag inside a fenced code block is treated as a plain prose mention and emits a warning. Documented in adversarial test `'Markdown code fences are NOT understood'`.
|
|
1019
|
+
|
|
1020
|
+
/**
|
|
1021
|
+
* @typedef {Object} StringSyntax
|
|
1022
|
+
* @property {string} open - Opening token (e.g., '"', "'", '"""', 'r#"').
|
|
1023
|
+
* @property {string} close - Closing token. For raw strings with hash counts (r#"..."#), the runtime computes the actual close from the open.
|
|
1024
|
+
* @property {boolean} escapes - When true, backslash escapes the next character; when false (raw strings, shell single-quotes, Python r"..."), the backslash is literal.
|
|
1025
|
+
* @property {boolean} multiline - When true, the string can span multiple lines (Python """, TOML ''', etc).
|
|
1026
|
+
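 * @property {boolean} [rustRaw] - Special-case marker for Rust r#"..."# raw strings whose close depends on hash count of open.
 * @property {boolean} [isRaw] - Marker set on fixed-delimiter raw-string entries (e.g. Python r"...", Rust r"..."); these entries also set `escapes: false`.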
|
|
1027
|
+
*/
|
|
1028
|
+
|
|
1029
|
+
// @cap-feature(feature:F-046) Per-extension string syntax table — used by classifyTagContext to detect when the cursor enters a string literal so comment-token matches inside the string are ignored.
|
|
1030
|
+
// @cap-decision Order matters: longer / prefixed tokens come first so `"""` matches before `"`, `r"..."` matches before `"..."`. Otherwise the shorter token would consume the prefix and misclassify.
|
|
1031
|
+
/** @type {Object<string, StringSyntax[]>} */
|
|
1032
|
+
const STRING_STYLES = (() => {
  // Entry whose close token is the same text as its open token.
  const sym = (quote, escapes, multiline) => ({ open: quote, close: quote, escapes, multiline });
  // Entry opened by `prefix + quote` and closed by the bare quote (b"...", f"...").
  const prefixed = (prefix, quote, escapes, multiline) => ({ open: prefix + quote, close: quote, escapes, multiline });
  // Raw-string entry: prefixed, backslash is literal, flagged with isRaw.
  const rawPrefixed = (prefix, quote, multiline) => ({ open: prefix + quote, close: quote, escapes: false, multiline, isRaw: true });

  // Families are factories so every extension receives its own array and entry objects.
  // Double + single with escapes, plus a multi-line backtick (template literal; interpolation NOT tracked).
  const jsFamily = () => [sym('"', true, false), sym("'", true, false), sym('`', true, true)];
  // Double with escapes, literal single quotes, single-line backtick (command substitution). Heredocs NOT tracked.
  const shellFamily = () => [sym('"', true, false), sym("'", false, false), sym('`', true, false)];
  // Both quote styles, escape-aware.
  const escapedQuotes = () => [sym('"', true, false), sym("'", true, false)];
  // Escape-aware double quotes, literal single quotes.
  const literalSingle = () => [sym('"', true, false), sym("'", false, false)];
  // Both quote styles with no escape handling (attribute-style strings).
  const attributeQuotes = () => [sym('"', false, false), sym("'", false, false)];

  return {
    // JS / TS family.
    '.js': jsFamily(),
    '.cjs': jsFamily(),
    '.mjs': jsFamily(),
    '.ts': jsFamily(),
    '.tsx': jsFamily(),
    '.jsx': jsFamily(),

    // Python — single-line strings only. Triple-quoted strings are deliberately absent here: they stay in
    // COMMENT_STYLES['.py'].block so docstrings keep carrying tags (a triple-quoted literal used as a value
    // is therefore classified as a comment). Prefixed forms precede the bare quotes so `r"` wins over `"`.
    '.py': [
      rawPrefixed('rb', '"', false),
      rawPrefixed('rb', "'", false),
      rawPrefixed('br', '"', false),
      rawPrefixed('br', "'", false),
      rawPrefixed('r', '"', false),
      rawPrefixed('r', "'", false),
      prefixed('b', '"', true, false),
      prefixed('b', "'", true, false),
      prefixed('f', '"', true, false),
      prefixed('f', "'", true, false),
      sym('"', true, false),
      sym("'", true, false),
    ],

    // Ruby — double + single. Heredocs NOT tracked.
    '.rb': literalSingle(),

    // Shell family.
    '.sh': shellFamily(),
    '.bash': shellFamily(),
    '.zsh': shellFamily(),

    // Go — double, single (rune literal), multi-line raw backtick string.
    '.go': [sym('"', true, false), sym("'", true, false), sym('`', false, true)],

    // Rust — the hash-counted raw form (r#"..."#, r##"..."##, ...) is resolved at match time via rustRaw.
    '.rs': [
      { open: 'r#"', close: '"#', escapes: false, multiline: true, rustRaw: true },
      rawPrefixed('r', '"', true),
      prefixed('b', '"', true, false),
      sym('"', true, true),
      // Char literals 'x' are handled as one-character strings.
      sym("'", true, false),
    ],

    // HTML — quotes anywhere are treated as strings (over-flagging is accepted).
    '.html': attributeQuotes(),
    '.htm': attributeQuotes(),

    // Markdown — no string literals, so prose is never treated as a string.
    '.md': [],

    // CSS / SCSS — both quote styles.
    '.css': escapedQuotes(),
    '.scss': escapedQuotes(),

    // YAML — both quote styles; single-quote doubling is not modelled exactly.
    '.yaml': literalSingle(),
    '.yml': literalSingle(),

    // TOML — triple-quoted multi-line forms first, then the single-line forms.
    '.toml': [sym('"""', true, true), sym("'''", false, true), sym('"', true, false), sym("'", false, false)],

    // SQL — single quote listed first; doubled-quote escaping is approximated as escape-aware.
    '.sql': [sym("'", true, false), sym('"', true, false)],

    // Java / C / C++ — double for strings, single for chars.
    '.java': escapedQuotes(),
    '.c': escapedQuotes(),
    '.cpp': escapedQuotes(),
    '.h': escapedQuotes(),
    '.hpp': escapedQuotes(),
  };
})();
|
|
1101
|
+
|
|
1102
|
+
// @cap-decision Default string-style fallback for unknown extensions: double + single quotes with escape handling. Matches behavior of nearly every C-family language. Files of unknown type are over-flagged rather than under-flagged (safer).
|
|
1103
|
+
/** @type {StringSyntax[]} */
|
|
1104
|
+
// Fallback table: double quote first, then single quote; both single-line and backslash-escape aware.
const STRING_STYLES_DEFAULT = ['"', "'"].map((quote) => ({
  open: quote,
  close: quote,
  escapes: true,
  multiline: false,
}));
|
|
1108
|
+
|
|
1109
|
+
/**
 * Resolve the string-literal syntax table for a file from its (case-insensitive) extension.
 *
 * A missing/empty `filePath` is treated as an empty string, which has no extension
 * and therefore resolves to the fallback table.
 *
 * @param {string} filePath - Path whose extension selects the table.
 * @returns {StringSyntax[]} The matching STRING_STYLES entry, or STRING_STYLES_DEFAULT when the extension is not listed.
 */
function getStringStyle(filePath) {
  const extension = path.extname(filePath || '').toLowerCase();
  const known = STRING_STYLES[extension];
  if (known) {
    return known;
  }
  return STRING_STYLES_DEFAULT;
}
|
|
1118
|
+
|
|
1119
|
+
/**
 * Test whether a string literal opens at column `i` of `line`.
 *
 * Entries of `stringStyle` are tried in array order and the first one that matches wins,
 * so the table must list longer / prefixed tokens ahead of their shorter counterparts.
 *
 * Entries flagged `rustRaw` are matched structurally instead of by their `open` text:
 * an `r`, then one or more `#`, then `"`. The close token for that form is `"` followed
 * by the same number of `#` characters.
 *
 * @param {StringSyntax[]} stringStyle - Candidate syntaxes, most specific first.
 * @param {string} line - Text being scanned.
 * @param {number} i - Column at which an opener must start.
 * @returns {{ syntax: StringSyntax, openLen: number, close: string } | null}
 *   The matched entry, the length of the opener actually consumed, and the close token to look for;
 *   null when nothing opens at `i`.
 */
function _matchStringOpen(stringStyle, line, i) {
  for (const syntax of stringStyle) {
    if (!syntax.rustRaw) {
      if (line.startsWith(syntax.open, i)) {
        return { syntax, openLen: syntax.open.length, close: syntax.close };
      }
      continue;
    }

    // Hash-counted Rust raw string: r#" / r##" / ...
    if (line[i] !== 'r') {
      continue;
    }
    let end = i + 1;
    while (end < line.length && line[end] === '#') {
      end += 1;
    }
    const hashes = end - (i + 1);
    // Zero hashes is the plain r"..." form, which a separate table entry handles.
    if (hashes === 0 || line[end] !== '"') {
      continue;
    }
    // Opener spans `r`, the hashes, and the quote.
    return { syntax, openLen: end - i + 1, close: '"' + '#'.repeat(hashes) };
  }
  return null;
}
|
|
1151
|
+
|
|
1152
|
+
/**
 * Locate the close token of an already-open string within a single line.
 *
 * Scanning starts at column `i`. When `escapes` is true, a backslash that has a
 * following character consumes that character too, so an escaped quote never counts
 * as the close. A backslash sitting on the very last column is not treated as an escape.
 *
 * @param {string} line - Text being scanned.
 * @param {number} i - Column to start from (normally just past the opener).
 * @param {string} close - Close token to search for.
 * @param {boolean} escapes - Whether backslash escapes the next character.
 * @returns {number} Column where `close` starts, or -1 if the string does not close on this line.
 */
function _findStringClose(line, i, close, escapes) {
  const end = line.length;
  for (let pos = i; pos < end; pos += 1) {
    const isEscapePair = escapes && line[pos] === '\\' && pos + 1 < end;
    if (isEscapePair) {
      // Step over the escaped character; the loop increment then steps over the pair.
      pos += 1;
      continue;
    }
    if (line.startsWith(close, pos)) {
      return pos;
    }
  }
  return -1;
}
|
|
1178
|
+
|
|
1179
|
+
/**
 * Pick the single syntax token that starts at column `i`, considering block-comment openers,
 * string openers and line-comment tokens together.
 *
 * Selection rule: the longest token wins (so Python `"""` as a block comment beats `"` as a string).
 * Tokens of equal length are ranked block-comment (3) > string (2) > line-comment (1).
 * Between two candidates of the same length and rank, the one encountered first is kept.
 *
 * The string candidate comes from `_matchStringOpen`, which itself returns only the first
 * matching table entry.
 *
 * @param {CommentStyle} style - Comment tokens for the file type.
 * @param {StringSyntax[]} stringStyle - String syntaxes for the file type.
 * @param {string} line - Text being scanned.
 * @param {number} i - Column at which a token must start.
 * @returns {({ kind: 'blockComment', open: string, close: string, length: number, priority: number }
 *   | { kind: 'string', syntax: StringSyntax, openLen: number, close: string, length: number, priority: number }
 *   | { kind: 'lineComment', token: string, length: number, priority: number }
 *   | null)} The winning candidate, or null if nothing matches at `i`.
 */
function _longestTokenMatch(style, stringStyle, line, i) {
  let winner = null;

  // Keep `candidate` only if it is strictly longer, or equally long with a strictly higher rank.
  const offer = (candidate) => {
    const beatsWinner =
      winner === null ||
      candidate.length > winner.length ||
      (candidate.length === winner.length && candidate.priority > winner.priority);
    if (beatsWinner) {
      winner = candidate;
    }
  };

  for (const [open, close] of style.block) {
    if (line.startsWith(open, i)) {
      offer({ kind: 'blockComment', open, close, length: open.length, priority: 3 });
    }
  }

  const stringOpen = _matchStringOpen(stringStyle, line, i);
  if (stringOpen) {
    offer({
      kind: 'string',
      syntax: stringOpen.syntax,
      openLen: stringOpen.openLen,
      close: stringOpen.close,
      length: stringOpen.openLen,
      priority: 2,
    });
  }

  for (const token of style.line) {
    if (line.startsWith(token, i)) {
      offer({ kind: 'lineComment', token, length: token.length, priority: 1 });
    }
  }

  return winner;
}
|
|
1230
|
+
|
|
1231
|
+
/**
|
|
1232
|
+
* @typedef {Object} ClassifyResult
|
|
1233
|
+
* @property {('comment'|'string'|'code'|'unknown')} context - Where the @cap-* token was found
|
|
1234
|
+
* @property {string} reason - Short human-readable reason ("python triple-quote block", "JS line comment", "outside any comment")
|
|
1235
|
+
*/
|
|
1236
|
+
|
|
1237
|
+
// @cap-todo(ac:F-046/AC-3) classifyTagContext returns 'comment' when the tag column is inside a recognized comment, 'string' when inside a string literal, else 'code' (both 'string' and 'code' are warning candidates).
|
|
1238
|
+
// @cap-feature(feature:F-046) classifyTagContext is string-state aware — at each cursor position it takes the longest matching token among block-comment open, string open, and line-comment (see _longestTokenMatch), so a `// @cap-...` token inside `"..."` is correctly classified as a string-literal context, not a comment.
|
|
1239
|
+
/**
 * Classify whether `tagColumn` in `lineContent` is inside a comment, a string, or code.
 *
 * The line is walked left to right from column 0 until the cursor passes `tagColumn`.
 * `blockState` carries open constructs across lines and is MUTATED here: it is cleared when a
 * carried-over block comment or string closes on this line, and set when a block comment or a
 * multi-line string opens on this line without closing.
 *
 * At each cursor position, in order:
 *   1. A carried-over block comment (`blockState.open`) — search for its close token.
 *   2. A carried-over multi-line string (`blockState.stringClose`) — search for its close token.
 *   3. Otherwise ask `_longestTokenMatch` for the token starting here (longest match wins; ties rank
 *      block-comment > string > line-comment) and act on its kind:
 *        - string: if the tag lies before the end of the literal (or the literal never closes on
 *          this line) the result is 'string'; otherwise skip past the literal.
 *        - line comment: the rest of the line is a comment, so the result is 'comment'.
 *        - block comment: if the tag lies before the end of the comment (or it never closes on
 *          this line) the result is 'comment'; otherwise skip past the comment.
 *   4. No token here — advance one column.
 *
 * Because a string opener is consumed as a unit, comment tokens inside `"..."` are never seen as
 * comment openers, which is why a `// @cap-...` inside a string literal is reported as 'string'.
 *
 * An unclosed string whose syntax is not `multiline` still yields 'string' for the rest of the line,
 * but leaves `blockState` untouched so the next line starts clean.
 *
 * @param {CommentStyle} style - Comment tokens for the file type.
 * @param {string} lineContent - Full line text.
 * @param {number} tagColumn - 0-based column of the @cap-... match.
 * @param {{ open: [string,string]|null, stringClose: string|null, stringEscapes: boolean, stringOpenToken: string|null }} blockState
 *   Mutable block-comment + string state carried across lines.
 * @param {StringSyntax[]} [stringStyle] - String syntax table. Anything that is not an array
 *   (including omission) disables string detection, which keeps 4-argument callers working.
 * @returns {ClassifyResult} 'comment', 'string', or 'code' (with a human-readable reason).
 */
function classifyTagContext(style, lineContent, tagColumn, blockState, stringStyle) {
  const ss = Array.isArray(stringStyle) ? stringStyle : [];
  const n = lineContent.length;
  let i = 0;

  // Invariant: inside the loop body `i <= tagColumn`, i.e. the tag has not been passed yet.
  // Any construct that is still open at end of line, or closes after the tag, therefore contains it.
  while (i <= tagColumn && i < n) {
    // 1) Carried-over block comment from a previous line.
    if (blockState.open) {
      const [open, close] = blockState.open;
      const closeIdx = lineContent.indexOf(close, i);
      if (closeIdx === -1 || tagColumn < closeIdx) {
        return { context: 'comment', reason: `inside block comment ${open}...${close}` };
      }
      // Comment ends before the tag: resume scanning right after its close token.
      i = closeIdx + close.length;
      blockState.open = null;
      continue;
    }

    // 2) Carried-over multi-line string from a previous line.
    if (blockState.stringClose) {
      const close = blockState.stringClose;
      const closeIdx = _findStringClose(lineContent, i, close, !!blockState.stringEscapes);
      if (closeIdx === -1 || tagColumn < closeIdx) {
        return {
          context: 'string',
          reason: `inside multi-line string literal ${blockState.stringOpenToken || ''}...${close}`,
        };
      }
      // String ends before the tag: clear the carried state and resume after its close token.
      i = closeIdx + close.length;
      blockState.stringClose = null;
      blockState.stringEscapes = false;
      blockState.stringOpenToken = null;
      continue;
    }

    // 3) Longest token starting at i across {block-comment open, string open, line comment}.
    const tokenMatch = _longestTokenMatch(style, ss, lineContent, i);

    if (tokenMatch && tokenMatch.kind === 'string') {
      const { syntax, close } = tokenMatch;
      const stringReason = `inside string literal ${syntax.open}...${close}`;
      const closeIdx = _findStringClose(lineContent, i + tokenMatch.openLen, close, syntax.escapes);
      if (closeIdx === -1) {
        // Only multi-line-capable syntaxes stay open into the next line.
        if (syntax.multiline) {
          blockState.stringClose = close;
          blockState.stringEscapes = syntax.escapes;
          blockState.stringOpenToken = syntax.open;
        }
        return { context: 'string', reason: stringReason };
      }
      const afterClose = closeIdx + close.length;
      if (tagColumn < afterClose) {
        return { context: 'string', reason: stringReason };
      }
      i = afterClose;
      continue;
    }

    if (tokenMatch && tokenMatch.kind === 'lineComment') {
      // A line comment runs to end of line, so it necessarily covers the tag.
      return { context: 'comment', reason: `line comment ${tokenMatch.token}` };
    }

    if (tokenMatch && tokenMatch.kind === 'blockComment') {
      const { open, close } = tokenMatch;
      // Search past the opener so symmetric delimiters (""" ... """) do not close on themselves.
      const closeIdx = lineContent.indexOf(close, i + open.length);
      if (closeIdx === -1) {
        blockState.open = [open, close];
        return { context: 'comment', reason: `inside block comment ${open}...${close}` };
      }
      const afterClose = closeIdx + close.length;
      if (tagColumn < afterClose) {
        return { context: 'comment', reason: `block comment ${open}...${close}` };
      }
      i = afterClose;
      continue;
    }

    // 4) No special token at i. Advance one char.
    i++;
  }

  // Cursor walked past tagColumn (or the line ended) without entering any comment or string.
  return { context: 'code', reason: 'outside any comment' };
}
|
|
1371
|
+
|
|
1372
|
+
/**
|
|
1373
|
+
* @typedef {Object} ScannerWarning
|
|
1374
|
+
* @property {string} file - Relative file path
|
|
1375
|
+
* @property {number} line - 1-based line number
|
|
1376
|
+
* @property {number} column - 0-based column index of the @cap-* token
|
|
1377
|
+
* @property {string} reason - Human-readable reason the tag was rejected
|
|
1378
|
+
* @property {string} raw - Full original line text
|
|
1379
|
+
*/
|
|
1380
|
+
|
|
1381
|
+
// @cap-todo(ac:F-046/AC-1) extractTagsWithContext is the polylingual entry point — same regex match as legacy extractTags, but each match is verified to land inside a real comment.
|
|
1382
|
+
// @cap-todo(ac:F-046/AC-3) Tags found outside comments are not parsed; they appear in `warnings` instead so callers (and CI in --strict mode) can surface them.
|
|
1383
|
+
/**
|
|
1384
|
+
* Polylingual extraction. Detects per-line `@cap-...` matches anywhere on the line, then verifies
|
|
1385
|
+
* each match sits inside a recognized comment context for the file's extension.
|
|
1386
|
+
*
|
|
1387
|
+
* Tags inside comments are emitted as CapTag (same shape as `extractTags`).
|
|
1388
|
+
* Tags outside any comment are emitted as `warnings` and NOT parsed as tags.
|
|
1389
|
+
*
|
|
1390
|
+
* @param {string} content
|
|
1391
|
+
* @param {string} filePath
|
|
1392
|
+
* @returns {{ tags: CapTag[], warnings: ScannerWarning[] }}
|
|
1393
|
+
*/
|
|
1394
|
+
function extractTagsWithContext(content, filePath) {
  // Comment tokens and string-literal syntaxes are looked up from the file path.
  const commentStyle = getCommentStyle(filePath);
  const stringSyntaxes = getStringStyle(filePath);
  const sourceLines = content.split('\n');
  const tags = [];
  const warnings = [];

  // Loose match — `@cap-(feature|todo|risk|decision)` anywhere on the line, with optional metadata block.
  // CAP_TAG_RE is left untouched (it requires a leading comment token); this looser pattern is used only here.
  const looseTagRe = /@cap-(feature|todo|risk|decision)(?:\(([^)]*)\))?[ \t]*([^\r\n]*)/g;

  // Persistent state carries across lines: block comments AND multi-line strings.
  // @cap-feature(feature:F-046) blockState now also tracks string-literal state for Python triple-quotes, TOML triple-quotes, Rust raw strings, JS template literals, etc.
  /** @type {{ open: [string,string]|null, stringClose: string|null, stringEscapes: boolean, stringOpenToken: string|null }} */
  const blockState = { open: null, stringClose: null, stringEscapes: false, stringOpenToken: null };

  sourceLines.forEach((text, lineIdx) => {
    const lineNumber = lineIdx + 1;

    // The regex is global and reused for every line, so rewind it before scanning this one.
    looseTagRe.lastIndex = 0;
    const candidates = [];
    for (let hit = looseTagRe.exec(text); hit !== null; hit = looseTagRe.exec(text)) {
      candidates.push({
        index: hit.index,
        type: hit[1],
        metadataStr: hit[2] || '',
        description: (hit[3] || '').trim(),
      });
    }

    for (const candidate of candidates) {
      // Every classification walks the line from column 0 with its own copy of the state carried
      // in from the previous line; the shared blockState is only advanced after all candidates
      // on this line have been classified.
      const localState = {
        open: blockState.open,
        stringClose: blockState.stringClose,
        stringEscapes: blockState.stringEscapes,
        stringOpenToken: blockState.stringOpenToken,
      };
      const verdict = classifyTagContext(commentStyle, text, candidate.index, localState, stringSyntaxes);

      if (verdict.context === 'comment') {
        // Only todo tags carry a subtype; it is read from the description via SUBTYPE_RE.
        let subtype = null;
        if (candidate.type === 'todo') {
          const subtypeHit = candidate.description.match(SUBTYPE_RE);
          if (subtypeHit) subtype = subtypeHit[1];
        }
        tags.push({
          type: candidate.type,
          file: filePath,
          line: lineNumber,
          metadata: parseMetadata(candidate.metadataStr),
          description: candidate.description,
          raw: text,
          subtype,
        });
        continue;
      }

      // Anything that is not in a comment becomes a warning and is NOT parsed as a tag.
      // @cap-feature(feature:F-046) Tag found inside a string literal — emit warning with explicit string-literal reason.
      const explanation = verdict.context === 'string'
        ? `@cap-${candidate.type} found inside a string literal (${verdict.reason}) — not parsed as tag`
        : `@cap-${candidate.type} found outside any comment context (${verdict.reason}) — likely a string literal or code reference`;
      warnings.push({
        file: filePath,
        line: lineNumber,
        column: candidate.index,
        reason: explanation,
        raw: text,
      });
    }

    // Advance the persistent state through the whole line (also when it held no candidates)
    // so the next line starts from the correct comment/string context.
    _advanceBlockState(commentStyle, text, blockState, stringSyntaxes);
  });

  return { tags, warnings };
}
|
|
1494
|
+
|
|
1495
|
+
/**
 * Scan one line and update `blockState` in place so that it describes whatever is still open
 * at the end of the line: a block comment, a multi-line string, or nothing.
 *
 * Internal helper; it returns nothing and has no effect other than mutating `blockState`.
 *
 * At every position the checks run in this order: a block comment carried in from a previous
 * line, a string carried in from a previous line, then the longest token starting at the
 * cursor (string opener, line comment, or block-comment opener). The order is meant to mirror
 * classifyTagContext.
 *
 * @param {CommentStyle} style
 * @param {string} line
 * @param {{ open: [string,string]|null, stringClose: string|null, stringEscapes: boolean, stringOpenToken: string|null }} blockState
 * @param {StringSyntax[]} [stringStyle] - Optional string syntax table; anything that is not an array is treated as an empty table (back-compat).
 */
function _advanceBlockState(style, line, blockState, stringStyle) {
  const syntaxes = Array.isArray(stringStyle) ? stringStyle : [];
  const lineLength = line.length;
  let cursor = 0;

  while (cursor < lineLength) {
    if (blockState.open) {
      // Inside a block comment: skip to its closer, or stay inside it if the closer is not on this line.
      const closer = blockState.open[1];
      const closerAt = line.indexOf(closer, cursor);
      if (closerAt === -1) {
        return;
      }
      blockState.open = null;
      cursor = closerAt + closer.length;
    } else if (blockState.stringClose) {
      // Inside a multi-line string: same idea, using the string-aware close search.
      const closer = blockState.stringClose;
      const closerAt = _findStringClose(line, cursor, closer, Boolean(blockState.stringEscapes));
      if (closerAt === -1) {
        return;
      }
      blockState.stringClose = null;
      blockState.stringEscapes = false;
      blockState.stringOpenToken = null;
      cursor = closerAt + closer.length;
    } else {
      // Longest-match across {block-comment-open, string-open, line-comment}.
      const token = _longestTokenMatch(style, syntaxes, line, cursor);
      const kind = token ? token.kind : null;

      switch (kind) {
        case 'string': {
          const bodyStart = cursor + token.openLen;
          const closerAt = _findStringClose(line, bodyStart, token.close, token.syntax.escapes);
          if (closerAt === -1) {
            // Unterminated on this line: only multi-line syntaxes carry over to the next line.
            if (token.syntax.multiline) {
              blockState.stringClose = token.close;
              blockState.stringEscapes = token.syntax.escapes;
              blockState.stringOpenToken = token.syntax.open;
            }
            return;
          }
          cursor = closerAt + token.close.length;
          break;
        }
        case 'lineComment':
          // Line-comment consumes the rest of the line.
          return;
        case 'blockComment': {
          const closerAt = line.indexOf(token.close, cursor + token.open.length);
          if (closerAt === -1) {
            // The comment stays open past the end of this line.
            blockState.open = [token.open, token.close];
            return;
          }
          cursor = closerAt + token.close.length;
          break;
        }
        default:
          // Ordinary character: move on by one.
          cursor += 1;
      }
    }
  }
}
|
|
1573
|
+
|
|
1574
|
+
// @cap-todo(ac:F-046/AC-4) scanFileWithContext + scanDirectoryWithContext expose the new {tags, warnings} shape and support a strict mode that throws on any warning.
|
|
1575
|
+
/**
 * Polylingual single-file scan. Returns {tags, warnings}.
 *
 * The file is read as UTF-8 and handed to extractTagsWithContext together with its path
 * relative to `projectRoot`. If the file cannot be read, an empty result is returned
 * instead of throwing.
 *
 * @param {string} filePath - Absolute path
 * @param {string} projectRoot - Absolute project root
 * @returns {{ tags: CapTag[], warnings: ScannerWarning[] }}
 */
function scanFileWithContext(filePath, projectRoot) {
  let source = null;
  try {
    source = fs.readFileSync(filePath, 'utf8');
  } catch (_err) {
    // Best-effort scan: an unreadable file simply contributes nothing.
    source = null;
  }
  if (source === null) {
    return { tags: [], warnings: [] };
  }
  return extractTagsWithContext(source, path.relative(projectRoot, filePath));
}
|
|
1591
|
+
|
|
1592
|
+
/**
 * Polylingual directory scan. Returns {tags, warnings}.
 *
 * Walks `dirPath` recursively, runs scanFileWithContext on every regular file whose extension
 * is listed and which the scope filter does not exclude, and concatenates the per-file
 * results in traversal order. Directories that cannot be listed are skipped.
 *
 * @param {string} dirPath
 * @param {Object} [options]
 * @param {string[]} [options.extensions] - Extensions to scan; defaults to the keys of COMMENT_STYLES.
 * @param {string[]} [options.exclude] - Passed as `dirExcludes` when a scope filter has to be built.
 * @param {string} [options.projectRoot] - Defaults to `dirPath`.
 * @param {Object} [options.scope] - Pre-built scope filter exposing `isExcluded(path, isDirectory)`; when omitted, one is built with scopeModule.buildScopeFilter.
 * @param {boolean} [options.strict] - When true, throws an Error if any warnings are emitted.
 * @returns {{ tags: CapTag[], warnings: ScannerWarning[] }}
 * @throws {Error} In strict mode with at least one warning; the error has `code` set to
 *   'CAP_STRICT_TAG_VIOLATION' and carries the full list on `warnings`.
 */
function scanDirectoryWithContext(dirPath, options = {}) {
  const extensions = options.extensions || Object.keys(COMMENT_STYLES);
  const projectRoot = options.projectRoot || dirPath;
  const scope = options.scope || scopeModule.buildScopeFilter(projectRoot, {
    dirExcludes: options.exclude,
  });
  const allTags = [];
  const allWarnings = [];

  const visit = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (_err) {
      return;
    }
    for (const entry of entries) {
      const entryPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        if (!scope.isExcluded(entryPath, true)) {
          visit(entryPath);
        }
        continue;
      }
      if (!entry.isFile()) continue;

      // Extension filter first, scope filter second.
      if (!extensions.includes(path.extname(entry.name))) continue;
      if (scope.isExcluded(entryPath, false)) continue;

      const { tags: fileTags, warnings: fileWarnings } = scanFileWithContext(entryPath, projectRoot);
      allTags.push(...fileTags);
      allWarnings.push(...fileWarnings);
    }
  };

  visit(dirPath);

  if (!options.strict || allWarnings.length === 0) {
    return { tags: allTags, warnings: allWarnings };
  }

  // Strict mode: fail, listing at most the first five warnings in the message.
  const listed = allWarnings
    .slice(0, 5)
    .map((w) => ` ${w.file}:${w.line}:${w.column} - ${w.reason}`)
    .join('\n');
  const overflow = allWarnings.length > 5 ? `\n ... and ${allWarnings.length - 5} more` : '';
  const failure = new Error(`cap-tag-scanner --strict: found ${allWarnings.length} tag(s) outside comment context\n${listed}${overflow}`);
  failure.warnings = allWarnings;
  failure.code = 'CAP_STRICT_TAG_VIOLATION';
  throw failure;
}
|
|
1648
|
+
|
|
1649
|
+
// =====================================================================
|
|
1650
|
+
// End F-046 polylingual extension
|
|
1651
|
+
// =====================================================================
|
|
1652
|
+
|
|
1653
|
+
// @cap-todo Detect legacy @gsd-* tags and recommend /cap:migrate
|
|
1654
|
+
// Line-anchored pattern for a legacy tag: optional leading spaces/tabs, then one comment
// opener out of `//`, `/*`, `*`, `#`, `--`, `"""` or `'''`, optional spaces/tabs, and finally
// the `@gsd-` prefix followed by one of the listed tag names (captured in group 1).
// The regex has no `g` flag, so repeated `.test()` calls keep no state between lines.
const LEGACY_TAG_RE = /^[ \t]*(?:\/\/|\/\*|\*|#|--|"""|''')[ \t]*@gsd-(feature|todo|risk|decision|context|status|depends|ref|pattern|api|constraint)/;
|
|
1655
|
+
|
|
1656
|
+
/**
 * Detect legacy @gsd-* tags in scanned files.
 *
 * Walks the project tree and tests every line of every eligible file against LEGACY_TAG_RE.
 * Unreadable directories and files are skipped silently.
 *
 * @param {string} projectRoot - Absolute path to project root
 * @param {Object} [options]
 * @param {string[]} [options.extensions] - File extensions to include; defaults to SUPPORTED_EXTENSIONS
 * @param {string[]} [options.exclude] - Directory names to exclude (passed as `dirExcludes` when a scope filter has to be built)
 * @param {Object} [options.scope] - Pre-built scope filter exposing `isExcluded(path, isDirectory)`; when omitted, one is built with scopeModule.buildScopeFilter
 * @returns {{ count: number, files: string[], recommendation: string }} `count` is the number
 *   of matching lines, `files` the sorted project-relative paths containing at least one match,
 *   and `recommendation` is an empty string when nothing was found.
 */
function detectLegacyTags(projectRoot, options = {}) {
  const extensions = options.extensions || SUPPORTED_EXTENSIONS;
  const scope = options.scope || scopeModule.buildScopeFilter(projectRoot, {
    dirExcludes: options.exclude,
  });
  const filesWithLegacy = new Set();
  let legacyCount = 0;

  // Count the matching lines of one file and remember the file if it has any.
  const inspectFile = (filePath) => {
    let text;
    try {
      text = fs.readFileSync(filePath, 'utf8');
    } catch (_err) {
      return;
    }
    const hits = text.split('\n').filter((ln) => LEGACY_TAG_RE.test(ln)).length;
    if (hits === 0) return;
    legacyCount += hits;
    filesWithLegacy.add(path.relative(projectRoot, filePath));
  };

  // Depth-first traversal honouring the scope filter and the extension list.
  const visit = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (_err) {
      return;
    }
    for (const entry of entries) {
      const entryPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        if (!scope.isExcluded(entryPath, true)) {
          visit(entryPath);
        }
        continue;
      }
      if (!entry.isFile()) continue;
      if (!extensions.includes(path.extname(entry.name))) continue;
      if (scope.isExcluded(entryPath, false)) continue;

      inspectFile(entryPath);
    }
  };

  visit(projectRoot);

  const files = Array.from(filesWithLegacy).sort();
  const recommendation = legacyCount > 0
    ? `Found ${legacyCount} legacy @gsd-* tag(s) in ${files.length} file(s). Run /cap:migrate to convert them to @cap-* format.`
    : '';

  return { count: legacyCount, files, recommendation };
}
|
|
1725
|
+
|
|
1726
|
+
module.exports = {
|
|
1727
|
+
CAP_TAG_TYPES,
|
|
1728
|
+
CAP_TAG_RE,
|
|
1729
|
+
// F-063 design tag recognition — additive, separate from CAP_TAG_TYPES to preserve F-001 regression tests.
|
|
1730
|
+
CAP_DESIGN_TAG_TYPES,
|
|
1731
|
+
CAP_DESIGN_TAG_RE,
|
|
1732
|
+
SUPPORTED_EXTENSIONS,
|
|
1733
|
+
DEFAULT_EXCLUDE,
|
|
1734
|
+
LEGACY_TAG_RE,
|
|
1735
|
+
isUnifiedAnchorsEnabled,
|
|
1736
|
+
// F-094 multi-line @cap-* description capture
|
|
1737
|
+
isMultilineCaptureEnabled,
|
|
1738
|
+
detectAnchorToken,
|
|
1739
|
+
captureContinuations,
|
|
1740
|
+
scanFile,
|
|
1741
|
+
scanDirectory,
|
|
1742
|
+
extractTags,
|
|
1743
|
+
parseMetadata,
|
|
1744
|
+
groupByFeature,
|
|
1745
|
+
buildAcFileMap,
|
|
1746
|
+
detectOrphans,
|
|
1747
|
+
editDistance,
|
|
1748
|
+
detectWorkspaces,
|
|
1749
|
+
resolveWorkspaceGlobs,
|
|
1750
|
+
scanMonorepo,
|
|
1751
|
+
groupByPackage,
|
|
1752
|
+
detectLegacyTags,
|
|
1753
|
+
scanApp,
|
|
1754
|
+
detectSharedPackages,
|
|
1755
|
+
// F-046 polylingual extension
|
|
1756
|
+
COMMENT_STYLES,
|
|
1757
|
+
COMMENT_STYLES_DEFAULT,
|
|
1758
|
+
STRING_STYLES,
|
|
1759
|
+
STRING_STYLES_DEFAULT,
|
|
1760
|
+
getCommentStyle,
|
|
1761
|
+
getStringStyle,
|
|
1762
|
+
classifyTagContext,
|
|
1763
|
+
extractTagsWithContext,
|
|
1764
|
+
scanFileWithContext,
|
|
1765
|
+
scanDirectoryWithContext,
|
|
1766
|
+
};
|