cap-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. package/.claude-plugin/README.md +26 -0
  2. package/.claude-plugin/marketplace.json +24 -0
  3. package/.claude-plugin/plugin.json +24 -0
  4. package/LICENSE +21 -0
  5. package/README.ja-JP.md +834 -0
  6. package/README.ko-KR.md +823 -0
  7. package/README.md +806 -0
  8. package/README.pt-BR.md +452 -0
  9. package/README.zh-CN.md +800 -0
  10. package/agents/cap-architect.md +269 -0
  11. package/agents/cap-brainstormer.md +207 -0
  12. package/agents/cap-curator.md +276 -0
  13. package/agents/cap-debugger.md +365 -0
  14. package/agents/cap-designer.md +246 -0
  15. package/agents/cap-historian.md +464 -0
  16. package/agents/cap-migrator.md +291 -0
  17. package/agents/cap-prototyper.md +197 -0
  18. package/agents/cap-validator.md +308 -0
  19. package/bin/install.js +5433 -0
  20. package/cap/bin/cap-tools.cjs +853 -0
  21. package/cap/bin/lib/arc-scanner.cjs +344 -0
  22. package/cap/bin/lib/cap-affinity-engine.cjs +862 -0
  23. package/cap/bin/lib/cap-anchor.cjs +228 -0
  24. package/cap/bin/lib/cap-annotation-writer.cjs +340 -0
  25. package/cap/bin/lib/cap-checkpoint.cjs +434 -0
  26. package/cap/bin/lib/cap-cluster-detect.cjs +945 -0
  27. package/cap/bin/lib/cap-cluster-display.cjs +52 -0
  28. package/cap/bin/lib/cap-cluster-format.cjs +245 -0
  29. package/cap/bin/lib/cap-cluster-helpers.cjs +295 -0
  30. package/cap/bin/lib/cap-cluster-io.cjs +212 -0
  31. package/cap/bin/lib/cap-completeness.cjs +540 -0
  32. package/cap/bin/lib/cap-deps.cjs +583 -0
  33. package/cap/bin/lib/cap-design-families.cjs +332 -0
  34. package/cap/bin/lib/cap-design.cjs +966 -0
  35. package/cap/bin/lib/cap-divergence-detector.cjs +400 -0
  36. package/cap/bin/lib/cap-doctor.cjs +752 -0
  37. package/cap/bin/lib/cap-feature-map-internals.cjs +19 -0
  38. package/cap/bin/lib/cap-feature-map-migrate.cjs +335 -0
  39. package/cap/bin/lib/cap-feature-map-monorepo.cjs +885 -0
  40. package/cap/bin/lib/cap-feature-map-shard.cjs +315 -0
  41. package/cap/bin/lib/cap-feature-map.cjs +1943 -0
  42. package/cap/bin/lib/cap-fitness-score.cjs +1075 -0
  43. package/cap/bin/lib/cap-impact-analysis.cjs +652 -0
  44. package/cap/bin/lib/cap-learn-review.cjs +1072 -0
  45. package/cap/bin/lib/cap-learning-signals.cjs +627 -0
  46. package/cap/bin/lib/cap-loader.cjs +227 -0
  47. package/cap/bin/lib/cap-logger.cjs +57 -0
  48. package/cap/bin/lib/cap-memory-bridge.cjs +764 -0
  49. package/cap/bin/lib/cap-memory-confidence.cjs +452 -0
  50. package/cap/bin/lib/cap-memory-dir.cjs +987 -0
  51. package/cap/bin/lib/cap-memory-engine.cjs +698 -0
  52. package/cap/bin/lib/cap-memory-extends.cjs +398 -0
  53. package/cap/bin/lib/cap-memory-graph.cjs +790 -0
  54. package/cap/bin/lib/cap-memory-migrate.cjs +2015 -0
  55. package/cap/bin/lib/cap-memory-pin.cjs +183 -0
  56. package/cap/bin/lib/cap-memory-platform.cjs +490 -0
  57. package/cap/bin/lib/cap-memory-prune.cjs +707 -0
  58. package/cap/bin/lib/cap-memory-schema.cjs +812 -0
  59. package/cap/bin/lib/cap-migrate-tags.cjs +309 -0
  60. package/cap/bin/lib/cap-migrate.cjs +540 -0
  61. package/cap/bin/lib/cap-pattern-apply.cjs +1203 -0
  62. package/cap/bin/lib/cap-pattern-pipeline.cjs +1034 -0
  63. package/cap/bin/lib/cap-plugin-manifest.cjs +80 -0
  64. package/cap/bin/lib/cap-realtime-affinity.cjs +399 -0
  65. package/cap/bin/lib/cap-reconcile.cjs +570 -0
  66. package/cap/bin/lib/cap-research-gate.cjs +218 -0
  67. package/cap/bin/lib/cap-scope-filter.cjs +402 -0
  68. package/cap/bin/lib/cap-semantic-pipeline.cjs +1038 -0
  69. package/cap/bin/lib/cap-session-extract.cjs +987 -0
  70. package/cap/bin/lib/cap-session.cjs +445 -0
  71. package/cap/bin/lib/cap-snapshot-linkage.cjs +963 -0
  72. package/cap/bin/lib/cap-stack-docs.cjs +646 -0
  73. package/cap/bin/lib/cap-tag-observer.cjs +371 -0
  74. package/cap/bin/lib/cap-tag-scanner.cjs +1766 -0
  75. package/cap/bin/lib/cap-telemetry.cjs +466 -0
  76. package/cap/bin/lib/cap-test-audit.cjs +1438 -0
  77. package/cap/bin/lib/cap-thread-migrator.cjs +307 -0
  78. package/cap/bin/lib/cap-thread-synthesis.cjs +545 -0
  79. package/cap/bin/lib/cap-thread-tracker.cjs +519 -0
  80. package/cap/bin/lib/cap-trace.cjs +399 -0
  81. package/cap/bin/lib/cap-trust-mode.cjs +336 -0
  82. package/cap/bin/lib/cap-ui-design-editor.cjs +642 -0
  83. package/cap/bin/lib/cap-ui-mind-map.cjs +712 -0
  84. package/cap/bin/lib/cap-ui-thread-nav.cjs +693 -0
  85. package/cap/bin/lib/cap-ui.cjs +1245 -0
  86. package/cap/bin/lib/cap-upgrade.cjs +1028 -0
  87. package/cap/bin/lib/cli/arg-helpers.cjs +49 -0
  88. package/cap/bin/lib/cli/frontmatter-router.cjs +31 -0
  89. package/cap/bin/lib/cli/init-router.cjs +68 -0
  90. package/cap/bin/lib/cli/phase-router.cjs +102 -0
  91. package/cap/bin/lib/cli/state-router.cjs +61 -0
  92. package/cap/bin/lib/cli/template-router.cjs +37 -0
  93. package/cap/bin/lib/cli/uat-router.cjs +29 -0
  94. package/cap/bin/lib/cli/validation-router.cjs +26 -0
  95. package/cap/bin/lib/cli/verification-router.cjs +31 -0
  96. package/cap/bin/lib/cli/workstream-router.cjs +39 -0
  97. package/cap/bin/lib/commands.cjs +961 -0
  98. package/cap/bin/lib/config.cjs +467 -0
  99. package/cap/bin/lib/convention-reader.cjs +258 -0
  100. package/cap/bin/lib/core.cjs +1241 -0
  101. package/cap/bin/lib/feature-aggregator.cjs +423 -0
  102. package/cap/bin/lib/frontmatter.cjs +337 -0
  103. package/cap/bin/lib/init.cjs +1443 -0
  104. package/cap/bin/lib/manifest-generator.cjs +383 -0
  105. package/cap/bin/lib/milestone.cjs +253 -0
  106. package/cap/bin/lib/model-profiles.cjs +69 -0
  107. package/cap/bin/lib/monorepo-context.cjs +226 -0
  108. package/cap/bin/lib/monorepo-migrator.cjs +509 -0
  109. package/cap/bin/lib/phase.cjs +889 -0
  110. package/cap/bin/lib/profile-output.cjs +989 -0
  111. package/cap/bin/lib/profile-pipeline.cjs +540 -0
  112. package/cap/bin/lib/roadmap.cjs +330 -0
  113. package/cap/bin/lib/security.cjs +394 -0
  114. package/cap/bin/lib/session-manager.cjs +292 -0
  115. package/cap/bin/lib/skeleton-generator.cjs +179 -0
  116. package/cap/bin/lib/state.cjs +1032 -0
  117. package/cap/bin/lib/template.cjs +231 -0
  118. package/cap/bin/lib/test-detector.cjs +62 -0
  119. package/cap/bin/lib/uat.cjs +283 -0
  120. package/cap/bin/lib/verify.cjs +889 -0
  121. package/cap/bin/lib/workspace-detector.cjs +371 -0
  122. package/cap/bin/lib/workstream.cjs +492 -0
  123. package/cap/commands/gsd/workstreams.md +63 -0
  124. package/cap/references/arc-standard.md +315 -0
  125. package/cap/references/cap-agent-architecture.md +101 -0
  126. package/cap/references/cap-gitignore-template +9 -0
  127. package/cap/references/cap-zero-deps.md +158 -0
  128. package/cap/references/checkpoints.md +778 -0
  129. package/cap/references/continuation-format.md +249 -0
  130. package/cap/references/contract-test-templates.md +312 -0
  131. package/cap/references/feature-map-template.md +25 -0
  132. package/cap/references/git-integration.md +295 -0
  133. package/cap/references/git-planning-commit.md +38 -0
  134. package/cap/references/model-profiles.md +174 -0
  135. package/cap/references/phase-numbering.md +126 -0
  136. package/cap/references/planning-config.md +202 -0
  137. package/cap/references/property-test-templates.md +316 -0
  138. package/cap/references/security-test-templates.md +347 -0
  139. package/cap/references/session-template.json +8 -0
  140. package/cap/references/tdd.md +263 -0
  141. package/cap/references/user-profiling.md +681 -0
  142. package/cap/references/verification-patterns.md +612 -0
  143. package/cap/templates/UAT.md +265 -0
  144. package/cap/templates/claude-md.md +175 -0
  145. package/cap/templates/codebase/architecture.md +255 -0
  146. package/cap/templates/codebase/concerns.md +310 -0
  147. package/cap/templates/codebase/conventions.md +307 -0
  148. package/cap/templates/codebase/integrations.md +280 -0
  149. package/cap/templates/codebase/stack.md +186 -0
  150. package/cap/templates/codebase/structure.md +285 -0
  151. package/cap/templates/codebase/testing.md +480 -0
  152. package/cap/templates/config.json +44 -0
  153. package/cap/templates/context.md +352 -0
  154. package/cap/templates/continue-here.md +78 -0
  155. package/cap/templates/copilot-instructions.md +7 -0
  156. package/cap/templates/debug-subagent-prompt.md +91 -0
  157. package/cap/templates/discussion-log.md +63 -0
  158. package/cap/templates/milestone-archive.md +123 -0
  159. package/cap/templates/milestone.md +115 -0
  160. package/cap/templates/phase-prompt.md +610 -0
  161. package/cap/templates/planner-subagent-prompt.md +117 -0
  162. package/cap/templates/project.md +186 -0
  163. package/cap/templates/requirements.md +231 -0
  164. package/cap/templates/research-project/ARCHITECTURE.md +204 -0
  165. package/cap/templates/research-project/FEATURES.md +147 -0
  166. package/cap/templates/research-project/PITFALLS.md +200 -0
  167. package/cap/templates/research-project/STACK.md +120 -0
  168. package/cap/templates/research-project/SUMMARY.md +170 -0
  169. package/cap/templates/research.md +552 -0
  170. package/cap/templates/roadmap.md +202 -0
  171. package/cap/templates/state.md +176 -0
  172. package/cap/templates/summary.md +364 -0
  173. package/cap/templates/user-preferences.md +498 -0
  174. package/cap/templates/verification-report.md +322 -0
  175. package/cap/workflows/add-phase.md +112 -0
  176. package/cap/workflows/add-tests.md +351 -0
  177. package/cap/workflows/add-todo.md +158 -0
  178. package/cap/workflows/audit-milestone.md +340 -0
  179. package/cap/workflows/audit-uat.md +109 -0
  180. package/cap/workflows/autonomous.md +891 -0
  181. package/cap/workflows/check-todos.md +177 -0
  182. package/cap/workflows/cleanup.md +152 -0
  183. package/cap/workflows/complete-milestone.md +767 -0
  184. package/cap/workflows/diagnose-issues.md +231 -0
  185. package/cap/workflows/discovery-phase.md +289 -0
  186. package/cap/workflows/discuss-phase-assumptions.md +653 -0
  187. package/cap/workflows/discuss-phase.md +1049 -0
  188. package/cap/workflows/do.md +104 -0
  189. package/cap/workflows/execute-phase.md +846 -0
  190. package/cap/workflows/execute-plan.md +514 -0
  191. package/cap/workflows/fast.md +105 -0
  192. package/cap/workflows/forensics.md +265 -0
  193. package/cap/workflows/health.md +181 -0
  194. package/cap/workflows/help.md +660 -0
  195. package/cap/workflows/insert-phase.md +130 -0
  196. package/cap/workflows/list-phase-assumptions.md +178 -0
  197. package/cap/workflows/list-workspaces.md +56 -0
  198. package/cap/workflows/manager.md +362 -0
  199. package/cap/workflows/map-codebase.md +377 -0
  200. package/cap/workflows/milestone-summary.md +223 -0
  201. package/cap/workflows/new-milestone.md +486 -0
  202. package/cap/workflows/new-project.md +1250 -0
  203. package/cap/workflows/new-workspace.md +237 -0
  204. package/cap/workflows/next.md +97 -0
  205. package/cap/workflows/node-repair.md +92 -0
  206. package/cap/workflows/note.md +156 -0
  207. package/cap/workflows/pause-work.md +176 -0
  208. package/cap/workflows/plan-milestone-gaps.md +273 -0
  209. package/cap/workflows/plan-phase.md +857 -0
  210. package/cap/workflows/plant-seed.md +169 -0
  211. package/cap/workflows/pr-branch.md +129 -0
  212. package/cap/workflows/profile-user.md +449 -0
  213. package/cap/workflows/progress.md +507 -0
  214. package/cap/workflows/quick.md +757 -0
  215. package/cap/workflows/remove-phase.md +155 -0
  216. package/cap/workflows/remove-workspace.md +90 -0
  217. package/cap/workflows/research-phase.md +82 -0
  218. package/cap/workflows/resume-project.md +326 -0
  219. package/cap/workflows/review.md +228 -0
  220. package/cap/workflows/session-report.md +146 -0
  221. package/cap/workflows/settings.md +283 -0
  222. package/cap/workflows/ship.md +228 -0
  223. package/cap/workflows/stats.md +60 -0
  224. package/cap/workflows/transition.md +671 -0
  225. package/cap/workflows/ui-phase.md +298 -0
  226. package/cap/workflows/ui-review.md +161 -0
  227. package/cap/workflows/update.md +323 -0
  228. package/cap/workflows/validate-phase.md +170 -0
  229. package/cap/workflows/verify-phase.md +254 -0
  230. package/cap/workflows/verify-work.md +637 -0
  231. package/commands/cap/annotate.md +165 -0
  232. package/commands/cap/brainstorm.md +393 -0
  233. package/commands/cap/checkpoint.md +106 -0
  234. package/commands/cap/completeness.md +94 -0
  235. package/commands/cap/continue.md +72 -0
  236. package/commands/cap/debug.md +588 -0
  237. package/commands/cap/deps.md +169 -0
  238. package/commands/cap/design.md +479 -0
  239. package/commands/cap/init.md +354 -0
  240. package/commands/cap/iterate.md +249 -0
  241. package/commands/cap/learn.md +459 -0
  242. package/commands/cap/memory.md +275 -0
  243. package/commands/cap/migrate-feature-map.md +91 -0
  244. package/commands/cap/migrate-memory.md +108 -0
  245. package/commands/cap/migrate-tags.md +91 -0
  246. package/commands/cap/migrate.md +131 -0
  247. package/commands/cap/prototype.md +510 -0
  248. package/commands/cap/reconcile.md +121 -0
  249. package/commands/cap/review.md +360 -0
  250. package/commands/cap/save.md +72 -0
  251. package/commands/cap/scan.md +404 -0
  252. package/commands/cap/start.md +356 -0
  253. package/commands/cap/status.md +118 -0
  254. package/commands/cap/test-audit.md +262 -0
  255. package/commands/cap/test.md +394 -0
  256. package/commands/cap/trace.md +133 -0
  257. package/commands/cap/ui.md +167 -0
  258. package/hooks/dist/cap-check-update.js +115 -0
  259. package/hooks/dist/cap-context-monitor.js +185 -0
  260. package/hooks/dist/cap-learn-review-hook.js +114 -0
  261. package/hooks/dist/cap-learning-hook.js +192 -0
  262. package/hooks/dist/cap-memory.js +299 -0
  263. package/hooks/dist/cap-prompt-guard.js +97 -0
  264. package/hooks/dist/cap-statusline.js +157 -0
  265. package/hooks/dist/cap-tag-observer.js +115 -0
  266. package/hooks/dist/cap-version-check.js +112 -0
  267. package/hooks/dist/cap-workflow-guard.js +175 -0
  268. package/hooks/hooks.json +55 -0
  269. package/package.json +58 -0
  270. package/scripts/base64-scan.sh +262 -0
  271. package/scripts/build-hooks.js +93 -0
  272. package/scripts/cap-removal-checklist.md +202 -0
  273. package/scripts/prompt-injection-scan.sh +199 -0
  274. package/scripts/run-tests.cjs +181 -0
  275. package/scripts/secret-scan.sh +227 -0
@@ -0,0 +1,1766 @@
1
+ // @cap-context CAP v2.0 tag scanner -- extracts @cap-feature, @cap-todo, @cap-risk, and @cap-decision tags from source files.
2
+ // @cap-decision Separate module from arc-scanner.cjs -- CAP tags use @cap- prefix (not @gsd-) and have different metadata semantics (feature: key instead of phase: key).
3
+ // @cap-decision Regex-based extraction (not AST) -- language-agnostic, zero dependencies, proven sufficient in GSD arc-scanner.cjs.
4
+ // @cap-constraint Zero external dependencies -- uses only Node.js built-ins (fs, path).
5
+ // @cap-pattern Same comment anchor rule as ARC: tag is only valid when first non-whitespace content on a line is a comment token.
6
+
7
+ 'use strict';
8
+
9
+ // @cap-feature(feature:F-001) Tag Scanner — regex-based extraction of @cap-* tags from source files
10
+ // @cap-todo decision: Migrating @gsd-* comment headers in this file to @cap-* format is blocked on F-006 migration completion
11
+
12
+ // @cap-history(sessions:4, edits:17, since:2026-04-20, learned:2026-05-08) Frequently modified — 4 sessions, 17 edits
13
+ const fs = require('node:fs');
14
+ const path = require('node:path');
15
+ // @cap-feature(feature:F-085) Scope filter integration — gitignore + path-pattern + plugin-mirror
16
+ // awareness lives in cap-scope-filter.cjs. Imported here so scanDirectory and friends share the
17
+ // same exclusion semantics with cap-migrate-tags.
18
+ const scopeModule = require('./cap-scope-filter.cjs');
19
+
20
+ // @cap-todo(ref:AC-20) Primary tags are @cap-feature and @cap-todo; risk and decision are optional standalone tags
21
+ // @cap-decision CAP tag types: 2 primary (feature, todo) + 2 optional (risk, decision). Simplified from GSD's 8 types.
22
+ const CAP_TAG_TYPES = ['feature', 'todo', 'risk', 'decision'];
23
+
24
+ // @cap-feature(feature:F-047) Opt-in config check for unified anchor block parsing.
25
+ // Returns true when .cap/config.json has { unifiedAnchors: { enabled: true } }.
26
+ // Returns false on any error or when the section is absent. Called once per scanDirectory.
27
/**
 * Reports whether unified anchor block parsing is switched on for a project.
 *
 * Reads `<projectRoot>/.cap/config.json` and answers true only when the parsed document
 * carries `unifiedAnchors.enabled` set to the boolean `true`. Any failure along the way
 * (missing file, unreadable file, invalid JSON, invalid `projectRoot`) yields false.
 *
 * @param {string} projectRoot - Directory that contains the `.cap` folder
 * @returns {boolean}
 */
function isUnifiedAnchorsEnabled(projectRoot) {
  let config;
  try {
    const configFile = path.join(projectRoot, '.cap', 'config.json');
    config = JSON.parse(fs.readFileSync(configFile, 'utf8'));
  } catch (_err) {
    // Opt-in feature: anything that prevents reading the flag means "off".
    return false;
  }
  if (!config || !config.unifiedAnchors) return false;
  return config.unifiedAnchors.enabled === true;
}
37
+
38
+ // @cap-feature(feature:F-094, primary:true) Opt-out config check for multi-line @cap-* description capture.
39
+ // Default is ON: continuation-pickup runs unless .cap/config.json explicitly sets
40
+ // { multilineCapture: { enabled: false } }. Missing config or any read error returns true (default).
41
/**
 * Reports whether multi-line description capture should run for a project.
 *
 * The feature is on unless `<projectRoot>/.cap/config.json` explicitly contains
 * `multilineCapture.enabled` set to the boolean `false`. A falsy `projectRoot`, a missing or
 * unreadable config file, and invalid JSON all resolve to true.
 *
 * @param {string} [projectRoot] - Directory that contains the `.cap` folder
 * @returns {boolean}
 */
function isMultilineCaptureEnabled(projectRoot) {
  if (!projectRoot) return true;

  let config;
  try {
    const configFile = path.join(projectRoot, '.cap', 'config.json');
    config = JSON.parse(fs.readFileSync(configFile, 'utf8'));
  } catch (_err) {
    // Opt-out feature: when the flag cannot be read, stay on the default.
    return true;
  }

  const section = config ? config.multilineCapture : null;
  return !(section && section.enabled === false);
}
53
+
54
+ // @cap-todo(ref:AC-25) Tag scanner uses native RegExp with dotAll flag for multiline extraction
55
+ // @cap-pattern Tag regex anchors to comment tokens at line start -- identical approach to arc-scanner.cjs
56
+ // @cap-decision F-046 leaves CAP_TAG_RE untouched (AC-5 backward compat). New polylingual extension uses extractTagsWithContext + getCommentStyle for richer per-language detection.
57
+ const CAP_TAG_RE = /^[ \t]*(?:\/\/|\/\*|\*|#|--|"""|''')[ \t]*@cap-(feature|todo|risk|decision)(?:\(([^)]*)\))?[ \t]*(.*)/;
58
+
59
+ // @cap-feature(feature:F-063) Design-Tag recognition in the tag scanner.
60
+ // @cap-todo(ac:F-063/AC-2) Recognise @cap-design-token(id:DT-NNN) and @cap-design-component(id:DC-NNN) in source comments.
61
+ // @cap-decision Keep the core CAP_TAG_RE / CAP_TAG_TYPES untouched — adding design types there would break F-001's
62
+ // regression tests (CAP_TAG_TYPES.length === 4 is pinned). Design tags get a sibling regex and are merged into
63
+ // extractTags output with type values 'design-token' | 'design-component'. Consumers that filter by tag.type
64
+ // against {'feature','todo','risk','decision'} are unaffected.
65
+ const CAP_DESIGN_TAG_RE = /^[ \t]*(?:\/\/|\/\*|\*|#|--|"""|''')[ \t]*@cap-(design-token|design-component)(?:\(([^)]*)\))?[ \t]*(.*)/;
66
+
67
+ // @cap-api CAP_DESIGN_TAG_TYPES -- exported for /cap:deps --design and /cap:trace design-usage.
68
+ const CAP_DESIGN_TAG_TYPES = ['design-token', 'design-component'];
69
+
70
+ // @cap-todo(ref:AC-26) Tag scanner is language-agnostic, operating on comment syntax patterns across JS, TS, Python, Ruby, Shell
71
+ // @cap-decision F-046 leaves SUPPORTED_EXTENSIONS untouched to preserve AC-5 backward compatibility (existing test asserts list length === 18). The new polylingual scanner uses Object.keys(COMMENT_STYLES) as its default extension list, which DOES include HTML/CSS/SCSS/Markdown/YAML/TOML/Shell-zsh.
72
const SUPPORTED_EXTENSIONS = [
  // JavaScript / TypeScript
  '.js', '.cjs', '.mjs', '.ts', '.tsx', '.jsx',
  // Scripting languages and shells
  '.py', '.rb', '.sh', '.bash',
  // SQL
  '.sql',
  // Compiled languages
  '.go', '.rs', '.java',
  // C / C++ sources and headers
  '.c', '.cpp', '.h', '.hpp',
];
73
+ // @cap-decision DEFAULT_EXCLUDE covers (a) VCS + tooling metadata, (b) JS/TS build outputs, (c) framework
74
+ // caches that emit source-mapped JS the scanner would otherwise mistake for real code.
75
+ // The Next.js / Turbo / Nx caches were the worst offenders — a single GoetzeInvest scan
76
+ // surfaced 344 decisions sourced from `.next/dev/server/chunks/*.js` (~28 % of the
77
+ // decisions.md file). Build artifacts MUST never enter the memory pipeline; pre-existing
78
+ // entries should be pruned via `cap:memory prune` after this constant lands.
79
const DEFAULT_EXCLUDE = [
  // VCS + CAP own metadata
  '.git',
  '.cap',
  '.planning',
  // Generic JS/TS build outputs
  'node_modules',
  'dist',
  'build',
  'coverage',
  'out',
  // Framework / monorepo caches that emit source-mapped JS
  '.next',
  '.turbo',
  '.nx',
  '.cache',
  '.parcel-cache',
  '.vercel',
  '.svelte-kit',
  // Other ecosystems (Python / Java / Rust / iOS / Android)
  '__pycache__',
  '.pytest_cache',
  '.mypy_cache',
  '.ruff_cache',
  '.tox',
  'venv',
  '.venv',
  'target',
  '.gradle',
  'Pods',
  '.expo',
];
90
+
91
+ // @cap-todo(ref:AC-22) @cap-todo supports structured subtypes: risk:..., decision:...
92
+ // @cap-decision Subtype detection uses prefix matching on the description text (e.g., "risk: memory leak" -> subtype: "risk")
93
+ const SUBTYPE_RE = /^(risk|decision):\s*(.*)/;
94
+
95
+ /**
96
+ * @typedef {Object} CapTag
97
+ * @property {string} type - Tag type without @cap- prefix ('feature', 'todo', 'risk', 'decision')
98
+ * @property {string} file - Relative path from project root
99
+ * @property {number} line - 1-based line number
100
+ * @property {Object<string,string>} metadata - Parsed key-value pairs from parenthesized block
101
+ * @property {string} description - Text after metadata block
102
+ * @property {string} raw - Complete original line
103
+ * @property {string|null} subtype - For @cap-todo: 'risk' or 'decision' if prefixed, else null
104
+ */
105
+
106
+ // @cap-api parseMetadata(metadataStr) -- Parses parenthesized key:value pairs.
107
+ // Returns: Object<string,string> -- flat key-value object.
108
/**
 * Turns the text found between a tag's parentheses into a flat lookup object.
 *
 * Entries are separated by commas. An entry of the form `key:value` is split on its FIRST
 * colon, so the value may itself contain colons; both sides are trimmed. An entry without a
 * colon is stored as a flag whose value is the string 'true'. Blank entries and entries with
 * an empty key are dropped. When a key repeats, the last occurrence wins.
 *
 * @param {string} metadataStr - Raw metadata string without parens (e.g., "feature:auth, ac:AUTH/AC-1")
 * @returns {Object<string,string>} Parsed pairs; an empty object for empty or blank input
 */
function parseMetadata(metadataStr) {
  const metadata = {};
  if (!metadataStr || !metadataStr.trim()) return metadata;

  metadataStr
    .split(',')
    .map((entry) => entry.trim())
    .filter(Boolean)
    .forEach((entry) => {
      const sep = entry.indexOf(':');
      if (sep < 0) {
        // Bare key: record it as a truthy flag.
        metadata[entry] = 'true';
        return;
      }
      const key = entry.slice(0, sep).trim();
      if (key) metadata[key] = entry.slice(sep + 1).trim();
    });

  return metadata;
}
131
+
132
+ // @cap-feature(feature:F-094) Detect the comment-anchor token used to introduce a @cap-* tag.
133
+ // Returns one of: '//', '/*', '*', '#', '--', '"""', "'''", or null when the line does not
134
+ // match the expected leading-token shape. Used to drive continuation-line matching: a Line-comment
135
+ // anchor (//, #, --) only continues with the same token; a Block-comment anchor (/*, """, ''')
136
+ // continues into block-body until the closing token; a `*` anchor (already-inside JSDoc body)
137
+ // continues with `*` body lines.
138
const ANCHOR_TOKEN_RE = /^[ \t]*(\/\/|\/\*|\*|#|--|"""|''')/;

/**
 * Identifies the comment token that opens a line.
 *
 * Leading spaces and tabs are skipped; the first thing after them must be one of the tokens
 * recognised by ANCHOR_TOKEN_RE.
 *
 * @param {string} line - A single source line
 * @returns {string|null} The matched comment token, or null when the line opens with none
 */
function detectAnchorToken(line) {
  const found = line.match(ANCHOR_TOKEN_RE);
  if (found === null) return null;
  return found[1];
}
143
+
144
+ // @cap-feature(feature:F-094) Extract continuation lines for a @cap-* tag.
145
+ // Walks forward from `startIdx` collecting comment-continuation lines until a stop-condition
146
+ // is met. Stop-conditions per AC-2: empty line, code line (no matching opener), new @cap-* tag,
147
+ // block-comment-close token. Returns an array of cleaned text fragments (one per continuation
148
+ // line). Caller concatenates with single-space separator.
149
+ //
150
+ // Block-comment behaviour (anchor `/*`, `"""`, `'''`): body lines accumulate until close token
151
+ // or stop-condition; leading `*` and surrounding whitespace are stripped from each body line.
152
+ // Line-comment behaviour (anchor `//`, `#`, `--`, `*`): each continuation line must start with
153
+ // the same anchor token (with arbitrary indent before/after).
154
+ //
155
+ // NOT covered (deliberate scope): cross-block continuations (a tag in one /* */ block plus
156
+ // prose in a separate /* */ block below); continuations after exactly one blank line; nested
157
+ // block comments. These remain F-094 follow-up scope.
158
/**
 * Collects the comment lines that continue the description of the tag found on the line
 * just before `startIdx`.
 *
 * Scanning stops at the first blank line, at the first line that itself carries a tag, at
 * the first line that is not a continuation for the given anchor, or once the enclosing
 * block comment closes.
 *
 * Anchor handling:
 *   - Block openers (slash-star, triple double-quote, triple single-quote): every following
 *     line is body text up to the closing token; for slash-star a leading `*` gutter is
 *     stripped. When the tag line already contains the closing token (a single-line block
 *     comment) nothing is collected, because the lines below it are code, not comment.
 *   - Line anchors (`//`, `#`, `--`, `*`): a continuation must open with the same token
 *     followed by at least one space or tab. For `*` (inside a JSDoc-style body) a closing
 *     star-slash on the line ends the scan and is not included in the text.
 *
 * A trailing carriage return on a line is ignored, so content with CRLF line endings that
 * was split on '\n' yields the same result as LF content.
 *
 * @param {string[]} lines - All lines of the file being scanned
 * @param {number} startIdx - Index of the first candidate line; the tag line is expected at startIdx - 1
 * @param {string|null} anchor - Comment token that introduced the tag (see detectAnchorToken)
 * @returns {string[]} Trimmed, non-empty text fragments, one per continuation line
 */
function captureContinuations(lines, startIdx, anchor) {
  const continuations = [];
  if (!anchor) return continuations;

  const isBlockOpen = (anchor === '/*' || anchor === '"""' || anchor === "'''");
  const closeToken = anchor === '/*' ? '*/' : anchor;

  // A block comment that opens AND closes on the tag line has no body below it. Without this
  // guard the code lines that follow would be swallowed into the description.
  if (isBlockOpen && startIdx > 0 && typeof lines[startIdx - 1] === 'string') {
    const tagLine = lines[startIdx - 1];
    const openIdx = tagLine.indexOf(anchor);
    if (openIdx !== -1 && tagLine.indexOf(closeToken, openIdx + anchor.length) !== -1) {
      return continuations;
    }
  }

  // For line-comment anchors, precompute a regex matching the same token at line start.
  let lineRe = null;
  if (!isBlockOpen) {
    const escaped = anchor.replace(/[.*+?^${}()|[\]\\/-]/g, '\\$&');
    lineRe = new RegExp('^[ \\t]*' + escaped + '[ \\t]+(.*)$');
  }

  for (let j = startIdx; j < lines.length; j++) {
    // `.` never matches '\r', so an unstripped CR would make lineRe fail on every CRLF line.
    const line = lines[j].endsWith('\r') ? lines[j].slice(0, -1) : lines[j];

    if (!line.trim()) break;
    if (CAP_TAG_RE.test(line) || CAP_DESIGN_TAG_RE.test(line)) break;

    let text = null;
    let blockClosed = false;

    if (isBlockOpen) {
      const closeIdx = line.indexOf(closeToken);
      const body = closeIdx === -1 ? line : line.slice(0, closeIdx);
      text = anchor === '/*'
        ? body.replace(/^[ \t]*\*?[ \t]?/, '').trim()
        : body.trim();
      blockClosed = closeIdx !== -1;
    } else {
      const m = line.match(lineRe);
      if (!m) break;
      text = m[1].trim();
      if (anchor === '*') {
        // Inside a block-comment body the comment may close at the end of a text line.
        const closeIdx = text.indexOf('*/');
        if (closeIdx !== -1) {
          text = text.slice(0, closeIdx).trim();
          blockClosed = true;
        }
      }
    }

    if (text) continuations.push(text);
    if (blockClosed) break;
  }

  return continuations;
}
205
+
206
+ // @cap-api extractTags(content, filePath, options) -- Regex extraction engine supporting //, #, /* */, """ """ comment styles.
207
+ // Returns: CapTag[] -- array of extracted tags.
208
/**
 * Extracts every @cap-* tag from a file's text, in source order.
 *
 * Each line is matched against CAP_TAG_RE (feature / todo / risk / decision) first and, when
 * that fails, against CAP_DESIGN_TAG_RE (design-token / design-component). Both kinds are
 * turned into the same CapTag shape by one shared helper, so the two can never drift apart.
 * `subtype` is only ever set for `todo` tags whose description matches SUBTYPE_RE.
 *
 * @param {string} content - File content to scan
 * @param {string} filePath - Relative file path (for tag metadata)
 * @param {Object} [options={}] - Extraction options. Default-initialised so that
 *   `extractTags.length === 2` stays pinned by F-046/AC-5 backward-compat test. An explicit
 *   `null` is treated like an omitted argument.
 * @param {boolean} [options.multilineCapture=true] - When true, multi-line continuations are appended to description (F-094)
 * @returns {CapTag[]}
 */
function extractTags(content, filePath, options = {}) {
  // A default parameter only covers `undefined`; an explicit `null` must not throw here.
  const multilineEnabled = !options || options.multilineCapture !== false; // default ON
  const lines = content.split('\n');
  const tags = [];

  // Builds one CapTag from a regex match: [1] = type, [2] = metadata text, [3] = description.
  const buildTag = (match, index) => {
    const line = lines[index];
    const type = match[1];
    const metadata = parseMetadata(match[2] || '');
    let description = (match[3] || '').trim();

    // @cap-feature(feature:F-094) Continuation-pickup: if multilineCapture is enabled, walk
    // forward from the next line and append continuation-line content to description.
    // The original `raw` and `line` (1-based anchor line) are preserved for migration
    // compatibility (AC-4).
    if (multilineEnabled) {
      const anchor = detectAnchorToken(line);
      const cont = captureContinuations(lines, index + 1, anchor);
      if (cont.length > 0) {
        description = (description + ' ' + cont.join(' ')).replace(/\s+/g, ' ').trim();
      }
    }

    // @cap-todo(ref:AC-22) Detect subtypes in @cap-todo description (risk:..., decision:...)
    let subtype = null;
    if (type === 'todo') {
      const subtypeMatch = description.match(SUBTYPE_RE);
      if (subtypeMatch) subtype = subtypeMatch[1];
    }

    return {
      type,
      file: filePath,
      line: index + 1,
      metadata,
      description,
      raw: line,
      subtype,
    };
  };

  for (let i = 0; i < lines.length; i++) {
    // @cap-todo(ac:F-063/AC-2) Fall through to design-tag recognition. Two separate regexes keep the
    // core tag-type set (feature/todo/risk/decision) stable and pinned by F-001's regression tests.
    const match = lines[i].match(CAP_TAG_RE) || lines[i].match(CAP_DESIGN_TAG_RE);
    if (match) tags.push(buildTag(match, i));
  }
  return tags;
}
292
+
293
+ // @cap-api scanFile(filePath, projectRoot) -- Scans a single file for @cap-* tags.
294
+ // Returns: CapTag[] -- array of extracted tags with file, line, metadata, description.
295
/**
 * Reads one file from disk and returns the tags found in it.
 *
 * A file that cannot be read produces an empty array rather than an error. Tag `file` values
 * are expressed relative to `projectRoot`.
 *
 * @param {string} filePath - Absolute path to file
 * @param {string} projectRoot - Absolute path to project root (for relative path computation)
 * @param {Object} [options]
 * @param {boolean} [options.multilineCapture] - Forwarded to extractTags when not null/undefined
 * @param {boolean} [options.unifiedAnchors] - When truthy, tags from unified anchor blocks
 *   (via cap-anchor.cjs) are appended after the regular tags
 * @returns {CapTag[]}
 */
// @cap-todo(ac:F-047/AC-1) scanFile shall also expand unified @cap anchor blocks when
// the caller passes { unifiedAnchors: true }. Backward-compatible default (off).
function scanFile(filePath, projectRoot, options) {
  const opts = options || {};

  // @cap-todo(ref:AC-25) Use native RegExp for tag extraction -- no AST parsing
  let source;
  try {
    source = fs.readFileSync(filePath, 'utf8');
  } catch (_err) {
    return [];
  }
  const relPath = path.relative(projectRoot, filePath);

  // @cap-feature(feature:F-094) Forward the multilineCapture flag to extractTags so callers
  // that resolved it once (scanDirectory) don't re-read .cap/config.json per file.
  const extractOpts = opts.multilineCapture != null
    ? { multilineCapture: opts.multilineCapture }
    : {};
  const found = extractTags(source, relPath, extractOpts);
  if (!opts.unifiedAnchors) return found;

  // Lazy require keeps the module decoupled when the feature is disabled.
  const anchorModule = require('./cap-anchor.cjs');
  found.push(...anchorModule.scanAnchorsInContent(source, relPath));
  return found;
}
323
+
324
+ // @cap-api scanDirectory(dirPath, options) -- Recursively scans a directory for @cap-* tags.
325
+ // Returns: CapTag[] -- aggregated tags from all matching files.
326
+ // Options: { extensions?: string[], exclude?: string[] }
327
+ /**
328
+ * @param {string} dirPath - Absolute path to directory to scan
329
+ * @param {Object} [options]
330
+ * @param {string[]} [options.extensions] - File extensions to include (e.g., ['.js', '.ts', '.py'])
331
+ * @param {string[]} [options.exclude] - Directory names to exclude (e.g., ['node_modules', '.git'])
332
+ * @param {string} [options.projectRoot] - Project root for relative paths (defaults to dirPath)
333
+ * @returns {CapTag[]}
334
+ */
335
function scanDirectory(dirPath, options = {}) {
  const extensions = options.extensions || SUPPORTED_EXTENSIONS;
  const projectRoot = options.projectRoot || dirPath;

  // @cap-todo(ac:F-085/AC-1) The scanner consumes a unified scope filter: gitignore-aware,
  // path-pattern-aware, plugin-mirror-aware. Legacy `options.exclude` (basename list) is
  // forwarded as dirExcludes for backwards compat with single-purpose callers.
  const scope =
    options.scope || scopeModule.buildScopeFilter(projectRoot, { dirExcludes: options.exclude });

  // F-047: honour explicit opt-in via options OR .cap/config.json flag. Config is
  // read once per scan so the overhead stays constant regardless of file count.
  let unifiedAnchors;
  if (options.unifiedAnchors != null) {
    unifiedAnchors = !!options.unifiedAnchors;
  } else {
    unifiedAnchors = isUnifiedAnchorsEnabled(projectRoot);
  }

  // @cap-feature(feature:F-094) F-094 multilineCapture is opt-OUT (default ON). Honour explicit
  // options.multilineCapture; otherwise resolve from .cap/config.json once per scan.
  let multilineCapture;
  if (options.multilineCapture != null) {
    multilineCapture = !!options.multilineCapture;
  } else {
    multilineCapture = isMultilineCaptureEnabled(projectRoot);
  }

  const collected = [];
  // Both flags are resolved above, so every file is scanned with the same settings.
  const perFileOptions = { unifiedAnchors, multilineCapture };

  // @cap-constraint Uses readdirSync (not glob) per project zero-dep constraint
  const visit = (currentDir) => {
    let dirents;
    try {
      dirents = fs.readdirSync(currentDir, { withFileTypes: true });
    } catch (_e) {
      // Unreadable directory: skip this subtree and keep scanning the rest.
      return;
    }

    for (const dirent of dirents) {
      const target = path.join(currentDir, dirent.name);

      if (dirent.isDirectory()) {
        if (!scope.isExcluded(target, true)) visit(target);
        continue;
      }

      // Anything that is neither a directory nor a regular file is ignored.
      if (!dirent.isFile()) continue;
      // Extension check runs before the scope filter, as in the original ordering.
      if (!extensions.includes(path.extname(dirent.name))) continue;
      if (scope.isExcluded(target, false)) continue;

      collected.push(...scanFile(target, projectRoot, perFileOptions));
    }
  };

  visit(dirPath);
  return collected;
}
384
+
385
+ // @cap-api groupByFeature(tags) -- Groups tags by their feature: metadata value.
386
+ // Returns: Object<string, CapTag[]> -- map from feature name to tags.
387
+ /**
388
+ * @param {CapTag[]} tags - Array of extracted tags
389
+ * @returns {Object<string, CapTag[]>}
390
+ */
391
function groupByFeature(tags) {
  // Buckets are collected in a Map so that feature IDs which collide with
  // Object.prototype members ("constructor", "toString", "__proto__") are
  // treated as ordinary keys instead of resolving to inherited values.
  const buckets = new Map();

  for (const tag of tags) {
    // Tags without metadata (or without a feature) land in the "(unassigned)" bucket.
    const rawId = (tag.metadata && tag.metadata.feature) || '(unassigned)';
    // Object keys are strings; coerce up front so Map keys merge the same way.
    const featureId = String(rawId);

    let bucket = buckets.get(featureId);
    if (!bucket) {
      bucket = [];
      buckets.set(featureId, bucket);
    }
    bucket.push(tag);
  }

  // Callers receive a plain object, as before. Object.fromEntries defines own
  // data properties, so even "__proto__" becomes a regular key.
  return Object.fromEntries(buckets);
}
400
+
401
+ // @cap-feature(feature:F-045) Multi-file AC traceability — aggregates per-AC file references and detects primary file per AC.
402
+ // @cap-decision Place buildAcFileMap alongside groupByFeature in the scanner module (not in cap-trace.cjs) — it is pure tag aggregation, no IO/graph traversal, mirrors the shape of the existing groupByFeature helper. cap-trace.cjs depends on it.
403
+ // @cap-decision The "ac" key in @cap-todo metadata accepts two formats: "F-045/AC-1" (fully qualified) and "AC-1" (relies on the surrounding @cap-feature for the feature ID). buildAcFileMap normalizes both.
404
+
405
+ /**
406
+ * @typedef {Object} AcFileMapEntry
407
+ * @property {string[]} files - All files that contributed at least one tag to this AC (deduped, stable order)
408
+ * @property {string|null} primary - Primary implementation file (designated, inferred, or null when no files)
409
+ * @property {('designated'|'inferred'|null)} primarySource - How `primary` was determined
410
+ * @property {Object<string,number>} tagDensity - Map from file path -> tag count contributing to this AC
411
+ * @property {string[]} warnings - Human-readable warnings (e.g., heuristic primary picked)
412
+ */
413
+
414
+ // @cap-api buildAcFileMap(tags) -- Aggregate tags into per-AC entries with primary file detection.
415
+ // @cap-todo(ac:F-045/AC-1) Recognize `primary:true` flag on @cap-feature tags as the canonical-file marker.
416
+ // @cap-todo(ac:F-045/AC-2) Emit a structured acFileMap keyed by `<feature-id>/<ac-id>` with all contributing files.
417
+ // @cap-todo(ac:F-045/AC-3) When no `primary:true` is found and the AC spans multiple files, infer primary from highest tag density and emit a warning.
418
+ /**
419
+ * Build a map of AC -> { files, primary, primarySource, tagDensity, warnings }.
420
+ *
421
+ * Key shape: "<feature-id>/<ac-id>" e.g. "F-045/AC-1".
422
+ * Files contribute to an AC when:
423
+ * - the tag is @cap-todo with metadata.ac matching "F-XXX/AC-N" or just "AC-N" (resolved via metadata.feature)
424
+ * - or the tag is @cap-feature/risk/decision with metadata.feature AND metadata.ac present (rare but supported)
425
+ *
426
+ * Primary file detection:
427
+ * - If any @cap-feature tag for the matching feature has `primary:true` AND that file also has a tag for this AC -> designated
428
+ * - A `primary:true` file that has no tag for this AC is NOT used as that AC's primary; resolution falls through to the inference rules below
429
+ * - Else if multiple files contribute -> inferred via highest tag density (warning emitted)
430
+ * - Else if exactly one file contributes -> that file (inferred, trivially)
431
+ * - Else -> null
432
+ *
433
+ * @param {CapTag[]} tags
434
+ * @returns {Object<string, AcFileMapEntry>}
435
+ */
436
function buildAcFileMap(tags) {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const map = {};

  // First pass: collect designated-primary files per feature (from @cap-feature primary:true tags).
  // @cap-decision primary:true is a flag on @cap-feature only — putting it on @cap-todo or @cap-risk is meaningless because those tags are AC-level not feature-level.
  // A Map (not a plain object) is used so feature IDs that collide with
  // Object.prototype members (e.g. "constructor") are recorded like any other ID.
  const designatedPrimaryByFeature = new Map(); // featureId -> file
  for (const tag of tags) {
    if (tag.type !== 'feature') continue;
    if (!tag.metadata || !tag.metadata.feature) continue;
    // Normalize "true" string flag (parser stores all values as strings) to boolean check.
    const isPrimary = tag.metadata.primary === 'true' || tag.metadata.primary === true;
    if (!isPrimary) continue;
    // First wins — if multiple files claim primary for the same feature, the first encountered wins.
    // @cap-risk Multiple primary:true claims on the same feature are silently ignored after the first; consider warning in a follow-up if this becomes a problem in practice.
    if (!designatedPrimaryByFeature.get(tag.metadata.feature)) {
      designatedPrimaryByFeature.set(tag.metadata.feature, tag.file);
    }
  }

  // Second pass: build per-AC contribution lists.
  // We support two ways a tag references an AC:
  //   1) metadata.ac with full form "F-NNN/AC-M"
  //   2) metadata.ac with short form "AC-M" PLUS metadata.feature giving the feature
  for (const tag of tags) {
    if (!tag.metadata || !tag.metadata.ac) continue;
    // Coerce so a non-string value cannot throw on .includes below.
    const acRaw = String(tag.metadata.ac);

    let key;
    if (acRaw.includes('/')) {
      key = acRaw;
    } else if (tag.metadata.feature) {
      key = `${tag.metadata.feature}/${acRaw}`;
    } else {
      // Tag references an AC without enough context to qualify it. Skip silently — orphan detection lives elsewhere.
      continue;
    }

    // Every key contains "/", so it can never shadow an inherited property of `map`.
    if (!hasOwn(map, key)) {
      map[key] = {
        files: [],
        primary: null,
        primarySource: null,
        tagDensity: {},
        warnings: [],
      };
    }
    const entry = map[key];

    // tagDensity doubles as the "already seen this file" index, which avoids a
    // linear scan of entry.files for every tag.
    if (hasOwn(entry.tagDensity, tag.file)) {
      entry.tagDensity[tag.file] += 1;
    } else {
      entry.files.push(tag.file);
      entry.tagDensity[tag.file] = 1;
    }
  }

  // Third pass: resolve primary for each AC entry. Every entry has at least one
  // file here, because entries are only created when a tag contributes.
  for (const acKey of Object.keys(map)) {
    const entry = map[acKey];
    const featureId = acKey.split('/')[0];

    // Designated primary takes precedence — only if that file actually contributes to this AC.
    // If a feature designates a primary file but the AC isn't tagged in that file, fall back to inference.
    // @cap-decision Designated primary requires the file to actually contain at least one tag for this AC. Otherwise primary:true on an unrelated file (e.g. a barrel index) would mislead the trace.
    const designatedFile = designatedPrimaryByFeature.get(featureId);
    if (designatedFile && hasOwn(entry.tagDensity, designatedFile)) {
      entry.primary = designatedFile;
      entry.primarySource = 'designated';
      continue;
    }

    // A single contributing file is trivially the primary; no warning needed.
    if (entry.files.length === 1) {
      entry.primary = entry.files[0];
      entry.primarySource = 'inferred';
      continue;
    }

    // Multiple files contribute and no designated primary — pick by tag density.
    // @cap-decision Tag density (count of contributing tags per file) is the simplest defensible heuristic. Future signals could include @cap-feature presence, file size, or import graph centrality, but those add complexity for marginal gain in a heuristic-anyway choice.
    let bestFile = null;
    let bestCount = -1;
    // Iterate files in stable order so ties are broken by first-appearance.
    for (const f of entry.files) {
      const count = entry.tagDensity[f] || 0;
      if (count > bestCount) {
        bestCount = count;
        bestFile = f;
      }
    }
    entry.primary = bestFile;
    entry.primarySource = 'inferred';
    entry.warnings.push(
      `AC ${acKey} spans ${entry.files.length} files with no @cap-feature(...primary:true) tag — inferred primary: ${bestFile}`
    );
  }

  return map;
}
535
+
536
+ // @cap-api detectOrphans(tags, featureIds) -- Compare tags against Feature Map entries, fuzzy-match hints for orphans.
537
+ // Returns: Array of { tag, hint } where hint is the closest matching feature ID.
538
+ // @cap-todo(ref:AC-15) Orphan tags flagged with fuzzy-match hint suggesting closest existing feature ID
539
+ /**
540
+ * @param {CapTag[]} tags - Array of extracted tags
541
+ * @param {string[]} featureIds - Known feature IDs from Feature Map (e.g., ['F-001', 'F-002'])
542
+ * @returns {{ tag: CapTag, hint: string|null }[]}
543
+ */
544
function detectOrphans(tags, featureIds) {
  const knownFeatures = new Set(featureIds);

  // A tag is an orphan when it names a feature that the Feature Map does not list.
  // Tags with no feature at all are not orphans; they are simply unassigned.
  const isOrphan = (tag) => {
    const id = tag.metadata.feature;
    return Boolean(id) && !knownFeatures.has(id);
  };

  // Each orphan is paired with the closest known feature ID (or null) as a fix hint.
  return tags
    .filter(isOrphan)
    .map((tag) => ({ tag, hint: findClosestMatch(tag.metadata.feature, featureIds) }));
}
560
+
561
+ // @cap-decision Simple character-level distance for fuzzy matching -- no external library needed
562
+ /**
563
+ * Compute edit distance between two strings (Levenshtein).
564
+ * @param {string} a
565
+ * @param {string} b
566
+ * @returns {number}
567
+ */
568
function editDistance(a, b) {
  const rows = a.length;
  const cols = b.length;

  // Row 0 of the Levenshtein table: turning "" into b[0..j) costs j insertions.
  let previous = Array.from({ length: cols + 1 }, (_unused, j) => j);

  for (let i = 1; i <= rows; i++) {
    // Column 0: turning a[0..i) into "" costs i deletions.
    const current = [i];
    for (let j = 1; j <= cols; j++) {
      const deletion = previous[j] + 1;
      const insertion = current[j - 1] + 1;
      const substitution = previous[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
      current[j] = Math.min(deletion, insertion, substitution);
    }
    // Only the previous row is ever read, so the full matrix is not kept.
    previous = current;
  }

  return previous[cols];
}
582
+
583
+ /**
584
+ * Find the closest matching string from candidates using edit distance.
585
+ * @param {string} target
586
+ * @param {string[]} candidates
587
+ * @returns {string|null}
588
+ */
589
function findClosestMatch(target, candidates) {
  if (candidates.length === 0) return null;

  // Comparison is case-insensitive; the candidate is returned in its original casing.
  const needle = target.toLowerCase();
  let winner = null;
  let winnerDistance = Infinity;

  candidates.forEach((candidate) => {
    const distance = editDistance(needle, candidate.toLowerCase());
    // Strict "<" keeps the earliest candidate when several are equally close.
    if (distance < winnerDistance) {
      winnerDistance = distance;
      winner = candidate;
    }
  });

  // Only suggest when the distance is at most half the target length (rounded up);
  // anything further away is more likely noise than a typo.
  const threshold = Math.ceil(target.length / 2);
  return winnerDistance <= threshold ? winner : null;
}
605
+
606
+ // @cap-todo(ref:AC-78) /cap:scan shall traverse all packages in a monorepo
607
+ // @cap-todo(ref:AC-93) Zero runtime dependencies -- uses only Node.js built-ins
608
+ // @cap-todo(ref:AC-94) Tag scanner uses native RegExp -- no comment-parser or AST parser
609
+ // @cap-todo(ref:AC-95) File discovery uses fs.readdirSync with recursive walk -- no glob library
610
+ // @cap-todo(ref:AC-96) CLI argument parsing uses existing parseNamedArgs() pattern
611
+
612
+ // @cap-api detectWorkspaces(projectRoot) -- Detects monorepo workspaces from package.json and lerna.json.
613
+ // Returns: { isMonorepo: boolean, packages: string[] }
614
+ /**
615
+ * @param {string} projectRoot - Absolute path to project root
616
+ * @returns {{ isMonorepo: boolean, packages: string[] }}
617
+ */
618
function detectWorkspaces(projectRoot) {
  const result = { isMonorepo: false, packages: [] };
  const inRoot = (name) => path.join(projectRoot, name);

  // Probes run in a fixed order and the first one that identifies a monorepo wins:
  // package.json -> pnpm-workspace.yaml -> nx.json -> lerna.json.

  // 1) package.json workspaces (npm/yarn/pnpm)
  const manifestPath = inRoot('package.json');
  if (fs.existsSync(manifestPath)) {
    try {
      const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
      const declared = manifest.workspaces;
      if (declared) {
        result.isMonorepo = true;
        // Either a bare array of globs or an object with a `packages` array.
        const globs = Array.isArray(declared) ? declared : (declared.packages || []);
        result.packages = resolveWorkspaceGlobs(projectRoot, globs);
      }
    } catch (_e) {
      // Malformed package.json
    }
  }
  if (result.isMonorepo) return result;

  // 2) pnpm-workspace.yaml
  const pnpmPath = inRoot('pnpm-workspace.yaml');
  if (fs.existsSync(pnpmPath)) {
    try {
      const yamlText = fs.readFileSync(pnpmPath, 'utf8');
      // Simple YAML parsing for packages array — handles:
      //   packages:
      //     - "apps/*"
      //     - "packages/*"
      const listMatch = yamlText.match(/packages:\s*\n((?:\s+-\s*.+\n?)*)/);
      if (listMatch) {
        result.isMonorepo = true;
        const globs = [];
        for (const rawLine of listMatch[1].split('\n')) {
          // Strip the leading "- " bullet and any surrounding quotes/whitespace.
          const glob = rawLine.replace(/^\s*-\s*['"]?/, '').replace(/['"]?\s*$/, '');
          if (glob) globs.push(glob);
        }
        result.packages = resolveWorkspaceGlobs(projectRoot, globs);
      }
    } catch (_e) {
      // Malformed pnpm-workspace.yaml
    }
  }
  if (result.isMonorepo) return result;

  // 3) nx.json (NX workspace — look for project patterns or apps/packages dirs)
  const nxPath = inRoot('nx.json');
  if (fs.existsSync(nxPath)) {
    try {
      const nx = JSON.parse(fs.readFileSync(nxPath, 'utf8'));
      // A parseable nx.json is enough to treat the project as a monorepo.
      result.isMonorepo = true;
      // NX may define workspaceLayout or rely on convention (apps/, packages/, libs/)
      const layout = nx.workspaceLayout || {};
      const globs = [];
      if (layout.appsDir) globs.push(`${layout.appsDir}/*`);
      if (layout.libsDir) globs.push(`${layout.libsDir}/*`);
      // Fallback: check common NX directories
      if (globs.length === 0) {
        ['apps', 'packages', 'libs']
          .filter((dir) => fs.existsSync(inRoot(dir)))
          .forEach((dir) => globs.push(`${dir}/*`));
      }
      if (globs.length > 0) {
        result.packages = resolveWorkspaceGlobs(projectRoot, globs);
      }
    } catch (_e) {
      // Malformed nx.json
    }
  }
  if (result.isMonorepo) return result;

  // 4) lerna.json
  const lernaPath = inRoot('lerna.json');
  if (fs.existsSync(lernaPath)) {
    try {
      const lerna = JSON.parse(fs.readFileSync(lernaPath, 'utf8'));
      result.isMonorepo = true;
      // When lerna.json lists no packages, fall back to "packages/*".
      const globs = lerna.packages || ['packages/*'];
      result.packages = resolveWorkspaceGlobs(projectRoot, globs);
    } catch (_e) {
      // Malformed lerna.json
    }
  }

  return result;
}
709
+
710
+ // @cap-api resolveWorkspaceGlobs(projectRoot, patterns) -- Expands workspace glob patterns to actual directories.
711
+ // @cap-decision Uses fs.readdirSync instead of glob library for workspace pattern expansion. Handles only simple patterns (dir/* and dir/**).
712
+ /**
713
+ * @param {string} projectRoot - Absolute path to project root
714
+ * @param {string[]} patterns - Workspace glob patterns (e.g., ["packages/*", "apps/*"])
715
+ * @returns {string[]} - Array of relative package directory paths
716
+ */
717
function resolveWorkspaceGlobs(projectRoot, patterns) {
  const packages = [];
  const seen = new Set();
  // Overlapping patterns (e.g. "packages/*" plus "packages/a") must not list a
  // package twice, otherwise callers scan the same directory repeatedly.
  const addPackage = (relPath) => {
    if (seen.has(relPath)) return;
    seen.add(relPath);
    packages.push(relPath);
  };

  // Tolerate a missing or non-array `patterns` value coming from a malformed manifest.
  const patternList = Array.isArray(patterns) ? patterns : [];

  for (const pattern of patternList) {
    if (typeof pattern !== 'string' || pattern === '') continue;

    // Drop trailing slashes so "packages/a/" and "packages/*/" behave like their
    // slash-less forms.
    const trimmed = pattern.replace(/[\\/]+$/, '');
    const hasGlob = trimmed.includes('*');

    // Strip trailing glob: "packages/*" -> "packages", "apps/**" -> "apps".
    // Normalizing removes a leading "./" so returned paths line up with the
    // project-root-relative file paths on tags (as produced by path.relative).
    const baseDir = path.normalize(trimmed.replace(/\/\*+$/, ''));
    const fullPath = path.join(projectRoot, baseDir);

    let entries = [];
    try {
      // statSync throws for missing paths, which also covers a directory that
      // disappears mid-scan; either way the pattern is skipped.
      if (!fs.statSync(fullPath).isDirectory()) continue;
      if (hasGlob) entries = fs.readdirSync(fullPath, { withFileTypes: true });
    } catch (_e) {
      // Skip missing or unreadable directories
      continue;
    }

    // If pattern has no glob, it is a direct package reference
    if (!hasGlob) {
      addPackage(baseDir);
      continue;
    }

    // Enumerate immediate subdirectories, ignoring dot-directories.
    for (const entry of entries) {
      if (entry.isDirectory() && !entry.name.startsWith('.')) {
        addPackage(path.join(baseDir, entry.name));
      }
    }
  }

  return packages;
}
751
+
752
+ // @cap-api scanMonorepo(projectRoot, options) -- Scans all workspace packages in a monorepo for @cap-* tags.
753
+ // @cap-todo(ref:AC-79) Feature Map entries support cross-package file references (e.g., packages/core/src/auth.ts)
754
+ // @cap-todo(ref:AC-80) Works seamlessly with single-repo projects -- returns regular scanDirectory results if not a monorepo
755
+ /**
756
+ * Scans a monorepo or single repo for @cap-* tags.
757
+ * In monorepo mode: scans root + each workspace package.
758
+ * In single-repo mode: delegates to scanDirectory.
759
+ * All file paths are relative to project root for cross-package references.
760
+ *
761
+ * @param {string} projectRoot - Absolute path to project root
762
+ * @param {Object} [options]
763
+ * @param {string[]} [options.extensions] - File extensions to include
764
+ * @param {string[]} [options.exclude] - Directory names to exclude
765
+ * @returns {{ tags: CapTag[], isMonorepo: boolean, packages: string[] }}
766
+ */
767
function scanMonorepo(projectRoot, options = {}) {
  const { isMonorepo, packages } = detectWorkspaces(projectRoot);

  // Every scan reports paths relative to the monorepo root, not the package root.
  const scan = (dir) => scanDirectory(dir, { ...options, projectRoot });

  if (!isMonorepo) {
    // Single repo -- delegate to base scanner
    return { tags: scan(projectRoot), isMonorepo: false, packages: [] };
  }

  // Monorepo -- scan root and each package, merging results.
  const merged = [];
  const seenLocations = new Set();

  // Keeps the first tag seen for each "file:line" location.
  // NOTE(review): this also collapses distinct tags that share a file and line
  // within a single scan result — confirm that is intended.
  const absorb = (tags) => {
    for (const tag of tags) {
      const location = `${tag.file}:${tag.line}`;
      if (seenLocations.has(location)) continue;
      seenLocations.add(location);
      merged.push(tag);
    }
  };

  // Root scan first. NOTE(review): nothing in this function removes workspace
  // directories from the root walk; any overlap with the per-package scans is
  // dropped by the location-based dedupe above.
  absorb(scan(projectRoot));

  // Then each workspace package that exists on disk.
  for (const pkg of packages) {
    const pkgDir = path.join(projectRoot, pkg);
    if (fs.existsSync(pkgDir)) absorb(scan(pkgDir));
  }

  return { tags: merged, isMonorepo: true, packages };
}
817
+
818
+ // @cap-api groupByPackage(tags) -- Groups tags by their workspace package based on file path prefix.
819
+ /**
820
+ * @param {CapTag[]} tags - Array of extracted tags
821
+ * @param {string[]} packages - Known workspace package paths
822
+ * @returns {Object<string, CapTag[]>}
823
+ */
824
function groupByPackage(tags, packages) {
  // Every known package gets a bucket up front, so empty packages still appear.
  const groups = { '(root)': [] };
  packages.forEach((pkg) => {
    groups[pkg] = [];
  });

  // A tag belongs to the first package whose path prefixes its file path,
  // using either "/" or the platform separator.
  const ownerOf = (tag) =>
    packages.find(
      (pkg) => tag.file.startsWith(pkg + '/') || tag.file.startsWith(pkg + path.sep)
    );

  for (const tag of tags) {
    const owner = ownerOf(tag);
    // Files outside every package are attributed to the synthetic "(root)" bucket.
    groups[owner === undefined ? '(root)' : owner].push(tag);
  }

  return groups;
}
846
+
847
+ // @cap-api scanApp(projectRoot, appPath, options) -- Scans a single app directory plus referenced shared packages.
848
+ // When activeApp is set, scans only the active app and shared packages it imports.
849
+ /**
850
+ * @param {string} projectRoot - Absolute path to project root
851
+ * @param {string} appPath - Relative app path (e.g., "apps/flow")
852
+ * @param {Object} [options]
853
+ * @param {string[]} [options.extensions] - File extensions to include
854
+ * @param {string[]} [options.exclude] - Directory names to exclude
855
+ * @returns {{ tags: CapTag[], scannedDirs: string[] }}
856
+ */
857
function scanApp(projectRoot, appPath, options = {}) {
  // Scans a project-relative directory; tag paths stay relative to projectRoot.
  const scanRelative = (relDir) =>
    scanDirectory(path.join(projectRoot, relDir), { ...options, projectRoot });
  const locationOf = (tag) => `${tag.file}:${tag.line}`;

  const scannedDirs = [appPath];

  // Scan the app directory itself; all of its tags are kept as-is.
  const tags = [...scanRelative(appPath)];
  const seenLocations = new Set(tags.map(locationOf));

  // Then scan the shared packages referenced by this app via package.json dependencies.
  for (const pkg of detectSharedPackages(projectRoot, appPath)) {
    if (!fs.existsSync(path.join(projectRoot, pkg))) continue;
    scannedDirs.push(pkg);

    // Tags from shared packages are only added at locations not already seen.
    for (const tag of scanRelative(pkg)) {
      const location = locationOf(tag);
      if (seenLocations.has(location)) continue;
      seenLocations.add(location);
      tags.push(tag);
    }
  }

  return { tags, scannedDirs };
}
891
+
892
+ // @cap-api detectSharedPackages(projectRoot, appPath) -- Detects workspace packages referenced by an app's package.json.
893
+ /**
894
+ * @param {string} projectRoot - Absolute path to project root
895
+ * @param {string} appPath - Relative app path
896
+ * @returns {string[]} - Array of relative paths to shared packages
897
+ */
898
function detectSharedPackages(projectRoot, appPath) {
  const appManifestPath = path.join(projectRoot, appPath, 'package.json');
  if (!fs.existsSync(appManifestPath)) return [];

  let appManifest;
  try {
    appManifest = JSON.parse(fs.readFileSync(appManifestPath, 'utf8'));
  } catch (_e) {
    // Malformed app package.json: nothing can be resolved.
    return [];
  }

  // Collect all dependency names (runtime and dev) declared by the app.
  const dependencyNames = new Set([
    ...Object.keys(appManifest.dependencies || {}),
    ...Object.keys(appManifest.devDependencies || {}),
  ]);

  // Resolve workspace packages -- check if any dep matches a workspace package name
  const { isMonorepo, packages: workspacePackages } = detectWorkspaces(projectRoot);
  if (!isMonorepo) return [];

  // Keep the workspace packages whose own package.json `name` is one of the
  // app's declared dependencies.
  return workspacePackages.filter((wsPkg) => {
    const wsManifestPath = path.join(projectRoot, wsPkg, 'package.json');
    if (!fs.existsSync(wsManifestPath)) return false;
    try {
      const wsManifest = JSON.parse(fs.readFileSync(wsManifestPath, 'utf8'));
      return Boolean(wsManifest.name) && dependencyNames.has(wsManifest.name);
    } catch (_e) {
      // Skip malformed
      return false;
    }
  });
}
932
+
933
+ // =====================================================================
934
+ // F-046: Polylingual comment-context detection
935
+ // =====================================================================
936
+ //
937
+ // @cap-feature(feature:F-046, primary:true) Strengthen Polylingual Comment-Token Detection in Tag Scanner
938
+ // @cap-decision Comment-style table is extension-driven (per-language) rather than heuristic — extensions are deterministic, low-risk, and match how editors highlight code. A heuristic (e.g., shebang-sniffing) would over-trigger on polyglot files like .md with embedded code blocks.
939
+ // @cap-decision Backward-compat strategy: keep `extractTags(content, file) -> CapTag[]` legacy shape (Option A from spec) and add a new `extractTagsWithContext(content, file) -> { tags, warnings }`. F-046/AC-5 requires JS/TS callsites to be untouched, and this avoids churning ~30 callers.
940
+ // @cap-decision Comment-context detection is implemented as an in-place line-by-line state machine rather than a tokenizer or AST. The scanner has been regex-based since F-001; adopting a tokenizer for one feature would balloon scope and add maintenance burden. The state machine handles 95%+ of real-world cases (line + block comments, multi-line block tracking) with ~80 lines of logic.
941
+ // @cap-risk Edge cases not covered: nested string-quote inside block comment (e.g., `# "@cap-feature" still in code`), here-docs in shell, raw strings in Python (r"@cap..."), C++ raw string literals R"(@cap)". These are extremely rare for tag-bearing files and would require a real lexer to handle correctly. The warning system in AC-3 catches most false positives; AC-4's --strict mode is the safety net for CI.
942
+ // @cap-risk Unrecognized extensions fall back to "treat as JS-style line + block comments" so behavior is at least no worse than today. Documented below at COMMENT_STYLES_DEFAULT.
943
+ // @cap-feature(feature:F-046, ac:F-046/AC-3) String-literal awareness — classifyTagContext now tracks string state alongside comment state. A line like `const x = "// @cap-feature(F-999) fake"` is correctly classified as a string-literal context, the @cap-* token is NOT extracted as a tag, and a structured warning is emitted instead. Implementation: STRING_STYLES per-extension table, _matchStringOpen / _findStringClose helpers, and string-state extension to blockState carried across lines (Python triple-quotes, TOML triple-quotes, Rust raw strings, JS template literals all multi-line capable). See tests/cap-tag-scanner-polylingual-adversarial.test.cjs `'F-046/AC-3 string literal containing comment token is correctly rejected'` for the inverted witness tests that pin the fix.
944
+
945
+ /**
946
+ * @typedef {Object} CommentStyle
947
+ * @property {string[]} line - Line-comment tokens (e.g., ["//"])
948
+ * @property {Array<[string,string]>} block - Block-comment open/close pairs (e.g., [["/*", "*\/"]])
949
+ */
950
+
951
+ // @cap-todo(ac:F-046/AC-1) Per-extension comment style table covering Python, Ruby, Shell, Go, Rust, HTML, CSS in addition to JS/TS.
952
+ // Order within `line` matters: longer tokens must come first so that `///` matches before `//`.
953
+ /** @type {Object<string, CommentStyle>} */
954
const COMMENT_STYLES = (() => {
  // Each factory returns a FRESH object, so no two extensions share a style
  // instance (mutating one entry can never leak into another).
  const style = (line, block) => ({ line, block });
  const cLike = () => style(['//'], [['/*', '*/']]);
  const hashOnly = () => style(['#'], []);
  const markupOnly = () => style([], [['<!--', '-->']]);

  return {
    // JS / TS family — preserved from existing behavior (AC-5).
    '.js': cLike(),
    '.cjs': cLike(),
    '.mjs': cLike(),
    '.ts': cLike(),
    '.tsx': cLike(),
    '.jsx': cLike(),
    // Python — line `#`; block via triple-quoted strings (used as docstring comments).
    '.py': style(['#'], [['"""', '"""'], ["'''", "'''"]]),
    // Ruby — line `#`; block via =begin/=end.
    '.rb': style(['#'], [['=begin', '=end']]),
    // Shell family — line `#` only.
    '.sh': hashOnly(),
    '.bash': hashOnly(),
    '.zsh': hashOnly(),
    // Go — same as JS family.
    '.go': cLike(),
    // Rust — `///` doc-comment must be matched before `//`.
    '.rs': style(['///', '//'], [['/*', '*/']]),
    // HTML / Markdown HTML comments — block only.
    '.html': markupOnly(),
    '.htm': markupOnly(),
    '.md': markupOnly(),
    // CSS / SCSS — block always; SCSS adds line comments.
    '.css': style([], [['/*', '*/']]),
    '.scss': cLike(),
    // YAML / TOML — line `#` only.
    '.yaml': hashOnly(),
    '.yml': hashOnly(),
    '.toml': hashOnly(),
    // SQL — line `--`, block `/* */`. (No Lua entry exists in this table.)
    '.sql': style(['--'], [['/*', '*/']]),
    // C / C++ / Java — same as JS family.
    '.java': cLike(),
    '.c': cLike(),
    '.cpp': cLike(),
    '.h': cLike(),
    '.hpp': cLike(),
  };
})();
994
+
995
+ // @cap-decision Default fallback for unrecognized extensions: a permissive union of every known line and block comment token (not just JS-style). This is the safest non-breaking default — files we don't know about should behave as they did before F-046 (regex-only).
996
+ /** @type {CommentStyle} */
997
// Fallback style: the union of all line tokens and block pairs the scanner knows about.
const COMMENT_STYLES_DEFAULT = {
  line: ['//', '#', '--'],
  block: [
    ['/*', '*/'],
    ['"""', '"""'],
    ["'''", "'''"],
    ['<!--', '-->'],
    ['=begin', '=end'],
  ],
};
998
+
999
+ /**
1000
+ * Pick the comment style for a file path based on its extension.
1001
+ * @param {string} filePath
1002
+ * @returns {CommentStyle}
1003
+ */
1004
function getCommentStyle(filePath) {
  // A missing path is treated as "", which has no extension. Lookup is
  // case-insensitive on the extension.
  const extension = path.extname(filePath || '').toLowerCase();
  const known = COMMENT_STYLES[extension];
  // Extensions absent from the table use the shared default style.
  return known ? known : COMMENT_STYLES_DEFAULT;
}
1008
+
1009
+ // =====================================================================
1010
+ // F-046/AC-3 — String-literal awareness
1011
+ // =====================================================================
1012
+ //
1013
+ // @cap-feature(feature:F-046) String-state tracker — prevents @cap-* tokens INSIDE string literals from being misclassified as comments. Resolves the AC-3 bug pinned by adversarial tests.
1014
+ // @cap-decision String-state lives in the same blockState object as comment-state, walked synchronously by classifyTagContext. A separate pass would double the asymptotic work and require keeping two parallel cursors in sync; one walker that checks string-open BEFORE comment-open at each position is simpler and provably correct.
1015
+ // @cap-decision Per-language STRING_STYLES table — same shape philosophy as COMMENT_STYLES. Order within the array matters: longer / more-specific tokens (triple-quotes, raw-string prefixes like r" or r#") must be listed before their substring counterparts.
1016
+ // @cap-risk(out-of-scope) Ruby `<<~END` heredocs and Shell `<< EOF` heredocs are NOT tracked. The body of a heredoc is plain text but the scanner sees it as code. Documented limitation; pinned by adversarial tests `'heredocs and multi-line strings (current behaviour)'`. A real fix requires tokenizing the heredoc-introducer syntax, which is non-trivial (delimiter is identifier-defined, can be quoted or unquoted, can be `<<~` for indent-stripping). Out of scope for this iteration.
1017
+ // @cap-risk(out-of-scope) Rust nested `/* /* */ */` block comments still close on the first `*/`. Same documented limitation as before F-046/AC-3 fix — nesting requires a depth counter, separate from string-state.
1018
+ // @cap-risk(out-of-scope) Markdown ```code fences``` are NOT understood as comments-or-strings. A tag inside a fenced code block is treated as a plain prose mention and emits a warning. Documented in adversarial test `'Markdown code fences are NOT understood'`.
1019
+
1020
+ /**
1021
+ * @typedef {Object} StringSyntax
1022
+ * @property {string} open - Opening token (e.g., '"', "'", '"""', 'r#"').
1023
+ * @property {string} close - Closing token. For raw strings with hash counts (r#"..."#), the runtime computes the actual close from the open.
1024
+ * @property {boolean} escapes - When true, backslash escapes the next character; when false (raw strings, shell single-quotes, Python r"..."), the backslash is literal.
1025
+ * @property {boolean} multiline - When true, the string can span multiple lines (Python """, TOML ''', etc).
1026
+ * @property {boolean} [rustRaw] - Special-case marker for Rust r#"..."# raw strings whose close depends on hash count of open.
1027
+ */
1028
+
1029
+ // @cap-feature(feature:F-046) Per-extension string syntax table — used by classifyTagContext to detect when the cursor enters a string literal so comment-token matches inside the string are ignored.
1030
+ // @cap-decision Order matters: longer / prefixed tokens come first so `"""` matches before `"`, `r"..."` matches before `"..."`. Otherwise the shorter token would consume the prefix and misclassify.
1031
/**
 * Per-extension string-literal syntax table, keyed by lower-case file extension (with dot).
 *
 * Within each array the order is significant: the first entry whose open token matches wins,
 * so longer / prefixed tokens (`"""`, `r"`, `rb"`) must precede the plain quote they contain.
 *
 * Some entries carry an `isRaw: true` marker in addition to the StringSyntax fields. It flags
 * raw-string forms (backslash is literal); `escapes: false` is what actually drives matching.
 *
 * The table is assembled from small factories purely to avoid repeating identical rows; every
 * extension still receives its own freshly-created array and entry objects, so no two
 * extensions share state.
 *
 * @type {Object<string, StringSyntax[]>}
 */
const STRING_STYLES = (() => {
  // Single-token factories: (escapes, multiline) -> StringSyntax.
  const dq = (escapes, multiline) => ({ open: '"', close: '"', escapes, multiline });
  const sq = (escapes, multiline) => ({ open: "'", close: "'", escapes, multiline });
  const bt = (escapes, multiline) => ({ open: '`', close: '`', escapes, multiline });

  // JS / TS family — double, single, and template literals (backtick treated as plain string; interpolation NOT tracked).
  const jsFamily = () => [dq(true, false), sq(true, false), bt(true, true)];
  // Shell — double, single (no escapes in single-quoted), backtick command substitution. Heredocs NOT tracked.
  const shellFamily = () => [dq(true, false), sq(false, false), bt(true, false)];
  // Both quote styles with backslash escapes (CSS / SCSS, C / C++ / Java).
  const escapedQuotes = () => [dq(true, false), sq(true, false)];
  // Both quote styles, backslash is literal (HTML attribute values).
  const verbatimQuotes = () => [dq(false, false), sq(false, false)];
  // Escaping double quote + literal single quote (Ruby, YAML).
  const literalSingleQuote = () => [dq(true, false), sq(false, false)];

  return {
    '.js': jsFamily(),
    '.cjs': jsFamily(),
    '.mjs': jsFamily(),
    '.ts': jsFamily(),
    '.tsx': jsFamily(),
    '.jsx': jsFamily(),
    // Python — single-line strings only here. Triple-quoted strings are treated as BLOCK COMMENTS via COMMENT_STYLES['.py'] for docstring compatibility (this matches Python convention where """...""" at module/function/class level is the docstring).
    // @cap-decision Triple-quoted strings are NOT in Python STRING_STYLES — they remain in COMMENT_STYLES.block to preserve the F-046/AC-1 contract that Python docstrings carry tags. Edge case: a triple-quoted string used as a literal value (e.g., `s = """hello"""`) is misclassified as a comment, but this is the existing behavior the original tests pin (see `'Python inline triple-quote'` test).
    '.py': [
      // Prefixed strings come BEFORE plain strings so `r"..."` matches before `"..."`.
      { open: 'rb"', close: '"', escapes: false, multiline: false, isRaw: true },
      { open: "rb'", close: "'", escapes: false, multiline: false, isRaw: true },
      { open: 'br"', close: '"', escapes: false, multiline: false, isRaw: true },
      { open: "br'", close: "'", escapes: false, multiline: false, isRaw: true },
      { open: 'r"', close: '"', escapes: false, multiline: false, isRaw: true },
      { open: "r'", close: "'", escapes: false, multiline: false, isRaw: true },
      { open: 'b"', close: '"', escapes: true, multiline: false },
      { open: "b'", close: "'", escapes: true, multiline: false },
      { open: 'f"', close: '"', escapes: true, multiline: false },
      { open: "f'", close: "'", escapes: true, multiline: false },
      dq(true, false),
      sq(true, false),
    ],
    // Ruby — double + single. Heredocs NOT tracked.
    '.rb': literalSingleQuote(),
    '.sh': shellFamily(),
    '.bash': shellFamily(),
    '.zsh': shellFamily(),
    // Go — double, single (rune literal), backtick raw string.
    '.go': [dq(true, false), sq(true, false), bt(false, true)],
    // Rust — raw strings with hash counts handled specially. r#"..."#, r##"..."##, etc.
    '.rs': [
      { open: 'r#"', close: '"#', escapes: false, multiline: true, rustRaw: true },
      { open: 'r"', close: '"', escapes: false, multiline: true, isRaw: true },
      // Byte strings may contain line breaks exactly like plain strings, so they must be
      // tracked across lines too; otherwise the continuation lines are scanned as code.
      { open: 'b"', close: '"', escapes: true, multiline: true },
      dq(true, true),
      // Char literals 'x' — single quotes in Rust are char literals, but treating them as 1-char strings is fine for our purposes.
      sq(true, false),
    ],
    // HTML — attribute strings inside tags. Treat anywhere as string for our purposes (over-flag is acceptable).
    '.html': verbatimQuotes(),
    '.htm': verbatimQuotes(),
    // Markdown — no string literals natively; leave empty so prose is not treated as string.
    '.md': [],
    // CSS / SCSS — both quote styles.
    '.css': escapedQuotes(),
    '.scss': escapedQuotes(),
    // YAML — both quote styles. Single-quote escape via doubling NOT tracked exactly; over-flag is acceptable.
    '.yaml': literalSingleQuote(),
    '.yml': literalSingleQuote(),
    // TOML — triple-quote multiline first, then plain.
    '.toml': [
      { open: '"""', close: '"""', escapes: true, multiline: true },
      { open: "'''", close: "'''", escapes: false, multiline: true },
      dq(true, false),
      sq(false, false),
    ],
    // SQL — single-quote string with doubled-quote escape. Treat as escape-aware for simplicity.
    '.sql': [sq(true, false), dq(true, false)],
    // C / C++ / Java — double for string, single for char.
    '.java': escapedQuotes(),
    '.c': escapedQuotes(),
    '.cpp': escapedQuotes(),
    '.h': escapedQuotes(),
    '.hpp': escapedQuotes(),
  };
})();
1101
+
1102
+ // @cap-decision Default string-style fallback for unknown extensions: double + single quotes with escape handling. Matches behavior of nearly every C-family language. Files of unknown type are over-flagged rather than under-flagged (safer).
1103
/**
 * Fallback string syntax for extensions missing from STRING_STYLES: double quote first, then
 * single quote, both single-line with backslash escapes.
 * @type {StringSyntax[]}
 */
const STRING_STYLES_DEFAULT = ['"', "'"].map((quote) => ({
  open: quote,
  close: quote,
  escapes: true,
  multiline: false,
}));
1108
+
1109
/**
 * Resolve the string-syntax table that applies to a file.
 *
 * The extension is compared case-insensitively. A falsy path, a path without an extension, or
 * an extension absent from STRING_STYLES yields STRING_STYLES_DEFAULT.
 *
 * @param {string} filePath
 * @returns {StringSyntax[]}
 */
function getStringStyle(filePath) {
  const normalizedExt = path.extname(filePath || '').toLowerCase();
  const knownSyntaxes = STRING_STYLES[normalizedExt];
  return knownSyntaxes || STRING_STYLES_DEFAULT;
}
1118
+
1119
/**
 * Test whether a string literal opens exactly at column `i` of `line`.
 *
 * Syntaxes are tried in array order and the first hit is returned, so callers must list longer
 * or prefixed open tokens ahead of the shorter tokens they contain.
 *
 * Entries flagged `rustRaw` do not use their literal `open` / `close` tokens. They match `r`,
 * then one or more `#`, then `"`, and the reported close token is `"` followed by the same
 * number of `#` characters.
 *
 * @param {StringSyntax[]} stringStyle
 * @param {string} line
 * @param {number} i
 * @returns {{ syntax: StringSyntax, openLen: number, close: string } | null}
 */
function _matchStringOpen(stringStyle, line, i) {
  for (const syntax of stringStyle) {
    if (!syntax.rustRaw) {
      // Ordinary entry: a plain prefix comparison at column i.
      if (line.startsWith(syntax.open, i)) {
        return { syntax, openLen: syntax.open.length, close: syntax.close };
      }
      continue;
    }

    // Rust hashed raw string: r#"..."#, r##"..."##, ...
    if (line[i] !== 'r') continue;
    let cursor = i + 1;
    while (cursor < line.length && line[cursor] === '#') cursor += 1;
    const hashes = cursor - (i + 1);
    // Zero hashes is the plain r"..." form, which a separate (non-rustRaw) entry handles.
    if (hashes === 0 || line[cursor] !== '"') continue;

    return {
      syntax,
      openLen: cursor - i + 1, // r + hashes + opening quote
      close: '"' + '#'.repeat(hashes),
    };
  }
  return null;
}
1151
+
1152
/**
 * Scan `line` from column `i` for the close token of the string that is currently open.
 *
 * When `escapes` is true, a backslash that is not the last character of the line hides the
 * character after it, so an escaped quote never counts as the close.
 *
 * @param {string} line
 * @param {number} i - Column to start at (normally just past the open token)
 * @param {string} close - Close token to look for
 * @param {boolean} escapes - Whether backslash escapes the following character
 * @returns {number} Column where the close token starts, or -1 if it is not on this line
 */
function _findStringClose(line, i, close, escapes) {
  const end = line.length;
  for (let pos = i; pos < end; pos += 1) {
    const startsEscapePair = escapes && line[pos] === '\\' && pos + 1 < end;
    if (startsEscapePair) {
      // Step over the escaped character; the loop increment steps over the backslash itself.
      pos += 1;
      continue;
    }
    if (line.startsWith(close, pos)) {
      return pos;
    }
  }
  return -1;
}
1178
+
1179
/**
 * Pick the syntax token that starts at column `i`, choosing among block-comment openers,
 * string openers and line-comment tokens.
 *
 * Selection rule: the longest token wins (so Python `"""` as a block comment beats `"` as a
 * string). On equal length the higher priority wins: block comment (3) > string (2) >
 * line comment (1). Between candidates of equal length AND equal priority the one seen first
 * is kept.
 *
 * Result shapes (each also carries the numeric `priority` used for tie-breaking):
 *   { kind: 'blockComment', open, close, length }
 *   { kind: 'string', syntax, openLen, close, length }
 *   { kind: 'lineComment', token, length }
 *   null when no token starts at `i`.
 *
 * @param {CommentStyle} style
 * @param {StringSyntax[]} stringStyle
 * @param {string} line
 * @param {number} i
 */
function _longestTokenMatch(style, stringStyle, line, i) {
  let winner = null;

  // Replace the current winner only when the challenger is strictly better.
  const offer = (challenger) => {
    const isBetter = winner === null
      || challenger.length > winner.length
      || (challenger.length === winner.length && challenger.priority > winner.priority);
    if (isBetter) {
      winner = challenger;
    }
  };

  for (const [open, close] of style.block) {
    if (line.startsWith(open, i)) {
      offer({ kind: 'blockComment', open, close, length: open.length, priority: 3 });
    }
  }

  const stringHit = _matchStringOpen(stringStyle, line, i);
  if (stringHit) {
    offer({
      kind: 'string',
      syntax: stringHit.syntax,
      openLen: stringHit.openLen,
      close: stringHit.close,
      length: stringHit.openLen,
      priority: 2,
    });
  }

  for (const token of style.line) {
    if (line.startsWith(token, i)) {
      offer({ kind: 'lineComment', token, length: token.length, priority: 1 });
    }
  }

  return winner;
}
1230
+
1231
+ /**
1232
+ * @typedef {Object} ClassifyResult
1233
+ * @property {('comment'|'string'|'code'|'unknown')} context - Where the @cap-* token was found
1234
+ * @property {string} reason - Short human-readable reason ("python triple-quote block", "JS line comment", "outside any comment")
1235
+ */
1236
+
1237
+ // @cap-todo(ac:F-046/AC-3) classifyTagContext returns 'comment' when the tag column is inside a recognized comment, 'string' when inside a string literal, else 'code' (both 'string' and 'code' are warning candidates).
1238
+ // @cap-feature(feature:F-046) classifyTagContext is string-state aware — at each cursor position it checks string-open BEFORE comment-open so a `// @cap-...` token inside `"..."` is correctly classified as a string-literal context, not a comment.
1239
/**
 * Classify whether `tagColumn` in `lineContent` is inside a comment, a string, or code.
 *
 * The line is walked left to right from column 0 up to `tagColumn`. At each position, in order:
 *   1. A block comment carried over in `blockState.open` — look for its close token.
 *   2. A multi-line string carried over in `blockState.stringClose` — look for its close token.
 *   3. Otherwise ask _longestTokenMatch for the token starting here. The longest token wins;
 *      on equal length block-comment beats string beats line-comment. (So a `//` inside
 *      `"..."` is never seen as a comment, because the string is entered first and skipped
 *      as a unit, while Python `"""` as a block comment beats `"` as a string.)
 *   4. No token here — advance one character.
 *
 * The walk stops as soon as the construct containing `tagColumn` is known. If the cursor
 * passes `tagColumn` without being inside anything, the tag is in code.
 *
 * Side effects on `blockState` (callers that only want a verdict should pass a copy):
 *   - carried comment/string state is cleared when its close token is found before the tag;
 *   - `open` is set when a block comment opens and does not close on this line;
 *   - the string fields are set when a `multiline` string opens and does not close on this line.
 *
 * @param {CommentStyle} style
 * @param {string} lineContent - Full line text
 * @param {number} tagColumn - 0-based column of the @cap-... match
 * @param {{ open: [string,string]|null, stringClose: string|null, stringEscapes: boolean, stringOpenToken: string|null }} blockState - Mutable block-comment + string state across lines
 * @param {StringSyntax[]} [stringStyle] - String syntax table. When omitted (or not an array) no
 *   string literals are recognized, which preserves the behavior for 4-argument callers.
 * @returns {ClassifyResult}
 */
function classifyTagContext(style, lineContent, tagColumn, blockState, stringStyle) {
  const ss = Array.isArray(stringStyle) ? stringStyle : [];
  const n = lineContent.length;
  let i = 0;

  // Loop invariant: i <= tagColumn. Hence any construct that begins at (or before) i and has
  // not ended before tagColumn necessarily contains the tag — no further bound check needed.
  while (i <= tagColumn && i < n) {
    // 1) Carried-over block comment from a previous line.
    if (blockState.open) {
      const [open, close] = blockState.open;
      const closeIdx = lineContent.indexOf(close, i);
      if (closeIdx === -1 || tagColumn < closeIdx) {
        return { context: 'comment', reason: `inside block comment ${open}...${close}` };
      }
      // Comment ends before the tag: drop the carried state and resume after the close token.
      i = closeIdx + close.length;
      blockState.open = null;
      continue;
    }

    // 2) Carried-over multi-line string from a previous line.
    if (blockState.stringClose) {
      const close = blockState.stringClose;
      const closeIdx = _findStringClose(lineContent, i, close, !!blockState.stringEscapes);
      if (closeIdx === -1 || tagColumn < closeIdx) {
        return { context: 'string', reason: `inside multi-line string literal ${blockState.stringOpenToken || ''}...${close}` };
      }
      // String ends before the tag: drop the carried state and resume after the close token.
      i = closeIdx + close.length;
      blockState.stringClose = null;
      blockState.stringEscapes = false;
      blockState.stringOpenToken = null;
      continue;
    }

    // 3) Longest token starting at i across {block-comment-open, string-open, line-comment}.
    const tokenMatch = _longestTokenMatch(style, ss, lineContent, i);

    if (!tokenMatch) {
      // 4) Nothing special here.
      i++;
      continue;
    }

    if (tokenMatch.kind === 'string') {
      const { syntax, close } = tokenMatch;
      const closeIdx = _findStringClose(lineContent, i + tokenMatch.openLen, close, syntax.escapes);
      const reason = `inside string literal ${syntax.open}...${close}`;
      if (closeIdx === -1) {
        // Unterminated on this line. Only multi-line syntaxes carry over to the next line;
        // either way the rest of this line (including the tag) is string content.
        if (syntax.multiline) {
          blockState.stringClose = close;
          blockState.stringEscapes = syntax.escapes;
          blockState.stringOpenToken = syntax.open;
        }
        return { context: 'string', reason };
      }
      if (tagColumn < closeIdx + close.length) {
        return { context: 'string', reason };
      }
      i = closeIdx + close.length;
      continue;
    }

    if (tokenMatch.kind === 'lineComment') {
      // A line comment swallows everything to end of line, tag included.
      return { context: 'comment', reason: `line comment ${tokenMatch.token}` };
    }

    if (tokenMatch.kind === 'blockComment') {
      const { open, close } = tokenMatch;
      const closeIdx = lineContent.indexOf(close, i + open.length);
      if (closeIdx === -1) {
        // Comment runs past end of line: remember it for the following lines.
        blockState.open = [open, close];
        return { context: 'comment', reason: `inside block comment ${open}...${close}` };
      }
      if (tagColumn < closeIdx + close.length) {
        return { context: 'comment', reason: `block comment ${open}...${close}` };
      }
      i = closeIdx + close.length;
      continue;
    }

    // Unknown token kind — treat like plain text.
    i++;
  }

  // Cursor walked past tagColumn without entering any comment or string — tag is in code.
  return { context: 'code', reason: 'outside any comment' };
}
1371
+
1372
+ /**
1373
+ * @typedef {Object} ScannerWarning
1374
+ * @property {string} file - Relative file path
1375
+ * @property {number} line - 1-based line number
1376
+ * @property {number} column - 0-based column index of the @cap-* token
1377
+ * @property {string} reason - Human-readable reason the tag was rejected
1378
+ * @property {string} raw - Full original line text
1379
+ */
1380
+
1381
+ // @cap-todo(ac:F-046/AC-1) extractTagsWithContext is the polylingual entry point — same regex match as legacy extractTags, but each match is verified to land inside a real comment.
1382
+ // @cap-todo(ac:F-046/AC-3) Tags found outside comments are not parsed; they appear in `warnings` instead so callers (and CI in --strict mode) can surface them.
1383
/**
 * Polylingual tag extraction for one file's text.
 *
 * Every line is searched for `@cap-(feature|todo|risk|decision)` occurrences. Each occurrence
 * is then classified against the comment / string syntax chosen from `filePath`'s extension:
 *   - inside a comment          -> parsed and appended to `tags`;
 *   - inside a string literal   -> appended to `warnings` with a string-literal reason;
 *   - anywhere else             -> appended to `warnings` as outside any comment context.
 *
 * Block-comment and multi-line-string state is carried from line to line.
 *
 * @param {string} content
 * @param {string} filePath
 * @returns {{ tags: CapTag[], warnings: ScannerWarning[] }}
 */
function extractTagsWithContext(content, filePath) {
  const style = getCommentStyle(filePath);
  const stringStyle = getStringStyle(filePath);
  const tags = [];
  const warnings = [];

  // Deliberately loose: no leading comment token is required here, because the comment check
  // is done by classifyTagContext afterwards. The optional (...) group is the metadata block.
  const looseTagRe = /@cap-(feature|todo|risk|decision)(?:\(([^)]*)\))?[ \t]*([^\r\n]*)/g;

  // @cap-feature(feature:F-046) blockState now also tracks string-literal state for Python triple-quotes, TOML triple-quotes, Rust raw strings, JS template literals, etc.
  /** @type {{ open: [string,string]|null, stringClose: string|null, stringEscapes: boolean, stringOpenToken: string|null }} */
  const blockState = { open: null, stringClose: null, stringEscapes: false, stringOpenToken: null };

  content.split('\n').forEach((line, lineIdx) => {
    // The regex is global and reused, so rewind it before scanning a new line.
    looseTagRe.lastIndex = 0;

    // Collect every candidate first; classification happens afterwards.
    const candidates = [];
    for (let hit = looseTagRe.exec(line); hit !== null; hit = looseTagRe.exec(line)) {
      candidates.push({
        index: hit.index,
        type: hit[1],
        metadataStr: hit[2] || '',
        description: (hit[3] || '').trim(),
      });
    }

    for (const candidate of candidates) {
      // Each classification walks the line from column 0 on its own copy of the state as it
      // stood at the start of this line (blockState itself is only advanced after this loop).
      const scratchState = Object.assign({}, blockState);
      const verdict = classifyTagContext(style, line, candidate.index, scratchState, stringStyle);

      if (verdict.context === 'comment') {
        // Only @cap-todo descriptions are inspected for a subtype.
        let subtype = null;
        if (candidate.type === 'todo') {
          const subtypeHit = candidate.description.match(SUBTYPE_RE);
          if (subtypeHit) subtype = subtypeHit[1];
        }
        tags.push({
          type: candidate.type,
          file: filePath,
          line: lineIdx + 1,
          metadata: parseMetadata(candidate.metadataStr),
          description: candidate.description,
          raw: line,
          subtype,
        });
        continue;
      }

      // Not a comment: never parsed as a tag, only reported.
      // @cap-feature(feature:F-046) Tag found inside a string literal — emit warning with explicit string-literal reason.
      const reason = verdict.context === 'string'
        ? `@cap-${candidate.type} found inside a string literal (${verdict.reason}) — not parsed as tag`
        : `@cap-${candidate.type} found outside any comment context (${verdict.reason}) — likely a string literal or code reference`;
      warnings.push({
        file: filePath,
        line: lineIdx + 1,
        column: candidate.index,
        reason,
        raw: line,
      });
    }

    // Whether or not the line had tags, push the persistent state through the whole line so
    // the next line starts from the right comment / string context.
    _advanceBlockState(style, line, blockState, stringStyle);
  });

  return { tags, warnings };
}
1494
+
1495
/**
 * Run the comment / string state machine across one whole line, mutating `blockState` so it
 * describes what is still open when the line ends. Internal helper; it produces no verdicts.
 *
 * Per position: a carried block comment is resolved first, then a carried multi-line string,
 * and otherwise the token chosen by _longestTokenMatch decides what is entered.
 *
 * @param {CommentStyle} style
 * @param {string} line
 * @param {{ open: [string,string]|null, stringClose: string|null, stringEscapes: boolean, stringOpenToken: string|null }} blockState
 * @param {StringSyntax[]} [stringStyle] - Optional string syntax table; when omitted, no string literals are recognized (back-compat).
 */
function _advanceBlockState(style, line, blockState, stringStyle) {
  const syntaxes = Array.isArray(stringStyle) ? stringStyle : [];
  const end = line.length;
  let pos = 0;

  while (pos < end) {
    // Still inside a block comment that began on an earlier line (or earlier on this one).
    if (blockState.open) {
      const terminator = blockState.open[1];
      const hit = line.indexOf(terminator, pos);
      if (hit === -1) return; // comment continues onto the next line
      blockState.open = null;
      pos = hit + terminator.length;
      continue;
    }

    // Still inside a multi-line string.
    if (blockState.stringClose) {
      const terminator = blockState.stringClose;
      const hit = _findStringClose(line, pos, terminator, !!blockState.stringEscapes);
      if (hit === -1) return; // string continues onto the next line
      blockState.stringClose = null;
      blockState.stringEscapes = false;
      blockState.stringOpenToken = null;
      pos = hit + terminator.length;
      continue;
    }

    const token = _longestTokenMatch(style, syntaxes, line, pos);
    const kind = token ? token.kind : null;

    switch (kind) {
      case 'string': {
        const hit = _findStringClose(line, pos + token.openLen, token.close, token.syntax.escapes);
        if (hit === -1) {
          // Unterminated: only multi-line syntaxes survive the line break.
          if (token.syntax.multiline) {
            blockState.stringClose = token.close;
            blockState.stringEscapes = token.syntax.escapes;
            blockState.stringOpenToken = token.syntax.open;
          }
          return;
        }
        pos = hit + token.close.length;
        break;
      }
      case 'lineComment':
        // Rest of the line is comment text; nothing carries over.
        return;
      case 'blockComment': {
        const hit = line.indexOf(token.close, pos + token.open.length);
        if (hit === -1) {
          blockState.open = [token.open, token.close];
          return;
        }
        pos = hit + token.close.length;
        break;
      }
      default:
        // Ordinary character.
        pos += 1;
    }
  }
}
1573
+
1574
+ // @cap-todo(ac:F-046/AC-4) scanFileWithContext + scanDirectoryWithContext expose the new {tags, warnings} shape and support a strict mode that throws on any warning.
1575
/**
 * Polylingual scan of a single file on disk.
 *
 * The file is read as UTF-8 and handed to extractTagsWithContext under its path relative to
 * `projectRoot`. A file that cannot be read yields an empty result instead of throwing.
 *
 * @param {string} filePath - Absolute path
 * @param {string} projectRoot - Absolute project root
 * @returns {{ tags: CapTag[], warnings: ScannerWarning[] }}
 */
function scanFileWithContext(filePath, projectRoot) {
  let source;
  try {
    source = fs.readFileSync(filePath, 'utf8');
  } catch (_e) {
    // Best-effort: unreadable files contribute nothing.
    return { tags: [], warnings: [] };
  }
  return extractTagsWithContext(source, path.relative(projectRoot, filePath));
}
1591
+
1592
/**
 * Polylingual recursive directory scan. Aggregates {tags, warnings} over every matching file.
 *
 * Directories that cannot be listed are skipped. Directories and files rejected by the scope
 * filter are skipped. Files are matched on their extension exactly as returned by
 * `path.extname` (no case folding).
 *
 * @param {string} dirPath
 * @param {Object} [options]
 * @param {string[]} [options.extensions] - Extensions to scan; defaults to the keys of COMMENT_STYLES.
 * @param {string[]} [options.exclude] - Passed to the scope filter as `dirExcludes` when no `scope` is given.
 * @param {string} [options.projectRoot] - Defaults to `dirPath`.
 * @param {Object} [options.scope] - Pre-built scope filter exposing `isExcluded(path, isDir)`; built via scopeModule.buildScopeFilter when absent.
 * @param {boolean} [options.strict] - When true, throws an Error if any warnings are emitted.
 * @returns {{ tags: CapTag[], warnings: ScannerWarning[] }}
 * @throws {Error} In strict mode with at least one warning; the error has `code` 'CAP_STRICT_TAG_VIOLATION' and a `warnings` array.
 */
function scanDirectoryWithContext(dirPath, options = {}) {
  const extensions = options.extensions || Object.keys(COMMENT_STYLES);
  const projectRoot = options.projectRoot || dirPath;
  const scope = options.scope || scopeModule.buildScopeFilter(projectRoot, {
    dirExcludes: options.exclude,
  });
  const tags = [];
  const warnings = [];

  // Depth-first, in directory-listing order, so results keep a stable file order.
  const visit = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (_e) {
      return;
    }

    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        if (!scope.isExcluded(fullPath, true)) {
          visit(fullPath);
        }
        continue;
      }

      // Anything that is neither a directory nor a regular file is ignored.
      if (!entry.isFile()) continue;
      if (!extensions.includes(path.extname(entry.name))) continue;
      if (scope.isExcluded(fullPath, false)) continue;

      const scanned = scanFileWithContext(fullPath, projectRoot);
      tags.push(...scanned.tags);
      warnings.push(...scanned.warnings);
    }
  };

  visit(dirPath);

  if (options.strict && warnings.length > 0) {
    // Show at most five offenders inline; the full list travels on the error object.
    const summary = warnings
      .slice(0, 5)
      .map((w) => ` ${w.file}:${w.line}:${w.column} - ${w.reason}`)
      .join('\n');
    const more = warnings.length > 5 ? `\n ... and ${warnings.length - 5} more` : '';
    const err = new Error(`cap-tag-scanner --strict: found ${warnings.length} tag(s) outside comment context\n${summary}${more}`);
    err.warnings = warnings;
    err.code = 'CAP_STRICT_TAG_VIOLATION';
    throw err;
  }

  return { tags, warnings };
}
1648
+
1649
+ // =====================================================================
1650
+ // End F-046 polylingual extension
1651
+ // =====================================================================
1652
+
1653
// Matches a line whose first non-blank token is a comment opener
// (//, /*, *, #, --, """ or ''') directly followed by a known legacy tag.
// @cap-todo Detect legacy @gsd-* tags and recommend /cap:migrate
const LEGACY_TAG_RE = /^[ \t]*(?:\/\/|\/\*|\*|#|--|"""|''')[ \t]*@gsd-(feature|todo|risk|decision|context|status|depends|ref|pattern|api|constraint)/;

/**
 * Detect legacy @gsd-* tags in scanned files.
 * Walks the project tree and counts every line that matches LEGACY_TAG_RE.
 * Unreadable directories and files are skipped silently.
 *
 * @param {string} projectRoot - Absolute path to project root
 * @param {Object} [options]
 * @param {string[]} [options.extensions] - File extensions to include. Defaults to SUPPORTED_EXTENSIONS.
 * @param {string[]} [options.exclude] - Forwarded to scopeModule.buildScopeFilter as `dirExcludes` when `options.scope` is not supplied.
 * @param {{ isExcluded: function(string, boolean): boolean }} [options.scope] - Pre-built scope filter; when given, no filter is built from `options.exclude`.
 * @returns {{ count: number, files: string[], recommendation: string }}
 *   `count` is the number of matching lines, `files` the sorted project-relative
 *   paths containing at least one match, and `recommendation` is empty when
 *   nothing was found.
 */
function detectLegacyTags(projectRoot, options = {}) {
  const extensions = options.extensions || SUPPORTED_EXTENSIONS;
  const scope = options.scope || scopeModule.buildScopeFilter(projectRoot, {
    dirExcludes: options.exclude,
  });
  const result = { count: 0, files: [], recommendation: '' };
  const fileSet = new Set();

  function scanFileForLegacy(filePath) {
    let content;
    try {
      content = fs.readFileSync(filePath, 'utf8');
    } catch (_e) {
      return;
    }
    // readFileSync keeps a leading byte-order mark when decoding UTF-8.
    // Left in place it sits before the comment opener on line 1 and defeats
    // the regex's start-of-line anchor, so drop it first.
    if (content.charCodeAt(0) === 0xfeff) {
      content = content.slice(1);
    }
    let matches = 0;
    for (const line of content.split('\n')) {
      if (LEGACY_TAG_RE.test(line)) matches++;
    }
    if (matches > 0) {
      result.count += matches;
      fileSet.add(path.relative(projectRoot, filePath));
    }
  }

  function walk(dir) {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (_e) {
      return;
    }
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (scope.isExcluded(fullPath, true)) continue;
        walk(fullPath);
      } else if (entry.isFile()) {
        const ext = path.extname(entry.name);
        if (!extensions.includes(ext)) continue;
        if (scope.isExcluded(fullPath, false)) continue;
        scanFileForLegacy(fullPath);
      }
    }
  }

  walk(projectRoot);
  result.files = Array.from(fileSet).sort();

  if (result.count > 0) {
    result.recommendation = `Found ${result.count} legacy @gsd-* tag(s) in ${result.files.length} file(s). Run /cap:migrate to convert them to @cap-* format.`;
  }

  return result;
}
1725
+
1726
+ module.exports = {
1727
+ CAP_TAG_TYPES,
1728
+ CAP_TAG_RE,
1729
+ // F-063 design tag recognition — additive, separate from CAP_TAG_TYPES to preserve F-001 regression tests.
1730
+ CAP_DESIGN_TAG_TYPES,
1731
+ CAP_DESIGN_TAG_RE,
1732
+ SUPPORTED_EXTENSIONS,
1733
+ DEFAULT_EXCLUDE,
1734
+ LEGACY_TAG_RE,
1735
+ isUnifiedAnchorsEnabled,
1736
+ // F-094 multi-line @cap-* description capture
1737
+ isMultilineCaptureEnabled,
1738
+ detectAnchorToken,
1739
+ captureContinuations,
1740
+ scanFile,
1741
+ scanDirectory,
1742
+ extractTags,
1743
+ parseMetadata,
1744
+ groupByFeature,
1745
+ buildAcFileMap,
1746
+ detectOrphans,
1747
+ editDistance,
1748
+ detectWorkspaces,
1749
+ resolveWorkspaceGlobs,
1750
+ scanMonorepo,
1751
+ groupByPackage,
1752
+ detectLegacyTags,
1753
+ scanApp,
1754
+ detectSharedPackages,
1755
+ // F-046 polylingual extension
1756
+ COMMENT_STYLES,
1757
+ COMMENT_STYLES_DEFAULT,
1758
+ STRING_STYLES,
1759
+ STRING_STYLES_DEFAULT,
1760
+ getCommentStyle,
1761
+ getStringStyle,
1762
+ classifyTagContext,
1763
+ extractTagsWithContext,
1764
+ scanFileWithContext,
1765
+ scanDirectoryWithContext,
1766
+ };