@rkarim08/sia 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355)
  1. package/.claude-plugin/marketplace.json +35 -0
  2. package/.claude-plugin/plugin.json +27 -0
  3. package/.mcp.json +13 -0
  4. package/CLAUDE.md +226 -0
  5. package/LICENSE +202 -0
  6. package/PLUGIN_README.md +253 -0
  7. package/README.md +1013 -0
  8. package/agents/sia-changelog-writer.md +89 -0
  9. package/agents/sia-code-reviewer.md +86 -0
  10. package/agents/sia-conflict-resolver.md +100 -0
  11. package/agents/sia-convention-enforcer.md +69 -0
  12. package/agents/sia-debug.md +106 -0
  13. package/agents/sia-decision-reviewer.md +101 -0
  14. package/agents/sia-dependency-tracker.md +80 -0
  15. package/agents/sia-explain.md +126 -0
  16. package/agents/sia-feature.md +116 -0
  17. package/agents/sia-knowledge-capture.md +117 -0
  18. package/agents/sia-lead-architecture-advisor.md +93 -0
  19. package/agents/sia-lead-team-health.md +107 -0
  20. package/agents/sia-migration.md +100 -0
  21. package/agents/sia-onboarding.md +115 -0
  22. package/agents/sia-orientation.md +99 -0
  23. package/agents/sia-pm-briefing.md +106 -0
  24. package/agents/sia-pm-risk-advisor.md +82 -0
  25. package/agents/sia-qa-analyst.md +116 -0
  26. package/agents/sia-qa-regression-map.md +94 -0
  27. package/agents/sia-refactor.md +115 -0
  28. package/agents/sia-regression.md +112 -0
  29. package/agents/sia-security-audit.md +125 -0
  30. package/agents/sia-test-advisor.md +91 -0
  31. package/hooks/hooks.json +98 -0
  32. package/migrations/bridge/001_initial.sql +34 -0
  33. package/migrations/episodic/001_initial.sql +35 -0
  34. package/migrations/meta/001_initial.sql +68 -0
  35. package/migrations/semantic/001_initial.sql +292 -0
  36. package/migrations/semantic/002_ontology.sql +89 -0
  37. package/migrations/semantic/003_freshness.sql +63 -0
  38. package/migrations/semantic/004_v5_unified_schema.sql +194 -0
  39. package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
  40. package/migrations/semantic/006_tree_sitter.sql +6 -0
  41. package/migrations/semantic/007_branch_snapshots.sql +22 -0
  42. package/package.json +110 -0
  43. package/scripts/branch-switch.sh +13 -0
  44. package/scripts/build-wasm-grammars.sh +81 -0
  45. package/scripts/post-compact.sh +8 -0
  46. package/scripts/post-tool-use.sh +10 -0
  47. package/scripts/pre-compact.sh +8 -0
  48. package/scripts/session-end.sh +8 -0
  49. package/scripts/session-start.sh +8 -0
  50. package/scripts/start-mcp.ts +45 -0
  51. package/scripts/stop-hook.sh +8 -0
  52. package/scripts/user-prompt-submit.sh +8 -0
  53. package/scripts/viz-server.ts +152 -0
  54. package/skills/sia-brainstorm/SKILL.md +156 -0
  55. package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
  56. package/skills/sia-brainstorm/scripts/helper.js +95 -0
  57. package/skills/sia-brainstorm/scripts/server.cjs +338 -0
  58. package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
  59. package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
  60. package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
  61. package/skills/sia-brainstorm/visual-companion.md +286 -0
  62. package/skills/sia-capture/SKILL.md +64 -0
  63. package/skills/sia-compare/SKILL.md +33 -0
  64. package/skills/sia-conflicts/SKILL.md +38 -0
  65. package/skills/sia-debug-workflow/SKILL.md +120 -0
  66. package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
  67. package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
  68. package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
  69. package/skills/sia-digest/SKILL.md +23 -0
  70. package/skills/sia-dispatch/SKILL.md +69 -0
  71. package/skills/sia-dispatch/agent-task-template.md +99 -0
  72. package/skills/sia-doctor/SKILL.md +39 -0
  73. package/skills/sia-execute/SKILL.md +70 -0
  74. package/skills/sia-execute-plan/SKILL.md +85 -0
  75. package/skills/sia-export-import/SKILL.md +49 -0
  76. package/skills/sia-export-knowledge/SKILL.md +46 -0
  77. package/skills/sia-finish/SKILL.md +100 -0
  78. package/skills/sia-finish/pr-summary-template.md +54 -0
  79. package/skills/sia-freshness/SKILL.md +38 -0
  80. package/skills/sia-history/SKILL.md +42 -0
  81. package/skills/sia-impact/SKILL.md +70 -0
  82. package/skills/sia-index/SKILL.md +54 -0
  83. package/skills/sia-install/SKILL.md +39 -0
  84. package/skills/sia-lead-compliance/SKILL.md +16 -0
  85. package/skills/sia-lead-drift-report/SKILL.md +16 -0
  86. package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
  87. package/skills/sia-learn/SKILL.md +58 -0
  88. package/skills/sia-plan/SKILL.md +68 -0
  89. package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
  90. package/skills/sia-playbooks/SKILL.md +29 -0
  91. package/skills/sia-playbooks/reference-feature.md +100 -0
  92. package/skills/sia-playbooks/reference-flagging.md +50 -0
  93. package/skills/sia-playbooks/reference-orientation.md +92 -0
  94. package/skills/sia-playbooks/reference-regression.md +115 -0
  95. package/skills/sia-playbooks/reference-review.md +64 -0
  96. package/skills/sia-playbooks/reference-tools.md +239 -0
  97. package/skills/sia-pm-decision-log/SKILL.md +28 -0
  98. package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
  99. package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
  100. package/skills/sia-prune/SKILL.md +45 -0
  101. package/skills/sia-qa-coverage/SKILL.md +28 -0
  102. package/skills/sia-qa-flaky/SKILL.md +20 -0
  103. package/skills/sia-qa-report/SKILL.md +26 -0
  104. package/skills/sia-reindex/SKILL.md +30 -0
  105. package/skills/sia-review-respond/SKILL.md +88 -0
  106. package/skills/sia-review-respond/pushback-patterns.md +90 -0
  107. package/skills/sia-search/SKILL.md +47 -0
  108. package/skills/sia-setup/SKILL.md +82 -0
  109. package/skills/sia-setup/setup-checklist.md +97 -0
  110. package/skills/sia-stats/SKILL.md +36 -0
  111. package/skills/sia-status/SKILL.md +44 -0
  112. package/skills/sia-sync/SKILL.md +46 -0
  113. package/skills/sia-team/SKILL.md +64 -0
  114. package/skills/sia-test/SKILL.md +92 -0
  115. package/skills/sia-test/testing-anti-patterns.md +104 -0
  116. package/skills/sia-tour/SKILL.md +29 -0
  117. package/skills/sia-upgrade/SKILL.md +43 -0
  118. package/skills/sia-verify/SKILL.md +81 -0
  119. package/skills/sia-visualize/SKILL.md +28 -0
  120. package/skills/sia-visualize-live/SKILL.md +55 -0
  121. package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
  122. package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
  123. package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
  124. package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
  125. package/skills/sia-workspace/SKILL.md +57 -0
  126. package/src/agent/claude-md-template-flagging.md +219 -0
  127. package/src/agent/claude-md-template.md +213 -0
  128. package/src/agent/modules/sia-feature.md +100 -0
  129. package/src/agent/modules/sia-flagging.md +50 -0
  130. package/src/agent/modules/sia-orientation.md +92 -0
  131. package/src/agent/modules/sia-regression.md +115 -0
  132. package/src/agent/modules/sia-review.md +64 -0
  133. package/src/agent/modules/sia-tools.md +239 -0
  134. package/src/ast/extractors/c-include.ts +189 -0
  135. package/src/ast/extractors/csharp-project.ts +260 -0
  136. package/src/ast/extractors/prisma-schema.ts +44 -0
  137. package/src/ast/extractors/project-manifest.ts +111 -0
  138. package/src/ast/extractors/sql-schema.ts +67 -0
  139. package/src/ast/extractors/tier-a.ts +423 -0
  140. package/src/ast/extractors/tier-b.ts +289 -0
  141. package/src/ast/extractors/tier-dispatch.ts +247 -0
  142. package/src/ast/index-worker.ts +108 -0
  143. package/src/ast/indexer.ts +484 -0
  144. package/src/ast/languages.ts +408 -0
  145. package/src/ast/pagerank-builder.ts +125 -0
  146. package/src/ast/path-utils.ts +137 -0
  147. package/src/ast/tree-sitter/backends/native.ts +57 -0
  148. package/src/ast/tree-sitter/backends/wasm.ts +39 -0
  149. package/src/ast/tree-sitter/call-walker.ts +44 -0
  150. package/src/ast/tree-sitter/edit-computer.ts +55 -0
  151. package/src/ast/tree-sitter/query-runner.ts +46 -0
  152. package/src/ast/tree-sitter/service.ts +174 -0
  153. package/src/ast/tree-sitter/tree-cache.ts +39 -0
  154. package/src/ast/tree-sitter/types.ts +79 -0
  155. package/src/ast/watcher.ts +322 -0
  156. package/src/capture/chunker.ts +169 -0
  157. package/src/capture/consolidate.ts +127 -0
  158. package/src/capture/edge-inferrer.ts +161 -0
  159. package/src/capture/embedder.ts +166 -0
  160. package/src/capture/embedding-cache.ts +73 -0
  161. package/src/capture/flag-processor.ts +64 -0
  162. package/src/capture/hook.ts +67 -0
  163. package/src/capture/pipeline.ts +450 -0
  164. package/src/capture/prompts/consolidate.ts +25 -0
  165. package/src/capture/prompts/edge-infer.ts +29 -0
  166. package/src/capture/prompts/extract-flagged.ts +36 -0
  167. package/src/capture/prompts/extract.ts +42 -0
  168. package/src/capture/tokenizer.ts +147 -0
  169. package/src/capture/track-a-ast.ts +93 -0
  170. package/src/capture/track-b-llm.ts +149 -0
  171. package/src/capture/types.ts +64 -0
  172. package/src/cli/commands/community.ts +137 -0
  173. package/src/cli/commands/compare.ts +123 -0
  174. package/src/cli/commands/conflicts.ts +41 -0
  175. package/src/cli/commands/digest.ts +197 -0
  176. package/src/cli/commands/disable-flagging.ts +34 -0
  177. package/src/cli/commands/doctor.ts +240 -0
  178. package/src/cli/commands/download-model.ts +161 -0
  179. package/src/cli/commands/enable-flagging.ts +34 -0
  180. package/src/cli/commands/export-knowledge.ts +208 -0
  181. package/src/cli/commands/export.ts +85 -0
  182. package/src/cli/commands/freshness.ts +164 -0
  183. package/src/cli/commands/graph.ts +51 -0
  184. package/src/cli/commands/history.ts +139 -0
  185. package/src/cli/commands/import.ts +335 -0
  186. package/src/cli/commands/install.ts +156 -0
  187. package/src/cli/commands/lead-report.ts +241 -0
  188. package/src/cli/commands/learn.ts +321 -0
  189. package/src/cli/commands/pm-report.ts +413 -0
  190. package/src/cli/commands/prune.ts +75 -0
  191. package/src/cli/commands/qa-report.ts +278 -0
  192. package/src/cli/commands/reindex.ts +104 -0
  193. package/src/cli/commands/rollback.ts +70 -0
  194. package/src/cli/commands/search.ts +103 -0
  195. package/src/cli/commands/server.ts +91 -0
  196. package/src/cli/commands/share.ts +33 -0
  197. package/src/cli/commands/stats.ts +79 -0
  198. package/src/cli/commands/status.ts +176 -0
  199. package/src/cli/commands/sync.ts +96 -0
  200. package/src/cli/commands/team.ts +118 -0
  201. package/src/cli/commands/tour.ts +157 -0
  202. package/src/cli/commands/visualize-live.ts +162 -0
  203. package/src/cli/commands/workspace.ts +117 -0
  204. package/src/cli/index.ts +424 -0
  205. package/src/cli/learn-progress.ts +87 -0
  206. package/src/community/detection-bridge.ts +344 -0
  207. package/src/community/leiden.ts +462 -0
  208. package/src/community/raptor.ts +210 -0
  209. package/src/community/scheduler.ts +74 -0
  210. package/src/community/summarize.ts +115 -0
  211. package/src/decay/archiver.ts +73 -0
  212. package/src/decay/bridge-orphan-cleanup.ts +212 -0
  213. package/src/decay/consolidation-sweep.ts +112 -0
  214. package/src/decay/decay.ts +116 -0
  215. package/src/decay/deep-validator.ts +62 -0
  216. package/src/decay/episodic-promoter.ts +132 -0
  217. package/src/decay/maintenance-scheduler.ts +326 -0
  218. package/src/decay/scheduler.ts +6 -0
  219. package/src/decay/session-sweeper.ts +79 -0
  220. package/src/decay/types.ts +17 -0
  221. package/src/freshness/confidence-decay.ts +122 -0
  222. package/src/freshness/cuckoo-filter.ts +176 -0
  223. package/src/freshness/deep-validation.ts +345 -0
  224. package/src/freshness/dirty-tracker.ts +237 -0
  225. package/src/freshness/file-watcher-layer.ts +119 -0
  226. package/src/freshness/firewall.ts +64 -0
  227. package/src/freshness/git-reconcile-layer.ts +161 -0
  228. package/src/freshness/inverted-index.ts +158 -0
  229. package/src/freshness/stale-read-layer.ts +222 -0
  230. package/src/graph/audit.ts +69 -0
  231. package/src/graph/bridge-db.ts +141 -0
  232. package/src/graph/communities.ts +195 -0
  233. package/src/graph/db-interface.ts +259 -0
  234. package/src/graph/edges.ts +163 -0
  235. package/src/graph/entities.ts +327 -0
  236. package/src/graph/episodic-db.ts +113 -0
  237. package/src/graph/flags.ts +31 -0
  238. package/src/graph/meta-db.ts +200 -0
  239. package/src/graph/semantic-db.ts +101 -0
  240. package/src/graph/session-resume.ts +56 -0
  241. package/src/graph/snapshots.ts +342 -0
  242. package/src/graph/staging.ts +151 -0
  243. package/src/graph/types.ts +128 -0
  244. package/src/hooks/adapters/claude-code.ts +21 -0
  245. package/src/hooks/adapters/cline.ts +43 -0
  246. package/src/hooks/adapters/cursor.ts +65 -0
  247. package/src/hooks/adapters/generic.ts +12 -0
  248. package/src/hooks/agent-detect.ts +34 -0
  249. package/src/hooks/claude-md-directives.ts +32 -0
  250. package/src/hooks/event-router.ts +182 -0
  251. package/src/hooks/extractors/pattern-detector.ts +111 -0
  252. package/src/hooks/handlers/post-compact.ts +30 -0
  253. package/src/hooks/handlers/post-tool-use.ts +403 -0
  254. package/src/hooks/handlers/pre-compact.ts +100 -0
  255. package/src/hooks/handlers/session-end.ts +47 -0
  256. package/src/hooks/handlers/session-start.ts +154 -0
  257. package/src/hooks/handlers/stop.ts +128 -0
  258. package/src/hooks/handlers/user-prompt-submit.ts +68 -0
  259. package/src/hooks/plugin-branch-switch.ts +68 -0
  260. package/src/hooks/plugin-common.ts +47 -0
  261. package/src/hooks/plugin-post-compact.ts +28 -0
  262. package/src/hooks/plugin-post-tool-use.ts +38 -0
  263. package/src/hooks/plugin-pre-compact.ts +37 -0
  264. package/src/hooks/plugin-session-end.ts +37 -0
  265. package/src/hooks/plugin-session-start.ts +75 -0
  266. package/src/hooks/plugin-stop.ts +61 -0
  267. package/src/hooks/plugin-user-prompt-submit.ts +47 -0
  268. package/src/hooks/types.ts +43 -0
  269. package/src/knowledge/discovery.ts +238 -0
  270. package/src/knowledge/external-refs.ts +98 -0
  271. package/src/knowledge/freshness.ts +221 -0
  272. package/src/knowledge/ingest.ts +330 -0
  273. package/src/knowledge/markdown-export.ts +229 -0
  274. package/src/knowledge/markdown-import.ts +359 -0
  275. package/src/knowledge/patterns.ts +74 -0
  276. package/src/knowledge/templates.ts +307 -0
  277. package/src/llm/ai-sdk-adapter.ts +46 -0
  278. package/src/llm/config.ts +88 -0
  279. package/src/llm/cost-tracker.ts +110 -0
  280. package/src/llm/prompts/extraction.ts +55 -0
  281. package/src/llm/prompts/summarization.ts +36 -0
  282. package/src/llm/prompts/validation.ts +37 -0
  283. package/src/llm/provider-registry.ts +68 -0
  284. package/src/llm/reliability.ts +179 -0
  285. package/src/llm/schemas.ts +52 -0
  286. package/src/mcp/freshness-annotator.ts +69 -0
  287. package/src/mcp/server.ts +949 -0
  288. package/src/mcp/tools/sia-ast-query.ts +225 -0
  289. package/src/mcp/tools/sia-at-time.ts +151 -0
  290. package/src/mcp/tools/sia-backlinks.ts +87 -0
  291. package/src/mcp/tools/sia-batch-execute.ts +169 -0
  292. package/src/mcp/tools/sia-by-file.ts +89 -0
  293. package/src/mcp/tools/sia-community.ts +113 -0
  294. package/src/mcp/tools/sia-doctor.ts +73 -0
  295. package/src/mcp/tools/sia-execute-file.ts +122 -0
  296. package/src/mcp/tools/sia-execute.ts +104 -0
  297. package/src/mcp/tools/sia-expand.ts +158 -0
  298. package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
  299. package/src/mcp/tools/sia-flag.ts +65 -0
  300. package/src/mcp/tools/sia-index.ts +111 -0
  301. package/src/mcp/tools/sia-note.ts +134 -0
  302. package/src/mcp/tools/sia-search.ts +105 -0
  303. package/src/mcp/tools/sia-stats.ts +63 -0
  304. package/src/mcp/tools/sia-sync-status.ts +44 -0
  305. package/src/mcp/tools/sia-upgrade.ts +247 -0
  306. package/src/mcp/truncate.ts +231 -0
  307. package/src/native/bridge.ts +167 -0
  308. package/src/native/fallback-ast-diff.ts +144 -0
  309. package/src/native/fallback-graph.ts +325 -0
  310. package/src/ontology/constraints.ts +56 -0
  311. package/src/ontology/errors.ts +8 -0
  312. package/src/ontology/middleware.ts +266 -0
  313. package/src/retrieval/bm25-search.ts +151 -0
  314. package/src/retrieval/context-assembly.ts +76 -0
  315. package/src/retrieval/graph-traversal.ts +168 -0
  316. package/src/retrieval/pagerank.ts +40 -0
  317. package/src/retrieval/query-classifier.ts +106 -0
  318. package/src/retrieval/reranker.ts +156 -0
  319. package/src/retrieval/search.ts +236 -0
  320. package/src/retrieval/throttle.ts +102 -0
  321. package/src/retrieval/vector-search.ts +203 -0
  322. package/src/retrieval/workspace-search.ts +130 -0
  323. package/src/sandbox/context-mode.ts +285 -0
  324. package/src/sandbox/credential-pass.ts +55 -0
  325. package/src/sandbox/executor.ts +235 -0
  326. package/src/security/pattern-detector.ts +127 -0
  327. package/src/security/rule-of-two.ts +50 -0
  328. package/src/security/sanitize.ts +46 -0
  329. package/src/security/semantic-consistency.ts +93 -0
  330. package/src/security/staging-promoter.ts +154 -0
  331. package/src/shared/config.ts +302 -0
  332. package/src/shared/diagnostics.ts +210 -0
  333. package/src/shared/errors.ts +48 -0
  334. package/src/shared/git-utils.ts +143 -0
  335. package/src/shared/llm-client.ts +120 -0
  336. package/src/shared/logger.ts +99 -0
  337. package/src/shared/types.ts +79 -0
  338. package/src/sync/client.ts +43 -0
  339. package/src/sync/conflict.ts +106 -0
  340. package/src/sync/dedup.ts +183 -0
  341. package/src/sync/hlc.ts +117 -0
  342. package/src/sync/keychain.ts +144 -0
  343. package/src/sync/pull.ts +232 -0
  344. package/src/sync/push.ts +131 -0
  345. package/src/types/chokidar.d.ts +23 -0
  346. package/src/visualization/graph-renderer.ts +312 -0
  347. package/src/visualization/subgraph-extract.ts +208 -0
  348. package/src/visualization/views/community-clusters.ts +246 -0
  349. package/src/visualization/views/dependency-map.ts +189 -0
  350. package/src/visualization/views/graph-explorer.ts +364 -0
  351. package/src/visualization/views/timeline.ts +247 -0
  352. package/src/workspace/api-contracts.ts +226 -0
  353. package/src/workspace/cross-repo.ts +61 -0
  354. package/src/workspace/detector.ts +190 -0
  355. package/src/workspace/manifest.ts +141 -0
@@ -0,0 +1,423 @@
1
+ // Module: tier-a — Full structural extraction for 15 Tier A languages
2
+
3
+ import { basename } from "node:path";
4
+ import type { CandidateFact } from "@/capture/types";
5
+
6
/**
 * Bundle of regular expressions for one language, split by the kind of
 * entity each list is meant to find. The extractor reads capture group 1 of
 * every match as the entity name.
 */
interface LanguagePatterns {
	/** Patterns for function and method declarations. */
	functions: RegExp[];
	/** Patterns for class-like declarations (classes, interfaces, structs, ...). */
	classes: RegExp[];
	/** Patterns for import / require / use statements. */
	imports: RegExp[];
	/** Patterns for call sites. */
	calls: RegExp[];
}
13
+
14
/**
 * Build a short context snippet for a match.
 *
 * The snippet starts at the beginning of the line that contains `matchIndex`
 * and extends forward across at most three line breaks (or to the end of the
 * content, whichever comes first). The result is whitespace-trimmed.
 */
function surroundingLines(content: string, matchIndex: number): string {
	// lastIndexOf yields -1 on the first line, so +1 conveniently gives 0.
	const snippetStart = content.lastIndexOf("\n", matchIndex) + 1;

	let snippetEnd = matchIndex;
	let breaksLeft = 3;
	while (breaksLeft > 0) {
		const nextBreak = content.indexOf("\n", snippetEnd + 1);
		if (nextBreak === -1) {
			// Ran out of line breaks: take everything up to the end.
			snippetEnd = content.length;
			break;
		}
		snippetEnd = nextBreak;
		breaksLeft -= 1;
	}

	return content.slice(snippetStart, snippetEnd).trim();
}
29
+
30
+ // ---------- Tier A pattern table ----------
31
+
32
/** Regex table for TypeScript sources (also reused for TSX). */
const tsPatterns: LanguagePatterns = {
	functions: [
		// `function name`, optionally prefixed by `export` and/or `async`
		/(?:export\s+)?(?:async\s+)?function\s+(\w+)/gm,
		// `const|let|var name = (` or `... = async (` — arrow-style bindings
		/(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s*)?\(/gm,
	],
	classes: [
		// class / abstract class
		/(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/gm,
		// interface
		/(?:export\s+)?interface\s+(\w+)/gm,
		// type alias (name followed by `<` or `=`)
		/(?:export\s+)?type\s+(\w+)\s*[<=]/gm,
		// enum
		/(?:export\s+)?enum\s+(\w+)/gm,
	],
	imports: [
		// `import { foo ...` — only the first named binding is captured
		/import\s+\{\s*(\w+)/gm,
		// `import * as name`
		/import\s+\*\s+as\s+(\w+)/gm,
		// `import name from`
		/import\s+(\w+)\s+from\s+/gm,
		// `require("module")` — captures the module specifier
		/require\s*\(\s*["']([^"']+)["']\s*\)/gm,
	],
	calls: [
		// bare call `name(` (not preceded by `.` or a word character)
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call `.name(`
		/\.(\w+)\s*\(/gm,
		// constructor call `new Name(`
		/new\s+(\w+)\s*\(/gm,
	],
};
64
+
65
/**
 * Regex table for JavaScript sources (also reused for JSX).
 *
 * Shares the TypeScript function, import and call patterns (the very same
 * RegExp objects). Only the class list differs: it keeps the `class` and
 * `enum` patterns and omits the `interface` and `type` ones.
 */
const jsPatterns: LanguagePatterns = {
	functions: tsPatterns.functions,
	classes: [
		/(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/gm,
		/(?:export\s+)?enum\s+(\w+)/gm,
	],
	imports: tsPatterns.imports,
	calls: tsPatterns.calls,
};
72
+
73
/** Regex table for Python sources. */
const pythonPatterns: LanguagePatterns = {
	functions: [
		// `def name` / `async def name`
		/(?:async\s+)?def\s+(\w+)/gm,
	],
	classes: [
		// `class Name` at the very start of a line only
		/^class\s+(\w+)/gm,
	],
	imports: [
		// `from module import name` — captures the first imported name
		/from\s+\S+\s+import\s+(\w+)/gm,
		// `import module` at the start of a line
		/^import\s+(\w+)/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// attribute call `.name(`
		/\.(\w+)\s*\(/gm,
	],
};
84
+
85
/** Regex table for Go sources. */
const goPatterns: LanguagePatterns = {
	functions: [
		// `func Name(` with an optional `(receiver)` in front of the name
		/func\s+(?:\([^)]*\)\s+)?(\w+)\s*\(/gm,
	],
	classes: [
		// `type Name struct`
		/type\s+(\w+)\s+struct\b/gm,
		// `type Name interface`
		/type\s+(\w+)\s+interface\b/gm,
	],
	imports: [
		// one-line form: `import "pkg"`
		/import\s+"([^"]+)"/gm,
		// indented `"pkg"` lines, as found inside `import ( ... )`
		/^\s+"([^"]+)"/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// selector call `.name(`
		/\.(\w+)\s*\(/gm,
	],
};
99
+
100
/** Regex table for Rust sources. */
const rustPatterns: LanguagePatterns = {
	functions: [
		// `fn name`, optionally prefixed by `pub` and/or `async`
		/(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/gm,
	],
	classes: [
		// struct
		/(?:pub\s+)?struct\s+(\w+)/gm,
		// enum
		/(?:pub\s+)?enum\s+(\w+)/gm,
		// trait
		/(?:pub\s+)?trait\s+(\w+)/gm,
	],
	imports: [
		// `use a::b::Name` — captures the final path segment
		/use\s+(?:\w+::)*(\w+)/gm,
		// `mod name`
		/mod\s+(\w+)/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// path call `::name(`
		/::(\w+)\s*\(/gm,
	],
};
118
+
119
/** Regex table for Java sources. */
const javaPatterns: LanguagePatterns = {
	functions: [
		// Methods with an explicit access modifier: optional static/final and
		// abstract|synchronized|native, then a return type, then `name(`.
		/(?:public|private|protected)\s+(?:static\s+)?(?:final\s+)?(?:(?:abstract|synchronized|native)\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(/gm,
		// Package-private methods: indented `ReturnType name(`.
		// The lookahead rejects statement keywords in the return-type slot so
		// that lines such as `return helper(x)` or `new Foo(` inside a method
		// body are not mistaken for method declarations.
		/^\s+(?!(?:return|new|throw|else|case|assert|yield)\b)\w+(?:<[^>]*>)?\s+(\w+)\s*\(/gm,
	],
	classes: [
		// class, with optional public/abstract/final
		/(?:public\s+)?(?:abstract\s+)?(?:final\s+)?class\s+(\w+)/gm,
		// interface
		/(?:public\s+)?interface\s+(\w+)/gm,
		// enum
		/(?:public\s+)?enum\s+(\w+)/gm,
	],
	imports: [
		// `import [static] a.b.Name;` — captures the last dotted segment
		/import\s+(?:static\s+)?[\w.]+\.(\w+)\s*;/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call `.name(`
		/\.(\w+)\s*\(/gm,
		// constructor call `new Name(`
		/new\s+(\w+)\s*\(/gm,
	],
};
134
+
135
/** Regex table for Kotlin sources (.kt and .kts). */
const kotlinPatterns: LanguagePatterns = {
	functions: [
		// `fun name`, with optional `suspend` and optional `<T>` type parameters
		/(?:suspend\s+)?fun\s+(?:<[^>]*>\s+)?(\w+)/gm,
	],
	classes: [
		// class / data class
		/(?:data\s+)?class\s+(\w+)/gm,
		// object declaration
		/object\s+(\w+)/gm,
		// interface
		/interface\s+(\w+)/gm,
		// enum class
		/enum\s+class\s+(\w+)/gm,
	],
	imports: [
		// `import a.b.Name` — captures the last dotted segment
		/import\s+[\w.]+\.(\w+)/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call `.name(`
		/\.(\w+)\s*\(/gm,
	],
};
146
+
147
/** Regex table for Swift sources. */
const swiftPatterns: LanguagePatterns = {
	functions: [
		// `func name`
		/func\s+(\w+)/gm,
	],
	classes: [
		// class, struct, enum and protocol declarations
		/class\s+(\w+)/gm,
		/struct\s+(\w+)/gm,
		/enum\s+(\w+)/gm,
		/protocol\s+(\w+)/gm,
	],
	imports: [
		// `import Module`
		/import\s+(\w+)/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call `.name(`
		/\.(\w+)\s*\(/gm,
	],
};
153
+
154
/** Regex table for PHP sources. */
const phpPatterns: LanguagePatterns = {
	functions: [
		// top-level `function name` at the start of a line
		/^function\s+(\w+)/gm,
		// methods: visibility keyword, optional `static`, then `function name`
		/(?:public|private|protected)\s+(?:static\s+)?function\s+(\w+)/gm,
	],
	classes: [
		// class / abstract class
		/(?:abstract\s+)?class\s+(\w+)/gm,
		// interface
		/interface\s+(\w+)/gm,
		// trait
		/trait\s+(\w+)/gm,
	],
	imports: [
		// `use Namespace\Class` — captures the segment after the last backslash
		/use\s+[\w\\]+\\(\w+)/gm,
		// require / require_once / include / include_once with a quoted path
		/(?:require|require_once|include|include_once)\s+["']([^"']+)["']/gm,
	],
	calls: [
		// bare call `name(` (not preceded by `.`, `$` or a word character)
		/(?<![.\w$])(\w+)\s*\(/gm,
		// instance call `->name(`
		/->(\w+)\s*\(/gm,
		// static call `::name(`
		/::(\w+)\s*\(/gm,
	],
};
170
+
171
/** Regex table for Ruby sources. */
const rubyPatterns: LanguagePatterns = {
	functions: [
		// `def name` or `def self.name`
		/def\s+(?:self\.)?(\w+)/gm,
	],
	classes: [
		// class
		/class\s+(\w+)/gm,
		// module
		/module\s+(\w+)/gm,
	],
	imports: [
		// `require "name"` / `require 'name'`
		/require\s+["']([^"']+)["']/gm,
		// `require_relative "path"`
		/require_relative\s+["']([^"']+)["']/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// method call `.name(`
		/\.(\w+)\s*\(/gm,
	],
};
182
+
183
/** Regex table for Scala sources. */
const scalaPatterns: LanguagePatterns = {
	functions: [
		// `def name`
		/def\s+(\w+)/gm,
	],
	classes: [
		// class / case class
		/(?:case\s+)?class\s+(\w+)/gm,
		// object
		/object\s+(\w+)/gm,
		// trait
		/trait\s+(\w+)/gm,
	],
	imports: [
		// `import a.b.Name` — captures the last dotted segment
		/import\s+[\w.]+\.(\w+)/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call `.name(`
		/\.(\w+)\s*\(/gm,
	],
};
189
+
190
/** Regex table for Elixir sources (.ex and .exs). */
const elixirPatterns: LanguagePatterns = {
	functions: [
		// `def name` / `defp name`; the mandatory whitespace keeps `defmodule` out
		/\b(?:def|defp)\s+(\w+)/gm,
	],
	classes: [
		// `defmodule Some.Dotted.Name` — the dotted name is captured whole
		/defmodule\s+([\w.]+)/gm,
	],
	imports: [
		// import / alias / use, each followed by a (possibly dotted) module name
		/\bimport\s+([\w.]+)/gm,
		/\balias\s+([\w.]+)/gm,
		/\buse\s+([\w.]+)/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// remote call `.name(`
		/\.(\w+)\s*\(/gm,
	],
};
202
+
203
/** Regex table for Dart sources. */
const dartPatterns: LanguagePatterns = {
	functions: [
		// `ReturnType name(` — e.g. `void main(`, `Widget build(`.
		// Because any identifier is accepted as the return type, the leading
		// word boundary + lookahead rejects statement keywords in that slot;
		// otherwise `return wrap(x)` or `await boot()` would be reported as
		// function declarations named `wrap` / `boot`.
		/\b(?!(?:return|await|new|throw|else|yield|case)\b)(?:void|int|double|bool|String|dynamic|Future|Stream|List|Map|Set|\w+)\s+(\w+)\s*\(/gm,
	],
	classes: [
		// class / abstract class
		/(?:abstract\s+)?class\s+(\w+)/gm,
		// mixin
		/mixin\s+(\w+)/gm,
		// extension
		/extension\s+(\w+)/gm,
	],
	imports: [
		// `import 'package:...'` — captures the quoted URI
		/import\s+['"]([^'"]+)['"]/gm,
	],
	calls: [
		// bare call `name(`
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call `.name(`
		/\.(\w+)\s*\(/gm,
	],
};
212
+
213
+ // ---------- Extension to (language name, patterns) mapping ----------
214
+
215
/** Rows of (file extension, language tag, pattern table), in lookup-table order. */
const TIER_A_ROWS: ReadonlyArray<readonly [string, string, LanguagePatterns]> = [
	[".ts", "typescript", tsPatterns],
	[".tsx", "tsx", tsPatterns],
	[".js", "javascript", jsPatterns],
	[".mjs", "javascript", jsPatterns],
	[".cjs", "javascript", jsPatterns],
	[".jsx", "jsx", jsPatterns],
	[".py", "python", pythonPatterns],
	[".go", "go", goPatterns],
	[".rs", "rust", rustPatterns],
	[".java", "java", javaPatterns],
	[".kt", "kotlin", kotlinPatterns],
	[".kts", "kotlin", kotlinPatterns],
	[".swift", "swift", swiftPatterns],
	[".php", "php", phpPatterns],
	[".rb", "ruby", rubyPatterns],
	[".scala", "scala", scalaPatterns],
	[".ex", "elixir", elixirPatterns],
	[".exs", "elixir", elixirPatterns],
	[".dart", "dart", dartPatterns],
];

/**
 * Lookup from file extension (including the leading dot) to the language tag
 * and the regex table used to scan files with that extension.
 */
const TIER_A_PATTERNS: Record<string, { language: string; patterns: LanguagePatterns }> =
	Object.fromEntries(
		TIER_A_ROWS.map(
			([ext, language, patterns]): [string, { language: string; patterns: LanguagePatterns }] => [
				ext,
				{ language, patterns },
			],
		),
	);
236
+
237
/** Singular label attached to each extracted fact. */
type Category = "function" | "class" | "import" | "call";

// Pairs each LanguagePatterns key with the category label it produces.
// The order here is the order in which the extractor processes categories.
const CATEGORY_PAIRS: [keyof LanguagePatterns, Category][] = [
	["functions", "function"],
	["classes", "class"],
	["imports", "import"],
	["calls", "call"],
];

const CATEGORIES: { key: keyof LanguagePatterns; category: Category }[] = CATEGORY_PAIRS.map(
	([key, category]) => ({ key, category }),
);
246
+
247
/**
 * Identifiers that the call patterns would otherwise pick up as call sites:
 * control-flow and declaration keywords from the supported languages plus a
 * few ubiquitous printing helpers. Names in this set are dropped during call
 * extraction.
 */
const CALL_NOISE = new Set<string>([
	"if", "for", "while", "switch", "catch", "return", "throw", "typeof", "instanceof", "void",
	"delete", "await", "else", "case", "break", "continue", "do", "in", "of", "let",
	"const", "var", "true", "false", "null", "undefined", "try", "finally", "yield", "import",
	"export", "from", "require", "include", "require_once", "include_once", "def", "class", "fn", "func",
	"fun", "function", "pub", "async", "self", "super", "this", "new", "use", "mod",
	"type", "interface", "enum", "struct", "trait", "impl", "where", "match", "loop", "print",
	"println", "printf", "fmt", "defmodule", "defp",
]);
315
+
316
/**
 * Find where the captured `name` begins inside the full text of a regex match.
 *
 * A plain `indexOf` may land inside a leading keyword (the "f" of "function"
 * when the declared name is `f`), so the first occurrence that is not glued
 * to neighbouring word characters is preferred. Falls back to the first raw
 * occurrence when no stand-alone occurrence exists.
 */
function captureOffset(matched: string, name: string): number {
	const isWordChar = (ch: string): boolean => /\w/.test(ch);
	let at = matched.indexOf(name);
	while (at !== -1) {
		// charAt returns "" outside the string, which is not a word character.
		const standsAlone =
			!isWordChar(matched.charAt(at - 1)) && !isWordChar(matched.charAt(at + name.length));
		if (standsAlone) return at;
		at = matched.indexOf(name, at + 1);
	}
	return matched.indexOf(name);
}

/**
 * Extract structural code entities from file content using language-specific
 * regex patterns. Supports all 15 Tier A languages.
 *
 * Categories are processed in CATEGORIES order, so function and class
 * declarations are seen before calls. Every declaration's name position is
 * remembered and a call match starting at such a position is ignored, which
 * keeps `function foo()` from also being reported as a call to `foo`.
 * Facts are de-duplicated per (category, name); the first match wins.
 *
 * @param content The file text to scan.
 * @param filePath Path used to determine language by extension.
 * @returns An array of CandidateFact objects, one per distinct matched entity;
 *          empty when either argument is empty or the extension is unknown.
 */
export function extractTierA(content: string, filePath: string): CandidateFact[] {
	if (!content || !filePath) return [];

	const dotIdx = filePath.lastIndexOf(".");
	if (dotIdx === -1) return [];

	const entry = TIER_A_PATTERNS[filePath.slice(dotIdx)];
	if (!entry) return [];

	const { language, patterns } = entry;
	const base = basename(filePath);
	const facts: CandidateFact[] = [];
	const seen = new Set<string>();

	// Offsets (into `content`) of every declared function/class name.
	const declPositions = new Set<number>();

	for (const { key, category } of CATEGORIES) {
		for (const regex of patterns[key]) {
			// The pattern objects are shared module-level globals with the `g`
			// flag, so the scan position must be rewound before each use.
			regex.lastIndex = 0;

			for (let m = regex.exec(content); m !== null; m = regex.exec(content)) {
				const name = m[1];
				if (!name) continue;

				// Keywords and other noise never count as calls.
				if (category === "call" && CALL_NOISE.has(name)) continue;

				if (category !== "import") {
					const nameStart = m.index + captureOffset(m[0], name);
					if (category === "call") {
						// This "call" is really the name in a declaration.
						if (declPositions.has(nameStart)) continue;
					} else {
						// Record the position BEFORE de-duplication so repeated
						// declarations of one name (e.g. overloads) are still
						// recognised as declarations during call extraction.
						declPositions.add(nameStart);
					}
				}

				// Deduplicate by name + category
				const dedupeKey = `${category}:${name}`;
				if (seen.has(dedupeKey)) continue;
				seen.add(dedupeKey);

				const fact: CandidateFact = {
					type: "CodeEntity",
					name,
					content: surroundingLines(content, m.index),
					summary: `${category} ${name} in ${base}`,
					tags: [language, category],
					file_paths: [filePath],
					trust_tier: 2,
					confidence: 0.92,
					extraction_method: "regex-ast",
				};

				// For imports, look for the source module on the rest of the
				// matched line (`from "x"` or `require("x")`).
				if (category === "import") {
					const lineEnd = content.indexOf("\n", m.index);
					const line = content.slice(m.index, lineEnd === -1 ? undefined : lineEnd);
					const fromMatch = /from\s+["']([^"']+)["']/.exec(line);
					const reqMatch = /require\s*\(\s*["']([^"']+)["']\s*\)/.exec(line);
					const sourceMod = fromMatch?.[1] ?? reqMatch?.[1];
					if (sourceMod) {
						fact.proposed_relationships = [
							{ target_name: sourceMod, type: "imports", weight: 0.9 },
						];
					}
				}

				facts.push(fact);
			}
		}
	}

	return facts;
}
@@ -0,0 +1,289 @@
1
+ // Module: tier-b — Structural extraction (no call tracking) for 10 Tier B languages
2
+
3
+ import { basename } from "node:path";
4
+ import type { CandidateFact } from "@/capture/types";
5
+
6
/**
 * Regex table for a single language, split by extraction category.
 * The extractor in this module reads capture group 1 of each match as the
 * entity name.
 */
interface LanguagePatterns {
  /** Patterns for function-like declarations. */
  functions: Array<RegExp>;
  /** Patterns for type-like declarations (classes, structs, modules, ...). */
  classes: Array<RegExp>;
  /** Patterns for import / include statements. */
  imports: Array<RegExp>;
  /** Patterns for call sites; every table in this module leaves it empty. */
  calls: Array<RegExp>;
}
13
+
14
/**
 * Build a short context snippet for a match.
 *
 * The snippet starts at the beginning of the line containing `matchIndex`
 * and runs up to the third line break found after `matchIndex` (or to the
 * end of `content` when fewer line breaks remain). Leading and trailing
 * whitespace is trimmed.
 *
 * @param content    Full text being scanned.
 * @param matchIndex Offset of the match inside `content`.
 * @returns Up to three lines of trimmed context.
 */
function surroundingLines(content: string, matchIndex: number): string {
  // lastIndexOf yields -1 when there is no earlier line break, so +1 gives 0.
  const from = content.lastIndexOf("\n", matchIndex) + 1;

  let cursor = matchIndex;
  let remaining = 3;
  while (remaining > 0) {
    const lineBreak = content.indexOf("\n", cursor + 1);
    if (lineBreak < 0) {
      // Ran out of text before collecting three lines: take everything left.
      cursor = content.length;
      break;
    }
    cursor = lineBreak;
    remaining -= 1;
  }

  return content.slice(from, cursor).trim();
}
29
+
30
+ // ---------- Tier B pattern table ----------
31
+
32
// C: functions, struct/union/typedef names and #include targets.
const cPatterns: LanguagePatterns = {
  functions: [
    // return_type name( — covers int main(, void foo(, char* bar( and char *bar(.
    // The leading lookahead stops statements such as `return foo(` or
    // `else bar(` from being read as "type name(", and the lookahead in front
    // of the capture rejects control keywords (`else if (`) as function names.
    /\b(?!(?:return|else|do)\b)(?:unsigned\s+)?(?:void|int|char|float|double|long|short|size_t|bool|(?:struct\s+)?\w+)(?:\s*\*+\s*|\s+)(?!(?:if|while|for|switch|return|sizeof)\b)(\w+)\s*\(/gm,
  ],
  classes: [
    // struct Name
    /\bstruct\s+(\w+)/gm,
    // typedef struct|union|enum { ... } Name
    /\btypedef\s+(?:struct|union|enum)\s*\{[^}]*\}\s*(\w+)/gm,
    // union Name
    /\bunion\s+(\w+)/gm,
  ],
  imports: [
    // #include <header.h> or #include "header.h"
    /#include\s+[<"]([^>"]+)[>"]/gm,
  ],
  calls: [],
};
48
+
49
// C++: templated, qualified and free functions; class/struct/namespace names;
// #include and using directives.
const cppPatterns: LanguagePatterns = {
  functions: [
    /template\s*<[^>]*>\s*(?:\w[\w:*&\s]*)\s+(\w+)\s*\(/gm, // template<...> return_type name(
    /(?:\w[\w:*&\s]*)\s+\w+::(\w+)\s*\(/gm, // return_type Class::method(
    /(?:unsigned\s+)?(?:void|int|char|float|double|long|short|bool|auto|size_t|std::\w+|(?:struct\s+)?\w+)\s*[*&]?\s+(\w+)\s*\(/gm, // standalone: return_type name(
  ],
  classes: [
    /\bclass\s+(\w+)/gm,
    /\bstruct\s+(\w+)/gm,
    /\bnamespace\s+(\w+)/gm,
  ],
  imports: [
    /#include\s+[<"]([^>"]+)[>"]/gm,
    /\busing\s+(?:namespace\s+)?(\w[\w:]*)/gm,
  ],
  calls: [],
};
62
+
63
// C#: methods that carry an access modifier, type declarations, using directives.
const csharpPatterns: LanguagePatterns = {
  // access_modifier [static] [virtual] [override] [async] return_type name(
  functions: [
    /(?:public|private|protected|internal)\s+(?:static\s+)?(?:virtual\s+)?(?:override\s+)?(?:async\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(/gm,
  ],
  // Each type pattern accepts an optional leading access modifier.
  classes: [
    /(?:public\s+|private\s+|protected\s+|internal\s+)?(?:static\s+)?(?:abstract\s+)?(?:sealed\s+)?class\s+(\w+)/gm, // class
    /(?:public\s+|private\s+|protected\s+|internal\s+)?interface\s+(\w+)/gm, // interface
    /(?:public\s+|private\s+|protected\s+|internal\s+)?struct\s+(\w+)/gm, // struct
    /(?:public\s+|private\s+|protected\s+|internal\s+)?enum\s+(\w+)/gm, // enum
  ],
  // using System; or using System.Collections.Generic;
  imports: [/^using\s+([\w.]+)\s*;/gm],
  calls: [],
};
80
+
81
// Bash: both function declaration forms plus sourced files. No class concept.
const bashPatterns: LanguagePatterns = {
  functions: [
    /\bfunction\s+(\w+)/gm, // function name { or function name() {
    /^(\w+)\s*\(\s*\)\s*\{/gm, // name() { — shorthand
  ],
  classes: [],
  imports: [
    /\bsource\s+(\S+)/gm, // source path
    /^\.\s+(\S+)/gm, // . path
  ],
  calls: [],
};
96
+
97
// Lua: named function declarations and require() imports. No class concept.
const luaPatterns: LanguagePatterns = {
  functions: [
    // function name(, function Module.name( and function Object:method( —
    // the capture keeps the full dotted / colon-qualified name so that
    // table-scoped functions are not dropped.
    /\bfunction\s+((?:\w+[.:])*\w+)\s*\(/gm,
    // local function name(
    /\blocal\s+function\s+(\w+)\s*\(/gm,
  ],
  classes: [],
  imports: [
    // require("name") or require("dotted.path")
    /\brequire\s*\(\s*["']([^"']+)["']\s*\)/gm,
  ],
  calls: [],
};
111
+
112
// Zig: fn declarations, container types bound to a name, and @import targets.
const zigPatterns: LanguagePatterns = {
  functions: [/(?:pub\s+)?fn\s+(\w+)/gm], // pub fn name or fn name
  classes: [/\b(\w+)\s*=\s*(?:packed\s+)?(?:struct|enum|union)/gm], // const Name = struct/enum/union
  imports: [/@import\s*\(\s*["']([^"']+)["']\s*\)/gm], // @import("name")
  calls: [],
};
127
+
128
// Perl: subs, packages, and use/require imports.
const perlPatterns: LanguagePatterns = {
  functions: [
    // sub name
    /\bsub\s+(\w+)/gm,
  ],
  classes: [
    // package Name or package Name::Sub
    /\bpackage\s+([\w:]+)/gm,
  ],
  imports: [
    // use Module; or use Module::Sub; — the lookahead skips version
    // requirements such as `use 5.010;` / `use v5.36;`, which are not modules.
    /\buse\s+(?!v?\d)([\w:]+)/gm,
    // require "file"
    /\brequire\s+["']([^"']+)["']/gm,
    // require Module or require Module::Sub (bareword form)
    /\brequire\s+([A-Za-z_][\w:]*)/gm,
  ],
  calls: [],
};
142
+
143
// R: function assignments, S4/R6 class definitions, and library/require calls.
// R identifiers routinely contain dots (`print.myclass`, `data.table`), so
// names are captured with [\w.]+ rather than \w+.
const rPatterns: LanguagePatterns = {
  functions: [
    // name <- function(
    /([\w.]+)\s*<-\s*function\s*\(/gm,
    // name = function(
    /([\w.]+)\s*=\s*function\s*\(/gm,
  ],
  classes: [
    // setClass("Name", ...)
    /\bsetClass\s*\(\s*["']([\w.]+)["']/gm,
    // R6Class("Name", ...)
    /\bR6Class\s*\(\s*["']([\w.]+)["']/gm,
  ],
  imports: [
    // library(pkg), library("pkg"), library(pkg, quietly = TRUE)
    /\blibrary\s*\(\s*["']?([\w.]+)["']?\s*[,)]/gm,
    // require(pkg), require("pkg"), require(pkg, quietly = TRUE)
    /\brequire\s*\(\s*["']?([\w.]+)["']?\s*[,)]/gm,
  ],
  calls: [],
};
154
+
155
// OCaml: let/val bindings, module and type declarations, and open directives.
const ocamlPatterns: LanguagePatterns = {
  functions: [
    // let rec name ...
    /\blet\s+rec\s+(\w+)/gm,
    // let name ... = or let name : ... — the lookahead skips the `rec`
    // keyword (covered by the pattern above) and the `_` wildcard so that
    // neither is reported as a binding name.
    /\blet\s+(?!(?:rec|_)\b)(\w+)\b.*(?::|=)/gm,
    // val name : (in .mli)
    /\bval\s+(\w+)\s*:/gm,
  ],
  classes: [
    // module Name or module type Name
    /\bmodule\s+(?:type\s+)?(\w+)/gm,
    // type name
    /\btype\s+(\w+)/gm,
  ],
  imports: [
    // open Module or open Module.Sub
    /\bopen\s+([\w.]+)/gm,
  ],
  calls: [],
};
172
+
173
// Haskell: top-level type signatures, data/newtype/class/instance heads, imports.
const haskellPatterns: LanguagePatterns = {
  functions: [/^(\w+)\s*::\s*.+$/gm], // type signature: name :: Type
  classes: [
    /\bdata\s+(\w+)/gm, // data Name
    /\bnewtype\s+(\w+)/gm, // newtype Name
    /\bclass\s+(?:\([^)]*\)\s*=>)?\s*(\w+)/gm, // class [(ctx) =>] Name
    /\binstance\s+(?:\([^)]*\)\s*=>)?\s*(\w+)/gm, // instance [(ctx) =>] Name
  ],
  imports: [/\bimport\s+(?:qualified\s+)?([\w.]+)/gm], // import [qualified] Module.Name [as X]
  calls: [],
};
190
+
191
+ // ---------- Extension to (language name, patterns) mapping ----------
192
+
193
// Lookup table keyed by file extension (leading dot included, case-sensitive).
// It is assembled from one row per language so each language's extensions
// sit next to its pattern table.
const TIER_B_PATTERNS: Record<string, { language: string; patterns: LanguagePatterns }> =
  Object.fromEntries(
    [
      { language: "c", patterns: cPatterns, extensions: [".c", ".h"] },
      {
        language: "cpp",
        patterns: cppPatterns,
        extensions: [".cpp", ".cc", ".cxx", ".hpp", ".hxx"],
      },
      { language: "csharp", patterns: csharpPatterns, extensions: [".cs"] },
      { language: "bash", patterns: bashPatterns, extensions: [".sh", ".bash"] },
      { language: "lua", patterns: luaPatterns, extensions: [".lua"] },
      { language: "zig", patterns: zigPatterns, extensions: [".zig"] },
      { language: "perl", patterns: perlPatterns, extensions: [".pl", ".pm"] },
      { language: "r", patterns: rPatterns, extensions: [".r", ".R"] },
      { language: "ocaml", patterns: ocamlPatterns, extensions: [".ml", ".mli"] },
      { language: "haskell", patterns: haskellPatterns, extensions: [".hs"] },
    ].flatMap(({ language, patterns, extensions }) =>
      extensions.map((ext): [string, { language: string; patterns: LanguagePatterns }] => [
        ext,
        { language, patterns },
      ]),
    ),
  );
214
+
215
// Extraction categories handled by Tier B. "call" is left out on purpose:
// every Tier B pattern table has an empty `calls` list.
type Category = "function" | "class" | "import";

// Pairs each LanguagePatterns key with the singular category label that is
// used in fact summaries and tags. Order here is the order of extraction.
const CATEGORIES: { key: keyof LanguagePatterns; category: Category }[] = (
  [
    ["functions", "function"],
    ["classes", "class"],
    ["imports", "import"],
  ] as const
).map(([key, category]) => ({ key, category }));
224
+
225
/**
 * Scan a source file with the regex table registered for its extension and
 * emit one CandidateFact per distinct (category, name) pair.
 *
 * Categories are processed in CATEGORIES order (functions, classes, imports);
 * call sites are not extracted. Only the first occurrence of a name within a
 * category is reported.
 *
 * @param content  The file text to scan.
 * @param filePath Path whose extension (text from the last ".") selects the language.
 * @returns The extracted facts, or an empty array when either argument is
 *          empty, the path has no ".", or the extension is not registered.
 */
export function extractTierB(content: string, filePath: string): CandidateFact[] {
  if (!content || !filePath) return [];

  const lastDot = filePath.lastIndexOf(".");
  const tableEntry = lastDot === -1 ? undefined : TIER_B_PATTERNS[filePath.slice(lastDot)];
  if (!tableEntry) return [];

  const fileName = basename(filePath);
  const emitted = new Set<string>();
  const results: CandidateFact[] = [];

  for (const { key, category } of CATEGORIES) {
    for (const pattern of tableEntry.patterns[key]) {
      // The regexes are shared module-level objects with the `g` flag, so
      // rewind before scanning a new document.
      pattern.lastIndex = 0;

      for (let hit = pattern.exec(content); hit !== null; hit = pattern.exec(content)) {
        const name = hit[1];
        if (!name) continue;

        // Report each name once per category.
        const dedupeKey = `${category}:${name}`;
        if (emitted.has(dedupeKey)) continue;
        emitted.add(dedupeKey);

        results.push({
          type: "CodeEntity",
          name,
          content: surroundingLines(content, hit.index),
          summary: `${category} ${name} in ${fileName}`,
          tags: [tableEntry.language, category],
          file_paths: [filePath],
          trust_tier: 2,
          confidence: 0.92,
          extraction_method: "regex-ast",
        });
      }
    }
  }

  return results;
}