claude-brain 0.14.2 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246)
  1. package/README.md +191 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +11 -11
  5. package/bunfig.toml +8 -8
  6. package/package.json +80 -80
  7. package/packs/backend/node.json +173 -173
  8. package/packs/core/javascript.json +176 -176
  9. package/packs/core/typescript.json +222 -222
  10. package/packs/frontend/react.json +254 -254
  11. package/packs/meta/testing.json +172 -172
  12. package/src/automation/auto-context.ts +240 -240
  13. package/src/automation/decision-detector.ts +452 -452
  14. package/src/automation/index.ts +11 -11
  15. package/src/automation/phase12-manager.ts +456 -456
  16. package/src/automation/proactive-recall.ts +373 -373
  17. package/src/automation/project-detector.ts +310 -310
  18. package/src/automation/repo-scanner.ts +205 -205
  19. package/src/cli/auto-setup.ts +82 -82
  20. package/src/cli/bin.ts +202 -202
  21. package/src/cli/commands/chroma.ts +573 -573
  22. package/src/cli/commands/git-hook.ts +189 -189
  23. package/src/cli/commands/hooks.ts +213 -213
  24. package/src/cli/commands/init.ts +122 -122
  25. package/src/cli/commands/install-mcp.ts +92 -92
  26. package/src/cli/commands/pack.ts +197 -197
  27. package/src/cli/commands/serve.ts +167 -167
  28. package/src/cli/commands/start.ts +42 -42
  29. package/src/cli/commands/uninstall-mcp.ts +41 -41
  30. package/src/cli/commands/update.ts +121 -121
  31. package/src/cli/diagnose.ts +4 -4
  32. package/src/cli/health-check.ts +4 -4
  33. package/src/cli/migrate-chroma.ts +106 -106
  34. package/src/cli/setup.ts +4 -4
  35. package/src/cli/ui/animations.ts +80 -80
  36. package/src/cli/ui/components.ts +82 -82
  37. package/src/cli/ui/index.ts +4 -4
  38. package/src/cli/ui/logo.ts +36 -36
  39. package/src/cli/ui/theme.ts +55 -55
  40. package/src/config/defaults.ts +50 -50
  41. package/src/config/home.ts +55 -55
  42. package/src/config/index.ts +7 -7
  43. package/src/config/loader.ts +166 -166
  44. package/src/config/migration.ts +76 -76
  45. package/src/config/schema.ts +360 -360
  46. package/src/config/validator.ts +184 -184
  47. package/src/config/watcher.ts +86 -86
  48. package/src/context/assembler.ts +398 -398
  49. package/src/context/cache-manager.ts +101 -101
  50. package/src/context/formatter.ts +84 -84
  51. package/src/context/hierarchy.ts +85 -85
  52. package/src/context/index.ts +83 -83
  53. package/src/context/progress-tracker.ts +174 -174
  54. package/src/context/standards-manager.ts +287 -287
  55. package/src/context/types.ts +252 -252
  56. package/src/context/validator.ts +58 -58
  57. package/src/diagnostics/index.ts +123 -123
  58. package/src/health/index.ts +229 -229
  59. package/src/hooks/brain-hook.ts +112 -112
  60. package/src/hooks/capture.ts +168 -168
  61. package/src/hooks/deduplicator.ts +72 -72
  62. package/src/hooks/git-capture.ts +109 -109
  63. package/src/hooks/git-hook-installer.ts +207 -207
  64. package/src/hooks/index.ts +20 -20
  65. package/src/hooks/installer.ts +191 -194
  66. package/src/hooks/passive-classifier.ts +366 -366
  67. package/src/hooks/queue.ts +129 -129
  68. package/src/hooks/session-tracker.ts +275 -275
  69. package/src/hooks/types.ts +47 -47
  70. package/src/index.ts +7 -7
  71. package/src/intelligence/cross-project/affinity.ts +162 -162
  72. package/src/intelligence/cross-project/generalizer.ts +283 -283
  73. package/src/intelligence/cross-project/index.ts +13 -13
  74. package/src/intelligence/cross-project/transfer.ts +201 -201
  75. package/src/intelligence/index.ts +24 -24
  76. package/src/intelligence/optimization/index.ts +10 -10
  77. package/src/intelligence/optimization/precompute.ts +202 -202
  78. package/src/intelligence/optimization/semantic-cache.ts +207 -207
  79. package/src/intelligence/prediction/context-anticipator.ts +198 -198
  80. package/src/intelligence/prediction/decision-predictor.ts +184 -184
  81. package/src/intelligence/prediction/index.ts +13 -13
  82. package/src/intelligence/prediction/recommender.ts +268 -268
  83. package/src/intelligence/reasoning/chain-retrieval.ts +247 -247
  84. package/src/intelligence/reasoning/counterfactual.ts +248 -248
  85. package/src/intelligence/reasoning/index.ts +13 -13
  86. package/src/intelligence/reasoning/synthesizer.ts +169 -169
  87. package/src/intelligence/temporal/evolution.ts +197 -197
  88. package/src/intelligence/temporal/index.ts +16 -16
  89. package/src/intelligence/temporal/query-processor.ts +190 -190
  90. package/src/intelligence/temporal/timeline.ts +259 -259
  91. package/src/intelligence/temporal/trends.ts +263 -263
  92. package/src/knowledge/entity-extractor.ts +416 -416
  93. package/src/knowledge/graph/builder.ts +185 -185
  94. package/src/knowledge/graph/linker.ts +201 -201
  95. package/src/knowledge/graph/memory-graph.ts +359 -359
  96. package/src/knowledge/graph/schema.ts +99 -99
  97. package/src/knowledge/graph/search.ts +168 -168
  98. package/src/knowledge/relationship-extractor.ts +108 -108
  99. package/src/memory/chroma/client.ts +174 -174
  100. package/src/memory/chroma/collection-manager.ts +94 -94
  101. package/src/memory/chroma/config.ts +57 -57
  102. package/src/memory/chroma/embeddings.ts +153 -153
  103. package/src/memory/chroma/index.ts +82 -82
  104. package/src/memory/chroma/migration.ts +270 -270
  105. package/src/memory/chroma/schemas.ts +69 -69
  106. package/src/memory/chroma/search.ts +315 -315
  107. package/src/memory/chroma/store.ts +741 -741
  108. package/src/memory/consolidation/archiver.ts +164 -164
  109. package/src/memory/consolidation/merger.ts +186 -186
  110. package/src/memory/consolidation/scorer.ts +138 -138
  111. package/src/memory/context-builder.ts +236 -236
  112. package/src/memory/database.ts +169 -169
  113. package/src/memory/embedding-utils.ts +156 -156
  114. package/src/memory/embeddings.ts +226 -226
  115. package/src/memory/episodic/detector.ts +108 -108
  116. package/src/memory/episodic/manager.ts +351 -351
  117. package/src/memory/episodic/summarizer.ts +179 -179
  118. package/src/memory/episodic/types.ts +52 -52
  119. package/src/memory/index.ts +582 -582
  120. package/src/memory/knowledge-extractor.ts +455 -455
  121. package/src/memory/learning.ts +378 -378
  122. package/src/memory/patterns.ts +396 -396
  123. package/src/memory/schema.ts +88 -88
  124. package/src/memory/search.ts +309 -309
  125. package/src/memory/store.ts +787 -787
  126. package/src/memory/types.ts +121 -121
  127. package/src/orchestrator/coordinator.ts +272 -272
  128. package/src/orchestrator/decision-logger.ts +228 -228
  129. package/src/orchestrator/event-emitter.ts +198 -198
  130. package/src/orchestrator/event-queue.ts +184 -184
  131. package/src/orchestrator/handlers/base-handler.ts +70 -70
  132. package/src/orchestrator/handlers/context-handler.ts +73 -73
  133. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  134. package/src/orchestrator/handlers/index.ts +10 -10
  135. package/src/orchestrator/handlers/status-handler.ts +131 -131
  136. package/src/orchestrator/handlers/task-handler.ts +171 -171
  137. package/src/orchestrator/index.ts +275 -275
  138. package/src/orchestrator/task-parser.ts +284 -284
  139. package/src/orchestrator/types.ts +98 -98
  140. package/src/packs/index.ts +9 -9
  141. package/src/packs/loader.ts +134 -134
  142. package/src/packs/manager.ts +204 -204
  143. package/src/packs/ranker.ts +78 -78
  144. package/src/packs/types.ts +81 -81
  145. package/src/phase12/index.ts +5 -5
  146. package/src/retrieval/bm25/index.ts +300 -300
  147. package/src/retrieval/bm25/tokenizer.ts +184 -184
  148. package/src/retrieval/feedback/adaptive.ts +223 -223
  149. package/src/retrieval/feedback/index.ts +16 -16
  150. package/src/retrieval/feedback/metrics.ts +223 -223
  151. package/src/retrieval/feedback/store.ts +283 -283
  152. package/src/retrieval/fusion/index.ts +194 -194
  153. package/src/retrieval/fusion/rrf.ts +163 -163
  154. package/src/retrieval/index.ts +12 -12
  155. package/src/retrieval/pipeline.ts +375 -375
  156. package/src/retrieval/query/expander.ts +198 -198
  157. package/src/retrieval/query/index.ts +27 -27
  158. package/src/retrieval/query/intent-classifier.ts +236 -236
  159. package/src/retrieval/query/temporal-parser.ts +295 -295
  160. package/src/retrieval/reranker/index.ts +188 -188
  161. package/src/retrieval/reranker/model.ts +95 -95
  162. package/src/retrieval/service.ts +125 -125
  163. package/src/retrieval/types.ts +162 -162
  164. package/src/routing/entity-extractor.ts +428 -428
  165. package/src/routing/intent-classifier.ts +436 -436
  166. package/src/routing/response-filter.ts +258 -254
  167. package/src/routing/router.ts +1322 -1314
  168. package/src/routing/search-engine.ts +475 -475
  169. package/src/routing/types.ts +94 -84
  170. package/src/scripts/health-check.ts +118 -118
  171. package/src/scripts/setup.ts +122 -122
  172. package/src/server/handlers/call-tool.ts +156 -156
  173. package/src/server/handlers/index.ts +9 -9
  174. package/src/server/handlers/list-tools.ts +35 -35
  175. package/src/server/handlers/tools/analyze-decision-evolution.ts +151 -151
  176. package/src/server/handlers/tools/auto-remember.ts +200 -200
  177. package/src/server/handlers/tools/brain.ts +85 -85
  178. package/src/server/handlers/tools/create-project.ts +135 -135
  179. package/src/server/handlers/tools/detect-trends.ts +144 -144
  180. package/src/server/handlers/tools/find-cross-project-patterns.ts +168 -168
  181. package/src/server/handlers/tools/get-activity-log.ts +194 -194
  182. package/src/server/handlers/tools/get-code-standards.ts +124 -124
  183. package/src/server/handlers/tools/get-corrections.ts +154 -154
  184. package/src/server/handlers/tools/get-decision-timeline.ts +172 -172
  185. package/src/server/handlers/tools/get-episode.ts +103 -103
  186. package/src/server/handlers/tools/get-patterns.ts +158 -158
  187. package/src/server/handlers/tools/get-phase12-status.ts +63 -63
  188. package/src/server/handlers/tools/get-project-context.ts +75 -75
  189. package/src/server/handlers/tools/get-recommendations.ts +145 -145
  190. package/src/server/handlers/tools/index.ts +31 -31
  191. package/src/server/handlers/tools/init-project.ts +757 -757
  192. package/src/server/handlers/tools/list-episodes.ts +90 -90
  193. package/src/server/handlers/tools/list-projects.ts +125 -125
  194. package/src/server/handlers/tools/rate-memory.ts +101 -101
  195. package/src/server/handlers/tools/recall-similar.ts +87 -87
  196. package/src/server/handlers/tools/recognize-pattern.ts +126 -126
  197. package/src/server/handlers/tools/record-correction.ts +125 -125
  198. package/src/server/handlers/tools/remember-decision.ts +153 -153
  199. package/src/server/handlers/tools/schemas.ts +253 -253
  200. package/src/server/handlers/tools/search-knowledge-graph.ts +102 -102
  201. package/src/server/handlers/tools/smart-context.ts +146 -146
  202. package/src/server/handlers/tools/update-progress.ts +131 -131
  203. package/src/server/handlers/tools/what-if-analysis.ts +135 -135
  204. package/src/server/http-api.ts +693 -693
  205. package/src/server/index.ts +40 -40
  206. package/src/server/mcp-server.ts +283 -283
  207. package/src/server/providers/index.ts +7 -7
  208. package/src/server/providers/prompts.ts +327 -327
  209. package/src/server/providers/resources.ts +622 -622
  210. package/src/server/services.ts +468 -468
  211. package/src/server/types.ts +39 -39
  212. package/src/server/utils/error-handler.ts +155 -155
  213. package/src/server/utils/index.ts +13 -13
  214. package/src/server/utils/memory-indicator.ts +83 -83
  215. package/src/server/utils/request-context.ts +122 -122
  216. package/src/server/utils/response-formatter.ts +129 -124
  217. package/src/server/utils/validators.ts +210 -210
  218. package/src/setup/index.ts +48 -48
  219. package/src/setup/wizard.ts +461 -461
  220. package/src/tools/index.ts +24 -24
  221. package/src/tools/registry.ts +115 -115
  222. package/src/tools/schemas.test.ts +30 -30
  223. package/src/tools/schemas.ts +617 -617
  224. package/src/tools/types.ts +412 -412
  225. package/src/utils/circuit-breaker.ts +130 -130
  226. package/src/utils/cleanup.ts +34 -34
  227. package/src/utils/error-handler.ts +132 -132
  228. package/src/utils/error-messages.ts +60 -60
  229. package/src/utils/fallback.ts +45 -45
  230. package/src/utils/index.ts +54 -54
  231. package/src/utils/logger-utils.ts +80 -80
  232. package/src/utils/logger.ts +88 -88
  233. package/src/utils/phase12-helper.ts +56 -56
  234. package/src/utils/retry.ts +94 -94
  235. package/src/utils/timing.ts +47 -47
  236. package/src/utils/transaction.ts +63 -63
  237. package/src/vault/frontmatter.ts +264 -264
  238. package/src/vault/index.ts +318 -318
  239. package/src/vault/paths.ts +106 -106
  240. package/src/vault/query.ts +422 -422
  241. package/src/vault/reader.ts +264 -264
  242. package/src/vault/templates.ts +186 -186
  243. package/src/vault/types.ts +73 -73
  244. package/src/vault/watcher.ts +277 -277
  245. package/src/vault/writer.ts +413 -413
  246. package/tsconfig.json +30 -30
@@ -1,416 +1,416 @@
1
- /**
2
- * Entity Extractor
3
- * Rule-based NER using compromise + technology dictionary
4
- */
5
-
6
- import type { EntityType } from './graph/schema'
7
-
8
- export interface ExtractedEntity {
9
- name: string
10
- normalizedName: string
11
- type: EntityType
12
- confidence: number
13
- source: 'dictionary' | 'nlp' | 'rule'
14
- positions: number[]
15
- }
16
-
17
- const TECH_DICTIONARY: Record<string, { aliases: string[]; type: EntityType }> = {
18
- // Languages
19
- typescript: { aliases: ['ts', 'typescript', 'type-script'], type: 'technology' },
20
- javascript: { aliases: ['js', 'javascript', 'ecmascript', 'es6', 'es2015', 'es2020', 'es2021', 'es2022'], type: 'technology' },
21
- python: { aliases: ['py', 'python3', 'python2'], type: 'technology' },
22
- rust: { aliases: ['rust-lang', 'rustlang'], type: 'technology' },
23
- go: { aliases: ['golang', 'go-lang'], type: 'technology' },
24
- java: { aliases: [], type: 'technology' },
25
- csharp: { aliases: ['c#', 'c-sharp', 'dotnet', '.net'], type: 'technology' },
26
- ruby: { aliases: ['rb'], type: 'technology' },
27
- php: { aliases: [], type: 'technology' },
28
- swift: { aliases: [], type: 'technology' },
29
- kotlin: { aliases: ['kt'], type: 'technology' },
30
- scala: { aliases: [], type: 'technology' },
31
- elixir: { aliases: [], type: 'technology' },
32
- clojure: { aliases: ['clj'], type: 'technology' },
33
- haskell: { aliases: ['hs'], type: 'technology' },
34
- lua: { aliases: [], type: 'technology' },
35
- perl: { aliases: ['pl'], type: 'technology' },
36
- r: { aliases: ['r-lang', 'rlang'], type: 'technology' },
37
- dart: { aliases: [], type: 'technology' },
38
- sql: { aliases: ['mysql', 'postgresql', 'postgres', 'sqlite', 'mssql', 'mariadb'], type: 'technology' },
39
- html: { aliases: ['html5'], type: 'technology' },
40
- css: { aliases: ['css3', 'scss', 'sass', 'less', 'stylus'], type: 'technology' },
41
- graphql: { aliases: ['gql'], type: 'technology' },
42
- yaml: { aliases: ['yml'], type: 'technology' },
43
- json: { aliases: [], type: 'technology' },
44
- markdown: { aliases: ['md'], type: 'technology' },
45
- bash: { aliases: ['sh', 'shell', 'zsh', 'fish'], type: 'technology' },
46
- c: { aliases: ['c-lang', 'ansi-c'], type: 'technology' },
47
- cpp: { aliases: ['c++', 'cplusplus'], type: 'technology' },
48
-
49
- // Frontend Frameworks
50
- react: { aliases: ['reactjs', 'react.js', 'react-dom'], type: 'technology' },
51
- vue: { aliases: ['vuejs', 'vue.js', 'vue3', 'vue2'], type: 'technology' },
52
- angular: { aliases: ['angularjs', 'angular.js', 'ng'], type: 'technology' },
53
- svelte: { aliases: ['sveltejs', 'sveltekit'], type: 'technology' },
54
- nextjs: { aliases: ['next.js', 'next', 'nextjs'], type: 'technology' },
55
- nuxt: { aliases: ['nuxtjs', 'nuxt.js'], type: 'technology' },
56
- remix: { aliases: ['remix-run'], type: 'technology' },
57
- astro: { aliases: ['astrojs'], type: 'technology' },
58
- gatsby: { aliases: ['gatsbyjs'], type: 'technology' },
59
- solid: { aliases: ['solidjs', 'solid-js'], type: 'technology' },
60
- preact: { aliases: ['preactjs'], type: 'technology' },
61
- htmx: { aliases: [], type: 'technology' },
62
- jquery: { aliases: [], type: 'technology' },
63
-
64
- // Backend Frameworks
65
- express: { aliases: ['expressjs', 'express.js'], type: 'technology' },
66
- fastify: { aliases: [], type: 'technology' },
67
- hono: { aliases: [], type: 'technology' },
68
- koa: { aliases: ['koajs'], type: 'technology' },
69
- nestjs: { aliases: ['nest.js', 'nest'], type: 'technology' },
70
- django: { aliases: [], type: 'technology' },
71
- flask: { aliases: [], type: 'technology' },
72
- fastapi: { aliases: ['fast-api'], type: 'technology' },
73
- rails: { aliases: ['ruby-on-rails', 'ror'], type: 'technology' },
74
- spring: { aliases: ['spring-boot', 'springboot'], type: 'technology' },
75
- laravel: { aliases: [], type: 'technology' },
76
- phoenix: { aliases: [], type: 'technology' },
77
- gin: { aliases: [], type: 'technology' },
78
- actix: { aliases: ['actix-web'], type: 'technology' },
79
-
80
- // Databases
81
- mongodb: { aliases: ['mongo'], type: 'technology' },
82
- redis: { aliases: [], type: 'technology' },
83
- elasticsearch: { aliases: ['elastic', 'es'], type: 'technology' },
84
- dynamodb: { aliases: ['dynamo'], type: 'technology' },
85
- cassandra: { aliases: [], type: 'technology' },
86
- neo4j: { aliases: [], type: 'technology' },
87
- couchdb: { aliases: ['couch'], type: 'technology' },
88
- firebase: { aliases: ['firestore'], type: 'technology' },
89
- supabase: { aliases: [], type: 'technology' },
90
- prisma: { aliases: [], type: 'technology' },
91
- drizzle: { aliases: ['drizzle-orm'], type: 'technology' },
92
- sequelize: { aliases: [], type: 'technology' },
93
- typeorm: { aliases: [], type: 'technology' },
94
- chromadb: { aliases: ['chroma'], type: 'technology' },
95
- pinecone: { aliases: [], type: 'technology' },
96
- weaviate: { aliases: [], type: 'technology' },
97
- qdrant: { aliases: [], type: 'technology' },
98
-
99
- // Cloud & DevOps
100
- aws: { aliases: ['amazon-web-services', 'amazon'], type: 'technology' },
101
- gcp: { aliases: ['google-cloud', 'google-cloud-platform'], type: 'technology' },
102
- azure: { aliases: ['microsoft-azure'], type: 'technology' },
103
- docker: { aliases: ['dockerfile', 'docker-compose'], type: 'technology' },
104
- kubernetes: { aliases: ['k8s', 'kube'], type: 'technology' },
105
- terraform: { aliases: ['tf'], type: 'technology' },
106
- ansible: { aliases: [], type: 'technology' },
107
- jenkins: { aliases: [], type: 'technology' },
108
- github: { aliases: ['gh'], type: 'technology' },
109
- gitlab: { aliases: [], type: 'technology' },
110
- vercel: { aliases: [], type: 'technology' },
111
- netlify: { aliases: [], type: 'technology' },
112
- cloudflare: { aliases: ['cf'], type: 'technology' },
113
- nginx: { aliases: [], type: 'technology' },
114
- caddy: { aliases: [], type: 'technology' },
115
-
116
- // Tools & Libraries
117
- webpack: { aliases: [], type: 'technology' },
118
- vite: { aliases: [], type: 'technology' },
119
- esbuild: { aliases: [], type: 'technology' },
120
- rollup: { aliases: ['rollupjs'], type: 'technology' },
121
- parcel: { aliases: [], type: 'technology' },
122
- turbopack: { aliases: [], type: 'technology' },
123
- bun: { aliases: ['bunjs'], type: 'technology' },
124
- deno: { aliases: [], type: 'technology' },
125
- node: { aliases: ['nodejs', 'node.js'], type: 'technology' },
126
- npm: { aliases: [], type: 'technology' },
127
- yarn: { aliases: [], type: 'technology' },
128
- pnpm: { aliases: [], type: 'technology' },
129
- git: { aliases: [], type: 'technology' },
130
- jest: { aliases: [], type: 'technology' },
131
- vitest: { aliases: [], type: 'technology' },
132
- mocha: { aliases: [], type: 'technology' },
133
- cypress: { aliases: [], type: 'technology' },
134
- playwright: { aliases: [], type: 'technology' },
135
- eslint: { aliases: [], type: 'technology' },
136
- prettier: { aliases: [], type: 'technology' },
137
- biome: { aliases: ['biomejs'], type: 'technology' },
138
- tailwind: { aliases: ['tailwindcss', 'tailwind-css'], type: 'technology' },
139
- bootstrap: { aliases: [], type: 'technology' },
140
- zod: { aliases: [], type: 'technology' },
141
- trpc: { aliases: ['t-rpc'], type: 'technology' },
142
- graphql: { aliases: ['gql'], type: 'technology' },
143
- grpc: { aliases: ['g-rpc'], type: 'technology' },
144
- websocket: { aliases: ['ws', 'websockets'], type: 'technology' },
145
- oauth: { aliases: ['oauth2', 'oauth2.0'], type: 'technology' },
146
- jwt: { aliases: ['json-web-token'], type: 'technology' },
147
- openai: { aliases: ['gpt', 'chatgpt', 'gpt-4'], type: 'technology' },
148
- anthropic: { aliases: ['claude', 'claude-ai'], type: 'technology' },
149
- langchain: { aliases: [], type: 'technology' },
150
- llamaindex: { aliases: ['llama-index'], type: 'technology' },
151
- huggingface: { aliases: ['hf', 'hugging-face'], type: 'technology' },
152
- tensorflow: { aliases: ['tf'], type: 'technology' },
153
- pytorch: { aliases: ['torch'], type: 'technology' },
154
- pino: { aliases: [], type: 'technology' },
155
- winston: { aliases: [], type: 'technology' },
156
- storybook: { aliases: [], type: 'technology' },
157
- nx: { aliases: [], type: 'technology' },
158
- turborepo: { aliases: [], type: 'technology' },
159
- lerna: { aliases: [], type: 'technology' },
160
- compromise: { aliases: ['compromise-nlp'], type: 'technology' },
161
- minisearch: { aliases: [], type: 'technology' },
162
-
163
- // Concepts
164
- microservices: { aliases: ['micro-services', 'microservice'], type: 'concept' },
165
- monolith: { aliases: ['monolithic'], type: 'concept' },
166
- serverless: { aliases: ['faas', 'lambda'], type: 'concept' },
167
- rest: { aliases: ['restful', 'rest-api'], type: 'concept' },
168
- api: { aliases: ['apis'], type: 'concept' },
169
- ci: { aliases: ['continuous-integration'], type: 'concept' },
170
- cd: { aliases: ['continuous-deployment', 'continuous-delivery'], type: 'concept' },
171
- tdd: { aliases: ['test-driven-development'], type: 'concept' },
172
- bdd: { aliases: ['behavior-driven-development'], type: 'concept' },
173
- ddd: { aliases: ['domain-driven-design'], type: 'concept' },
174
- cqrs: { aliases: ['command-query-responsibility-segregation'], type: 'concept' },
175
- mvc: { aliases: ['model-view-controller'], type: 'concept' },
176
- mvvm: { aliases: ['model-view-viewmodel'], type: 'concept' },
177
- oop: { aliases: ['object-oriented-programming', 'object-oriented'], type: 'concept' },
178
- fp: { aliases: ['functional-programming', 'functional'], type: 'concept' },
179
- ssr: { aliases: ['server-side-rendering'], type: 'concept' },
180
- ssg: { aliases: ['static-site-generation'], type: 'concept' },
181
- spa: { aliases: ['single-page-application', 'single-page-app'], type: 'concept' },
182
- pwa: { aliases: ['progressive-web-app'], type: 'concept' },
183
- mcp: { aliases: ['model-context-protocol'], type: 'concept' },
184
- rag: { aliases: ['retrieval-augmented-generation'], type: 'concept' },
185
- embedding: { aliases: ['embeddings', 'vector-embedding'], type: 'concept' },
186
- 'vector-search': { aliases: ['semantic-search', 'vector-similarity'], type: 'concept' },
187
- }
188
-
189
- // Build reverse lookup: alias → normalized name
190
- const ALIAS_MAP = new Map<string, string>()
191
- for (const [normalized, entry] of Object.entries(TECH_DICTIONARY)) {
192
- ALIAS_MAP.set(normalized.toLowerCase(), normalized)
193
- for (const alias of entry.aliases) {
194
- ALIAS_MAP.set(alias.toLowerCase(), normalized)
195
- }
196
- }
197
-
198
- // File path regex - handles ./path, ~/path, /path, and relative paths
199
- const FILE_PATH_REGEX = /(?:^|[\s(,])((?:\.\/|~\/|\/)?(?:[\w-]+\/)+[\w.-]+\.\w+)/g
200
- // URL regex
201
- const URL_REGEX = /https?:\/\/[^\s),]+/g
202
- // Version number regex
203
- const VERSION_REGEX = /\b[vV]?\d+\.\d+(?:\.\d+)?(?:-[\w.]+)?\b/g
204
-
205
- let nlpModule: any = null
206
-
207
- async function loadNlp(): Promise<any> {
208
- if (!nlpModule) {
209
- try {
210
- nlpModule = (await import('compromise')).default
211
- } catch {
212
- nlpModule = null
213
- }
214
- }
215
- return nlpModule
216
- }
217
-
218
- export class EntityExtractor {
219
- private nlpLoaded = false
220
- private nlp: any = null
221
-
222
- async initialize(): Promise<void> {
223
- this.nlp = await loadNlp()
224
- this.nlpLoaded = this.nlp !== null
225
- }
226
-
227
- extract(text: string): ExtractedEntity[] {
228
- const entities: Map<string, ExtractedEntity> = new Map()
229
-
230
- this.extractFromDictionary(text, entities)
231
- this.extractFilePaths(text, entities)
232
- this.extractUrls(text, entities)
233
- this.extractDates(text, entities)
234
-
235
- if (this.nlpLoaded && this.nlp) {
236
- this.extractWithNlp(text, entities)
237
- }
238
-
239
- return Array.from(entities.values())
240
- .sort((a, b) => b.confidence - a.confidence)
241
- }
242
-
243
- extractBatch(texts: string[]): ExtractedEntity[][] {
244
- return texts.map(text => this.extract(text))
245
- }
246
-
247
- private extractFromDictionary(text: string, entities: Map<string, ExtractedEntity>): void {
248
- const lowerText = text.toLowerCase()
249
- const words = lowerText.split(/[\s,;:()[\]{}"'`|/\\]+/)
250
-
251
- for (const word of words) {
252
- const cleaned = word.replace(/^[^a-z0-9]+|[^a-z0-9]+$/g, '')
253
- if (cleaned.length < 2) continue
254
-
255
- const normalized = ALIAS_MAP.get(cleaned)
256
- if (normalized) {
257
- const dictEntry = TECH_DICTIONARY[normalized]
258
- if (!dictEntry) continue
259
-
260
- const existing = entities.get(normalized)
261
- if (existing) {
262
- existing.positions.push(lowerText.indexOf(cleaned))
263
- existing.confidence = Math.min(1.0, existing.confidence + 0.05)
264
- } else {
265
- entities.set(normalized, {
266
- name: cleaned,
267
- normalizedName: normalized,
268
- type: dictEntry.type,
269
- confidence: 0.95,
270
- source: 'dictionary',
271
- positions: [lowerText.indexOf(cleaned)]
272
- })
273
- }
274
- }
275
- }
276
-
277
- // Also check multi-word aliases
278
- for (const [alias, normalized] of ALIAS_MAP) {
279
- if (alias.includes('-') || alias.includes('.') || alias.includes(' ')) {
280
- if (lowerText.includes(alias) && !entities.has(normalized)) {
281
- const dictEntry = TECH_DICTIONARY[normalized]
282
- if (!dictEntry) continue
283
-
284
- entities.set(normalized, {
285
- name: alias,
286
- normalizedName: normalized,
287
- type: dictEntry.type,
288
- confidence: 0.95,
289
- source: 'dictionary',
290
- positions: [lowerText.indexOf(alias)]
291
- })
292
- }
293
- }
294
- }
295
- }
296
-
297
- private extractFilePaths(text: string, entities: Map<string, ExtractedEntity>): void {
298
- let match: RegExpExecArray | null
299
- const regex = new RegExp(FILE_PATH_REGEX.source, FILE_PATH_REGEX.flags)
300
-
301
- while ((match = regex.exec(text)) !== null) {
302
- const filePath = match[1].trim()
303
- if (filePath.length < 4) continue
304
-
305
- const key = `file:${filePath}`
306
- if (!entities.has(key)) {
307
- entities.set(key, {
308
- name: filePath,
309
- normalizedName: filePath,
310
- type: 'file',
311
- confidence: 0.85,
312
- source: 'rule',
313
- positions: [match.index]
314
- })
315
- }
316
- }
317
- }
318
-
319
- private extractUrls(text: string, entities: Map<string, ExtractedEntity>): void {
320
- let match: RegExpExecArray | null
321
- const regex = new RegExp(URL_REGEX.source, URL_REGEX.flags)
322
-
323
- while ((match = regex.exec(text)) !== null) {
324
- const url = match[0]
325
- const key = `url:${url}`
326
- if (!entities.has(key)) {
327
- entities.set(key, {
328
- name: url,
329
- normalizedName: url,
330
- type: 'file',
331
- confidence: 0.9,
332
- source: 'rule',
333
- positions: [match.index]
334
- })
335
- }
336
- }
337
- }
338
-
339
- private extractDates(text: string, entities: Map<string, ExtractedEntity>): void {
340
- // ISO date pattern
341
- const isoDateRegex = /\b\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?)?\b/g
342
- let match: RegExpExecArray | null
343
-
344
- while ((match = isoDateRegex.exec(text)) !== null) {
345
- const dateStr = match[0]
346
- const key = `date:${dateStr}`
347
- if (!entities.has(key)) {
348
- entities.set(key, {
349
- name: dateStr,
350
- normalizedName: dateStr,
351
- type: 'date',
352
- confidence: 0.9,
353
- source: 'rule',
354
- positions: [match.index]
355
- })
356
- }
357
- }
358
- }
359
-
360
- private extractWithNlp(text: string, entities: Map<string, ExtractedEntity>): void {
361
- try {
362
- const doc = this.nlp(text)
363
-
364
- // Extract people
365
- const people = doc.people()
366
- if (people && people.length > 0) {
367
- for (const person of people.out('array') as string[]) {
368
- const cleaned = person.trim()
369
- if (cleaned.length < 2) continue
370
- const key = `person:${cleaned.toLowerCase()}`
371
- if (!entities.has(key)) {
372
- entities.set(key, {
373
- name: cleaned,
374
- normalizedName: cleaned.toLowerCase(),
375
- type: 'person',
376
- confidence: 0.7,
377
- source: 'nlp',
378
- positions: [text.indexOf(cleaned)]
379
- })
380
- }
381
- }
382
- }
383
-
384
- // Extract organizations
385
- const orgs = doc.organizations()
386
- if (orgs && orgs.length > 0) {
387
- for (const org of orgs.out('array') as string[]) {
388
- const cleaned = org.trim()
389
- if (cleaned.length < 2) continue
390
- // Check if it's already a known technology
391
- const normalizedCheck = ALIAS_MAP.get(cleaned.toLowerCase())
392
- if (normalizedCheck) continue
393
-
394
- const key = `org:${cleaned.toLowerCase()}`
395
- if (!entities.has(key)) {
396
- entities.set(key, {
397
- name: cleaned,
398
- normalizedName: cleaned.toLowerCase(),
399
- type: 'concept',
400
- confidence: 0.65,
401
- source: 'nlp',
402
- positions: [text.indexOf(cleaned)]
403
- })
404
- }
405
- }
406
- }
407
- } catch {
408
- // NLP extraction failure is non-critical
409
- }
410
- }
411
-
412
- static normalizeEntityName(name: string): string {
413
- const lower = name.toLowerCase().trim()
414
- return ALIAS_MAP.get(lower) || lower
415
- }
416
- }
1
+ /**
2
+ * Entity Extractor
3
+ * Rule-based NER using compromise + technology dictionary
4
+ */
5
+
6
+ import type { EntityType } from './graph/schema'
7
+
8
+ export interface ExtractedEntity {
9
+ name: string
10
+ normalizedName: string
11
+ type: EntityType
12
+ confidence: number
13
+ source: 'dictionary' | 'nlp' | 'rule'
14
+ positions: number[]
15
+ }
16
+
17
/**
 * Technology / concept dictionary.
 *
 * Each key is the canonical (normalized) entity name; `aliases` lists the other
 * lowercase spellings that resolve to it. Keys and aliases are flattened into a
 * single reverse-lookup map, so every alias must belong to exactly one entry:
 * a spelling listed twice is silently won by whichever entry comes last.
 */
const TECH_DICTIONARY: Record<string, { aliases: string[]; type: EntityType }> = {
  // Languages
  typescript: { aliases: ['ts', 'typescript', 'type-script'], type: 'technology' },
  javascript: { aliases: ['js', 'javascript', 'ecmascript', 'es6', 'es2015', 'es2020', 'es2021', 'es2022'], type: 'technology' },
  python: { aliases: ['py', 'python3', 'python2'], type: 'technology' },
  rust: { aliases: ['rust-lang', 'rustlang'], type: 'technology' },
  go: { aliases: ['golang', 'go-lang'], type: 'technology' },
  java: { aliases: [], type: 'technology' },
  csharp: { aliases: ['c#', 'c-sharp', 'dotnet', '.net'], type: 'technology' },
  ruby: { aliases: ['rb'], type: 'technology' },
  php: { aliases: [], type: 'technology' },
  swift: { aliases: [], type: 'technology' },
  kotlin: { aliases: ['kt'], type: 'technology' },
  scala: { aliases: [], type: 'technology' },
  elixir: { aliases: [], type: 'technology' },
  clojure: { aliases: ['clj'], type: 'technology' },
  haskell: { aliases: ['hs'], type: 'technology' },
  lua: { aliases: [], type: 'technology' },
  perl: { aliases: ['pl'], type: 'technology' },
  r: { aliases: ['r-lang', 'rlang'], type: 'technology' },
  dart: { aliases: [], type: 'technology' },
  sql: { aliases: ['mysql', 'postgresql', 'postgres', 'sqlite', 'mssql', 'mariadb'], type: 'technology' },
  html: { aliases: ['html5'], type: 'technology' },
  css: { aliases: ['css3', 'scss', 'sass', 'less', 'stylus'], type: 'technology' },
  graphql: { aliases: ['gql'], type: 'technology' },
  yaml: { aliases: ['yml'], type: 'technology' },
  json: { aliases: [], type: 'technology' },
  markdown: { aliases: ['md'], type: 'technology' },
  bash: { aliases: ['sh', 'shell', 'zsh', 'fish'], type: 'technology' },
  c: { aliases: ['c-lang', 'ansi-c'], type: 'technology' },
  cpp: { aliases: ['c++', 'cplusplus'], type: 'technology' },

  // Frontend Frameworks
  react: { aliases: ['reactjs', 'react.js', 'react-dom'], type: 'technology' },
  vue: { aliases: ['vuejs', 'vue.js', 'vue3', 'vue2'], type: 'technology' },
  angular: { aliases: ['angularjs', 'angular.js', 'ng'], type: 'technology' },
  svelte: { aliases: ['sveltejs', 'sveltekit'], type: 'technology' },
  nextjs: { aliases: ['next.js', 'next', 'nextjs'], type: 'technology' },
  nuxt: { aliases: ['nuxtjs', 'nuxt.js'], type: 'technology' },
  remix: { aliases: ['remix-run'], type: 'technology' },
  astro: { aliases: ['astrojs'], type: 'technology' },
  gatsby: { aliases: ['gatsbyjs'], type: 'technology' },
  solid: { aliases: ['solidjs', 'solid-js'], type: 'technology' },
  preact: { aliases: ['preactjs'], type: 'technology' },
  htmx: { aliases: [], type: 'technology' },
  jquery: { aliases: [], type: 'technology' },

  // Backend Frameworks
  express: { aliases: ['expressjs', 'express.js'], type: 'technology' },
  fastify: { aliases: [], type: 'technology' },
  hono: { aliases: [], type: 'technology' },
  koa: { aliases: ['koajs'], type: 'technology' },
  nestjs: { aliases: ['nest.js', 'nest'], type: 'technology' },
  django: { aliases: [], type: 'technology' },
  flask: { aliases: [], type: 'technology' },
  fastapi: { aliases: ['fast-api'], type: 'technology' },
  rails: { aliases: ['ruby-on-rails', 'ror'], type: 'technology' },
  spring: { aliases: ['spring-boot', 'springboot'], type: 'technology' },
  laravel: { aliases: [], type: 'technology' },
  phoenix: { aliases: [], type: 'technology' },
  gin: { aliases: [], type: 'technology' },
  actix: { aliases: ['actix-web'], type: 'technology' },

  // Databases
  mongodb: { aliases: ['mongo'], type: 'technology' },
  redis: { aliases: [], type: 'technology' },
  elasticsearch: { aliases: ['elastic', 'es'], type: 'technology' },
  dynamodb: { aliases: ['dynamo'], type: 'technology' },
  cassandra: { aliases: [], type: 'technology' },
  neo4j: { aliases: [], type: 'technology' },
  couchdb: { aliases: ['couch'], type: 'technology' },
  firebase: { aliases: ['firestore'], type: 'technology' },
  supabase: { aliases: [], type: 'technology' },
  prisma: { aliases: [], type: 'technology' },
  drizzle: { aliases: ['drizzle-orm'], type: 'technology' },
  sequelize: { aliases: [], type: 'technology' },
  typeorm: { aliases: [], type: 'technology' },
  chromadb: { aliases: ['chroma'], type: 'technology' },
  pinecone: { aliases: [], type: 'technology' },
  weaviate: { aliases: [], type: 'technology' },
  qdrant: { aliases: [], type: 'technology' },

  // Cloud & DevOps
  aws: { aliases: ['amazon-web-services', 'amazon'], type: 'technology' },
  gcp: { aliases: ['google-cloud', 'google-cloud-platform'], type: 'technology' },
  azure: { aliases: ['microsoft-azure'], type: 'technology' },
  docker: { aliases: ['dockerfile', 'docker-compose'], type: 'technology' },
  kubernetes: { aliases: ['k8s', 'kube'], type: 'technology' },
  // 'tf' is intentionally not listed here: it is also claimed by tensorflow,
  // whose later entry always won the reverse lookup, so the alias was dead.
  terraform: { aliases: [], type: 'technology' },
  ansible: { aliases: [], type: 'technology' },
  jenkins: { aliases: [], type: 'technology' },
  github: { aliases: ['gh'], type: 'technology' },
  gitlab: { aliases: [], type: 'technology' },
  vercel: { aliases: [], type: 'technology' },
  netlify: { aliases: [], type: 'technology' },
  cloudflare: { aliases: ['cf'], type: 'technology' },
  nginx: { aliases: [], type: 'technology' },
  caddy: { aliases: [], type: 'technology' },

  // Tools & Libraries
  webpack: { aliases: [], type: 'technology' },
  vite: { aliases: [], type: 'technology' },
  esbuild: { aliases: [], type: 'technology' },
  rollup: { aliases: ['rollupjs'], type: 'technology' },
  parcel: { aliases: [], type: 'technology' },
  turbopack: { aliases: [], type: 'technology' },
  bun: { aliases: ['bunjs'], type: 'technology' },
  deno: { aliases: [], type: 'technology' },
  node: { aliases: ['nodejs', 'node.js'], type: 'technology' },
  npm: { aliases: [], type: 'technology' },
  yarn: { aliases: [], type: 'technology' },
  pnpm: { aliases: [], type: 'technology' },
  git: { aliases: [], type: 'technology' },
  jest: { aliases: [], type: 'technology' },
  vitest: { aliases: [], type: 'technology' },
  mocha: { aliases: [], type: 'technology' },
  cypress: { aliases: [], type: 'technology' },
  playwright: { aliases: [], type: 'technology' },
  eslint: { aliases: [], type: 'technology' },
  prettier: { aliases: [], type: 'technology' },
  biome: { aliases: ['biomejs'], type: 'technology' },
  tailwind: { aliases: ['tailwindcss', 'tailwind-css'], type: 'technology' },
  bootstrap: { aliases: [], type: 'technology' },
  zod: { aliases: [], type: 'technology' },
  trpc: { aliases: ['t-rpc'], type: 'technology' },
  // graphql is declared once, under Languages; a second identical entry here
  // was a duplicate object-literal key.
  grpc: { aliases: ['g-rpc'], type: 'technology' },
  websocket: { aliases: ['ws', 'websockets'], type: 'technology' },
  oauth: { aliases: ['oauth2', 'oauth2.0'], type: 'technology' },
  jwt: { aliases: ['json-web-token'], type: 'technology' },
  openai: { aliases: ['gpt', 'chatgpt', 'gpt-4'], type: 'technology' },
  anthropic: { aliases: ['claude', 'claude-ai'], type: 'technology' },
  langchain: { aliases: [], type: 'technology' },
  llamaindex: { aliases: ['llama-index'], type: 'technology' },
  huggingface: { aliases: ['hf', 'hugging-face'], type: 'technology' },
  tensorflow: { aliases: ['tf'], type: 'technology' },
  pytorch: { aliases: ['torch'], type: 'technology' },
  pino: { aliases: [], type: 'technology' },
  winston: { aliases: [], type: 'technology' },
  storybook: { aliases: [], type: 'technology' },
  nx: { aliases: [], type: 'technology' },
  turborepo: { aliases: [], type: 'technology' },
  lerna: { aliases: [], type: 'technology' },
  compromise: { aliases: ['compromise-nlp'], type: 'technology' },
  minisearch: { aliases: [], type: 'technology' },

  // Concepts
  microservices: { aliases: ['micro-services', 'microservice'], type: 'concept' },
  monolith: { aliases: ['monolithic'], type: 'concept' },
  serverless: { aliases: ['faas', 'lambda'], type: 'concept' },
  rest: { aliases: ['restful', 'rest-api'], type: 'concept' },
  api: { aliases: ['apis'], type: 'concept' },
  ci: { aliases: ['continuous-integration'], type: 'concept' },
  cd: { aliases: ['continuous-deployment', 'continuous-delivery'], type: 'concept' },
  tdd: { aliases: ['test-driven-development'], type: 'concept' },
  bdd: { aliases: ['behavior-driven-development'], type: 'concept' },
  ddd: { aliases: ['domain-driven-design'], type: 'concept' },
  cqrs: { aliases: ['command-query-responsibility-segregation'], type: 'concept' },
  mvc: { aliases: ['model-view-controller'], type: 'concept' },
  mvvm: { aliases: ['model-view-viewmodel'], type: 'concept' },
  oop: { aliases: ['object-oriented-programming', 'object-oriented'], type: 'concept' },
  fp: { aliases: ['functional-programming', 'functional'], type: 'concept' },
  ssr: { aliases: ['server-side-rendering'], type: 'concept' },
  ssg: { aliases: ['static-site-generation'], type: 'concept' },
  spa: { aliases: ['single-page-application', 'single-page-app'], type: 'concept' },
  pwa: { aliases: ['progressive-web-app'], type: 'concept' },
  mcp: { aliases: ['model-context-protocol'], type: 'concept' },
  rag: { aliases: ['retrieval-augmented-generation'], type: 'concept' },
  embedding: { aliases: ['embeddings', 'vector-embedding'], type: 'concept' },
  'vector-search': { aliases: ['semantic-search', 'vector-similarity'], type: 'concept' },
}
188
+
189
// Reverse lookup: every lowercase spelling (canonical key or alias) → canonical key.
// Pairs are emitted in dictionary order, key first and then its aliases, so a
// spelling that appears more than once resolves to the last entry listing it.
const ALIAS_MAP = new Map<string, string>(
  Object.entries(TECH_DICTIONARY).flatMap(([canonical, { aliases }]) =>
    [canonical, ...aliases].map((spelling): [string, string] => [spelling.toLowerCase(), canonical])
  )
)
197
+
198
// File path regex - handles ./path, ~/path, /path, and relative paths.
// Requires at least one directory segment and a file extension. The path is
// capture group 1; the overall match also includes the separator in front of
// it (whitespace, "(" or ",") unless the path starts the text.
const FILE_PATH_REGEX = /(?:^|[\s(,])((?:\.\/|~\/|\/)?(?:[\w-]+\/)+[\w.-]+\.\w+)/g
// URL regex - http(s) URLs, running up to the next whitespace, ")" or ","
const URL_REGEX = /https?:\/\/[^\s),]+/g
// Version number regex - optional v/V prefix, major.minor[.patch], optional "-suffix"
const VERSION_REGEX = /\b[vV]?\d+\.\d+(?:\.\d+)?(?:-[\w.]+)?\b/g
204
+
205
// Cached default export of the `compromise` package; stays null until a load succeeds.
let nlpModule: any = null

/**
 * Lazily imports `compromise` and returns its default export.
 * A failed import is swallowed and reported as `null`, and the next call tries again.
 */
async function loadNlp(): Promise<any> {
  if (nlpModule) return nlpModule

  try {
    const loaded = await import('compromise')
    nlpModule = loaded.default
  } catch {
    nlpModule = null
  }
  return nlpModule
}
217
+
218
/**
 * Extracts entities from free text: dictionary technologies and concepts, file
 * paths, URLs, ISO dates and, once the optional NLP module has been loaded,
 * people and organizations.
 *
 * The dictionary and regex passes work straight after construction; call
 * `initialize()` first to enable the NLP pass as well.
 */
export class EntityExtractor {
  /** Aliases made only of these characters are found by the tokenizer pass. */
  private static readonly PLAIN_ALIAS = /^[a-z0-9]+$/
  /** Characters that count as "part of a word" when checking alias boundaries. */
  private static readonly WORD_CHAR = /[a-z0-9]/
  /** Sentence punctuation, quotes and ">" that may trail a URL in prose. */
  private static readonly URL_TRAILING_JUNK = /[.;:!?'">]+$/

  private nlpLoaded = false
  private nlp: any = null

  /** Tries to load the NLP module; on failure the extractor keeps working without it. */
  async initialize(): Promise<void> {
    this.nlp = await loadNlp()
    this.nlpLoaded = this.nlp !== null
  }

  /**
   * Runs every extraction pass over `text`.
   *
   * @param text - The text to scan.
   * @returns The distinct entities found, highest confidence first.
   */
  extract(text: string): ExtractedEntity[] {
    const entities = new Map<string, ExtractedEntity>()

    this.extractFromDictionary(text, entities)
    this.extractFilePaths(text, entities)
    this.extractUrls(text, entities)
    this.extractDates(text, entities)

    if (this.nlpLoaded && this.nlp) {
      this.extractWithNlp(text, entities)
    }

    return Array.from(entities.values())
      .sort((a, b) => b.confidence - a.confidence)
  }

  /** Runs `extract` on each text independently and returns the results in order. */
  extractBatch(texts: string[]): ExtractedEntity[][] {
    return texts.map(text => this.extract(text))
  }

  /**
   * Dictionary pass. Tokens are looked up in the alias map; every occurrence
   * adds its own offset and raises confidence by 0.05 (capped at 1.0).
   * Aliases containing punctuation are then searched for as phrases.
   * Offsets refer to the lowercased text.
   */
  private extractFromDictionary(text: string, entities: Map<string, ExtractedEntity>): void {
    const lowerText = text.toLowerCase()
    // Matching the runs between separators (rather than splitting on them)
    // keeps the offset of each token, so positions point at the real occurrence.
    const tokenRegex = /[^\s,;:()[\]{}"'`|/\\]+/g
    let match: RegExpExecArray | null

    while ((match = tokenRegex.exec(lowerText)) !== null) {
      const token = match[0]
      const lead = token.search(EntityExtractor.WORD_CHAR)
      if (lead === -1) continue // punctuation only

      // Strip surrounding punctuation: "(react)," → "react"
      const cleaned = token.slice(lead).replace(/[^a-z0-9]+$/, '')
      if (cleaned.length < 2) continue

      const normalized = ALIAS_MAP.get(cleaned)
      if (!normalized) continue
      const dictEntry = TECH_DICTIONARY[normalized]
      if (!dictEntry) continue

      const position = match.index + lead
      const existing = entities.get(normalized)
      if (existing) {
        existing.positions.push(position)
        existing.confidence = Math.min(1.0, existing.confidence + 0.05)
      } else {
        entities.set(normalized, {
          name: cleaned,
          normalizedName: normalized,
          type: dictEntry.type,
          confidence: 0.95,
          source: 'dictionary',
          positions: [position]
        })
      }
    }

    // Aliases with punctuation ("c#", "c++", ".net", "ruby-on-rails") may not
    // survive tokenization intact, so look for them as phrases instead.
    for (const [alias, normalized] of ALIAS_MAP) {
      if (EntityExtractor.PLAIN_ALIAS.test(alias) || entities.has(normalized)) continue

      const position = EntityExtractor.findAlias(lowerText, alias)
      if (position === -1) continue

      const dictEntry = TECH_DICTIONARY[normalized]
      if (!dictEntry) continue

      entities.set(normalized, {
        name: alias,
        normalizedName: normalized,
        type: dictEntry.type,
        confidence: 0.95,
        source: 'dictionary',
        positions: [position]
      })
    }
  }

  /**
   * Returns the offset of the first occurrence of `alias` that does not start
   * in the middle of a word, or -1. Only the leading edge is checked, so an
   * alias followed by extra characters (e.g. "gpt-4" in "gpt-4o") still matches,
   * while "c-lang" inside "generic-language" does not.
   */
  private static findAlias(haystack: string, alias: string): number {
    // An alias that itself starts with punctuation (".net") has no word edge to protect
    const needsBoundary = EntityExtractor.WORD_CHAR.test(alias.charAt(0))
    let index = haystack.indexOf(alias)
    while (index !== -1) {
      if (!needsBoundary || index === 0 || !EntityExtractor.WORD_CHAR.test(haystack.charAt(index - 1))) {
        return index
      }
      index = haystack.indexOf(alias, index + 1)
    }
    return -1
  }

  /** Regex pass for file paths; stored under the key `file:<path>`. */
  private extractFilePaths(text: string, entities: Map<string, ExtractedEntity>): void {
    let match: RegExpExecArray | null
    const regex = new RegExp(FILE_PATH_REGEX.source, FILE_PATH_REGEX.flags)

    while ((match = regex.exec(text)) !== null) {
      const captured = match[1] ?? ''
      const filePath = captured.trim()
      if (filePath.length < 4) continue

      const key = `file:${filePath}`
      if (!entities.has(key)) {
        entities.set(key, {
          name: filePath,
          normalizedName: filePath,
          type: 'file',
          confidence: 0.85,
          source: 'rule',
          // The match may begin with a separator character; the path is the
          // capture at its end, so skip whatever precedes it.
          positions: [match.index + (match[0].length - captured.length)]
        })
      }
    }
  }

  /** Regex pass for http(s) URLs; stored under the key `url:<url>` with type 'file'. */
  private extractUrls(text: string, entities: Map<string, ExtractedEntity>): void {
    let match: RegExpExecArray | null
    const regex = new RegExp(URL_REGEX.source, URL_REGEX.flags)

    while ((match = regex.exec(text)) !== null) {
      // "see https://example.com." – the closing punctuation belongs to the sentence
      const url = match[0].replace(EntityExtractor.URL_TRAILING_JUNK, '')
      const key = `url:${url}`
      if (!entities.has(key)) {
        entities.set(key, {
          name: url,
          normalizedName: url,
          type: 'file',
          confidence: 0.9,
          source: 'rule',
          positions: [match.index]
        })
      }
    }
  }

  /** Regex pass for ISO dates (YYYY-MM-DD, optionally with a time and zone). */
  private extractDates(text: string, entities: Map<string, ExtractedEntity>): void {
    // ISO date pattern
    const isoDateRegex = /\b\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?)?\b/g
    let match: RegExpExecArray | null

    while ((match = isoDateRegex.exec(text)) !== null) {
      const dateStr = match[0]
      const key = `date:${dateStr}`
      if (!entities.has(key)) {
        entities.set(key, {
          name: dateStr,
          normalizedName: dateStr,
          type: 'date',
          confidence: 0.9,
          source: 'rule',
          positions: [match.index]
        })
      }
    }
  }

  /**
   * NLP pass: people (confidence 0.7) and organizations (recorded as
   * 'concept', confidence 0.65). Any error raised by the NLP module is
   * swallowed, because this pass is optional.
   */
  private extractWithNlp(text: string, entities: Map<string, ExtractedEntity>): void {
    try {
      const doc = this.nlp(text)

      // Extract people
      const people = doc.people()
      if (people && people.length > 0) {
        for (const person of people.out('array') as string[]) {
          this.addNlpEntity(text, person, 'person', 'person', 0.7, entities)
        }
      }

      // Extract organizations
      const orgs = doc.organizations()
      if (orgs && orgs.length > 0) {
        for (const org of orgs.out('array') as string[]) {
          // Known technologies are the dictionary pass's job
          if (ALIAS_MAP.has(org.trim().toLowerCase())) continue
          this.addNlpEntity(text, org, 'org', 'concept', 0.65, entities)
        }
      }
    } catch {
      // NLP extraction failure is non-critical
    }
  }

  /** Records one NLP hit under `<keyPrefix>:<lowercased name>` unless it is already present. */
  private addNlpEntity(
    text: string,
    raw: string,
    keyPrefix: string,
    type: EntityType,
    confidence: number,
    entities: Map<string, ExtractedEntity>
  ): void {
    const cleaned = raw.trim()
    if (cleaned.length < 2) return

    const normalizedName = cleaned.toLowerCase()
    const key = `${keyPrefix}:${normalizedName}`
    if (entities.has(key)) return

    entities.set(key, {
      name: cleaned,
      normalizedName,
      type,
      confidence,
      source: 'nlp',
      positions: EntityExtractor.locate(text, cleaned)
    })
  }

  /**
   * Finds `needle` in `text`, retrying case-insensitively. Returns an empty
   * array rather than [-1] when the string handed back by the NLP module does
   * not occur in the text as written.
   */
  private static locate(text: string, needle: string): number[] {
    let index = text.indexOf(needle)
    if (index === -1) {
      index = text.toLowerCase().indexOf(needle.toLowerCase())
    }
    return index === -1 ? [] : [index]
  }

  /** Lowercases and trims `name`, then maps it to its dictionary key when it is a known spelling. */
  static normalizeEntityName(name: string): string {
    const lower = name.toLowerCase().trim()
    return ALIAS_MAP.get(lower) ?? lower
  }
}