ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578)
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Query Expander for Legal/Medical Domain + Corpus-Driven Expansion
3
+ *
4
+ * Expands search queries with domain-specific synonyms and corpus cluster top terms.
5
+ * When enabled, the query "injury" also searches for "wound", "trauma", etc.
6
+ * With a database connection, also expands from cluster top_terms_json for
7
+ * corpus-specific vocabulary.
8
+ *
9
+ * CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
10
+ *
11
+ * @module services/search/query-expander
12
+ */
13
+ import type { DatabaseService } from '../storage/database/index.js';
14
+ /**
15
+ * Sanitize a single term for safe inclusion in an FTS5 OR-joined query.
16
+ *
17
+ * 1. Strips all FTS5 metacharacters
18
+ * 2. Returns empty string if the cleaned term is an FTS5 operator (AND/OR/NOT)
19
+ * to prevent accidental operator injection from corpus/table data
20
+ *
21
+ * @param term - Raw term from corpus clusters, table columns, or synonyms
22
+ * @returns Sanitized term safe for FTS5, or empty string if term should be skipped
23
+ */
24
+ export declare function sanitizeFTS5Term(term: string): string;
25
+ /**
26
+ * Get matching table column names from provenance processing_params.
27
+ * Searches for column headers that contain any of the query words.
28
+ *
29
+ * @param db - DatabaseService instance
30
+ * @param queryWords - Lowercased query words to match against column names
31
+ * @returns Array of matching column name terms
32
+ */
33
+ export declare function getTableColumnExpansionTerms(db: DatabaseService, queryWords: string[]): string[];
34
+ /**
35
+ * Query cluster top terms from the database for corpus-specific expansion.
36
+ *
37
+ * For each query word, checks if it appears in any cluster's top_terms_json
38
+ * (clusters with coherence_score > 0.3). Returns other terms from matching
39
+ * clusters as expansion candidates (max 3 per cluster match).
40
+ *
41
+ * @param db - DatabaseService instance
42
+ * @param queryWords - Lowercased query words to match against cluster terms
43
+ * @returns Map of query word -> corpus expansion terms
44
+ */
45
+ export declare function getCorpusExpansionTerms(db: DatabaseService, queryWords: string[]): Record<string, string[]>;
46
+ /**
47
+ * Get detailed expansion information for a query.
48
+ * Shows which words were expanded and what synonyms were found.
49
+ * When a database is provided, also includes corpus-driven expansion from cluster top terms.
50
+ *
51
+ * @param query - Original search query
52
+ * @param db - Optional DatabaseService for corpus-driven expansion
53
+ * @param isTableQuery - Whether the query targets table content
54
+ * @returns Expansion details: original query, new expanded terms, synonym map, corpus terms
55
+ */
56
+ export declare function getExpandedTerms(query: string, db?: DatabaseService, isTableQuery?: boolean): {
57
+ original: string;
58
+ expanded: string[];
59
+ synonyms_found: Record<string, string[]>;
60
+ corpus_terms?: Record<string, string[]>;
61
+ table_column_terms?: string[];
62
+ };
63
+ /**
64
+ * Expand query using static synonyms and optional corpus cluster terms.
65
+ * When isTableQuery is true, also expands with matching table column names.
66
+ *
67
+ * @param query - Original search query
68
+ * @param db - Optional DatabaseService for corpus-driven expansion
69
+ * @param isTableQuery - Whether the query targets table content
70
+ * @returns OR-joined expanded query string
71
+ */
72
+ export declare function expandQuery(query: string, db?: DatabaseService, isTableQuery?: boolean): string;
73
+ //# sourceMappingURL=query-expander.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query-expander.d.ts","sourceRoot":"","sources":["../../../src/services/search/query-expander.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAQpE;;;;;;;;;GASG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAKrD;AA8BD;;;;;;;GAOG;AACH,wBAAgB,4BAA4B,CAC1C,EAAE,EAAE,eAAe,EACnB,UAAU,EAAE,MAAM,EAAE,GACnB,MAAM,EAAE,CA8CV;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CACrC,EAAE,EAAE,eAAe,EACnB,UAAU,EAAE,MAAM,EAAE,GACnB,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAgD1B;AAED;;;;;;;;;GASG;AACH,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,MAAM,EACb,EAAE,CAAC,EAAE,eAAe,EACpB,YAAY,CAAC,EAAE,OAAO,GACrB;IACD,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IACzC,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IACxC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC/B,CAuCA;AAED;;;;;;;;GAQG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,eAAe,EAAE,YAAY,CAAC,EAAE,OAAO,GAAG,MAAM,CA0D/F"}
@@ -0,0 +1,281 @@
1
+ /**
2
+ * Query Expander for Legal/Medical Domain + Corpus-Driven Expansion
3
+ *
4
+ * Expands search queries with domain-specific synonyms and corpus cluster top terms.
5
+ * When enabled, the query "injury" also searches for "wound", "trauma", etc.
6
+ * With a database connection, also expands from cluster top_terms_json for
7
+ * corpus-specific vocabulary.
8
+ *
9
+ * CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
10
+ *
11
+ * @module services/search/query-expander
12
+ */
13
/**
 * Characters that must be stripped from expansion terms.
 *
 * An unquoted FTS5 bareword may only contain ASCII letters, ASCII digits,
 * underscore and non-ASCII characters. Rather than enumerating the forbidden
 * punctuation (the previous list missed "="), this matches every ASCII
 * character that is not a letter, digit, underscore or whitespace.
 * Whitespace is kept here so that leading/trailing blanks can be trimmed
 * afterwards; non-ASCII characters (\u0080 and above) are always kept.
 */
const FTS5_METACHAR_RE = /[^A-Za-z0-9_\s\u0080-\uFFFF]/g;
/** FTS5 boolean operators that corrupt query semantics if used as literal terms */
const FTS5_OPERATOR_WORDS = new Set(['AND', 'OR', 'NOT']);
/**
 * Sanitize a single term for safe inclusion in an FTS5 OR-joined query.
 *
 * 1. Strips every ASCII character that is not valid in an FTS5 bareword
 *    (quotes, parentheses, "*", ":", "=", "-", control characters, ...)
 * 2. Trims surrounding whitespace
 * 3. Returns an empty string if nothing is left, or if the cleaned term is an
 *    FTS5 operator word (AND/OR/NOT, compared case-insensitively), to prevent
 *    accidental operator injection from corpus/table data
 *
 * Interior whitespace is not removed; callers are expected to split
 * multi-word input before calling this function.
 *
 * @param term - Raw term from corpus clusters, table columns, or synonyms
 * @returns Sanitized term safe for FTS5, or empty string if term should be skipped
 */
export function sanitizeFTS5Term(term) {
    // Terms originate from parsed JSON and database rows; never throw on a
    // non-string value, just skip it.
    if (typeof term !== 'string')
        return '';
    const cleaned = term.replace(FTS5_METACHAR_RE, '').trim();
    if (cleaned.length === 0 || FTS5_OPERATOR_WORDS.has(cleaned.toUpperCase()))
        return '';
    return cleaned;
}
35
/**
 * Static domain synonyms, keyed by lowercased query word.
 *
 * The table is built on a null prototype so that a lookup with an arbitrary
 * user-supplied word (e.g. "constructor" or "__proto__") can never resolve to
 * an inherited Object.prototype member. With a plain object literal such a
 * lookup returns a truthy, non-iterable value and the callers that spread or
 * iterate the result throw a TypeError.
 */
const SYNONYM_MAP = Object.assign(Object.create(null), {
    // Legal terms
    injury: ['wound', 'trauma', 'harm', 'damage'],
    accident: ['collision', 'crash', 'incident', 'wreck'],
    plaintiff: ['claimant', 'complainant', 'petitioner'],
    defendant: ['respondent', 'accused'],
    contract: ['agreement', 'covenant', 'pact'],
    negligence: ['carelessness', 'recklessness', 'fault'],
    damages: ['compensation', 'restitution', 'remedy'],
    testimony: ['deposition', 'declaration', 'statement', 'affidavit'],
    evidence: ['exhibit', 'proof', 'documentation'],
    settlement: ['resolution', 'compromise', 'accord'],
    // Medical terms
    fracture: ['break', 'crack', 'rupture'],
    surgery: ['operation', 'procedure', 'intervention'],
    diagnosis: ['assessment', 'evaluation', 'finding'],
    medication: ['drug', 'prescription', 'pharmaceutical', 'medicine'],
    chronic: ['persistent', 'ongoing', 'long-term', 'recurring'],
    pain: ['discomfort', 'ache', 'soreness', 'agony'],
    treatment: ['therapy', 'care', 'intervention', 'management'],
});
/** Max corpus expansion terms per cluster match */
const MAX_CORPUS_TERMS_PER_CLUSTER = 3;
/** Max table column terms to add per query */
const MAX_TABLE_COLUMN_TERMS = 5;
60
/**
 * Get matching table column names from provenance processing_params.
 *
 * Reads up to 500 distinct processing_params values that mention
 * "table_columns", collects every non-empty string found in their
 * `table_columns` arrays, and keeps the columns whose lowercased name
 * contains a query word or is contained in one. Each matching column name is
 * split on whitespace; every word is passed through sanitizeFTS5Term and
 * words of two characters or fewer are dropped.
 *
 * Terms are de-duplicated case-insensitively (first-seen casing is kept)
 * while they are collected, so the MAX_TABLE_COLUMN_TERMS cap counts unique
 * terms only. Database and JSON errors are logged to stderr and never
 * propagate; whatever was collected so far is returned.
 *
 * @param db - DatabaseService instance
 * @param queryWords - Lowercased query words to match against column names
 * @returns Array of matching column name terms (at most MAX_TABLE_COLUMN_TERMS)
 */
export function getTableColumnExpansionTerms(db, queryWords) {
    // lowercased term -> term as first seen; Map preserves insertion order
    const uniqueTerms = new Map();
    try {
        const conn = db.getConnection();
        const rows = conn.prepare("SELECT DISTINCT processing_params FROM provenance WHERE processing_params LIKE '%table_columns%' LIMIT 500").all();
        const allColumns = new Set();
        for (const row of rows) {
            try {
                const params = JSON.parse(row.processing_params);
                // JSON.parse may legitimately yield null or a primitive; that is
                // "no columns", not a parse failure.
                if (params === null || typeof params !== 'object')
                    continue;
                const cols = params.table_columns;
                if (!Array.isArray(cols))
                    continue;
                for (const col of cols) {
                    if (typeof col === 'string' && col.length > 0) {
                        allColumns.add(col);
                    }
                }
            }
            catch (error) {
                console.error(`[query-expander] Failed to parse table_columns from processing_params: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        // Find columns matching any query word
        for (const col of allColumns) {
            const colLower = col.toLowerCase();
            // An empty word would be "contained" in every column name, so ignore it.
            const isMatch = queryWords.some((word) => word.length > 0 && (colLower.includes(word) || word.includes(colLower)));
            if (!isMatch)
                continue;
            // Add individual words from the column name, sanitized for FTS5 safety
            for (const rawWord of col.split(/\s+/)) {
                const safe = sanitizeFTS5Term(rawWord);
                if (safe.length <= 2)
                    continue;
                const key = safe.toLowerCase();
                if (!uniqueTerms.has(key)) {
                    uniqueTerms.set(key, safe);
                }
            }
            if (uniqueTerms.size >= MAX_TABLE_COLUMN_TERMS)
                break;
        }
    }
    catch (error) {
        console.error(`[QueryExpander] Failed to query table column terms: ${String(error)}`);
    }
    return [...uniqueTerms.values()].slice(0, MAX_TABLE_COLUMN_TERMS);
}
112
/**
 * Look up corpus-specific expansion terms from cluster top terms.
 *
 * Loads up to 200 clusters whose top_terms_json is set and whose
 * coherence_score exceeds 0.3, lowercases every term, and for each query word
 * gathers terms from the clusters that contain that word. From every matching
 * cluster at most MAX_CORPUS_TERMS_PER_CLUSTER terms are taken, skipping the
 * word itself and any other query word. The per-word list is de-duplicated
 * in first-seen order; words without any expansion get no entry.
 *
 * Rows whose JSON cannot be parsed are logged and skipped. Any database error
 * is logged and whatever was collected so far is returned.
 *
 * @param db - DatabaseService instance
 * @param queryWords - Lowercased query words to match against cluster terms
 * @returns Map of query word -> corpus expansion terms
 */
export function getCorpusExpansionTerms(db, queryWords) {
    const expansionsByWord = {};
    try {
        const clusterRows = db
            .getConnection()
            .prepare('SELECT top_terms_json FROM clusters WHERE top_terms_json IS NOT NULL AND coherence_score > 0.3 LIMIT 200')
            .all();
        if (clusterRows.length === 0) {
            return expansionsByWord;
        }
        // One lowercased term list per usable cluster row
        const clusters = [];
        clusterRows.forEach((clusterRow) => {
            try {
                const parsed = JSON.parse(clusterRow.top_terms_json);
                if (!Array.isArray(parsed) || parsed.length === 0) {
                    return;
                }
                clusters.push(parsed.map((entry) => String(entry).toLowerCase()));
            }
            catch (error) {
                console.error(`[query-expander] Failed to parse cluster top_terms_json: ${error instanceof Error ? error.message : String(error)}`);
            }
        });
        for (const queryWord of queryWords) {
            // Neighbouring terms from every cluster that mentions this word
            const candidates = clusters
                .filter((cluster) => cluster.includes(queryWord))
                .flatMap((cluster) => cluster
                .filter((candidate) => candidate !== queryWord && !queryWords.includes(candidate))
                .slice(0, MAX_CORPUS_TERMS_PER_CLUSTER));
            if (candidates.length === 0) {
                continue;
            }
            expansionsByWord[queryWord] = Array.from(new Set(candidates));
        }
    }
    catch (error) {
        console.error(`[QueryExpander] Failed to query corpus expansion terms: ${String(error)}`);
    }
    return expansionsByWord;
}
168
/**
 * Get detailed expansion information for a query.
 *
 * The query is lowercased and split on whitespace. For every word with an
 * entry of its own in SYNONYM_MAP the synonyms are recorded. When a database
 * is provided, corpus-driven terms from cluster top terms are appended, and
 * when isTableQuery is also truthy, matching table column terms are appended
 * as well. The `expanded` list is a plain concatenation in that order and may
 * contain duplicates; terms are reported as found and are NOT sanitized for
 * FTS5 here.
 *
 * Synonyms are looked up with an own-property check, so words that happen to
 * name Object.prototype members (e.g. "constructor") are treated as having
 * no synonyms instead of crashing on a non-iterable inherited value.
 *
 * @param query - Original search query
 * @param db - Optional DatabaseService for corpus-driven expansion
 * @param isTableQuery - Whether the query targets table content
 * @returns Expansion details: original query, new expanded terms, synonym map,
 *          and (only when non-empty) corpus terms and table column terms
 */
export function getExpandedTerms(query, db, isTableQuery) {
    const words = query
        .toLowerCase()
        .split(/\s+/)
        .filter((w) => w.length > 0);
    const synonymsFound = {};
    const expanded = [];
    for (const word of words) {
        if (!Object.prototype.hasOwnProperty.call(SYNONYM_MAP, word))
            continue;
        // Copy so callers cannot mutate the shared synonym table through the result
        const synonyms = [...SYNONYM_MAP[word]];
        synonymsFound[word] = synonyms;
        expanded.push(...synonyms);
    }
    // Corpus-driven expansion from cluster top terms
    let corpusTerms;
    if (db) {
        corpusTerms = getCorpusExpansionTerms(db, words);
        for (const terms of Object.values(corpusTerms)) {
            expanded.push(...terms);
        }
    }
    // Table column expansion
    let tableColumnTerms;
    if (isTableQuery && db) {
        tableColumnTerms = getTableColumnExpansionTerms(db, words);
        expanded.push(...tableColumnTerms);
    }
    return {
        original: query,
        expanded,
        synonyms_found: synonymsFound,
        // Optional sections are omitted entirely when they would be empty
        ...(corpusTerms && Object.keys(corpusTerms).length > 0 ? { corpus_terms: corpusTerms } : {}),
        ...(tableColumnTerms && tableColumnTerms.length > 0 ? { table_column_terms: tableColumnTerms } : {}),
    };
}
214
/**
 * Expand query using static synonyms and optional corpus cluster terms.
 * When isTableQuery is true, also expands with matching table column names.
 *
 * Terms are added in this order, de-duplicated, and capped at 20:
 *   1. the original query words,
 *   2. static synonyms of each word,
 *   3. corpus cluster terms (only when db is given),
 *   4. table column terms (only when db is given and isTableQuery is truthy).
 *
 * Every term, including the original words, goes through sanitizeFTS5Term
 * before it is added, so the OR-joined result contains no FTS5 metacharacters
 * or stray operator words. If every term is rejected the result is an empty
 * string.
 *
 * Synonyms are looked up with an own-property check (a word such as
 * "constructor" must not resolve to an inherited Object.prototype member) and
 * fall back to the sanitized form of the word, so `injury,` or `"injury"`
 * expand the same way as `injury`.
 *
 * @param query - Original search query
 * @param db - Optional DatabaseService for corpus-driven expansion
 * @param isTableQuery - Whether the query targets table content
 * @returns OR-joined expanded query string
 */
export function expandQuery(query, db, isTableQuery) {
    // Cap expanded terms to prevent query dilution
    const MAX_EXPANDED_TERMS = 20;
    const words = query
        .toLowerCase()
        .split(/\s+/)
        .filter((w) => w.length > 0);
    const expanded = new Set();
    // Sanitizes one raw term, records it when usable, and returns the sanitized form
    const addTerm = (raw) => {
        const safe = sanitizeFTS5Term(raw);
        if (safe.length > 0)
            expanded.add(safe);
        return safe;
    };
    const lookupSynonyms = (key) => Object.prototype.hasOwnProperty.call(SYNONYM_MAP, key) ? SYNONYM_MAP[key] : undefined;
    // Original words first: the cap keeps the earliest terms
    const safeWords = [];
    for (const word of words) {
        safeWords.push(addTerm(word));
    }
    for (let i = 0; i < words.length; i++) {
        const synonyms = lookupSynonyms(words[i]) || lookupSynonyms(safeWords[i]);
        if (!synonyms)
            continue;
        for (const synonym of synonyms) {
            addTerm(synonym);
        }
    }
    // Corpus-driven expansion from cluster top terms.
    // Corpus terms may be multi-word (e.g. "patient records") -- split into
    // individual words so each becomes a separate OR operand.
    if (db) {
        const corpusTerms = getCorpusExpansionTerms(db, words);
        for (const terms of Object.values(corpusTerms)) {
            for (const term of terms) {
                for (const part of term.split(/\s+/)) {
                    addTerm(part);
                }
            }
        }
    }
    // Table column expansion for table-related queries
    if (isTableQuery && db) {
        const tableTerms = getTableColumnExpansionTerms(db, words);
        for (const term of tableTerms) {
            for (const part of term.toLowerCase().split(/\s+/)) {
                addTerm(part);
            }
        }
    }
    // Set iteration follows insertion order: original words, synonyms, corpus terms, table terms
    return [...expanded].slice(0, MAX_EXPANDED_TERMS).join(' OR ');
}
281
+ //# sourceMappingURL=query-expander.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query-expander.js","sourceRoot":"","sources":["../../../src/services/search/query-expander.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAIH,qEAAqE;AACrE,MAAM,gBAAgB,GAAG,qCAAqC,CAAC;AAE/D,mFAAmF;AACnF,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;AAE1D;;;;;;;;;GASG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpC,IAAI,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;QAAE,OAAO,EAAE,CAAC;IAC9D,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,WAAW,GAA6B;IAC5C,cAAc;IACd,MAAM,EAAE,CAAC,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC;IAC7C,QAAQ,EAAE,CAAC,WAAW,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,CAAC;IACrD,SAAS,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,YAAY,CAAC;IACpD,SAAS,EAAE,CAAC,YAAY,EAAE,SAAS,CAAC;IACpC,QAAQ,EAAE,CAAC,WAAW,EAAE,UAAU,EAAE,MAAM,CAAC;IAC3C,UAAU,EAAE,CAAC,cAAc,EAAE,cAAc,EAAE,OAAO,CAAC;IACrD,OAAO,EAAE,CAAC,cAAc,EAAE,aAAa,EAAE,QAAQ,CAAC;IAClD,SAAS,EAAE,CAAC,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,WAAW,CAAC;IAClE,QAAQ,EAAE,CAAC,SAAS,EAAE,OAAO,EAAE,eAAe,CAAC;IAC/C,UAAU,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,QAAQ,CAAC;IAClD,gBAAgB;IAChB,QAAQ,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,CAAC;IACvC,OAAO,EAAE,CAAC,WAAW,EAAE,WAAW,EAAE,cAAc,CAAC;IACnD,SAAS,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,SAAS,CAAC;IAClD,UAAU,EAAE,CAAC,MAAM,EAAE,cAAc,EAAE,gBAAgB,EAAE,UAAU,CAAC;IAClE,OAAO,EAAE,CAAC,YAAY,EAAE,SAAS,EAAE,WAAW,EAAE,WAAW,CAAC;IAC5D,IAAI,EAAE,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,CAAC;IACjD,SAAS,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,cAAc,EAAE,YAAY,CAAC;CAC7D,CAAC;AAEF,mDAAmD;AACnD,MAAM,4BAA4B,GAAG,CAAC,CAAC;AAEvC,8CAA8C;AAC9C,MAAM,sBAAsB,GAAG,CAAC,CAAC;AAEjC;;;;;;;GAOG;AACH,MAAM,UAAU,4BAA4B,CAC1C,EAAmB,EACnB,UAAoB;IAEpB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,EAAE,CAAC,aAAa,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CACvB,4GAA4G,CAC7G,CAAC,GAAG,EAA0C,CAAC;QAEhD,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;QACrC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,I
AAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,iBAAiB,CAA4B,CAAC;gBAC5E,MAAM,IAAI,GAAG,MAAM,CAAC,aAAa,CAAC;gBAClC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBACxB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;wBACvB,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;4BAC9C,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;wBACtB,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,KAAK,CAAC,0EAA0E,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACpJ,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAC7B,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YACnC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;gBAC9B,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACvD,uEAAuE;oBACvE,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC;yBAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC;yBAC7B,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBAC7B,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,CAAC;oBACxB,MAAM;gBACR,CAAC;YACH,CAAC;YACD,IAAI,KAAK,CAAC,MAAM,IAAI,sBAAsB;gBAAE,MAAM;QACpD,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,uDAAuD,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,sBAAsB,CAAC,CAAC;AAC9D,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,uBAAuB,CACrC,EAAmB,EACnB,UAAoB;IAEpB,MAAM,WAAW,GAA6B,EAAE,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,EAAE,CAAC,aAAa,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI;aACd,OAAO,CACN,0GAA0G,CAC3G;aACA,GAAG,EAAuC,CAAC;QAE9C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,WAAW,CAAC;QAE1C,8BAA8B;QAC9B,MAAM,eAAe,GAAe,EAAE,CAAC;QACvC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;gBAC7C,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7C,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAU,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;gBAC3E,CAA
C;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,KAAK,CAAC,4DAA4D,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACtI,CAAC;QACH,CAAC;QAED,0EAA0E;QAC1E,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,KAAK,MAAM,YAAY,IAAI,eAAe,EAAE,CAAC;gBAC3C,IAAI,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAChC,0DAA0D;oBAC1D,MAAM,UAAU,GAAG,YAAY;yBAC5B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;yBACpD,KAAK,CAAC,CAAC,EAAE,4BAA4B,CAAC,CAAC;oBAC1C,UAAU,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC;YACD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,cAAc;gBACd,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;YAC/C,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,2DAA2D,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC5F,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,gBAAgB,CAC9B,KAAa,EACb,EAAoB,EACpB,YAAsB;IAQtB,MAAM,KAAK,GAAG,KAAK;SAChB,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC/B,MAAM,aAAa,GAA6B,EAAE,CAAC;IACnD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,QAAQ,EAAE,CAAC;YACb,aAAa,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,iDAAiD;IACjD,IAAI,WAAiD,CAAC;IACtD,IAAI,EAAE,EAAE,CAAC;QACP,WAAW,GAAG,uBAAuB,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QACjD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/C,QAAQ,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,yBAAyB;IACzB,IAAI,gBAAsC,CAAC;IAC3C,IAAI,YAAY,IAAI,EAAE,EAAE,CAAC;QACvB,gBAAgB,GAAG,4BAA4B,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QAC3D,QAAQ,CAAC,IAAI,CAAC,GAAG,gBAAgB,CAAC,CAAC;IACrC,CAAC;IAED,OAAO;QACL,QAAQ,EAAE,KAAK;QACf,QAAQ;QACR,cAAc,EAAE,aAAa;QAC7B,GAAG,CAAC,WAAW,IAAI,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAA
E,YAAY,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5F,GAAG,CAAC,gBAAgB,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACrG,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,WAAW,CAAC,KAAa,EAAE,EAAoB,EAAE,YAAsB;IACrF,MAAM,KAAK,GAAG,KAAK;SAChB,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE/B,iFAAiF;IACjF,6EAA6E;IAC7E,+DAA+D;IAC/D,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,QAAQ,EAAE,CAAC;YACb,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;gBAC3B,MAAM,IAAI,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC;gBACnC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;oBAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,wEAAwE;IACxE,0DAA0D;IAC1D,IAAI,EAAE,EAAE,CAAC;QACP,MAAM,WAAW,GAAG,uBAAuB,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QACvD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBACrC,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;oBACpC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;wBAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,IAAI,YAAY,IAAI,EAAE,EAAE,CAAC;QACvB,MAAM,UAAU,GAAG,4BAA4B,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QAC3D,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACnD,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBACpC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;oBAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;IAC5B,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACtB,+CAA+C;QAC/C,wFAAwF;QACxF,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAA
C,MAAM,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC"}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Local Cross-Encoder Search Re-ranker
3
+ *
4
+ * Re-ranks search results using a local cross-encoder model (ms-marco-MiniLM-L-12-v2)
5
+ * via the Python reranker worker. NO Gemini/cloud dependency.
6
+ *
7
+ * If the local model is not available (sentence-transformers not installed, model not
8
+ * downloaded, Python error), returns results as-is with a console.error() warning.
9
+ *
10
+ * CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
11
+ *
12
+ * @module services/search/reranker
13
+ */
14
+ /**
15
+ * Re-rank search results using a local cross-encoder model.
16
+ *
17
+ * Takes the top results (max 20), sends them to the local Python cross-encoder
18
+ * for relevance scoring, and returns sorted results.
19
+ *
20
+ * If the local model is unavailable, returns the original results in their existing
21
+ * order with reasoning indicating no reranking was performed.
22
+ *
23
+ * @param query - The original search query
24
+ * @param results - Search results with original_text field
25
+ * @param maxResults - Maximum results to return after re-ranking (default: 10)
26
+ * @returns Re-ranked results with scores and reasoning
27
+ */
28
+ export declare function rerankResults(query: string, results: Array<{
29
+ original_text: string;
30
+ [key: string]: unknown;
31
+ }>, maxResults?: number): Promise<Array<{
32
+ original_index: number;
33
+ relevance_score: number;
34
+ reasoning: string;
35
+ }>>;
36
+ /**
37
+ * Build a re-rank prompt string (legacy helper, used only by unit tests).
38
+ *
39
+ * @param query - Search query
40
+ * @param excerpts - Array of text excerpts
41
+ * @returns Formatted prompt string
42
+ */
43
+ export declare function buildRerankPrompt(query: string, excerpts: string[]): string;
44
+ //# sourceMappingURL=reranker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"reranker.d.ts","sourceRoot":"","sources":["../../../src/services/search/reranker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAIH;;;;;;;;;;;;;GAaG;AACH,wBAAsB,aAAa,CACjC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,KAAK,CAAC;IAAE,aAAa,EAAE,MAAM,CAAC;IAAC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CAAE,CAAC,EACjE,UAAU,GAAE,MAAW,GACtB,OAAO,CAAC,KAAK,CAAC;IAAE,cAAc,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CA8DxF;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAAE,GACjB,MAAM,CAaR"}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Local Cross-Encoder Search Re-ranker
3
+ *
4
+ * Re-ranks search results using a local cross-encoder model (ms-marco-MiniLM-L-12-v2)
5
+ * via the Python reranker worker. NO Gemini/cloud dependency.
6
+ *
7
+ * If the local model is not available (sentence-transformers not installed, model not
8
+ * downloaded, Python error), returns results as-is with a console.error() warning.
9
+ *
10
+ * CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
11
+ *
12
+ * @module services/search/reranker
13
+ */
14
+ import { localRerank } from './local-reranker.js';
15
/**
 * Re-rank search results with the local cross-encoder.
 *
 * At most the first 20 results are considered. Each is turned into a passage
 * `{ index, text, original_score }`, where original_score is the numeric
 * bm25_score if present, otherwise the numeric score, otherwise 0. The
 * passages are handed to localRerank together with the query.
 *
 * When localRerank resolves to null, or when none of the returned entries
 * carries an index inside the candidate range, a warning is written to stderr
 * and the candidates are returned in their existing order with a relevance
 * score of 0 and a reasoning string explaining why nothing was reranked.
 * Otherwise the in-range entries are sorted by relevance_score, highest
 * first, and truncated to maxResults.
 *
 * @param query - The original search query
 * @param results - Search results with original_text field
 * @param maxResults - Maximum results to return after re-ranking (default: 10)
 * @returns Re-ranked results with scores and reasoning
 */
export async function rerankResults(query, results, maxResults = 10) {
    if (results.length === 0) {
        return [];
    }
    // Only the leading 20 results are sent to the cross-encoder
    const candidates = results.slice(0, 20);
    // Shared fallback: keep the incoming order and only annotate the reason
    const keepOriginalOrder = (reasoning) => candidates
        .slice(0, maxResults)
        .map((_, position) => ({
        original_index: position,
        relevance_score: 0,
        reasoning,
    }));
    const passages = candidates.map((candidate, position) => ({
        index: position,
        text: String(candidate.original_text),
        original_score: typeof candidate.bm25_score === 'number'
            ? candidate.bm25_score
            : typeof candidate.score === 'number'
                ? candidate.score
                : 0,
    }));
    const scored = await localRerank(query, passages);
    if (scored === null) {
        // Local model not available
        console.error('[reranker] Local cross-encoder unavailable. Returning results without reranking. ' +
            'Install sentence-transformers for local reranking: pip install sentence-transformers');
        return keepOriginalOrder('local cross-encoder unavailable, original order preserved');
    }
    // Discard entries pointing outside the candidate list
    const inRange = scored.filter((entry) => entry.index >= 0 && entry.index < candidates.length);
    if (inRange.length === 0 && candidates.length > 0) {
        console.error(`[reranker] Cross-encoder returned 0 valid results from ${candidates.length} inputs. ` +
            'Returning results without reranking.');
        return keepOriginalOrder('cross-encoder returned no valid results, original order preserved');
    }
    inRange.sort((left, right) => right.relevance_score - left.relevance_score);
    return inRange.slice(0, maxResults).map((entry) => ({
        original_index: entry.index,
        relevance_score: entry.relevance_score,
        reasoning: `cross-encoder score: ${entry.relevance_score.toFixed(4)}`,
    }));
}
81
/**
 * Assemble the textual re-rank prompt (legacy helper, used only by unit tests).
 *
 * Every excerpt is cut to its first 500 characters, prefixed with its
 * zero-based position in square brackets, and the entries are separated by a
 * blank line before being embedded in the fixed instruction text.
 *
 * @param query - Search query
 * @param excerpts - Array of text excerpts
 * @returns Formatted prompt string
 */
export function buildRerankPrompt(query, excerpts) {
    const numbered = [];
    excerpts.forEach((excerpt, position) => {
        numbered.push(`[${position}] ${excerpt.slice(0, 500)}`);
    });
    const formattedExcerpts = numbered.join('\n\n');
    return `You are a legal document search relevance expert. Given a search query and a list of document excerpts, score each excerpt's relevance to the query on a scale of 0-10.

Query: "${query}"

Excerpts:
${formattedExcerpts}

Score each excerpt's relevance to the query. Return a JSON object with a "rankings" array containing objects with "index" (number), "relevance_score" (0-10), and "reasoning" (string).`;
}
101
+ //# sourceMappingURL=reranker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"reranker.js","sourceRoot":"","sources":["../../../src/services/search/reranker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAElD;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAa,EACb,OAAiE,EACjE,aAAqB,EAAE;IAEvB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,mEAAmE;IACnE,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC;IAEhE,wCAAwC;IACxC,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACrC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,OAAO,CAAC,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;YACrC,aAAa,GAAG,CAAC,CAAC,UAAU,CAAC;QAC/B,CAAC;aAAM,IAAI,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACvC,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC;QAC1B,CAAC;QACD,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,aAAa,CAAC,EAAE,cAAc,EAAE,aAAa,EAAE,CAAC;IACpF,CAAC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,MAAM,WAAW,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAExD,IAAI,YAAY,KAAK,IAAI,EAAE,CAAC;QAC1B,qEAAqE;QACrE,OAAO,CAAC,KAAK,CACX,mFAAmF;YACnF,sFAAsF,CACvF,CAAC;QACF,OAAO,QAAQ;aACZ,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC;aACpB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACd,cAAc,EAAE,CAAC;YACjB,eAAe,EAAE,CAAC;YAClB,SAAS,EAAE,2DAA2D;SACvE,CAAC,CAAC,CAAC;IACR,CAAC;IAED,+BAA+B;IAC/B,MAAM,YAAY,GAAG,YAAY,CAAC,MAAM,CACtC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,MAAM,CACjD,CAAC;IAEF,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrD,OAAO,CAAC,KAAK,CACX,0DAA0D,QAAQ,CAAC,MAAM,WAAW;YACpF,sCAAsC,CACvC,CAAC;QACF,OAAO,QAAQ;aACZ,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC;aACpB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACd,cAAc,EAAE,CAAC;YACjB,eAAe,EAAE,CAAC;YAClB,SAAS,EAAE,mEAAmE;SAC/E,CAAC,CAAC,CAAC;IACR,CAAC;IAED,sDAAsD;IACtD,OAAO,YAAY;SAChB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,eAAe,CAAC;SACrD,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC;SACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACX,cAAc,EAAE,CAAC,CAA
C,KAAK;QACvB,eAAe,EAAE,CAAC,CAAC,eAAe;QAClC,SAAS,EAAE,wBAAwB,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;KAClE,CAAC,CAAC,CAAC;AACR,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAC/B,KAAa,EACb,QAAkB;IAElB,MAAM,iBAAiB,GAAG,QAAQ;SAC/B,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;SAChD,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO;;UAEC,KAAK;;;EAGb,iBAAiB;;wLAEqK,CAAC;AACzL,CAAC"}
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Annotation Database Operations
3
+ *
4
+ * CRUD operations for document annotations with threading support.
5
+ * Annotations can be attached to documents or specific chunks, with
6
+ * optional page numbers and threaded replies via parent_id.
7
+ *
8
+ * CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
9
+ *
10
+ * @module database/annotation-operations
11
+ */
12
+ import type Database from 'better-sqlite3';
13
+ export interface CreateAnnotationParams { // Input accepted by createAnnotation().
14
+ document_id: string; // Required. createAnnotation() is documented to throw if the document is not found.
15
+ user_id?: string | null; // Optional; may be omitted or null.
16
+ chunk_id?: string | null; // Optional; createAnnotation() is documented to throw if the chunk is not found.
17
+ page_number?: number | null; // Optional page number (numbering base is not visible here -- TODO confirm).
18
+ annotation_type: string; // Required. NOTE(review): the set of accepted values is not visible in this declaration.
19
+ content: string; // Required annotation text.
20
+ parent_id?: string | null; // Optional parent annotation ID; used for threaded replies per the module header.
21
+ }
22
+ export interface AnnotationRow { // Row shape returned by the annotation CRUD functions in this module.
23
+ id: string; // Annotation ID, as accepted by getAnnotation()/updateAnnotation()/deleteAnnotation().
24
+ document_id: string;
25
+ user_id: string | null;
26
+ chunk_id: string | null;
27
+ page_number: number | null;
28
+ annotation_type: string;
29
+ content: string; // Can be changed through updateAnnotation().
30
+ status: string; // Can be changed through updateAnnotation(); NOTE(review): allowed values are not visible here.
31
+ parent_id: string | null; // Parent annotation ID for threaded replies (see module header); nullable.
32
+ created_at: string; // Typed as string; presumably a serialized timestamp -- format not visible here, TODO confirm.
33
+ updated_at: string; // Typed as string; presumably a serialized timestamp -- format not visible here, TODO confirm.
34
+ }
35
+ export interface ListAnnotationsOptions { // Filter and pagination options accepted by listAnnotations().
36
+ document_id: string; // Required: the document whose annotations are listed.
37
+ annotation_type?: string; // Optional filter.
38
+ status?: string; // Optional filter.
39
+ user_id?: string; // Optional filter.
40
+ page_number?: number; // Optional filter.
41
+ limit?: number; // Optional pagination size. NOTE(review): the default is not visible in this declaration.
42
+ offset?: number; // Optional pagination offset. NOTE(review): the default is not visible in this declaration.
43
+ }
44
+ /**
45
+ * Create a new annotation on a document or chunk.
46
+ *
47
+ * @param conn - better-sqlite3 database connection
48
+ * @param params - Annotation creation parameters (see CreateAnnotationParams)
49
+ * @returns The created annotation row (AnnotationRow)
50
+ * @throws Error if document, chunk, or parent annotation not found
51
+ */
52
+ export declare function createAnnotation(conn: Database.Database, params: CreateAnnotationParams): AnnotationRow;
53
+ /**
54
+ * Get a single annotation by ID.
55
+ *
56
+ * @param conn - better-sqlite3 database connection
57
+ * @param id - Annotation ID
58
+ * @returns The matching AnnotationRow, or null if not found
59
+ */
60
+ export declare function getAnnotation(conn: Database.Database, id: string): AnnotationRow | null;
61
+ /**
62
+ * Get an annotation together with its threaded replies.
63
+ *
64
+ * @param conn - better-sqlite3 database connection
65
+ * @param id - Annotation ID
66
+ * @returns An object with `annotation` and `replies` (AnnotationRow[]), or null if not found
67
+ */
68
+ export declare function getAnnotationWithThread(conn: Database.Database, id: string): {
69
+ annotation: AnnotationRow;
70
+ replies: AnnotationRow[];
71
+ } | null;
72
+ /**
73
+ * List annotations for a document with optional filters.
74
+ *
75
+ * @param conn - better-sqlite3 database connection
76
+ * @param opts - Filter and pagination options; document_id is required (see ListAnnotationsOptions)
77
+ * @returns An object with `annotations` (paginated AnnotationRow[]) and `total` (count)
78
+ */
79
+ export declare function listAnnotations(conn: Database.Database, opts: ListAnnotationsOptions): {
80
+ annotations: AnnotationRow[];
81
+ total: number;
82
+ };
83
+ /**
84
+ * Update an annotation's content and/or status.
85
+ *
86
+ * @param conn - better-sqlite3 database connection
87
+ * @param id - Annotation ID
88
+ * @param updates - Fields to update; `content` and `status` are each optional in the type
89
+ * @returns The updated annotation row (AnnotationRow)
90
+ * @throws Error if annotation not found
91
+ */
92
+ export declare function updateAnnotation(conn: Database.Database, id: string, updates: {
93
+ content?: string;
94
+ status?: string;
95
+ }): AnnotationRow;
96
+ /**
97
+ * Delete an annotation by ID. Returns nothing.
98
+ * Child annotations (replies) are cascade-deleted by the FK constraint.
99
+ *
100
+ * @param conn - better-sqlite3 database connection
101
+ * @param id - Annotation ID
102
+ * @throws Error if annotation not found
103
+ */
104
+ export declare function deleteAnnotation(conn: Database.Database, id: string): void;
105
+ /**
106
+ * Get annotation summary statistics for a document.
107
+ *
108
+ * @param conn - better-sqlite3 database connection
109
+ * @param documentId - Document ID
110
+ * @returns Summary with counts by type, status, and totals (loosely typed; exact keys are not visible in this declaration)
111
+ */
112
+ export declare function getAnnotationSummary(conn: Database.Database, documentId: string): Record<string, unknown>;
113
+ //# sourceMappingURL=annotation-operations.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"annotation-operations.d.ts","sourceRoot":"","sources":["../../../../src/services/storage/database/annotation-operations.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,QAAQ,MAAM,gBAAgB,CAAC;AAO3C,MAAM,WAAW,sBAAsB;IACrC,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,eAAe,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,eAAe,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,sBAAsB;IACrC,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAMD;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,sBAAsB,GAAG,aAAa,CA2CvG;AAED;;;;;;GAMG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAEvF;AAED;;;;;;GAMG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,QAAQ,CAAC,QAAQ,EACvB,EAAE,EAAE,MAAM,GACT;IAAE,UAAU,EAAE,aAAa,CAAC;IAAC,OAAO,EAAE,aAAa,EAAE,CAAA;CAAE,GAAG,IAAI,CAShE;AAED;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC7B,IAAI,EAAE,QAAQ,CAAC,QAAQ,EACvB,IAAI,EAAE,sBAAsB,GAC3B;IAAE,WAAW,EAAE,aAAa,EAAE,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAmCjD;AAED;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,QAAQ,CAAC,QAAQ,EACvB,EAAE,EAAE,MAAM,EACV,OAAO,EAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC7C,aAAa,CAsBf;AAED;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,GAAG,IAAI,CAM1E;AAED;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,QAAQ,CAAC,QAAQ,EACvB,UAAU,EAAE,MAAM,GAC
jB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CA2BzB"}