@atomicmemory/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (589) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/LICENSE +201 -0
  3. package/README.md +314 -0
  4. package/dist/app/bind-ephemeral.d.ts +18 -0
  5. package/dist/app/bind-ephemeral.js +22 -0
  6. package/dist/app/cors-headers.d.ts +12 -0
  7. package/dist/app/cors-headers.js +18 -0
  8. package/dist/app/create-app.d.ts +25 -0
  9. package/dist/app/create-app.js +156 -0
  10. package/dist/app/runtime-config-route-snapshot.d.ts +27 -0
  11. package/dist/app/runtime-config-route-snapshot.js +27 -0
  12. package/dist/app/runtime-container.d.ts +281 -0
  13. package/dist/app/runtime-container.js +297 -0
  14. package/dist/app/startup-checks.d.ts +28 -0
  15. package/dist/app/startup-checks.js +45 -0
  16. package/dist/bin.d.ts +17 -0
  17. package/dist/bin.js +128 -0
  18. package/dist/config.d.ts +680 -0
  19. package/dist/config.js +808 -0
  20. package/dist/db/agent-trust-repository.d.ts +49 -0
  21. package/dist/db/agent-trust-repository.js +66 -0
  22. package/dist/db/belief-edges-repository.d.ts +68 -0
  23. package/dist/db/belief-edges-repository.js +124 -0
  24. package/dist/db/claim-repository.d.ts +6 -0
  25. package/dist/db/claim-repository.js +4 -0
  26. package/dist/db/contradictions-repository.d.ts +56 -0
  27. package/dist/db/contradictions-repository.js +88 -0
  28. package/dist/db/document-chunk-repository.d.ts +48 -0
  29. package/dist/db/document-chunk-repository.js +145 -0
  30. package/dist/db/document-chunk-types.d.ts +35 -0
  31. package/dist/db/document-chunk-types.js +9 -0
  32. package/dist/db/document-list-cursor.d.ts +45 -0
  33. package/dist/db/document-list-cursor.js +111 -0
  34. package/dist/db/document-list-repository.d.ts +103 -0
  35. package/dist/db/document-list-repository.js +204 -0
  36. package/dist/db/entity-cards-repository.d.ts +37 -0
  37. package/dist/db/entity-cards-repository.js +46 -0
  38. package/dist/db/entity-values-repository.d.ts +26 -0
  39. package/dist/db/entity-values-repository.js +57 -0
  40. package/dist/db/link-repository.d.ts +30 -0
  41. package/dist/db/link-repository.js +54 -0
  42. package/dist/db/memory-repository.d.ts +163 -0
  43. package/dist/db/memory-repository.js +232 -0
  44. package/dist/db/migrate.d.ts +6 -0
  45. package/dist/db/migrate.js +36 -0
  46. package/dist/db/mmr.d.ts +14 -0
  47. package/dist/db/mmr.js +57 -0
  48. package/dist/db/passport-feed-repository.d.ts +91 -0
  49. package/dist/db/passport-feed-repository.js +198 -0
  50. package/dist/db/pg-episode-store.d.ts +19 -0
  51. package/dist/db/pg-episode-store.js +17 -0
  52. package/dist/db/pg-link-store.d.ts +17 -0
  53. package/dist/db/pg-link-store.js +14 -0
  54. package/dist/db/pg-memory-store.d.ts +68 -0
  55. package/dist/db/pg-memory-store.js +53 -0
  56. package/dist/db/pg-recap-store.d.ts +13 -0
  57. package/dist/db/pg-recap-store.js +19 -0
  58. package/dist/db/pg-representation-store.d.ts +17 -0
  59. package/dist/db/pg-representation-store.js +17 -0
  60. package/dist/db/pg-search-store.d.ts +29 -0
  61. package/dist/db/pg-search-store.js +47 -0
  62. package/dist/db/pool.d.ts +5 -0
  63. package/dist/db/pool.js +21 -0
  64. package/dist/db/ppr.d.ts +56 -0
  65. package/dist/db/ppr.js +178 -0
  66. package/dist/db/query-helpers.d.ts +44 -0
  67. package/dist/db/query-helpers.js +60 -0
  68. package/dist/db/raw-doc-artifact-sync.d.ts +128 -0
  69. package/dist/db/raw-doc-artifact-sync.js +259 -0
  70. package/dist/db/raw-document-blob-repository.d.ts +148 -0
  71. package/dist/db/raw-document-blob-repository.js +300 -0
  72. package/dist/db/raw-document-repository.d.ts +104 -0
  73. package/dist/db/raw-document-repository.js +410 -0
  74. package/dist/db/raw-document-status-repository.d.ts +122 -0
  75. package/dist/db/raw-document-status-repository.js +183 -0
  76. package/dist/db/raw-document-types.d.ts +236 -0
  77. package/dist/db/raw-document-types.js +10 -0
  78. package/dist/db/raw-storage-reconciliation-repository.d.ts +110 -0
  79. package/dist/db/raw-storage-reconciliation-repository.js +200 -0
  80. package/dist/db/reflection-jobs-repository.d.ts +33 -0
  81. package/dist/db/reflection-jobs-repository.js +48 -0
  82. package/dist/db/reflections-repository.d.ts +41 -0
  83. package/dist/db/reflections-repository.js +83 -0
  84. package/dist/db/repository-claims.d.ts +141 -0
  85. package/dist/db/repository-claims.js +376 -0
  86. package/dist/db/repository-deferred-audn.d.ts +33 -0
  87. package/dist/db/repository-deferred-audn.js +69 -0
  88. package/dist/db/repository-document-delete.d.ts +53 -0
  89. package/dist/db/repository-document-delete.js +156 -0
  90. package/dist/db/repository-entities.d.ts +114 -0
  91. package/dist/db/repository-entities.js +317 -0
  92. package/dist/db/repository-entity-attributes.d.ts +41 -0
  93. package/dist/db/repository-entity-attributes.js +65 -0
  94. package/dist/db/repository-entity-graph.d.ts +32 -0
  95. package/dist/db/repository-entity-graph.js +87 -0
  96. package/dist/db/repository-first-mentions.d.ts +41 -0
  97. package/dist/db/repository-first-mentions.js +79 -0
  98. package/dist/db/repository-lessons.d.ts +51 -0
  99. package/dist/db/repository-lessons.js +90 -0
  100. package/dist/db/repository-links.d.ts +26 -0
  101. package/dist/db/repository-links.js +105 -0
  102. package/dist/db/repository-observation.d.ts +26 -0
  103. package/dist/db/repository-observation.js +51 -0
  104. package/dist/db/repository-read.d.ts +56 -0
  105. package/dist/db/repository-read.js +271 -0
  106. package/dist/db/repository-recaps.d.ts +59 -0
  107. package/dist/db/repository-recaps.js +158 -0
  108. package/dist/db/repository-representations.d.ts +48 -0
  109. package/dist/db/repository-representations.js +162 -0
  110. package/dist/db/repository-temporal-state.d.ts +35 -0
  111. package/dist/db/repository-temporal-state.js +46 -0
  112. package/dist/db/repository-tll.d.ts +88 -0
  113. package/dist/db/repository-tll.js +179 -0
  114. package/dist/db/repository-types.d.ts +313 -0
  115. package/dist/db/repository-types.js +142 -0
  116. package/dist/db/repository-user-profiles.d.ts +17 -0
  117. package/dist/db/repository-user-profiles.js +28 -0
  118. package/dist/db/repository-vector-search.d.ts +33 -0
  119. package/dist/db/repository-vector-search.js +373 -0
  120. package/dist/db/repository-wipe.d.ts +34 -0
  121. package/dist/db/repository-wipe.js +94 -0
  122. package/dist/db/repository-write.d.ts +61 -0
  123. package/dist/db/repository-write.js +279 -0
  124. package/dist/db/schema.sql +1355 -0
  125. package/dist/db/storage-artifact-delete-tx.d.ts +56 -0
  126. package/dist/db/storage-artifact-delete-tx.js +123 -0
  127. package/dist/db/storage-artifact-providers.d.ts +21 -0
  128. package/dist/db/storage-artifact-providers.js +21 -0
  129. package/dist/db/storage-artifact-recovery-repository.d.ts +66 -0
  130. package/dist/db/storage-artifact-recovery-repository.js +58 -0
  131. package/dist/db/storage-artifact-repository.d.ts +329 -0
  132. package/dist/db/storage-artifact-repository.js +497 -0
  133. package/dist/db/stores.d.ts +220 -0
  134. package/dist/db/stores.js +12 -0
  135. package/dist/db/summaries-repository.d.ts +74 -0
  136. package/dist/db/summaries-repository.js +125 -0
  137. package/dist/eval/beam-10m-loader.d.ts +98 -0
  138. package/dist/eval/beam-10m-loader.js +128 -0
  139. package/dist/index.d.ts +18 -0
  140. package/dist/index.js +17 -0
  141. package/dist/middleware/require-bearer.d.ts +27 -0
  142. package/dist/middleware/require-bearer.js +60 -0
  143. package/dist/middleware/validate-response.d.ts +33 -0
  144. package/dist/middleware/validate-response.js +55 -0
  145. package/dist/middleware/validate.d.ts +43 -0
  146. package/dist/middleware/validate.js +85 -0
  147. package/dist/routes/agents.d.ts +13 -0
  148. package/dist/routes/agents.js +89 -0
  149. package/dist/routes/document-response-formatters.d.ts +98 -0
  150. package/dist/routes/document-response-formatters.js +243 -0
  151. package/dist/routes/documents.d.ts +74 -0
  152. package/dist/routes/documents.js +425 -0
  153. package/dist/routes/memories.d.ts +29 -0
  154. package/dist/routes/memories.js +725 -0
  155. package/dist/routes/memory-response-formatters.d.ts +179 -0
  156. package/dist/routes/memory-response-formatters.js +210 -0
  157. package/dist/routes/public-raw-storage-metadata.d.ts +54 -0
  158. package/dist/routes/public-raw-storage-metadata.js +56 -0
  159. package/dist/routes/reflect.d.ts +14 -0
  160. package/dist/routes/reflect.js +19 -0
  161. package/dist/routes/response-schema-map.d.ts +14 -0
  162. package/dist/routes/response-schema-map.js +69 -0
  163. package/dist/routes/route-errors.d.ts +12 -0
  164. package/dist/routes/route-errors.js +30 -0
  165. package/dist/routes/storage-error-handlers.d.ts +34 -0
  166. package/dist/routes/storage-error-handlers.js +185 -0
  167. package/dist/routes/storage-response-formatters.d.ts +44 -0
  168. package/dist/routes/storage-response-formatters.js +155 -0
  169. package/dist/routes/storage.d.ts +38 -0
  170. package/dist/routes/storage.js +369 -0
  171. package/dist/routes/upstream-provider-errors.d.ts +19 -0
  172. package/dist/routes/upstream-provider-errors.js +95 -0
  173. package/dist/schemas/agents.d.ts +79 -0
  174. package/dist/schemas/agents.js +126 -0
  175. package/dist/schemas/common.d.ts +110 -0
  176. package/dist/schemas/common.js +190 -0
  177. package/dist/schemas/document-list-responses.d.ts +102 -0
  178. package/dist/schemas/document-list-responses.js +87 -0
  179. package/dist/schemas/document-list-schemas.d.ts +123 -0
  180. package/dist/schemas/document-list-schemas.js +174 -0
  181. package/dist/schemas/document-response-schemas.d.ts +610 -0
  182. package/dist/schemas/document-response-schemas.js +264 -0
  183. package/dist/schemas/document-status-envelope.d.ts +48 -0
  184. package/dist/schemas/document-status-envelope.js +54 -0
  185. package/dist/schemas/documents.d.ts +292 -0
  186. package/dist/schemas/documents.js +449 -0
  187. package/dist/schemas/errors.d.ts +75 -0
  188. package/dist/schemas/errors.js +105 -0
  189. package/dist/schemas/memories.d.ts +378 -0
  190. package/dist/schemas/memories.js +542 -0
  191. package/dist/schemas/openapi.d.ts +24 -0
  192. package/dist/schemas/openapi.js +1038 -0
  193. package/dist/schemas/response-scalars.d.ts +10 -0
  194. package/dist/schemas/response-scalars.js +10 -0
  195. package/dist/schemas/responses.d.ts +536 -0
  196. package/dist/schemas/responses.js +350 -0
  197. package/dist/schemas/search-response-parts.d.ts +97 -0
  198. package/dist/schemas/search-response-parts.js +103 -0
  199. package/dist/schemas/storage-schemas.d.ts +175 -0
  200. package/dist/schemas/storage-schemas.js +277 -0
  201. package/dist/schemas/zod-setup.d.ts +15 -0
  202. package/dist/schemas/zod-setup.js +17 -0
  203. package/dist/server.d.ts +13 -0
  204. package/dist/server.js +57 -0
  205. package/dist/services/abstract-query-policy.d.ts +13 -0
  206. package/dist/services/abstract-query-policy.js +50 -0
  207. package/dist/services/affinity-clustering.d.ts +66 -0
  208. package/dist/services/affinity-clustering.js +125 -0
  209. package/dist/services/agentic-retrieval.d.ts +38 -0
  210. package/dist/services/agentic-retrieval.js +126 -0
  211. package/dist/services/answer-format.d.ts +56 -0
  212. package/dist/services/answer-format.js +118 -0
  213. package/dist/services/answer-rescue.d.ts +72 -0
  214. package/dist/services/answer-rescue.js +177 -0
  215. package/dist/services/answer-verifier.d.ts +24 -0
  216. package/dist/services/answer-verifier.js +73 -0
  217. package/dist/services/api-retry.d.ts +6 -0
  218. package/dist/services/api-retry.js +41 -0
  219. package/dist/services/assistant-turn-filter.d.ts +20 -0
  220. package/dist/services/assistant-turn-filter.js +69 -0
  221. package/dist/services/atomicmem-uri.d.ts +33 -0
  222. package/dist/services/atomicmem-uri.js +86 -0
  223. package/dist/services/audit-events.d.ts +54 -0
  224. package/dist/services/audit-events.js +56 -0
  225. package/dist/services/chunked-extraction.d.ts +21 -0
  226. package/dist/services/chunked-extraction.js +108 -0
  227. package/dist/services/claim-slotting.d.ts +27 -0
  228. package/dist/services/claim-slotting.js +38 -0
  229. package/dist/services/claude-code-llm.d.ts +19 -0
  230. package/dist/services/claude-code-llm.js +96 -0
  231. package/dist/services/composite-dedup.d.ts +50 -0
  232. package/dist/services/composite-dedup.js +153 -0
  233. package/dist/services/composite-grouping.d.ts +41 -0
  234. package/dist/services/composite-grouping.js +111 -0
  235. package/dist/services/composite-staleness.d.ts +20 -0
  236. package/dist/services/composite-staleness.js +50 -0
  237. package/dist/services/conciseness-preference.d.ts +14 -0
  238. package/dist/services/conciseness-preference.js +42 -0
  239. package/dist/services/conflict-policy.d.ts +20 -0
  240. package/dist/services/conflict-policy.js +335 -0
  241. package/dist/services/consensus-extraction.d.ts +39 -0
  242. package/dist/services/consensus-extraction.js +147 -0
  243. package/dist/services/consensus-validation.d.ts +52 -0
  244. package/dist/services/consensus-validation.js +206 -0
  245. package/dist/services/consolidation-service.d.ts +60 -0
  246. package/dist/services/consolidation-service.js +171 -0
  247. package/dist/services/content-detection.d.ts +18 -0
  248. package/dist/services/content-detection.js +25 -0
  249. package/dist/services/contradiction-surfacing.d.ts +62 -0
  250. package/dist/services/contradiction-surfacing.js +111 -0
  251. package/dist/services/cost-telemetry.d.ts +39 -0
  252. package/dist/services/cost-telemetry.js +58 -0
  253. package/dist/services/counter-evidence.d.ts +34 -0
  254. package/dist/services/counter-evidence.js +92 -0
  255. package/dist/services/current-state-ranking.d.ts +21 -0
  256. package/dist/services/current-state-ranking.js +152 -0
  257. package/dist/services/deferred-audn.d.ts +47 -0
  258. package/dist/services/deferred-audn.js +162 -0
  259. package/dist/services/document-chunker.d.ts +50 -0
  260. package/dist/services/document-chunker.js +153 -0
  261. package/dist/services/document-failure-markers.d.ts +91 -0
  262. package/dist/services/document-failure-markers.js +305 -0
  263. package/dist/services/document-indexer.d.ts +122 -0
  264. package/dist/services/document-indexer.js +405 -0
  265. package/dist/services/document-service.d.ts +245 -0
  266. package/dist/services/document-service.js +325 -0
  267. package/dist/services/document-upload-artifact-sync.d.ts +80 -0
  268. package/dist/services/document-upload-artifact-sync.js +162 -0
  269. package/dist/services/document-upload-beta2-recovery.d.ts +72 -0
  270. package/dist/services/document-upload-beta2-recovery.js +94 -0
  271. package/dist/services/document-upload.d.ts +44 -0
  272. package/dist/services/document-upload.js +353 -0
  273. package/dist/services/embedding.d.ts +57 -0
  274. package/dist/services/embedding.js +416 -0
  275. package/dist/services/entity-attribute-extractor.d.ts +34 -0
  276. package/dist/services/entity-attribute-extractor.js +117 -0
  277. package/dist/services/entity-card-synthesis.d.ts +54 -0
  278. package/dist/services/entity-card-synthesis.js +92 -0
  279. package/dist/services/entity-dedup.d.ts +9 -0
  280. package/dist/services/entity-dedup.js +14 -0
  281. package/dist/services/entity-graph.d.ts +17 -0
  282. package/dist/services/entity-graph.js +135 -0
  283. package/dist/services/entropy-gate.d.ts +52 -0
  284. package/dist/services/entropy-gate.js +56 -0
  285. package/dist/services/episode-fetcher.d.ts +47 -0
  286. package/dist/services/episode-fetcher.js +128 -0
  287. package/dist/services/event-anchor-facts.d.ts +8 -0
  288. package/dist/services/event-anchor-facts.js +205 -0
  289. package/dist/services/event-chain-detector.d.ts +52 -0
  290. package/dist/services/event-chain-detector.js +83 -0
  291. package/dist/services/extraction-cache.d.ts +9 -0
  292. package/dist/services/extraction-cache.js +54 -0
  293. package/dist/services/extraction-enrichment.d.ts +9 -0
  294. package/dist/services/extraction-enrichment.js +223 -0
  295. package/dist/services/extraction.d.ts +69 -0
  296. package/dist/services/extraction.js +596 -0
  297. package/dist/services/fact-normalization.d.ts +12 -0
  298. package/dist/services/fact-normalization.js +248 -0
  299. package/dist/services/filecoin-observability.d.ts +127 -0
  300. package/dist/services/filecoin-observability.js +200 -0
  301. package/dist/services/first-mention-service.d.ts +76 -0
  302. package/dist/services/first-mention-service.js +186 -0
  303. package/dist/services/hierarchical-retrieval.d.ts +49 -0
  304. package/dist/services/hierarchical-retrieval.js +50 -0
  305. package/dist/services/ingest-fact-pipeline.d.ts +32 -0
  306. package/dist/services/ingest-fact-pipeline.js +212 -0
  307. package/dist/services/ingest-post-write.d.ts +50 -0
  308. package/dist/services/ingest-post-write.js +117 -0
  309. package/dist/services/ingest-trace.d.ts +32 -0
  310. package/dist/services/ingest-trace.js +60 -0
  311. package/dist/services/input-sanitizer.d.ts +41 -0
  312. package/dist/services/input-sanitizer.js +135 -0
  313. package/dist/services/iterative-retrieval.d.ts +26 -0
  314. package/dist/services/iterative-retrieval.js +139 -0
  315. package/dist/services/keyword-expansion.d.ts +10 -0
  316. package/dist/services/keyword-expansion.js +26 -0
  317. package/dist/services/lesson-service.d.ts +68 -0
  318. package/dist/services/lesson-service.js +178 -0
  319. package/dist/services/literal-extractor.d.ts +16 -0
  320. package/dist/services/literal-extractor.js +74 -0
  321. package/dist/services/literal-list-protection.d.ts +17 -0
  322. package/dist/services/literal-list-protection.js +134 -0
  323. package/dist/services/literal-query-expansion.d.ts +20 -0
  324. package/dist/services/literal-query-expansion.js +181 -0
  325. package/dist/services/llm.d.ts +61 -0
  326. package/dist/services/llm.js +265 -0
  327. package/dist/services/memcell-projection.d.ts +17 -0
  328. package/dist/services/memcell-projection.js +41 -0
  329. package/dist/services/memory-audn.d.ts +43 -0
  330. package/dist/services/memory-audn.js +419 -0
  331. package/dist/services/memory-crud.d.ts +93 -0
  332. package/dist/services/memory-crud.js +255 -0
  333. package/dist/services/memory-ingest.d.ts +21 -0
  334. package/dist/services/memory-ingest.js +249 -0
  335. package/dist/services/memory-lifecycle.d.ts +75 -0
  336. package/dist/services/memory-lifecycle.js +108 -0
  337. package/dist/services/memory-lineage.d.ts +181 -0
  338. package/dist/services/memory-lineage.js +232 -0
  339. package/dist/services/memory-network.d.ts +40 -0
  340. package/dist/services/memory-network.js +75 -0
  341. package/dist/services/memory-search-types.d.ts +25 -0
  342. package/dist/services/memory-search-types.js +10 -0
  343. package/dist/services/memory-search.d.ts +48 -0
  344. package/dist/services/memory-search.js +505 -0
  345. package/dist/services/memory-service-types.d.ts +371 -0
  346. package/dist/services/memory-service-types.js +8 -0
  347. package/dist/services/memory-service.d.ts +152 -0
  348. package/dist/services/memory-service.js +225 -0
  349. package/dist/services/memory-storage.d.ts +33 -0
  350. package/dist/services/memory-storage.js +328 -0
  351. package/dist/services/msr-aggregator.d.ts +38 -0
  352. package/dist/services/msr-aggregator.js +97 -0
  353. package/dist/services/msr-detector.d.ts +35 -0
  354. package/dist/services/msr-detector.js +65 -0
  355. package/dist/services/namespace-retrieval.d.ts +60 -0
  356. package/dist/services/namespace-retrieval.js +180 -0
  357. package/dist/services/observation-date-extraction.d.ts +12 -0
  358. package/dist/services/observation-date-extraction.js +50 -0
  359. package/dist/services/observation-service.d.ts +27 -0
  360. package/dist/services/observation-service.js +84 -0
  361. package/dist/services/packaging-observability.d.ts +29 -0
  362. package/dist/services/packaging-observability.js +146 -0
  363. package/dist/services/query-expansion.d.ts +83 -0
  364. package/dist/services/query-expansion.js +242 -0
  365. package/dist/services/query-keyword-matches.d.ts +6 -0
  366. package/dist/services/query-keyword-matches.js +56 -0
  367. package/dist/services/query-term-visibility.d.ts +28 -0
  368. package/dist/services/query-term-visibility.js +100 -0
  369. package/dist/services/quick-extraction.d.ts +25 -0
  370. package/dist/services/quick-extraction.js +431 -0
  371. package/dist/services/quoted-entity-extraction.d.ts +10 -0
  372. package/dist/services/quoted-entity-extraction.js +161 -0
  373. package/dist/services/raw-storage-reconciler-backoff.d.ts +8 -0
  374. package/dist/services/raw-storage-reconciler-backoff.js +14 -0
  375. package/dist/services/raw-storage-reconciler-scheduler.d.ts +29 -0
  376. package/dist/services/raw-storage-reconciler-scheduler.js +43 -0
  377. package/dist/services/raw-storage-reconciler.d.ts +71 -0
  378. package/dist/services/raw-storage-reconciler.js +278 -0
  379. package/dist/services/recap-builder.d.ts +49 -0
  380. package/dist/services/recap-builder.js +157 -0
  381. package/dist/services/reflect-jobs.d.ts +23 -0
  382. package/dist/services/reflect-jobs.js +36 -0
  383. package/dist/services/reflect-prompts.d.ts +71 -0
  384. package/dist/services/reflect-prompts.js +99 -0
  385. package/dist/services/reflect-retrieval.d.ts +33 -0
  386. package/dist/services/reflect-retrieval.js +30 -0
  387. package/dist/services/reflect.d.ts +49 -0
  388. package/dist/services/reflect.js +84 -0
  389. package/dist/services/relative-temporal.d.ts +14 -0
  390. package/dist/services/relative-temporal.js +163 -0
  391. package/dist/services/relevance-policy.d.ts +37 -0
  392. package/dist/services/relevance-policy.js +109 -0
  393. package/dist/services/rerank.d.ts +32 -0
  394. package/dist/services/rerank.js +118 -0
  395. package/dist/services/reranker.d.ts +20 -0
  396. package/dist/services/reranker.js +99 -0
  397. package/dist/services/retrieval-channel-rules.d.ts +34 -0
  398. package/dist/services/retrieval-channel-rules.js +41 -0
  399. package/dist/services/retrieval-config-overlay.d.ts +36 -0
  400. package/dist/services/retrieval-config-overlay.js +44 -0
  401. package/dist/services/retrieval-format.d.ts +119 -0
  402. package/dist/services/retrieval-format.js +559 -0
  403. package/dist/services/retrieval-policy.d.ts +69 -0
  404. package/dist/services/retrieval-policy.js +275 -0
  405. package/dist/services/retrieval-profiles.d.ts +37 -0
  406. package/dist/services/retrieval-profiles.js +90 -0
  407. package/dist/services/retrieval-side-effects.d.ts +14 -0
  408. package/dist/services/retrieval-side-effects.js +26 -0
  409. package/dist/services/retrieval-trace.d.ts +108 -0
  410. package/dist/services/retrieval-trace.js +147 -0
  411. package/dist/services/rrf-fusion.d.ts +18 -0
  412. package/dist/services/rrf-fusion.js +34 -0
  413. package/dist/services/search-pipeline.d.ts +71 -0
  414. package/dist/services/search-pipeline.js +788 -0
  415. package/dist/services/session-date.d.ts +20 -0
  416. package/dist/services/session-date.js +61 -0
  417. package/dist/services/session-packaging.d.ts +53 -0
  418. package/dist/services/session-packaging.js +182 -0
  419. package/dist/services/session-summary-generator.d.ts +53 -0
  420. package/dist/services/session-summary-generator.js +134 -0
  421. package/dist/services/specialists/cr-specialist.d.ts +52 -0
  422. package/dist/services/specialists/cr-specialist.js +121 -0
  423. package/dist/services/specialists/dispatch.d.ts +53 -0
  424. package/dist/services/specialists/dispatch.js +102 -0
  425. package/dist/services/specialists/ie-ku-specialist.d.ts +37 -0
  426. package/dist/services/specialists/ie-ku-specialist.js +63 -0
  427. package/dist/services/specialists/msr-specialist.d.ts +61 -0
  428. package/dist/services/specialists/msr-specialist.js +162 -0
  429. package/dist/services/specialists/tr-specialist.d.ts +37 -0
  430. package/dist/services/specialists/tr-specialist.js +146 -0
  431. package/dist/services/storage-key-prefix.d.ts +42 -0
  432. package/dist/services/storage-key-prefix.js +45 -0
  433. package/dist/services/storage-put-recovery.d.ts +71 -0
  434. package/dist/services/storage-put-recovery.js +269 -0
  435. package/dist/services/storage-service-errors.d.ts +124 -0
  436. package/dist/services/storage-service-errors.js +189 -0
  437. package/dist/services/storage-service.d.ts +176 -0
  438. package/dist/services/storage-service.js +423 -0
  439. package/dist/services/subject-aware-ranking.d.ts +19 -0
  440. package/dist/services/subject-aware-ranking.js +161 -0
  441. package/dist/services/supplemental-extraction.d.ts +7 -0
  442. package/dist/services/supplemental-extraction.js +116 -0
  443. package/dist/services/tbc-execution.d.ts +49 -0
  444. package/dist/services/tbc-execution.js +284 -0
  445. package/dist/services/temporal-classifier.d.ts +56 -0
  446. package/dist/services/temporal-classifier.js +94 -0
  447. package/dist/services/temporal-endpoint-evidence.d.ts +12 -0
  448. package/dist/services/temporal-endpoint-evidence.js +313 -0
  449. package/dist/services/temporal-fingerprint.d.ts +6 -0
  450. package/dist/services/temporal-fingerprint.js +12 -0
  451. package/dist/services/temporal-format.d.ts +9 -0
  452. package/dist/services/temporal-format.js +21 -0
  453. package/dist/services/temporal-intent.d.ts +39 -0
  454. package/dist/services/temporal-intent.js +78 -0
  455. package/dist/services/temporal-query-constraints.d.ts +16 -0
  456. package/dist/services/temporal-query-constraints.js +107 -0
  457. package/dist/services/temporal-query-expansion.d.ts +14 -0
  458. package/dist/services/temporal-query-expansion.js +131 -0
  459. package/dist/services/temporal-rerank.d.ts +22 -0
  460. package/dist/services/temporal-rerank.js +47 -0
  461. package/dist/services/temporal-result-protection.d.ts +7 -0
  462. package/dist/services/temporal-result-protection.js +60 -0
  463. package/dist/services/temporal-state-write.d.ts +57 -0
  464. package/dist/services/temporal-state-write.js +45 -0
  465. package/dist/services/tiered-context.d.ts +87 -0
  466. package/dist/services/tiered-context.js +214 -0
  467. package/dist/services/tiered-loading.d.ts +88 -0
  468. package/dist/services/tiered-loading.js +263 -0
  469. package/dist/services/timeline-pack.d.ts +36 -0
  470. package/dist/services/timeline-pack.js +50 -0
  471. package/dist/services/timing.d.ts +13 -0
  472. package/dist/services/timing.js +72 -0
  473. package/dist/services/tll-augmentation.d.ts +20 -0
  474. package/dist/services/tll-augmentation.js +125 -0
  475. package/dist/services/tll-retrieval.d.ts +55 -0
  476. package/dist/services/tll-retrieval.js +101 -0
  477. package/dist/services/topic-abstraction.d.ts +36 -0
  478. package/dist/services/topic-abstraction.js +105 -0
  479. package/dist/services/trust-scoring.d.ts +43 -0
  480. package/dist/services/trust-scoring.js +89 -0
  481. package/dist/services/typed-belief-calculus.d.ts +126 -0
  482. package/dist/services/typed-belief-calculus.js +204 -0
  483. package/dist/services/upload-config.d.ts +34 -0
  484. package/dist/services/upload-config.js +23 -0
  485. package/dist/services/upload-decision.d.ts +65 -0
  486. package/dist/services/upload-decision.js +98 -0
  487. package/dist/services/upload-helpers.d.ts +107 -0
  488. package/dist/services/upload-helpers.js +148 -0
  489. package/dist/services/user-profile-builder.d.ts +22 -0
  490. package/dist/services/user-profile-builder.js +109 -0
  491. package/dist/services/voyage-embedding.d.ts +22 -0
  492. package/dist/services/voyage-embedding.js +77 -0
  493. package/dist/services/write-security.d.ts +31 -0
  494. package/dist/services/write-security.js +64 -0
  495. package/dist/storage/artifact-public-redaction.d.ts +34 -0
  496. package/dist/storage/artifact-public-redaction.js +83 -0
  497. package/dist/storage/cleanup.d.ts +103 -0
  498. package/dist/storage/cleanup.js +138 -0
  499. package/dist/storage/codec-factory.d.ts +17 -0
  500. package/dist/storage/codec-factory.js +33 -0
  501. package/dist/storage/codecs/aes-gcm-codec.d.ts +44 -0
  502. package/dist/storage/codecs/aes-gcm-codec.js +108 -0
  503. package/dist/storage/codecs/noop-codec.d.ts +16 -0
  504. package/dist/storage/codecs/noop-codec.js +23 -0
  505. package/dist/storage/factory.d.ts +44 -0
  506. package/dist/storage/factory.js +99 -0
  507. package/dist/storage/filecoin-cid-validation.d.ts +82 -0
  508. package/dist/storage/filecoin-cid-validation.js +122 -0
  509. package/dist/storage/filecoin-public-metadata.d.ts +73 -0
  510. package/dist/storage/filecoin-public-metadata.js +110 -0
  511. package/dist/storage/local-fs-store.d.ts +39 -0
  512. package/dist/storage/local-fs-store.js +145 -0
  513. package/dist/storage/pointer-uri-allowlist.d.ts +38 -0
  514. package/dist/storage/pointer-uri-allowlist.js +70 -0
  515. package/dist/storage/provider-metadata-projection.d.ts +27 -0
  516. package/dist/storage/provider-metadata-projection.js +68 -0
  517. package/dist/storage/providers/filecoin/backend.d.ts +42 -0
  518. package/dist/storage/providers/filecoin/backend.js +250 -0
  519. package/dist/storage/providers/filecoin/config.d.ts +70 -0
  520. package/dist/storage/providers/filecoin/config.js +275 -0
  521. package/dist/storage/providers/filecoin/errors.d.ts +45 -0
  522. package/dist/storage/providers/filecoin/errors.js +56 -0
  523. package/dist/storage/providers/filecoin/filecoin-pin-car.d.ts +78 -0
  524. package/dist/storage/providers/filecoin/filecoin-pin-car.js +155 -0
  525. package/dist/storage/providers/filecoin/filecoin-pin-client.d.ts +92 -0
  526. package/dist/storage/providers/filecoin/filecoin-pin-client.js +199 -0
  527. package/dist/storage/providers/filecoin/filecoin-pin-mapping.d.ts +58 -0
  528. package/dist/storage/providers/filecoin/filecoin-pin-mapping.js +103 -0
  529. package/dist/storage/providers/filecoin/filecoin-pin-timeout.d.ts +30 -0
  530. package/dist/storage/providers/filecoin/filecoin-pin-timeout.js +53 -0
  531. package/dist/storage/providers/filecoin/filecoin-pin-vendor.d.ts +111 -0
  532. package/dist/storage/providers/filecoin/filecoin-pin-vendor.js +87 -0
  533. package/dist/storage/providers/filecoin/hints.d.ts +71 -0
  534. package/dist/storage/providers/filecoin/hints.js +123 -0
  535. package/dist/storage/providers/filecoin/index.d.ts +51 -0
  536. package/dist/storage/providers/filecoin/index.js +103 -0
  537. package/dist/storage/providers/filecoin/ipfs-cid.d.ts +50 -0
  538. package/dist/storage/providers/filecoin/ipfs-cid.js +64 -0
  539. package/dist/storage/providers/filecoin/metadata.d.ts +72 -0
  540. package/dist/storage/providers/filecoin/metadata.js +137 -0
  541. package/dist/storage/providers/filecoin/piece-cid.d.ts +48 -0
  542. package/dist/storage/providers/filecoin/piece-cid.js +57 -0
  543. package/dist/storage/providers/filecoin/provider-client.d.ts +234 -0
  544. package/dist/storage/providers/filecoin/provider-client.js +27 -0
  545. package/dist/storage/providers/filecoin/readiness.d.ts +62 -0
  546. package/dist/storage/providers/filecoin/readiness.js +85 -0
  547. package/dist/storage/providers/filecoin/retriever.d.ts +82 -0
  548. package/dist/storage/providers/filecoin/retriever.js +63 -0
  549. package/dist/storage/providers/filecoin/skeleton-client.d.ts +36 -0
  550. package/dist/storage/providers/filecoin/skeleton-client.js +55 -0
  551. package/dist/storage/providers/filecoin/synapse-client.d.ts +169 -0
  552. package/dist/storage/providers/filecoin/synapse-client.js +343 -0
  553. package/dist/storage/providers/filecoin/synapse-construction.d.ts +26 -0
  554. package/dist/storage/providers/filecoin/synapse-construction.js +47 -0
  555. package/dist/storage/providers/filecoin/synapse-error-mapping.d.ts +23 -0
  556. package/dist/storage/providers/filecoin/synapse-error-mapping.js +49 -0
  557. package/dist/storage/providers/filecoin/synapse-readiness.d.ts +37 -0
  558. package/dist/storage/providers/filecoin/synapse-readiness.js +231 -0
  559. package/dist/storage/providers/filecoin/uri.d.ts +49 -0
  560. package/dist/storage/providers/filecoin/uri.js +84 -0
  561. package/dist/storage/providers/filecoin/verified-fetch-lifecycle.d.ts +77 -0
  562. package/dist/storage/providers/filecoin/verified-fetch-lifecycle.js +196 -0
  563. package/dist/storage/providers/filecoin/verified-fetch-retriever.d.ts +54 -0
  564. package/dist/storage/providers/filecoin/verified-fetch-retriever.js +81 -0
  565. package/dist/storage/providers/filecoin/verified-fetch-vendor.d.ts +71 -0
  566. package/dist/storage/providers/filecoin/verified-fetch-vendor.js +94 -0
  567. package/dist/storage/raw-content-codec.d.ts +89 -0
  568. package/dist/storage/raw-content-codec.js +47 -0
  569. package/dist/storage/raw-content-store-backend-adapter.d.ts +28 -0
  570. package/dist/storage/raw-content-store-backend-adapter.js +67 -0
  571. package/dist/storage/raw-content-store.d.ts +228 -0
  572. package/dist/storage/raw-content-store.js +27 -0
  573. package/dist/storage/s3-store.d.ts +42 -0
  574. package/dist/storage/s3-store.js +181 -0
  575. package/dist/storage/storage-backend-registry.d.ts +58 -0
  576. package/dist/storage/storage-backend-registry.js +56 -0
  577. package/dist/storage/storage-backend.d.ts +82 -0
  578. package/dist/storage/storage-backend.js +14 -0
  579. package/dist/storage/storage-capabilities.d.ts +56 -0
  580. package/dist/storage/storage-capabilities.js +170 -0
  581. package/dist/storage/store-registry.d.ts +67 -0
  582. package/dist/storage/store-registry.js +77 -0
  583. package/dist/vector-math.d.ts +15 -0
  584. package/dist/vector-math.js +31 -0
  585. package/dist/xml-escape.d.ts +5 -0
  586. package/dist/xml-escape.js +7 -0
  587. package/openapi.json +15395 -0
  588. package/openapi.yaml +10794 -0
  589. package/package.json +119 -0
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Deferred AUDN Reconciliation Service.
3
+ *
4
+ * When DEFERRED_AUDN_ENABLED=true, facts with conflict candidates
5
+ * (0.7 ≤ similarity < 0.95) are stored immediately as ADD and flagged
6
+ * for background reconciliation. This eliminates the 500–2000ms LLM
7
+ * AUDN call from the synchronous ingest path.
8
+ *
9
+ * The reconciliation pass processes flagged memories in batches,
10
+ * running the full LLM AUDN pipeline and applying decisions
11
+ * (NOOP → delete, SUPERSEDE → soft-delete original, UPDATE → merge).
12
+ *
13
+ * Expected latency improvement: 60–80% reduction for ingest batches
14
+ * with moderate conflict rates (30–70% of facts hitting candidates).
15
+ */
16
+ import { config } from '../config.js';
17
+ import { findDeferredMemories, findAllDeferredMemories, clearDeferredFlag, countDeferredMemories, markMemoryDeferred, } from '../db/repository-deferred-audn.js';
18
+ import { cachedResolveAUDN } from './extraction-cache.js';
19
+ import { applyClarificationOverrides } from './conflict-policy.js';
20
+ import { embedText } from './embedding.js';
21
+ import { emitAuditEvent } from './audit-events.js';
22
/**
 * Decide whether AUDN resolution should be postponed to the background
 * reconciliation pass instead of running synchronously.
 *
 * Deferral applies only when the feature flag is on, no fast-path
 * decision has already settled the fact, and at least one conflict
 * candidate exists.
 *
 * @param fastDecisionResolved - truthy when a fast decision was already reached.
 * @param candidateCount - number of conflict candidates found for the fact.
 * @returns a truthy value when the fact should be deferred.
 */
export function shouldDeferAudn(fastDecisionResolved, candidateCount) {
    const { deferredAudnEnabled } = config;
    if (!deferredAudnEnabled) {
        // Hand back the flag's own falsy value, matching `a && b && c`.
        return deferredAudnEnabled;
    }
    if (fastDecisionResolved) {
        return false;
    }
    return candidateCount > 0;
}
29
/**
 * Flag a freshly stored memory for background reconciliation.
 *
 * Only the `id`, `content` and `similarity` of each candidate are
 * persisted alongside the flag; any other candidate fields are dropped.
 *
 * @param pool - database handle forwarded to `markMemoryDeferred`.
 * @param memoryId - id of the memory that was stored as ADD.
 * @param candidates - conflict candidates found at ingest time.
 */
export async function deferMemoryForReconciliation(pool, memoryId, candidates) {
    const snapshot = candidates.map(({ id, content, similarity }) => {
        return { id, content, similarity };
    });
    await markMemoryDeferred(pool, memoryId, snapshot);
}
40
/**
 * Reconcile up to `batchSize` deferred memories belonging to one user.
 *
 * @param pool - database handle.
 * @param repo - memory repository used to read and mutate memories.
 * @param userId - owner whose deferred memories are processed.
 * @param batchSize - maximum memories to load; defaults to `config.deferredAudnBatchSize`.
 * @returns the counters object produced by `processReconciliationBatch`.
 */
export async function reconcileUser(pool, repo, userId, batchSize = config.deferredAudnBatchSize) {
    // Start the clock before the lookup so durationMs includes the query.
    const startedAt = Date.now();
    const pending = await findDeferredMemories(pool, userId, batchSize);
    const summary = processReconciliationBatch(pool, repo, pending, startedAt);
    return summary;
}
46
/**
 * Reconcile up to `batchSize` deferred memories regardless of owner.
 *
 * @param pool - database handle.
 * @param repo - memory repository used to read and mutate memories.
 * @param batchSize - maximum memories to load; defaults to `config.deferredAudnBatchSize`.
 * @returns the counters object produced by `processReconciliationBatch`.
 */
export async function reconcileAll(pool, repo, batchSize = config.deferredAudnBatchSize) {
    // Start the clock before the lookup so durationMs includes the query.
    const startedAt = Date.now();
    const pending = await findAllDeferredMemories(pool, batchSize);
    const summary = processReconciliationBatch(pool, repo, pending, startedAt);
    return summary;
}
52
/**
 * Report how many memories a user still has waiting for reconciliation.
 *
 * @param pool - database handle.
 * @param userId - owner to count deferred memories for.
 * @returns `{ pending, enabled }`, where `enabled` mirrors `config.deferredAudnEnabled`.
 */
export async function getReconciliationStatus(pool, userId) {
    const status = {
        pending: await countDeferredMemories(pool, userId),
        enabled: config.deferredAudnEnabled,
    };
    return status;
}
57
/**
 * Reconcile each deferred memory in turn and tally the outcomes.
 *
 * A failure on one memory is logged and counted under `errors`; the
 * loop then continues with the next memory. Actions outside the five
 * known ones still count as `resolved` but bump no per-action counter.
 *
 * @param pool - database handle.
 * @param repo - memory repository.
 * @param deferred - deferred memories to process, in order.
 * @param startMs - epoch milliseconds at which the pass started.
 * @returns counters for the pass, including the elapsed `durationMs`.
 */
async function processReconciliationBatch(pool, repo, deferred, startMs) {
    const counterForAction = new Map([
        ['NOOP', 'noops'],
        ['UPDATE', 'updates'],
        ['SUPERSEDE', 'supersedes'],
        ['DELETE', 'deletes'],
        ['ADD', 'adds'],
    ]);
    const tally = {
        processed: 0, resolved: 0, noops: 0, updates: 0,
        supersedes: 0, deletes: 0, adds: 0, errors: 0, durationMs: 0,
    };
    for (const memory of deferred) {
        tally.processed += 1;
        try {
            const action = await reconcileSingleMemory(pool, repo, memory);
            tally.resolved += 1;
            const counter = counterForAction.get(action);
            if (counter !== undefined) {
                tally[counter] += 1;
            }
        }
        catch (err) {
            tally.errors += 1;
            console.error(`[deferred-audn] Error reconciling memory ${memory.id}:`, err);
        }
    }
    tally.durationMs = Date.now() - startMs;
    return tally;
}
93
/**
 * Resolve one deferred memory against its still-live candidates.
 *
 * With no live candidates left, the memory is kept as-is: the flag is
 * cleared and `'ADD'` is returned. Otherwise the AUDN decision is
 * obtained, passed through the clarification overrides, applied, the
 * flag is cleared, and an audit event is emitted when audit logging is
 * enabled.
 *
 * @param pool - database handle.
 * @param repo - memory repository.
 * @param memory - deferred memory carrying `id`, `userId`, `content`, `candidates`.
 * @returns the action that was applied.
 */
async function reconcileSingleMemory(pool, repo, memory) {
    const liveCandidates = await refreshCandidates(repo, memory.userId, memory.candidates);
    if (liveCandidates.length !== 0) {
        const rawDecision = await cachedResolveAUDN(memory.content, liveCandidates);
        const decision = applyClarificationOverrides(rawDecision, memory.content, liveCandidates, [], 'knowledge');
        await applyDeferredDecision(pool, repo, memory, decision);
        await clearDeferredFlag(pool, memory.id);
        if (config.auditLoggingEnabled) {
            const details = {
                memoryId: memory.id,
                action: decision.action,
                targetMemoryId: decision.targetMemoryId,
            };
            emitAuditEvent('deferred-audn:reconcile', memory.userId, details);
        }
        return decision.action;
    }
    // Every stored candidate has vanished, so nothing can conflict.
    await clearDeferredFlag(pool, memory.id);
    return 'ADD';
}
111
+ /**
112
+ * Refresh candidate data — candidates stored at ingest time may have
113
+ * been modified or deleted since. Re-fetch from DB to ensure accuracy.
114
+ */
115
+ async function refreshCandidates(repo, userId, storedCandidates) {
116
+ const refreshed = [];
117
+ for (const candidate of storedCandidates) {
118
+ const memory = await repo.getMemory(candidate.id, userId);
119
+ if (memory && !memory.deleted_at) {
120
+ refreshed.push({
121
+ id: memory.id,
122
+ content: memory.content,
123
+ similarity: candidate.similarity,
124
+ importance: memory.importance,
125
+ });
126
+ }
127
+ }
128
+ return refreshed;
129
+ }
130
+ async function applyDeferredDecision(pool, repo, memory, decision) {
131
+ switch (decision.action) {
132
+ case 'NOOP':
133
+ await repo.softDeleteMemory(memory.userId, memory.id);
134
+ console.log(`[deferred-audn] NOOP: deleted duplicate ${memory.id}`);
135
+ break;
136
+ case 'UPDATE':
137
+ if (decision.targetMemoryId && decision.updatedContent) {
138
+ const target = await repo.getMemory(decision.targetMemoryId, memory.userId);
139
+ const newEmbedding = await embedText(decision.updatedContent);
140
+ await repo.updateMemoryContent(memory.userId, decision.targetMemoryId, decision.updatedContent, newEmbedding, target?.importance ?? 0.5);
141
+ await repo.softDeleteMemory(memory.userId, memory.id);
142
+ console.log(`[deferred-audn] UPDATE: merged ${memory.id} into ${decision.targetMemoryId}`);
143
+ }
144
+ break;
145
+ case 'SUPERSEDE':
146
+ if (decision.targetMemoryId) {
147
+ await repo.softDeleteMemory(memory.userId, decision.targetMemoryId);
148
+ console.log(`[deferred-audn] SUPERSEDE: ${memory.id} replaces ${decision.targetMemoryId}`);
149
+ }
150
+ break;
151
+ case 'DELETE':
152
+ await repo.softDeleteMemory(memory.userId, memory.id);
153
+ console.log(`[deferred-audn] DELETE: removed ${memory.id}`);
154
+ break;
155
+ case 'ADD':
156
+ console.log(`[deferred-audn] ADD: confirmed ${memory.id} is distinct`);
157
+ break;
158
+ default:
159
+ console.log(`[deferred-audn] ${decision.action}: no action for ${memory.id}`);
160
+ break;
161
+ }
162
+ }
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Deterministic text chunker for the document pipeline (Phase 2).
3
+ *
4
+ * Pure function: same (text, options) → same chunks. No timing, no
5
+ * randomness, no provider state. Each chunk carries an absolute
6
+ * character offset range, a content_hash, a stable index, and a token
7
+ * estimate; the index field is what the active-unique partial index on
8
+ * `document_chunks` keys on alongside `chunker_version`.
9
+ *
10
+ * Phase 2 ships a single chunker_version (`PHASE2_CHUNKER_VERSION`).
11
+ * If the algorithm changes meaningfully, bump the constant — that
12
+ * triggers a fresh insert generation rather than colliding with the
13
+ * old run on the unique index.
14
+ *
15
+ * See `Atomicmemory-research/docs/core-repo/design/large-file-ingestion-and-raw-storage-plan-2026-05-08.md`
16
+ * Phase 2.
17
+ */
18
+ /**
19
+ * Pinned chunker identifier. Bump when the algorithm output changes for
20
+ * the same input text — that lets the active-unique index treat the new
21
+ * generation as fresh inserts rather than colliding with prior runs.
22
+ */
23
+ export declare const PHASE2_CHUNKER_VERSION = "phase2-fixed-v1";
24
+ /** Pinned parser identifier. Phase 2 accepts text input only. */
25
+ export declare const PHASE2_PARSER_VERSION = "phase2-text-v1";
26
+ export interface ChunkOptions {
27
+ chunkSize?: number; // Window size in characters before word-boundary trimming; the shipped implementation defaults to 1500 and raises smaller values to 1.
28
+ chunkOverlap?: number; // Characters shared by adjacent windows; defaults to 150, negative values become 0, and the value is capped at half of chunkSize.
29
+ minChunkSize?: number; // In the sliding-window path, windows whose trimmed content is shorter than this are not emitted; defaults to 100 and is kept within [1, chunkSize].
30
+ }
31
+ export interface ChunkResult {
32
+ chunkIndex: number; // Zero-based position among the emitted chunks; skipped windows do not consume an index.
33
+ content: string; // Chunk text with leading and trailing whitespace removed.
34
+ contentHash: string; // Hex SHA-256 digest of `content` (UTF-8), as produced by hashChunkContent.
35
+ charStart: number; // Offset of the first character of `content` in the input text.
36
+ charEnd: number; // Exclusive end offset; text.slice(charStart, charEnd) === content.
37
+ tokenCount: number; // Rough estimate of ceil(content.length / 4), never below 1; metadata only.
38
+ }
39
+ /**
40
+ * Chunk `text` deterministically. Returns `[]` for empty or
41
+ * whitespace-only input.
42
+ */
43
+ export declare function chunkText(text: string, options?: ChunkOptions): ChunkResult[];
44
+ /** Fingerprint a chunk's content; stable across runs for byte-identical input. */
45
+ export declare function hashChunkContent(content: string): string;
46
+ /**
47
+ * Whole-text fingerprint, used by the indexer's idempotency check.
48
+ * Distinct helper so tests can pin both invariants independently.
49
+ */
50
+ export declare function hashIndexedText(text: string): string;
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Deterministic text chunker for the document pipeline (Phase 2).
3
+ *
4
+ * Pure function: same (text, options) → same chunks. No timing, no
5
+ * randomness, no provider state. Each chunk carries an absolute
6
+ * character offset range, a content_hash, a stable index, and a token
7
+ * estimate; the index field is what the active-unique partial index on
8
+ * `document_chunks` keys on alongside `chunker_version`.
9
+ *
10
+ * Phase 2 ships a single chunker_version (`PHASE2_CHUNKER_VERSION`).
11
+ * If the algorithm changes meaningfully, bump the constant — that
12
+ * triggers a fresh insert generation rather than colliding with the
13
+ * old run on the unique index.
14
+ *
15
+ * See `Atomicmemory-research/docs/core-repo/design/large-file-ingestion-and-raw-storage-plan-2026-05-08.md`
16
+ * Phase 2.
17
+ */
18
+ import { createHash } from 'node:crypto';
19
+ /**
20
+ * Character size of one chunk before word-boundary trimming. ~250 tokens
21
+ * for `text-embedding-3-small`'s typical English-text ratio (4 chars/token);
22
+ * well under the 8192-token model limit.
23
+ */
24
+ const DEFAULT_CHUNK_SIZE = 1500;
25
+ /**
26
+ * Overlap between adjacent chunks. ~10% of chunk size keeps adjacent
27
+ * sentences findable via either chunk without exploding the chunk count.
28
+ */
29
+ const DEFAULT_CHUNK_OVERLAP = 150;
30
+ /** Reject chunks that fall below this size after trimming. */
31
+ const DEFAULT_MIN_CHUNK_SIZE = 100;
32
+ /**
33
+ * Pinned chunker identifier. Bump when the algorithm output changes for
34
+ * the same input text — that lets the active-unique index treat the new
35
+ * generation as fresh inserts rather than colliding with prior runs.
36
+ */
37
+ export const PHASE2_CHUNKER_VERSION = 'phase2-fixed-v1';
38
+ /** Pinned parser identifier. Phase 2 accepts text input only. */
39
+ export const PHASE2_PARSER_VERSION = 'phase2-text-v1';
40
/**
 * Chunk `text` deterministically. Returns `[]` for empty or
 * whitespace-only input.
 *
 * Text no longer than the resolved `chunkSize` yields a single chunk
 * holding the trimmed text, with offsets that bound exactly that
 * trimmed content. Longer text goes through the sliding-window chunker.
 *
 * @param text - input text to split.
 * @param options - optional size / overlap / minimum-size overrides.
 * @returns the chunks in document order.
 */
export function chunkText(text, options = {}) {
    if (!text)
        return [];
    // Length of the text without trailing whitespace doubles as the
    // exclusive end offset of the trimmed content; 0 means blank input.
    const charEnd = text.trimEnd().length;
    if (charEnd === 0)
        return [];
    const opts = resolveOptions(options);
    if (text.length > opts.chunkSize)
        return slidingWindowChunks(text, opts);
    const charStart = text.length - text.trimStart().length;
    return [makeChunk(0, text.slice(charStart, charEnd), charStart, charEnd)];
}
58
/**
 * Compute the fingerprint of a chunk's content: the hex-encoded
 * SHA-256 digest of its UTF-8 bytes. Identical input always yields the
 * same digest.
 */
export function hashChunkContent(content) {
    const hasher = createHash('sha256');
    hasher.update(content, 'utf8');
    return hasher.digest('hex');
}
62
/**
 * Fingerprint of the whole indexed text (hex SHA-256 over UTF-8),
 * used by the indexer's idempotency check. Kept as its own helper so
 * tests can pin it independently of the per-chunk hash.
 */
export function hashIndexedText(text) {
    const hasher = createHash('sha256');
    hasher.update(text, 'utf8');
    return hasher.digest('hex');
}
69
/**
 * Fill in defaults and clamp the caller's options into a usable set:
 * `chunkSize` is at least 1, `chunkOverlap` is clamped by
 * `clampOverlap`, and `minChunkSize` is kept within [1, chunkSize].
 */
function resolveOptions(input) {
    const requestedSize = input.chunkSize ?? DEFAULT_CHUNK_SIZE;
    const chunkSize = Math.max(1, requestedSize);
    const requestedOverlap = input.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP;
    const chunkOverlap = clampOverlap(requestedOverlap, chunkSize);
    const requestedMin = input.minChunkSize ?? DEFAULT_MIN_CHUNK_SIZE;
    const minChunkSize = Math.max(1, Math.min(requestedMin, chunkSize));
    return { chunkSize, chunkOverlap, minChunkSize };
}
75
/**
 * Clamp the requested overlap: negative values become 0, and anything
 * above half the chunk size is cut down to `floor(chunkSize / 2)` so
 * each step still advances past the overlap.
 */
function clampOverlap(requested, chunkSize) {
    const ceiling = Math.floor(chunkSize / 2);
    return requested < 0 ? 0 : Math.min(requested, ceiling);
}
81
/**
 * Walk `text` with an overlapping window and collect the chunks.
 *
 * Chunk indices are assigned in emission order. A window whose trimmed
 * content is shorter than `opts.minChunkSize` is not emitted.
 * NOTE(review): the text of such a skipped window is kept only to the
 * extent the next window's overlap re-covers it.
 */
function slidingWindowChunks(text, opts) {
    const chunks = [];
    for (let cursor = 0; cursor < text.length;) {
        const { content, charStart, charEnd, advanceTo } = openWindow(text, cursor, opts);
        if (content.length >= opts.minChunkSize) {
            // chunks.length is the next free index because indices are dense.
            chunks.push(makeChunk(chunks.length, content, charStart, charEnd));
        }
        if (advanceTo >= text.length) {
            break;
        }
        cursor = advanceCursor(advanceTo, opts.chunkOverlap, cursor);
    }
    return chunks;
}
97
/**
 * Cut one window starting at `cursor` and describe it.
 *
 * @returns `content` (the trimmed window text), `charStart` / `charEnd`
 *   (offsets that bound exactly that trimmed content), and `advanceTo`
 *   (the untrimmed window end, from which the slider continues).
 */
function openWindow(text, cursor, opts) {
    const hardEnd = Math.min(cursor + opts.chunkSize, text.length);
    const advanceTo = preserveWordBoundary(text, cursor, hardEnd, opts.minChunkSize);
    const raw = text.slice(cursor, advanceTo);
    // Offsets are shifted by the whitespace stripped from each side so
    // that text.slice(charStart, charEnd) === content. Downstream
    // consumers rely on ranges that exclude whitespace absent from the
    // stored content.
    const leadingWhitespace = raw.length - raw.trimStart().length;
    const trailingWhitespace = raw.length - raw.trimEnd().length;
    return {
        content: raw.slice(leadingWhitespace, raw.length - trailingWhitespace),
        charStart: cursor + leadingWhitespace,
        charEnd: advanceTo - trailingWhitespace,
        advanceTo,
    };
}
113
/**
 * Pull the window end back to just after the nearest preceding
 * whitespace so a chunk does not stop mid-word.
 *
 * Only positions in [cursor + minChunkSize, rawEnd) are inspected. If
 * none holds whitespace, or if `rawEnd` already reaches the end of the
 * text, `rawEnd` is returned unchanged, which keeps the slider moving
 * on input without spaces.
 */
function preserveWordBoundary(text, cursor, rawEnd, minChunkSize) {
    if (rawEnd >= text.length) {
        return rawEnd;
    }
    const whitespace = /\s/;
    const floor = cursor + minChunkSize;
    let end = rawEnd;
    while (end > floor) {
        if (whitespace.test(text[end - 1])) {
            return end;
        }
        end -= 1;
    }
    return rawEnd;
}
129
/**
 * Compute where the next window starts: the previous window end
 * rewound by `overlap`. If rewinding would not move past `prevCursor`,
 * the previous window end is used instead, so the slider always makes
 * progress and cannot loop.
 */
function advanceCursor(charEnd, overlap, prevCursor) {
    const rewound = charEnd - overlap;
    if (rewound > prevCursor) {
        return rewound;
    }
    return charEnd;
}
135
/**
 * Assemble a chunk record, deriving the content hash and the token
 * estimate from `content`.
 */
function makeChunk(chunkIndex, content, charStart, charEnd) {
    const contentHash = hashChunkContent(content);
    const tokenCount = estimateTokens(content);
    return { chunkIndex, content, contentHash, charStart, charEnd, tokenCount };
}
145
/**
 * Rough token count: about 4 characters per token for English ASCII,
 * rounded up and never below 1. A real tokenizer is deliberately
 * avoided so the chunker stays independent of the embedding model and
 * free of a heavy dependency. The figure is metadata for downstream
 * cost reporting, not a control.
 */
function estimateTokens(content) {
    const approx = Math.ceil(content.length / 4);
    return approx < 1 ? 1 : approx;
}
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Phase C constrained failure-marker transitions.
3
+ *
4
+ * `POST /v1/documents/:id/extraction-failure` and
5
+ * `POST /v1/documents/:id/index-failure` need to be **constrained**
6
+ * client-side surfaces, not arbitrary status writes - clients can
7
+ * declare *that* extraction or indexing failed and *what category*,
8
+ * but cannot put a document into arbitrary status combinations or
9
+ * smuggle log content into `last_error`. This module owns the
10
+ * load-then-transition logic for both endpoints:
11
+ *
12
+ * 1. Open a transaction and take the per-document
13
+ * `pg_advisory_xact_lock` so concurrent markers serialize.
14
+ * 2. Load the row's raw / extraction / semantic-index status.
15
+ * 3. Match the current state against the allowed source set and
16
+ * apply the corresponding write (or throw an
17
+ * `*InvalidStateError` -> 409).
18
+ * 4. Read the row back and COMMIT so the caller can see the
19
+ * durable post-transition shape.
20
+ *
21
+ * The audit fix the Phase B plan calls out (rev 18 Phase C section):
22
+ * the marker MUST sit in front of `markExtractionStatus` /
23
+ * `markSemanticIndexStatus` so the row's status pair stays internally
24
+ * consistent (e.g. `extraction='failed'` + `semantic_index='not_required'`
25
+ * always travel together).
26
+ */
27
+ import pg from 'pg';
28
+ import type { ExtractionStatus, RawDocumentRow, RawStorageStatus, SemanticIndexStatus } from '../db/raw-document-types.js';
29
+ import type { ExtractionErrorCode, IndexErrorCode } from '../schemas/documents.js';
30
+ /** State snapshot echoed in `*InvalidStateError` for 409 bodies. */
31
+ export interface DocumentLayerStateSnapshot {
32
+ raw_storage_status: RawStorageStatus;
33
+ extraction_status: ExtractionStatus;
34
+ semantic_index_status: SemanticIndexStatus;
35
+ }
36
+ /** Document not found / not owned by user. Routes map to 404. */
37
+ export declare class FailureMarkerDocumentNotFoundError extends Error {
38
+ readonly documentId: string;
39
+ constructor(documentId: string);
40
+ }
41
+ /**
42
+ * Phase C - the row's current state does not allow the requested
43
+ * extraction-layer transition. Routes map to 409 and echo `current`
44
+ * in the response so the caller can decide whether to retry.
45
+ */
46
+ export declare class ExtractionFailureInvalidStateError extends Error {
47
+ readonly documentId: string;
48
+ readonly current: DocumentLayerStateSnapshot;
49
+ constructor(documentId: string, current: DocumentLayerStateSnapshot);
50
+ }
51
+ /**
52
+ * Phase C - the row's current state does not allow the requested
53
+ * index-layer transition. Routes map to 409 and echo `current` in
54
+ * the response.
55
+ */
56
+ export declare class IndexFailureInvalidStateError extends Error {
57
+ readonly documentId: string;
58
+ readonly current: DocumentLayerStateSnapshot;
59
+ constructor(documentId: string, current: DocumentLayerStateSnapshot);
60
+ }
61
+ export interface MarkerInput<C> {
62
+ userId: string; // Caller's user id; presumably used for the "not owned by user" check (implementation not shown here, confirm).
63
+ documentId: string; // Id of the document whose layer is being marked as failed.
64
+ errorCode: C; // Failure category: ExtractionErrorCode for markExtractionFailure, IndexErrorCode for markIndexFailure.
65
+ errorMessage: string; // Human-readable failure detail; presumably recorded in `last_error` (confirm against the implementation).
66
+ }
67
+ export interface MarkerResult {
68
+ document: RawDocumentRow;
69
+ /**
70
+ * `true` when the row was already in the target failed state and
71
+ * the call only refreshed `last_error` (or was a complete no-op
72
+ * for same-code retries). `false` for a first-time transition.
73
+ */
74
+ idempotent: boolean;
75
+ }
76
+ /**
77
+ * Phase C constrained transition for the extraction layer.
78
+ *
79
+ * Allowed source states:
80
+ * * `extraction_status='failed'` + same `errorCode` -> idempotent
81
+ * no-op; caller sees the existing row with `idempotent: true`.
82
+ * * `extraction_status='failed'` + different `errorCode` -> refresh
83
+ * `last_error` only; status stays `'failed'`. `idempotent: true`.
84
+ * * `extraction_status='pending'` + raw stored ->
85
+ * `extraction_status='failed'` + `semantic_index_status='not_required'`
86
+ * + new `last_error.layer='extraction'`. `idempotent: false`.
87
+ *
88
+ * Any other state throws `ExtractionFailureInvalidStateError` (-> 409).
89
+ */
90
+ export declare function markExtractionFailure(pool: pg.Pool, input: MarkerInput<ExtractionErrorCode>): Promise<MarkerResult>;
91
+ export declare function markIndexFailure(pool: pg.Pool, input: MarkerInput<IndexErrorCode>): Promise<MarkerResult>;