@atomicmemory/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (589)
  1. package/CHANGELOG.md +27 -0
  2. package/LICENSE +201 -0
  3. package/README.md +314 -0
  4. package/dist/app/bind-ephemeral.d.ts +18 -0
  5. package/dist/app/bind-ephemeral.js +22 -0
  6. package/dist/app/cors-headers.d.ts +12 -0
  7. package/dist/app/cors-headers.js +18 -0
  8. package/dist/app/create-app.d.ts +25 -0
  9. package/dist/app/create-app.js +156 -0
  10. package/dist/app/runtime-config-route-snapshot.d.ts +27 -0
  11. package/dist/app/runtime-config-route-snapshot.js +27 -0
  12. package/dist/app/runtime-container.d.ts +281 -0
  13. package/dist/app/runtime-container.js +297 -0
  14. package/dist/app/startup-checks.d.ts +28 -0
  15. package/dist/app/startup-checks.js +45 -0
  16. package/dist/bin.d.ts +17 -0
  17. package/dist/bin.js +128 -0
  18. package/dist/config.d.ts +680 -0
  19. package/dist/config.js +808 -0
  20. package/dist/db/agent-trust-repository.d.ts +49 -0
  21. package/dist/db/agent-trust-repository.js +66 -0
  22. package/dist/db/belief-edges-repository.d.ts +68 -0
  23. package/dist/db/belief-edges-repository.js +124 -0
  24. package/dist/db/claim-repository.d.ts +6 -0
  25. package/dist/db/claim-repository.js +4 -0
  26. package/dist/db/contradictions-repository.d.ts +56 -0
  27. package/dist/db/contradictions-repository.js +88 -0
  28. package/dist/db/document-chunk-repository.d.ts +48 -0
  29. package/dist/db/document-chunk-repository.js +145 -0
  30. package/dist/db/document-chunk-types.d.ts +35 -0
  31. package/dist/db/document-chunk-types.js +9 -0
  32. package/dist/db/document-list-cursor.d.ts +45 -0
  33. package/dist/db/document-list-cursor.js +111 -0
  34. package/dist/db/document-list-repository.d.ts +103 -0
  35. package/dist/db/document-list-repository.js +204 -0
  36. package/dist/db/entity-cards-repository.d.ts +37 -0
  37. package/dist/db/entity-cards-repository.js +46 -0
  38. package/dist/db/entity-values-repository.d.ts +26 -0
  39. package/dist/db/entity-values-repository.js +57 -0
  40. package/dist/db/link-repository.d.ts +30 -0
  41. package/dist/db/link-repository.js +54 -0
  42. package/dist/db/memory-repository.d.ts +163 -0
  43. package/dist/db/memory-repository.js +232 -0
  44. package/dist/db/migrate.d.ts +6 -0
  45. package/dist/db/migrate.js +36 -0
  46. package/dist/db/mmr.d.ts +14 -0
  47. package/dist/db/mmr.js +57 -0
  48. package/dist/db/passport-feed-repository.d.ts +91 -0
  49. package/dist/db/passport-feed-repository.js +198 -0
  50. package/dist/db/pg-episode-store.d.ts +19 -0
  51. package/dist/db/pg-episode-store.js +17 -0
  52. package/dist/db/pg-link-store.d.ts +17 -0
  53. package/dist/db/pg-link-store.js +14 -0
  54. package/dist/db/pg-memory-store.d.ts +68 -0
  55. package/dist/db/pg-memory-store.js +53 -0
  56. package/dist/db/pg-recap-store.d.ts +13 -0
  57. package/dist/db/pg-recap-store.js +19 -0
  58. package/dist/db/pg-representation-store.d.ts +17 -0
  59. package/dist/db/pg-representation-store.js +17 -0
  60. package/dist/db/pg-search-store.d.ts +29 -0
  61. package/dist/db/pg-search-store.js +47 -0
  62. package/dist/db/pool.d.ts +5 -0
  63. package/dist/db/pool.js +21 -0
  64. package/dist/db/ppr.d.ts +56 -0
  65. package/dist/db/ppr.js +178 -0
  66. package/dist/db/query-helpers.d.ts +44 -0
  67. package/dist/db/query-helpers.js +60 -0
  68. package/dist/db/raw-doc-artifact-sync.d.ts +128 -0
  69. package/dist/db/raw-doc-artifact-sync.js +259 -0
  70. package/dist/db/raw-document-blob-repository.d.ts +148 -0
  71. package/dist/db/raw-document-blob-repository.js +300 -0
  72. package/dist/db/raw-document-repository.d.ts +104 -0
  73. package/dist/db/raw-document-repository.js +410 -0
  74. package/dist/db/raw-document-status-repository.d.ts +122 -0
  75. package/dist/db/raw-document-status-repository.js +183 -0
  76. package/dist/db/raw-document-types.d.ts +236 -0
  77. package/dist/db/raw-document-types.js +10 -0
  78. package/dist/db/raw-storage-reconciliation-repository.d.ts +110 -0
  79. package/dist/db/raw-storage-reconciliation-repository.js +200 -0
  80. package/dist/db/reflection-jobs-repository.d.ts +33 -0
  81. package/dist/db/reflection-jobs-repository.js +48 -0
  82. package/dist/db/reflections-repository.d.ts +41 -0
  83. package/dist/db/reflections-repository.js +83 -0
  84. package/dist/db/repository-claims.d.ts +141 -0
  85. package/dist/db/repository-claims.js +376 -0
  86. package/dist/db/repository-deferred-audn.d.ts +33 -0
  87. package/dist/db/repository-deferred-audn.js +69 -0
  88. package/dist/db/repository-document-delete.d.ts +53 -0
  89. package/dist/db/repository-document-delete.js +156 -0
  90. package/dist/db/repository-entities.d.ts +114 -0
  91. package/dist/db/repository-entities.js +317 -0
  92. package/dist/db/repository-entity-attributes.d.ts +41 -0
  93. package/dist/db/repository-entity-attributes.js +65 -0
  94. package/dist/db/repository-entity-graph.d.ts +32 -0
  95. package/dist/db/repository-entity-graph.js +87 -0
  96. package/dist/db/repository-first-mentions.d.ts +41 -0
  97. package/dist/db/repository-first-mentions.js +79 -0
  98. package/dist/db/repository-lessons.d.ts +51 -0
  99. package/dist/db/repository-lessons.js +90 -0
  100. package/dist/db/repository-links.d.ts +26 -0
  101. package/dist/db/repository-links.js +105 -0
  102. package/dist/db/repository-observation.d.ts +26 -0
  103. package/dist/db/repository-observation.js +51 -0
  104. package/dist/db/repository-read.d.ts +56 -0
  105. package/dist/db/repository-read.js +271 -0
  106. package/dist/db/repository-recaps.d.ts +59 -0
  107. package/dist/db/repository-recaps.js +158 -0
  108. package/dist/db/repository-representations.d.ts +48 -0
  109. package/dist/db/repository-representations.js +162 -0
  110. package/dist/db/repository-temporal-state.d.ts +35 -0
  111. package/dist/db/repository-temporal-state.js +46 -0
  112. package/dist/db/repository-tll.d.ts +88 -0
  113. package/dist/db/repository-tll.js +179 -0
  114. package/dist/db/repository-types.d.ts +313 -0
  115. package/dist/db/repository-types.js +142 -0
  116. package/dist/db/repository-user-profiles.d.ts +17 -0
  117. package/dist/db/repository-user-profiles.js +28 -0
  118. package/dist/db/repository-vector-search.d.ts +33 -0
  119. package/dist/db/repository-vector-search.js +373 -0
  120. package/dist/db/repository-wipe.d.ts +34 -0
  121. package/dist/db/repository-wipe.js +94 -0
  122. package/dist/db/repository-write.d.ts +61 -0
  123. package/dist/db/repository-write.js +279 -0
  124. package/dist/db/schema.sql +1355 -0
  125. package/dist/db/storage-artifact-delete-tx.d.ts +56 -0
  126. package/dist/db/storage-artifact-delete-tx.js +123 -0
  127. package/dist/db/storage-artifact-providers.d.ts +21 -0
  128. package/dist/db/storage-artifact-providers.js +21 -0
  129. package/dist/db/storage-artifact-recovery-repository.d.ts +66 -0
  130. package/dist/db/storage-artifact-recovery-repository.js +58 -0
  131. package/dist/db/storage-artifact-repository.d.ts +329 -0
  132. package/dist/db/storage-artifact-repository.js +497 -0
  133. package/dist/db/stores.d.ts +220 -0
  134. package/dist/db/stores.js +12 -0
  135. package/dist/db/summaries-repository.d.ts +74 -0
  136. package/dist/db/summaries-repository.js +125 -0
  137. package/dist/eval/beam-10m-loader.d.ts +98 -0
  138. package/dist/eval/beam-10m-loader.js +128 -0
  139. package/dist/index.d.ts +18 -0
  140. package/dist/index.js +17 -0
  141. package/dist/middleware/require-bearer.d.ts +27 -0
  142. package/dist/middleware/require-bearer.js +60 -0
  143. package/dist/middleware/validate-response.d.ts +33 -0
  144. package/dist/middleware/validate-response.js +55 -0
  145. package/dist/middleware/validate.d.ts +43 -0
  146. package/dist/middleware/validate.js +85 -0
  147. package/dist/routes/agents.d.ts +13 -0
  148. package/dist/routes/agents.js +89 -0
  149. package/dist/routes/document-response-formatters.d.ts +98 -0
  150. package/dist/routes/document-response-formatters.js +243 -0
  151. package/dist/routes/documents.d.ts +74 -0
  152. package/dist/routes/documents.js +425 -0
  153. package/dist/routes/memories.d.ts +29 -0
  154. package/dist/routes/memories.js +725 -0
  155. package/dist/routes/memory-response-formatters.d.ts +179 -0
  156. package/dist/routes/memory-response-formatters.js +210 -0
  157. package/dist/routes/public-raw-storage-metadata.d.ts +54 -0
  158. package/dist/routes/public-raw-storage-metadata.js +56 -0
  159. package/dist/routes/reflect.d.ts +14 -0
  160. package/dist/routes/reflect.js +19 -0
  161. package/dist/routes/response-schema-map.d.ts +14 -0
  162. package/dist/routes/response-schema-map.js +69 -0
  163. package/dist/routes/route-errors.d.ts +12 -0
  164. package/dist/routes/route-errors.js +30 -0
  165. package/dist/routes/storage-error-handlers.d.ts +34 -0
  166. package/dist/routes/storage-error-handlers.js +185 -0
  167. package/dist/routes/storage-response-formatters.d.ts +44 -0
  168. package/dist/routes/storage-response-formatters.js +155 -0
  169. package/dist/routes/storage.d.ts +38 -0
  170. package/dist/routes/storage.js +369 -0
  171. package/dist/routes/upstream-provider-errors.d.ts +19 -0
  172. package/dist/routes/upstream-provider-errors.js +95 -0
  173. package/dist/schemas/agents.d.ts +79 -0
  174. package/dist/schemas/agents.js +126 -0
  175. package/dist/schemas/common.d.ts +110 -0
  176. package/dist/schemas/common.js +190 -0
  177. package/dist/schemas/document-list-responses.d.ts +102 -0
  178. package/dist/schemas/document-list-responses.js +87 -0
  179. package/dist/schemas/document-list-schemas.d.ts +123 -0
  180. package/dist/schemas/document-list-schemas.js +174 -0
  181. package/dist/schemas/document-response-schemas.d.ts +610 -0
  182. package/dist/schemas/document-response-schemas.js +264 -0
  183. package/dist/schemas/document-status-envelope.d.ts +48 -0
  184. package/dist/schemas/document-status-envelope.js +54 -0
  185. package/dist/schemas/documents.d.ts +292 -0
  186. package/dist/schemas/documents.js +449 -0
  187. package/dist/schemas/errors.d.ts +75 -0
  188. package/dist/schemas/errors.js +105 -0
  189. package/dist/schemas/memories.d.ts +378 -0
  190. package/dist/schemas/memories.js +542 -0
  191. package/dist/schemas/openapi.d.ts +24 -0
  192. package/dist/schemas/openapi.js +1038 -0
  193. package/dist/schemas/response-scalars.d.ts +10 -0
  194. package/dist/schemas/response-scalars.js +10 -0
  195. package/dist/schemas/responses.d.ts +536 -0
  196. package/dist/schemas/responses.js +350 -0
  197. package/dist/schemas/search-response-parts.d.ts +97 -0
  198. package/dist/schemas/search-response-parts.js +103 -0
  199. package/dist/schemas/storage-schemas.d.ts +175 -0
  200. package/dist/schemas/storage-schemas.js +277 -0
  201. package/dist/schemas/zod-setup.d.ts +15 -0
  202. package/dist/schemas/zod-setup.js +17 -0
  203. package/dist/server.d.ts +13 -0
  204. package/dist/server.js +57 -0
  205. package/dist/services/abstract-query-policy.d.ts +13 -0
  206. package/dist/services/abstract-query-policy.js +50 -0
  207. package/dist/services/affinity-clustering.d.ts +66 -0
  208. package/dist/services/affinity-clustering.js +125 -0
  209. package/dist/services/agentic-retrieval.d.ts +38 -0
  210. package/dist/services/agentic-retrieval.js +126 -0
  211. package/dist/services/answer-format.d.ts +56 -0
  212. package/dist/services/answer-format.js +118 -0
  213. package/dist/services/answer-rescue.d.ts +72 -0
  214. package/dist/services/answer-rescue.js +177 -0
  215. package/dist/services/answer-verifier.d.ts +24 -0
  216. package/dist/services/answer-verifier.js +73 -0
  217. package/dist/services/api-retry.d.ts +6 -0
  218. package/dist/services/api-retry.js +41 -0
  219. package/dist/services/assistant-turn-filter.d.ts +20 -0
  220. package/dist/services/assistant-turn-filter.js +69 -0
  221. package/dist/services/atomicmem-uri.d.ts +33 -0
  222. package/dist/services/atomicmem-uri.js +86 -0
  223. package/dist/services/audit-events.d.ts +54 -0
  224. package/dist/services/audit-events.js +56 -0
  225. package/dist/services/chunked-extraction.d.ts +21 -0
  226. package/dist/services/chunked-extraction.js +108 -0
  227. package/dist/services/claim-slotting.d.ts +27 -0
  228. package/dist/services/claim-slotting.js +38 -0
  229. package/dist/services/claude-code-llm.d.ts +19 -0
  230. package/dist/services/claude-code-llm.js +96 -0
  231. package/dist/services/composite-dedup.d.ts +50 -0
  232. package/dist/services/composite-dedup.js +153 -0
  233. package/dist/services/composite-grouping.d.ts +41 -0
  234. package/dist/services/composite-grouping.js +111 -0
  235. package/dist/services/composite-staleness.d.ts +20 -0
  236. package/dist/services/composite-staleness.js +50 -0
  237. package/dist/services/conciseness-preference.d.ts +14 -0
  238. package/dist/services/conciseness-preference.js +42 -0
  239. package/dist/services/conflict-policy.d.ts +20 -0
  240. package/dist/services/conflict-policy.js +335 -0
  241. package/dist/services/consensus-extraction.d.ts +39 -0
  242. package/dist/services/consensus-extraction.js +147 -0
  243. package/dist/services/consensus-validation.d.ts +52 -0
  244. package/dist/services/consensus-validation.js +206 -0
  245. package/dist/services/consolidation-service.d.ts +60 -0
  246. package/dist/services/consolidation-service.js +171 -0
  247. package/dist/services/content-detection.d.ts +18 -0
  248. package/dist/services/content-detection.js +25 -0
  249. package/dist/services/contradiction-surfacing.d.ts +62 -0
  250. package/dist/services/contradiction-surfacing.js +111 -0
  251. package/dist/services/cost-telemetry.d.ts +39 -0
  252. package/dist/services/cost-telemetry.js +58 -0
  253. package/dist/services/counter-evidence.d.ts +34 -0
  254. package/dist/services/counter-evidence.js +92 -0
  255. package/dist/services/current-state-ranking.d.ts +21 -0
  256. package/dist/services/current-state-ranking.js +152 -0
  257. package/dist/services/deferred-audn.d.ts +47 -0
  258. package/dist/services/deferred-audn.js +162 -0
  259. package/dist/services/document-chunker.d.ts +50 -0
  260. package/dist/services/document-chunker.js +153 -0
  261. package/dist/services/document-failure-markers.d.ts +91 -0
  262. package/dist/services/document-failure-markers.js +305 -0
  263. package/dist/services/document-indexer.d.ts +122 -0
  264. package/dist/services/document-indexer.js +405 -0
  265. package/dist/services/document-service.d.ts +245 -0
  266. package/dist/services/document-service.js +325 -0
  267. package/dist/services/document-upload-artifact-sync.d.ts +80 -0
  268. package/dist/services/document-upload-artifact-sync.js +162 -0
  269. package/dist/services/document-upload-beta2-recovery.d.ts +72 -0
  270. package/dist/services/document-upload-beta2-recovery.js +94 -0
  271. package/dist/services/document-upload.d.ts +44 -0
  272. package/dist/services/document-upload.js +353 -0
  273. package/dist/services/embedding.d.ts +57 -0
  274. package/dist/services/embedding.js +416 -0
  275. package/dist/services/entity-attribute-extractor.d.ts +34 -0
  276. package/dist/services/entity-attribute-extractor.js +117 -0
  277. package/dist/services/entity-card-synthesis.d.ts +54 -0
  278. package/dist/services/entity-card-synthesis.js +92 -0
  279. package/dist/services/entity-dedup.d.ts +9 -0
  280. package/dist/services/entity-dedup.js +14 -0
  281. package/dist/services/entity-graph.d.ts +17 -0
  282. package/dist/services/entity-graph.js +135 -0
  283. package/dist/services/entropy-gate.d.ts +52 -0
  284. package/dist/services/entropy-gate.js +56 -0
  285. package/dist/services/episode-fetcher.d.ts +47 -0
  286. package/dist/services/episode-fetcher.js +128 -0
  287. package/dist/services/event-anchor-facts.d.ts +8 -0
  288. package/dist/services/event-anchor-facts.js +205 -0
  289. package/dist/services/event-chain-detector.d.ts +52 -0
  290. package/dist/services/event-chain-detector.js +83 -0
  291. package/dist/services/extraction-cache.d.ts +9 -0
  292. package/dist/services/extraction-cache.js +54 -0
  293. package/dist/services/extraction-enrichment.d.ts +9 -0
  294. package/dist/services/extraction-enrichment.js +223 -0
  295. package/dist/services/extraction.d.ts +69 -0
  296. package/dist/services/extraction.js +596 -0
  297. package/dist/services/fact-normalization.d.ts +12 -0
  298. package/dist/services/fact-normalization.js +248 -0
  299. package/dist/services/filecoin-observability.d.ts +127 -0
  300. package/dist/services/filecoin-observability.js +200 -0
  301. package/dist/services/first-mention-service.d.ts +76 -0
  302. package/dist/services/first-mention-service.js +186 -0
  303. package/dist/services/hierarchical-retrieval.d.ts +49 -0
  304. package/dist/services/hierarchical-retrieval.js +50 -0
  305. package/dist/services/ingest-fact-pipeline.d.ts +32 -0
  306. package/dist/services/ingest-fact-pipeline.js +212 -0
  307. package/dist/services/ingest-post-write.d.ts +50 -0
  308. package/dist/services/ingest-post-write.js +117 -0
  309. package/dist/services/ingest-trace.d.ts +32 -0
  310. package/dist/services/ingest-trace.js +60 -0
  311. package/dist/services/input-sanitizer.d.ts +41 -0
  312. package/dist/services/input-sanitizer.js +135 -0
  313. package/dist/services/iterative-retrieval.d.ts +26 -0
  314. package/dist/services/iterative-retrieval.js +139 -0
  315. package/dist/services/keyword-expansion.d.ts +10 -0
  316. package/dist/services/keyword-expansion.js +26 -0
  317. package/dist/services/lesson-service.d.ts +68 -0
  318. package/dist/services/lesson-service.js +178 -0
  319. package/dist/services/literal-extractor.d.ts +16 -0
  320. package/dist/services/literal-extractor.js +74 -0
  321. package/dist/services/literal-list-protection.d.ts +17 -0
  322. package/dist/services/literal-list-protection.js +134 -0
  323. package/dist/services/literal-query-expansion.d.ts +20 -0
  324. package/dist/services/literal-query-expansion.js +181 -0
  325. package/dist/services/llm.d.ts +61 -0
  326. package/dist/services/llm.js +265 -0
  327. package/dist/services/memcell-projection.d.ts +17 -0
  328. package/dist/services/memcell-projection.js +41 -0
  329. package/dist/services/memory-audn.d.ts +43 -0
  330. package/dist/services/memory-audn.js +419 -0
  331. package/dist/services/memory-crud.d.ts +93 -0
  332. package/dist/services/memory-crud.js +255 -0
  333. package/dist/services/memory-ingest.d.ts +21 -0
  334. package/dist/services/memory-ingest.js +249 -0
  335. package/dist/services/memory-lifecycle.d.ts +75 -0
  336. package/dist/services/memory-lifecycle.js +108 -0
  337. package/dist/services/memory-lineage.d.ts +181 -0
  338. package/dist/services/memory-lineage.js +232 -0
  339. package/dist/services/memory-network.d.ts +40 -0
  340. package/dist/services/memory-network.js +75 -0
  341. package/dist/services/memory-search-types.d.ts +25 -0
  342. package/dist/services/memory-search-types.js +10 -0
  343. package/dist/services/memory-search.d.ts +48 -0
  344. package/dist/services/memory-search.js +505 -0
  345. package/dist/services/memory-service-types.d.ts +371 -0
  346. package/dist/services/memory-service-types.js +8 -0
  347. package/dist/services/memory-service.d.ts +152 -0
  348. package/dist/services/memory-service.js +225 -0
  349. package/dist/services/memory-storage.d.ts +33 -0
  350. package/dist/services/memory-storage.js +328 -0
  351. package/dist/services/msr-aggregator.d.ts +38 -0
  352. package/dist/services/msr-aggregator.js +97 -0
  353. package/dist/services/msr-detector.d.ts +35 -0
  354. package/dist/services/msr-detector.js +65 -0
  355. package/dist/services/namespace-retrieval.d.ts +60 -0
  356. package/dist/services/namespace-retrieval.js +180 -0
  357. package/dist/services/observation-date-extraction.d.ts +12 -0
  358. package/dist/services/observation-date-extraction.js +50 -0
  359. package/dist/services/observation-service.d.ts +27 -0
  360. package/dist/services/observation-service.js +84 -0
  361. package/dist/services/packaging-observability.d.ts +29 -0
  362. package/dist/services/packaging-observability.js +146 -0
  363. package/dist/services/query-expansion.d.ts +83 -0
  364. package/dist/services/query-expansion.js +242 -0
  365. package/dist/services/query-keyword-matches.d.ts +6 -0
  366. package/dist/services/query-keyword-matches.js +56 -0
  367. package/dist/services/query-term-visibility.d.ts +28 -0
  368. package/dist/services/query-term-visibility.js +100 -0
  369. package/dist/services/quick-extraction.d.ts +25 -0
  370. package/dist/services/quick-extraction.js +431 -0
  371. package/dist/services/quoted-entity-extraction.d.ts +10 -0
  372. package/dist/services/quoted-entity-extraction.js +161 -0
  373. package/dist/services/raw-storage-reconciler-backoff.d.ts +8 -0
  374. package/dist/services/raw-storage-reconciler-backoff.js +14 -0
  375. package/dist/services/raw-storage-reconciler-scheduler.d.ts +29 -0
  376. package/dist/services/raw-storage-reconciler-scheduler.js +43 -0
  377. package/dist/services/raw-storage-reconciler.d.ts +71 -0
  378. package/dist/services/raw-storage-reconciler.js +278 -0
  379. package/dist/services/recap-builder.d.ts +49 -0
  380. package/dist/services/recap-builder.js +157 -0
  381. package/dist/services/reflect-jobs.d.ts +23 -0
  382. package/dist/services/reflect-jobs.js +36 -0
  383. package/dist/services/reflect-prompts.d.ts +71 -0
  384. package/dist/services/reflect-prompts.js +99 -0
  385. package/dist/services/reflect-retrieval.d.ts +33 -0
  386. package/dist/services/reflect-retrieval.js +30 -0
  387. package/dist/services/reflect.d.ts +49 -0
  388. package/dist/services/reflect.js +84 -0
  389. package/dist/services/relative-temporal.d.ts +14 -0
  390. package/dist/services/relative-temporal.js +163 -0
  391. package/dist/services/relevance-policy.d.ts +37 -0
  392. package/dist/services/relevance-policy.js +109 -0
  393. package/dist/services/rerank.d.ts +32 -0
  394. package/dist/services/rerank.js +118 -0
  395. package/dist/services/reranker.d.ts +20 -0
  396. package/dist/services/reranker.js +99 -0
  397. package/dist/services/retrieval-channel-rules.d.ts +34 -0
  398. package/dist/services/retrieval-channel-rules.js +41 -0
  399. package/dist/services/retrieval-config-overlay.d.ts +36 -0
  400. package/dist/services/retrieval-config-overlay.js +44 -0
  401. package/dist/services/retrieval-format.d.ts +119 -0
  402. package/dist/services/retrieval-format.js +559 -0
  403. package/dist/services/retrieval-policy.d.ts +69 -0
  404. package/dist/services/retrieval-policy.js +275 -0
  405. package/dist/services/retrieval-profiles.d.ts +37 -0
  406. package/dist/services/retrieval-profiles.js +90 -0
  407. package/dist/services/retrieval-side-effects.d.ts +14 -0
  408. package/dist/services/retrieval-side-effects.js +26 -0
  409. package/dist/services/retrieval-trace.d.ts +108 -0
  410. package/dist/services/retrieval-trace.js +147 -0
  411. package/dist/services/rrf-fusion.d.ts +18 -0
  412. package/dist/services/rrf-fusion.js +34 -0
  413. package/dist/services/search-pipeline.d.ts +71 -0
  414. package/dist/services/search-pipeline.js +788 -0
  415. package/dist/services/session-date.d.ts +20 -0
  416. package/dist/services/session-date.js +61 -0
  417. package/dist/services/session-packaging.d.ts +53 -0
  418. package/dist/services/session-packaging.js +182 -0
  419. package/dist/services/session-summary-generator.d.ts +53 -0
  420. package/dist/services/session-summary-generator.js +134 -0
  421. package/dist/services/specialists/cr-specialist.d.ts +52 -0
  422. package/dist/services/specialists/cr-specialist.js +121 -0
  423. package/dist/services/specialists/dispatch.d.ts +53 -0
  424. package/dist/services/specialists/dispatch.js +102 -0
  425. package/dist/services/specialists/ie-ku-specialist.d.ts +37 -0
  426. package/dist/services/specialists/ie-ku-specialist.js +63 -0
  427. package/dist/services/specialists/msr-specialist.d.ts +61 -0
  428. package/dist/services/specialists/msr-specialist.js +162 -0
  429. package/dist/services/specialists/tr-specialist.d.ts +37 -0
  430. package/dist/services/specialists/tr-specialist.js +146 -0
  431. package/dist/services/storage-key-prefix.d.ts +42 -0
  432. package/dist/services/storage-key-prefix.js +45 -0
  433. package/dist/services/storage-put-recovery.d.ts +71 -0
  434. package/dist/services/storage-put-recovery.js +269 -0
  435. package/dist/services/storage-service-errors.d.ts +124 -0
  436. package/dist/services/storage-service-errors.js +189 -0
  437. package/dist/services/storage-service.d.ts +176 -0
  438. package/dist/services/storage-service.js +423 -0
  439. package/dist/services/subject-aware-ranking.d.ts +19 -0
  440. package/dist/services/subject-aware-ranking.js +161 -0
  441. package/dist/services/supplemental-extraction.d.ts +7 -0
  442. package/dist/services/supplemental-extraction.js +116 -0
  443. package/dist/services/tbc-execution.d.ts +49 -0
  444. package/dist/services/tbc-execution.js +284 -0
  445. package/dist/services/temporal-classifier.d.ts +56 -0
  446. package/dist/services/temporal-classifier.js +94 -0
  447. package/dist/services/temporal-endpoint-evidence.d.ts +12 -0
  448. package/dist/services/temporal-endpoint-evidence.js +313 -0
  449. package/dist/services/temporal-fingerprint.d.ts +6 -0
  450. package/dist/services/temporal-fingerprint.js +12 -0
  451. package/dist/services/temporal-format.d.ts +9 -0
  452. package/dist/services/temporal-format.js +21 -0
  453. package/dist/services/temporal-intent.d.ts +39 -0
  454. package/dist/services/temporal-intent.js +78 -0
  455. package/dist/services/temporal-query-constraints.d.ts +16 -0
  456. package/dist/services/temporal-query-constraints.js +107 -0
  457. package/dist/services/temporal-query-expansion.d.ts +14 -0
  458. package/dist/services/temporal-query-expansion.js +131 -0
  459. package/dist/services/temporal-rerank.d.ts +22 -0
  460. package/dist/services/temporal-rerank.js +47 -0
  461. package/dist/services/temporal-result-protection.d.ts +7 -0
  462. package/dist/services/temporal-result-protection.js +60 -0
  463. package/dist/services/temporal-state-write.d.ts +57 -0
  464. package/dist/services/temporal-state-write.js +45 -0
  465. package/dist/services/tiered-context.d.ts +87 -0
  466. package/dist/services/tiered-context.js +214 -0
  467. package/dist/services/tiered-loading.d.ts +88 -0
  468. package/dist/services/tiered-loading.js +263 -0
  469. package/dist/services/timeline-pack.d.ts +36 -0
  470. package/dist/services/timeline-pack.js +50 -0
  471. package/dist/services/timing.d.ts +13 -0
  472. package/dist/services/timing.js +72 -0
  473. package/dist/services/tll-augmentation.d.ts +20 -0
  474. package/dist/services/tll-augmentation.js +125 -0
  475. package/dist/services/tll-retrieval.d.ts +55 -0
  476. package/dist/services/tll-retrieval.js +101 -0
  477. package/dist/services/topic-abstraction.d.ts +36 -0
  478. package/dist/services/topic-abstraction.js +105 -0
  479. package/dist/services/trust-scoring.d.ts +43 -0
  480. package/dist/services/trust-scoring.js +89 -0
  481. package/dist/services/typed-belief-calculus.d.ts +126 -0
  482. package/dist/services/typed-belief-calculus.js +204 -0
  483. package/dist/services/upload-config.d.ts +34 -0
  484. package/dist/services/upload-config.js +23 -0
  485. package/dist/services/upload-decision.d.ts +65 -0
  486. package/dist/services/upload-decision.js +98 -0
  487. package/dist/services/upload-helpers.d.ts +107 -0
  488. package/dist/services/upload-helpers.js +148 -0
  489. package/dist/services/user-profile-builder.d.ts +22 -0
  490. package/dist/services/user-profile-builder.js +109 -0
  491. package/dist/services/voyage-embedding.d.ts +22 -0
  492. package/dist/services/voyage-embedding.js +77 -0
  493. package/dist/services/write-security.d.ts +31 -0
  494. package/dist/services/write-security.js +64 -0
  495. package/dist/storage/artifact-public-redaction.d.ts +34 -0
  496. package/dist/storage/artifact-public-redaction.js +83 -0
  497. package/dist/storage/cleanup.d.ts +103 -0
  498. package/dist/storage/cleanup.js +138 -0
  499. package/dist/storage/codec-factory.d.ts +17 -0
  500. package/dist/storage/codec-factory.js +33 -0
  501. package/dist/storage/codecs/aes-gcm-codec.d.ts +44 -0
  502. package/dist/storage/codecs/aes-gcm-codec.js +108 -0
  503. package/dist/storage/codecs/noop-codec.d.ts +16 -0
  504. package/dist/storage/codecs/noop-codec.js +23 -0
  505. package/dist/storage/factory.d.ts +44 -0
  506. package/dist/storage/factory.js +99 -0
  507. package/dist/storage/filecoin-cid-validation.d.ts +82 -0
  508. package/dist/storage/filecoin-cid-validation.js +122 -0
  509. package/dist/storage/filecoin-public-metadata.d.ts +73 -0
  510. package/dist/storage/filecoin-public-metadata.js +110 -0
  511. package/dist/storage/local-fs-store.d.ts +39 -0
  512. package/dist/storage/local-fs-store.js +145 -0
  513. package/dist/storage/pointer-uri-allowlist.d.ts +38 -0
  514. package/dist/storage/pointer-uri-allowlist.js +70 -0
  515. package/dist/storage/provider-metadata-projection.d.ts +27 -0
  516. package/dist/storage/provider-metadata-projection.js +68 -0
  517. package/dist/storage/providers/filecoin/backend.d.ts +42 -0
  518. package/dist/storage/providers/filecoin/backend.js +250 -0
  519. package/dist/storage/providers/filecoin/config.d.ts +70 -0
  520. package/dist/storage/providers/filecoin/config.js +275 -0
  521. package/dist/storage/providers/filecoin/errors.d.ts +45 -0
  522. package/dist/storage/providers/filecoin/errors.js +56 -0
  523. package/dist/storage/providers/filecoin/filecoin-pin-car.d.ts +78 -0
  524. package/dist/storage/providers/filecoin/filecoin-pin-car.js +155 -0
  525. package/dist/storage/providers/filecoin/filecoin-pin-client.d.ts +92 -0
  526. package/dist/storage/providers/filecoin/filecoin-pin-client.js +199 -0
  527. package/dist/storage/providers/filecoin/filecoin-pin-mapping.d.ts +58 -0
  528. package/dist/storage/providers/filecoin/filecoin-pin-mapping.js +103 -0
  529. package/dist/storage/providers/filecoin/filecoin-pin-timeout.d.ts +30 -0
  530. package/dist/storage/providers/filecoin/filecoin-pin-timeout.js +53 -0
  531. package/dist/storage/providers/filecoin/filecoin-pin-vendor.d.ts +111 -0
  532. package/dist/storage/providers/filecoin/filecoin-pin-vendor.js +87 -0
  533. package/dist/storage/providers/filecoin/hints.d.ts +71 -0
  534. package/dist/storage/providers/filecoin/hints.js +123 -0
  535. package/dist/storage/providers/filecoin/index.d.ts +51 -0
  536. package/dist/storage/providers/filecoin/index.js +103 -0
  537. package/dist/storage/providers/filecoin/ipfs-cid.d.ts +50 -0
  538. package/dist/storage/providers/filecoin/ipfs-cid.js +64 -0
  539. package/dist/storage/providers/filecoin/metadata.d.ts +72 -0
  540. package/dist/storage/providers/filecoin/metadata.js +137 -0
  541. package/dist/storage/providers/filecoin/piece-cid.d.ts +48 -0
  542. package/dist/storage/providers/filecoin/piece-cid.js +57 -0
  543. package/dist/storage/providers/filecoin/provider-client.d.ts +234 -0
  544. package/dist/storage/providers/filecoin/provider-client.js +27 -0
  545. package/dist/storage/providers/filecoin/readiness.d.ts +62 -0
  546. package/dist/storage/providers/filecoin/readiness.js +85 -0
  547. package/dist/storage/providers/filecoin/retriever.d.ts +82 -0
  548. package/dist/storage/providers/filecoin/retriever.js +63 -0
  549. package/dist/storage/providers/filecoin/skeleton-client.d.ts +36 -0
  550. package/dist/storage/providers/filecoin/skeleton-client.js +55 -0
  551. package/dist/storage/providers/filecoin/synapse-client.d.ts +169 -0
  552. package/dist/storage/providers/filecoin/synapse-client.js +343 -0
  553. package/dist/storage/providers/filecoin/synapse-construction.d.ts +26 -0
  554. package/dist/storage/providers/filecoin/synapse-construction.js +47 -0
  555. package/dist/storage/providers/filecoin/synapse-error-mapping.d.ts +23 -0
  556. package/dist/storage/providers/filecoin/synapse-error-mapping.js +49 -0
  557. package/dist/storage/providers/filecoin/synapse-readiness.d.ts +37 -0
  558. package/dist/storage/providers/filecoin/synapse-readiness.js +231 -0
  559. package/dist/storage/providers/filecoin/uri.d.ts +49 -0
  560. package/dist/storage/providers/filecoin/uri.js +84 -0
  561. package/dist/storage/providers/filecoin/verified-fetch-lifecycle.d.ts +77 -0
  562. package/dist/storage/providers/filecoin/verified-fetch-lifecycle.js +196 -0
  563. package/dist/storage/providers/filecoin/verified-fetch-retriever.d.ts +54 -0
  564. package/dist/storage/providers/filecoin/verified-fetch-retriever.js +81 -0
  565. package/dist/storage/providers/filecoin/verified-fetch-vendor.d.ts +71 -0
  566. package/dist/storage/providers/filecoin/verified-fetch-vendor.js +94 -0
  567. package/dist/storage/raw-content-codec.d.ts +89 -0
  568. package/dist/storage/raw-content-codec.js +47 -0
  569. package/dist/storage/raw-content-store-backend-adapter.d.ts +28 -0
  570. package/dist/storage/raw-content-store-backend-adapter.js +67 -0
  571. package/dist/storage/raw-content-store.d.ts +228 -0
  572. package/dist/storage/raw-content-store.js +27 -0
  573. package/dist/storage/s3-store.d.ts +42 -0
  574. package/dist/storage/s3-store.js +181 -0
  575. package/dist/storage/storage-backend-registry.d.ts +58 -0
  576. package/dist/storage/storage-backend-registry.js +56 -0
  577. package/dist/storage/storage-backend.d.ts +82 -0
  578. package/dist/storage/storage-backend.js +14 -0
  579. package/dist/storage/storage-capabilities.d.ts +56 -0
  580. package/dist/storage/storage-capabilities.js +170 -0
  581. package/dist/storage/store-registry.d.ts +67 -0
  582. package/dist/storage/store-registry.js +77 -0
  583. package/dist/vector-math.d.ts +15 -0
  584. package/dist/vector-math.js +31 -0
  585. package/dist/xml-escape.d.ts +5 -0
  586. package/dist/xml-escape.js +7 -0
  587. package/openapi.json +15395 -0
  588. package/openapi.yaml +10794 -0
  589. package/package.json +119 -0
@@ -0,0 +1,405 @@
1
+ /**
2
+ * Document indexer (Phase 2 + Phase B hardening).
3
+ *
4
+ * Takes a registered document + a body of text, deterministically
5
+ * chunks it, embeds every chunk in one batch via the existing core
6
+ * `embedTexts` helper, persists chunks to `document_chunks`, and writes
7
+ * one row per chunk into `memories` with `raw_document_id` +
8
+ * `document_chunk_id` provenance set so the existing
9
+ * `/v1/memories/search` retrieval pipeline finds them.
10
+ *
11
+ * Phase B (rev-18 plan, Phase B section "document-indexer.ts") rewrote
12
+ * the flow so it cannot leave a row in a stuck pending/running state:
13
+ *
14
+ * 1. Schema validation — type check only, no row touched (`IndexInputError` → 400).
15
+ * 2. Open transaction, take per-document advisory lock, load row.
16
+ * 3. Idempotent / re-index short-circuits BEFORE marking running:
17
+ * - `'complete'` + same `indexed_content_hash` → COMMIT, return idempotent skip.
18
+ * - `'complete'` + different hash → fall through; the conditional
19
+ * UPDATE below moves the row through 'running' and the existing
20
+ * `clearPriorGeneration` path replaces chunks atomically.
21
+ * 4. Atomic conditional UPDATE → `'running'` (CAS). Only fires when
22
+ * `semantic_index_status IN ('pending','failed','stale','complete')`;
23
+ * `'running'` (concurrent writer) and `'not_required'` (registered
24
+ * not-to-index) yield rowCount=0 → ROLLBACK + 409.
25
+ * 5. Known-document semantic validation: text-too-large / empty bodies
26
+ * ROLLBACK the running write, then write durable `'failed'` from
27
+ * a fresh statement so direct SDK callers cannot leave a stuck
28
+ * pending row.
29
+ * 6. Prepare chunks (chunkText + embedTexts), persist inside the same
30
+ * transaction, mark `'complete'` + clear `last_error.semantic_index`,
31
+ * COMMIT.
32
+ *
33
+ * Visibility note: the `'running'` state is written *inside* the
34
+ * BEGIN..COMMIT transaction, so under READ COMMITTED isolation other
35
+ * connections never observe it — the row reads as the prior committed
36
+ * state until COMMIT, at which point the row reads as `'complete'`
37
+ * (success) or — after the catch-path fresh-tx write — `'failed'`. The
38
+ * `'running'` value exists for the conditional-UPDATE concurrency
39
+ * guard, not as a UI state. UI rendering of "indexing in progress"
40
+ * requires a future async-worker design that commits `'running'`
41
+ * before doing the work, with a lease/heartbeat (out of scope; see
42
+ * the rev-18 "Out of scope" section).
43
+ *
44
+ * Idempotency contract (preserved): a re-index with byte-identical text
45
+ * under the current `chunker_version` is a no-op (no fresh chunks, no
46
+ * fresh memories, `indexed_content_hash` unchanged). A re-index with
47
+ * new text soft-deletes the prior chunk + memory generation in
48
+ * user-scope before inserting the fresh one. Retry from
49
+ * `semantic_index_status='failed'` proceeds normally (no skip), clears
50
+ * `last_error`, and lands `'complete'`.
51
+ */
52
+ import { embedTexts } from './embedding.js';
53
+ import { PHASE2_CHUNKER_VERSION, PHASE2_PARSER_VERSION, chunkText, hashIndexedText, } from './document-chunker.js';
54
+ import { countActiveChunksForDocument, insertDocumentChunks, softDeleteChunksForDocument, } from '../db/document-chunk-repository.js';
55
+ import { getDocumentWithSourceSite, setRawDocumentIndexedHashWithClient, } from '../db/raw-document-repository.js';
56
+ import { buildLastError, markSemanticIndexStatus, } from '../db/raw-document-status-repository.js';
57
+ import { softDeleteMemoriesForDocument, storeMemoryWithClient } from '../db/repository-write.js';
58
+ import { MAX_INDEX_TEXT_BYTES } from '../schemas/documents.js';
59
/**
 * Raised when the requested document is not found or is not owned by
 * the calling user. Route handlers translate it into a 404.
 */
export class DocumentNotFoundError extends Error {
    /** Id of the document that could not be loaded. */
    documentId;
    name = 'DocumentNotFoundError';
    constructor(documentId) {
        super('document '.concat(documentId, ' not found'));
        this.documentId = documentId;
    }
}
68
/**
 * Input rejected at the schema level, before any document is loaded.
 * Routes answer with a 400. No row is updated for this error because
 * the document has not been looked up yet and may not exist at all.
 */
export class IndexInputError extends Error {
    name = 'IndexInputError';
    constructor(message) {
        super(message);
    }
}
79
/**
 * Phase B — raised when the conditional UPDATE that should move
 * `semantic_index_status` to `'running'` touched zero rows. That means
 * the row is in a state indexing may not start from: `'running'`
 * (another writer holds it), `'not_required'`, or the row disappeared
 * between the load and the compare-and-set write. Routes answer with
 * a 409.
 */
export class IndexInvalidStateError extends Error {
    /** Id of the document whose state blocked indexing. */
    documentId;
    /** The `semantic_index_status` value reported to the caller. */
    currentStatus;
    name = 'IndexInvalidStateError';
    constructor(documentId, currentStatus) {
        super(`document ${documentId} is in semantic_index_status='${currentStatus}'; indexing not permitted from this state`);
        this.documentId = documentId;
        this.currentStatus = currentStatus;
    }
}
97
/**
 * Validation failure for a document that is already known (loaded and
 * owned by the caller). Carries a machine-readable `code` next to the
 * human-readable message; `classifyIndexerFailure` in this file reuses
 * that code when the durable `'failed'` marker is written.
 */
export class IndexSemanticValidationError extends Error {
    /** Id of the document whose text failed validation. */
    documentId;
    /** Machine-readable failure code (e.g. `'extraction_empty'`). */
    code;
    name = 'IndexSemanticValidationError';
    constructor(documentId, code, message) {
        super(message);
        this.documentId = documentId;
        this.code = code;
    }
}
107
/**
 * Public entry point of the indexer: validate the input shape, hash
 * the text, then hand over to `runIndexFlow` for the transactional
 * phases described in the file docstring.
 *
 * @param pool  Connection pool. One client is checked out for the
 *              mutation transaction; failure markers are written with
 *              a fresh statement on the same pool so they survive the
 *              rollback.
 * @param input Index request; `input.text` must be a string.
 * @returns The result produced by `runIndexFlow`.
 * @throws IndexInputError when `input.text` is not a string.
 */
export async function indexDocumentText(pool, input) {
    // Phase 1 — pure type check. Zod already guards the route boundary,
    // but in-process callers (tests, future workers) land here directly
    // and must not be able to pass anything other than a string.
    validateInput(input);
    return runIndexFlow(pool, input, hashIndexedText(input.text));
}
122
// ---------------------------------------------------------------------------
// Phase 1 — shape check only; nothing in the database is touched here.
// ---------------------------------------------------------------------------
/**
 * Reject any input whose `text` is not a string.
 *
 * @throws IndexInputError when `typeof input.text` is not `'string'`.
 */
function validateInput(input) {
    const textIsString = typeof input.text === 'string';
    if (textIsString) {
        return;
    }
    throw new IndexInputError('text must be a string');
}
130
// ---------------------------------------------------------------------------
// Phase 5 helper — validation for a document that is already known.
// Errors thrown here reach the catch path in `runIndexFlow`, which
// writes the durable 'failed' marker so direct SDK callers cannot
// leave a stuck pending row.
// ---------------------------------------------------------------------------
/**
 * Validate the text of a known document: it must contain at least one
 * non-whitespace character and its UTF-8 encoding must not exceed
 * `MAX_INDEX_TEXT_BYTES`. The emptiness check runs first.
 *
 * @throws IndexSemanticValidationError with code `'extraction_empty'`
 *         or `'index_text_too_large'`.
 */
function semanticValidate(documentId, text) {
    const hasVisibleContent = text.trim().length > 0;
    if (!hasVisibleContent) {
        throw new IndexSemanticValidationError(documentId, 'extraction_empty', 'text must contain non-whitespace content');
    }
    // The limit is expressed in bytes, so measure the encoded size
    // rather than the JS string length.
    const utf8Size = Buffer.byteLength(text, 'utf8');
    if (utf8Size > MAX_INDEX_TEXT_BYTES) {
        throw new IndexSemanticValidationError(documentId, 'index_text_too_large', `text exceeds max size of ${MAX_INDEX_TEXT_BYTES} bytes (utf-8)`);
    }
}
143
/**
 * Chunk the input text and embed all chunk contents in a single
 * `embedTexts` call. When the chunker yields nothing, the embedding
 * call is skipped and an empty embeddings array is returned.
 *
 * @returns `{ chunks, embeddings }`; `embeddings[i]` is used later as
 *          the embedding of `chunks[i]`.
 */
async function prepareChunks(input) {
    const chunks = chunkText(input.text, input.chunkOptions);
    const embeddings = chunks.length > 0
        ? await embedTexts(chunks.map(({ content }) => content), 'document')
        : [];
    return { chunks, embeddings };
}
150
// ---------------------------------------------------------------------------
// Phase 2-6 orchestration. Single BEGIN/COMMIT, advisory-lock-serialized
// per document. Catch-path writes durable `semantic_index_status='failed'`
// from a fresh statement after ROLLBACK so the failure is observable
// even though the in-tx 'running' marker is reverted.
// ---------------------------------------------------------------------------
/**
 * Run phases 2-6 on one checked-out client.
 *
 * On success the transaction is committed and the index result is
 * returned. On any error the transaction is rolled back, the client is
 * released, a best-effort `'failed'` marker is written (only when the
 * document was loaded), and the original error is re-thrown.
 *
 * @param pool    Pool used for the transaction client and, after
 *                release, for the failure-marker statement.
 * @param input   Index request (`userId`, `documentId`, `text`, ...).
 * @param newHash Hash of `input.text` computed by the caller.
 * @throws DocumentNotFoundError when the document cannot be loaded.
 * @throws IndexInvalidStateError when the CAS to `'running'` matches
 *         no row.
 * @throws IndexSemanticValidationError for empty / oversized text.
 */
async function runIndexFlow(pool, input, newHash) {
    const client = await pool.connect();
    let knownDocument = false;
    let caughtError = null;
    let rollbackFailed = false;
    try {
        await client.query('BEGIN');
        await acquirePerDocumentLock(client, input.documentId);
        const loaded = await getDocumentWithSourceSite(client, input.userId, input.documentId);
        if (!loaded) {
            // Phase 2 — 404 short-circuit; no row to mark failed. ROLLBACK
            // happens in the outer catch.
            throw new DocumentNotFoundError(input.documentId);
        }
        knownDocument = true;
        const { document, sourceSite } = loaded;
        // Phase 3 — idempotent / re-index short-circuits BEFORE marking
        // running. complete + same-hash is a no-op; complete + different
        // hash falls through to the conditional UPDATE below.
        const idempotent = await maybeIdempotentSkip(client, document, newHash);
        if (idempotent) {
            await client.query('COMMIT');
            return idempotent;
        }
        // Phase 4 — atomic conditional UPDATE → 'running'.
        const casOk = await tryAdvanceToRunning(client, document.userId, document.id);
        if (!casOk) {
            // Re-load the row to surface the current state in the 409.
            // Cannot fail the row from this branch — `'running'` belongs to
            // another writer; clobbering its status would corrupt their flow.
            const stale = await getDocumentWithSourceSite(client, input.userId, input.documentId);
            const currentStatus = stale?.document.semanticIndexStatus ?? document.semanticIndexStatus;
            throw new IndexInvalidStateError(input.documentId, currentStatus);
        }
        // Phase 5 — known-document semantic validation. Throws here trigger
        // the outer catch, which writes durable 'failed' from a fresh tx.
        semanticValidate(input.documentId, input.text);
        // Phase 6 — prepare chunks (chunk + embed) + persist + mark complete.
        const prepared = await prepareChunks(input);
        const result = await applyIndexInsideTx(client, document, sourceSite, newHash, prepared);
        await client.query('COMMIT');
        return result;
    }
    catch (err) {
        caughtError = err;
        try {
            await client.query('ROLLBACK');
        }
        catch (rollbackErr) {
            // ROLLBACK stays best-effort (the original error is what the
            // caller must see), but a connection whose ROLLBACK failed is
            // in an unknown state and must not be reused.
            rollbackFailed = true;
            console.error(`[document-indexer] ROLLBACK failed for documentId=${input.documentId}; destroying connection:`, rollbackErr);
        }
    }
    finally {
        if (rollbackFailed) {
            // A truthy argument tells the pool to destroy the client
            // instead of returning it to the idle set.
            client.release(true);
        }
        else {
            client.release();
        }
    }
    // Marker writes run AFTER `client.release()` so the
    // single-connection test pool can hand the connection out to
    // the marker's `pool.query` (see `db/pool.ts` — pgvector HNSW
    // index serialization requires `max=1` in tests; a marker call
    // with the original client still checked out would deadlock).
    if (knownDocument) {
        await markIndexerFailureBestEffort(pool, input.userId, input.documentId, caughtError);
    }
    throw caughtError;
}
215
/**
 * Serialize indexing per document by taking a transaction-scoped
 * advisory lock. `pg_advisory_xact_lock` is released at COMMIT or
 * ROLLBACK; `hashtext($uuid)` folds the UUID into a 32-bit key, which
 * is good enough for per-document mutex granularity.
 *
 * @param client     Client that already has a transaction open.
 * @param documentId Document id used to derive the lock key.
 */
async function acquirePerDocumentLock(client, documentId) {
    const lockSql = 'SELECT pg_advisory_xact_lock(hashtext($1))';
    const lockParams = [documentId];
    await client.query(lockSql, lockParams);
}
223
/**
 * Phase 4 — compare-and-set that moves the row to `'running'` only
 * when its current `semantic_index_status` is one of the permitted
 * start states (`'pending'`, `'failed'`, `'stale'`, `'complete'`).
 *
 * `'complete'` is deliberately permitted: a re-index with new content
 * was not caught by the same-hash idempotent skip and arrives here so
 * its chunks can be replaced.
 *
 * @returns true when exactly the targeted row was updated; false when
 *          the row is `'running'` (concurrent writer),
 *          `'not_required'`, soft-deleted, or gone since the load.
 */
async function tryAdvanceToRunning(client, userId, documentId) {
    const casSql = `UPDATE raw_documents
        SET semantic_index_status = 'running',
            updated_at = NOW()
      WHERE id = $1
        AND user_id = $2
        AND deleted_at IS NULL
        AND semantic_index_status IN ('pending', 'failed', 'stale', 'complete')
      RETURNING id`;
    const { rowCount } = await client.query(casSql, [documentId, userId]);
    // A missing rowCount is treated as "no row updated".
    const updatedRows = rowCount ?? 0;
    return updatedRows > 0;
}
245
/**
 * Phase 3 helper — decide whether this request is an idempotent no-op.
 *
 * A skip result is returned only when ALL of the following hold:
 *   - the row's `semantic_index_status` is `'complete'`,
 *   - its recorded `indexed_content_hash` equals the incoming hash,
 *   - at least one active chunk row exists for the current
 *     `chunker_version`.
 *
 * The status gate matters for retries: if a re-index with different
 * text failed, the row is `'failed'` while the hash and chunks of the
 * older generation are still in place. Retrying with the older text
 * must then run the normal flow (so the row lands `'complete'`)
 * instead of being skipped and left `'failed'`.
 *
 * @param client   Client inside the indexer transaction.
 * @param document Loaded document row.
 * @param newHash  Hash of the incoming text.
 * @returns The idempotent-skip result, or null when the caller should
 *          continue with the running CAS + chunk replacement path.
 */
async function maybeIdempotentSkip(client, document, newHash) {
    if (document.semanticIndexStatus !== 'complete')
        return null;
    if (document.indexedContentHash !== newHash)
        return null;
    const existingChunkCount = await countActiveChunksForDocument(client, document.id, PHASE2_CHUNKER_VERSION);
    if (existingChunkCount === 0)
        return null;
    return idempotentResult(document, newHash);
}
260
/**
 * Phase 6 — replace the document's chunk + memory generation and mark
 * the row `'complete'`. Everything runs on the caller's transaction
 * client; the COMMIT is issued by `runIndexFlow`.
 *
 * Steps: soft-delete the prior generation, insert chunk rows and their
 * derived memories (skipped when there are no chunks), record the new
 * `indexed_content_hash`, then set `semantic_index_status='complete'`.
 * NOTE(review): clearing of the semantic-index `last_error` is
 * presumably done inside `markSemanticIndexStatus` — confirm there.
 *
 * @returns Index result with the number of chunks and memories
 *          created and `idempotentSkip = false`.
 */
async function applyIndexInsideTx(client, document, sourceSite, newHash, prepared) {
    await clearPriorGeneration(client, document.userId, document.id);
    let chunksCreated = 0;
    let memoriesCreated = 0;
    if (prepared.chunks.length > 0) {
        const insertedChunks = await insertChunkRows(client, document, prepared);
        memoriesCreated = await materializeMemories(client, document, sourceSite, insertedChunks);
        chunksCreated = insertedChunks.length;
    }
    // Hash and status are written identically whether or not any
    // chunks were produced.
    await setRawDocumentIndexedHashWithClient(client, document.userId, document.id, newHash);
    await markSemanticIndexStatus({
        q: client,
        userId: document.userId,
        documentId: document.id,
        status: 'complete',
    });
    return makeResult(document, newHash, chunksCreated, memoriesCreated, /* idempotentSkip */ false);
}
289
/**
 * Write `semantic_index_status='failed'` (with a `last_error` entry)
 * through the pool after the indexer transaction has been rolled back.
 *
 * Best effort: a failure of the marker write itself is logged and
 * swallowed so the caller can still re-throw the original error.
 * Nothing is written for `IndexInvalidStateError`, because the row is
 * then owned by another writer or in a state that must not be
 * clobbered.
 *
 * @param pool       Pool used as the query target for the marker.
 * @param userId     Owner of the document.
 * @param documentId Document to mark.
 * @param err        The error that aborted the index flow.
 */
async function markIndexerFailureBestEffort(pool, userId, documentId, err) {
    const ownedByAnotherWriter = err instanceof IndexInvalidStateError;
    if (ownedByAnotherWriter) {
        return;
    }
    const code = classifyIndexerFailure(err);
    let message;
    if (err instanceof Error) {
        message = err.message;
    }
    else {
        message = String(err);
    }
    try {
        const lastError = buildLastError('semantic_index', code, message);
        await markSemanticIndexStatus({ q: pool, userId, documentId, status: 'failed', lastError });
    }
    catch (markerErr) {
        console.error(`[document-indexer] semantic_index_status=failed marker write failed for documentId=${documentId}:`, markerErr);
    }
}
313
/**
 * Map an indexer error to the failure code stored in `last_error`:
 * the error's own `code` for `IndexSemanticValidationError`, otherwise
 * `'unknown'`.
 */
function classifyIndexerFailure(err) {
    return err instanceof IndexSemanticValidationError ? err.code : 'unknown';
}
318
/**
 * Soft-delete the previous generation for a document: first its
 * chunks, then the memories derived from it. The two steps run
 * one after the other on the same client.
 */
async function clearPriorGeneration(client, userId, documentId) {
    const softDeleteSteps = [softDeleteChunksForDocument, softDeleteMemoriesForDocument];
    for (const softDelete of softDeleteSteps) {
        await softDelete(client, userId, documentId);
    }
}
322
/**
 * Turn the prepared chunks into insert payloads and persist them with
 * `insertDocumentChunks`. Each payload pairs a chunk with the
 * embedding at the same array position and is stamped with the
 * current parser and chunker versions.
 *
 * @returns Whatever `insertDocumentChunks` resolves to (the caller
 *          treats it as the array of inserted chunk rows).
 */
async function insertChunkRows(client, document, prepared) {
    const { chunks, embeddings } = prepared;
    const toInsertPayload = ({ chunkIndex, content, contentHash, charStart, charEnd, tokenCount }, position) => ({
        userId: document.userId,
        rawDocumentId: document.id,
        chunkIndex,
        content,
        contentHash,
        charStart,
        charEnd,
        tokenCount,
        embedding: embeddings[position],
        parserVersion: PHASE2_PARSER_VERSION,
        chunkerVersion: PHASE2_CHUNKER_VERSION,
    });
    return insertDocumentChunks(client, chunks.map(toInsertPayload));
}
338
/**
 * Write one memory per inserted chunk row, sequentially on the
 * transaction client.
 *
 * Every memory carries the document-level display metadata (see
 * `buildDocumentLevelMetadata`) so downstream consumers such as the
 * webapp Context Passport can render filename / mime without joining
 * `raw_documents` again.
 *
 * @returns Number of memories stored.
 */
async function materializeMemories(client, document, sourceSite, chunkRows) {
    const sharedMetadata = buildDocumentLevelMetadata(document);
    const sourceUrl = document.externalUri ?? '';
    let stored = 0;
    for (const row of chunkRows) {
        // The chunk id is repeated inside `metadata` so SDK consumers
        // that only read `memory.metadata` can still resolve per-chunk
        // provenance. The typed `raw_document_id` / `document_chunk_id`
        // columns remain the source of truth.
        const metadata = { ...sharedMetadata, document_chunk_id: row.id };
        await storeMemoryWithClient(client, {
            userId: row.userId,
            content: row.content,
            embedding: row.embedding,
            importance: 0.5,
            sourceSite,
            sourceUrl,
            rawDocumentId: row.rawDocumentId,
            documentChunkId: row.id,
            metadata,
        });
        stored += 1;
    }
    return stored;
}
372
/**
 * Build the document-scoped metadata object copied onto every
 * chunk-derived memory.
 *
 * Starts from a shallow copy of the document-supplied `metadata`, then:
 *   - `filename` / `mimeType` are overwritten by the first-class
 *     `displayName` / `mimeType` values when those are truthy, so a
 *     metadata payload cannot spoof the row's real filename;
 *   - `type` is set to `'user-context'` only when the payload left it
 *     undefined;
 *   - `raw_document_id` is always set to the document id.
 *
 * The per-chunk `document_chunk_id` is added by the caller, keeping
 * this helper document-scoped.
 */
function buildDocumentLevelMetadata(document) {
    const { displayName, mimeType } = document;
    const composed = { ...(document.metadata ?? {}) };
    if (displayName) {
        composed.filename = displayName;
    }
    if (mimeType) {
        composed.mimeType = mimeType;
    }
    if (typeof composed.type === 'undefined') {
        composed.type = 'user-context';
    }
    composed.raw_document_id = document.id;
    return composed;
}
392
/**
 * Result returned for an idempotent skip: zero chunks and zero
 * memories created, with `idempotentSkip` set to true.
 */
function idempotentResult(document, newHash) {
    const nothingCreated = 0;
    const idempotentSkip = true;
    return makeResult(document, newHash, nothingCreated, nothingCreated, idempotentSkip);
}
395
/**
 * Assemble the index result object returned to callers, stamping the
 * current chunker and parser versions onto it.
 *
 * @param document           Document row; only `id` is read.
 * @param indexedContentHash Hash reported for the indexed text.
 * @param chunksCreated      Number of chunk rows created.
 * @param memoriesCreated    Number of memories created.
 * @param idempotentSkip     True when the request was a no-op.
 */
function makeResult(document, indexedContentHash, chunksCreated, memoriesCreated, idempotentSkip) {
    const { id: documentId } = document;
    const versions = {
        chunkerVersion: PHASE2_CHUNKER_VERSION,
        parserVersion: PHASE2_PARSER_VERSION,
    };
    return { documentId, indexedContentHash, chunksCreated, memoriesCreated, idempotentSkip, ...versions };
}
@@ -0,0 +1,245 @@
1
+ /**
2
+ * Document service — Phases 1 and 2 of the large-file ingestion plan.
3
+ *
4
+ * Phase 1: pointer-only registry (register / get / list / delete).
5
+ * Phase 2: text indexing — `indexText` chunks supplied text, embeds
6
+ * the chunks via the existing core embedding stack, persists chunks to
7
+ * `document_chunks`, and writes one provenance-linked memory per chunk
8
+ * so `/v1/memories/search` can retrieve them. Implementation lives in
9
+ * `document-indexer.ts`; this service is a thin facade.
10
+ *
11
+ * Schema validation lives in `src/schemas/documents.ts`; this service
12
+ * trusts the validated input shape and only enforces the Phase 1
13
+ * storage-mode invariant defensively (so direct in-process callers
14
+ * can't bypass the Zod gate). No managed blob storage, no fact
15
+ * extraction — those are Phase 3+.
16
+ *
17
+ * See `Atomicmemory-research/docs/core-repo/design/large-file-ingestion-and-raw-storage-plan-2026-05-08.md`.
18
+ */
19
+ import type pg from 'pg';
20
+ import { type DocumentRecoveryStatusFilter, type ListDocumentsForUserResult } from '../db/document-list-repository.js';
21
+ import { type ListPassportFeedResult } from '../db/passport-feed-repository.js';
22
+ import { type RawContentStoreRegistry } from '../storage/store-registry.js';
23
+ import type { ListRawDocumentsInput, RawDocumentRow, RawStorageMode } from '../db/raw-document-types.js';
24
+ import { type IndexDocumentInput, type IndexDocumentResult } from './document-indexer.js';
25
+ import { type UploadRawInput, type UploadRawResult } from './document-upload.js';
26
+ import { type MarkerInput, type MarkerResult } from './document-failure-markers.js';
27
+ import type { UploadConfig } from './upload-config.js';
28
+ import type { RawContentStore } from '../storage/raw-content-store.js';
29
+ import type { RawContentCodec } from '../storage/raw-content-codec.js';
30
+ import type { ExtractionErrorCode, IndexErrorCode } from '../schemas/documents.js';
31
/**
 * Inputs to `register`. Mirrors the camelCase shape produced by
 * `RegisterDocumentBodySchema.transform()`.
 *
 * Descriptive fields are typed `T | null` rather than optional, so the
 * caller states every one of them explicitly; only the two Phase B
 * status fields at the end may be omitted.
 */
export interface RegisterDocumentInput {
    userId: string;
    sourceSite: string;
    provider: string;
    /**
     * Part of the (user, source, external_id, provider_version)
     * namespace that `register` uses to detect an already-registered
     * active row.
     */
    externalId: string;
    accountId: string | null;
    externalUri: string | null;
    displayName: string | null;
    mimeType: string | null;
    sizeBytes: number | null;
    contentHash: string | null;
    /** Part of the idempotency namespace described on `externalId`. */
    providerVersion: string | null;
    sourceModifiedAt: Date | null;
    /**
     * The service defensively re-checks the Phase 1 storage-mode
     * invariant on this value so in-process callers cannot bypass the
     * Zod gate.
     */
    storageMode: RawStorageMode;
    retentionPolicy: Record<string, unknown>;
    consentPolicy: Record<string, unknown>;
    metadata: Record<string, unknown>;
    /**
     * Phase B — optional restricted-initial-state status fields. Default
     * to `'not_required'` at the repository layer when omitted, matching
     * the column defaults. Service-owned values (`'running'` /
     * `'complete'` / `'failed'`) are blocked at the schema layer.
     */
    extractionStatus?: 'pending' | 'not_required' | 'unsupported';
    semanticIndexStatus?: 'pending' | 'not_required';
}
61
/** Outcome of `DocumentService.register`. */
export interface RegisterDocumentResult {
    /** The registered document row (newly inserted or pre-existing). */
    document: RawDocumentRow;
    /**
     * False when an active row already existed for the same namespace
     * (route handler answers 200); true for a new insert (201).
     */
    created: boolean;
}
65
/** Outcome of `DocumentService.delete`. */
export interface DeleteDocumentResult {
    /** Always the literal `true` on a resolved result. */
    success: true;
    /**
     * True when the row was missing or previously tombstoned, which
     * keeps DELETE idempotent.
     */
    alreadyDeleted: boolean;
}
69
/**
 * Optional dependencies injected at composition time. Every field may
 * be omitted; the per-field comments describe the fallback.
 */
export interface DocumentServiceOptions {
    /** Phase 3 raw-content adapter. `null` when `rawStorageMode='pointer_only'`. */
    rawContentStore?: RawContentStore | null;
    /**
     * Phase 4a registry that dispatches cleanup by per-row provider.
     * Defaults to a single-store registry wrapping `rawContentStore`
     * (the pre-Phase-4a behavior); composition-root code passes a
     * multi-provider registry when `RAW_STORAGE_LEGACY_PROVIDERS` is
     * set.
     */
    storeRegistry?: RawContentStoreRegistry;
    /**
     * Phase 5 content codec injected around `store.put()`/`get()`.
     * Defaults to the noop codec for backcompat with pre-Phase-5 test
     * contexts; production composition supplies the codec configured
     * by `RAW_CONTENT_CODEC` (noop or aes_gcm).
     */
    codec?: RawContentCodec;
    /**
     * Subset of RuntimeConfig the service needs at runtime. Modeled
     * as a discriminated union so a pointer-only deployment cannot
     * leak a placeholder secret into service code: pointer_only
     * carries NO `storageKeyHmacSecret` (it's never derived); only
     * the managed_blob variant requires the HMAC secret.
     */
    config?: UploadConfig;
}
97
/**
 * Document service. Phase 1 covers register/get/list/delete; Phase 2
 * adds `indexText`; Phase 3 adds `uploadRaw` (managed-blob storage).
 * The Phase-3 dependencies (`rawContentStore`, raw-storage config) are
 * optional so existing test contexts that only need pointer-only
 * registration don't have to thread the new wiring.
 */
export declare class DocumentService {
    private readonly pool;
    private readonly rawContentStore;
    private readonly storeRegistry;
    private readonly codec;
    private readonly uploadConfig;
    /**
     * @param pool    Postgres pool the service operates on.
     * @param options Optional Phase 3+ dependencies; see
     *                {@link DocumentServiceOptions} for the defaults.
     */
    constructor(pool: pg.Pool, options?: DocumentServiceOptions);
    /**
     * Per-row provider dispatch registry. Exposed so the route layer's
     * Phase 7a formatters can resolve `delete_semantics` from each
     * row's `storage_provider` without rebuilding the registry. The
     * registry is read-only at the route boundary.
     */
    getStoreRegistry(): RawContentStoreRegistry;
    /**
     * Idempotently register a document pointer. Looks up (or inserts) the
     * matching `raw_sources` row, then registers the document.
     *
     * Returns `{ document, created }` where `created = false` when an
     * active row already existed for the (user, source, external_id,
     * provider_version) namespace (route handler maps that to a 200; new
     * inserts map to 201).
     */
    register(input: RegisterDocumentInput): Promise<RegisterDocumentResult>;
    /** Fetch one active document by id; null when missing/deleted/cross-user. */
    get(userId: string, id: string): Promise<RawDocumentRow | null>;
    /** List active documents for a user, optionally filtered by source_site. */
    list(input: ListRawDocumentsInput): Promise<RawDocumentRow[]>;
    /**
     * Phase D — cursor-paginated user-scoped document list with optional
     * recovery-status bucket filter. Distinct from {@link list} which uses
     * offset/limit + source_site filter (kept for backwards
     * compatibility with `GET /v1/documents/list`). The route layer
     * decodes the opaque `cursor` query param via `decodeListCursor`
     * BEFORE calling this method, so a malformed cursor surfaces as 400
     * upstream rather than a 500 from the SQL layer.
     *
     * NOTE(review): the documentation of `ListForUserServiceInput` and
     * `InvalidDocumentListCursorError` describes the cursor as decoded
     * at the service level instead — confirm which layer decodes it.
     */
    listForUser(input: ListForUserServiceInput): Promise<ListDocumentsForUserResult>;
    /**
     * Phase D — list active documents WITHOUT non-deleted memories,
     * narrowed by the layer-aware recovery filter. Backs the
     * passport server-side merge document-only stream and the
     * `GET /v1/documents/without-memories` endpoint.
     */
    listWithoutMemoriesForUser(input: ListWithoutMemoriesServiceInput): Promise<ListDocumentsForUserResult>;
    /**
     * Phase D — passport feed (data-layer grouped query). Backs
     * `GET /v1/documents/passport-feed`. The webapp's
     * `/api/context/passport` route consumes this as the memory-feed
     * stream of its server-side two-stream merge.
     *
     * Cursor decoding mirrors the other Phase D list facades; a
     * malformed cursor (incl. structurally-valid-but-non-server
     * sortAt) throws `InvalidDocumentListCursorError` so the route
     * layer maps it to 400 invalid_cursor.
     */
    listPassportFeed(input: ListPassportFeedServiceInput): Promise<ListPassportFeedResult>;
    /**
     * Soft-delete one document together with its Phase 2 derived chunks
     * and provenance-linked memories, in one transaction with a per-doc
     * advisory lock. `alreadyDeleted = true` when the row was missing or
     * previously tombstoned — keeps DELETE idempotent.
     */
    delete(userId: string, id: string): Promise<DeleteDocumentResult>;
    /**
     * Run cleanup against `blobs`; on failure mark each failing row
     * `raw_storage_failed` and throw, on success flip the rows to the
     * terminal `blob_deleted` state so a future retry of `DELETE
     * /v1/documents/:id` short-circuits cleanly.
     */
    private runBlobCleanupOrThrow;
    /**
     * Phase 2 indexing entry point. Idempotent on byte-identical text
     * + current `chunker_version`; otherwise re-chunks (soft-deleting
     * the prior generation of chunks + derived memories first). The
     * heavy lifting lives in `document-indexer.ts`.
     */
    indexText(input: IndexDocumentInput): Promise<IndexDocumentResult>;
    /**
     * Phase 3 managed-blob upload. Throws `ManagedStorageDisabledError`
     * (→ 503) when the deployment runs `rawStorageMode='pointer_only'`,
     * `UploadDocumentNotFoundError` (→ 404) when the document is missing
     * or owned by a different user. Idempotent on byte-identical input.
     */
    uploadRaw(input: UploadRawInput): Promise<UploadRawResult>;
    /**
     * Phase C constrained extraction-layer transition. See
     * `services/document-failure-markers.ts` for the full state-machine
     * docstring; this is a thin facade so route handlers can call into
     * the service the same way they do for `register` / `indexText` /
     * `uploadRaw`.
     */
    markExtractionFailure(input: MarkerInput<ExtractionErrorCode>): Promise<MarkerResult>;
    /**
     * Phase C constrained semantic-index-layer transition. Counterpart
     * of {@link markExtractionFailure} that takes `IndexErrorCode`
     * values.
     */
    markIndexFailure(input: MarkerInput<IndexErrorCode>): Promise<MarkerResult>;
}
202
/**
 * Phase D — input shape for {@link DocumentService.listForUser}.
 * Mirrors the camelCase output of `DocumentListRootQuerySchema` plus
 * the `statusFilter` keying the data-layer filter. The cursor is the
 * opaque base64 string the route received; service-level decoding
 * lets us emit a clean 400 on malformed cursors via
 * `InvalidDocumentListCursorError`.
 *
 * NOTE(review): the `listForUser` method documentation says the route
 * layer decodes the cursor before calling the service — confirm which
 * description is current.
 */
export interface ListForUserServiceInput {
    userId: string;
    /** Page size; optional. */
    limit?: number;
    /** Opaque pagination cursor; omit for the first page. */
    cursor?: string;
    /** Recovery-status bucket to filter by; optional. */
    statusFilter?: 'failed' | 'unsupported' | 'pending' | 'all';
}
216
/**
 * Phase D — input shape for {@link DocumentService.listWithoutMemoriesForUser}.
 * Mirrors the camelCase output of `ListDocumentsWithoutMemoriesQuerySchema`.
 * `statusFilter` is the optional layer-aware override; the repository
 * applies the rev-18 recovery default when undefined.
 */
export interface ListWithoutMemoriesServiceInput {
    userId: string;
    /** Page size; optional. */
    limit?: number;
    /** Opaque pagination cursor; omit for the first page. */
    cursor?: string;
    /** Layer-aware override; the repository default applies when undefined. */
    statusFilter?: DocumentRecoveryStatusFilter;
}
228
/**
 * Phase D — input shape for {@link DocumentService.listPassportFeed}.
 * Mirrors the camelCase output of `PassportFeedQuerySchema` from
 * `schemas/document-list-schemas.ts`.
 */
export interface ListPassportFeedServiceInput {
    userId: string;
    /** Page size; optional. */
    limit?: number;
    /**
     * Opaque pagination cursor; optional. A malformed value makes
     * `listPassportFeed` throw `InvalidDocumentListCursorError`.
     */
    cursor?: string;
}
238
/**
 * Phase D — sentinel thrown by {@link DocumentService.listForUser}
 * when the supplied opaque cursor is malformed. Route handlers map
 * this to 400 with `error: 'invalid_cursor'`.
 *
 * The documentation of {@link DocumentService.listPassportFeed} states
 * that it throws this error for malformed cursors as well. The
 * constructor takes no arguments.
 */
export declare class InvalidDocumentListCursorError extends Error {
    constructor();
}