@atomicmemory/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/LICENSE +201 -0
- package/README.md +314 -0
- package/dist/app/bind-ephemeral.d.ts +18 -0
- package/dist/app/bind-ephemeral.js +22 -0
- package/dist/app/cors-headers.d.ts +12 -0
- package/dist/app/cors-headers.js +18 -0
- package/dist/app/create-app.d.ts +25 -0
- package/dist/app/create-app.js +156 -0
- package/dist/app/runtime-config-route-snapshot.d.ts +27 -0
- package/dist/app/runtime-config-route-snapshot.js +27 -0
- package/dist/app/runtime-container.d.ts +281 -0
- package/dist/app/runtime-container.js +297 -0
- package/dist/app/startup-checks.d.ts +28 -0
- package/dist/app/startup-checks.js +45 -0
- package/dist/bin.d.ts +17 -0
- package/dist/bin.js +128 -0
- package/dist/config.d.ts +680 -0
- package/dist/config.js +808 -0
- package/dist/db/agent-trust-repository.d.ts +49 -0
- package/dist/db/agent-trust-repository.js +66 -0
- package/dist/db/belief-edges-repository.d.ts +68 -0
- package/dist/db/belief-edges-repository.js +124 -0
- package/dist/db/claim-repository.d.ts +6 -0
- package/dist/db/claim-repository.js +4 -0
- package/dist/db/contradictions-repository.d.ts +56 -0
- package/dist/db/contradictions-repository.js +88 -0
- package/dist/db/document-chunk-repository.d.ts +48 -0
- package/dist/db/document-chunk-repository.js +145 -0
- package/dist/db/document-chunk-types.d.ts +35 -0
- package/dist/db/document-chunk-types.js +9 -0
- package/dist/db/document-list-cursor.d.ts +45 -0
- package/dist/db/document-list-cursor.js +111 -0
- package/dist/db/document-list-repository.d.ts +103 -0
- package/dist/db/document-list-repository.js +204 -0
- package/dist/db/entity-cards-repository.d.ts +37 -0
- package/dist/db/entity-cards-repository.js +46 -0
- package/dist/db/entity-values-repository.d.ts +26 -0
- package/dist/db/entity-values-repository.js +57 -0
- package/dist/db/link-repository.d.ts +30 -0
- package/dist/db/link-repository.js +54 -0
- package/dist/db/memory-repository.d.ts +163 -0
- package/dist/db/memory-repository.js +232 -0
- package/dist/db/migrate.d.ts +6 -0
- package/dist/db/migrate.js +36 -0
- package/dist/db/mmr.d.ts +14 -0
- package/dist/db/mmr.js +57 -0
- package/dist/db/passport-feed-repository.d.ts +91 -0
- package/dist/db/passport-feed-repository.js +198 -0
- package/dist/db/pg-episode-store.d.ts +19 -0
- package/dist/db/pg-episode-store.js +17 -0
- package/dist/db/pg-link-store.d.ts +17 -0
- package/dist/db/pg-link-store.js +14 -0
- package/dist/db/pg-memory-store.d.ts +68 -0
- package/dist/db/pg-memory-store.js +53 -0
- package/dist/db/pg-recap-store.d.ts +13 -0
- package/dist/db/pg-recap-store.js +19 -0
- package/dist/db/pg-representation-store.d.ts +17 -0
- package/dist/db/pg-representation-store.js +17 -0
- package/dist/db/pg-search-store.d.ts +29 -0
- package/dist/db/pg-search-store.js +47 -0
- package/dist/db/pool.d.ts +5 -0
- package/dist/db/pool.js +21 -0
- package/dist/db/ppr.d.ts +56 -0
- package/dist/db/ppr.js +178 -0
- package/dist/db/query-helpers.d.ts +44 -0
- package/dist/db/query-helpers.js +60 -0
- package/dist/db/raw-doc-artifact-sync.d.ts +128 -0
- package/dist/db/raw-doc-artifact-sync.js +259 -0
- package/dist/db/raw-document-blob-repository.d.ts +148 -0
- package/dist/db/raw-document-blob-repository.js +300 -0
- package/dist/db/raw-document-repository.d.ts +104 -0
- package/dist/db/raw-document-repository.js +410 -0
- package/dist/db/raw-document-status-repository.d.ts +122 -0
- package/dist/db/raw-document-status-repository.js +183 -0
- package/dist/db/raw-document-types.d.ts +236 -0
- package/dist/db/raw-document-types.js +10 -0
- package/dist/db/raw-storage-reconciliation-repository.d.ts +110 -0
- package/dist/db/raw-storage-reconciliation-repository.js +200 -0
- package/dist/db/reflection-jobs-repository.d.ts +33 -0
- package/dist/db/reflection-jobs-repository.js +48 -0
- package/dist/db/reflections-repository.d.ts +41 -0
- package/dist/db/reflections-repository.js +83 -0
- package/dist/db/repository-claims.d.ts +141 -0
- package/dist/db/repository-claims.js +376 -0
- package/dist/db/repository-deferred-audn.d.ts +33 -0
- package/dist/db/repository-deferred-audn.js +69 -0
- package/dist/db/repository-document-delete.d.ts +53 -0
- package/dist/db/repository-document-delete.js +156 -0
- package/dist/db/repository-entities.d.ts +114 -0
- package/dist/db/repository-entities.js +317 -0
- package/dist/db/repository-entity-attributes.d.ts +41 -0
- package/dist/db/repository-entity-attributes.js +65 -0
- package/dist/db/repository-entity-graph.d.ts +32 -0
- package/dist/db/repository-entity-graph.js +87 -0
- package/dist/db/repository-first-mentions.d.ts +41 -0
- package/dist/db/repository-first-mentions.js +79 -0
- package/dist/db/repository-lessons.d.ts +51 -0
- package/dist/db/repository-lessons.js +90 -0
- package/dist/db/repository-links.d.ts +26 -0
- package/dist/db/repository-links.js +105 -0
- package/dist/db/repository-observation.d.ts +26 -0
- package/dist/db/repository-observation.js +51 -0
- package/dist/db/repository-read.d.ts +56 -0
- package/dist/db/repository-read.js +271 -0
- package/dist/db/repository-recaps.d.ts +59 -0
- package/dist/db/repository-recaps.js +158 -0
- package/dist/db/repository-representations.d.ts +48 -0
- package/dist/db/repository-representations.js +162 -0
- package/dist/db/repository-temporal-state.d.ts +35 -0
- package/dist/db/repository-temporal-state.js +46 -0
- package/dist/db/repository-tll.d.ts +88 -0
- package/dist/db/repository-tll.js +179 -0
- package/dist/db/repository-types.d.ts +313 -0
- package/dist/db/repository-types.js +142 -0
- package/dist/db/repository-user-profiles.d.ts +17 -0
- package/dist/db/repository-user-profiles.js +28 -0
- package/dist/db/repository-vector-search.d.ts +33 -0
- package/dist/db/repository-vector-search.js +373 -0
- package/dist/db/repository-wipe.d.ts +34 -0
- package/dist/db/repository-wipe.js +94 -0
- package/dist/db/repository-write.d.ts +61 -0
- package/dist/db/repository-write.js +279 -0
- package/dist/db/schema.sql +1355 -0
- package/dist/db/storage-artifact-delete-tx.d.ts +56 -0
- package/dist/db/storage-artifact-delete-tx.js +123 -0
- package/dist/db/storage-artifact-providers.d.ts +21 -0
- package/dist/db/storage-artifact-providers.js +21 -0
- package/dist/db/storage-artifact-recovery-repository.d.ts +66 -0
- package/dist/db/storage-artifact-recovery-repository.js +58 -0
- package/dist/db/storage-artifact-repository.d.ts +329 -0
- package/dist/db/storage-artifact-repository.js +497 -0
- package/dist/db/stores.d.ts +220 -0
- package/dist/db/stores.js +12 -0
- package/dist/db/summaries-repository.d.ts +74 -0
- package/dist/db/summaries-repository.js +125 -0
- package/dist/eval/beam-10m-loader.d.ts +98 -0
- package/dist/eval/beam-10m-loader.js +128 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +17 -0
- package/dist/middleware/require-bearer.d.ts +27 -0
- package/dist/middleware/require-bearer.js +60 -0
- package/dist/middleware/validate-response.d.ts +33 -0
- package/dist/middleware/validate-response.js +55 -0
- package/dist/middleware/validate.d.ts +43 -0
- package/dist/middleware/validate.js +85 -0
- package/dist/routes/agents.d.ts +13 -0
- package/dist/routes/agents.js +89 -0
- package/dist/routes/document-response-formatters.d.ts +98 -0
- package/dist/routes/document-response-formatters.js +243 -0
- package/dist/routes/documents.d.ts +74 -0
- package/dist/routes/documents.js +425 -0
- package/dist/routes/memories.d.ts +29 -0
- package/dist/routes/memories.js +725 -0
- package/dist/routes/memory-response-formatters.d.ts +179 -0
- package/dist/routes/memory-response-formatters.js +210 -0
- package/dist/routes/public-raw-storage-metadata.d.ts +54 -0
- package/dist/routes/public-raw-storage-metadata.js +56 -0
- package/dist/routes/reflect.d.ts +14 -0
- package/dist/routes/reflect.js +19 -0
- package/dist/routes/response-schema-map.d.ts +14 -0
- package/dist/routes/response-schema-map.js +69 -0
- package/dist/routes/route-errors.d.ts +12 -0
- package/dist/routes/route-errors.js +30 -0
- package/dist/routes/storage-error-handlers.d.ts +34 -0
- package/dist/routes/storage-error-handlers.js +185 -0
- package/dist/routes/storage-response-formatters.d.ts +44 -0
- package/dist/routes/storage-response-formatters.js +155 -0
- package/dist/routes/storage.d.ts +38 -0
- package/dist/routes/storage.js +369 -0
- package/dist/routes/upstream-provider-errors.d.ts +19 -0
- package/dist/routes/upstream-provider-errors.js +95 -0
- package/dist/schemas/agents.d.ts +79 -0
- package/dist/schemas/agents.js +126 -0
- package/dist/schemas/common.d.ts +110 -0
- package/dist/schemas/common.js +190 -0
- package/dist/schemas/document-list-responses.d.ts +102 -0
- package/dist/schemas/document-list-responses.js +87 -0
- package/dist/schemas/document-list-schemas.d.ts +123 -0
- package/dist/schemas/document-list-schemas.js +174 -0
- package/dist/schemas/document-response-schemas.d.ts +610 -0
- package/dist/schemas/document-response-schemas.js +264 -0
- package/dist/schemas/document-status-envelope.d.ts +48 -0
- package/dist/schemas/document-status-envelope.js +54 -0
- package/dist/schemas/documents.d.ts +292 -0
- package/dist/schemas/documents.js +449 -0
- package/dist/schemas/errors.d.ts +75 -0
- package/dist/schemas/errors.js +105 -0
- package/dist/schemas/memories.d.ts +378 -0
- package/dist/schemas/memories.js +542 -0
- package/dist/schemas/openapi.d.ts +24 -0
- package/dist/schemas/openapi.js +1038 -0
- package/dist/schemas/response-scalars.d.ts +10 -0
- package/dist/schemas/response-scalars.js +10 -0
- package/dist/schemas/responses.d.ts +536 -0
- package/dist/schemas/responses.js +350 -0
- package/dist/schemas/search-response-parts.d.ts +97 -0
- package/dist/schemas/search-response-parts.js +103 -0
- package/dist/schemas/storage-schemas.d.ts +175 -0
- package/dist/schemas/storage-schemas.js +277 -0
- package/dist/schemas/zod-setup.d.ts +15 -0
- package/dist/schemas/zod-setup.js +17 -0
- package/dist/server.d.ts +13 -0
- package/dist/server.js +57 -0
- package/dist/services/abstract-query-policy.d.ts +13 -0
- package/dist/services/abstract-query-policy.js +50 -0
- package/dist/services/affinity-clustering.d.ts +66 -0
- package/dist/services/affinity-clustering.js +125 -0
- package/dist/services/agentic-retrieval.d.ts +38 -0
- package/dist/services/agentic-retrieval.js +126 -0
- package/dist/services/answer-format.d.ts +56 -0
- package/dist/services/answer-format.js +118 -0
- package/dist/services/answer-rescue.d.ts +72 -0
- package/dist/services/answer-rescue.js +177 -0
- package/dist/services/answer-verifier.d.ts +24 -0
- package/dist/services/answer-verifier.js +73 -0
- package/dist/services/api-retry.d.ts +6 -0
- package/dist/services/api-retry.js +41 -0
- package/dist/services/assistant-turn-filter.d.ts +20 -0
- package/dist/services/assistant-turn-filter.js +69 -0
- package/dist/services/atomicmem-uri.d.ts +33 -0
- package/dist/services/atomicmem-uri.js +86 -0
- package/dist/services/audit-events.d.ts +54 -0
- package/dist/services/audit-events.js +56 -0
- package/dist/services/chunked-extraction.d.ts +21 -0
- package/dist/services/chunked-extraction.js +108 -0
- package/dist/services/claim-slotting.d.ts +27 -0
- package/dist/services/claim-slotting.js +38 -0
- package/dist/services/claude-code-llm.d.ts +19 -0
- package/dist/services/claude-code-llm.js +96 -0
- package/dist/services/composite-dedup.d.ts +50 -0
- package/dist/services/composite-dedup.js +153 -0
- package/dist/services/composite-grouping.d.ts +41 -0
- package/dist/services/composite-grouping.js +111 -0
- package/dist/services/composite-staleness.d.ts +20 -0
- package/dist/services/composite-staleness.js +50 -0
- package/dist/services/conciseness-preference.d.ts +14 -0
- package/dist/services/conciseness-preference.js +42 -0
- package/dist/services/conflict-policy.d.ts +20 -0
- package/dist/services/conflict-policy.js +335 -0
- package/dist/services/consensus-extraction.d.ts +39 -0
- package/dist/services/consensus-extraction.js +147 -0
- package/dist/services/consensus-validation.d.ts +52 -0
- package/dist/services/consensus-validation.js +206 -0
- package/dist/services/consolidation-service.d.ts +60 -0
- package/dist/services/consolidation-service.js +171 -0
- package/dist/services/content-detection.d.ts +18 -0
- package/dist/services/content-detection.js +25 -0
- package/dist/services/contradiction-surfacing.d.ts +62 -0
- package/dist/services/contradiction-surfacing.js +111 -0
- package/dist/services/cost-telemetry.d.ts +39 -0
- package/dist/services/cost-telemetry.js +58 -0
- package/dist/services/counter-evidence.d.ts +34 -0
- package/dist/services/counter-evidence.js +92 -0
- package/dist/services/current-state-ranking.d.ts +21 -0
- package/dist/services/current-state-ranking.js +152 -0
- package/dist/services/deferred-audn.d.ts +47 -0
- package/dist/services/deferred-audn.js +162 -0
- package/dist/services/document-chunker.d.ts +50 -0
- package/dist/services/document-chunker.js +153 -0
- package/dist/services/document-failure-markers.d.ts +91 -0
- package/dist/services/document-failure-markers.js +305 -0
- package/dist/services/document-indexer.d.ts +122 -0
- package/dist/services/document-indexer.js +405 -0
- package/dist/services/document-service.d.ts +245 -0
- package/dist/services/document-service.js +325 -0
- package/dist/services/document-upload-artifact-sync.d.ts +80 -0
- package/dist/services/document-upload-artifact-sync.js +162 -0
- package/dist/services/document-upload-beta2-recovery.d.ts +72 -0
- package/dist/services/document-upload-beta2-recovery.js +94 -0
- package/dist/services/document-upload.d.ts +44 -0
- package/dist/services/document-upload.js +353 -0
- package/dist/services/embedding.d.ts +57 -0
- package/dist/services/embedding.js +416 -0
- package/dist/services/entity-attribute-extractor.d.ts +34 -0
- package/dist/services/entity-attribute-extractor.js +117 -0
- package/dist/services/entity-card-synthesis.d.ts +54 -0
- package/dist/services/entity-card-synthesis.js +92 -0
- package/dist/services/entity-dedup.d.ts +9 -0
- package/dist/services/entity-dedup.js +14 -0
- package/dist/services/entity-graph.d.ts +17 -0
- package/dist/services/entity-graph.js +135 -0
- package/dist/services/entropy-gate.d.ts +52 -0
- package/dist/services/entropy-gate.js +56 -0
- package/dist/services/episode-fetcher.d.ts +47 -0
- package/dist/services/episode-fetcher.js +128 -0
- package/dist/services/event-anchor-facts.d.ts +8 -0
- package/dist/services/event-anchor-facts.js +205 -0
- package/dist/services/event-chain-detector.d.ts +52 -0
- package/dist/services/event-chain-detector.js +83 -0
- package/dist/services/extraction-cache.d.ts +9 -0
- package/dist/services/extraction-cache.js +54 -0
- package/dist/services/extraction-enrichment.d.ts +9 -0
- package/dist/services/extraction-enrichment.js +223 -0
- package/dist/services/extraction.d.ts +69 -0
- package/dist/services/extraction.js +596 -0
- package/dist/services/fact-normalization.d.ts +12 -0
- package/dist/services/fact-normalization.js +248 -0
- package/dist/services/filecoin-observability.d.ts +127 -0
- package/dist/services/filecoin-observability.js +200 -0
- package/dist/services/first-mention-service.d.ts +76 -0
- package/dist/services/first-mention-service.js +186 -0
- package/dist/services/hierarchical-retrieval.d.ts +49 -0
- package/dist/services/hierarchical-retrieval.js +50 -0
- package/dist/services/ingest-fact-pipeline.d.ts +32 -0
- package/dist/services/ingest-fact-pipeline.js +212 -0
- package/dist/services/ingest-post-write.d.ts +50 -0
- package/dist/services/ingest-post-write.js +117 -0
- package/dist/services/ingest-trace.d.ts +32 -0
- package/dist/services/ingest-trace.js +60 -0
- package/dist/services/input-sanitizer.d.ts +41 -0
- package/dist/services/input-sanitizer.js +135 -0
- package/dist/services/iterative-retrieval.d.ts +26 -0
- package/dist/services/iterative-retrieval.js +139 -0
- package/dist/services/keyword-expansion.d.ts +10 -0
- package/dist/services/keyword-expansion.js +26 -0
- package/dist/services/lesson-service.d.ts +68 -0
- package/dist/services/lesson-service.js +178 -0
- package/dist/services/literal-extractor.d.ts +16 -0
- package/dist/services/literal-extractor.js +74 -0
- package/dist/services/literal-list-protection.d.ts +17 -0
- package/dist/services/literal-list-protection.js +134 -0
- package/dist/services/literal-query-expansion.d.ts +20 -0
- package/dist/services/literal-query-expansion.js +181 -0
- package/dist/services/llm.d.ts +61 -0
- package/dist/services/llm.js +265 -0
- package/dist/services/memcell-projection.d.ts +17 -0
- package/dist/services/memcell-projection.js +41 -0
- package/dist/services/memory-audn.d.ts +43 -0
- package/dist/services/memory-audn.js +419 -0
- package/dist/services/memory-crud.d.ts +93 -0
- package/dist/services/memory-crud.js +255 -0
- package/dist/services/memory-ingest.d.ts +21 -0
- package/dist/services/memory-ingest.js +249 -0
- package/dist/services/memory-lifecycle.d.ts +75 -0
- package/dist/services/memory-lifecycle.js +108 -0
- package/dist/services/memory-lineage.d.ts +181 -0
- package/dist/services/memory-lineage.js +232 -0
- package/dist/services/memory-network.d.ts +40 -0
- package/dist/services/memory-network.js +75 -0
- package/dist/services/memory-search-types.d.ts +25 -0
- package/dist/services/memory-search-types.js +10 -0
- package/dist/services/memory-search.d.ts +48 -0
- package/dist/services/memory-search.js +505 -0
- package/dist/services/memory-service-types.d.ts +371 -0
- package/dist/services/memory-service-types.js +8 -0
- package/dist/services/memory-service.d.ts +152 -0
- package/dist/services/memory-service.js +225 -0
- package/dist/services/memory-storage.d.ts +33 -0
- package/dist/services/memory-storage.js +328 -0
- package/dist/services/msr-aggregator.d.ts +38 -0
- package/dist/services/msr-aggregator.js +97 -0
- package/dist/services/msr-detector.d.ts +35 -0
- package/dist/services/msr-detector.js +65 -0
- package/dist/services/namespace-retrieval.d.ts +60 -0
- package/dist/services/namespace-retrieval.js +180 -0
- package/dist/services/observation-date-extraction.d.ts +12 -0
- package/dist/services/observation-date-extraction.js +50 -0
- package/dist/services/observation-service.d.ts +27 -0
- package/dist/services/observation-service.js +84 -0
- package/dist/services/packaging-observability.d.ts +29 -0
- package/dist/services/packaging-observability.js +146 -0
- package/dist/services/query-expansion.d.ts +83 -0
- package/dist/services/query-expansion.js +242 -0
- package/dist/services/query-keyword-matches.d.ts +6 -0
- package/dist/services/query-keyword-matches.js +56 -0
- package/dist/services/query-term-visibility.d.ts +28 -0
- package/dist/services/query-term-visibility.js +100 -0
- package/dist/services/quick-extraction.d.ts +25 -0
- package/dist/services/quick-extraction.js +431 -0
- package/dist/services/quoted-entity-extraction.d.ts +10 -0
- package/dist/services/quoted-entity-extraction.js +161 -0
- package/dist/services/raw-storage-reconciler-backoff.d.ts +8 -0
- package/dist/services/raw-storage-reconciler-backoff.js +14 -0
- package/dist/services/raw-storage-reconciler-scheduler.d.ts +29 -0
- package/dist/services/raw-storage-reconciler-scheduler.js +43 -0
- package/dist/services/raw-storage-reconciler.d.ts +71 -0
- package/dist/services/raw-storage-reconciler.js +278 -0
- package/dist/services/recap-builder.d.ts +49 -0
- package/dist/services/recap-builder.js +157 -0
- package/dist/services/reflect-jobs.d.ts +23 -0
- package/dist/services/reflect-jobs.js +36 -0
- package/dist/services/reflect-prompts.d.ts +71 -0
- package/dist/services/reflect-prompts.js +99 -0
- package/dist/services/reflect-retrieval.d.ts +33 -0
- package/dist/services/reflect-retrieval.js +30 -0
- package/dist/services/reflect.d.ts +49 -0
- package/dist/services/reflect.js +84 -0
- package/dist/services/relative-temporal.d.ts +14 -0
- package/dist/services/relative-temporal.js +163 -0
- package/dist/services/relevance-policy.d.ts +37 -0
- package/dist/services/relevance-policy.js +109 -0
- package/dist/services/rerank.d.ts +32 -0
- package/dist/services/rerank.js +118 -0
- package/dist/services/reranker.d.ts +20 -0
- package/dist/services/reranker.js +99 -0
- package/dist/services/retrieval-channel-rules.d.ts +34 -0
- package/dist/services/retrieval-channel-rules.js +41 -0
- package/dist/services/retrieval-config-overlay.d.ts +36 -0
- package/dist/services/retrieval-config-overlay.js +44 -0
- package/dist/services/retrieval-format.d.ts +119 -0
- package/dist/services/retrieval-format.js +559 -0
- package/dist/services/retrieval-policy.d.ts +69 -0
- package/dist/services/retrieval-policy.js +275 -0
- package/dist/services/retrieval-profiles.d.ts +37 -0
- package/dist/services/retrieval-profiles.js +90 -0
- package/dist/services/retrieval-side-effects.d.ts +14 -0
- package/dist/services/retrieval-side-effects.js +26 -0
- package/dist/services/retrieval-trace.d.ts +108 -0
- package/dist/services/retrieval-trace.js +147 -0
- package/dist/services/rrf-fusion.d.ts +18 -0
- package/dist/services/rrf-fusion.js +34 -0
- package/dist/services/search-pipeline.d.ts +71 -0
- package/dist/services/search-pipeline.js +788 -0
- package/dist/services/session-date.d.ts +20 -0
- package/dist/services/session-date.js +61 -0
- package/dist/services/session-packaging.d.ts +53 -0
- package/dist/services/session-packaging.js +182 -0
- package/dist/services/session-summary-generator.d.ts +53 -0
- package/dist/services/session-summary-generator.js +134 -0
- package/dist/services/specialists/cr-specialist.d.ts +52 -0
- package/dist/services/specialists/cr-specialist.js +121 -0
- package/dist/services/specialists/dispatch.d.ts +53 -0
- package/dist/services/specialists/dispatch.js +102 -0
- package/dist/services/specialists/ie-ku-specialist.d.ts +37 -0
- package/dist/services/specialists/ie-ku-specialist.js +63 -0
- package/dist/services/specialists/msr-specialist.d.ts +61 -0
- package/dist/services/specialists/msr-specialist.js +162 -0
- package/dist/services/specialists/tr-specialist.d.ts +37 -0
- package/dist/services/specialists/tr-specialist.js +146 -0
- package/dist/services/storage-key-prefix.d.ts +42 -0
- package/dist/services/storage-key-prefix.js +45 -0
- package/dist/services/storage-put-recovery.d.ts +71 -0
- package/dist/services/storage-put-recovery.js +269 -0
- package/dist/services/storage-service-errors.d.ts +124 -0
- package/dist/services/storage-service-errors.js +189 -0
- package/dist/services/storage-service.d.ts +176 -0
- package/dist/services/storage-service.js +423 -0
- package/dist/services/subject-aware-ranking.d.ts +19 -0
- package/dist/services/subject-aware-ranking.js +161 -0
- package/dist/services/supplemental-extraction.d.ts +7 -0
- package/dist/services/supplemental-extraction.js +116 -0
- package/dist/services/tbc-execution.d.ts +49 -0
- package/dist/services/tbc-execution.js +284 -0
- package/dist/services/temporal-classifier.d.ts +56 -0
- package/dist/services/temporal-classifier.js +94 -0
- package/dist/services/temporal-endpoint-evidence.d.ts +12 -0
- package/dist/services/temporal-endpoint-evidence.js +313 -0
- package/dist/services/temporal-fingerprint.d.ts +6 -0
- package/dist/services/temporal-fingerprint.js +12 -0
- package/dist/services/temporal-format.d.ts +9 -0
- package/dist/services/temporal-format.js +21 -0
- package/dist/services/temporal-intent.d.ts +39 -0
- package/dist/services/temporal-intent.js +78 -0
- package/dist/services/temporal-query-constraints.d.ts +16 -0
- package/dist/services/temporal-query-constraints.js +107 -0
- package/dist/services/temporal-query-expansion.d.ts +14 -0
- package/dist/services/temporal-query-expansion.js +131 -0
- package/dist/services/temporal-rerank.d.ts +22 -0
- package/dist/services/temporal-rerank.js +47 -0
- package/dist/services/temporal-result-protection.d.ts +7 -0
- package/dist/services/temporal-result-protection.js +60 -0
- package/dist/services/temporal-state-write.d.ts +57 -0
- package/dist/services/temporal-state-write.js +45 -0
- package/dist/services/tiered-context.d.ts +87 -0
- package/dist/services/tiered-context.js +214 -0
- package/dist/services/tiered-loading.d.ts +88 -0
- package/dist/services/tiered-loading.js +263 -0
- package/dist/services/timeline-pack.d.ts +36 -0
- package/dist/services/timeline-pack.js +50 -0
- package/dist/services/timing.d.ts +13 -0
- package/dist/services/timing.js +72 -0
- package/dist/services/tll-augmentation.d.ts +20 -0
- package/dist/services/tll-augmentation.js +125 -0
- package/dist/services/tll-retrieval.d.ts +55 -0
- package/dist/services/tll-retrieval.js +101 -0
- package/dist/services/topic-abstraction.d.ts +36 -0
- package/dist/services/topic-abstraction.js +105 -0
- package/dist/services/trust-scoring.d.ts +43 -0
- package/dist/services/trust-scoring.js +89 -0
- package/dist/services/typed-belief-calculus.d.ts +126 -0
- package/dist/services/typed-belief-calculus.js +204 -0
- package/dist/services/upload-config.d.ts +34 -0
- package/dist/services/upload-config.js +23 -0
- package/dist/services/upload-decision.d.ts +65 -0
- package/dist/services/upload-decision.js +98 -0
- package/dist/services/upload-helpers.d.ts +107 -0
- package/dist/services/upload-helpers.js +148 -0
- package/dist/services/user-profile-builder.d.ts +22 -0
- package/dist/services/user-profile-builder.js +109 -0
- package/dist/services/voyage-embedding.d.ts +22 -0
- package/dist/services/voyage-embedding.js +77 -0
- package/dist/services/write-security.d.ts +31 -0
- package/dist/services/write-security.js +64 -0
- package/dist/storage/artifact-public-redaction.d.ts +34 -0
- package/dist/storage/artifact-public-redaction.js +83 -0
- package/dist/storage/cleanup.d.ts +103 -0
- package/dist/storage/cleanup.js +138 -0
- package/dist/storage/codec-factory.d.ts +17 -0
- package/dist/storage/codec-factory.js +33 -0
- package/dist/storage/codecs/aes-gcm-codec.d.ts +44 -0
- package/dist/storage/codecs/aes-gcm-codec.js +108 -0
- package/dist/storage/codecs/noop-codec.d.ts +16 -0
- package/dist/storage/codecs/noop-codec.js +23 -0
- package/dist/storage/factory.d.ts +44 -0
- package/dist/storage/factory.js +99 -0
- package/dist/storage/filecoin-cid-validation.d.ts +82 -0
- package/dist/storage/filecoin-cid-validation.js +122 -0
- package/dist/storage/filecoin-public-metadata.d.ts +73 -0
- package/dist/storage/filecoin-public-metadata.js +110 -0
- package/dist/storage/local-fs-store.d.ts +39 -0
- package/dist/storage/local-fs-store.js +145 -0
- package/dist/storage/pointer-uri-allowlist.d.ts +38 -0
- package/dist/storage/pointer-uri-allowlist.js +70 -0
- package/dist/storage/provider-metadata-projection.d.ts +27 -0
- package/dist/storage/provider-metadata-projection.js +68 -0
- package/dist/storage/providers/filecoin/backend.d.ts +42 -0
- package/dist/storage/providers/filecoin/backend.js +250 -0
- package/dist/storage/providers/filecoin/config.d.ts +70 -0
- package/dist/storage/providers/filecoin/config.js +275 -0
- package/dist/storage/providers/filecoin/errors.d.ts +45 -0
- package/dist/storage/providers/filecoin/errors.js +56 -0
- package/dist/storage/providers/filecoin/filecoin-pin-car.d.ts +78 -0
- package/dist/storage/providers/filecoin/filecoin-pin-car.js +155 -0
- package/dist/storage/providers/filecoin/filecoin-pin-client.d.ts +92 -0
- package/dist/storage/providers/filecoin/filecoin-pin-client.js +199 -0
- package/dist/storage/providers/filecoin/filecoin-pin-mapping.d.ts +58 -0
- package/dist/storage/providers/filecoin/filecoin-pin-mapping.js +103 -0
- package/dist/storage/providers/filecoin/filecoin-pin-timeout.d.ts +30 -0
- package/dist/storage/providers/filecoin/filecoin-pin-timeout.js +53 -0
- package/dist/storage/providers/filecoin/filecoin-pin-vendor.d.ts +111 -0
- package/dist/storage/providers/filecoin/filecoin-pin-vendor.js +87 -0
- package/dist/storage/providers/filecoin/hints.d.ts +71 -0
- package/dist/storage/providers/filecoin/hints.js +123 -0
- package/dist/storage/providers/filecoin/index.d.ts +51 -0
- package/dist/storage/providers/filecoin/index.js +103 -0
- package/dist/storage/providers/filecoin/ipfs-cid.d.ts +50 -0
- package/dist/storage/providers/filecoin/ipfs-cid.js +64 -0
- package/dist/storage/providers/filecoin/metadata.d.ts +72 -0
- package/dist/storage/providers/filecoin/metadata.js +137 -0
- package/dist/storage/providers/filecoin/piece-cid.d.ts +48 -0
- package/dist/storage/providers/filecoin/piece-cid.js +57 -0
- package/dist/storage/providers/filecoin/provider-client.d.ts +234 -0
- package/dist/storage/providers/filecoin/provider-client.js +27 -0
- package/dist/storage/providers/filecoin/readiness.d.ts +62 -0
- package/dist/storage/providers/filecoin/readiness.js +85 -0
- package/dist/storage/providers/filecoin/retriever.d.ts +82 -0
- package/dist/storage/providers/filecoin/retriever.js +63 -0
- package/dist/storage/providers/filecoin/skeleton-client.d.ts +36 -0
- package/dist/storage/providers/filecoin/skeleton-client.js +55 -0
- package/dist/storage/providers/filecoin/synapse-client.d.ts +169 -0
- package/dist/storage/providers/filecoin/synapse-client.js +343 -0
- package/dist/storage/providers/filecoin/synapse-construction.d.ts +26 -0
- package/dist/storage/providers/filecoin/synapse-construction.js +47 -0
- package/dist/storage/providers/filecoin/synapse-error-mapping.d.ts +23 -0
- package/dist/storage/providers/filecoin/synapse-error-mapping.js +49 -0
- package/dist/storage/providers/filecoin/synapse-readiness.d.ts +37 -0
- package/dist/storage/providers/filecoin/synapse-readiness.js +231 -0
- package/dist/storage/providers/filecoin/uri.d.ts +49 -0
- package/dist/storage/providers/filecoin/uri.js +84 -0
- package/dist/storage/providers/filecoin/verified-fetch-lifecycle.d.ts +77 -0
- package/dist/storage/providers/filecoin/verified-fetch-lifecycle.js +196 -0
- package/dist/storage/providers/filecoin/verified-fetch-retriever.d.ts +54 -0
- package/dist/storage/providers/filecoin/verified-fetch-retriever.js +81 -0
- package/dist/storage/providers/filecoin/verified-fetch-vendor.d.ts +71 -0
- package/dist/storage/providers/filecoin/verified-fetch-vendor.js +94 -0
- package/dist/storage/raw-content-codec.d.ts +89 -0
- package/dist/storage/raw-content-codec.js +47 -0
- package/dist/storage/raw-content-store-backend-adapter.d.ts +28 -0
- package/dist/storage/raw-content-store-backend-adapter.js +67 -0
- package/dist/storage/raw-content-store.d.ts +228 -0
- package/dist/storage/raw-content-store.js +27 -0
- package/dist/storage/s3-store.d.ts +42 -0
- package/dist/storage/s3-store.js +181 -0
- package/dist/storage/storage-backend-registry.d.ts +58 -0
- package/dist/storage/storage-backend-registry.js +56 -0
- package/dist/storage/storage-backend.d.ts +82 -0
- package/dist/storage/storage-backend.js +14 -0
- package/dist/storage/storage-capabilities.d.ts +56 -0
- package/dist/storage/storage-capabilities.js +170 -0
- package/dist/storage/store-registry.d.ts +67 -0
- package/dist/storage/store-registry.js +77 -0
- package/dist/vector-math.d.ts +15 -0
- package/dist/vector-math.js +31 -0
- package/dist/xml-escape.d.ts +5 -0
- package/dist/xml-escape.js +7 -0
- package/openapi.json +15395 -0
- package/openapi.yaml +10794 -0
- package/package.json +119 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deferred AUDN Reconciliation Service.
|
|
3
|
+
*
|
|
4
|
+
* When DEFERRED_AUDN_ENABLED=true, facts with conflict candidates
|
|
5
|
+
* (0.7 ≤ similarity < 0.95) are stored immediately as ADD and flagged
|
|
6
|
+
* for background reconciliation. This eliminates the 500–2000ms LLM
|
|
7
|
+
* AUDN call from the synchronous ingest path.
|
|
8
|
+
*
|
|
9
|
+
* The reconciliation pass processes flagged memories in batches,
|
|
10
|
+
* running the full LLM AUDN pipeline and applying decisions
|
|
11
|
+
* (NOOP → delete, SUPERSEDE → soft-delete original, UPDATE → merge).
|
|
12
|
+
*
|
|
13
|
+
* Expected latency improvement: 60–80% reduction for ingest batches
|
|
14
|
+
* with moderate conflict rates (30–70% of facts hitting candidates).
|
|
15
|
+
*/
|
|
16
|
+
import { config } from '../config.js';
|
|
17
|
+
import { findDeferredMemories, findAllDeferredMemories, clearDeferredFlag, countDeferredMemories, markMemoryDeferred, } from '../db/repository-deferred-audn.js';
|
|
18
|
+
import { cachedResolveAUDN } from './extraction-cache.js';
|
|
19
|
+
import { applyClarificationOverrides } from './conflict-policy.js';
|
|
20
|
+
import { embedText } from './embedding.js';
|
|
21
|
+
import { emitAuditEvent } from './audit-events.js';
|
|
22
|
+
/**
 * Decide whether a fact's AUDN resolution should be postponed to the
 * background reconciliation pass instead of being resolved inline.
 *
 * @param fastDecisionResolved - Truthy when the caller already settled the
 *   fact without the LLM (going by the parameter name); such facts are
 *   never deferred.
 * @param candidateCount - Number of conflict candidates found for the fact.
 * @returns The falsy flag value itself when `config.deferredAudnEnabled`
 *   is off; otherwise `true` only when no fast decision exists and at
 *   least one candidate was found.
 */
export function shouldDeferAudn(fastDecisionResolved, candidateCount) {
    const featureFlag = config.deferredAudnEnabled;
    if (!featureFlag) {
        // Mirror `&&` short-circuiting: hand back the falsy flag as-is.
        return featureFlag;
    }
    if (fastDecisionResolved) {
        return false;
    }
    return candidateCount > 0;
}
|
|
29
|
+
/**
 * Flag a freshly stored memory for background reconciliation.
 *
 * Only the `id`, `content` and `similarity` of each candidate are kept;
 * that reduced snapshot is handed to `markMemoryDeferred` together with
 * the memory id.
 *
 * @param pool - Database handle forwarded to `markMemoryDeferred`.
 * @param memoryId - Id of the memory to flag.
 * @param candidates - Conflict candidates found at ingest time.
 */
export async function deferMemoryForReconciliation(pool, memoryId, candidates) {
    const snapshot = candidates.map(({ id, content, similarity }) => {
        return { id, content, similarity };
    });
    await markMemoryDeferred(pool, memoryId, snapshot);
}
|
|
40
|
+
/**
 * Run one reconciliation pass over a single user's deferred memories.
 *
 * @param pool - Database handle used to load deferred memories.
 * @param repo - Memory repository forwarded to the batch processor.
 * @param userId - User whose deferred memories are loaded.
 * @param batchSize - Maximum number of memories to load; defaults to
 *   `config.deferredAudnBatchSize`.
 * @returns The summary produced by `processReconciliationBatch`; its
 *   timer starts before the deferred memories are loaded.
 */
export async function reconcileUser(pool, repo, userId, batchSize = config.deferredAudnBatchSize) {
    const startedAt = Date.now();
    const pendingMemories = await findDeferredMemories(pool, userId, batchSize);
    const summary = await processReconciliationBatch(pool, repo, pendingMemories, startedAt);
    return summary;
}
|
|
46
|
+
/**
 * Run one reconciliation pass over deferred memories of every user.
 *
 * @param pool - Database handle used to load deferred memories.
 * @param repo - Memory repository forwarded to the batch processor.
 * @param batchSize - Maximum number of memories to load; defaults to
 *   `config.deferredAudnBatchSize`.
 * @returns The summary produced by `processReconciliationBatch`; its
 *   timer starts before the deferred memories are loaded.
 */
export async function reconcileAll(pool, repo, batchSize = config.deferredAudnBatchSize) {
    const startedAt = Date.now();
    const pendingMemories = await findAllDeferredMemories(pool, batchSize);
    const summary = await processReconciliationBatch(pool, repo, pendingMemories, startedAt);
    return summary;
}
|
|
52
|
+
/**
 * Report how much reconciliation work is outstanding for a user.
 *
 * @param pool - Database handle forwarded to `countDeferredMemories`.
 * @param userId - User whose deferred memories are counted.
 * @returns `pending` (the count returned by `countDeferredMemories`) and
 *   `enabled` (the current `config.deferredAudnEnabled` value).
 */
export async function getReconciliationStatus(pool, userId) {
    const pendingCount = await countDeferredMemories(pool, userId);
    // The flag is read after the count resolves, as a point-in-time value.
    const status = {
        pending: pendingCount,
        enabled: config.deferredAudnEnabled,
    };
    return status;
}
|
|
57
|
+
/**
 * Reconcile each deferred memory in turn and tally the outcomes.
 *
 * Memories are processed sequentially. A failure on one memory is logged
 * and counted in `errors`; it does not stop the rest of the batch.
 *
 * @param pool - Database handle forwarded to `reconcileSingleMemory`.
 * @param repo - Memory repository forwarded to `reconcileSingleMemory`.
 * @param deferred - Deferred memories to reconcile.
 * @param startMs - Epoch milliseconds at which the pass started; used to
 *   compute `durationMs`.
 * @returns Counters per outcome plus the elapsed time in milliseconds.
 */
async function processReconciliationBatch(pool, repo, deferred, startMs) {
    const tally = {
        processed: 0, resolved: 0, noops: 0, updates: 0,
        supersedes: 0, deletes: 0, adds: 0, errors: 0, durationMs: 0,
    };
    // Counter fed by each AUDN action. An action that is not listed here
    // still counts as resolved but bumps no per-action counter.
    const counterByAction = new Map([
        ['NOOP', 'noops'],
        ['UPDATE', 'updates'],
        ['SUPERSEDE', 'supersedes'],
        ['DELETE', 'deletes'],
        ['ADD', 'adds'],
    ]);
    for (const memory of deferred) {
        tally.processed += 1;
        let action;
        try {
            action = await reconcileSingleMemory(pool, repo, memory);
        }
        catch (err) {
            tally.errors += 1;
            console.error(`[deferred-audn] Error reconciling memory ${memory.id}:`, err);
            continue;
        }
        tally.resolved += 1;
        const counter = counterByAction.get(action);
        if (counter !== undefined) {
            tally[counter] += 1;
        }
    }
    tally.durationMs = Date.now() - startMs;
    return tally;
}
|
|
93
|
+
/**
 * Reconcile one deferred memory against its still-live candidates.
 *
 * With no live candidate left, the deferred flag is cleared and the
 * memory is reported as 'ADD'. Otherwise the AUDN decision is resolved,
 * passed through `applyClarificationOverrides`, applied, and the flag is
 * cleared. An audit event is emitted when `config.auditLoggingEnabled`
 * is set.
 *
 * @param pool - Database handle used to clear the deferred flag.
 * @param repo - Memory repository used to refresh candidates and apply
 *   the decision.
 * @param memory - Deferred memory (reads `id`, `userId`, `content`,
 *   `candidates`).
 * @returns The action that was applied.
 */
async function reconcileSingleMemory(pool, repo, memory) {
    const { id: memoryId, userId, content } = memory;
    const liveCandidates = await refreshCandidates(repo, userId, memory.candidates);
    if (liveCandidates.length === 0) {
        // Nothing left to conflict with: the stored memory simply stays.
        await clearDeferredFlag(pool, memoryId);
        return 'ADD';
    }
    const rawDecision = await cachedResolveAUDN(content, liveCandidates);
    const decision = applyClarificationOverrides(rawDecision, content, liveCandidates, [], 'knowledge');
    await applyDeferredDecision(pool, repo, memory, decision);
    // The flag is cleared only after the decision was applied without throwing.
    await clearDeferredFlag(pool, memoryId);
    if (config.auditLoggingEnabled) {
        const { action, targetMemoryId } = decision;
        emitAuditEvent('deferred-audn:reconcile', userId, { memoryId, action, targetMemoryId });
    }
    return decision.action;
}
|
|
111
|
+
/**
 * Re-read the candidates captured at ingest time, since they may have
 * been edited or removed in the meantime.
 *
 * Each stored candidate is looked up one at a time through
 * `repo.getMemory`. Candidates that are missing or have `deleted_at`
 * set are dropped. Survivors take `id`, `content` and `importance` from
 * the current row and keep the `similarity` recorded at ingest.
 *
 * @param repo - Memory repository exposing `getMemory(id, userId)`.
 * @param userId - Owner passed to every lookup.
 * @param storedCandidates - Candidate snapshots saved when the memory
 *   was deferred.
 * @returns The live candidates, in their original order.
 */
async function refreshCandidates(repo, userId, storedCandidates) {
    const live = [];
    for (const stored of storedCandidates) {
        const current = await repo.getMemory(stored.id, userId);
        if (!current || current.deleted_at) {
            continue;
        }
        live.push({
            id: current.id,
            content: current.content,
            similarity: stored.similarity,
            importance: current.importance,
        });
    }
    return live;
}
|
|
130
|
+
/**
 * Apply a resolved AUDN decision for a deferred memory.
 *
 * - NOOP / DELETE: soft-delete the deferred memory.
 * - UPDATE: rewrite the target's content (with a fresh embedding) and
 *   soft-delete the deferred memory.
 * - SUPERSEDE: soft-delete the target; the deferred memory stays.
 * - ADD / any other action: log only.
 *
 * UPDATE is skipped with a warning, leaving the deferred memory in place,
 * when the target is the deferred memory itself, cannot be found, or is
 * already soft-deleted. Merging into such a target and then soft-deleting
 * the deferred memory would drop the fact. SUPERSEDE is skipped likewise
 * when it targets the deferred memory itself.
 *
 * @param pool - Unused here; kept for signature parity with callers.
 * @param repo - Memory repository (`getMemory`, `updateMemoryContent`,
 *   `softDeleteMemory`).
 * @param memory - Deferred memory (reads `id` and `userId`).
 * @param decision - AUDN decision (`action`, optional `targetMemoryId`
 *   and `updatedContent`).
 */
async function applyDeferredDecision(pool, repo, memory, decision) {
    const { id: memoryId, userId } = memory;
    const { action, targetMemoryId, updatedContent } = decision;
    switch (action) {
        case 'NOOP':
            await repo.softDeleteMemory(userId, memoryId);
            console.log(`[deferred-audn] NOOP: deleted duplicate ${memoryId}`);
            break;
        case 'UPDATE': {
            if (!targetMemoryId || !updatedContent) {
                break;
            }
            if (targetMemoryId === memoryId) {
                console.warn(`[deferred-audn] UPDATE skipped: ${memoryId} targets itself`);
                break;
            }
            const target = await repo.getMemory(targetMemoryId, userId);
            if (!target || target.deleted_at) {
                // Same liveness rule used when candidates are refreshed.
                console.warn(`[deferred-audn] UPDATE skipped: target ${targetMemoryId} is missing or deleted; keeping ${memoryId}`);
                break;
            }
            // Embed only once the target is known to be usable.
            const newEmbedding = await embedText(updatedContent);
            await repo.updateMemoryContent(userId, targetMemoryId, updatedContent, newEmbedding, target.importance ?? 0.5);
            await repo.softDeleteMemory(userId, memoryId);
            console.log(`[deferred-audn] UPDATE: merged ${memoryId} into ${targetMemoryId}`);
            break;
        }
        case 'SUPERSEDE':
            if (!targetMemoryId) {
                break;
            }
            if (targetMemoryId === memoryId) {
                console.warn(`[deferred-audn] SUPERSEDE skipped: ${memoryId} targets itself`);
                break;
            }
            await repo.softDeleteMemory(userId, targetMemoryId);
            console.log(`[deferred-audn] SUPERSEDE: ${memoryId} replaces ${targetMemoryId}`);
            break;
        case 'DELETE':
            await repo.softDeleteMemory(userId, memoryId);
            console.log(`[deferred-audn] DELETE: removed ${memoryId}`);
            break;
        case 'ADD':
            console.log(`[deferred-audn] ADD: confirmed ${memoryId} is distinct`);
            break;
        default:
            console.log(`[deferred-audn] ${action}: no action for ${memoryId}`);
            break;
    }
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic text chunker for the document pipeline (Phase 2).
|
|
3
|
+
*
|
|
4
|
+
* Pure function: same (text, options) → same chunks. No timing, no
|
|
5
|
+
* randomness, no provider state. Each chunk carries an absolute
|
|
6
|
+
* character offset range, a content_hash, a stable index, and a token
|
|
7
|
+
* estimate; the index field is what the active-unique partial index on
|
|
8
|
+
* `document_chunks` keys on alongside `chunker_version`.
|
|
9
|
+
*
|
|
10
|
+
* Phase 2 ships a single chunker_version (`PHASE2_CHUNKER_VERSION`).
|
|
11
|
+
* If the algorithm changes meaningfully, bump the constant — that
|
|
12
|
+
* triggers a fresh insert generation rather than colliding with the
|
|
13
|
+
* old run on the unique index.
|
|
14
|
+
*
|
|
15
|
+
* See `Atomicmemory-research/docs/core-repo/design/large-file-ingestion-and-raw-storage-plan-2026-05-08.md`
|
|
16
|
+
* Phase 2.
|
|
17
|
+
*/
|
|
18
|
+
/**
|
|
19
|
+
* Pinned chunker identifier. Bump when the algorithm output changes for
|
|
20
|
+
* the same input text — that lets the active-unique index treat the new
|
|
21
|
+
* generation as fresh inserts rather than colliding with prior runs.
|
|
22
|
+
*/
|
|
23
|
+
// Ambient declaration: the literal initializer gives this constant the string-literal type "phase2-fixed-v1".
export declare const PHASE2_CHUNKER_VERSION = "phase2-fixed-v1";
|
|
24
|
+
/** Pinned parser identifier. Phase 2 accepts text input only. */
|
|
25
|
+
// Ambient declaration: the literal initializer gives this constant the string-literal type "phase2-text-v1".
export declare const PHASE2_PARSER_VERSION = "phase2-text-v1";
|
|
26
|
+
/**
 * Optional tuning knobs for `chunkText`. Sizes are compared against
 * `text.length`, so they are measured in UTF-16 code units.
 */
export interface ChunkOptions {
    /** Window size before word-boundary trimming. Default 1500; values below 1 are raised to 1. */
    chunkSize?: number;
    /** Characters shared by adjacent windows. Default 150; negative values become 0 and the value is capped at half of `chunkSize`. */
    chunkOverlap?: number;
    /** Sliding-window chunks whose trimmed content is shorter than this are not emitted. Default 100; clamped to the range 1..`chunkSize`. */
    minChunkSize?: number;
}
|
|
31
|
+
/** One chunk produced by `chunkText`. */
export interface ChunkResult {
    /** Zero-based position among the emitted chunks. */
    chunkIndex: number;
    /** Chunk text with leading and trailing whitespace removed. */
    content: string;
    /** Hex SHA-256 digest of `content` (see `hashChunkContent`). */
    contentHash: string;
    /** Offset in the source text where `content` starts. */
    charStart: number;
    /** Offset in the source text just past the end of `content`, so `text.slice(charStart, charEnd) === content`. */
    charEnd: number;
    /** Rough token estimate (about 4 characters per token, never below 1); metadata only. */
    tokenCount: number;
}
|
|
39
|
+
/**
|
|
40
|
+
* Chunk `text` deterministically. Returns `[]` for empty or
|
|
41
|
+
* whitespace-only input.
|
|
42
|
+
*/
|
|
43
|
+
/**
 * @param text - Text to split.
 * @param options - Optional overrides for chunk size, overlap and minimum chunk size.
 * @returns Chunks in order of appearance; `[]` for empty or whitespace-only input.
 */
export declare function chunkText(text: string, options?: ChunkOptions): ChunkResult[];
|
|
44
|
+
/** Fingerprint a chunk's content; stable across runs for byte-identical input. */
|
|
45
|
+
/**
 * @param content - Chunk text to fingerprint.
 * @returns Hex-encoded SHA-256 digest of the UTF-8 encoding of `content`.
 */
export declare function hashChunkContent(content: string): string;
|
|
46
|
+
/**
|
|
47
|
+
* Whole-text fingerprint, used by the indexer's idempotency check.
|
|
48
|
+
* Distinct helper so tests can pin both invariants independently.
|
|
49
|
+
*/
|
|
50
|
+
/**
 * @param text - Whole document text to fingerprint.
 * @returns Hex-encoded SHA-256 digest of the UTF-8 encoding of `text`.
 */
export declare function hashIndexedText(text: string): string;
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic text chunker for the document pipeline (Phase 2).
|
|
3
|
+
*
|
|
4
|
+
* Pure function: same (text, options) → same chunks. No timing, no
|
|
5
|
+
* randomness, no provider state. Each chunk carries an absolute
|
|
6
|
+
* character offset range, a content_hash, a stable index, and a token
|
|
7
|
+
* estimate; the index field is what the active-unique partial index on
|
|
8
|
+
* `document_chunks` keys on alongside `chunker_version`.
|
|
9
|
+
*
|
|
10
|
+
* Phase 2 ships a single chunker_version (`PHASE2_CHUNKER_VERSION`).
|
|
11
|
+
* If the algorithm changes meaningfully, bump the constant — that
|
|
12
|
+
* triggers a fresh insert generation rather than colliding with the
|
|
13
|
+
* old run on the unique index.
|
|
14
|
+
*
|
|
15
|
+
* See `Atomicmemory-research/docs/core-repo/design/large-file-ingestion-and-raw-storage-plan-2026-05-08.md`
|
|
16
|
+
* Phase 2.
|
|
17
|
+
*/
|
|
18
|
+
import { createHash } from 'node:crypto';
|
|
19
|
+
/**
|
|
20
|
+
* Character size of one chunk before word-boundary trimming. ~375 tokens
|
|
21
|
+
* for `text-embedding-3-small`'s typical English-text ratio (4 chars/token);
|
|
22
|
+
* well under the 8192-token model limit.
|
|
23
|
+
*/
|
|
24
|
+
// Used by resolveOptions when the caller does not supply `chunkSize`.
const DEFAULT_CHUNK_SIZE = 1500;
|
|
25
|
+
/**
|
|
26
|
+
* Overlap between adjacent chunks. ~10% of chunk size keeps adjacent
|
|
27
|
+
* sentences findable via either chunk without exploding the chunk count.
|
|
28
|
+
*/
|
|
29
|
+
// Used by resolveOptions when the caller does not supply `chunkOverlap`; still subject to clampOverlap.
const DEFAULT_CHUNK_OVERLAP = 150;
|
|
30
|
+
/** Reject chunks that fall below this size after trimming. */
|
|
31
|
+
// Used by resolveOptions when the caller does not supply `minChunkSize`; never allowed to exceed the chunk size.
const DEFAULT_MIN_CHUNK_SIZE = 100;
|
|
32
|
+
/**
|
|
33
|
+
* Pinned chunker identifier. Bump when the algorithm output changes for
|
|
34
|
+
* the same input text — that lets the active-unique index treat the new
|
|
35
|
+
* generation as fresh inserts rather than colliding with prior runs.
|
|
36
|
+
*/
|
|
37
|
+
// Changing this string is the documented way to signal that chunk output differs from earlier runs.
export const PHASE2_CHUNKER_VERSION = 'phase2-fixed-v1';
|
|
38
|
+
/** Pinned parser identifier. Phase 2 accepts text input only. */
|
|
39
|
+
// Identifier only; not referenced by any chunking code visible in this module.
export const PHASE2_PARSER_VERSION = 'phase2-text-v1';
|
|
40
|
+
/**
 * Split `text` into deterministic chunks.
 *
 * Empty or whitespace-only input yields `[]`. Text no longer than the
 * resolved `chunkSize` becomes a single trimmed chunk (index 0) whose
 * offsets bound the trimmed content. Longer text is handed to
 * `slidingWindowChunks`.
 *
 * @param text - Source text to chunk.
 * @param options - Optional size/overlap overrides, normalised by
 *   `resolveOptions`.
 * @returns Chunks in document order.
 */
export function chunkText(text, options = {}) {
    if (!text) {
        return [];
    }
    const body = text.trim();
    if (body.length === 0) {
        return [];
    }
    const opts = resolveOptions(options);
    if (text.length <= opts.chunkSize) {
        // The whole text fits in one window: emit it trimmed, with offsets
        // that point at the trimmed span inside the original string.
        const charStart = text.length - text.trimStart().length;
        const charEnd = text.trimEnd().length;
        return [makeChunk(0, body, charStart, charEnd)];
    }
    return slidingWindowChunks(text, opts);
}
|
|
58
|
+
/**
 * Fingerprint a chunk's content.
 *
 * @param content - Chunk text.
 * @returns Hex-encoded SHA-256 digest of the UTF-8 bytes of `content`;
 *   identical input always yields the identical digest.
 */
export function hashChunkContent(content) {
    const hasher = createHash('sha256');
    hasher.update(content, 'utf8');
    return hasher.digest('hex');
}
|
|
62
|
+
/**
 * Fingerprint a whole document text, for the indexer's idempotency check.
 * Kept separate from `hashChunkContent` so each invariant can be pinned
 * by its own tests.
 *
 * @param text - Full text that was (or is about to be) indexed.
 * @returns Hex-encoded SHA-256 digest of the UTF-8 bytes of `text`.
 */
export function hashIndexedText(text) {
    const hasher = createHash('sha256');
    hasher.update(text, 'utf8');
    return hasher.digest('hex');
}
|
|
69
|
+
/**
 * Normalise caller-supplied chunking options into a fully populated set.
 *
 * Each option falls back to its default when it is missing or coerces to
 * NaN, and is floored to a whole number. A NaN size would otherwise make
 * every comparison in the slider false and silently produce no chunks,
 * and fractional sizes would yield fractional character offsets.
 *
 * Resulting bounds: `chunkSize >= 1`; `chunkOverlap` as limited by
 * `clampOverlap`; `1 <= minChunkSize <= chunkSize`.
 *
 * @param input - Partial options (`chunkSize`, `chunkOverlap`,
 *   `minChunkSize`).
 * @returns `{ chunkSize, chunkOverlap, minChunkSize }`.
 */
function resolveOptions(input) {
    // Coerce the way Math.max/Math.min would, but reject NaN and fractions.
    // Infinity passes through unchanged (Math.floor keeps it).
    const toWhole = (value, fallback) => {
        if (value === undefined || value === null) {
            return fallback;
        }
        const whole = Math.floor(Number(value));
        return Number.isNaN(whole) ? fallback : whole;
    };
    const chunkSize = Math.max(1, toWhole(input.chunkSize, DEFAULT_CHUNK_SIZE));
    const chunkOverlap = clampOverlap(toWhole(input.chunkOverlap, DEFAULT_CHUNK_OVERLAP), chunkSize);
    const minChunkSize = Math.max(1, Math.min(toWhole(input.minChunkSize, DEFAULT_MIN_CHUNK_SIZE), chunkSize));
    return { chunkSize, chunkOverlap, minChunkSize };
}
|
|
75
|
+
/**
 * Bound the requested overlap so each window can still advance.
 *
 * @param requested - Overlap asked for by the caller (or the default).
 * @param chunkSize - Resolved window size.
 * @returns 0 for negative requests; otherwise the request capped at half
 *   of `chunkSize` (rounded down).
 */
function clampOverlap(requested, chunkSize) {
    // Half the window is the ceiling: anything larger leaves too little
    // room to move forward on each step.
    const ceiling = Math.floor(chunkSize / 2);
    return requested < 0 ? 0 : Math.min(requested, ceiling);
}
|
|
81
|
+
/**
 * Walk `text` with an overlapping window and collect the chunks.
 *
 * Windows whose trimmed content is shorter than `opts.minChunkSize` are
 * skipped and do not consume a chunk index. The walk ends once a window
 * reaches the end of the text.
 *
 * @param text - Text longer than one window.
 * @param opts - Resolved options (`chunkSize`, `chunkOverlap`,
 *   `minChunkSize`).
 * @returns Emitted chunks in document order.
 */
function slidingWindowChunks(text, opts) {
    const emitted = [];
    let position = 0;
    while (position < text.length) {
        const span = openWindow(text, position, opts);
        if (span.content.length >= opts.minChunkSize) {
            // The index counts emitted chunks only, so it always equals
            // the current output length.
            emitted.push(makeChunk(emitted.length, span.content, span.charStart, span.charEnd));
        }
        if (span.advanceTo >= text.length) {
            break;
        }
        position = advanceCursor(span.advanceTo, opts.chunkOverlap, position);
    }
    return emitted;
}
|
|
97
|
+
/**
 * Cut one window starting at `cursor` and describe its trimmed content.
 *
 * @param text - Full source text.
 * @param cursor - Offset at which the window opens.
 * @param opts - Resolved options (`chunkSize`, `minChunkSize`).
 * @returns `content` (the window without surrounding whitespace),
 *   `charStart`/`charEnd` (offsets bounding exactly that content) and
 *   `advanceTo` (the untrimmed window end, from which the next window
 *   is positioned).
 */
function openWindow(text, cursor, opts) {
    const hardEnd = Math.min(cursor + opts.chunkSize, text.length);
    const windowEnd = preserveWordBoundary(text, cursor, hardEnd, opts.minChunkSize);
    const raw = text.slice(cursor, windowEnd);
    // Offsets are tightened to the trimmed content so that
    // text.slice(charStart, charEnd) === content. Consumers of the offsets
    // would otherwise see ranges covering whitespace that is not part of
    // the stored chunk.
    const leadingWhitespace = raw.length - raw.trimStart().length;
    const trailingWhitespace = raw.length - raw.trimEnd().length;
    return {
        content: raw.trim(),
        charStart: cursor + leadingWhitespace,
        charEnd: windowEnd - trailingWhitespace,
        advanceTo: windowEnd,
    };
}
|
|
113
|
+
/**
 * Pull the window end back to just after the nearest preceding
 * whitespace character so a chunk does not stop in the middle of a word.
 *
 * Only positions in [cursor + minChunkSize, rawEnd) are inspected. When
 * none holds whitespace, `rawEnd` is returned unchanged, so text without
 * spaces still advances by a full window. A window that already reaches
 * the end of the text is left alone.
 *
 * @param text - Full source text.
 * @param cursor - Offset at which the window opens.
 * @param rawEnd - Untrimmed window end.
 * @param minChunkSize - Minimum distance from `cursor` the end may
 *   retreat to.
 * @returns The adjusted window end.
 */
function preserveWordBoundary(text, cursor, rawEnd, minChunkSize) {
    if (rawEnd >= text.length) {
        return rawEnd;
    }
    const floor = cursor + minChunkSize;
    let end = rawEnd;
    while (end > floor) {
        // `end` is exclusive, so the character in front of it is tested.
        if (/\s/.test(text[end - 1])) {
            return end;
        }
        end -= 1;
    }
    return rawEnd;
}
|
|
129
|
+
/**
 * Compute where the next window opens.
 *
 * @param charEnd - End of the window that was just processed.
 * @param overlap - Characters the next window should share with it.
 * @param prevCursor - Start of the window that was just processed.
 * @returns `charEnd - overlap` when that lies beyond `prevCursor`;
 *   otherwise `charEnd`.
 */
function advanceCursor(charEnd, overlap, prevCursor) {
    const rewound = charEnd - overlap;
    if (rewound > prevCursor) {
        return rewound;
    }
    // The overlap is at least as long as the last window. Stepping back
    // would leave the cursor in place and loop forever, so the overlap is
    // dropped for this step.
    return charEnd;
}
|
|
135
|
+
/**
 * Assemble a chunk record, deriving the hash and the token estimate
 * from `content`.
 *
 * @param chunkIndex - Position of the chunk among the emitted chunks.
 * @param content - Trimmed chunk text.
 * @param charStart - Offset where `content` starts in the source text.
 * @param charEnd - Offset just past the end of `content`.
 * @returns `{ chunkIndex, content, contentHash, charStart, charEnd, tokenCount }`.
 */
function makeChunk(chunkIndex, content, charStart, charEnd) {
    const contentHash = hashChunkContent(content);
    const tokenCount = estimateTokens(content);
    return { chunkIndex, content, contentHash, charStart, charEnd, tokenCount };
}
|
|
145
|
+
/**
 * Rough token count at about 4 characters per token (English ASCII).
 *
 * A real tokenizer is avoided on purpose: it would tie the chunker to
 * one embedding model and pull in a heavy dependency. The value is
 * metadata for downstream cost reporting and controls nothing.
 *
 * @param content - Chunk text.
 * @returns `content.length / 4` rounded up, never less than 1.
 */
function estimateTokens(content) {
    const approx = Math.ceil(content.length / 4);
    return approx < 1 ? 1 : approx;
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase C constrained failure-marker transitions.
|
|
3
|
+
*
|
|
4
|
+
* `POST /v1/documents/:id/extraction-failure` and
|
|
5
|
+
* `POST /v1/documents/:id/index-failure` need to be **constrained**
|
|
6
|
+
* client-side surfaces, not arbitrary status writes - clients can
|
|
7
|
+
* declare *that* extraction or indexing failed and *what category*,
|
|
8
|
+
* but cannot put a document into arbitrary status combinations or
|
|
9
|
+
* smuggle log content into `last_error`. This module owns the
|
|
10
|
+
* load-then-transition logic for both endpoints:
|
|
11
|
+
*
|
|
12
|
+
* 1. Open a transaction and take the per-document
|
|
13
|
+
* `pg_advisory_xact_lock` so concurrent markers serialize.
|
|
14
|
+
* 2. Load the row's raw / extraction / semantic-index status.
|
|
15
|
+
* 3. Match the current state against the allowed source set and
|
|
16
|
+
* apply the corresponding write (or throw an
|
|
17
|
+
* `*InvalidStateError` -> 409).
|
|
18
|
+
* 4. Read the row back and COMMIT so the caller can see the
|
|
19
|
+
* durable post-transition shape.
|
|
20
|
+
*
|
|
21
|
+
* The audit fix the Phase B plan calls out (rev 18 Phase C section):
|
|
22
|
+
* the marker MUST sit in front of `markExtractionStatus` /
|
|
23
|
+
* `markSemanticIndexStatus` so the row's status pair stays internally
|
|
24
|
+
* consistent (e.g. `extraction='failed'` + `semantic_index='not_required'`
|
|
25
|
+
* always travel together).
|
|
26
|
+
*/
|
|
27
|
+
import pg from 'pg';
|
|
28
|
+
import type { ExtractionStatus, RawDocumentRow, RawStorageStatus, SemanticIndexStatus } from '../db/raw-document-types.js';
|
|
29
|
+
import type { ExtractionErrorCode, IndexErrorCode } from '../schemas/documents.js';
|
|
30
|
+
/**
 * The three per-layer statuses of a document at the moment a transition
 * was rejected. Echoed inside the `*InvalidStateError` classes so a 409
 * body can show the caller the current state.
 */
export interface DocumentLayerStateSnapshot {
    /** Status of the raw-storage layer. */
    raw_storage_status: RawStorageStatus;
    /** Status of the extraction layer. */
    extraction_status: ExtractionStatus;
    /** Status of the semantic-index layer. */
    semantic_index_status: SemanticIndexStatus;
}
|
|
36
|
+
/**
 * Raised when the document does not exist or is not owned by the user.
 * Routes map this error to a 404.
 */
export declare class FailureMarkerDocumentNotFoundError extends Error {
    /** Id of the document the error refers to. */
    readonly documentId: string;
    constructor(documentId: string);
}
|
|
41
|
+
/**
 * Phase C - raised when the row's current state does not permit the
 * requested extraction-layer transition. Routes map this error to a 409
 * and echo `current` in the response, letting the caller decide whether
 * a retry makes sense.
 */
export declare class ExtractionFailureInvalidStateError extends Error {
    /** Id of the document the error refers to. */
    readonly documentId: string;
    /** Layer statuses of the row when the transition was rejected. */
    readonly current: DocumentLayerStateSnapshot;
    constructor(documentId: string, current: DocumentLayerStateSnapshot);
}
|
|
51
|
+
/**
 * Phase C - raised when the row's current state does not permit the
 * requested index-layer transition. Routes map this error to a 409 and
 * echo `current` in the response.
 */
export declare class IndexFailureInvalidStateError extends Error {
    /** Id of the document the error refers to. */
    readonly documentId: string;
    /** Layer statuses of the row when the transition was rejected. */
    readonly current: DocumentLayerStateSnapshot;
    constructor(documentId: string, current: DocumentLayerStateSnapshot);
}
|
|
61
|
+
/**
 * Arguments shared by the failure-marker functions.
 *
 * @typeParam C - Error-code type of the layer being marked
 *   (`ExtractionErrorCode` or `IndexErrorCode` in this module).
 */
export interface MarkerInput<C> {
    /** User on whose behalf the document is marked; presumably used for the ownership check - confirm against the implementation. */
    userId: string;
    /** Id of the document to mark as failed. */
    documentId: string;
    /** Failure category reported by the client. */
    errorCode: C;
    /** Message accompanying the failure; NOTE(review): any validation or length limits are not visible in this declaration. */
    errorMessage: string;
}
|
|
67
|
+
/** Outcome of a failure-marker call. */
export interface MarkerResult {
    /** The document row as read back after the transition. */
    document: RawDocumentRow;
    /**
     * `true` when the row was already in the target failed state, so the
     * call merely refreshed `last_error` or, for a retry with the same
     * code, changed nothing at all. `false` when this call performed the
     * first transition into the failed state.
     */
    idempotent: boolean;
}
|
|
76
|
+
/**
|
|
77
|
+
* Phase C constrained transition for the extraction layer.
|
|
78
|
+
*
|
|
79
|
+
* Allowed source states:
|
|
80
|
+
* * `extraction_status='failed'` + same `errorCode` -> idempotent
|
|
81
|
+
* no-op; caller sees the existing row with `idempotent: true`.
|
|
82
|
+
* * `extraction_status='failed'` + different `errorCode` -> refresh
|
|
83
|
+
* `last_error` only; status stays `'failed'`. `idempotent: true`.
|
|
84
|
+
* * `extraction_status='pending'` + raw stored ->
|
|
85
|
+
* `extraction_status='failed'` + `semantic_index_status='not_required'`
|
|
86
|
+
* + new `last_error.layer='extraction'`. `idempotent: false`.
|
|
87
|
+
*
|
|
88
|
+
* Any other state throws `ExtractionFailureInvalidStateError` (-> 409).
|
|
89
|
+
*/
|
|
90
|
+
/**
 * @param pool - Postgres pool used to run the transition.
 * @param input - User, document, error code and error message for the failure.
 * @returns The document row after the call, plus whether the call was idempotent.
 * @throws ExtractionFailureInvalidStateError when the current state is not an allowed source state.
 * NOTE(review): presumably also throws FailureMarkerDocumentNotFoundError for unknown or foreign documents - confirm against the implementation.
 */
export declare function markExtractionFailure(pool: pg.Pool, input: MarkerInput<ExtractionErrorCode>): Promise<MarkerResult>;
|
|
91
|
+
/**
 * Phase C constrained transition for the index layer; counterpart of
 * `markExtractionFailure` for `POST /v1/documents/:id/index-failure`.
 *
 * NOTE(review): the allowed source states are not visible in this
 * declaration file - confirm them against the implementation. Presumably
 * throws `IndexFailureInvalidStateError` (-> 409) for a disallowed state
 * and `FailureMarkerDocumentNotFoundError` (-> 404) for an unknown or
 * foreign document.
 *
 * @param pool - Postgres pool used to run the transition.
 * @param input - User, document, error code and error message for the failure.
 * @returns The document row after the call, plus whether the call was idempotent.
 */
export declare function markIndexFailure(pool: pg.Pool, input: MarkerInput<IndexErrorCode>): Promise<MarkerResult>;
|