@atomicmemory/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/LICENSE +201 -0
- package/README.md +314 -0
- package/dist/app/bind-ephemeral.d.ts +18 -0
- package/dist/app/bind-ephemeral.js +22 -0
- package/dist/app/cors-headers.d.ts +12 -0
- package/dist/app/cors-headers.js +18 -0
- package/dist/app/create-app.d.ts +25 -0
- package/dist/app/create-app.js +156 -0
- package/dist/app/runtime-config-route-snapshot.d.ts +27 -0
- package/dist/app/runtime-config-route-snapshot.js +27 -0
- package/dist/app/runtime-container.d.ts +281 -0
- package/dist/app/runtime-container.js +297 -0
- package/dist/app/startup-checks.d.ts +28 -0
- package/dist/app/startup-checks.js +45 -0
- package/dist/bin.d.ts +17 -0
- package/dist/bin.js +128 -0
- package/dist/config.d.ts +680 -0
- package/dist/config.js +808 -0
- package/dist/db/agent-trust-repository.d.ts +49 -0
- package/dist/db/agent-trust-repository.js +66 -0
- package/dist/db/belief-edges-repository.d.ts +68 -0
- package/dist/db/belief-edges-repository.js +124 -0
- package/dist/db/claim-repository.d.ts +6 -0
- package/dist/db/claim-repository.js +4 -0
- package/dist/db/contradictions-repository.d.ts +56 -0
- package/dist/db/contradictions-repository.js +88 -0
- package/dist/db/document-chunk-repository.d.ts +48 -0
- package/dist/db/document-chunk-repository.js +145 -0
- package/dist/db/document-chunk-types.d.ts +35 -0
- package/dist/db/document-chunk-types.js +9 -0
- package/dist/db/document-list-cursor.d.ts +45 -0
- package/dist/db/document-list-cursor.js +111 -0
- package/dist/db/document-list-repository.d.ts +103 -0
- package/dist/db/document-list-repository.js +204 -0
- package/dist/db/entity-cards-repository.d.ts +37 -0
- package/dist/db/entity-cards-repository.js +46 -0
- package/dist/db/entity-values-repository.d.ts +26 -0
- package/dist/db/entity-values-repository.js +57 -0
- package/dist/db/link-repository.d.ts +30 -0
- package/dist/db/link-repository.js +54 -0
- package/dist/db/memory-repository.d.ts +163 -0
- package/dist/db/memory-repository.js +232 -0
- package/dist/db/migrate.d.ts +6 -0
- package/dist/db/migrate.js +36 -0
- package/dist/db/mmr.d.ts +14 -0
- package/dist/db/mmr.js +57 -0
- package/dist/db/passport-feed-repository.d.ts +91 -0
- package/dist/db/passport-feed-repository.js +198 -0
- package/dist/db/pg-episode-store.d.ts +19 -0
- package/dist/db/pg-episode-store.js +17 -0
- package/dist/db/pg-link-store.d.ts +17 -0
- package/dist/db/pg-link-store.js +14 -0
- package/dist/db/pg-memory-store.d.ts +68 -0
- package/dist/db/pg-memory-store.js +53 -0
- package/dist/db/pg-recap-store.d.ts +13 -0
- package/dist/db/pg-recap-store.js +19 -0
- package/dist/db/pg-representation-store.d.ts +17 -0
- package/dist/db/pg-representation-store.js +17 -0
- package/dist/db/pg-search-store.d.ts +29 -0
- package/dist/db/pg-search-store.js +47 -0
- package/dist/db/pool.d.ts +5 -0
- package/dist/db/pool.js +21 -0
- package/dist/db/ppr.d.ts +56 -0
- package/dist/db/ppr.js +178 -0
- package/dist/db/query-helpers.d.ts +44 -0
- package/dist/db/query-helpers.js +60 -0
- package/dist/db/raw-doc-artifact-sync.d.ts +128 -0
- package/dist/db/raw-doc-artifact-sync.js +259 -0
- package/dist/db/raw-document-blob-repository.d.ts +148 -0
- package/dist/db/raw-document-blob-repository.js +300 -0
- package/dist/db/raw-document-repository.d.ts +104 -0
- package/dist/db/raw-document-repository.js +410 -0
- package/dist/db/raw-document-status-repository.d.ts +122 -0
- package/dist/db/raw-document-status-repository.js +183 -0
- package/dist/db/raw-document-types.d.ts +236 -0
- package/dist/db/raw-document-types.js +10 -0
- package/dist/db/raw-storage-reconciliation-repository.d.ts +110 -0
- package/dist/db/raw-storage-reconciliation-repository.js +200 -0
- package/dist/db/reflection-jobs-repository.d.ts +33 -0
- package/dist/db/reflection-jobs-repository.js +48 -0
- package/dist/db/reflections-repository.d.ts +41 -0
- package/dist/db/reflections-repository.js +83 -0
- package/dist/db/repository-claims.d.ts +141 -0
- package/dist/db/repository-claims.js +376 -0
- package/dist/db/repository-deferred-audn.d.ts +33 -0
- package/dist/db/repository-deferred-audn.js +69 -0
- package/dist/db/repository-document-delete.d.ts +53 -0
- package/dist/db/repository-document-delete.js +156 -0
- package/dist/db/repository-entities.d.ts +114 -0
- package/dist/db/repository-entities.js +317 -0
- package/dist/db/repository-entity-attributes.d.ts +41 -0
- package/dist/db/repository-entity-attributes.js +65 -0
- package/dist/db/repository-entity-graph.d.ts +32 -0
- package/dist/db/repository-entity-graph.js +87 -0
- package/dist/db/repository-first-mentions.d.ts +41 -0
- package/dist/db/repository-first-mentions.js +79 -0
- package/dist/db/repository-lessons.d.ts +51 -0
- package/dist/db/repository-lessons.js +90 -0
- package/dist/db/repository-links.d.ts +26 -0
- package/dist/db/repository-links.js +105 -0
- package/dist/db/repository-observation.d.ts +26 -0
- package/dist/db/repository-observation.js +51 -0
- package/dist/db/repository-read.d.ts +56 -0
- package/dist/db/repository-read.js +271 -0
- package/dist/db/repository-recaps.d.ts +59 -0
- package/dist/db/repository-recaps.js +158 -0
- package/dist/db/repository-representations.d.ts +48 -0
- package/dist/db/repository-representations.js +162 -0
- package/dist/db/repository-temporal-state.d.ts +35 -0
- package/dist/db/repository-temporal-state.js +46 -0
- package/dist/db/repository-tll.d.ts +88 -0
- package/dist/db/repository-tll.js +179 -0
- package/dist/db/repository-types.d.ts +313 -0
- package/dist/db/repository-types.js +142 -0
- package/dist/db/repository-user-profiles.d.ts +17 -0
- package/dist/db/repository-user-profiles.js +28 -0
- package/dist/db/repository-vector-search.d.ts +33 -0
- package/dist/db/repository-vector-search.js +373 -0
- package/dist/db/repository-wipe.d.ts +34 -0
- package/dist/db/repository-wipe.js +94 -0
- package/dist/db/repository-write.d.ts +61 -0
- package/dist/db/repository-write.js +279 -0
- package/dist/db/schema.sql +1355 -0
- package/dist/db/storage-artifact-delete-tx.d.ts +56 -0
- package/dist/db/storage-artifact-delete-tx.js +123 -0
- package/dist/db/storage-artifact-providers.d.ts +21 -0
- package/dist/db/storage-artifact-providers.js +21 -0
- package/dist/db/storage-artifact-recovery-repository.d.ts +66 -0
- package/dist/db/storage-artifact-recovery-repository.js +58 -0
- package/dist/db/storage-artifact-repository.d.ts +329 -0
- package/dist/db/storage-artifact-repository.js +497 -0
- package/dist/db/stores.d.ts +220 -0
- package/dist/db/stores.js +12 -0
- package/dist/db/summaries-repository.d.ts +74 -0
- package/dist/db/summaries-repository.js +125 -0
- package/dist/eval/beam-10m-loader.d.ts +98 -0
- package/dist/eval/beam-10m-loader.js +128 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +17 -0
- package/dist/middleware/require-bearer.d.ts +27 -0
- package/dist/middleware/require-bearer.js +60 -0
- package/dist/middleware/validate-response.d.ts +33 -0
- package/dist/middleware/validate-response.js +55 -0
- package/dist/middleware/validate.d.ts +43 -0
- package/dist/middleware/validate.js +85 -0
- package/dist/routes/agents.d.ts +13 -0
- package/dist/routes/agents.js +89 -0
- package/dist/routes/document-response-formatters.d.ts +98 -0
- package/dist/routes/document-response-formatters.js +243 -0
- package/dist/routes/documents.d.ts +74 -0
- package/dist/routes/documents.js +425 -0
- package/dist/routes/memories.d.ts +29 -0
- package/dist/routes/memories.js +725 -0
- package/dist/routes/memory-response-formatters.d.ts +179 -0
- package/dist/routes/memory-response-formatters.js +210 -0
- package/dist/routes/public-raw-storage-metadata.d.ts +54 -0
- package/dist/routes/public-raw-storage-metadata.js +56 -0
- package/dist/routes/reflect.d.ts +14 -0
- package/dist/routes/reflect.js +19 -0
- package/dist/routes/response-schema-map.d.ts +14 -0
- package/dist/routes/response-schema-map.js +69 -0
- package/dist/routes/route-errors.d.ts +12 -0
- package/dist/routes/route-errors.js +30 -0
- package/dist/routes/storage-error-handlers.d.ts +34 -0
- package/dist/routes/storage-error-handlers.js +185 -0
- package/dist/routes/storage-response-formatters.d.ts +44 -0
- package/dist/routes/storage-response-formatters.js +155 -0
- package/dist/routes/storage.d.ts +38 -0
- package/dist/routes/storage.js +369 -0
- package/dist/routes/upstream-provider-errors.d.ts +19 -0
- package/dist/routes/upstream-provider-errors.js +95 -0
- package/dist/schemas/agents.d.ts +79 -0
- package/dist/schemas/agents.js +126 -0
- package/dist/schemas/common.d.ts +110 -0
- package/dist/schemas/common.js +190 -0
- package/dist/schemas/document-list-responses.d.ts +102 -0
- package/dist/schemas/document-list-responses.js +87 -0
- package/dist/schemas/document-list-schemas.d.ts +123 -0
- package/dist/schemas/document-list-schemas.js +174 -0
- package/dist/schemas/document-response-schemas.d.ts +610 -0
- package/dist/schemas/document-response-schemas.js +264 -0
- package/dist/schemas/document-status-envelope.d.ts +48 -0
- package/dist/schemas/document-status-envelope.js +54 -0
- package/dist/schemas/documents.d.ts +292 -0
- package/dist/schemas/documents.js +449 -0
- package/dist/schemas/errors.d.ts +75 -0
- package/dist/schemas/errors.js +105 -0
- package/dist/schemas/memories.d.ts +378 -0
- package/dist/schemas/memories.js +542 -0
- package/dist/schemas/openapi.d.ts +24 -0
- package/dist/schemas/openapi.js +1038 -0
- package/dist/schemas/response-scalars.d.ts +10 -0
- package/dist/schemas/response-scalars.js +10 -0
- package/dist/schemas/responses.d.ts +536 -0
- package/dist/schemas/responses.js +350 -0
- package/dist/schemas/search-response-parts.d.ts +97 -0
- package/dist/schemas/search-response-parts.js +103 -0
- package/dist/schemas/storage-schemas.d.ts +175 -0
- package/dist/schemas/storage-schemas.js +277 -0
- package/dist/schemas/zod-setup.d.ts +15 -0
- package/dist/schemas/zod-setup.js +17 -0
- package/dist/server.d.ts +13 -0
- package/dist/server.js +57 -0
- package/dist/services/abstract-query-policy.d.ts +13 -0
- package/dist/services/abstract-query-policy.js +50 -0
- package/dist/services/affinity-clustering.d.ts +66 -0
- package/dist/services/affinity-clustering.js +125 -0
- package/dist/services/agentic-retrieval.d.ts +38 -0
- package/dist/services/agentic-retrieval.js +126 -0
- package/dist/services/answer-format.d.ts +56 -0
- package/dist/services/answer-format.js +118 -0
- package/dist/services/answer-rescue.d.ts +72 -0
- package/dist/services/answer-rescue.js +177 -0
- package/dist/services/answer-verifier.d.ts +24 -0
- package/dist/services/answer-verifier.js +73 -0
- package/dist/services/api-retry.d.ts +6 -0
- package/dist/services/api-retry.js +41 -0
- package/dist/services/assistant-turn-filter.d.ts +20 -0
- package/dist/services/assistant-turn-filter.js +69 -0
- package/dist/services/atomicmem-uri.d.ts +33 -0
- package/dist/services/atomicmem-uri.js +86 -0
- package/dist/services/audit-events.d.ts +54 -0
- package/dist/services/audit-events.js +56 -0
- package/dist/services/chunked-extraction.d.ts +21 -0
- package/dist/services/chunked-extraction.js +108 -0
- package/dist/services/claim-slotting.d.ts +27 -0
- package/dist/services/claim-slotting.js +38 -0
- package/dist/services/claude-code-llm.d.ts +19 -0
- package/dist/services/claude-code-llm.js +96 -0
- package/dist/services/composite-dedup.d.ts +50 -0
- package/dist/services/composite-dedup.js +153 -0
- package/dist/services/composite-grouping.d.ts +41 -0
- package/dist/services/composite-grouping.js +111 -0
- package/dist/services/composite-staleness.d.ts +20 -0
- package/dist/services/composite-staleness.js +50 -0
- package/dist/services/conciseness-preference.d.ts +14 -0
- package/dist/services/conciseness-preference.js +42 -0
- package/dist/services/conflict-policy.d.ts +20 -0
- package/dist/services/conflict-policy.js +335 -0
- package/dist/services/consensus-extraction.d.ts +39 -0
- package/dist/services/consensus-extraction.js +147 -0
- package/dist/services/consensus-validation.d.ts +52 -0
- package/dist/services/consensus-validation.js +206 -0
- package/dist/services/consolidation-service.d.ts +60 -0
- package/dist/services/consolidation-service.js +171 -0
- package/dist/services/content-detection.d.ts +18 -0
- package/dist/services/content-detection.js +25 -0
- package/dist/services/contradiction-surfacing.d.ts +62 -0
- package/dist/services/contradiction-surfacing.js +111 -0
- package/dist/services/cost-telemetry.d.ts +39 -0
- package/dist/services/cost-telemetry.js +58 -0
- package/dist/services/counter-evidence.d.ts +34 -0
- package/dist/services/counter-evidence.js +92 -0
- package/dist/services/current-state-ranking.d.ts +21 -0
- package/dist/services/current-state-ranking.js +152 -0
- package/dist/services/deferred-audn.d.ts +47 -0
- package/dist/services/deferred-audn.js +162 -0
- package/dist/services/document-chunker.d.ts +50 -0
- package/dist/services/document-chunker.js +153 -0
- package/dist/services/document-failure-markers.d.ts +91 -0
- package/dist/services/document-failure-markers.js +305 -0
- package/dist/services/document-indexer.d.ts +122 -0
- package/dist/services/document-indexer.js +405 -0
- package/dist/services/document-service.d.ts +245 -0
- package/dist/services/document-service.js +325 -0
- package/dist/services/document-upload-artifact-sync.d.ts +80 -0
- package/dist/services/document-upload-artifact-sync.js +162 -0
- package/dist/services/document-upload-beta2-recovery.d.ts +72 -0
- package/dist/services/document-upload-beta2-recovery.js +94 -0
- package/dist/services/document-upload.d.ts +44 -0
- package/dist/services/document-upload.js +353 -0
- package/dist/services/embedding.d.ts +57 -0
- package/dist/services/embedding.js +416 -0
- package/dist/services/entity-attribute-extractor.d.ts +34 -0
- package/dist/services/entity-attribute-extractor.js +117 -0
- package/dist/services/entity-card-synthesis.d.ts +54 -0
- package/dist/services/entity-card-synthesis.js +92 -0
- package/dist/services/entity-dedup.d.ts +9 -0
- package/dist/services/entity-dedup.js +14 -0
- package/dist/services/entity-graph.d.ts +17 -0
- package/dist/services/entity-graph.js +135 -0
- package/dist/services/entropy-gate.d.ts +52 -0
- package/dist/services/entropy-gate.js +56 -0
- package/dist/services/episode-fetcher.d.ts +47 -0
- package/dist/services/episode-fetcher.js +128 -0
- package/dist/services/event-anchor-facts.d.ts +8 -0
- package/dist/services/event-anchor-facts.js +205 -0
- package/dist/services/event-chain-detector.d.ts +52 -0
- package/dist/services/event-chain-detector.js +83 -0
- package/dist/services/extraction-cache.d.ts +9 -0
- package/dist/services/extraction-cache.js +54 -0
- package/dist/services/extraction-enrichment.d.ts +9 -0
- package/dist/services/extraction-enrichment.js +223 -0
- package/dist/services/extraction.d.ts +69 -0
- package/dist/services/extraction.js +596 -0
- package/dist/services/fact-normalization.d.ts +12 -0
- package/dist/services/fact-normalization.js +248 -0
- package/dist/services/filecoin-observability.d.ts +127 -0
- package/dist/services/filecoin-observability.js +200 -0
- package/dist/services/first-mention-service.d.ts +76 -0
- package/dist/services/first-mention-service.js +186 -0
- package/dist/services/hierarchical-retrieval.d.ts +49 -0
- package/dist/services/hierarchical-retrieval.js +50 -0
- package/dist/services/ingest-fact-pipeline.d.ts +32 -0
- package/dist/services/ingest-fact-pipeline.js +212 -0
- package/dist/services/ingest-post-write.d.ts +50 -0
- package/dist/services/ingest-post-write.js +117 -0
- package/dist/services/ingest-trace.d.ts +32 -0
- package/dist/services/ingest-trace.js +60 -0
- package/dist/services/input-sanitizer.d.ts +41 -0
- package/dist/services/input-sanitizer.js +135 -0
- package/dist/services/iterative-retrieval.d.ts +26 -0
- package/dist/services/iterative-retrieval.js +139 -0
- package/dist/services/keyword-expansion.d.ts +10 -0
- package/dist/services/keyword-expansion.js +26 -0
- package/dist/services/lesson-service.d.ts +68 -0
- package/dist/services/lesson-service.js +178 -0
- package/dist/services/literal-extractor.d.ts +16 -0
- package/dist/services/literal-extractor.js +74 -0
- package/dist/services/literal-list-protection.d.ts +17 -0
- package/dist/services/literal-list-protection.js +134 -0
- package/dist/services/literal-query-expansion.d.ts +20 -0
- package/dist/services/literal-query-expansion.js +181 -0
- package/dist/services/llm.d.ts +61 -0
- package/dist/services/llm.js +265 -0
- package/dist/services/memcell-projection.d.ts +17 -0
- package/dist/services/memcell-projection.js +41 -0
- package/dist/services/memory-audn.d.ts +43 -0
- package/dist/services/memory-audn.js +419 -0
- package/dist/services/memory-crud.d.ts +93 -0
- package/dist/services/memory-crud.js +255 -0
- package/dist/services/memory-ingest.d.ts +21 -0
- package/dist/services/memory-ingest.js +249 -0
- package/dist/services/memory-lifecycle.d.ts +75 -0
- package/dist/services/memory-lifecycle.js +108 -0
- package/dist/services/memory-lineage.d.ts +181 -0
- package/dist/services/memory-lineage.js +232 -0
- package/dist/services/memory-network.d.ts +40 -0
- package/dist/services/memory-network.js +75 -0
- package/dist/services/memory-search-types.d.ts +25 -0
- package/dist/services/memory-search-types.js +10 -0
- package/dist/services/memory-search.d.ts +48 -0
- package/dist/services/memory-search.js +505 -0
- package/dist/services/memory-service-types.d.ts +371 -0
- package/dist/services/memory-service-types.js +8 -0
- package/dist/services/memory-service.d.ts +152 -0
- package/dist/services/memory-service.js +225 -0
- package/dist/services/memory-storage.d.ts +33 -0
- package/dist/services/memory-storage.js +328 -0
- package/dist/services/msr-aggregator.d.ts +38 -0
- package/dist/services/msr-aggregator.js +97 -0
- package/dist/services/msr-detector.d.ts +35 -0
- package/dist/services/msr-detector.js +65 -0
- package/dist/services/namespace-retrieval.d.ts +60 -0
- package/dist/services/namespace-retrieval.js +180 -0
- package/dist/services/observation-date-extraction.d.ts +12 -0
- package/dist/services/observation-date-extraction.js +50 -0
- package/dist/services/observation-service.d.ts +27 -0
- package/dist/services/observation-service.js +84 -0
- package/dist/services/packaging-observability.d.ts +29 -0
- package/dist/services/packaging-observability.js +146 -0
- package/dist/services/query-expansion.d.ts +83 -0
- package/dist/services/query-expansion.js +242 -0
- package/dist/services/query-keyword-matches.d.ts +6 -0
- package/dist/services/query-keyword-matches.js +56 -0
- package/dist/services/query-term-visibility.d.ts +28 -0
- package/dist/services/query-term-visibility.js +100 -0
- package/dist/services/quick-extraction.d.ts +25 -0
- package/dist/services/quick-extraction.js +431 -0
- package/dist/services/quoted-entity-extraction.d.ts +10 -0
- package/dist/services/quoted-entity-extraction.js +161 -0
- package/dist/services/raw-storage-reconciler-backoff.d.ts +8 -0
- package/dist/services/raw-storage-reconciler-backoff.js +14 -0
- package/dist/services/raw-storage-reconciler-scheduler.d.ts +29 -0
- package/dist/services/raw-storage-reconciler-scheduler.js +43 -0
- package/dist/services/raw-storage-reconciler.d.ts +71 -0
- package/dist/services/raw-storage-reconciler.js +278 -0
- package/dist/services/recap-builder.d.ts +49 -0
- package/dist/services/recap-builder.js +157 -0
- package/dist/services/reflect-jobs.d.ts +23 -0
- package/dist/services/reflect-jobs.js +36 -0
- package/dist/services/reflect-prompts.d.ts +71 -0
- package/dist/services/reflect-prompts.js +99 -0
- package/dist/services/reflect-retrieval.d.ts +33 -0
- package/dist/services/reflect-retrieval.js +30 -0
- package/dist/services/reflect.d.ts +49 -0
- package/dist/services/reflect.js +84 -0
- package/dist/services/relative-temporal.d.ts +14 -0
- package/dist/services/relative-temporal.js +163 -0
- package/dist/services/relevance-policy.d.ts +37 -0
- package/dist/services/relevance-policy.js +109 -0
- package/dist/services/rerank.d.ts +32 -0
- package/dist/services/rerank.js +118 -0
- package/dist/services/reranker.d.ts +20 -0
- package/dist/services/reranker.js +99 -0
- package/dist/services/retrieval-channel-rules.d.ts +34 -0
- package/dist/services/retrieval-channel-rules.js +41 -0
- package/dist/services/retrieval-config-overlay.d.ts +36 -0
- package/dist/services/retrieval-config-overlay.js +44 -0
- package/dist/services/retrieval-format.d.ts +119 -0
- package/dist/services/retrieval-format.js +559 -0
- package/dist/services/retrieval-policy.d.ts +69 -0
- package/dist/services/retrieval-policy.js +275 -0
- package/dist/services/retrieval-profiles.d.ts +37 -0
- package/dist/services/retrieval-profiles.js +90 -0
- package/dist/services/retrieval-side-effects.d.ts +14 -0
- package/dist/services/retrieval-side-effects.js +26 -0
- package/dist/services/retrieval-trace.d.ts +108 -0
- package/dist/services/retrieval-trace.js +147 -0
- package/dist/services/rrf-fusion.d.ts +18 -0
- package/dist/services/rrf-fusion.js +34 -0
- package/dist/services/search-pipeline.d.ts +71 -0
- package/dist/services/search-pipeline.js +788 -0
- package/dist/services/session-date.d.ts +20 -0
- package/dist/services/session-date.js +61 -0
- package/dist/services/session-packaging.d.ts +53 -0
- package/dist/services/session-packaging.js +182 -0
- package/dist/services/session-summary-generator.d.ts +53 -0
- package/dist/services/session-summary-generator.js +134 -0
- package/dist/services/specialists/cr-specialist.d.ts +52 -0
- package/dist/services/specialists/cr-specialist.js +121 -0
- package/dist/services/specialists/dispatch.d.ts +53 -0
- package/dist/services/specialists/dispatch.js +102 -0
- package/dist/services/specialists/ie-ku-specialist.d.ts +37 -0
- package/dist/services/specialists/ie-ku-specialist.js +63 -0
- package/dist/services/specialists/msr-specialist.d.ts +61 -0
- package/dist/services/specialists/msr-specialist.js +162 -0
- package/dist/services/specialists/tr-specialist.d.ts +37 -0
- package/dist/services/specialists/tr-specialist.js +146 -0
- package/dist/services/storage-key-prefix.d.ts +42 -0
- package/dist/services/storage-key-prefix.js +45 -0
- package/dist/services/storage-put-recovery.d.ts +71 -0
- package/dist/services/storage-put-recovery.js +269 -0
- package/dist/services/storage-service-errors.d.ts +124 -0
- package/dist/services/storage-service-errors.js +189 -0
- package/dist/services/storage-service.d.ts +176 -0
- package/dist/services/storage-service.js +423 -0
- package/dist/services/subject-aware-ranking.d.ts +19 -0
- package/dist/services/subject-aware-ranking.js +161 -0
- package/dist/services/supplemental-extraction.d.ts +7 -0
- package/dist/services/supplemental-extraction.js +116 -0
- package/dist/services/tbc-execution.d.ts +49 -0
- package/dist/services/tbc-execution.js +284 -0
- package/dist/services/temporal-classifier.d.ts +56 -0
- package/dist/services/temporal-classifier.js +94 -0
- package/dist/services/temporal-endpoint-evidence.d.ts +12 -0
- package/dist/services/temporal-endpoint-evidence.js +313 -0
- package/dist/services/temporal-fingerprint.d.ts +6 -0
- package/dist/services/temporal-fingerprint.js +12 -0
- package/dist/services/temporal-format.d.ts +9 -0
- package/dist/services/temporal-format.js +21 -0
- package/dist/services/temporal-intent.d.ts +39 -0
- package/dist/services/temporal-intent.js +78 -0
- package/dist/services/temporal-query-constraints.d.ts +16 -0
- package/dist/services/temporal-query-constraints.js +107 -0
- package/dist/services/temporal-query-expansion.d.ts +14 -0
- package/dist/services/temporal-query-expansion.js +131 -0
- package/dist/services/temporal-rerank.d.ts +22 -0
- package/dist/services/temporal-rerank.js +47 -0
- package/dist/services/temporal-result-protection.d.ts +7 -0
- package/dist/services/temporal-result-protection.js +60 -0
- package/dist/services/temporal-state-write.d.ts +57 -0
- package/dist/services/temporal-state-write.js +45 -0
- package/dist/services/tiered-context.d.ts +87 -0
- package/dist/services/tiered-context.js +214 -0
- package/dist/services/tiered-loading.d.ts +88 -0
- package/dist/services/tiered-loading.js +263 -0
- package/dist/services/timeline-pack.d.ts +36 -0
- package/dist/services/timeline-pack.js +50 -0
- package/dist/services/timing.d.ts +13 -0
- package/dist/services/timing.js +72 -0
- package/dist/services/tll-augmentation.d.ts +20 -0
- package/dist/services/tll-augmentation.js +125 -0
- package/dist/services/tll-retrieval.d.ts +55 -0
- package/dist/services/tll-retrieval.js +101 -0
- package/dist/services/topic-abstraction.d.ts +36 -0
- package/dist/services/topic-abstraction.js +105 -0
- package/dist/services/trust-scoring.d.ts +43 -0
- package/dist/services/trust-scoring.js +89 -0
- package/dist/services/typed-belief-calculus.d.ts +126 -0
- package/dist/services/typed-belief-calculus.js +204 -0
- package/dist/services/upload-config.d.ts +34 -0
- package/dist/services/upload-config.js +23 -0
- package/dist/services/upload-decision.d.ts +65 -0
- package/dist/services/upload-decision.js +98 -0
- package/dist/services/upload-helpers.d.ts +107 -0
- package/dist/services/upload-helpers.js +148 -0
- package/dist/services/user-profile-builder.d.ts +22 -0
- package/dist/services/user-profile-builder.js +109 -0
- package/dist/services/voyage-embedding.d.ts +22 -0
- package/dist/services/voyage-embedding.js +77 -0
- package/dist/services/write-security.d.ts +31 -0
- package/dist/services/write-security.js +64 -0
- package/dist/storage/artifact-public-redaction.d.ts +34 -0
- package/dist/storage/artifact-public-redaction.js +83 -0
- package/dist/storage/cleanup.d.ts +103 -0
- package/dist/storage/cleanup.js +138 -0
- package/dist/storage/codec-factory.d.ts +17 -0
- package/dist/storage/codec-factory.js +33 -0
- package/dist/storage/codecs/aes-gcm-codec.d.ts +44 -0
- package/dist/storage/codecs/aes-gcm-codec.js +108 -0
- package/dist/storage/codecs/noop-codec.d.ts +16 -0
- package/dist/storage/codecs/noop-codec.js +23 -0
- package/dist/storage/factory.d.ts +44 -0
- package/dist/storage/factory.js +99 -0
- package/dist/storage/filecoin-cid-validation.d.ts +82 -0
- package/dist/storage/filecoin-cid-validation.js +122 -0
- package/dist/storage/filecoin-public-metadata.d.ts +73 -0
- package/dist/storage/filecoin-public-metadata.js +110 -0
- package/dist/storage/local-fs-store.d.ts +39 -0
- package/dist/storage/local-fs-store.js +145 -0
- package/dist/storage/pointer-uri-allowlist.d.ts +38 -0
- package/dist/storage/pointer-uri-allowlist.js +70 -0
- package/dist/storage/provider-metadata-projection.d.ts +27 -0
- package/dist/storage/provider-metadata-projection.js +68 -0
- package/dist/storage/providers/filecoin/backend.d.ts +42 -0
- package/dist/storage/providers/filecoin/backend.js +250 -0
- package/dist/storage/providers/filecoin/config.d.ts +70 -0
- package/dist/storage/providers/filecoin/config.js +275 -0
- package/dist/storage/providers/filecoin/errors.d.ts +45 -0
- package/dist/storage/providers/filecoin/errors.js +56 -0
- package/dist/storage/providers/filecoin/filecoin-pin-car.d.ts +78 -0
- package/dist/storage/providers/filecoin/filecoin-pin-car.js +155 -0
- package/dist/storage/providers/filecoin/filecoin-pin-client.d.ts +92 -0
- package/dist/storage/providers/filecoin/filecoin-pin-client.js +199 -0
- package/dist/storage/providers/filecoin/filecoin-pin-mapping.d.ts +58 -0
- package/dist/storage/providers/filecoin/filecoin-pin-mapping.js +103 -0
- package/dist/storage/providers/filecoin/filecoin-pin-timeout.d.ts +30 -0
- package/dist/storage/providers/filecoin/filecoin-pin-timeout.js +53 -0
- package/dist/storage/providers/filecoin/filecoin-pin-vendor.d.ts +111 -0
- package/dist/storage/providers/filecoin/filecoin-pin-vendor.js +87 -0
- package/dist/storage/providers/filecoin/hints.d.ts +71 -0
- package/dist/storage/providers/filecoin/hints.js +123 -0
- package/dist/storage/providers/filecoin/index.d.ts +51 -0
- package/dist/storage/providers/filecoin/index.js +103 -0
- package/dist/storage/providers/filecoin/ipfs-cid.d.ts +50 -0
- package/dist/storage/providers/filecoin/ipfs-cid.js +64 -0
- package/dist/storage/providers/filecoin/metadata.d.ts +72 -0
- package/dist/storage/providers/filecoin/metadata.js +137 -0
- package/dist/storage/providers/filecoin/piece-cid.d.ts +48 -0
- package/dist/storage/providers/filecoin/piece-cid.js +57 -0
- package/dist/storage/providers/filecoin/provider-client.d.ts +234 -0
- package/dist/storage/providers/filecoin/provider-client.js +27 -0
- package/dist/storage/providers/filecoin/readiness.d.ts +62 -0
- package/dist/storage/providers/filecoin/readiness.js +85 -0
- package/dist/storage/providers/filecoin/retriever.d.ts +82 -0
- package/dist/storage/providers/filecoin/retriever.js +63 -0
- package/dist/storage/providers/filecoin/skeleton-client.d.ts +36 -0
- package/dist/storage/providers/filecoin/skeleton-client.js +55 -0
- package/dist/storage/providers/filecoin/synapse-client.d.ts +169 -0
- package/dist/storage/providers/filecoin/synapse-client.js +343 -0
- package/dist/storage/providers/filecoin/synapse-construction.d.ts +26 -0
- package/dist/storage/providers/filecoin/synapse-construction.js +47 -0
- package/dist/storage/providers/filecoin/synapse-error-mapping.d.ts +23 -0
- package/dist/storage/providers/filecoin/synapse-error-mapping.js +49 -0
- package/dist/storage/providers/filecoin/synapse-readiness.d.ts +37 -0
- package/dist/storage/providers/filecoin/synapse-readiness.js +231 -0
- package/dist/storage/providers/filecoin/uri.d.ts +49 -0
- package/dist/storage/providers/filecoin/uri.js +84 -0
- package/dist/storage/providers/filecoin/verified-fetch-lifecycle.d.ts +77 -0
- package/dist/storage/providers/filecoin/verified-fetch-lifecycle.js +196 -0
- package/dist/storage/providers/filecoin/verified-fetch-retriever.d.ts +54 -0
- package/dist/storage/providers/filecoin/verified-fetch-retriever.js +81 -0
- package/dist/storage/providers/filecoin/verified-fetch-vendor.d.ts +71 -0
- package/dist/storage/providers/filecoin/verified-fetch-vendor.js +94 -0
- package/dist/storage/raw-content-codec.d.ts +89 -0
- package/dist/storage/raw-content-codec.js +47 -0
- package/dist/storage/raw-content-store-backend-adapter.d.ts +28 -0
- package/dist/storage/raw-content-store-backend-adapter.js +67 -0
- package/dist/storage/raw-content-store.d.ts +228 -0
- package/dist/storage/raw-content-store.js +27 -0
- package/dist/storage/s3-store.d.ts +42 -0
- package/dist/storage/s3-store.js +181 -0
- package/dist/storage/storage-backend-registry.d.ts +58 -0
- package/dist/storage/storage-backend-registry.js +56 -0
- package/dist/storage/storage-backend.d.ts +82 -0
- package/dist/storage/storage-backend.js +14 -0
- package/dist/storage/storage-capabilities.d.ts +56 -0
- package/dist/storage/storage-capabilities.js +170 -0
- package/dist/storage/store-registry.d.ts +67 -0
- package/dist/storage/store-registry.js +77 -0
- package/dist/vector-math.d.ts +15 -0
- package/dist/vector-math.js +31 -0
- package/dist/xml-escape.d.ts +5 -0
- package/dist/xml-escape.js +7 -0
- package/openapi.json +15395 -0
- package/openapi.yaml +10794 -0
- package/package.json +119 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding provider abstraction.
|
|
3
|
+
* Supports OpenAI, Ollama, OpenAI-compatible APIs, Voyage AI, and local
|
|
4
|
+
* WASM (via @huggingface/transformers with ONNX Runtime). Provider/model
|
|
5
|
+
* selection comes from the RuntimeConfig bound by createCoreRuntime().
|
|
6
|
+
*/
|
|
7
|
+
import { createHash } from 'node:crypto';
|
|
8
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
|
|
9
|
+
import { join } from 'node:path';
|
|
10
|
+
import OpenAI from 'openai';
|
|
11
|
+
import { retryOnRateLimit } from './api-retry.js';
|
|
12
|
+
import { estimateCostUsd, summarizeUsage, writeCostEvent, } from './cost-telemetry.js';
|
|
13
|
+
import { VOYAGE_API_BASE, VoyageEmbedding } from './voyage-embedding.js';
|
|
14
|
+
let embeddingConfig = null;
|
|
15
|
+
/**
|
|
16
|
+
* Bind the embedding module's config. Called once by the composition
|
|
17
|
+
* root (`createCoreRuntime`). Calling again rebinds and invalidates the
|
|
18
|
+
* stateful provider cache — primarily for tests that need to swap
|
|
19
|
+
* providers within a process.
|
|
20
|
+
*/
|
|
21
|
+
export function initEmbedding(config) {
    // Drop everything derived from the previous binding first: cached vectors,
    // the memoized provider, and the key that identified it.
    embeddingCache.clear();
    providerKey = '';
    provider = null;
    // Then install the new configuration for all later embed calls.
    embeddingConfig = config;
}
|
|
27
|
+
/**
 * Return the configuration bound by initEmbedding().
 * Throws when nothing has been bound yet, so a missing composition-root call
 * fails at the first use instead of producing undefined-property errors later.
 */
function requireConfig() {
    if (embeddingConfig) {
        return embeddingConfig;
    }
    throw new Error('embedding.ts: initEmbedding(config) must be called at composition-root time before embedText/embedTexts. See runtime-container.ts.');
}
|
|
33
|
+
/**
 * Emit one cost-telemetry event for a completed embedding request.
 *
 * @param config  bound runtime config; supplies the provider name and is
 *                forwarded to writeCostEvent
 * @param model   model name the request was sent to
 * @param usage   token usage summary (inputTokens / outputTokens / totalTokens)
 * @param started performance.now() timestamp taken before the request
 */
function writeEmbeddingUsageEvent(config, model, usage, started) {
    const providerName = config.embeddingProvider;
    const event = {
        stage: 'embedding',
        provider: providerName,
        model,
        requestKind: 'embedding',
        durationMs: performance.now() - started,
        // This helper is only reached after a real provider call.
        cacheHit: false,
        inputTokens: usage.inputTokens ?? null,
        outputTokens: usage.outputTokens ?? null,
        totalTokens: usage.totalTokens ?? null,
        estimatedCostUsd: estimateCostUsd(providerName, model, usage),
    };
    writeCostEvent(event, config);
}
|
|
47
|
+
/** OpenAI and any OpenAI-compatible embedding API. */
|
|
48
|
+
class OpenAICompatibleEmbedding {
    client;
    model;
    dimensions;
    /**
     * @param apiKey     key handed to the OpenAI client
     * @param model      embedding model name sent with every request
     * @param baseURL    endpoint override; undefined keeps the client default
     * @param dimensions requested vector width; undefined omits the parameter
     */
    constructor(apiKey, model, baseURL, dimensions) {
        this.client = new OpenAI({ apiKey, baseURL });
        this.model = model;
        this.dimensions = dimensions;
    }
    /** Embed one text and return the first vector of the response. */
    async embed(text, _task) {
        const { data } = await this.requestAndTrack(text);
        return data[0].embedding;
    }
    /** Embed several texts in one request; vectors come back in input order. */
    async embedBatch(texts, _task) {
        if (texts.length === 0) {
            return [];
        }
        const response = await this.requestAndTrack(texts);
        // Order by the item's index field so results line up with the inputs.
        const ordered = response.data.sort((left, right) => left.index - right.index);
        return ordered.map((item) => item.embedding);
    }
    /** Build the request payload, adding `dimensions` only when configured. */
    embeddingRequest(input) {
        const payload = { model: this.model, input };
        return this.dimensions === undefined
            ? payload
            : { ...payload, dimensions: this.dimensions };
    }
    /** Summarize token usage; prompt tokens fall back to the total when absent. */
    usageFromResponse(response) {
        const reported = response.usage;
        const totalTokens = reported?.total_tokens ?? null;
        const inputTokens = reported?.prompt_tokens ?? totalTokens;
        return summarizeUsage(inputTokens, null, totalTokens);
    }
    /** Send the request through retryOnRateLimit and record a usage event. */
    async requestAndTrack(input) {
        const config = requireConfig();
        const send = () => this.client.embeddings.create(this.embeddingRequest(input));
        const started = performance.now();
        const response = await retryOnRateLimit(send);
        const usage = this.usageFromResponse(response);
        writeEmbeddingUsageEvent(config, this.model, usage, started);
        return response;
    }
}
|
|
93
|
+
/** Ollama embedding via native HTTP API. */
|
|
94
|
+
class OllamaEmbedding {
    baseUrl;
    model;
    /**
     * @param model   Ollama model name sent with every request
     * @param baseUrl server root; defaults to the local Ollama port
     */
    constructor(model, baseUrl = 'http://localhost:11434') {
        this.baseUrl = baseUrl;
        this.model = model;
    }
    /** Embed one text and return its vector. */
    async embed(text, _task) {
        const data = await this.ollamaFetch(text, 'Ollama embed failed');
        return data.embeddings[0];
    }
    /** Embed several texts in one request. */
    async embedBatch(texts, _task) {
        if (texts.length === 0)
            return [];
        const data = await this.ollamaFetch(texts, 'Ollama embed batch failed');
        return data.embeddings;
    }
    /**
     * POST `input` (a string or an array of strings) to `/api/embed`, record a
     * usage event, and return the parsed response body.
     *
     * @throws Error on a non-2xx status, or when the body does not carry
     *         exactly one embedding per input.
     */
    async ollamaFetch(input, errorLabel) {
        const config = requireConfig();
        const started = performance.now();
        // A base URL configured with a trailing slash would otherwise produce
        // "//api/embed".
        const endpoint = `${String(this.baseUrl).replace(/\/+$/, '')}/api/embed`;
        const response = await fetch(endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model: this.model, input }),
            signal: AbortSignal.timeout(300_000),
        });
        if (!response.ok) {
            throw new Error(`${errorLabel} (${response.status}): ${await response.text()}`);
        }
        const data = await response.json();
        const usage = summarizeUsage(data?.prompt_eval_count ?? null, data?.eval_count ?? null, null);
        writeEmbeddingUsageEvent(config, this.model, usage, started);
        // Validate the shape here so callers never index into a missing array
        // or hand undefined vectors to the cache.
        const expected = Array.isArray(input) ? input.length : 1;
        const received = Array.isArray(data?.embeddings) ? data.embeddings.length : 0;
        if (received !== expected) {
            throw new Error(`${errorLabel}: expected ${expected} embedding(s) in response, received ${received}`);
        }
        return data;
    }
}
|
|
129
|
+
/**
|
|
130
|
+
* Local WASM embedding via @huggingface/transformers (ONNX Runtime).
|
|
131
|
+
* Eliminates network latency — target sub-15ms per embed at fp32.
|
|
132
|
+
* Pipeline is lazily initialized on first use (downloads model on first run).
|
|
133
|
+
*/
|
|
134
|
+
/**
|
|
135
|
+
* Local ONNX embedding with serialized inference.
|
|
136
|
+
* ONNX Runtime's C++ mutex corrupts under concurrent async calls, causing
|
|
137
|
+
* `mutex lock failed: Invalid argument` crashes in long-running processes.
|
|
138
|
+
* All inference is serialized through a promise queue to prevent this.
|
|
139
|
+
*/
|
|
140
|
+
class TransformersEmbedding {
|
|
141
|
+
model;
|
|
142
|
+
pipelinePromise = null;
|
|
143
|
+
inferenceQueue = Promise.resolve();
|
|
144
|
+
constructor(model) {
|
|
145
|
+
this.model = model;
|
|
146
|
+
}
|
|
147
|
+
getPipeline() {
|
|
148
|
+
if (!this.pipelinePromise) {
|
|
149
|
+
this.pipelinePromise = initTransformersPipeline(this.model);
|
|
150
|
+
}
|
|
151
|
+
return this.pipelinePromise;
|
|
152
|
+
}
|
|
153
|
+
serialized(fn) {
|
|
154
|
+
return new Promise((resolve, reject) => {
|
|
155
|
+
this.inferenceQueue = this.inferenceQueue.then(async () => {
|
|
156
|
+
try {
|
|
157
|
+
const extractor = await this.getPipeline();
|
|
158
|
+
resolve(await fn(extractor));
|
|
159
|
+
}
|
|
160
|
+
catch (err) {
|
|
161
|
+
reject(err);
|
|
162
|
+
}
|
|
163
|
+
});
|
|
164
|
+
});
|
|
165
|
+
}
|
|
166
|
+
async embed(text, _task) {
|
|
167
|
+
return this.serialized(async (extractor) => {
|
|
168
|
+
const output = await extractor(text, { pooling: 'mean', normalize: true });
|
|
169
|
+
return Array.from(output.data);
|
|
170
|
+
});
|
|
171
|
+
}
|
|
172
|
+
async embedBatch(texts, _task) {
|
|
173
|
+
if (texts.length === 0)
|
|
174
|
+
return [];
|
|
175
|
+
return this.serialized(async (extractor) => {
|
|
176
|
+
const output = await extractor(texts, { pooling: 'mean', normalize: true });
|
|
177
|
+
const dims = output.dims;
|
|
178
|
+
const embeddingSize = dims[dims.length - 1];
|
|
179
|
+
const data = output.data;
|
|
180
|
+
const results = [];
|
|
181
|
+
for (let i = 0; i < texts.length; i++) {
|
|
182
|
+
results.push(Array.from(data.slice(i * embeddingSize, (i + 1) * embeddingSize)));
|
|
183
|
+
}
|
|
184
|
+
return results;
|
|
185
|
+
});
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
/**
 * Load the feature-extraction pipeline for `model` at fp32, logging the model
 * name before the load and the elapsed time after it. The library is imported
 * dynamically, so it is only loaded when this function runs.
 */
async function initTransformersPipeline(model) {
    const { pipeline: buildPipeline } = await import('@huggingface/transformers');
    console.log(`[embedding] Loading local WASM model: ${model}`);
    const start = performance.now();
    const extractor = await buildPipeline('feature-extraction', model, { dtype: 'fp32' });
    const elapsedMs = performance.now() - start;
    console.log(`[embedding] Model loaded in ${elapsedMs.toFixed(0)}ms`);
    return extractor;
}
|
|
196
|
+
/** Create embedding provider from config. */
|
|
197
|
+
function createEmbeddingProvider() {
    const config = requireConfig();
    const kind = config.embeddingProvider;
    if (kind === 'openai') {
        // No base URL: the OpenAI client uses its default endpoint.
        return new OpenAICompatibleEmbedding(config.openaiApiKey, config.embeddingModel, undefined, config.embeddingDimensions);
    }
    if (kind === 'ollama') {
        return new OllamaEmbedding(config.embeddingModel, config.ollamaBaseUrl);
    }
    if (kind === 'openai-compatible') {
        // A dedicated embedding key wins; otherwise reuse the OpenAI key.
        const apiKey = config.embeddingApiKey ?? config.openaiApiKey;
        return new OpenAICompatibleEmbedding(apiKey, config.embeddingModel, config.embeddingApiUrl, config.embeddingDimensions);
    }
    if (kind === 'transformers') {
        return new TransformersEmbedding(config.embeddingModel);
    }
    if (kind === 'voyage') {
        if (!config.voyageApiKey) {
            throw new Error('VOYAGE_API_KEY is required when EMBEDDING_PROVIDER=voyage');
        }
        return new VoyageEmbedding(config, config.voyageApiKey, config.voyageDocumentModel, config.voyageQueryModel, config.embeddingDimensions);
    }
    throw new Error(`Unknown embedding provider: ${config.embeddingProvider}`);
}
|
|
217
|
+
let provider = null;
|
|
218
|
+
let providerKey = '';
|
|
219
|
+
/**
 * Overwrite `embeddingDimensions` on the bound config object (mutated in
 * place) and discard the memoized provider and every cached vector, since
 * both were produced under the old dimension setting.
 */
function setEmbeddingDimensions(dimensions) {
    requireConfig().embeddingDimensions = dimensions;
    embeddingCache.clear();
    providerKey = '';
    provider = null;
}
|
|
226
|
+
/**
 * Name of the model that will embed text for `task`.
 * Voyage has separate query and document models; every other provider uses
 * the single `embeddingModel` for both tasks.
 */
function effectiveModel(task) {
    const config = requireConfig();
    if (config.embeddingProvider !== 'voyage') {
        return config.embeddingModel;
    }
    if (task === 'query') {
        return config.voyageQueryModel;
    }
    return config.voyageDocumentModel;
}
|
|
233
|
+
/**
 * Short string identifying which endpoint the configured provider talks to.
 * It is one component of the provider key and of embedding cache keys.
 * Returns undefined for a provider name not listed here.
 */
function endpointMarker() {
    const config = requireConfig();
    const kind = config.embeddingProvider;
    if (kind === 'openai') {
        return 'openai:api.openai.com';
    }
    if (kind === 'openai-compatible') {
        return `compat:${config.embeddingApiUrl ?? ''}`;
    }
    if (kind === 'ollama') {
        return `ollama:${config.ollamaBaseUrl}`;
    }
    if (kind === 'transformers') {
        return 'transformers:local';
    }
    if (kind === 'voyage') {
        return `voyage:${VOYAGE_API_BASE}`;
    }
    return undefined;
}
|
|
248
|
+
/**
 * Fingerprint of every config field that affects which provider instance is
 * built. getProvider() rebuilds the provider when this string changes.
 */
function getProviderKey() {
    const config = requireConfig();
    const components = [
        config.embeddingProvider,
        config.embeddingDimensions,
        endpointMarker(),
        config.embeddingModel,
        config.voyageDocumentModel,
        config.voyageQueryModel,
    ];
    // Array join renders null/undefined components as empty strings.
    return components.join('|');
}
|
|
259
|
+
/**
 * Return the memoized provider, rebuilding it (and clearing the in-memory
 * embedding cache) when none exists yet or the provider key has changed.
 */
function getProvider() {
    const nextKey = getProviderKey();
    const stillValid = provider && nextKey === providerKey;
    if (stillValid) {
        return provider;
    }
    provider = createEmbeddingProvider();
    providerKey = nextKey;
    embeddingCache.clear();
    return provider;
}
|
|
268
|
+
/**
|
|
269
|
+
* Returns the instruction prefix required by certain embedding models.
|
|
270
|
+
* snowflake-arctic-embed2 and mxbai-embed-large are sensitive to these.
|
|
271
|
+
*/
|
|
272
|
+
function getInstructionPrefix(model, task) {
    // Document-side text is never prefixed.
    if (task === 'document') {
        return '';
    }
    // Matched by substring, first hit wins.
    const queryPrefixes = [
        // 0.838 similarity with this prefix vs 0.831 without.
        ['mxbai-embed-large', 'Represent this sentence for searching relevant passages: '],
        ['nomic-embed-text', 'search_query: '],
    ];
    for (const [fragment, prefix] of queryPrefixes) {
        if (model.includes(fragment)) {
            return prefix;
        }
    }
    // snowflake-arctic-embed2 (Ollama v0.5.x) diagnostic showed significant
    // regression with prefixes (0.80 -> 0.71), so it and every other model
    // get no prefix.
    return '';
}
|
|
286
|
+
/**
|
|
287
|
+
* LRU embedding cache — avoids redundant API calls for identical text within
|
|
288
|
+
* and across requests. The key includes provider, endpoint, model, dimensions,
|
|
289
|
+
* and task so query/document embeddings of the same text never collide.
|
|
290
|
+
*/
|
|
291
|
+
const EMBEDDING_CACHE_MAX = 512;
|
|
292
|
+
const embeddingCache = new Map();
|
|
293
|
+
/**
 * Derive the cache key for `text` embedded under `task`: the first 16 hex
 * characters of a SHA-256 over provider, effective model, dimensions,
 * endpoint marker, task and text, joined with NUL separators.
 */
function embeddingCacheKey(text, task) {
    const { embeddingProvider, embeddingDimensions } = requireConfig();
    const material = [
        embeddingProvider,
        effectiveModel(task),
        String(embeddingDimensions),
        endpointMarker(),
        task,
        text,
    ].join('\0');
    const digest = createHash('sha256').update(material).digest('hex');
    return digest.slice(0, 16);
}
|
|
305
|
+
/**
 * Look up `key` in the in-memory cache. A hit is re-inserted so it becomes
 * the most recently used entry (Map iteration follows insertion order).
 * Returns the cached vector, or undefined on a miss.
 */
function getCachedEmbedding(key) {
    const hit = embeddingCache.get(key);
    if (!hit) {
        return hit;
    }
    embeddingCache.delete(key);
    embeddingCache.set(key, hit);
    return hit;
}
|
|
314
|
+
function setCachedEmbedding(key, embedding) {
|
|
315
|
+
if (embeddingCache.size >= EMBEDDING_CACHE_MAX) {
|
|
316
|
+
// Evict oldest (first entry)
|
|
317
|
+
const oldest = embeddingCache.keys().next().value;
|
|
318
|
+
if (oldest !== undefined)
|
|
319
|
+
embeddingCache.delete(oldest);
|
|
320
|
+
}
|
|
321
|
+
embeddingCache.set(key, embedding);
|
|
322
|
+
}
|
|
323
|
+
/**
|
|
324
|
+
* Disk-based embedding cache — persists embeddings across process restarts.
|
|
325
|
+
* Eliminates API transport variance and saves API calls during eval runs.
|
|
326
|
+
* Enabled via EMBEDDING_CACHE_ENABLED=true; reuses EXTRACTION_CACHE_DIR.
|
|
327
|
+
*/
|
|
328
|
+
function readDiskEmbedding(key) {
|
|
329
|
+
const config = requireConfig();
|
|
330
|
+
if (!config.embeddingCacheEnabled)
|
|
331
|
+
return null;
|
|
332
|
+
const filePath = join(config.extractionCacheDir, `emb-${key}.json`);
|
|
333
|
+
if (!existsSync(filePath))
|
|
334
|
+
return null;
|
|
335
|
+
return JSON.parse(readFileSync(filePath, 'utf-8'));
|
|
336
|
+
}
|
|
337
|
+
function writeDiskEmbedding(key, embedding) {
|
|
338
|
+
const config = requireConfig();
|
|
339
|
+
if (!config.embeddingCacheEnabled)
|
|
340
|
+
return;
|
|
341
|
+
mkdirSync(config.extractionCacheDir, { recursive: true });
|
|
342
|
+
const filePath = join(config.extractionCacheDir, `emb-${key}.json`);
|
|
343
|
+
const tmpPath = `${filePath}.tmp`;
|
|
344
|
+
writeFileSync(tmpPath, JSON.stringify(embedding), 'utf-8');
|
|
345
|
+
renameSync(tmpPath, filePath);
|
|
346
|
+
}
|
|
347
|
+
/** Embed a single text — primary API used throughout the codebase. */
|
|
348
|
+
export async function embedText(text, task = 'document') {
    // The instruction prefix is part of what gets embedded, so it is also
    // part of what gets hashed into the cache key.
    const finalInput = getInstructionPrefix(effectiveModel(task), task) + text;
    const key = embeddingCacheKey(finalInput, task);
    // 1) In-memory cache.
    const fromMemory = getCachedEmbedding(key);
    if (fromMemory) {
        return fromMemory;
    }
    // 2) Disk cache; a hit is promoted into memory.
    const fromDisk = readDiskEmbedding(key);
    if (fromDisk) {
        setCachedEmbedding(key, fromDisk);
        return fromDisk;
    }
    // 3) Provider call; the result is stored in both caches.
    const fresh = await getProvider().embed(finalInput, task);
    setCachedEmbedding(key, fresh);
    writeDiskEmbedding(key, fresh);
    return fresh;
}
|
|
366
|
+
/** Embed multiple texts in one call, with per-text cache integration. */
|
|
367
|
+
export async function embedTexts(texts, task = 'document') {
    if (texts.length === 0)
        return [];
    const prefix = getInstructionPrefix(effectiveModel(task), task);
    const inputs = texts.map((t) => prefix + t);
    const keys = inputs.map((input) => embeddingCacheKey(input, task));
    // Resolve each text from memory, then disk, exactly as embedText does,
    // so batch and single-text calls share the same persisted entries.
    const results = keys.map((key) => {
        const fromMemory = getCachedEmbedding(key);
        if (fromMemory)
            return fromMemory;
        const fromDisk = readDiskEmbedding(key);
        if (fromDisk) {
            setCachedEmbedding(key, fromDisk);
            return fromDisk;
        }
        return null;
    });
    const uncachedIndices = results.reduce((acc, r, i) => {
        if (r === null)
            acc.push(i);
        return acc;
    }, []);
    if (uncachedIndices.length > 0) {
        const uncachedInputs = uncachedIndices.map((i) => inputs[i]);
        const freshEmbeddings = await getProvider().embedBatch(uncachedInputs, task);
        // A short or malformed provider response must not leak undefined
        // vectors into the result or the caches.
        if (!Array.isArray(freshEmbeddings) || freshEmbeddings.length !== uncachedIndices.length) {
            const received = Array.isArray(freshEmbeddings) ? freshEmbeddings.length : 0;
            throw new Error(`[embedding] embedBatch returned ${received} embeddings for ${uncachedIndices.length} inputs`);
        }
        for (let j = 0; j < uncachedIndices.length; j++) {
            const idx = uncachedIndices[j];
            results[idx] = freshEmbeddings[j];
            setCachedEmbedding(keys[idx], freshEmbeddings[j]);
            writeDiskEmbedding(keys[idx], freshEmbeddings[j]);
        }
    }
    return results;
}
|
|
390
|
+
/** Get current cache size (for testing/monitoring). */
|
|
391
|
+
export function getEmbeddingCacheSize() {
    // Number of vectors currently held by the in-memory cache.
    const { size } = embeddingCache;
    return size;
}
|
|
394
|
+
/** Clear the embedding cache (for testing). */
|
|
395
|
+
export function clearEmbeddingCache() {
    // Empties only the in-memory cache; the function does not touch the
    // disk cache or the memoized provider.
    embeddingCache.clear();
    return undefined;
}
|
|
398
|
+
/**
|
|
399
|
+
* Detect the actual embedding width returned by the active provider.
|
|
400
|
+
* Some OpenAI-compatible backends ignore the requested dimensions parameter,
|
|
401
|
+
* so the database schema must align to the real vector width, not just config.
|
|
402
|
+
*/
|
|
403
|
+
export async function resolveEmbeddingDimensions() {
    const config = requireConfig();
    const activeProvider = getProvider();
    console.log(`[embedding] resolveEmbeddingDimensions: using provider ${activeProvider.constructor.name} for model ${effectiveModel('document')}`);
    // Call the provider directly (not embedText) so the probe is a real
    // request and never served from a cache.
    const probe = await activeProvider.embed('dimension probe', 'document');
    const actualDimensions = probe.length;
    console.log(`[embedding] resolveEmbeddingDimensions: actual length returned is ${actualDimensions}`);
    if (actualDimensions === config.embeddingDimensions) {
        return actualDimensions;
    }
    // The provider's real width wins over the configured value.
    console.warn(`[embedding] Configured EMBEDDING_DIMENSIONS=${config.embeddingDimensions}, provider returned ${actualDimensions}; using provider dimensions.`);
    setEmbeddingDimensions(actualDimensions);
    return actualDimensions;
}
|
|
416
|
+
export { cosineSimilarity } from '../vector-math.js';
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Entity-Attribute Index (EAI) extractor — Sprint 4.
|
|
3
|
+
*
|
|
4
|
+
* For each ingest with stored memories, runs an LLM pass that extracts
|
|
5
|
+
* (entity_name, attribute_key, attribute_value, value_type) quadruples
|
|
6
|
+
* from the conversation text. These quadruples (called "triples" in the code) populate the entity_attributes
|
|
7
|
+
* table for later lookup by specific-fact retrieval.
|
|
8
|
+
*
|
|
9
|
+
* Example extractions:
|
|
10
|
+
* - "I added two columns: category and notes" →
|
|
11
|
+
* (transactions_table, columns, "[category, notes]", list)
|
|
12
|
+
* (transactions_table, columns_count, "2", number)
|
|
13
|
+
* - "completed 25 problems with 90% accuracy" →
|
|
14
|
+
* (problems, count, "25", number)
|
|
15
|
+
* (problems, accuracy, "90%", string)
|
|
16
|
+
*
|
|
17
|
+
* Fire-and-forget from ingest. Fail-closed on parse errors (throws). The
|
|
18
|
+
* caller wraps in try/catch so failures never block the ingest path.
|
|
19
|
+
*/
|
|
20
|
+
import type { LLMProvider } from './llm.js';
|
|
21
|
+
import type { MemoryServiceDeps } from './memory-service-types.js';
|
|
22
|
+
import type { EntityAttributeInput } from '../db/repository-entity-attributes.js';
|
|
23
|
+
/**
 * Error type for entity-attribute extraction failures.
 * Carries the failure message plus, optionally, the underlying value that
 * caused it.
 */
export declare class EntityAttributeExtractorError extends Error {
|
|
24
|
+
readonly cause?: unknown | undefined;
|
|
25
|
+
constructor(message: string, cause?: unknown | undefined);
|
|
26
|
+
}
|
|
27
|
+
/** Extract triples from raw conversation text via the LLM. */
|
|
28
|
+
export declare function extractEntityAttributes(conversationText: string, llmClient?: LLMProvider): Promise<Array<Omit<EntityAttributeInput, 'userId' | 'sourceMemoryId' | 'observedAt'>>>;
|
|
29
|
+
/**
|
|
30
|
+
* Post-write fire-and-forget extractor. Pulls the conversation text + the
|
|
31
|
+
* stored memory IDs from the caller, runs the LLM pass, bulk-inserts.
|
|
32
|
+
* Errors are caught and logged; never throws to the caller.
|
|
33
|
+
*/
|
|
34
|
+
export declare function maybeExtractEntityAttributesForIngest(deps: MemoryServiceDeps, userId: string, conversationText: string, sessionTimestamp: Date | undefined, storedMemoryIds: string[]): Promise<number>;
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { llm as defaultLlm } from './llm.js';
|
|
2
|
+
import { extractFirstJsonObject } from './extraction.js';
|
|
3
|
+
const EXTRACT_MAX_TOKENS = 1024;
|
|
4
|
+
const MAX_TRIPLES_PER_CALL = 24;
|
|
5
|
+
const MAX_INPUT_CHARS = 6000;
|
|
6
|
+
const ALLOWED_VALUE_TYPES = new Set(['number', 'string', 'list', 'boolean', 'date']);
|
|
7
|
+
const SYSTEM_PROMPT = [
|
|
8
|
+
'You extract (entity, attribute, value, value_type) quadruples from a user conversation.',
|
|
9
|
+
'',
|
|
10
|
+
'Rules:',
|
|
11
|
+
'- Each quadruple captures one specific fact the user stated or that the assistant confirmed about the user.',
|
|
12
|
+
'- entity_name: the noun phrase the fact is about (e.g. "transactions_table", "weather_app", "triangle_problems").',
|
|
13
|
+
'- attribute_key: a short snake_case key naming what is being measured/described (e.g. "columns_count", "accuracy", "features_list", "api_quota").',
|
|
14
|
+
'- attribute_value: the exact value as a string (e.g. "25", "90%", "category, notes", "1200/day", "true").',
|
|
15
|
+
'- value_type: one of: number, string, list, boolean, date.',
|
|
16
|
+
'- Extract specific facts: counts, names, dates, percentages, lists. Skip vague generalities.',
|
|
17
|
+
'- Do NOT speculate or infer beyond what the conversation directly states.',
|
|
18
|
+
'- Output a JSON object: {"triples": [{"entity_name": "...", "attribute_key": "...", "attribute_value": "...", "value_type": "..."}, ...]}.',
|
|
19
|
+
'- Output up to ' + String(MAX_TRIPLES_PER_CALL) + ' triples.',
|
|
20
|
+
'- No markdown fences. No prose around the JSON.',
|
|
21
|
+
].join('\n');
|
|
22
|
+
/**
 * Error raised by the entity-attribute extractor. `cause` keeps whatever
 * lower-level error (if any) triggered the failure.
 */
export class EntityAttributeExtractorError extends Error {
    cause;
    constructor(message, cause) {
        super(message);
        // Same own properties, assigned in the same order: cause, then name.
        Object.assign(this, { cause, name: 'EntityAttributeExtractorError' });
    }
}
|
|
30
|
+
/** Extract triples from raw conversation text via the LLM. */
|
|
31
|
+
export async function extractEntityAttributes(conversationText, llmClient = defaultLlm) {
    // Only the first MAX_INPUT_CHARS characters are sent to the model.
    const text = conversationText.slice(0, MAX_INPUT_CHARS);
    if (text.trim().length === 0) {
        throw new EntityAttributeExtractorError('empty input text');
    }
    const messages = [
        { role: 'system', content: SYSTEM_PROMPT },
        { role: 'user', content: 'CONVERSATION:\n' + text + '\n\nReturn JSON {"triples": [...]}.' },
    ];
    let raw;
    try {
        raw = await llmClient.chat(messages, {
            temperature: 0,
            jsonMode: true,
            maxTokens: EXTRACT_MAX_TOKENS,
        });
    }
    catch (err) {
        // A rejection is not guaranteed to be an Error; avoid "undefined" in
        // the message when it is not.
        const reason = typeof err?.message === 'string' ? err.message : String(err);
        throw new EntityAttributeExtractorError(`extractor LLM call failed: ${reason}`, err);
    }
    if (!raw)
        throw new EntityAttributeExtractorError('extractor returned empty content');
    const cleaned = extractFirstJsonObject(raw);
    let parsed;
    try {
        parsed = JSON.parse(cleaned);
    }
    catch (err) {
        throw new EntityAttributeExtractorError(`extractor returned non-JSON: ${cleaned.slice(0, 200)}`, err);
    }
    // JSON.parse can legitimately yield null or a primitive; reject those
    // with the extractor's own error type instead of a raw TypeError.
    if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.triples)) {
        throw new EntityAttributeExtractorError('extractor JSON missing "triples" array');
    }
    // validateTriple returns [] for invalid entries, so flatMap drops them.
    return parsed.triples.flatMap((t) => validateTriple(t));
}
|
|
66
|
+
/** Return `value` trimmed when it is a string; any other type becomes ''. */
function asTrimmedString(value) {
    if (typeof value !== 'string') {
        return '';
    }
    return value.trim();
}
|
|
69
|
+
/**
 * Return `value` when it is a string listed in ALLOWED_VALUE_TYPES;
 * otherwise null. The match is exact (no trimming or case folding).
 */
function asValueType(value) {
    const recognized = typeof value === 'string' && ALLOWED_VALUE_TYPES.has(value);
    return recognized ? value : null;
}
|
|
74
|
+
function validateTriple(t) {
|
|
75
|
+
const entityName = asTrimmedString(t.entity_name);
|
|
76
|
+
const attributeKey = asTrimmedString(t.attribute_key);
|
|
77
|
+
const attributeValue = asTrimmedString(t.attribute_value);
|
|
78
|
+
const valueType = asValueType(t.value_type);
|
|
79
|
+
if (!entityName || !attributeKey || !attributeValue || !valueType)
|
|
80
|
+
return [];
|
|
81
|
+
return [{ entityName, attributeKey, attributeValue, valueType }];
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Post-write fire-and-forget extractor. Pulls the conversation text + the
|
|
85
|
+
* stored memory IDs from the caller, runs the LLM pass, bulk-inserts.
|
|
86
|
+
* Errors are caught and logged; never throws to the caller.
|
|
87
|
+
*/
|
|
88
|
+
export async function maybeExtractEntityAttributesForIngest(deps, userId, conversationText, sessionTimestamp, storedMemoryIds) {
|
|
89
|
+
if (!deps.config.entityAttributesEnabled)
|
|
90
|
+
return 0;
|
|
91
|
+
const repo = deps.stores.entityAttributes;
|
|
92
|
+
if (!repo)
|
|
93
|
+
return 0;
|
|
94
|
+
if (storedMemoryIds.length === 0)
|
|
95
|
+
return 0;
|
|
96
|
+
try {
|
|
97
|
+
const triples = await extractEntityAttributes(conversationText);
|
|
98
|
+
if (triples.length === 0)
|
|
99
|
+
return 0;
|
|
100
|
+
const observedAt = sessionTimestamp ?? new Date();
|
|
101
|
+
const sourceMemoryId = storedMemoryIds[0]; // attribute provenance to the first new memory
|
|
102
|
+
const rows = triples.map((t) => ({
|
|
103
|
+
userId,
|
|
104
|
+
entityName: t.entityName,
|
|
105
|
+
attributeKey: t.attributeKey,
|
|
106
|
+
attributeValue: t.attributeValue,
|
|
107
|
+
valueType: t.valueType,
|
|
108
|
+
sourceMemoryId,
|
|
109
|
+
observedAt,
|
|
110
|
+
}));
|
|
111
|
+
return await repo.bulkInsert(rows);
|
|
112
|
+
}
|
|
113
|
+
catch (err) {
|
|
114
|
+
console.warn(`[eai] extraction failed for user=${userId}: ${err.message}`);
|
|
115
|
+
return 0;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/** One observation fed into entity grouping + card synthesis. */
|
|
2
|
+
export interface ObservationForCard {
|
|
3
|
+
id: string;
|
|
4
|
+
text: string;
|
|
5
|
+
type: string;
|
|
6
|
+
observedAt: Date;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Result of synthesizing the card for one entity: the entity's name, the card
 * text, and a list of observation ids.
 * NOTE(review): sourceObservationIds presumably lists the observations that
 * fed the synthesis — confirm against the implementation.
 */
export interface EntityCardSynth {
|
|
9
|
+
entityName: string;
|
|
10
|
+
cardText: string;
|
|
11
|
+
sourceObservationIds: string[];
|
|
12
|
+
}
|
|
13
|
+
/** Schema for the tool-use call that returns the updated card text. */
|
|
14
|
+
declare const ENTITY_CARD_TOOL_SCHEMA: {
|
|
15
|
+
readonly name: "record_entity_card";
|
|
16
|
+
readonly description: "Persist the updated entity card text.";
|
|
17
|
+
readonly input_schema: {
|
|
18
|
+
readonly type: "object";
|
|
19
|
+
readonly properties: {
|
|
20
|
+
readonly card_text: {
|
|
21
|
+
readonly type: "string";
|
|
22
|
+
};
|
|
23
|
+
};
|
|
24
|
+
readonly required: readonly ["card_text"];
|
|
25
|
+
};
|
|
26
|
+
};
|
|
27
|
+
/**
 * Shape of a tool-call result: a single `card_text` string, matching the one
 * required property of ENTITY_CARD_TOOL_SCHEMA.
 */
interface EntityCardToolOutput {
|
|
28
|
+
card_text: string;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Group observations by entity_name. The entity is extracted from
|
|
32
|
+
* entity_state observations whose text begins with "<EntityName>:" or
|
|
33
|
+
* a leading capitalized noun before the first colon/verb.
|
|
34
|
+
*
|
|
35
|
+
* Heuristic (simplified — see compromises in report): the first capitalized
|
|
36
|
+
* token sequence at the start of the observation text. The literal token
|
|
37
|
+
* "User" / "user" maps to the canonical entity "user".
|
|
38
|
+
*/
|
|
39
|
+
export declare function groupObservationsByEntity(observations: readonly ObservationForCard[]): Map<string, ObservationForCard[]>;
|
|
40
|
+
/**
 * Dependencies and limits for card synthesis.
 *
 * `llmCallTool` takes a system prompt, a user prompt and the entity-card tool
 * schema, and resolves to an object carrying the `card_text` string.
 */
export interface SynthesizeCardsDeps {
|
|
41
|
+
llmCallTool: (system: string, user: string, schema: typeof ENTITY_CARD_TOOL_SCHEMA) => Promise<EntityCardToolOutput>;
|
|
42
|
+
/** Minimum observations an entity needs before its card is synthesized. */
|
|
43
|
+
minObservations: number;
|
|
44
|
+
/** Max entities to synthesize per call (cost ceiling). */
|
|
45
|
+
maxEntities: number;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Synthesize ENTITY_CARDs for every entity with >= minObservations
|
|
49
|
+
* observations in the current Reflect run. Prior cards are looked up
|
|
50
|
+
* by entity_name; absent entities use the empty prior. Returns one
|
|
51
|
+
* EntityCardSynth per synthesized entity.
|
|
52
|
+
*/
|
|
53
|
+
export declare function synthesizeCards(observations: readonly ObservationForCard[], priorCards: ReadonlyMap<string, string>, deps: SynthesizeCardsDeps): Promise<EntityCardSynth[]>;
|
|
54
|
+
export {};
|