@atomicmemory/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/LICENSE +201 -0
- package/README.md +314 -0
- package/dist/app/bind-ephemeral.d.ts +18 -0
- package/dist/app/bind-ephemeral.js +22 -0
- package/dist/app/cors-headers.d.ts +12 -0
- package/dist/app/cors-headers.js +18 -0
- package/dist/app/create-app.d.ts +25 -0
- package/dist/app/create-app.js +156 -0
- package/dist/app/runtime-config-route-snapshot.d.ts +27 -0
- package/dist/app/runtime-config-route-snapshot.js +27 -0
- package/dist/app/runtime-container.d.ts +281 -0
- package/dist/app/runtime-container.js +297 -0
- package/dist/app/startup-checks.d.ts +28 -0
- package/dist/app/startup-checks.js +45 -0
- package/dist/bin.d.ts +17 -0
- package/dist/bin.js +128 -0
- package/dist/config.d.ts +680 -0
- package/dist/config.js +808 -0
- package/dist/db/agent-trust-repository.d.ts +49 -0
- package/dist/db/agent-trust-repository.js +66 -0
- package/dist/db/belief-edges-repository.d.ts +68 -0
- package/dist/db/belief-edges-repository.js +124 -0
- package/dist/db/claim-repository.d.ts +6 -0
- package/dist/db/claim-repository.js +4 -0
- package/dist/db/contradictions-repository.d.ts +56 -0
- package/dist/db/contradictions-repository.js +88 -0
- package/dist/db/document-chunk-repository.d.ts +48 -0
- package/dist/db/document-chunk-repository.js +145 -0
- package/dist/db/document-chunk-types.d.ts +35 -0
- package/dist/db/document-chunk-types.js +9 -0
- package/dist/db/document-list-cursor.d.ts +45 -0
- package/dist/db/document-list-cursor.js +111 -0
- package/dist/db/document-list-repository.d.ts +103 -0
- package/dist/db/document-list-repository.js +204 -0
- package/dist/db/entity-cards-repository.d.ts +37 -0
- package/dist/db/entity-cards-repository.js +46 -0
- package/dist/db/entity-values-repository.d.ts +26 -0
- package/dist/db/entity-values-repository.js +57 -0
- package/dist/db/link-repository.d.ts +30 -0
- package/dist/db/link-repository.js +54 -0
- package/dist/db/memory-repository.d.ts +163 -0
- package/dist/db/memory-repository.js +232 -0
- package/dist/db/migrate.d.ts +6 -0
- package/dist/db/migrate.js +36 -0
- package/dist/db/mmr.d.ts +14 -0
- package/dist/db/mmr.js +57 -0
- package/dist/db/passport-feed-repository.d.ts +91 -0
- package/dist/db/passport-feed-repository.js +198 -0
- package/dist/db/pg-episode-store.d.ts +19 -0
- package/dist/db/pg-episode-store.js +17 -0
- package/dist/db/pg-link-store.d.ts +17 -0
- package/dist/db/pg-link-store.js +14 -0
- package/dist/db/pg-memory-store.d.ts +68 -0
- package/dist/db/pg-memory-store.js +53 -0
- package/dist/db/pg-recap-store.d.ts +13 -0
- package/dist/db/pg-recap-store.js +19 -0
- package/dist/db/pg-representation-store.d.ts +17 -0
- package/dist/db/pg-representation-store.js +17 -0
- package/dist/db/pg-search-store.d.ts +29 -0
- package/dist/db/pg-search-store.js +47 -0
- package/dist/db/pool.d.ts +5 -0
- package/dist/db/pool.js +21 -0
- package/dist/db/ppr.d.ts +56 -0
- package/dist/db/ppr.js +178 -0
- package/dist/db/query-helpers.d.ts +44 -0
- package/dist/db/query-helpers.js +60 -0
- package/dist/db/raw-doc-artifact-sync.d.ts +128 -0
- package/dist/db/raw-doc-artifact-sync.js +259 -0
- package/dist/db/raw-document-blob-repository.d.ts +148 -0
- package/dist/db/raw-document-blob-repository.js +300 -0
- package/dist/db/raw-document-repository.d.ts +104 -0
- package/dist/db/raw-document-repository.js +410 -0
- package/dist/db/raw-document-status-repository.d.ts +122 -0
- package/dist/db/raw-document-status-repository.js +183 -0
- package/dist/db/raw-document-types.d.ts +236 -0
- package/dist/db/raw-document-types.js +10 -0
- package/dist/db/raw-storage-reconciliation-repository.d.ts +110 -0
- package/dist/db/raw-storage-reconciliation-repository.js +200 -0
- package/dist/db/reflection-jobs-repository.d.ts +33 -0
- package/dist/db/reflection-jobs-repository.js +48 -0
- package/dist/db/reflections-repository.d.ts +41 -0
- package/dist/db/reflections-repository.js +83 -0
- package/dist/db/repository-claims.d.ts +141 -0
- package/dist/db/repository-claims.js +376 -0
- package/dist/db/repository-deferred-audn.d.ts +33 -0
- package/dist/db/repository-deferred-audn.js +69 -0
- package/dist/db/repository-document-delete.d.ts +53 -0
- package/dist/db/repository-document-delete.js +156 -0
- package/dist/db/repository-entities.d.ts +114 -0
- package/dist/db/repository-entities.js +317 -0
- package/dist/db/repository-entity-attributes.d.ts +41 -0
- package/dist/db/repository-entity-attributes.js +65 -0
- package/dist/db/repository-entity-graph.d.ts +32 -0
- package/dist/db/repository-entity-graph.js +87 -0
- package/dist/db/repository-first-mentions.d.ts +41 -0
- package/dist/db/repository-first-mentions.js +79 -0
- package/dist/db/repository-lessons.d.ts +51 -0
- package/dist/db/repository-lessons.js +90 -0
- package/dist/db/repository-links.d.ts +26 -0
- package/dist/db/repository-links.js +105 -0
- package/dist/db/repository-observation.d.ts +26 -0
- package/dist/db/repository-observation.js +51 -0
- package/dist/db/repository-read.d.ts +56 -0
- package/dist/db/repository-read.js +271 -0
- package/dist/db/repository-recaps.d.ts +59 -0
- package/dist/db/repository-recaps.js +158 -0
- package/dist/db/repository-representations.d.ts +48 -0
- package/dist/db/repository-representations.js +162 -0
- package/dist/db/repository-temporal-state.d.ts +35 -0
- package/dist/db/repository-temporal-state.js +46 -0
- package/dist/db/repository-tll.d.ts +88 -0
- package/dist/db/repository-tll.js +179 -0
- package/dist/db/repository-types.d.ts +313 -0
- package/dist/db/repository-types.js +142 -0
- package/dist/db/repository-user-profiles.d.ts +17 -0
- package/dist/db/repository-user-profiles.js +28 -0
- package/dist/db/repository-vector-search.d.ts +33 -0
- package/dist/db/repository-vector-search.js +373 -0
- package/dist/db/repository-wipe.d.ts +34 -0
- package/dist/db/repository-wipe.js +94 -0
- package/dist/db/repository-write.d.ts +61 -0
- package/dist/db/repository-write.js +279 -0
- package/dist/db/schema.sql +1355 -0
- package/dist/db/storage-artifact-delete-tx.d.ts +56 -0
- package/dist/db/storage-artifact-delete-tx.js +123 -0
- package/dist/db/storage-artifact-providers.d.ts +21 -0
- package/dist/db/storage-artifact-providers.js +21 -0
- package/dist/db/storage-artifact-recovery-repository.d.ts +66 -0
- package/dist/db/storage-artifact-recovery-repository.js +58 -0
- package/dist/db/storage-artifact-repository.d.ts +329 -0
- package/dist/db/storage-artifact-repository.js +497 -0
- package/dist/db/stores.d.ts +220 -0
- package/dist/db/stores.js +12 -0
- package/dist/db/summaries-repository.d.ts +74 -0
- package/dist/db/summaries-repository.js +125 -0
- package/dist/eval/beam-10m-loader.d.ts +98 -0
- package/dist/eval/beam-10m-loader.js +128 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +17 -0
- package/dist/middleware/require-bearer.d.ts +27 -0
- package/dist/middleware/require-bearer.js +60 -0
- package/dist/middleware/validate-response.d.ts +33 -0
- package/dist/middleware/validate-response.js +55 -0
- package/dist/middleware/validate.d.ts +43 -0
- package/dist/middleware/validate.js +85 -0
- package/dist/routes/agents.d.ts +13 -0
- package/dist/routes/agents.js +89 -0
- package/dist/routes/document-response-formatters.d.ts +98 -0
- package/dist/routes/document-response-formatters.js +243 -0
- package/dist/routes/documents.d.ts +74 -0
- package/dist/routes/documents.js +425 -0
- package/dist/routes/memories.d.ts +29 -0
- package/dist/routes/memories.js +725 -0
- package/dist/routes/memory-response-formatters.d.ts +179 -0
- package/dist/routes/memory-response-formatters.js +210 -0
- package/dist/routes/public-raw-storage-metadata.d.ts +54 -0
- package/dist/routes/public-raw-storage-metadata.js +56 -0
- package/dist/routes/reflect.d.ts +14 -0
- package/dist/routes/reflect.js +19 -0
- package/dist/routes/response-schema-map.d.ts +14 -0
- package/dist/routes/response-schema-map.js +69 -0
- package/dist/routes/route-errors.d.ts +12 -0
- package/dist/routes/route-errors.js +30 -0
- package/dist/routes/storage-error-handlers.d.ts +34 -0
- package/dist/routes/storage-error-handlers.js +185 -0
- package/dist/routes/storage-response-formatters.d.ts +44 -0
- package/dist/routes/storage-response-formatters.js +155 -0
- package/dist/routes/storage.d.ts +38 -0
- package/dist/routes/storage.js +369 -0
- package/dist/routes/upstream-provider-errors.d.ts +19 -0
- package/dist/routes/upstream-provider-errors.js +95 -0
- package/dist/schemas/agents.d.ts +79 -0
- package/dist/schemas/agents.js +126 -0
- package/dist/schemas/common.d.ts +110 -0
- package/dist/schemas/common.js +190 -0
- package/dist/schemas/document-list-responses.d.ts +102 -0
- package/dist/schemas/document-list-responses.js +87 -0
- package/dist/schemas/document-list-schemas.d.ts +123 -0
- package/dist/schemas/document-list-schemas.js +174 -0
- package/dist/schemas/document-response-schemas.d.ts +610 -0
- package/dist/schemas/document-response-schemas.js +264 -0
- package/dist/schemas/document-status-envelope.d.ts +48 -0
- package/dist/schemas/document-status-envelope.js +54 -0
- package/dist/schemas/documents.d.ts +292 -0
- package/dist/schemas/documents.js +449 -0
- package/dist/schemas/errors.d.ts +75 -0
- package/dist/schemas/errors.js +105 -0
- package/dist/schemas/memories.d.ts +378 -0
- package/dist/schemas/memories.js +542 -0
- package/dist/schemas/openapi.d.ts +24 -0
- package/dist/schemas/openapi.js +1038 -0
- package/dist/schemas/response-scalars.d.ts +10 -0
- package/dist/schemas/response-scalars.js +10 -0
- package/dist/schemas/responses.d.ts +536 -0
- package/dist/schemas/responses.js +350 -0
- package/dist/schemas/search-response-parts.d.ts +97 -0
- package/dist/schemas/search-response-parts.js +103 -0
- package/dist/schemas/storage-schemas.d.ts +175 -0
- package/dist/schemas/storage-schemas.js +277 -0
- package/dist/schemas/zod-setup.d.ts +15 -0
- package/dist/schemas/zod-setup.js +17 -0
- package/dist/server.d.ts +13 -0
- package/dist/server.js +57 -0
- package/dist/services/abstract-query-policy.d.ts +13 -0
- package/dist/services/abstract-query-policy.js +50 -0
- package/dist/services/affinity-clustering.d.ts +66 -0
- package/dist/services/affinity-clustering.js +125 -0
- package/dist/services/agentic-retrieval.d.ts +38 -0
- package/dist/services/agentic-retrieval.js +126 -0
- package/dist/services/answer-format.d.ts +56 -0
- package/dist/services/answer-format.js +118 -0
- package/dist/services/answer-rescue.d.ts +72 -0
- package/dist/services/answer-rescue.js +177 -0
- package/dist/services/answer-verifier.d.ts +24 -0
- package/dist/services/answer-verifier.js +73 -0
- package/dist/services/api-retry.d.ts +6 -0
- package/dist/services/api-retry.js +41 -0
- package/dist/services/assistant-turn-filter.d.ts +20 -0
- package/dist/services/assistant-turn-filter.js +69 -0
- package/dist/services/atomicmem-uri.d.ts +33 -0
- package/dist/services/atomicmem-uri.js +86 -0
- package/dist/services/audit-events.d.ts +54 -0
- package/dist/services/audit-events.js +56 -0
- package/dist/services/chunked-extraction.d.ts +21 -0
- package/dist/services/chunked-extraction.js +108 -0
- package/dist/services/claim-slotting.d.ts +27 -0
- package/dist/services/claim-slotting.js +38 -0
- package/dist/services/claude-code-llm.d.ts +19 -0
- package/dist/services/claude-code-llm.js +96 -0
- package/dist/services/composite-dedup.d.ts +50 -0
- package/dist/services/composite-dedup.js +153 -0
- package/dist/services/composite-grouping.d.ts +41 -0
- package/dist/services/composite-grouping.js +111 -0
- package/dist/services/composite-staleness.d.ts +20 -0
- package/dist/services/composite-staleness.js +50 -0
- package/dist/services/conciseness-preference.d.ts +14 -0
- package/dist/services/conciseness-preference.js +42 -0
- package/dist/services/conflict-policy.d.ts +20 -0
- package/dist/services/conflict-policy.js +335 -0
- package/dist/services/consensus-extraction.d.ts +39 -0
- package/dist/services/consensus-extraction.js +147 -0
- package/dist/services/consensus-validation.d.ts +52 -0
- package/dist/services/consensus-validation.js +206 -0
- package/dist/services/consolidation-service.d.ts +60 -0
- package/dist/services/consolidation-service.js +171 -0
- package/dist/services/content-detection.d.ts +18 -0
- package/dist/services/content-detection.js +25 -0
- package/dist/services/contradiction-surfacing.d.ts +62 -0
- package/dist/services/contradiction-surfacing.js +111 -0
- package/dist/services/cost-telemetry.d.ts +39 -0
- package/dist/services/cost-telemetry.js +58 -0
- package/dist/services/counter-evidence.d.ts +34 -0
- package/dist/services/counter-evidence.js +92 -0
- package/dist/services/current-state-ranking.d.ts +21 -0
- package/dist/services/current-state-ranking.js +152 -0
- package/dist/services/deferred-audn.d.ts +47 -0
- package/dist/services/deferred-audn.js +162 -0
- package/dist/services/document-chunker.d.ts +50 -0
- package/dist/services/document-chunker.js +153 -0
- package/dist/services/document-failure-markers.d.ts +91 -0
- package/dist/services/document-failure-markers.js +305 -0
- package/dist/services/document-indexer.d.ts +122 -0
- package/dist/services/document-indexer.js +405 -0
- package/dist/services/document-service.d.ts +245 -0
- package/dist/services/document-service.js +325 -0
- package/dist/services/document-upload-artifact-sync.d.ts +80 -0
- package/dist/services/document-upload-artifact-sync.js +162 -0
- package/dist/services/document-upload-beta2-recovery.d.ts +72 -0
- package/dist/services/document-upload-beta2-recovery.js +94 -0
- package/dist/services/document-upload.d.ts +44 -0
- package/dist/services/document-upload.js +353 -0
- package/dist/services/embedding.d.ts +57 -0
- package/dist/services/embedding.js +416 -0
- package/dist/services/entity-attribute-extractor.d.ts +34 -0
- package/dist/services/entity-attribute-extractor.js +117 -0
- package/dist/services/entity-card-synthesis.d.ts +54 -0
- package/dist/services/entity-card-synthesis.js +92 -0
- package/dist/services/entity-dedup.d.ts +9 -0
- package/dist/services/entity-dedup.js +14 -0
- package/dist/services/entity-graph.d.ts +17 -0
- package/dist/services/entity-graph.js +135 -0
- package/dist/services/entropy-gate.d.ts +52 -0
- package/dist/services/entropy-gate.js +56 -0
- package/dist/services/episode-fetcher.d.ts +47 -0
- package/dist/services/episode-fetcher.js +128 -0
- package/dist/services/event-anchor-facts.d.ts +8 -0
- package/dist/services/event-anchor-facts.js +205 -0
- package/dist/services/event-chain-detector.d.ts +52 -0
- package/dist/services/event-chain-detector.js +83 -0
- package/dist/services/extraction-cache.d.ts +9 -0
- package/dist/services/extraction-cache.js +54 -0
- package/dist/services/extraction-enrichment.d.ts +9 -0
- package/dist/services/extraction-enrichment.js +223 -0
- package/dist/services/extraction.d.ts +69 -0
- package/dist/services/extraction.js +596 -0
- package/dist/services/fact-normalization.d.ts +12 -0
- package/dist/services/fact-normalization.js +248 -0
- package/dist/services/filecoin-observability.d.ts +127 -0
- package/dist/services/filecoin-observability.js +200 -0
- package/dist/services/first-mention-service.d.ts +76 -0
- package/dist/services/first-mention-service.js +186 -0
- package/dist/services/hierarchical-retrieval.d.ts +49 -0
- package/dist/services/hierarchical-retrieval.js +50 -0
- package/dist/services/ingest-fact-pipeline.d.ts +32 -0
- package/dist/services/ingest-fact-pipeline.js +212 -0
- package/dist/services/ingest-post-write.d.ts +50 -0
- package/dist/services/ingest-post-write.js +117 -0
- package/dist/services/ingest-trace.d.ts +32 -0
- package/dist/services/ingest-trace.js +60 -0
- package/dist/services/input-sanitizer.d.ts +41 -0
- package/dist/services/input-sanitizer.js +135 -0
- package/dist/services/iterative-retrieval.d.ts +26 -0
- package/dist/services/iterative-retrieval.js +139 -0
- package/dist/services/keyword-expansion.d.ts +10 -0
- package/dist/services/keyword-expansion.js +26 -0
- package/dist/services/lesson-service.d.ts +68 -0
- package/dist/services/lesson-service.js +178 -0
- package/dist/services/literal-extractor.d.ts +16 -0
- package/dist/services/literal-extractor.js +74 -0
- package/dist/services/literal-list-protection.d.ts +17 -0
- package/dist/services/literal-list-protection.js +134 -0
- package/dist/services/literal-query-expansion.d.ts +20 -0
- package/dist/services/literal-query-expansion.js +181 -0
- package/dist/services/llm.d.ts +61 -0
- package/dist/services/llm.js +265 -0
- package/dist/services/memcell-projection.d.ts +17 -0
- package/dist/services/memcell-projection.js +41 -0
- package/dist/services/memory-audn.d.ts +43 -0
- package/dist/services/memory-audn.js +419 -0
- package/dist/services/memory-crud.d.ts +93 -0
- package/dist/services/memory-crud.js +255 -0
- package/dist/services/memory-ingest.d.ts +21 -0
- package/dist/services/memory-ingest.js +249 -0
- package/dist/services/memory-lifecycle.d.ts +75 -0
- package/dist/services/memory-lifecycle.js +108 -0
- package/dist/services/memory-lineage.d.ts +181 -0
- package/dist/services/memory-lineage.js +232 -0
- package/dist/services/memory-network.d.ts +40 -0
- package/dist/services/memory-network.js +75 -0
- package/dist/services/memory-search-types.d.ts +25 -0
- package/dist/services/memory-search-types.js +10 -0
- package/dist/services/memory-search.d.ts +48 -0
- package/dist/services/memory-search.js +505 -0
- package/dist/services/memory-service-types.d.ts +371 -0
- package/dist/services/memory-service-types.js +8 -0
- package/dist/services/memory-service.d.ts +152 -0
- package/dist/services/memory-service.js +225 -0
- package/dist/services/memory-storage.d.ts +33 -0
- package/dist/services/memory-storage.js +328 -0
- package/dist/services/msr-aggregator.d.ts +38 -0
- package/dist/services/msr-aggregator.js +97 -0
- package/dist/services/msr-detector.d.ts +35 -0
- package/dist/services/msr-detector.js +65 -0
- package/dist/services/namespace-retrieval.d.ts +60 -0
- package/dist/services/namespace-retrieval.js +180 -0
- package/dist/services/observation-date-extraction.d.ts +12 -0
- package/dist/services/observation-date-extraction.js +50 -0
- package/dist/services/observation-service.d.ts +27 -0
- package/dist/services/observation-service.js +84 -0
- package/dist/services/packaging-observability.d.ts +29 -0
- package/dist/services/packaging-observability.js +146 -0
- package/dist/services/query-expansion.d.ts +83 -0
- package/dist/services/query-expansion.js +242 -0
- package/dist/services/query-keyword-matches.d.ts +6 -0
- package/dist/services/query-keyword-matches.js +56 -0
- package/dist/services/query-term-visibility.d.ts +28 -0
- package/dist/services/query-term-visibility.js +100 -0
- package/dist/services/quick-extraction.d.ts +25 -0
- package/dist/services/quick-extraction.js +431 -0
- package/dist/services/quoted-entity-extraction.d.ts +10 -0
- package/dist/services/quoted-entity-extraction.js +161 -0
- package/dist/services/raw-storage-reconciler-backoff.d.ts +8 -0
- package/dist/services/raw-storage-reconciler-backoff.js +14 -0
- package/dist/services/raw-storage-reconciler-scheduler.d.ts +29 -0
- package/dist/services/raw-storage-reconciler-scheduler.js +43 -0
- package/dist/services/raw-storage-reconciler.d.ts +71 -0
- package/dist/services/raw-storage-reconciler.js +278 -0
- package/dist/services/recap-builder.d.ts +49 -0
- package/dist/services/recap-builder.js +157 -0
- package/dist/services/reflect-jobs.d.ts +23 -0
- package/dist/services/reflect-jobs.js +36 -0
- package/dist/services/reflect-prompts.d.ts +71 -0
- package/dist/services/reflect-prompts.js +99 -0
- package/dist/services/reflect-retrieval.d.ts +33 -0
- package/dist/services/reflect-retrieval.js +30 -0
- package/dist/services/reflect.d.ts +49 -0
- package/dist/services/reflect.js +84 -0
- package/dist/services/relative-temporal.d.ts +14 -0
- package/dist/services/relative-temporal.js +163 -0
- package/dist/services/relevance-policy.d.ts +37 -0
- package/dist/services/relevance-policy.js +109 -0
- package/dist/services/rerank.d.ts +32 -0
- package/dist/services/rerank.js +118 -0
- package/dist/services/reranker.d.ts +20 -0
- package/dist/services/reranker.js +99 -0
- package/dist/services/retrieval-channel-rules.d.ts +34 -0
- package/dist/services/retrieval-channel-rules.js +41 -0
- package/dist/services/retrieval-config-overlay.d.ts +36 -0
- package/dist/services/retrieval-config-overlay.js +44 -0
- package/dist/services/retrieval-format.d.ts +119 -0
- package/dist/services/retrieval-format.js +559 -0
- package/dist/services/retrieval-policy.d.ts +69 -0
- package/dist/services/retrieval-policy.js +275 -0
- package/dist/services/retrieval-profiles.d.ts +37 -0
- package/dist/services/retrieval-profiles.js +90 -0
- package/dist/services/retrieval-side-effects.d.ts +14 -0
- package/dist/services/retrieval-side-effects.js +26 -0
- package/dist/services/retrieval-trace.d.ts +108 -0
- package/dist/services/retrieval-trace.js +147 -0
- package/dist/services/rrf-fusion.d.ts +18 -0
- package/dist/services/rrf-fusion.js +34 -0
- package/dist/services/search-pipeline.d.ts +71 -0
- package/dist/services/search-pipeline.js +788 -0
- package/dist/services/session-date.d.ts +20 -0
- package/dist/services/session-date.js +61 -0
- package/dist/services/session-packaging.d.ts +53 -0
- package/dist/services/session-packaging.js +182 -0
- package/dist/services/session-summary-generator.d.ts +53 -0
- package/dist/services/session-summary-generator.js +134 -0
- package/dist/services/specialists/cr-specialist.d.ts +52 -0
- package/dist/services/specialists/cr-specialist.js +121 -0
- package/dist/services/specialists/dispatch.d.ts +53 -0
- package/dist/services/specialists/dispatch.js +102 -0
- package/dist/services/specialists/ie-ku-specialist.d.ts +37 -0
- package/dist/services/specialists/ie-ku-specialist.js +63 -0
- package/dist/services/specialists/msr-specialist.d.ts +61 -0
- package/dist/services/specialists/msr-specialist.js +162 -0
- package/dist/services/specialists/tr-specialist.d.ts +37 -0
- package/dist/services/specialists/tr-specialist.js +146 -0
- package/dist/services/storage-key-prefix.d.ts +42 -0
- package/dist/services/storage-key-prefix.js +45 -0
- package/dist/services/storage-put-recovery.d.ts +71 -0
- package/dist/services/storage-put-recovery.js +269 -0
- package/dist/services/storage-service-errors.d.ts +124 -0
- package/dist/services/storage-service-errors.js +189 -0
- package/dist/services/storage-service.d.ts +176 -0
- package/dist/services/storage-service.js +423 -0
- package/dist/services/subject-aware-ranking.d.ts +19 -0
- package/dist/services/subject-aware-ranking.js +161 -0
- package/dist/services/supplemental-extraction.d.ts +7 -0
- package/dist/services/supplemental-extraction.js +116 -0
- package/dist/services/tbc-execution.d.ts +49 -0
- package/dist/services/tbc-execution.js +284 -0
- package/dist/services/temporal-classifier.d.ts +56 -0
- package/dist/services/temporal-classifier.js +94 -0
- package/dist/services/temporal-endpoint-evidence.d.ts +12 -0
- package/dist/services/temporal-endpoint-evidence.js +313 -0
- package/dist/services/temporal-fingerprint.d.ts +6 -0
- package/dist/services/temporal-fingerprint.js +12 -0
- package/dist/services/temporal-format.d.ts +9 -0
- package/dist/services/temporal-format.js +21 -0
- package/dist/services/temporal-intent.d.ts +39 -0
- package/dist/services/temporal-intent.js +78 -0
- package/dist/services/temporal-query-constraints.d.ts +16 -0
- package/dist/services/temporal-query-constraints.js +107 -0
- package/dist/services/temporal-query-expansion.d.ts +14 -0
- package/dist/services/temporal-query-expansion.js +131 -0
- package/dist/services/temporal-rerank.d.ts +22 -0
- package/dist/services/temporal-rerank.js +47 -0
- package/dist/services/temporal-result-protection.d.ts +7 -0
- package/dist/services/temporal-result-protection.js +60 -0
- package/dist/services/temporal-state-write.d.ts +57 -0
- package/dist/services/temporal-state-write.js +45 -0
- package/dist/services/tiered-context.d.ts +87 -0
- package/dist/services/tiered-context.js +214 -0
- package/dist/services/tiered-loading.d.ts +88 -0
- package/dist/services/tiered-loading.js +263 -0
- package/dist/services/timeline-pack.d.ts +36 -0
- package/dist/services/timeline-pack.js +50 -0
- package/dist/services/timing.d.ts +13 -0
- package/dist/services/timing.js +72 -0
- package/dist/services/tll-augmentation.d.ts +20 -0
- package/dist/services/tll-augmentation.js +125 -0
- package/dist/services/tll-retrieval.d.ts +55 -0
- package/dist/services/tll-retrieval.js +101 -0
- package/dist/services/topic-abstraction.d.ts +36 -0
- package/dist/services/topic-abstraction.js +105 -0
- package/dist/services/trust-scoring.d.ts +43 -0
- package/dist/services/trust-scoring.js +89 -0
- package/dist/services/typed-belief-calculus.d.ts +126 -0
- package/dist/services/typed-belief-calculus.js +204 -0
- package/dist/services/upload-config.d.ts +34 -0
- package/dist/services/upload-config.js +23 -0
- package/dist/services/upload-decision.d.ts +65 -0
- package/dist/services/upload-decision.js +98 -0
- package/dist/services/upload-helpers.d.ts +107 -0
- package/dist/services/upload-helpers.js +148 -0
- package/dist/services/user-profile-builder.d.ts +22 -0
- package/dist/services/user-profile-builder.js +109 -0
- package/dist/services/voyage-embedding.d.ts +22 -0
- package/dist/services/voyage-embedding.js +77 -0
- package/dist/services/write-security.d.ts +31 -0
- package/dist/services/write-security.js +64 -0
- package/dist/storage/artifact-public-redaction.d.ts +34 -0
- package/dist/storage/artifact-public-redaction.js +83 -0
- package/dist/storage/cleanup.d.ts +103 -0
- package/dist/storage/cleanup.js +138 -0
- package/dist/storage/codec-factory.d.ts +17 -0
- package/dist/storage/codec-factory.js +33 -0
- package/dist/storage/codecs/aes-gcm-codec.d.ts +44 -0
- package/dist/storage/codecs/aes-gcm-codec.js +108 -0
- package/dist/storage/codecs/noop-codec.d.ts +16 -0
- package/dist/storage/codecs/noop-codec.js +23 -0
- package/dist/storage/factory.d.ts +44 -0
- package/dist/storage/factory.js +99 -0
- package/dist/storage/filecoin-cid-validation.d.ts +82 -0
- package/dist/storage/filecoin-cid-validation.js +122 -0
- package/dist/storage/filecoin-public-metadata.d.ts +73 -0
- package/dist/storage/filecoin-public-metadata.js +110 -0
- package/dist/storage/local-fs-store.d.ts +39 -0
- package/dist/storage/local-fs-store.js +145 -0
- package/dist/storage/pointer-uri-allowlist.d.ts +38 -0
- package/dist/storage/pointer-uri-allowlist.js +70 -0
- package/dist/storage/provider-metadata-projection.d.ts +27 -0
- package/dist/storage/provider-metadata-projection.js +68 -0
- package/dist/storage/providers/filecoin/backend.d.ts +42 -0
- package/dist/storage/providers/filecoin/backend.js +250 -0
- package/dist/storage/providers/filecoin/config.d.ts +70 -0
- package/dist/storage/providers/filecoin/config.js +275 -0
- package/dist/storage/providers/filecoin/errors.d.ts +45 -0
- package/dist/storage/providers/filecoin/errors.js +56 -0
- package/dist/storage/providers/filecoin/filecoin-pin-car.d.ts +78 -0
- package/dist/storage/providers/filecoin/filecoin-pin-car.js +155 -0
- package/dist/storage/providers/filecoin/filecoin-pin-client.d.ts +92 -0
- package/dist/storage/providers/filecoin/filecoin-pin-client.js +199 -0
- package/dist/storage/providers/filecoin/filecoin-pin-mapping.d.ts +58 -0
- package/dist/storage/providers/filecoin/filecoin-pin-mapping.js +103 -0
- package/dist/storage/providers/filecoin/filecoin-pin-timeout.d.ts +30 -0
- package/dist/storage/providers/filecoin/filecoin-pin-timeout.js +53 -0
- package/dist/storage/providers/filecoin/filecoin-pin-vendor.d.ts +111 -0
- package/dist/storage/providers/filecoin/filecoin-pin-vendor.js +87 -0
- package/dist/storage/providers/filecoin/hints.d.ts +71 -0
- package/dist/storage/providers/filecoin/hints.js +123 -0
- package/dist/storage/providers/filecoin/index.d.ts +51 -0
- package/dist/storage/providers/filecoin/index.js +103 -0
- package/dist/storage/providers/filecoin/ipfs-cid.d.ts +50 -0
- package/dist/storage/providers/filecoin/ipfs-cid.js +64 -0
- package/dist/storage/providers/filecoin/metadata.d.ts +72 -0
- package/dist/storage/providers/filecoin/metadata.js +137 -0
- package/dist/storage/providers/filecoin/piece-cid.d.ts +48 -0
- package/dist/storage/providers/filecoin/piece-cid.js +57 -0
- package/dist/storage/providers/filecoin/provider-client.d.ts +234 -0
- package/dist/storage/providers/filecoin/provider-client.js +27 -0
- package/dist/storage/providers/filecoin/readiness.d.ts +62 -0
- package/dist/storage/providers/filecoin/readiness.js +85 -0
- package/dist/storage/providers/filecoin/retriever.d.ts +82 -0
- package/dist/storage/providers/filecoin/retriever.js +63 -0
- package/dist/storage/providers/filecoin/skeleton-client.d.ts +36 -0
- package/dist/storage/providers/filecoin/skeleton-client.js +55 -0
- package/dist/storage/providers/filecoin/synapse-client.d.ts +169 -0
- package/dist/storage/providers/filecoin/synapse-client.js +343 -0
- package/dist/storage/providers/filecoin/synapse-construction.d.ts +26 -0
- package/dist/storage/providers/filecoin/synapse-construction.js +47 -0
- package/dist/storage/providers/filecoin/synapse-error-mapping.d.ts +23 -0
- package/dist/storage/providers/filecoin/synapse-error-mapping.js +49 -0
- package/dist/storage/providers/filecoin/synapse-readiness.d.ts +37 -0
- package/dist/storage/providers/filecoin/synapse-readiness.js +231 -0
- package/dist/storage/providers/filecoin/uri.d.ts +49 -0
- package/dist/storage/providers/filecoin/uri.js +84 -0
- package/dist/storage/providers/filecoin/verified-fetch-lifecycle.d.ts +77 -0
- package/dist/storage/providers/filecoin/verified-fetch-lifecycle.js +196 -0
- package/dist/storage/providers/filecoin/verified-fetch-retriever.d.ts +54 -0
- package/dist/storage/providers/filecoin/verified-fetch-retriever.js +81 -0
- package/dist/storage/providers/filecoin/verified-fetch-vendor.d.ts +71 -0
- package/dist/storage/providers/filecoin/verified-fetch-vendor.js +94 -0
- package/dist/storage/raw-content-codec.d.ts +89 -0
- package/dist/storage/raw-content-codec.js +47 -0
- package/dist/storage/raw-content-store-backend-adapter.d.ts +28 -0
- package/dist/storage/raw-content-store-backend-adapter.js +67 -0
- package/dist/storage/raw-content-store.d.ts +228 -0
- package/dist/storage/raw-content-store.js +27 -0
- package/dist/storage/s3-store.d.ts +42 -0
- package/dist/storage/s3-store.js +181 -0
- package/dist/storage/storage-backend-registry.d.ts +58 -0
- package/dist/storage/storage-backend-registry.js +56 -0
- package/dist/storage/storage-backend.d.ts +82 -0
- package/dist/storage/storage-backend.js +14 -0
- package/dist/storage/storage-capabilities.d.ts +56 -0
- package/dist/storage/storage-capabilities.js +170 -0
- package/dist/storage/store-registry.d.ts +67 -0
- package/dist/storage/store-registry.js +77 -0
- package/dist/vector-math.d.ts +15 -0
- package/dist/vector-math.js +31 -0
- package/dist/xml-escape.d.ts +5 -0
- package/dist/xml-escape.js +7 -0
- package/openapi.json +15395 -0
- package/openapi.yaml +10794 -0
- package/package.json +119 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fast rule-based fact extraction for low-latency ingest (UC2).
|
|
3
|
+
* Extracts facts from conversation text using pattern matching and simple
|
|
4
|
+
* NER — no LLM calls. Produces ExtractedFact[] compatible with the full
|
|
5
|
+
* consensus pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Design: User turns are analyzed for self-referential statements ("I", "my",
|
|
8
|
+
* "we") that reveal preferences, facts, plans, or knowledge. Fact-bearing
|
|
9
|
+
* assistant turns (containing named entities, specific data, structured
|
|
10
|
+
* content) are also extracted. Generic assistant chatter is filtered out.
|
|
11
|
+
*
|
|
12
|
+
* Trade-offs vs LLM extraction:
|
|
13
|
+
* - Speed: <50ms vs 2-22s
|
|
14
|
+
* - Quality: Captures explicit statements only (no inference, no summarization)
|
|
15
|
+
* - Entity extraction: Basic NER plus deterministic enrichment
|
|
16
|
+
* - Relation extraction: deterministic post-processing for high-signal patterns
|
|
17
|
+
*/
|
|
18
|
+
import type { ExtractedFact } from './extraction.js';
|
|
19
|
+
/**
|
|
20
|
+
* Quick fact extraction — rule-based, no LLM calls.
|
|
21
|
+
* Returns ExtractedFact[] compatible with the full extraction pipeline.
|
|
22
|
+
* Processes both user turns (first-person fact detection) and fact-bearing
|
|
23
|
+
* assistant turns (specific content detection).
|
|
24
|
+
*/
|
|
25
|
+
export declare function quickExtractFacts(conversationText: string): ExtractedFact[];
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fast rule-based fact extraction for low-latency ingest (UC2).
|
|
3
|
+
* Extracts facts from conversation text using pattern matching and simple
|
|
4
|
+
* NER — no LLM calls. Produces ExtractedFact[] compatible with the full
|
|
5
|
+
* consensus pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Design: User turns are analyzed for self-referential statements ("I", "my",
|
|
8
|
+
* "we") that reveal preferences, facts, plans, or knowledge. Fact-bearing
|
|
9
|
+
* assistant turns (containing named entities, specific data, structured
|
|
10
|
+
* content) are also extracted. Generic assistant chatter is filtered out.
|
|
11
|
+
*
|
|
12
|
+
* Trade-offs vs LLM extraction:
|
|
13
|
+
* - Speed: <50ms vs 2-22s
|
|
14
|
+
* - Quality: Captures explicit statements only (no inference, no summarization)
|
|
15
|
+
* - Entity extraction: Basic NER plus deterministic enrichment
|
|
16
|
+
* - Relation extraction: deterministic post-processing for high-signal patterns
|
|
17
|
+
*/
|
|
18
|
+
import { enrichExtractedFacts } from './extraction-enrichment.js';
|
|
19
|
+
import { annotateRelativeTemporalText } from './relative-temporal.js';
|
|
20
|
+
import { isFactBearingAssistantTurn, isAssistantFactStatement } from './assistant-turn-filter.js';
|
|
21
|
+
import { ENTITY_PATTERNS, QUOTED_TEXT_PATTERN, LITERAL_DETAIL_PATTERN, EVENT_DETAIL_PATTERN, hasStandaloneEntity, } from './content-detection.js';
|
|
22
|
+
// Matches a "[Session date: YYYY-MM-DD]" header at the start of any line.
// The `m` flag lets whole-conversation lookups find the header even when it is
// not the very first thing in the text; single-line tests are unaffected.
const SESSION_DATE_PATTERN = /^\[Session date:\s*(\d{4})-(\d{2})-(\d{2})\]/im;
|
|
23
|
+
const EXPLICIT_ABSOLUTE_DATE_PATTERN = /\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}(?:st|nd|rd|th)?(?:,\s*|\s+)\d{4}\b/i;
|
|
24
|
+
// English month names indexed by zero-based month number (0 = January).
const MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
];
|
|
38
|
+
const SPEAKER_PREFIX_PATTERN = /^[A-Z][A-Za-z0-9' -]{1,40}:\s*/;
|
|
39
|
+
const IMPLICIT_FIRST_PERSON_EVENT_PATTERN = /^(?:started|starting|built|building|developed|developing|created|creating|launched|launching|opened|opening|accepted|receiv(?:ed|ing)|got|had|went|attended|visited|reading|posted|hosting|working|looking|planning|taking|took)\b/i;
|
|
40
|
+
/**
 * Patterns that indicate a user is stating a fact about themselves.
 * Contractions accept both the straight (') and typographic (’) apostrophe so
 * text pasted from chat clients or word processors is matched as well.
 */
const FIRST_PERSON_PATTERNS = [
    /\bI\s+(?:am|was|have|had|use|used|like|liked|prefer|preferred|love|loved|hate|hated|need|needed|want|wanted|work|worked|live|lived|study|studied|started|finished|completed|built|created|made|bought|got|moved|joined|left|quit|switched|tried|learned|know|knew|think|thought|believe|believed|feel|felt|plan|planned|decided|chose|picked|signed|enrolled|attended|visited|went|add|added|implement|implemented|submit|submitted|receive|received|take|took|score|scored|launch|launched|apply|applied|consider|considered|advise|advised|recommend|recommended|call|called|focus|focused|support|supported|find|found|design|designed)\b/i,
    /\bmy\s+(?:name|job|role|team|company|project|favorite|preference|goal|plan|background|experience|hobby|family|wife|husband|partner|son|daughter|kid|dog|cat|address|email|phone|stack|setup|workflow|necklace|book|books|song|songs|painting|photo|poster|library|store|pet|pets|bowl)\b/i,
    /\bwe\s+(?:use|used|have|had|built|created|switched|moved|started|decided|chose|plan|are|were)\b/i,
    /\bI['’]m\s+(?:a|an|the|from|based|working|building|using|looking|trying|planning|learning|studying|interested|responsible|currently)\b/i,
    /\bI['’]ve\s+(?:been|had|used|tried|built|worked|lived|started|finished|switched|decided)\b/i,
    /\b(?:had|got)\s+(?:a\s+)?(?:check-up|doctor['’]?s appointment|doc['’]?s appointment)\b/i,
    /\bLet['’]?s\s+(?:create|collaborate|get together|make|work)\b/i,
    /\bI\s+should\b/i,
];
|
|
51
|
+
/** Pair a fact type label with the keyword pattern that signals it. */
const typeRule = (type, pattern) => ({ pattern, type });
/**
 * Keyword cues for labelling a fact, listed in priority order: consumers
 * iterate from the top, so earlier entries win when several match.
 */
const TYPE_PATTERNS = [
    typeRule('preference', /\b(?:prefer|like|love|hate|favorite|rather|instead of)\b/i),
    typeRule('project', /\b(?:project|repo|codebase|app|service|deploy|release|sprint|ticket)\b/i),
    typeRule('plan', /\b(?:plan|planning|going to|will|schedule|deadline|target|goal|roadmap)\b/i),
    typeRule('person', /\b(?:colleague|team|manager|boss|friend|family|wife|husband|partner|mentor)\b/i),
];
|
|
58
|
+
/**
 * Split conversation text into turns, returning user turns and fact-bearing
 * assistant turns. Generic assistant chatter (acknowledgments, clarifying
 * questions, meta-commentary) is filtered out by pushTurn.
 *
 * Text without any speaker markers needs no special handling: the accumulator
 * starts as a user turn, so unlabelled lines are collected and flushed as a
 * single user turn. An empty result therefore means the input held nothing
 * extractable (only blank lines, session-date headers, or filtered assistant
 * chatter), and the raw text is deliberately NOT re-added — doing so would
 * resurrect content that was just filtered out.
 *
 * @param text Raw conversation text, one line per "\n".
 * @returns Array of { speaker, text, source } turn records.
 */
function extractFactBearingTurns(text) {
    const turns = [];
    const state = { currentTurn: '', currentSpeaker: null, currentSource: 'user' };
    for (const line of text.split('\n')) {
        applyTurnLine(turns, state, line.trim());
    }
    // Flush whatever turn was still open when the input ended.
    pushTurn(turns, state.currentTurn, state.currentSpeaker, state.currentSource);
    return turns;
}
|
|
77
|
+
/**
 * Feed one already-trimmed line into the turn accumulator.
 * Session-date header lines are ignored. A line that opens a new speaker turn
 * flushes the turn in progress and starts a fresh one; any other line is
 * appended to the turn in progress.
 */
function applyTurnLine(turns, state, trimmed) {
    const isSessionHeader = SESSION_DATE_PATTERN.test(trimmed);
    if (isSessionHeader) {
        return;
    }
    const opened = parseSpeakerTurn(trimmed);
    if (opened) {
        pushTurn(turns, state.currentTurn, state.currentSpeaker, state.currentSource);
        state.currentTurn = opened.text;
        state.currentSpeaker = opened.speaker;
        state.currentSource = opened.source;
        return;
    }
    state.currentTurn = `${state.currentTurn}\n${trimmed}`;
}
|
|
90
|
+
// Generic labels for the human side of the conversation.
const USER_LABEL_PATTERN = /^(?:User|Human|Me):\s*/i;
// Generic labels for the assistant side of the conversation.
const ASSISTANT_LABEL_PATTERN = /^(?:Assistant|AI|Bot|Claude|ChatGPT|GPT):\s*/i;
// "Name:" prefix of a named participant. The colon must not be followed by a
// digit, otherwise clock times such as "The standup is at 9:30 AM" would be
// mistaken for a speaker label.
const NAMED_SPEAKER_PATTERN = /^([A-Z][A-Za-z0-9' -]{1,40}):(?!\d)/;
/**
 * Recognise a line that opens a new speaker turn.
 *
 * - Generic user/assistant labels are stripped from the text and carry no speaker name.
 * - A named speaker ("Anna: ...") keeps the full line as text and is treated as a user turn.
 *
 * @param trimmed A single, already-trimmed conversation line.
 * @returns { speaker, text, source } for a turn-opening line, otherwise null.
 */
function parseSpeakerTurn(trimmed) {
    if (USER_LABEL_PATTERN.test(trimmed)) {
        return { speaker: null, text: trimmed.replace(USER_LABEL_PATTERN, ''), source: 'user' };
    }
    if (ASSISTANT_LABEL_PATTERN.test(trimmed)) {
        return { speaker: null, text: trimmed.replace(ASSISTANT_LABEL_PATTERN, ''), source: 'assistant' };
    }
    const named = NAMED_SPEAKER_PATTERN.exec(trimmed);
    if (!named) {
        return null;
    }
    return { speaker: named[1], text: trimmed, source: 'user' };
}
|
|
105
|
+
/**
 * Append a finished turn to `turns` unless it is empty after trimming or is an
 * assistant turn that isFactBearingAssistantTurn rejects.
 */
function pushTurn(turns, text, speaker, source) {
    const content = text.trim();
    const keep = content !== ''
        && (source !== 'assistant' || isFactBearingAssistantTurn(content));
    if (keep) {
        turns.push({ speaker, text: content, source });
    }
}
|
|
113
|
+
/**
 * Break a turn into sentences.
 * Splits after ".", "!" or "?" followed by whitespace, and after newlines,
 * while shielding title abbreviations from the split. Fragments of 10
 * characters or fewer (after trimming) are dropped.
 */
function splitSentences(text) {
    const sentences = [];
    const pieces = protectAbbreviations(text).split(/(?<=[.!?])\s+|(?<=\n)/);
    for (const piece of pieces) {
        const sentence = restoreAbbreviations(piece).trim();
        if (sentence.length > 10) {
            sentences.push(sentence);
        }
    }
    return sentences;
}
|
|
122
|
+
/**
 * Decide whether a sentence is worth extracting as a fact.
 * Detectors run in order and the first truthy verdict is returned:
 * first-person patterns, implicit first-person events, standalone facts,
 * then third-person declaratives.
 */
function isFactStatement(sentence) {
    const detectors = [
        (candidate) => FIRST_PERSON_PATTERNS.some((pattern) => pattern.test(candidate)),
        looksLikeImplicitFirstPersonEvent,
        looksLikeStandaloneFact,
    ];
    for (const detect of detectors) {
        const verdict = detect(sentence);
        if (verdict) {
            return verdict;
        }
    }
    return looksLikeThirdPersonDeclarative(sentence);
}
|
|
129
|
+
/**
 * Label a sentence with the type of the first TYPE_PATTERNS entry whose
 * pattern matches; falls back to 'knowledge' when none match.
 */
function classifyType(sentence) {
    const hit = TYPE_PATTERNS.find((entry) => entry.pattern.test(sentence));
    return hit ? hit.type : 'knowledge';
}
|
|
137
|
+
/**
 * Score a sentence's importance on a 0-1 scale.
 * Starts at 0.5 and adds a bonus for each specificity signal present:
 * emphatic wording, identity statements, digits, and length over 100 chars.
 * The total is capped at 1.
 */
function estimateImportance(sentence) {
    const signals = [
        { present: /\b(?:always|never|every|must|critical|important|key)\b/i.test(sentence), bonus: 0.2 },
        { present: /\b(?:my name|I am|I work|my role|my team|my company)\b/i.test(sentence), bonus: 0.15 },
        // Digits usually mean dates, versions or quantities.
        { present: /\d/.test(sentence), bonus: 0.1 },
        // Longer sentences tend to be more specific.
        { present: sentence.length > 100, bonus: 0.05 },
    ];
    let total = 0.5;
    for (const signal of signals) {
        if (signal.present) {
            total += signal.bonus;
        }
    }
    return Math.min(1, total);
}
|
|
150
|
+
// Capitalised words that are never treated as entities: pronouns, determiners,
// conjunctions, common sentence openers, weekdays and month names.
const ENTITY_SKIP_WORDS = new Set(['I', 'The', 'A', 'An', 'My', 'We', 'Our', 'It', 'This', 'That', 'But', 'And', 'Or', 'So', 'If', 'When', 'What', 'How', 'Why', 'Where', 'Yes', 'No', 'Also', 'Just', 'Really', 'Actually', 'Currently', 'Recently', 'Usually', 'Sometimes', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']);
/**
 * Extract entities from text using pattern matching and capitalization.
 *
 * Pass 1 collects every match of each ENTITY_PATTERNS entry with that entry's
 * type. Pass 2 adds capitalised words (other than the first word of the
 * sentence and ENTITY_SKIP_WORDS) as 'concept' entities. Names are de-duplicated
 * case-insensitively across both passes.
 *
 * Each pattern is re-created with the global flag and iterated with matchAll,
 * so a pattern declared without `g`, or one that can match the empty string,
 * cannot cause an endless loop.
 *
 * @param sentence Text to scan.
 * @returns Array of { name, type } in order of discovery.
 */
function extractEntities(sentence) {
    const entities = [];
    const seen = new Set();
    const addEntity = (name, type) => {
        const key = name.toLowerCase();
        if (!name || seen.has(key)) {
            return;
        }
        seen.add(key);
        entities.push({ name, type });
    };
    // Pattern-based entity extraction.
    for (const { pattern, type } of ENTITY_PATTERNS) {
        const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
        for (const match of sentence.matchAll(new RegExp(pattern.source, flags))) {
            addEntity(match[0], type);
        }
    }
    // Capitalised words; index 0 is skipped because sentence-initial capitals carry no signal.
    const words = sentence.split(/\s+/);
    for (let i = 1; i < words.length; i++) {
        const word = words[i].replace(/[^a-zA-Z]/g, '');
        if (word.length > 1 && /^[A-Z]/.test(word) && !ENTITY_SKIP_WORDS.has(word)) {
            addEntity(word, 'concept');
        }
    }
    return entities;
}
|
|
179
|
+
/**
 * Replace the period of the title abbreviations Dr., Mr., Mrs., Ms. and Prof.
 * with the placeholder "<prd>" so sentence splitting does not break on them.
 */
function protectAbbreviations(text) {
    return text.replace(/\b(Dr|Mr|Mrs|Ms|Prof)\./g, '$1<prd>');
}
|
|
187
|
+
/** Turn every "<prd>" placeholder back into a period. */
function restoreAbbreviations(text) {
    return text.split('<prd>').join('.');
}
|
|
190
|
+
/**
 * Heuristic for sentences that carry a fact on their own.
 * Questions and sentences shorter than 16 characters never qualify; otherwise
 * a standalone entity, a four-digit number, quoted text, a literal detail or
 * an event detail is enough.
 */
function looksLikeStandaloneFact(sentence) {
    const isQuestion = sentence.endsWith('?');
    if (isQuestion || sentence.length < 16) {
        return false;
    }
    const entityVerdict = hasStandaloneEntity(sentence);
    if (entityVerdict) {
        return entityVerdict;
    }
    const detailPatterns = [/\b\d{4}\b/, QUOTED_TEXT_PATTERN, LITERAL_DETAIL_PATTERN, EVENT_DETAIL_PATTERN];
    return detailPatterns.some((pattern) => pattern.test(sentence));
}
|
|
201
|
+
// Verbs that mark a declarative statement (built once, reused on every call).
const THIRD_PERSON_VERBS = /(?:is|was|are|were|has|had|leads?|manages?|runs?|works?|heads?|handles?|provides?|requires?|follows?|supports?|enables?|triggers?|processes?|happens?|occurs?|communicates?|serves?|guarantees?|prioritizes?|includes?|needs?|ships?|deploys?|reviews?|schedules?|averages?|takes?|starts?|meets?|begins?|ends?|costs?|uses?|optimizes?|streamlines?)\b/;
// Capitalised openers that look like proper nouns but are pronouns or fillers.
const THIRD_PERSON_NON_SUBJECTS = '(?!(?:It|That|Something|Everything|Everyone|Nothing|Anything|Anyone|Okay|Sure|Well|So|Maybe|Perhaps|Probably|Obviously|Clearly|Basically|Actually|Apparently|Honestly|Definitely|Certainly)\\b)';
// Specificity signals; a sentence qualifies when any one of them matches.
const THIRD_PERSON_SIGNALS = [
    // Proper-noun subject followed by a declarative verb ("Maria Chen is ...").
    new RegExp('^' + THIRD_PERSON_NON_SUBJECTS + '[A-Z][a-z]+(?:\\s+[A-Z][a-z]+)*\\s+' + THIRD_PERSON_VERBS.source),
    // Determiner-led subject followed by a declarative verb ("The daily standup is ...").
    new RegExp('^(?:The|A|An|Our|This|Each|Every|All)\\s+\\w+(?:\\s+\\w+)?\\s+' + THIRD_PERSON_VERBS.source),
    // Acronym subject, optionally with one extra word ("CI/CD pipeline runs ...").
    new RegExp('^[A-Z]{2,}(?:[/ ][A-Z]{2,})*\\s+(?:\\w+\\s+)?' + THIRD_PERSON_VERBS.source),
    // Capitalised word plus lowercase noun(s) ("Sprint velocity averaged ...").
    new RegExp('^[A-Z][a-z]+\\s+[a-z]+(?:\\s+[a-z]+)?\\s+' + THIRD_PERSON_VERBS.source),
    // Number with a unit. "%" is matched without a trailing word boundary: "%"
    // is not a word character, so "50% of" has no \b after the percent sign.
    /\b\d+(?:\.\d+)?\s*(?:%|(?:story points?|points?|percent|minutes?|hours?|days?|weeks?|months?|sprints?|users?|requests?|items?|members?|milliseconds?|seconds?|ms|MB|GB|TB|million|billion|k)\b)/i,
    // Clock time such as "9:30 AM".
    /\b\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)?\b/,
    // Determiner-led passive ("The release was postponed").
    /^(?:The|A|An|Our|This|Each|Every|All)\s+\w+(?:\s+\w+)?\s+(?:is|are|was|were)\s+\w+ed\b/,
];
/**
 * Detect third-person declarative statements that carry factual content.
 * Catches patterns like "Maria Chen is the engineering lead",
 * "The daily standup is at 9:30 AM", "Sprint velocity averaged 34 points",
 * "Our deployment strategy prioritizes zero-downtime releases".
 *
 * Guards: questions and sentences shorter than 20 characters are rejected, and
 * at least one specificity signal (proper-noun, determiner-led, acronym or
 * compound-noun subject + verb, numeric data with a unit, a time expression,
 * or a determiner-led passive) is required to avoid extracting generic
 * statements.
 *
 * @param sentence Candidate sentence.
 * @returns true when the sentence looks like a factual declarative.
 */
function looksLikeThirdPersonDeclarative(sentence) {
    if (sentence.endsWith('?')) {
        return false;
    }
    if (sentence.length < 20) {
        return false;
    }
    return THIRD_PERSON_SIGNALS.some((signal) => signal.test(sentence));
}
|
|
228
|
+
/**
 * Read the session-date header from the text and format it as
 * "<MonthName> <day> <year>" (day without leading zero).
 * Returns null when there is no header or the month number has no name.
 */
function parseSessionDate(text) {
    const header = text.match(SESSION_DATE_PATTERN);
    if (!header) {
        return null;
    }
    const [, year, monthDigits, dayDigits] = header;
    const monthName = MONTH_NAMES[Number(monthDigits) - 1];
    if (!monthName) {
        return null;
    }
    return `${monthName} ${Number(dayDigits)} ${year}`;
}
|
|
238
|
+
/**
 * Read the session-date header from the text as a Date at 00:00:00.000 UTC.
 *
 * Date.UTC silently normalises out-of-range components (month 13, day 40,
 * years 0-99), which would anchor relative dates to a day the header never
 * named. The result is therefore checked against the parsed components and
 * rejected when they do not round-trip.
 *
 * @param text Conversation text that may contain a session-date header.
 * @returns The session date, or null when the header is absent or does not
 *          name a real calendar day.
 */
function parseSessionDateValue(text) {
    const match = text.match(SESSION_DATE_PATTERN);
    if (!match) {
        return null;
    }
    const year = Number(match[1]);
    const monthIndex = Number(match[2]) - 1;
    const day = Number(match[3]);
    const value = new Date(Date.UTC(year, monthIndex, day, 0, 0, 0, 0));
    const isRealCalendarDay = value.getUTCFullYear() === year
        && value.getUTCMonth() === monthIndex
        && value.getUTCDate() === day;
    return isRealCalendarDay ? value : null;
}
|
|
247
|
+
/**
 * Decide whether the whole turn should be a candidate alongside its sentences:
 * it must have at least two sentences, be at most 280 characters long, and
 * contain at least one sentence that passes isFactStatement.
 */
function shouldExtractWholeTurn(turn, sentences) {
    const isMultiSentence = sentences.length >= 2;
    const isShortEnough = turn.length <= 280;
    return isMultiSentence
        && isShortEnough
        && sentences.some((candidate) => isFactStatement(candidate));
}
|
|
254
|
+
/**
 * Remove a leading speaker label and a leading filler word from a candidate
 * fact sentence, then trim it.
 * The label pattern allows no digits and requires whitespace after the colon,
 * so a time such as "at 9:30 AM" is never mistaken for a label.
 */
function normalizeCandidateText(text) {
    const withoutSpeaker = text.replace(/^[A-Z][A-Za-z' -]{1,40}(?<!\d):\s+/, '');
    const withoutFiller = withoutSpeaker.replace(/^(?:Oh,\s*btw,?|btw,?|well,|so,|yeah,|hah,|haha,)\s*/i, '');
    return withoutFiller.trim();
}
|
|
265
|
+
/**
 * Turn a user sentence into stored fact text.
 * The sentence is normalised, its leading pronoun and leading entity reference
 * are rewritten, and relative time expressions are annotated when a session
 * date value is available. An "As of <sessionDate>, " prefix is added unless
 * there is no session date, the text already says "As of", or the rewritten
 * sentence contains an explicit absolute date.
 */
function anchorFact(sentence, sessionDate, sessionDateValue) {
    const rewritten = resolveLeadEntityReference(rewriteLeadPronoun(normalizeCandidateText(sentence)));
    let normalized = rewritten;
    if (sessionDateValue) {
        normalized = annotateRelativeTemporalText(rewritten, sessionDateValue);
    }
    const needsAnchor = Boolean(sessionDate)
        && !/\bAs of\b/i.test(normalized)
        && !EXPLICIT_ABSOLUTE_DATE_PATTERN.test(rewritten);
    return needsAnchor
        ? `As of ${sessionDate}, ${formatAnchoredBody(normalized)}`
        : normalized;
}
|
|
276
|
+
/**
 * Prefix an assistant-sourced sentence with the session date.
 * No first-person rewriting is applied. The sentence is returned trimmed and
 * unprefixed when there is no session date, when it already contains "As of",
 * or when it contains an explicit absolute date. A sentence that opens with a
 * capitalised word other than a common determiner/pronoun keeps its capital
 * (e.g. "Miss Bee"); otherwise its first letter is lowercased.
 */
function anchorAssistantFact(sentence, sessionDate) {
    const text = sentence.trim();
    if (!sessionDate) {
        return text;
    }
    if (/\bAs of\b/i.test(text) || EXPLICIT_ABSOLUTE_DATE_PATTERN.test(text)) {
        return text;
    }
    const opensWithCapitalisedWord = /^[A-Z][a-z]/.test(text);
    const opensWithCommonWord = /^(?:The|A|An|This|That|It|Here|There)\b/.test(text);
    const keepsCapital = opensWithCapitalisedWord && !opensWithCommonWord;
    return `As of ${sessionDate}, ${keepsCapital ? text : lowercaseLead(text)}`;
}
|
|
291
|
+
/**
 * Rewrite a leading first-person pronoun into a "user" subject.
 *
 * Only the start of the trimmed sentence is rewritten. Contractions are
 * handled before the bare "I" so they are not split, and every contraction
 * accepts both the straight (') and typographic (’) apostrophe — otherwise
 * "I’m" would fall through to the bare-"I" rule and become "user’m".
 *
 * @param sentence Candidate sentence.
 * @returns The trimmed sentence with its leading pronoun rewritten, or the
 *          trimmed sentence unchanged when it does not start with one.
 */
function rewriteLeadPronoun(sentence) {
    return sentence
        .trim()
        .replace(/^I['’]d\b/i, 'user would')
        .replace(/^I['’]ll\b/i, 'user will')
        .replace(/^I['’]ve\b/i, 'user has')
        .replace(/^I['’]m\b/i, 'user is')
        .replace(/^I\b/i, 'user')
        .replace(/^My\b/i, "user's");
}
|
|
301
|
+
/** Lowercase the first character of the text and leave the rest as is. */
function lowercaseLead(text) {
    const head = text.slice(0, 1);
    const tail = text.slice(1);
    return `${head.toLowerCase()}${tail}`;
}
|
|
304
|
+
/**
 * Prepare fact text for use after an "As of ..., " prefix: text that begins
 * with the word "user" (any case) gets a lowercase first letter; anything
 * else is returned unchanged.
 */
function formatAnchoredBody(text) {
    if (/^user\b/i.test(text)) {
        return lowercaseLead(text);
    }
    return text;
}
|
|
307
|
+
/**
 * Fold a leading entity fragment into the sentence that refers to it as "it".
 *
 * Handles two shapes, both anchored at the start of the text:
 *   "<Entity>. I've been using it ..."        -> "user has been using <Entity> ..."
 *   "<Entity>. user [has been] using|preferring|liking it ..."
 *                                             -> "<predicate> <Entity> ..."
 * The contraction accepts both the straight (') and typographic (’) apostrophe.
 *
 * @param text Candidate fact text.
 * @returns The rewritten text, or the input unchanged when neither shape matches.
 */
function resolveLeadEntityReference(text) {
    const direct = text.match(/^([A-Z][A-Za-z0-9.+ -]{1,60})\.\s+I['’]ve been using it\b/i);
    if (direct) {
        // The match is anchored at index 0, so the remainder is everything after it.
        return `user has been using ${direct[1].trim()}${text.slice(direct[0].length)}`;
    }
    const rewritten = text.match(/^([A-Z][A-Za-z0-9.+ -]{1,60})\.\s+(user(?:\s+has\s+been)?\s+(?:using|preferring|liking))\s+it\b/i);
    if (!rewritten) {
        return text;
    }
    const entity = rewritten[1].trim();
    const predicate = rewritten[2];
    return `${predicate} ${entity}${text.slice(rewritten[0].length)}`;
}
|
|
320
|
+
/**
 * Detect subject-less event sentences ("Started a new job last week").
 * The normalised sentence must open with one of the implicit-event verbs and
 * also carry a high-signal event detail or quoted text.
 */
function looksLikeImplicitFirstPersonEvent(sentence) {
    const candidate = normalizeCandidateText(sentence);
    return IMPLICIT_FIRST_PERSON_EVENT_PATTERN.test(candidate)
        && (containsHighSignalEventDetail(candidate) || QUOTED_TEXT_PATTERN.test(candidate));
}
|
|
327
|
+
/**
 * True when the text matches the event-detail pattern, the literal-detail
 * pattern, or a relative day reference (today, tomorrow, yesterday, "last <word>").
 */
function containsHighSignalEventDetail(text) {
    const detailPatterns = [
        EVENT_DETAIL_PATTERN,
        LITERAL_DETAIL_PATTERN,
        /\b(?:today|tomorrow|yesterday|last\s+\w+)\b/i,
    ];
    return detailPatterns.some((pattern) => pattern.test(text));
}
|
|
332
|
+
/**
 * Build a headline from the first eight whitespace-separated words of the
 * sentence, appending "..." when words were cut off.
 */
function generateHeadline(sentence) {
    const allWords = sentence.split(/\s+/);
    const lead = allWords.slice(0, 8).join(' ');
    return allWords.length > 8 ? `${lead}...` : lead;
}
|
|
337
|
+
/**
 * Pull up to ten keywords out of a sentence.
 * The sentence is lowercased and stripped of everything except letters,
 * digits, whitespace and hyphens; words of one or two characters and stop
 * words are skipped.
 */
function extractKeywords(sentence) {
    const STOP_WORDS = new Set(['i', 'me', 'my', 'we', 'our', 'the', 'a', 'an', 'is', 'am', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'can', 'may', 'might', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'about', 'that', 'this', 'it', 'not', 'but', 'and', 'or', 'so', 'if', 'than', 'too', 'very', 'just', 'also', 'really', 'actually', 'currently', 'been', 'being']);
    const tokens = sentence.toLowerCase().replace(/[^a-z0-9\s-]/g, '').split(/\s+/);
    const keywords = [];
    for (const token of tokens) {
        if (keywords.length === 10) {
            break;
        }
        if (token.length > 2 && !STOP_WORDS.has(token)) {
            keywords.push(token);
        }
    }
    return keywords;
}
|
|
347
|
+
/**
 * Rule-based fact extraction entry point; makes no LLM calls.
 * Splits the conversation into fact-bearing turns, reads the session date
 * once, extracts facts turn by turn with conversation-wide de-duplication,
 * and returns the facts after passing them through enrichExtractedFacts.
 */
export function quickExtractFacts(conversationText) {
    const turns = extractFactBearingTurns(conversationText);
    const sessionDate = parseSessionDate(conversationText);
    const sessionDateValue = parseSessionDateValue(conversationText);
    const collected = [];
    const seenFacts = new Set();
    turns.forEach((turn) => {
        extractFactsFromTurn(turn, conversationText, sessionDate, sessionDateValue, seenFacts, collected);
    });
    return enrichExtractedFacts(collected);
}
|
|
364
|
+
/**
 * Extract facts from one turn and append them to `facts`.
 * Candidates are the turn's sentences, plus the whole turn text when
 * shouldExtractWholeTurn allows it. Every surviving fact is passed through
 * resolveContextualObjectReference with the full context text.
 */
function extractFactsFromTurn(turn, contextText, sessionDate, sessionDateValue, seenFacts, facts) {
    const sentences = splitSentences(turn.text);
    const fromAssistant = turn.source === 'assistant';
    const candidates = [...sentences];
    if (shouldExtractWholeTurn(turn.text, sentences)) {
        candidates.push(turn.text);
    }
    candidates.forEach((candidate) => {
        const fact = processSentence(candidate, fromAssistant, turn.speaker, sessionDate, sessionDateValue, seenFacts);
        if (fact) {
            facts.push(resolveContextualObjectReference(fact, contextText));
        }
    });
}
|
|
377
|
+
/**
 * Convert one candidate sentence into a fact record.
 * Returns null when the sentence fails the fact filter for its source, or when
 * its anchored text (compared lowercased with collapsed whitespace) was
 * already seen. Assistant facts have their importance scaled by 0.9.
 */
function processSentence(sentence, isAssistant, speaker, sessionDate, sessionDateValue, seenFacts) {
    let factText;
    if (isAssistant) {
        if (!isAssistantFactStatement(sentence)) {
            return null;
        }
        factText = anchorAssistantFact(sentence, sessionDate);
    }
    else {
        const speakerAwareSentence = applySpeakerSubject(sentence, speaker);
        // Accept when either the raw or the speaker-substituted form is a fact.
        if (!isFactStatement(sentence) && !isFactStatement(speakerAwareSentence)) {
            return null;
        }
        factText = anchorFact(speakerAwareSentence, sessionDate, sessionDateValue);
    }
    const dedupeKey = factText.toLowerCase().replace(/\s+/g, ' ').trim();
    if (seenFacts.has(dedupeKey)) {
        return null;
    }
    seenFacts.add(dedupeKey);
    return {
        fact: factText,
        headline: generateHeadline(factText),
        importance: isAssistant ? estimateImportance(factText) * 0.9 : estimateImportance(factText),
        type: classifyType(factText),
        keywords: extractKeywords(factText),
        entities: extractEntities(factText),
        relations: [],
    };
}
|
|
402
|
+
/**
 * Replace first-person references with the named speaker.
 * Without a speaker the sentence is returned untouched. A sentence that opens
 * with "had" (optionally after an "Appreciate ..., but" lead-in) gets the
 * speaker as its subject; then I'd / I'll / I've / I'm / I / my are rewritten
 * throughout the sentence.
 */
function applySpeakerSubject(sentence, speaker) {
    if (!speaker) {
        return sentence;
    }
    const rewrites = [
        { pattern: /\bI['’]d\b/g, replacement: `${speaker} would` },
        { pattern: /\bI['’]ll\b/g, replacement: `${speaker} will` },
        { pattern: /\bI['’]ve\b/g, replacement: `${speaker} has` },
        { pattern: /\bI['’]m\b/g, replacement: `${speaker} is` },
        { pattern: /\bI\b/g, replacement: speaker },
        { pattern: /\bmy\b/gi, replacement: `${speaker}'s` },
    ];
    let result = sentence.replace(/^(?:Appreciate[^,]{0,80},\s+but\s+)?had\b/i, `${speaker} had`);
    for (const rewrite of rewrites) {
        result = result.replace(rewrite.pattern, rewrite.replacement);
    }
    return result;
}
|
|
415
|
+
/**
 * Replace the vague "had them for" in a fact with "had the <object> for",
 * where the object comes from findContextualObject applied to the given text.
 * The fact is returned unchanged when the phrase is absent or no object is found;
 * otherwise a copy with the rewritten `fact` field is returned.
 */
function resolveContextualObjectReference(fact, turnText) {
    const vaguePhrase = /\bhad them for\b/i;
    const object = vaguePhrase.test(fact.fact) ? findContextualObject(turnText) : null;
    if (!object) {
        return fact;
    }
    return { ...fact, fact: fact.fact.replace(vaguePhrase, `had the ${object} for`) };
}
|
|
428
|
+
/**
 * Return the first of "turtles", "snakes", "dogs", "cats" or "pets" found in
 * the text (lowercased), or null when none occurs.
 */
function findContextualObject(text) {
    const found = text.match(/\b(turtles|snakes|dogs|cats|pets)\b/i);
    if (!found) {
        return null;
    }
    return found[1].toLowerCase();
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic quoted-entity extraction for exact titles and event names.
|
|
3
|
+
*
|
|
4
|
+
* This supplements LLM extraction when exact quoted titles or performers are
|
|
5
|
+
* text-visible but the generated fact weakens the relation. It intentionally
|
|
6
|
+
* does not infer image-only text or unseen metadata.
|
|
7
|
+
*/
|
|
8
|
+
import type { ExtractedFact } from './extraction.js';
|
|
9
|
+
export declare function mergeQuotedEntityFacts(existingFacts: ExtractedFact[], conversationText: string): ExtractedFact[];
|
|
10
|
+
export declare function extractQuotedEntityFacts(conversationText: string): ExtractedFact[];
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic quoted-entity extraction for exact titles and event names.
|
|
3
|
+
*
|
|
4
|
+
* This supplements LLM extraction when exact quoted titles or performers are
|
|
5
|
+
* text-visible but the generated fact weakens the relation. It intentionally
|
|
6
|
+
* does not infer image-only text or unseen metadata.
|
|
7
|
+
*/
|
|
8
|
+
const SESSION_DATE_PATTERN = /^\[Session date:\s*(\d{4})-(\d{2})-(\d{2})\]/im;
|
|
9
|
+
const SPEAKER_LINE_PATTERN = /^([A-Za-z][A-Za-z0-9' -]{1,40}):\s*(.+)$/;
|
|
10
|
+
const MONTH_NAMES = [
|
|
11
|
+
'January', 'February', 'March', 'April', 'May', 'June',
|
|
12
|
+
'July', 'August', 'September', 'October', 'November', 'December',
|
|
13
|
+
];
|
|
14
|
+
/**
 * Add quoted-entity facts extracted from the conversation to an existing list.
 * When nothing is extracted the existing array is returned as is. Otherwise
 * facts are keyed by their normalised text and a supplemental fact is added
 * only when its key is not present yet.
 */
export function mergeQuotedEntityFacts(existingFacts, conversationText) {
    const supplemental = extractQuotedEntityFacts(conversationText);
    if (supplemental.length === 0) {
        return existingFacts;
    }
    const byFact = new Map();
    for (const existing of existingFacts) {
        byFact.set(normalize(existing.fact), existing);
    }
    for (const extra of supplemental) {
        const key = normalize(extra.fact);
        if (!byFact.has(key)) {
            byFact.set(key, extra);
        }
    }
    return Array.from(byFact.values());
}
|
|
26
|
+
/**
 * Extract quoted-entity facts from every speaker turn of the conversation,
 * anchored to the session date when one is present, then de-duplicate them.
 */
export function extractQuotedEntityFacts(conversationText) {
    const sessionDate = parseSessionDate(conversationText);
    const collected = [];
    for (const turn of parseTurns(conversationText)) {
        collected.push(...extractFactsFromTurn(turn, sessionDate));
    }
    return dedupeFacts(collected);
}
|
|
32
|
+
/**
 * Run every quoted-entity extractor on one turn and concatenate the results
 * in a fixed order: book titles, performer events, recommendation letters.
 */
function extractFactsFromTurn(turn, sessionDate) {
    const extractors = [
        extractBookTitleFacts,
        extractPerformerEventFacts,
        extractRecommendationLetterFacts,
    ];
    return extractors.flatMap((extract) => extract(turn, sessionDate));
}
|
|
39
|
+
/**
 * Build one fact per quoted value in a turn that mentions books or reading.
 * A turn that also says "favorite"/"favourite" yields preference facts with
 * the relation "favorite childhood book was"; otherwise knowledge facts with
 * the relation "read".
 */
function extractBookTitleFacts(turn, sessionDate) {
    const mentionsReading = /\b(?:book|books|read|reading)\b/i.test(turn.text);
    if (!mentionsReading) {
        return [];
    }
    // These depend only on the turn, so they are computed once for all titles.
    const isFavorite = /\bfavou?rite\b/i.test(turn.text);
    const relation = isFavorite ? 'favorite childhood book was' : 'read';
    const factType = isFavorite ? 'preference' : 'knowledge';
    const facts = [];
    for (const title of extractQuotedValues(turn.text)) {
        const fact = `${subjectPrefix(sessionDate, turn.speaker)} ${relation} "${title}".`;
        facts.push(buildFact(fact, title, 'concept', ['book', title], factType));
    }
    return facts;
}
|
|
49
|
+
/**
 * Build "saw <performer> perform" facts for a turn.
 * A performer is taken from a quoted name that opens the turn and is followed
 * by "-" or ":" (only when the turn has a performance signal), followed by any
 * performers named in concert phrases.
 */
function extractPerformerEventFacts(turn, sessionDate) {
    const performers = [];
    const leadingQuoted = turn.text.match(/^\s*["'“‘]([^"'”’]{2,80})["'”’]\s*[-:]/);
    if (leadingQuoted && hasPerformanceSignal(turn.text)) {
        performers.push(leadingQuoted[1].trim());
    }
    performers.push(...extractNamedConcertPerformers(turn.text));
    return performers.map((performer) => buildPerformerFact(sessionDate, turn.speaker, performer));
}
|
|
61
|
+
/**
 * Build a "<writer> is writing <speaker's> main recommendation letter" fact.
 * Requires the turn to mention a recommendation letter, to use a writing verb,
 * and to name a writer that extractRecommendationWriter can identify;
 * otherwise no facts are returned.
 */
function extractRecommendationLetterFacts(turn, sessionDate) {
    const { text, speaker } = turn;
    const mentionsLetter = /\brecommendation letter\b/i.test(text);
    const mentionsWriting = mentionsLetter && /\b(?:writing|write|wrote|agreed to write)\b/i.test(text);
    const writer = mentionsWriting ? extractRecommendationWriter(text) : null;
    if (!writer) {
        return [];
    }
    const fact = `${subjectPrefix(sessionDate, writer)} is writing ${possessiveSubject(speaker)} main recommendation letter.`;
    return [buildFact(fact, writer, 'person', ['recommendation letter', writer], 'knowledge')];
}
|
|
73
|
+
/**
 * Find who is writing the recommendation letter.
 * Tries, in order: a name directly followed by the writing phrase, then a name
 * in one sentence followed by a she/he/they writing phrase in the next. The
 * first hit is passed through normalizePersonName; null when neither matches.
 */
function extractRecommendationWriter(text) {
    const writerPatterns = [
        // "<Name> is writing my recommendation letter"
        /\b(?<writer>Dr\.?\s+[A-Z][A-Za-z'’.-]+|[A-Z][A-Za-z'’.-]+(?:\s+[A-Z][A-Za-z'’.-]+){0,3})\s+(?:is|'s|will be|agreed to)\s+(?:writing|write)\s+(?:my|their|the user's)?\s*(?:main\s+)?recommendation letter\b/i,
        // "<Name> ... . She is writing my recommendation letter"
        /\b(?<writer>Dr\.?\s+[A-Z][A-Za-z'’.-]+|[A-Z][A-Za-z'’.-]+(?:\s+[A-Z][A-Za-z'’.-]+){0,3})\b[^.]{0,120}\.\s*(?:she|he|they)\s*(?:'s| is| are| will be)?\s+(?:writing|write)\s+(?:my|their|the user's)?\s*(?:main\s+)?recommendation letter\b/i,
    ];
    for (const pattern of writerPatterns) {
        const writer = text.match(pattern)?.groups?.writer;
        if (writer) {
            return normalizePersonName(writer);
        }
    }
    return null;
}
|
|
80
|
+
/** Build the knowledge fact stating that the speaker saw the performer perform music. */
function buildPerformerFact(sessionDate, speaker, performer) {
    const subject = subjectPrefix(sessionDate, speaker);
    const keywords = ['artist', 'band', 'music', performer];
    return buildFact(`${subject} saw "${performer}" perform music.`, performer, 'concept', keywords, 'knowledge');
}
|
|
83
|
+
/**
 * Collect performer names from "at (a) <Name> concert" and
 * "concert featuring|with <Name>" phrases. Each captured name goes through
 * stripTrailingWords and empty results are dropped. All matches of the first
 * phrase shape come before those of the second.
 */
function extractNamedConcertPerformers(text) {
    const patterns = [
        /\bat\s+(?:a\s+)?([A-Z][A-Za-z'’.-]+(?:\s+[A-Z][A-Za-z'’.-]+){0,4})\s+concert\b/g,
        /\bconcert\s+(?:featuring|with)\s+([A-Z][A-Za-z'’.-]+(?:\s+[A-Z][A-Za-z'’.-]+){0,4})\b/g,
    ];
    return patterns
        .flatMap((pattern) => Array.from(text.matchAll(pattern), (match) => stripTrailingWords(match[1])))
        .filter(Boolean);
}
|
|
96
|
+
/**
 * Reports whether the text contains a whole-word, case-insensitive
 * performance term (played, performing, concert, show, stage, song, ...).
 *
 * @param text - Text to inspect.
 * @returns true when at least one performance term is present.
 */
function hasPerformanceSignal(text) {
    const performanceVocabulary = /\b(?:played|playing|performed|performing|concert|show|stage|song|songs|dancing|singing)\b/i;
    return performanceVocabulary.test(text);
}
|
|
99
|
+
/**
 * Extracts quoted spans (2-80 characters between the quotes) from the text.
 *
 * Four quote styles are scanned in a fixed order: straight double, curly
 * double, straight single, curly single. Results are therefore grouped by
 * quote style, not by position in the text. Each value is trimmed.
 *
 * Single-quote handling distinguishes quotation marks from apostrophes:
 *  - a straight `'` only opens a quote when it is not preceded by a letter or
 *    digit (so the apostrophe in "don't" or "boys'" never opens one);
 *  - a closing `'` / `’` must not be followed by a letter or digit;
 *  - an apostrophe sitting between two letters is kept as part of the value
 *    (so 'Don't Stop' yields "Don't Stop" rather than "Don").
 *
 * @param text - Raw turn text.
 * @returns The trimmed quoted values found, possibly empty.
 */
function extractQuotedValues(text) {
    const quotePatterns = [
        /"([^"]{2,80})"/g,
        /“([^”]{2,80})”/g,
        /(?<![A-Za-z0-9])'((?:[^']|(?<=[A-Za-z])'(?=[A-Za-z])){2,80})'(?![A-Za-z0-9])/g,
        /‘((?:[^’]|(?<=[A-Za-z])’(?=[A-Za-z])){2,80})’(?![A-Za-z0-9])/g,
    ];
    const values = [];
    for (const pattern of quotePatterns) {
        for (const match of text.matchAll(pattern)) {
            values.push(match[1].trim());
        }
    }
    return values;
}
|
|
107
|
+
/**
 * Appends the trimmed first capture group of every match of `pattern` in
 * `text` to `values` (mutating it in place).
 *
 * @param values - Output array that receives the captured strings.
 * @param text - Text to scan.
 * @param pattern - Regular expression with one capture group; it is passed to
 *   `String.prototype.matchAll`, which requires the global flag.
 */
function collectQuotedValues(values, text, pattern) {
    const hits = text.matchAll(pattern);
    let step = hits.next();
    while (!step.done) {
        values.push(step.value[1].trim());
        step = hits.next();
    }
}
|
|
112
|
+
/**
 * Splits conversation text into speaker turns.
 *
 * Each line is trimmed and matched against `SPEAKER_LINE_PATTERN`; lines that
 * do not match are dropped. Capture group 1 becomes the speaker and capture
 * group 2 the text of the turn.
 *
 * @param conversationText - Newline-separated conversation text.
 * @returns Array of `{ speaker, text }` objects in line order.
 */
function parseTurns(conversationText) {
    const turns = [];
    for (const rawLine of conversationText.split('\n')) {
        const parsed = rawLine.trim().match(SPEAKER_LINE_PATTERN);
        if (parsed !== null) {
            turns.push({ speaker: parsed[1], text: parsed[2] });
        }
    }
    return turns;
}
|
|
120
|
+
/**
 * Extracts the session date from conversation text as "<Month> <day> <year>".
 *
 * Capture groups of `SESSION_DATE_PATTERN` are read as (1) year, (2) month
 * number and (3) day; the month number indexes `MONTH_NAMES` (1-based) and the
 * day is passed through `Number`, which drops any leading zero.
 *
 * @param conversationText - Conversation text to search.
 * @returns The formatted date, or null when the pattern does not match or the
 *   month number has no entry in `MONTH_NAMES`.
 */
function parseSessionDate(conversationText) {
    const dateParts = conversationText.match(SESSION_DATE_PATTERN);
    if (dateParts === null) {
        return null;
    }
    const [, year, monthNumber, day] = dateParts;
    const monthName = MONTH_NAMES[Number(monthNumber) - 1];
    if (!monthName) {
        return null;
    }
    return `${monthName} ${Number(day)} ${year}`;
}
|
|
127
|
+
/**
 * Builds the leading subject of a fact sentence.
 *
 * @param sessionDate - Date label; when falsy no "As of ..." prefix is added.
 * @param speaker - Subject name; a falsy value falls back to 'user'.
 * @returns "As of <sessionDate>, <subject>" or just "<subject>".
 */
function subjectPrefix(sessionDate, speaker) {
    const who = speaker ? speaker : 'user';
    if (!sessionDate) {
        return who;
    }
    return `As of ${sessionDate}, ${who}`;
}
|
|
131
|
+
/**
 * Returns the possessive form of the speaker for use inside a fact sentence.
 *
 * A missing or empty speaker is treated as the generic user, matching the
 * fallback `subjectPrefix` applies; previously it produced "undefined's" or
 * a bare "'s". Any casing of "user" is normalised to "user's".
 *
 * @param speaker - Speaker name, possibly empty or undefined.
 * @returns "user's" for the generic user, otherwise "<speaker>'s".
 */
function possessiveSubject(speaker) {
    if (!speaker || /^user$/i.test(speaker)) {
        return "user's";
    }
    return `${speaker}'s`;
}
|
|
134
|
+
/**
 * Assembles a fact record with a single entity and no relations.
 *
 * @param fact - Full fact sentence.
 * @param entityName - Name of the single entity attached to the fact.
 * @param entityType - Type label of that entity.
 * @param keywords - Keyword array, stored by reference (not copied).
 * @param type - Fact type label.
 * @returns Object with `fact`, `headline` (the first ten whitespace-separated
 *   tokens of `fact` joined by single spaces), a fixed `importance` of 0.7,
 *   `type`, `keywords`, `entities` and an empty `relations` array.
 */
function buildFact(fact, entityName, entityType, keywords, type) {
    const leadingWords = fact.split(/\s+/).slice(0, 10);
    const entity = { name: entityName, type: entityType };
    return {
        fact,
        headline: leadingWords.join(' '),
        importance: 0.7,
        type,
        keywords,
        entities: [entity],
        relations: [],
    };
}
|
|
145
|
+
/**
 * Removes one trailing time word ("last", "yesterday", "today" or "tomorrow",
 * any casing) together with the whitespace before it, then trims the result.
 *
 * @param text - Captured phrase, e.g. a performer name.
 * @returns The phrase without the trailing time word.
 */
function stripTrailingWords(text) {
    const trailingTimeWord = /\s+(?:last|yesterday|today|tomorrow)$/i;
    const withoutTimeWord = text.replace(trailingTimeWord, '');
    return withoutTimeWord.trim();
}
|
|
148
|
+
/**
 * Cleans up a captured person name.
 *
 * Steps, in order: drop a leading "my|the user's|user's advisor|mentor|professor"
 * phrase, rewrite the first "Dr" that is followed by whitespace as "Dr. ",
 * collapse whitespace runs to single spaces, and trim.
 *
 * @param text - Raw captured name.
 * @returns The normalized name.
 */
function normalizePersonName(text) {
    const withoutRolePrefix = text.replace(/^(?:my|the user's|user's)\s+(?:advisor|mentor|professor)\s+/i, '');
    const withDottedTitle = withoutRolePrefix.replace(/\bDr\s+/i, 'Dr. ');
    const singleSpaced = withDottedTitle.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
|
|
155
|
+
/**
 * Removes facts whose text is identical after `normalize`.
 *
 * For duplicates, the position of the first occurrence is kept but the object
 * of the last occurrence is the one returned (later entries overwrite earlier
 * ones under the same key).
 *
 * @param facts - Fact objects, each with a `fact` string.
 * @returns A new array with one fact per distinct normalized text.
 */
function dedupeFacts(facts) {
    const byNormalizedText = new Map();
    for (const candidate of facts) {
        byNormalizedText.set(normalize(candidate.fact), candidate);
    }
    return Array.from(byNormalizedText.values());
}
|
|
159
|
+
/**
 * Canonicalises text for comparison: lower-cases it, collapses every run of
 * whitespace to one space, and trims both ends.
 *
 * @param text - Text to canonicalise.
 * @returns The canonical form.
 */
function normalize(text) {
    const lowered = text.toLowerCase();
    const singleSpaced = lowered.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Deterministic retry-backoff helper for raw-storage reconciliation.
|
|
3
|
+
*
|
|
4
|
+
* Kept separate from the reconciler orchestration so both production
|
|
5
|
+
* code and tests can assert scheduling math without depending on the
|
|
6
|
+
* larger DB/network reconciliation module.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Computes the delay before the next retry as `2^attempts * baseMs`, capped
 * at `maxMs`. The exponent is clamped to 32, and non-positive `attempts`
 * start from the base delay.
 *
 * @param attempts - Number of attempts made so far.
 * @param baseMs - Base delay in milliseconds.
 * @param maxMs - Upper bound applied to the exponential delay, in milliseconds.
 * @returns The delay in milliseconds.
 */
export declare function computeBackoffMs(attempts: number, baseMs: number, maxMs: number): number;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Deterministic retry-backoff helper for raw-storage reconciliation.
|
|
3
|
+
*
|
|
4
|
+
* Kept separate from the reconciler orchestration so both production
|
|
5
|
+
* code and tests can assert scheduling math without depending on the
|
|
6
|
+
* larger DB/network reconciliation module.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Computes the retry delay as `2^attempts * baseMs`, never exceeding `maxMs`.
 *
 * The cap is applied on every path, including the first attempt, so a
 * `baseMs` larger than `maxMs` can no longer make attempt 0 wait longer than
 * attempt 1. A `NaN` attempt count is treated like "no attempts yet" instead
 * of propagating `NaN` as the delay.
 *
 * @param attempts - Number of attempts made so far.
 * @param baseMs - Base delay in milliseconds.
 * @param maxMs - Upper bound for the returned delay, in milliseconds.
 * @returns The delay in milliseconds.
 */
export function computeBackoffMs(attempts, baseMs, maxMs) {
    // `!(attempts > 0)` is true for zero, negatives and NaN alike.
    if (!(attempts > 0)) {
        return Math.min(baseMs, maxMs);
    }
    // Clamp the exponent so 2^attempts stays a modest, exactly representable number.
    const safeAttempts = Math.min(attempts, 32);
    const exponential = Math.pow(2, safeAttempts) * baseMs;
    return Math.min(exponential, maxMs);
}
|