@pentatonic-ai/ai-agent-sdk 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/bin/cli.js +1 -1
- package/bin/commands/config.js +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +2 -2
- package/packages/doctor/src/checks/local-memory.js +2 -2
- package/packages/memory/README.md +2 -2
- package/packages/memory/openclaw-plugin/README.md +2 -2
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
- package/packages/memory/src/server.js +2 -2
- package/packages/memory-engine-v2/.env.example +30 -0
- package/packages/memory-engine-v2/README.md +125 -0
- package/packages/memory-engine-v2/compat/Dockerfile +11 -0
- package/packages/memory-engine-v2/compat/requirements.txt +6 -0
- package/packages/memory-engine-v2/compat/server.py +1047 -0
- package/packages/memory-engine-v2/docker-compose.aws.yml +78 -0
- package/packages/memory-engine-v2/docker-compose.yml +206 -0
- package/packages/memory-engine-v2/extractor-async/Dockerfile +14 -0
- package/packages/memory-engine-v2/extractor-async/confidence.py +62 -0
- package/packages/memory-engine-v2/extractor-async/noise_filter.py +144 -0
- package/packages/memory-engine-v2/extractor-async/requirements.txt +2 -0
- package/packages/memory-engine-v2/extractor-async/test_confidence.py +76 -0
- package/packages/memory-engine-v2/extractor-async/test_noise_filter.py +177 -0
- package/packages/memory-engine-v2/extractor-async/worker.py +797 -0
- package/packages/memory-engine-v2/extractor-sync/Dockerfile +11 -0
- package/packages/memory-engine-v2/extractor-sync/requirements.txt +4 -0
- package/packages/memory-engine-v2/extractor-sync/server.py +424 -0
- package/packages/memory-engine-v2/org-model/migrations/001_init.sql +390 -0
- package/packages/memory-engine-v2/tests/e2e_smoke.py +356 -0
- package/packages/memory-engine-v2/tests/fixtures/generate_synthetic_corpus.py +758 -0
- package/packages/memory-engine/.env.example +0 -13
- package/packages/memory-engine/MIGRATION.md +0 -219
- package/packages/memory-engine/README.md +0 -145
- package/packages/memory-engine/bench/README.md +0 -99
- package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +0 -961
- package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +0 -937
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +0 -961
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +0 -883
- package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +0 -937
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +0 -1115
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +0 -1115
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +0 -819
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +0 -542
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +0 -1278
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +0 -894
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +0 -1018
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +0 -680
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +0 -1038
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +0 -693
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +0 -961
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +0 -727
- package/packages/memory-engine/compat/Dockerfile +0 -22
- package/packages/memory-engine/compat/server.py +0 -1255
- package/packages/memory-engine/docker-compose.test.yml +0 -59
- package/packages/memory-engine/docker-compose.yml +0 -255
- package/packages/memory-engine/engine/README.md +0 -52
- package/packages/memory-engine/engine/l2-hybridrag-proxy.py +0 -1543
- package/packages/memory-engine/engine/l5-comms-layer.py +0 -663
- package/packages/memory-engine/engine/l6-document-store.py +0 -1018
- package/packages/memory-engine/engine/services/_shared/__init__.py +0 -1
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +0 -562
- package/packages/memory-engine/engine/services/l2/Dockerfile +0 -50
- package/packages/memory-engine/engine/services/l2/init_databases.py +0 -81
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +0 -2721
- package/packages/memory-engine/engine/services/l5/Dockerfile +0 -11
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +0 -808
- package/packages/memory-engine/engine/services/l6/Dockerfile +0 -30
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +0 -1221
- package/packages/memory-engine/engine/services/nv-embed/Dockerfile +0 -28
- package/packages/memory-engine/engine/services/nv-embed/server.py +0 -152
- package/packages/memory-engine/pme_memory/__init__.py +0 -0
- package/packages/memory-engine/pme_memory/__main__.py +0 -129
- package/packages/memory-engine/pme_memory/artifacts.py +0 -95
- package/packages/memory-engine/pme_memory/embed.py +0 -74
- package/packages/memory-engine/pme_memory/health.py +0 -36
- package/packages/memory-engine/pme_memory/hygiene.py +0 -159
- package/packages/memory-engine/pme_memory/indexer.py +0 -200
- package/packages/memory-engine/pme_memory/needs.py +0 -55
- package/packages/memory-engine/pme_memory/provenance.py +0 -80
- package/packages/memory-engine/pme_memory/scoring.py +0 -168
- package/packages/memory-engine/pme_memory/search.py +0 -52
- package/packages/memory-engine/pme_memory/store.py +0 -86
- package/packages/memory-engine/pme_memory/synthesis.py +0 -114
- package/packages/memory-engine/pyproject.toml +0 -65
- package/packages/memory-engine/scripts/kg-extractor.py +0 -557
- package/packages/memory-engine/scripts/kg-preflexor-v2.py +0 -738
- package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +0 -128
- package/packages/memory-engine/tests/e2e_arena.sh +0 -259
- package/packages/memory-engine/tests/embed_stub/Dockerfile +0 -13
- package/packages/memory-engine/tests/embed_stub/server.py +0 -80
- package/packages/memory-engine/tests/test_aggregate.py +0 -333
- package/packages/memory-engine/tests/test_api_contract.sh +0 -57
- package/packages/memory-engine/tests/test_arena_safety.py +0 -232
- package/packages/memory-engine/tests/test_channel_stat_reader.py +0 -437
- package/packages/memory-engine/tests/test_channel_stat_rollups.py +0 -308
- package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +0 -48
- package/packages/memory-engine/tests/test_embed_provider.py +0 -693
- package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +0 -280
- package/packages/memory-engine/tests/test_l3_arena_isolation.py +0 -412
- package/packages/memory-engine/tests/test_l6_module_load.py +0 -84
- package/packages/memory-engine/tests/test_people_list_reader.py +0 -432
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# AWS overlay for pentatonic-memory-engine v2.
|
|
2
|
+
#
|
|
3
|
+
# Differences vs the base compose:
|
|
4
|
+
# - Volumes bind-mounted under /var/lib/pme-v2/* so AWS Backup
|
|
5
|
+
# snapshots map cleanly to per-layer data dirs (matches v1's
|
|
6
|
+
# pattern).
|
|
7
|
+
# - org-model + extractor-async tuned for production (more
|
|
8
|
+
# connection pool headroom, longer claim TTL).
|
|
9
|
+
# - cloudflared added as a service so v2 is reachable via the
|
|
10
|
+
# same tunnel + Access policy v1 uses (until the memory-proxy
|
|
11
|
+
# Worker lands).
|
|
12
|
+
#
|
|
13
|
+
# Apply:
|
|
14
|
+
# docker compose --env-file .env \
|
|
15
|
+
# -f docker-compose.yml -f docker-compose.aws.yml up -d --build
|
|
16
|
+
#
|
|
17
|
+
# Cloudflared expects TUNNEL_TOKEN_V2 in the env file. The token is the
|
|
18
|
+
# same one v1 uses unless you've cut a separate tunnel for v2.
|
|
19
|
+
|
|
20
|
+
services:
|
|
21
|
+
org-model:
|
|
22
|
+
environment:
|
|
23
|
+
# Production tuning: bigger shared_buffers for the materialised
|
|
24
|
+
# views, more connection slots for the extractor + compat pools.
|
|
25
|
+
POSTGRES_SHARED_BUFFERS: "1GB"
|
|
26
|
+
POSTGRES_MAX_CONNECTIONS: "200"
|
|
27
|
+
volumes:
|
|
28
|
+
- /var/lib/pme-v2/org-model:/var/lib/postgresql/data
|
|
29
|
+
- ./org-model/migrations:/docker-entrypoint-initdb.d:ro
|
|
30
|
+
|
|
31
|
+
vector-index:
|
|
32
|
+
environment:
|
|
33
|
+
# Qdrant prod settings: allow more optimization threads, log INFO.
|
|
34
|
+
QDRANT__STORAGE__PERFORMANCE__MAX_OPTIMIZATION_THREADS: "4"
|
|
35
|
+
QDRANT__LOG_LEVEL: "INFO"
|
|
36
|
+
volumes:
|
|
37
|
+
- /var/lib/pme-v2/vector-index:/qdrant/storage
|
|
38
|
+
|
|
39
|
+
extractor-sync:
|
|
40
|
+
environment:
|
|
41
|
+
PG_DSN: ${PME_V2_PG_DSN}
|
|
42
|
+
|
|
43
|
+
extractor-async:
|
|
44
|
+
environment:
|
|
45
|
+
PG_DSN: ${PME_V2_PG_DSN}
|
|
46
|
+
LLM_ENDPOINT: ${PME_V2_LLM_ENDPOINT:-}
|
|
47
|
+
LLM_API_KEY: ${PENTATONIC_AI_GATEWAY_KEY:-}
|
|
48
|
+
POLL_INTERVAL_SEC: "10"
|
|
49
|
+
CLAIM_TTL_SEC: "600"
|
|
50
|
+
|
|
51
|
+
compat:
|
|
52
|
+
environment:
|
|
53
|
+
PG_DSN: ${PME_V2_PG_DSN}
|
|
54
|
+
VECTOR_INDEX_URL: http://vector-index:6333
|
|
55
|
+
EXTRACTOR_SYNC_URL: http://extractor-sync:8101
|
|
56
|
+
NV_EMBED_URL: ${NV_EMBED_URL}
|
|
57
|
+
NV_EMBED_API_KEY: ${PENTATONIC_AI_GATEWAY_KEY}
|
|
58
|
+
NV_EMBED_PROVIDER: pentatonic-gateway
|
|
59
|
+
EMBED_DIM: "4096"
|
|
60
|
+
|
|
61
|
+
# Cloudflared tunnel — same pattern as v1. Optional; only start if
|
|
62
|
+
# you want v2 reachable via the public tunnel before the memory-proxy
|
|
63
|
+
# Worker migration lands.
|
|
64
|
+
cloudflared:
|
|
65
|
+
image: cloudflare/cloudflared:latest
|
|
66
|
+
container_name: pme2-cloudflared
|
|
67
|
+
restart: unless-stopped
|
|
68
|
+
# --protocol http2 matches the v1 setup: forces TCP/7844 instead of
|
|
69
|
+
# QUIC/UDP/7844, which is more robust through restrictive outbound
|
|
70
|
+
# firewalls / NATs and matches how v1's tunnel was running. Keep
|
|
71
|
+
# parity so the cutover doesn't change any tunnel-level behaviour
|
|
72
|
+
# the operator's used to.
|
|
73
|
+
command: tunnel --no-autoupdate --protocol http2 run --token ${TUNNEL_TOKEN_V2}
|
|
74
|
+
networks:
|
|
75
|
+
- engine-net
|
|
76
|
+
depends_on:
|
|
77
|
+
compat:
|
|
78
|
+
condition: service_healthy
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
name: pentatonic-memory-engine-v2
|
|
2
|
+
|
|
3
|
+
# pentatonic-memory-engine v2 — keystone-first rebuild.
|
|
4
|
+
#
|
|
5
|
+
# 3 stores, not 7:
|
|
6
|
+
# - org-model (Postgres): structured extraction; source of truth for
|
|
7
|
+
# "what the company knows". Most queries answer here without
|
|
8
|
+
# touching the vector index.
|
|
9
|
+
# - vector-index (Qdrant): single binary, mmap + filtered search by
|
|
10
|
+
# default. Holds embeddings + lightweight payloads only. Cited by
|
|
11
|
+
# org-model via provenance pointers (late materialisation).
|
|
12
|
+
# - compat shim (FastAPI): wire-format compatible with v1's
|
|
13
|
+
# /store, /store-batch, /search, /forget. Routes writes through
|
|
14
|
+
# extractor-sync (deterministic fast path) before indexing.
|
|
15
|
+
# Routes reads via the typed-router architecture.
|
|
16
|
+
#
|
|
17
|
+
# Extraction is split:
|
|
18
|
+
# - extractor-sync: deterministic per-source rules (entities,
|
|
19
|
+
# mentions, content-hash, basic typing). Inline on the write path,
|
|
20
|
+
# no LLM. Fast — does not block the consumer DO drain.
|
|
21
|
+
# - extractor-async: LLM-driven distillation (decisions, commitments,
|
|
22
|
+
# summaries). Reads from a `distillation_queue` table in
|
|
23
|
+
# org-model; writes facts back. Independent of write-path latency.
|
|
24
|
+
#
|
|
25
|
+
# Embed gateway is external — same as v1's `NV_EMBED_URL` env. In
|
|
26
|
+
# prod this points at the GH200 / Lambda Cloud gateway. Local dev
|
|
27
|
+
# can point at the same shared dev endpoint.
|
|
28
|
+
#
|
|
29
|
+
# Bring up: docker compose up -d
|
|
30
|
+
# API surface: http://localhost:${PME_V2_COMPAT_PORT:-8099}
|
|
31
|
+
# Health: curl http://localhost:${PME_V2_COMPAT_PORT:-8099}/health
|
|
32
|
+
|
|
33
|
+
x-engine-base: &engine-base
|
|
34
|
+
restart: unless-stopped
|
|
35
|
+
networks:
|
|
36
|
+
- engine-net
|
|
37
|
+
|
|
38
|
+
services:
|
|
39
|
+
# --------------------------------------------------------------------
|
|
40
|
+
# org-model — Postgres holding the canonical org model.
|
|
41
|
+
#
|
|
42
|
+
# Schema includes provenance + participant set + disclosure class
|
|
43
|
+
# columns from day one. See org-model/migrations/001_init.sql for
|
|
44
|
+
# the canonical schema definition and the rationale comments.
|
|
45
|
+
# --------------------------------------------------------------------
|
|
46
|
+
org-model:
|
|
47
|
+
<<: *engine-base
|
|
48
|
+
image: postgres:16-alpine
|
|
49
|
+
container_name: pme2-org-model
|
|
50
|
+
ports: ["127.0.0.1:${PME_V2_ORG_MODEL_PORT:-15432}:5432"]
|
|
51
|
+
environment:
|
|
52
|
+
POSTGRES_USER: ${PME_V2_PG_USER:-pme}
|
|
53
|
+
POSTGRES_PASSWORD: ${PME_V2_PG_PASSWORD:-local-dev-pw}
|
|
54
|
+
POSTGRES_DB: ${PME_V2_PG_DB:-org_model}
|
|
55
|
+
volumes:
|
|
56
|
+
- pme2-org-model-data:/var/lib/postgresql/data
|
|
57
|
+
- ./org-model/migrations:/docker-entrypoint-initdb.d:ro
|
|
58
|
+
healthcheck:
|
|
59
|
+
test: ["CMD-SHELL", "pg_isready -U ${PME_V2_PG_USER:-pme} -d ${PME_V2_PG_DB:-org_model}"]
|
|
60
|
+
interval: 5s
|
|
61
|
+
timeout: 3s
|
|
62
|
+
retries: 30
|
|
63
|
+
start_period: 20s
|
|
64
|
+
|
|
65
|
+
# --------------------------------------------------------------------
|
|
66
|
+
# vector-index — Qdrant.
|
|
67
|
+
#
|
|
68
|
+
# Single-node, mmap-by-default, payload-indexed for fast filtered
|
|
69
|
+
# search on (arena, kind, recency). Replaces the four vector
|
|
70
|
+
# materialisations in v1 (L4-vec, L4-QMD, L5, L6).
|
|
71
|
+
#
|
|
72
|
+
# Quantization (SQ8) is configured per-collection at creation time
|
|
73
|
+
# by compat/server.py on startup; not a compose-level concern.
|
|
74
|
+
# --------------------------------------------------------------------
|
|
75
|
+
vector-index:
|
|
76
|
+
<<: *engine-base
|
|
77
|
+
image: qdrant/qdrant:v1.12.4
|
|
78
|
+
container_name: pme2-vector-index
|
|
79
|
+
ports:
|
|
80
|
+
- "127.0.0.1:${PME_V2_QDRANT_HTTP_PORT:-16333}:6333"
|
|
81
|
+
- "127.0.0.1:${PME_V2_QDRANT_GRPC_PORT:-16334}:6334"
|
|
82
|
+
environment:
|
|
83
|
+
QDRANT__SERVICE__HTTP_PORT: "6333"
|
|
84
|
+
QDRANT__SERVICE__GRPC_PORT: "6334"
|
|
85
|
+
# mmap by default — disk-resident, page-cache governed.
|
|
86
|
+
# Avoids the v1 "everything pinned in RAM" problem.
|
|
87
|
+
QDRANT__STORAGE__PERFORMANCE__MAX_OPTIMIZATION_THREADS: "2"
|
|
88
|
+
volumes:
|
|
89
|
+
- pme2-vector-index-data:/qdrant/storage
|
|
90
|
+
healthcheck:
|
|
91
|
+
# qdrant image has bash but no curl/wget. Use bash's built-in
|
|
92
|
+
# /dev/tcp to confirm the port is accepting connections — enough
|
|
93
|
+
# to gate dependents on the service being up.
|
|
94
|
+
test: ["CMD", "bash", "-c", "</dev/tcp/localhost/6333"]
|
|
95
|
+
interval: 5s
|
|
96
|
+
timeout: 3s
|
|
97
|
+
retries: 30
|
|
98
|
+
start_period: 20s
|
|
99
|
+
|
|
100
|
+
# --------------------------------------------------------------------
|
|
101
|
+
# extractor-sync — deterministic fast-path extraction.
|
|
102
|
+
#
|
|
103
|
+
# Inline on the compat shim's write path. No LLM. Per-source
|
|
104
|
+
# rules (slack → chat, drive → doc, gmail → note, calendar → event,
|
|
105
|
+
# etc.) extract entities + content-hash + canonical typing.
|
|
106
|
+
# Writes provisional facts into org-model + enqueues an entry on
|
|
107
|
+
# the distillation_queue for the async worker to upgrade with LLM
|
|
108
|
+
# extraction later.
|
|
109
|
+
#
|
|
110
|
+
# See extractor-sync/server.py for the per-source rule registry.
|
|
111
|
+
# --------------------------------------------------------------------
|
|
112
|
+
extractor-sync:
|
|
113
|
+
<<: *engine-base
|
|
114
|
+
build:
|
|
115
|
+
context: ./extractor-sync
|
|
116
|
+
dockerfile: Dockerfile
|
|
117
|
+
container_name: pme2-extractor-sync
|
|
118
|
+
ports: ["127.0.0.1:${PME_V2_EXTRACTOR_SYNC_PORT:-8101}:8101"]
|
|
119
|
+
environment:
|
|
120
|
+
PG_DSN: ${PME_V2_PG_DSN:-postgresql://pme:local-dev-pw@org-model:5432/org_model}
|
|
121
|
+
depends_on:
|
|
122
|
+
org-model:
|
|
123
|
+
condition: service_healthy
|
|
124
|
+
healthcheck:
|
|
125
|
+
# python:slim base image has no wget; use urllib instead.
|
|
126
|
+
test: ["CMD", "python", "-c", "import urllib.request,sys; urllib.request.urlopen('http://localhost:8101/health',timeout=3)"]
|
|
127
|
+
interval: 10s
|
|
128
|
+
timeout: 5s
|
|
129
|
+
retries: 20
|
|
130
|
+
start_period: 15s
|
|
131
|
+
|
|
132
|
+
# --------------------------------------------------------------------
|
|
133
|
+
# extractor-async — LLM-driven distillation worker.
|
|
134
|
+
#
|
|
135
|
+
# Reads from org-model's distillation_queue table, runs LLM
|
|
136
|
+
# extraction against the provisional facts that extractor-sync
|
|
137
|
+
# laid down, and writes back the upgraded versions. Independent
|
|
138
|
+
# of the write-path — runs at its own cadence, retries on failure,
|
|
139
|
+
# never blocks the consumer drain.
|
|
140
|
+
#
|
|
141
|
+
# In prod this calls out to the same embed/LLM gateway compat does
|
|
142
|
+
# (NV_EMBED_URL + a chat-completions LLM endpoint).
|
|
143
|
+
# --------------------------------------------------------------------
|
|
144
|
+
extractor-async:
|
|
145
|
+
<<: *engine-base
|
|
146
|
+
build:
|
|
147
|
+
context: ./extractor-async
|
|
148
|
+
dockerfile: Dockerfile
|
|
149
|
+
container_name: pme2-extractor-async
|
|
150
|
+
environment:
|
|
151
|
+
PG_DSN: ${PME_V2_PG_DSN:-postgresql://pme:local-dev-pw@org-model:5432/org_model}
|
|
152
|
+
LLM_ENDPOINT: ${PME_V2_LLM_ENDPOINT:-http://localhost:8000/v1/chat/completions}
|
|
153
|
+
LLM_API_KEY: ${PENTATONIC_AI_GATEWAY_KEY:-}
|
|
154
|
+
POLL_INTERVAL_SEC: "5"
|
|
155
|
+
depends_on:
|
|
156
|
+
org-model:
|
|
157
|
+
condition: service_healthy
|
|
158
|
+
|
|
159
|
+
# --------------------------------------------------------------------
|
|
160
|
+
# compat — FastAPI shim, wire-format compatible with v1.
|
|
161
|
+
#
|
|
162
|
+
# /store, /store-batch → extractor-sync → org-model + vector-index
|
|
163
|
+
# /search → typed-router → org-model and/or vector-index
|
|
164
|
+
# /forget → org-model deletes + vector-index payload filter
|
|
165
|
+
# /health, /health/deep → reports all three stores
|
|
166
|
+
# /aggregate, /people-list → org-model (graph-as-derived-view)
|
|
167
|
+
#
|
|
168
|
+
# Same external port as v1 (8099) so a TES env-var flip is the
|
|
169
|
+
# only thing needed to point traffic at v2.
|
|
170
|
+
# --------------------------------------------------------------------
|
|
171
|
+
compat:
|
|
172
|
+
<<: *engine-base
|
|
173
|
+
build:
|
|
174
|
+
context: ./compat
|
|
175
|
+
dockerfile: Dockerfile
|
|
176
|
+
container_name: pme2-compat
|
|
177
|
+
ports: ["${PME_V2_COMPAT_PORT:-8099}:8099"]
|
|
178
|
+
environment:
|
|
179
|
+
PG_DSN: ${PME_V2_PG_DSN:-postgresql://pme:local-dev-pw@org-model:5432/org_model}
|
|
180
|
+
VECTOR_INDEX_URL: ${PME_V2_VECTOR_INDEX_URL:-http://vector-index:6333}
|
|
181
|
+
EXTRACTOR_SYNC_URL: ${PME_V2_EXTRACTOR_SYNC_URL:-http://extractor-sync:8101}
|
|
182
|
+
NV_EMBED_URL: ${NV_EMBED_URL:-http://nv-embed:8041/v1/embeddings}
|
|
183
|
+
NV_EMBED_API_KEY: ${PENTATONIC_AI_GATEWAY_KEY:-}
|
|
184
|
+
NV_EMBED_PROVIDER: ${NV_EMBED_PROVIDER:-pentatonic-gateway}
|
|
185
|
+
EMBED_DIM: ${PME_V2_EMBED_DIM:-4096}
|
|
186
|
+
depends_on:
|
|
187
|
+
org-model:
|
|
188
|
+
condition: service_healthy
|
|
189
|
+
vector-index:
|
|
190
|
+
condition: service_healthy
|
|
191
|
+
extractor-sync:
|
|
192
|
+
condition: service_healthy
|
|
193
|
+
healthcheck:
|
|
194
|
+
test: ["CMD", "python", "-c", "import urllib.request,sys; urllib.request.urlopen('http://localhost:8099/health',timeout=3)"]
|
|
195
|
+
interval: 10s
|
|
196
|
+
timeout: 5s
|
|
197
|
+
retries: 30
|
|
198
|
+
start_period: 30s
|
|
199
|
+
|
|
200
|
+
volumes:
|
|
201
|
+
pme2-org-model-data:
|
|
202
|
+
pme2-vector-index-data:
|
|
203
|
+
|
|
204
|
+
networks:
|
|
205
|
+
engine-net:
|
|
206
|
+
driver: bridge
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
FROM python:3.12-slim
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
COPY requirements.txt .
|
|
6
|
+
RUN pip install --no-cache-dir -r requirements.txt
|
|
7
|
+
|
|
8
|
+
COPY worker.py .
|
|
9
|
+
# Pure helper modules — sibling imports inside worker.py
|
|
10
|
+
# (noise_filter, confidence). The test_*.py files are intentionally
|
|
11
|
+
# excluded; they're for local pytest, not container runtime.
|
|
12
|
+
COPY noise_filter.py confidence.py ./
|
|
13
|
+
|
|
14
|
+
CMD ["python", "worker.py"]
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""confidence — fact confidence promotion based on multi-source corroboration.
|
|
2
|
+
|
|
3
|
+
Today every fact lands in org_model at confidence 0.5 / stage 'provisional'
|
|
4
|
+
and never moves. Live-data audit (2026-05-25): EVERY fact across 200
|
|
5
|
+
sampled rows in pentatonic-team is stuck at 0.5 — no signal of
|
|
6
|
+
"how trustworthy is this?" reaches the read side.
|
|
7
|
+
|
|
8
|
+
The right signal is **multi-source corroboration**: the same statement
|
|
9
|
+
appearing in two emails AND a calendar event is meaningfully more
|
|
10
|
+
trustworthy than a one-off mention in a Slack DM. The extractor
|
|
11
|
+
already records `provenance_event_ids` (the list of source events
|
|
12
|
+
that mention each fact), so the data needed for promotion is there
|
|
13
|
+
— we just don't use it.
|
|
14
|
+
|
|
15
|
+
Formula:
|
|
16
|
+
|
|
17
|
+
confidence = min(0.5 + 0.15 * (n_sources - 1), 0.9)
|
|
18
|
+
|
|
19
|
+
Concretely:
|
|
20
|
+
|
|
21
|
+
1 source → 0.50 (single mention, default)
|
|
22
|
+
2 sources → 0.65 (one corroboration)
|
|
23
|
+
3 sources → 0.80
|
|
24
|
+
4 sources → 0.90 (cap; "verified" remains human-only)
|
|
25
|
+
5+ → 0.90
|
|
26
|
+
|
|
27
|
+
Cap at 0.9 reserves the (0.9, 1.0] range for human-verified facts
|
|
28
|
+
(`stage = 'verified'`), which the extractor cannot produce on its
|
|
29
|
+
own. We never bump the stage from `provisional` to `distilled` or
|
|
30
|
+
`verified` from this code path — corroboration is a signal, not a
|
|
31
|
+
promotion. Stage transitions stay deliberate / explicit.
|
|
32
|
+
|
|
33
|
+
Pure module — no I/O, no deps. Importable from worker.py without
|
|
34
|
+
pulling in psycopg / httpx.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
# Bump-per-additional-source. Tuned so:
|
|
40
|
+
# 1 → 0.50 (base)
|
|
41
|
+
# 2 → 0.65
|
|
42
|
+
# 3 → 0.80
|
|
43
|
+
# 4 → 0.90 (cap reached)
|
|
44
|
+
# Picked instead of a smooth log/sqrt because the read-side bucket
|
|
45
|
+
# boundaries (UI badge colours) align cleanly with these steps.
|
|
46
|
+
_CONF_PER_SOURCE = 0.15
|
|
47
|
+
_CONF_BASE = 0.5
|
|
48
|
+
_CONF_CAP = 0.9
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def corroborated_confidence(n_sources: int) -> float:
|
|
52
|
+
"""Confidence score for a fact corroborated by `n_sources` events.
|
|
53
|
+
|
|
54
|
+
`n_sources <= 1` (including zero or negative) returns the base confidence;
|
|
55
|
+
larger counts are capped at `_CONF_CAP`, and uncapped values are rounded to two decimals. Pure function for easy unit testing.
|
|
56
|
+
"""
|
|
57
|
+
if n_sources <= 1:
|
|
58
|
+
return _CONF_BASE
|
|
59
|
+
bumped = _CONF_BASE + _CONF_PER_SOURCE * (n_sources - 1)
|
|
60
|
+
if bumped > _CONF_CAP:
|
|
61
|
+
return _CONF_CAP
|
|
62
|
+
return round(bumped, 2)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""noise_filter — drop junk entity names before they enter the graph.
|
|
2
|
+
|
|
3
|
+
The extractor-async LLM pulls "entities" from event content, but the
|
|
4
|
+
model has no way to know which proper nouns are real and which are
|
|
5
|
+
artefacts of the prompt or transcript. Live-data audit of the
|
|
6
|
+
pentatonic-team arena (2026-05-25) found the following classes of
|
|
7
|
+
junk under entity_type=person / other:
|
|
8
|
+
|
|
9
|
+
- First-person pronouns: "i", "me", "my", "myself"
|
|
10
|
+
- Second-person pronouns: "you", "yourself"
|
|
11
|
+
- Plural first-person: "we", "us", "ourselves"
|
|
12
|
+
- Generic referents: "agent", "the user", "the speaker",
|
|
13
|
+
"the guy who wrote it", "both of you"
|
|
14
|
+
- Tailscale hostnames the agent emits in scratchpad text:
|
|
15
|
+
"pin-dev-office-01.tail4aaf0c.ts.net"
|
|
16
|
+
- Worktree/agent labels the orchestrator leaks into transcripts:
|
|
17
|
+
"Agent A — L2 provenance taint", "Agent B"
|
|
18
|
+
- Filesystem paths: "/Users/admin/cursor/Pip",
|
|
19
|
+
"src/memory-engine/compat/server.py"
|
|
20
|
+
- Single-character or empty strings: "i", "a", ""
|
|
21
|
+
|
|
22
|
+
These are precise patterns, not heuristics. The intent is "if a
|
|
23
|
+
human looked at this row they'd say 'that's not a thing' immediately",
|
|
24
|
+
not "low-confidence extraction" — those keep their place in the graph
|
|
25
|
+
with the existing confidence scoring. The bar for adding a new
|
|
26
|
+
pattern: it appears repeatedly in real arena data AND there is no
|
|
27
|
+
realistic case where the same string could be a legitimate entity.
|
|
28
|
+
|
|
29
|
+
Pure module — no I/O, no deps. Importable from worker.py without
|
|
30
|
+
pulling in psycopg / httpx, and importable from tests without any
|
|
31
|
+
fixtures.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import re
|
|
37
|
+
|
|
38
|
+
# --------------------------------------------------------------------
|
|
39
|
+
# Pattern banks
|
|
40
|
+
# --------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
# Lowercased exact-match drops. Order doesn't matter; lookup is O(1).
|
|
43
|
+
_NOISE_EXACT: frozenset[str] = frozenset(
|
|
44
|
+
{
|
|
45
|
+
# First-person
|
|
46
|
+
"i", "me", "my", "mine", "myself",
|
|
47
|
+
# Second-person
|
|
48
|
+
"you", "your", "yours", "yourself",
|
|
49
|
+
# Plural first-person
|
|
50
|
+
"we", "us", "our", "ours", "ourselves",
|
|
51
|
+
# Plural second-person
|
|
52
|
+
"y'all", "yall",
|
|
53
|
+
# Generic referents the LLM falls back to
|
|
54
|
+
"agent", "the agent", "user", "the user",
|
|
55
|
+
"speaker", "the speaker", "author", "the author",
|
|
56
|
+
"person", "the person", "someone", "anyone",
|
|
57
|
+
"everyone", "nobody", "no one", "everybody",
|
|
58
|
+
# Spurious meta the prompt sometimes echoes
|
|
59
|
+
"transcript", "meeting", "the meeting", "this",
|
|
60
|
+
"that", "the conversation", "conversation",
|
|
61
|
+
}
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# Substring/regex drops. Slower (linear scan), so kept small. Each
|
|
65
|
+
# pattern is anchored to be precise.
|
|
66
|
+
_NOISE_REGEX: tuple[re.Pattern[str], ...] = (
|
|
67
|
+
# Tailscale hostnames (always have .ts.net suffix). Real
|
|
68
|
+
# companies don't have ".ts.net" in their canonical name.
|
|
69
|
+
re.compile(r"\.ts\.net$", re.IGNORECASE),
|
|
70
|
+
# Filesystem paths — absolute or repo-rooted. A real entity name
|
|
71
|
+
# never contains "/" (commercial slashes like "and/or" don't
|
|
72
|
+
# land as entity names) or starts with "src/".
|
|
73
|
+
re.compile(r"^/[A-Za-z]"),
|
|
74
|
+
re.compile(r"^[A-Za-z]+/[A-Za-z]"),
|
|
75
|
+
# Worktree labels emitted by parallel-agent orchestrators. The
|
|
76
|
+
# em-dash + descriptive suffix is the dead giveaway.
|
|
77
|
+
re.compile(r"^Agent [A-Z]\s*[—–-]", re.IGNORECASE),
|
|
78
|
+
# Long descriptive sentence-shapes ("the guy who wrote it",
|
|
79
|
+
# "both of you", "the team that handles this"). Heuristic:
|
|
80
|
+
# entity names with >=3 spaces AND lowercase first letter aren't
|
|
81
|
+
# canonical proper nouns.
|
|
82
|
+
re.compile(r"^[a-z][a-z\s']*\s\S+\s\S+\s\S+"),
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# --------------------------------------------------------------------
|
|
87
|
+
# Public API
|
|
88
|
+
# --------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def is_noise_entity_name(entity_type: str, name: str) -> bool:
    """Return True when `name` is junk that should never enter the
    entities table.

    The checks run in this order: non-string / empty / single-character
    names, exact matches against `_NOISE_EXACT` (compared lowercased),
    a stricter rule for "person" entities, and finally the
    `_NOISE_REGEX` patterns. Only the person rule depends on
    `entity_type`; the parameter is also the hook for future per-type
    rules.

    Args:
        entity_type: lowercased entity type ("person", "company",
            "concept", "topic", "place", "date", "other"). The
            comparison is exact, so any other casing skips the
            person rule.
        name: the canonical name the LLM extracted, pre-cleaning.

    Returns:
        True if the name should be dropped, False if it should be
        upserted. Non-string, empty and whitespace-only names always
        drop.
    """
    if not isinstance(name, str):
        return True

    cleaned = name.strip()

    # Empty and single-character names are junk for every entity type.
    # Two-character names such as "AI" or "ML" pass this floor.
    if len(cleaned) < 2:
        return True

    lowered = cleaned.lower()

    # Exact-match drops (pronouns, generic referents, echoed meta).
    if lowered in _NOISE_EXACT:
        return True

    # Person entities get a higher bar because every junk person
    # creates a fake node in the relationship graph.
    if entity_type == "person":
        # A person name with no uppercase letter is almost always a
        # description ("the guy") rather than a name. That reasoning
        # only holds for scripts that HAVE letter case: a name written
        # in CJK, Arabic, Hebrew, Hangul, ... is unchanged by lower(),
        # so comparing against `lowered` alone would reject every such
        # name. Keep the name when it contains at least one letter
        # that has no case distinction at all.
        has_uppercase = cleaned != lowered
        has_caseless_letter = any(
            ch.isalpha() and ch.lower() == ch.upper() for ch in cleaned
        )
        # An "@" marks an email address, which is a legitimate
        # identifier even when fully lowercase.
        if (
            not has_uppercase
            and not has_caseless_letter
            and "@" not in cleaned
        ):
            return True

    # Pattern drops (Tailscale hosts, paths, agent-worktree labels,
    # long lowercase descriptive phrases).
    return any(pattern.search(cleaned) for pattern in _NOISE_REGEX)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Tests for confidence — fact confidence promotion formula.
|
|
2
|
+
|
|
3
|
+
Run: pytest packages/memory-engine-v2/extractor-async/test_confidence.py
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
from confidence import corroborated_confidence
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestCorroboratedConfidence:
    """Pins the promotion ladder: 1 → 0.5, 2 → 0.65, 3 → 0.8, 4+ → 0.9."""

    def test_single_source_base(self):
        confidence = corroborated_confidence(1)
        assert confidence == 0.5

    def test_two_sources_first_bump(self):
        confidence = corroborated_confidence(2)
        assert confidence == 0.65

    def test_three_sources_second_bump(self):
        confidence = corroborated_confidence(3)
        assert confidence == 0.80

    def test_four_sources_at_cap(self):
        confidence = corroborated_confidence(4)
        assert confidence == 0.90

    def test_five_sources_clamped_at_cap(self):
        # [0.9, 1.0] is reserved for human-verified facts, so piling
        # on more corroborating sources must never lift a fact past
        # the 0.9 cap.
        for source_count in (5, 50, 1_000):
            assert corroborated_confidence(source_count) == 0.90

    def test_zero_or_negative_falls_back_to_base(self):
        # Defensive only: provenance is expected to be >= 1 by the
        # time the helper is called, but pathological counts must
        # still yield the base value rather than NaN.
        for source_count in (0, -3):
            assert corroborated_confidence(source_count) == 0.5

    def test_monotonic_non_decreasing(self):
        # Adding corroboration may never lower the confidence.
        previous = -1.0
        for source_count in range(1, 20):
            current = corroborated_confidence(source_count)
            assert current >= previous, (source_count, current, previous)
            previous = current

    def test_stays_within_valid_range(self):
        # The org_model facts table carries
        # CHECK (confidence BETWEEN 0.0 AND 1.0); the helper must
        # never hand back a value the database would reject.
        for source_count in range(-5, 1000, 13):
            value = corroborated_confidence(source_count)
            assert 0.0 <= value <= 1.0, (source_count, value)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class TestNoStageTransition:
    """Documentation pin: the helper yields a confidence number only.

    Stage transitions (provisional → distilled → verified) are
    explicitly out of scope — they need a deliberate signal this code
    cannot see. If a later change tries to fold stage into the helper,
    this test should fail so the contract gets revisited on purpose.
    """

    def test_returns_only_a_float(self):
        confidence = corroborated_confidence(3)
        assert isinstance(confidence, float)
        # A tuple, dict or other structured value could smuggle a
        # stage alongside the number; a plain float cannot be indexed.
        with pytest.raises((TypeError, IndexError)):
            _ = confidence[0]  # type: ignore[index]
|