@pentatonic-ai/ai-agent-sdk 0.9.6 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/README.md +3 -3
  2. package/bin/cli.js +1 -1
  3. package/bin/commands/config.js +1 -1
  4. package/dist/index.cjs +1 -1
  5. package/dist/index.js +1 -1
  6. package/package.json +2 -2
  7. package/packages/doctor/src/checks/local-memory.js +2 -2
  8. package/packages/memory/README.md +2 -2
  9. package/packages/memory/openclaw-plugin/README.md +2 -2
  10. package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
  11. package/packages/memory/src/server.js +2 -2
  12. package/packages/memory-engine-v2/.env.example +30 -0
  13. package/packages/memory-engine-v2/README.md +125 -0
  14. package/packages/memory-engine-v2/compat/Dockerfile +11 -0
  15. package/packages/memory-engine-v2/compat/requirements.txt +6 -0
  16. package/packages/memory-engine-v2/compat/server.py +1047 -0
  17. package/packages/memory-engine-v2/docker-compose.aws.yml +78 -0
  18. package/packages/memory-engine-v2/docker-compose.yml +206 -0
  19. package/packages/memory-engine-v2/extractor-async/Dockerfile +14 -0
  20. package/packages/memory-engine-v2/extractor-async/confidence.py +62 -0
  21. package/packages/memory-engine-v2/extractor-async/noise_filter.py +144 -0
  22. package/packages/memory-engine-v2/extractor-async/requirements.txt +2 -0
  23. package/packages/memory-engine-v2/extractor-async/test_confidence.py +76 -0
  24. package/packages/memory-engine-v2/extractor-async/test_noise_filter.py +177 -0
  25. package/packages/memory-engine-v2/extractor-async/worker.py +827 -0
  26. package/packages/memory-engine-v2/extractor-sync/Dockerfile +11 -0
  27. package/packages/memory-engine-v2/extractor-sync/requirements.txt +4 -0
  28. package/packages/memory-engine-v2/extractor-sync/server.py +424 -0
  29. package/packages/memory-engine-v2/org-model/migrations/001_init.sql +390 -0
  30. package/packages/memory-engine-v2/tests/e2e_smoke.py +356 -0
  31. package/packages/memory-engine-v2/tests/fixtures/generate_synthetic_corpus.py +758 -0
  32. package/packages/memory-engine/.env.example +0 -13
  33. package/packages/memory-engine/MIGRATION.md +0 -219
  34. package/packages/memory-engine/README.md +0 -145
  35. package/packages/memory-engine/bench/README.md +0 -99
  36. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +0 -1115
  37. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +0 -819
  38. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +0 -1278
  39. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +0 -1018
  40. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +0 -1038
  41. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +0 -961
  42. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +0 -1115
  43. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +0 -819
  44. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +0 -1278
  45. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +0 -1018
  46. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +0 -1038
  47. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +0 -937
  48. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +0 -1115
  49. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +0 -819
  50. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +0 -1278
  51. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +0 -1018
  52. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +0 -1038
  53. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +0 -961
  54. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +0 -1115
  55. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +0 -819
  56. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +0 -1278
  57. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +0 -1018
  58. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +0 -1038
  59. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +0 -883
  60. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +0 -1115
  61. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +0 -819
  62. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +0 -1278
  63. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +0 -1018
  64. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +0 -1038
  65. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +0 -937
  66. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +0 -1115
  67. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +0 -1115
  68. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +0 -819
  69. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +0 -542
  70. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +0 -1278
  71. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +0 -894
  72. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +0 -1018
  73. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +0 -680
  74. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +0 -1038
  75. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +0 -693
  76. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +0 -961
  77. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +0 -727
  78. package/packages/memory-engine/compat/Dockerfile +0 -22
  79. package/packages/memory-engine/compat/server.py +0 -1255
  80. package/packages/memory-engine/docker-compose.test.yml +0 -59
  81. package/packages/memory-engine/docker-compose.yml +0 -255
  82. package/packages/memory-engine/engine/README.md +0 -52
  83. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +0 -1543
  84. package/packages/memory-engine/engine/l5-comms-layer.py +0 -663
  85. package/packages/memory-engine/engine/l6-document-store.py +0 -1018
  86. package/packages/memory-engine/engine/services/_shared/__init__.py +0 -1
  87. package/packages/memory-engine/engine/services/_shared/embed_provider.py +0 -562
  88. package/packages/memory-engine/engine/services/l2/Dockerfile +0 -50
  89. package/packages/memory-engine/engine/services/l2/init_databases.py +0 -81
  90. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +0 -2721
  91. package/packages/memory-engine/engine/services/l5/Dockerfile +0 -11
  92. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +0 -808
  93. package/packages/memory-engine/engine/services/l6/Dockerfile +0 -30
  94. package/packages/memory-engine/engine/services/l6/l6-document-store.py +0 -1221
  95. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +0 -28
  96. package/packages/memory-engine/engine/services/nv-embed/server.py +0 -152
  97. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  98. package/packages/memory-engine/pme_memory/__main__.py +0 -129
  99. package/packages/memory-engine/pme_memory/artifacts.py +0 -95
  100. package/packages/memory-engine/pme_memory/embed.py +0 -74
  101. package/packages/memory-engine/pme_memory/health.py +0 -36
  102. package/packages/memory-engine/pme_memory/hygiene.py +0 -159
  103. package/packages/memory-engine/pme_memory/indexer.py +0 -200
  104. package/packages/memory-engine/pme_memory/needs.py +0 -55
  105. package/packages/memory-engine/pme_memory/provenance.py +0 -80
  106. package/packages/memory-engine/pme_memory/scoring.py +0 -168
  107. package/packages/memory-engine/pme_memory/search.py +0 -52
  108. package/packages/memory-engine/pme_memory/store.py +0 -86
  109. package/packages/memory-engine/pme_memory/synthesis.py +0 -114
  110. package/packages/memory-engine/pyproject.toml +0 -65
  111. package/packages/memory-engine/scripts/kg-extractor.py +0 -557
  112. package/packages/memory-engine/scripts/kg-preflexor-v2.py +0 -738
  113. package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +0 -128
  114. package/packages/memory-engine/tests/e2e_arena.sh +0 -259
  115. package/packages/memory-engine/tests/embed_stub/Dockerfile +0 -13
  116. package/packages/memory-engine/tests/embed_stub/server.py +0 -80
  117. package/packages/memory-engine/tests/test_aggregate.py +0 -333
  118. package/packages/memory-engine/tests/test_api_contract.sh +0 -57
  119. package/packages/memory-engine/tests/test_arena_safety.py +0 -232
  120. package/packages/memory-engine/tests/test_channel_stat_reader.py +0 -437
  121. package/packages/memory-engine/tests/test_channel_stat_rollups.py +0 -308
  122. package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +0 -48
  123. package/packages/memory-engine/tests/test_embed_provider.py +0 -693
  124. package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +0 -280
  125. package/packages/memory-engine/tests/test_l3_arena_isolation.py +0 -412
  126. package/packages/memory-engine/tests/test_l6_module_load.py +0 -84
  127. package/packages/memory-engine/tests/test_people_list_reader.py +0 -432
@@ -0,0 +1,1047 @@
1
+ """pentatonic-memory-engine v2 compat shim.
2
+
3
+ Wire-format compatible with v1: same /store, /store-batch, /search,
4
+ /forget, /health, /health/deep wire shape. TES can be flipped from v1
5
+ to v2 by changing a single env var (MEMORY_ENGINE_URL) — no caller-side
6
+ changes.
7
+
8
+ Internally the architecture is entirely different from v1:
9
+
10
+ - /store: embed → extractor-sync (org-model writes + distillation
11
+ queue) → vector-index (Qdrant upsert with provenance).
12
+ - /search: typed router. Org-model query for facts/entities;
13
+ vector-index search filtered by arena + kind for evidence.
14
+ Fused at the response layer.
15
+ - /forget: events DELETE (cascade trigger removes provenance from
16
+ facts/entities/relationships; orphaned facts get deleted in same
17
+ txn). Then Qdrant payload-filtered delete to drop vectors.
18
+ - /health: cheap liveness. /health/deep: round-trips all three
19
+ stores + the embed gateway.
20
+
21
+ What's not in this v1: typed query routing logic. For tonight, /search
22
+ just hits vector-index with arena filter and returns. The full
23
+ typed-router implementation (intent classify → route by kind → fuse
24
+ selected layers) lands once the keystone spec defines the intent
25
+ taxonomy.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import asyncio
31
+ import hashlib
32
+ import logging
33
+ import os
34
+ import re
35
+ import time
36
+ import uuid
37
+ from contextlib import asynccontextmanager
38
+ from datetime import datetime
39
+ from typing import Any
40
+
41
+ import httpx
42
+ import numpy as np
43
+ import psycopg
44
+ import psycopg.rows
45
+ from fastapi import FastAPI, HTTPException
46
+ from psycopg_pool import AsyncConnectionPool
47
+ from pydantic import BaseModel, Field
48
+ from qdrant_client import AsyncQdrantClient, models as qmodels
49
+
50
+ logging.basicConfig(level=logging.INFO)
51
+ log = logging.getLogger("compat")
52
+
53
# Endpoints and credentials for the backing services. Every value can
# be overridden through the environment; the defaults target the
# docker-compose service names.
PG_DSN = os.getenv("PG_DSN", "postgresql://pme:local-dev-pw@org-model:5432/org_model")
VECTOR_INDEX_URL = os.getenv("VECTOR_INDEX_URL", "http://vector-index:6333")
EXTRACTOR_SYNC_URL = os.getenv("EXTRACTOR_SYNC_URL", "http://extractor-sync:8101")
NV_EMBED_URL = os.getenv("NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
NV_EMBED_API_KEY = os.getenv("NV_EMBED_API_KEY", "")
# Accepted values: 'openai' | 'pentatonic-gateway'.
NV_EMBED_PROVIDER = os.getenv("NV_EMBED_PROVIDER", "openai")
EMBED_DIM = int(os.getenv("EMBED_DIM", "4096"))

# Name of the Qdrant collection that holds the evidence vectors.
COLLECTION_NAME = "evidence"

# /search ranking-side knobs (issue #343).
#
# SEARCH_OVERFETCH_MULT: multiple of the caller's limit to pull from
#   Qdrant before dedup + quota trim. 3 covers the common case where
#   Pip's chunker stores ~3 overlapping chunks per source event.
# SEARCH_OVERFETCH_MAX: absolute cap on that fetch, so a caller with
#   limit=100 doesn't ask for 300.
# SEARCH_SOURCE_TYPE_QUOTA: max fraction of the returned slots any
#   single `source_kind` can hold. 0.6 with limit=10 = max 6 of the same
#   kind, so when slack one-liners flood top-k a canonical event/doc
#   record can still land. Disable by setting to 1.0.
SEARCH_OVERFETCH_MULT = int(os.getenv("SEARCH_OVERFETCH_MULT", "3"))
SEARCH_OVERFETCH_MAX = int(os.getenv("SEARCH_OVERFETCH_MAX", "100"))
SEARCH_SOURCE_TYPE_QUOTA = float(os.getenv("SEARCH_SOURCE_TYPE_QUOTA", "0.6"))
77
+
78
# Spellings that switch a boolean env flag off. Compared after
# stripping whitespace and lower-casing, so "False", "FALSE" and " 0 "
# all disable the feature instead of silently leaving it enabled.
_FALSY_ENV_VALUES = frozenset({"", "0", "false", "no", "off"})


def _env_flag(name: str, default: str = "1") -> bool:
    """Read a boolean feature flag from the environment.

    Returns False when the variable (or `default`, if unset) is one of
    the falsy spellings in `_FALSY_ENV_VALUES`, ignoring case and
    surrounding whitespace; True for anything else.
    """
    return os.environ.get(name, default).strip().lower() not in _FALSY_ENV_VALUES


# Phase 3 (#343): MMR diversification over the deduped candidate pool.
# Greedy: each pick maximises λ·sim(query,c) − (1−λ)·max sim(c,picked).
# λ=0.7 leans relevant-first with mild diversity — high enough that
# top results still match the query, low enough that we don't repeat
# semantically near-identical chunks from different events.
SEARCH_MMR_ENABLED = _env_flag("SEARCH_MMR_ENABLED")
SEARCH_MMR_LAMBDA = float(os.environ.get("SEARCH_MMR_LAMBDA", "0.7"))

# Phase 4 (#343): intent-aware source_kind boosts. The patterns are
# narrow on purpose — broad matchers would over-boost generic queries
# and bury good vector hits. Boost magnitudes are added to the cosine
# similarity score (typical 0.7–0.85 range), so +0.06 flips a near-tie
# in favour of the structurally-better record without surfacing
# unrelated content. Set SEARCH_INTENT_BOOST=0 to disable.
SEARCH_INTENT_BOOST = _env_flag("SEARCH_INTENT_BOOST")

# Issue #350: for temporal intent (the "last meeting" class of queries),
# rank the candidate pool by `attributes.timestamp` desc instead of
# similarity. The intent boost (#343 Phase 4) already lifts
# source_kind=event into top-k, but among event records pure cosine
# still picks semantically-best, not chronologically-latest — that's the
# confidently-wrong "stale 2025-12 meeting beats the actual 2026-05
# meeting" failure mode #350 documents. Temporal re-rank trumps MMR for
# this intent class (recency IS the diversification axis); records
# without a parseable timestamp sink to the bottom but aren't dropped.
SEARCH_TEMPORAL_RERANK = _env_flag("SEARCH_TEMPORAL_RERANK")
104
+
105
# Queries whose answer depends on *when* something happened. "meeting"
# is listed explicitly in the `last …` alternation: `last met` followed
# by `\b` cannot match "last meeting", which is the canonical query
# shape the temporal re-rank exists for.
TEMPORAL_INTENT_RE = re.compile(
    r"\b(when did|when was|last (?:time|meeting|met|saw|spoke|called)|"
    r"how long ago|first time (?:i|we) (?:met|saw|spoke)|recent(?:ly)?|"
    r"most recent|latest|"
    r"timeline of|history with)\b",
    re.IGNORECASE,
)
# Queries asking for a summary / overview rather than a point in time.
FACTUAL_INTENT_RE = re.compile(
    r"\b(summary of|summarise|summarize|list of|tell me about|"
    r"overview of|what (?:do|did) (?:i|we) (?:know|do))\b",
    re.IGNORECASE,
)
INTENT_BOOSTS: dict[str, dict[str, float]] = {
    # intent -> {source_kind -> additive boost on cosine score}
    "temporal": {"event": 0.08, "doc": 0.04, "note": 0.02},
    "factual": {"doc": 0.06, "note": 0.03, "event": 0.03},
}
122
+
123
+
124
def _classify_intent(query: str) -> str | None:
    """Classify `query` as 'temporal', 'factual' or None via regex only.

    Patterns are tried in priority order and the first hit wins, so a
    query that matches both (e.g. "summary of recent meetings") is
    reported as temporal. No LLM call is made.
    """
    ordered_patterns = (
        ("temporal", TEMPORAL_INTENT_RE),
        ("factual", FACTUAL_INTENT_RE),
    )
    for label, pattern in ordered_patterns:
        if pattern.search(query):
            return label
    return None
133
+
134
+
135
def _apply_intent_boost(results: list[Any], intent: str | None) -> list[Any]:
    """Bump each hit's score by its intent-specific source_kind boost.

    Scores are mutated in place on the passed-in objects; the return
    value is a new list ordered by score, highest first. When `intent`
    is falsy or has no entry in INTENT_BOOSTS the input list is returned
    untouched (same object, original order).
    """
    table = INTENT_BOOSTS.get(intent) if intent else None
    if table is None:
        return results

    for hit in results:
        source_kind = hit.payload.get("source_kind") or ""
        delta = table.get(source_kind, 0.0)
        if delta:
            hit.score = hit.score + delta

    return sorted(results, key=lambda hit: hit.score, reverse=True)
148
+
149
+
150
def _parse_ts(value: Any) -> float | None:
    """Best-effort ISO-8601 string → unix timestamp (seconds).

    Accepts a trailing 'Z' as well as explicit offsets. Strings without
    any offset are interpreted as UTC, so the result does not depend on
    the host's local timezone and naive/aware timestamps sort
    consistently against each other.

    Returns None for non-strings, empty strings, and anything
    `datetime.fromisoformat` rejects.
    """
    # Local import: the module only imports `datetime` from `datetime`.
    from datetime import timezone

    if not isinstance(value, str) or not value:
        return None
    try:
        # `fromisoformat` handles `+00:00` but not the bare `Z` suffix
        # until Python 3.11; normalise to be safe across runtime
        # versions on the engine box.
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # A naive datetime's .timestamp() uses the process-local
        # timezone; pin it to UTC instead.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.timestamp()
162
+
163
+
164
def _apply_temporal_sort(
    results: list[Any], attrs_by_event_id: dict[str, dict[str, Any]]
) -> list[Any]:
    """Sort by `attributes.timestamp` desc for temporal-intent queries.

    For each hit the timestamp is looked up in `attrs_by_event_id`
    (keyed by the hit's `payload["event_id"]`) first, falling back to
    the hit's own `payload["timestamp"]` when the attributes entry is
    missing or unparseable. Hits with no parseable timestamp sort last;
    because `sorted` is stable, hits with equal keys keep their incoming
    relative order.

    Returns a new list; `results` itself is not reordered.
    """
    def neg_ts(r):
        eid = r.payload.get("event_id")
        attrs = attrs_by_event_id.get(eid) or {}
        # attrs.timestamp first (authoritative), payload as fallback.
        # Compare against None explicitly: a valid epoch timestamp of
        # 0.0 is falsy and must not be mistaken for "missing".
        ts = _parse_ts(attrs.get("timestamp"))
        if ts is None:
            ts = _parse_ts(r.payload.get("timestamp"))
        # Records without a timestamp get +inf so they sort last.
        return -ts if ts is not None else float("inf")

    return sorted(results, key=neg_ts)
185
+
186
+
187
def _mmr_select(
    candidates: list[Any], target: int, lambda_: float
) -> list[Any]:
    """Pick up to `target` candidates by greedy Maximal Marginal Relevance.

    Each round selects the remaining candidate with the highest
    ``lambda_ * score - (1 - lambda_) * max_similarity_to_already_picked``,
    where similarity is the dot product of the candidates' `.vector`
    attributes (equal to cosine similarity only if the vectors are
    unit-length — callers are expected to supply normalised vectors).

    If any candidate lacks a vector, falls back to plain score order
    truncated to `target`. Returns [] for an empty pool or target <= 0.
    The full pairwise similarity matrix is computed up front.
    """
    if not candidates or target <= 0:
        return []

    # Without vectors for every candidate MMR is impossible; degrade to
    # pure relevance ordering.
    if any(getattr(cand, "vector", None) is None for cand in candidates):
        by_relevance = sorted(candidates, key=lambda r: r.score, reverse=True)
        return by_relevance[:target]

    embeddings = np.asarray([cand.vector for cand in candidates], dtype=np.float32)
    relevance = np.asarray([cand.score for cand in candidates], dtype=np.float32)
    pairwise = embeddings @ embeddings.T  # (N, N)

    total = len(candidates)
    budget = min(target, total)
    chosen: list[int] = []
    pool = list(range(total))  # remaining indices, ascending

    while pool and len(chosen) < budget:
        winner = -1
        winner_value = -1e9
        for idx in pool:
            # Redundancy = closeness to the nearest already-picked item.
            if chosen:
                redundancy = float(np.max(pairwise[idx, chosen]))
            else:
                redundancy = 0.0
            value = lambda_ * float(relevance[idx]) - (1.0 - lambda_) * redundancy
            if value > winner_value:
                winner = idx
                winner_value = value
        if winner < 0:
            break
        chosen.append(winner)
        pool.remove(winner)

    return [candidates[idx] for idx in chosen]
234
+
235
# Process-wide clients, created by `lifespan` at startup and closed at
# shutdown. The Postgres pool serves org-model writes/reads and is tuned
# for the same call rate as v1's compat — bump if the consumer's drain
# rate justifies.
_pool: AsyncConnectionPool | None = None
_qdrant: AsyncQdrantClient | None = None
_http: httpx.AsyncClient | None = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: open shared clients, then close them on exit.

    Startup opens the Postgres pool, creates the Qdrant client and (if
    the collection does not exist yet) creates it together with its
    payload indexes, then creates the shared httpx client. A Qdrant
    initialisation failure is logged and does not abort startup.

    Shutdown runs in a `finally` block and closes each client
    independently, so an exception during the app's lifetime or a
    failing `close()` cannot leak the remaining clients.
    """
    global _pool, _qdrant, _http
    _pool = AsyncConnectionPool(
        conninfo=PG_DSN,
        min_size=2,
        max_size=20,
        kwargs={"row_factory": psycopg.rows.dict_row},
        open=False,
    )
    await _pool.open()
    log.info("compat: pool opened")

    _qdrant = AsyncQdrantClient(url=VECTOR_INDEX_URL, prefer_grpc=False)
    # Idempotent collection creation. Qdrant rejects re-creation, so we
    # check first. Schema: EMBED_DIM-d vectors (4096 for NV-Embed-v2),
    # cosine distance, on-disk vectors, scalar quantization for RAM
    # efficiency.
    try:
        collections = await _qdrant.get_collections()
        names = {c.name for c in collections.collections}
        if COLLECTION_NAME not in names:
            await _qdrant.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=qmodels.VectorParams(
                    size=EMBED_DIM,
                    distance=qmodels.Distance.COSINE,
                    on_disk=True,
                ),
                # Scalar quantization (int8) — 4× RAM reduction on the
                # quantile-cached portion. Page-cache governs hot set.
                quantization_config=qmodels.ScalarQuantization(
                    scalar=qmodels.ScalarQuantizationConfig(
                        type=qmodels.ScalarType.INT8,
                        always_ram=False,
                    )
                ),
            )
            log.info(f"created qdrant collection: {COLLECTION_NAME} dim={EMBED_DIM}")
            # Payload indexes for fast filtered search (this is the
            # whole point of choosing Qdrant — first-class filter perf).
            for field in ("arena", "source_kind", "clientId", "userId"):
                await _qdrant.create_payload_index(
                    collection_name=COLLECTION_NAME,
                    field_name=field,
                    field_schema=qmodels.PayloadSchemaType.KEYWORD,
                )
            log.info("created qdrant payload indexes: arena, source_kind, clientId, userId")
    except Exception as e:
        log.error(f"qdrant init error: {e}")
        # Don't crash compat on Qdrant init failure — let liveness
        # probe report it and operators investigate. The compat shim
        # should be more available than the underlying store.

    _http = httpx.AsyncClient(timeout=httpx.Timeout(60.0))

    try:
        yield
    finally:
        # Close every client even if an earlier close fails; a shutdown
        # error is logged rather than allowed to skip the rest.
        closers = (
            ("pool", _pool.close),
            ("qdrant", _qdrant.close),
            ("http", _http.aclose),
        )
        for label, close in closers:
            try:
                await close()
            except Exception:
                log.exception("compat: error closing %s", label)
301
+
302
+
303
+ app = FastAPI(title="pme2-compat", lifespan=lifespan)
304
+
305
+
306
+ # ----------------------------------------------------------------------
307
+ # Wire models — match v1's compat shim shape for drop-in compatibility.
308
+ # ----------------------------------------------------------------------
309
+
310
+
311
# Request body for POST /store.
#   content:  the text to ingest.
#   metadata: optional free-form dict; the /store handler reads
#             `arena`, `clientId`, `user_id` and the source-kind keys
#             from it and forwards it to the extractor as attributes.
class StoreRequest(BaseModel):
    content: str
    metadata: dict[str, Any] | None = None
314
+
315
+
316
# Batch ingest request — presumably the body of POST /store-batch (the
# handler is not visible in this part of the file; confirm there).
#   records: list of record dicts; defaults to an empty list.
#   arena:   defaults to "general".
class StoreBatchRequest(BaseModel):
    records: list[dict[str, Any]] = Field(default_factory=list)
    arena: str | None = "general"
    # v1's optional pre-computed embeddings — passed through but we
    # re-embed regardless. The shared-embed optimisation lives at the
    # SDK level now (PR #58 retry-with-jitter); compat trusts the
    # gateway will return.
    embeddings: list[list[float]] | None = None
324
+
325
+
326
# Search request — presumably the body of POST /search (handler not
# visible in this part of the file; confirm field semantics there).
#   query:           free-text query (required).
#   limit:           defaults to 10.
#   min_score:       defaults to 0.001.
#   arena / arenas:  optional single arena or list of arenas.
#   metadata_filter: optional dict of filter criteria.
class SearchRequest(BaseModel):
    query: str
    limit: int | None = 10
    min_score: float | None = 0.001
    arena: str | None = None
    arenas: list[str] | None = None
    metadata_filter: dict[str, Any] | None = None
333
+
334
+
335
# Deletion request — presumably the body of POST /forget (handler not
# visible in this part of the file). Both selectors are optional here;
# how they combine is decided by the handler.
class ForgetRequest(BaseModel):
    metadata_contains: dict[str, Any] | None = None
    id: str | None = None
338
+
339
+
340
+ # ----------------------------------------------------------------------
341
+ # Helpers
342
+ # ----------------------------------------------------------------------
343
+
344
+
345
def _content_hash(arena: str, content: str) -> str:
    """Return the first 32 hex chars of sha256("<arena>:<content>").

    Callers predict IDs with the same formula, so the exact input
    layout (arena, a colon, then content, default-encoded) must not
    change.
    """
    hasher = hashlib.sha256()
    hasher.update(f"{arena}:{content}".encode())
    return hasher.hexdigest()[:32]
348
+
349
+
350
# Embed gateway is a single-GPU NV-Embed-v2 instance — concurrent
# bursts above its in-flight ceiling return 502s. Semaphore caps how
# many embed calls we make at once so the gateway never sees more
# than it can serve. With TES at shardCount=8 and BATCH_SIZE=50 we get
# up to 8 concurrent /store-batch calls hitting this path; 4 keeps the
# gateway healthy and queues the rest in compat instead of pushing
# the failure back through the DO retry loop (which causes DLQ on
# repeated 502s — observed 2026-05-17). Pair with retry below.
_EMBED_SEMAPHORE = asyncio.Semaphore(4)
_EMBED_RETRY_STATUSES = {502, 503, 504, 429}
_EMBED_MAX_ATTEMPTS = 5


async def _embed_batch(texts: list[str]) -> list[list[float]]:
    """Embed `texts` through the external gateway, with bounded retries.

    Supports both provider shapes: 'pentatonic-gateway' authenticates
    with an `X-API-Key` header, anything else with a Bearer token; the
    response may be `{"embeddings": [[...]]}` or the OpenAI-style
    `{"data": [{"embedding": [...]}]}`.

    Retries up to `_EMBED_MAX_ATTEMPTS` times on the statuses in
    `_EMBED_RETRY_STATUSES` and on httpx timeout/network errors, with
    exponential backoff (0.25s, 0.5s, 1s, ...) *between* attempts. No
    sleep follows the final attempt, so the failure is surfaced — and
    the semaphore slot released — immediately.

    Returns [] for empty input without calling the gateway.

    Raises:
        httpx.HTTPStatusError: non-retryable HTTP error, or a retryable
            status that persisted through every attempt.
        httpx.TimeoutException / httpx.NetworkError: transport failure
            that persisted through every attempt.
        RuntimeError: response had neither "embeddings" nor "data", or
            `_EMBED_MAX_ATTEMPTS` is configured below 1.
    """
    if not texts:
        return []
    headers = {"Content-Type": "application/json"}
    if NV_EMBED_API_KEY:
        if NV_EMBED_PROVIDER == "pentatonic-gateway":
            headers["X-API-Key"] = NV_EMBED_API_KEY
        else:
            headers["Authorization"] = f"Bearer {NV_EMBED_API_KEY}"

    body = {"input": texts, "model": "nv-embed-v2"}

    async with _EMBED_SEMAPHORE:
        # Retry transient gateway failures (502/503/504/429) with
        # exponential backoff before bubbling up to the caller. Without
        # this a single GPU hiccup propagates a 500 to the TES DO,
        # which then DLQs after MAX_ATTEMPTS attempts.
        last_exc: Exception | None = None
        for attempt in range(_EMBED_MAX_ATTEMPTS):
            # Backing off only makes sense if another attempt follows.
            will_retry = attempt + 1 < _EMBED_MAX_ATTEMPTS
            try:
                r = await _http.post(NV_EMBED_URL, json=body, headers=headers)
                if r.status_code in _EMBED_RETRY_STATUSES:
                    last_exc = httpx.HTTPStatusError(
                        f"embed gateway {r.status_code}", request=r.request, response=r,
                    )
                    log.warning(
                        f"embed gateway {r.status_code} attempt {attempt + 1}/{_EMBED_MAX_ATTEMPTS}; retrying"
                    )
                    if will_retry:
                        await asyncio.sleep(0.25 * (2 ** attempt))
                    continue
                r.raise_for_status()
                data = r.json()
                # Two response shapes in the wild:
                #   { "data": [{"embedding": [...]}] }   (openai-compat)
                #   { "embeddings": [[...]] }            (pentatonic-gateway direct)
                if "embeddings" in data:
                    return data["embeddings"]
                if "data" in data:
                    return [d["embedding"] for d in data["data"]]
                raise RuntimeError(f"unexpected embed response shape: keys={list(data.keys())}")
            except (httpx.TimeoutException, httpx.NetworkError) as e:
                last_exc = e
                log.warning(
                    f"embed gateway transport error attempt {attempt + 1}/{_EMBED_MAX_ATTEMPTS}: {e}"
                )
                if will_retry:
                    await asyncio.sleep(0.25 * (2 ** attempt))

        # Exhausted retries — propagate the last failure so the caller
        # sees the real cause (vs a generic 500). An explicit check
        # replaces `assert`, which is stripped under `python -O`.
        if last_exc is None:
            raise RuntimeError("embed gateway: no attempts made (_EMBED_MAX_ATTEMPTS < 1)")
        raise last_exc
416
+
417
+
418
async def _extract(arena: str, clientId: str, userId: str | None,
                   source_kind: str, content: str,
                   attributes: dict[str, Any]) -> str:
    """POST one record to extractor-sync and return its `event_id`.

    Any HTTP error status is raised via `raise_for_status()` rather than
    swallowed: a failed extraction must not go unnoticed, since it would
    break the FK invariant for vector_provenance.
    """
    payload = {
        "arena": arena,
        "clientId": clientId,
        "userId": userId,
        "source_kind": source_kind,
        "content": content,
        # A falsy `attributes` (None / empty) is sent as an empty object.
        "attributes": attributes or {},
    }
    endpoint = f"{EXTRACTOR_SYNC_URL}/extract"
    response = await _http.post(endpoint, json=payload)
    response.raise_for_status()
    reply = response.json()
    return reply["event_id"]
435
+
436
+
437
def _arena_of(meta: dict[str, Any] | None, fallback: str = "general") -> str:
    """Return `meta["arena"]` if it is a non-empty string, else `fallback`."""
    if not meta:
        return fallback
    candidate = meta.get("arena")
    usable = isinstance(candidate, str) and bool(candidate)
    return candidate if usable else fallback
443
+
444
+
445
def _source_kind_of(meta: dict[str, Any] | None) -> str:
    """Resolve source_kind from metadata.

    The first of these keys holding a non-empty string wins:
      1. metadata.source_kind (explicit)
      2. metadata.kind (Pip's existing producer field — see PR #285)
      3. metadata.memory_kind
    Falls back to 'agent' (TES default) when none qualifies or `meta`
    is empty/None.
    """
    if not meta:
        return "agent"
    lookups = (meta.get(key) for key in ("source_kind", "kind", "memory_kind"))
    return next(
        (value for value in lookups if isinstance(value, str) and value),
        "agent",
    )
456
+
457
+
458
+ # ----------------------------------------------------------------------
459
+ # Health
460
+ # ----------------------------------------------------------------------
461
+
462
+
463
@app.get("/health")
async def health():
    """Cheap liveness probe: returns a static payload, touches no store."""
    liveness = {
        "status": "healthy",
        "service": "pme2-compat",
        "version": "0.1.0",
    }
    return liveness
466
+
467
+
468
@app.get("/health/deep")
async def health_deep():
    """Probe each dependency and report per-store status.

    Checks, in order: the org-model database (reads `health_counts`),
    the Qdrant collection, and the embed gateway (embeds a short probe
    string). A failing dependency is reported as
    `{"status": "error", "error": <message>}` under its key; the
    endpoint itself always returns normally. Slow; do not use as a
    docker healthcheck.
    """
    stores: dict[str, Any] = {}

    # org-model
    try:
        async with _pool.connection() as conn, conn.cursor() as cur:
            await cur.execute("SELECT * FROM health_counts")
            counts_row = await cur.fetchone()
        stores["org_model"] = {"status": "ok", "counts": dict(counts_row)}
    except Exception as e:
        stores["org_model"] = {"status": "error", "error": str(e)}

    # vector-index
    try:
        collection = await _qdrant.get_collection(COLLECTION_NAME)
        stores["vector_index"] = {
            "status": "ok",
            "vectors_count": collection.vectors_count,
            "points_count": collection.points_count,
        }
    except Exception as e:
        stores["vector_index"] = {"status": "error", "error": str(e)}

    # embed gateway
    try:
        probe = await _embed_batch(["health probe"])
        dim = len(probe[0]) if probe else 0
        stores["embed_gateway"] = {"status": "ok", "dim": dim}
    except Exception as e:
        stores["embed_gateway"] = {"status": "error", "error": str(e)}

    return {"compat": "ok", "stores": stores}
503
+
504
+
505
+ # ----------------------------------------------------------------------
506
+ # /store
507
+ # ----------------------------------------------------------------------
508
+
509
+
510
@app.post("/store")
async def store(req: StoreRequest):
    """Single-record ingest. v1 wire shape: { content, metadata } →
    { id, content, layerId, engine }.

    Routing fields are derived from the metadata: ``arena`` via
    ``_arena_of``; ``clientId`` from ``metadata.clientId`` or the part of
    the arena before the first ``:``; ``userId`` from ``metadata.user_id``
    or the part of the arena after the first ``:`` (None when the arena
    has no ``:``); ``source_kind`` via ``_source_kind_of``.

    Raises:
        HTTPException(500): the embed gateway did not return exactly one
            vector for the single input.
        Any error from ``_extract``, the SQL pool or Qdrant propagates.
    """
    meta = req.metadata or {}
    arena = _arena_of(meta)
    clientId = meta.get("clientId") or arena.split(":")[0]
    userId = meta.get("user_id") or (arena.split(":", 1)[1] if ":" in arena else None)
    source_kind = _source_kind_of(meta)

    t0 = time.perf_counter()
    # Embed before extracting (same order as /store-batch): if the embed
    # gateway fails, extractor-sync has not yet been asked to record an
    # event that would be left without a vector.
    embeddings = await _embed_batch([req.content])
    if len(embeddings) != 1:
        # Mirrors the count check in /store-batch; without it an empty
        # response would surface later as a bare IndexError.
        raise HTTPException(500, f"embed count mismatch: {len(embeddings)} vs 1")
    event_id = await _extract(arena, clientId, userId, source_kind, req.content, meta)

    vector_id = str(uuid.uuid4())
    # Write vector_provenance + Qdrant point in the same logical
    # operation. The upsert runs inside the connection context so that a
    # Qdrant failure propagates out of it and the provenance insert is
    # rolled back — otherwise we'd have a provenance row with no vector.
    async with _pool.connection() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                "INSERT INTO vector_provenance (vector_id, event_id, embedding_model, embedding_dim) "
                "VALUES (%s, %s, %s, %s)",
                (vector_id, event_id, "nv-embed-v2", EMBED_DIM),
            )
        await _qdrant.upsert(
            collection_name=COLLECTION_NAME,
            points=[
                qmodels.PointStruct(
                    id=vector_id,
                    vector=embeddings[0],
                    # Issue #345 (caps #342/#343/#344): the caller's whole
                    # metadata bag (timestamp, contact_email, channel,
                    # kind, direction, source, ...) is persisted so
                    # metadata_filter, recency sort and occurred_at
                    # resolution have data to work with. The structural
                    # keys below are listed last so they win over any
                    # name collision from the caller.
                    payload={
                        **meta,
                        "event_id": event_id,
                        "arena": arena,
                        "clientId": clientId,
                        "userId": userId,
                        "source_kind": source_kind,
                        "content_preview": req.content[:300],
                    },
                )
            ],
        )

    dur_ms = (time.perf_counter() - t0) * 1000
    log.info("store event_id=%s arena=%s ms=%.1f", event_id, arena, dur_ms)
    return {
        "id": event_id,
        "content": req.content,
        "layerId": f"ml_{arena}_{source_kind}",
        "engine": {"vector_index": 1, "org_model": 1},
    }
570
+
571
+
572
+ # ----------------------------------------------------------------------
573
+ # /store-batch
574
+ # ----------------------------------------------------------------------
575
+
576
+
577
@app.post("/store-batch")
async def store_batch(req: StoreBatchRequest):
    """Batch ingest. Same wire shape as v1: records[] → { inserted,
    ids[] }. Pre-computed embeddings on the request are accepted but
    ignored (we re-embed for now; sharing arrives once the keystone
    spec settles the per-source vector configuration).

    Each record is an object with a string ``content`` and an optional
    ``metadata`` object. Routing fields are resolved per record the same
    way as /store, with ``req.arena`` (or ``'general'``) as the arena
    fallback.

    Raises:
        HTTPException(400): a record is not an object, has no string
            ``content``, or has a non-object ``metadata``. Raised before
            any embed or extract call, so a malformed batch has no side
            effects.
        HTTPException(500): the embed gateway returned a different number
            of vectors than texts sent.
    """
    if not req.records:
        return {"status": "ok", "inserted": 0, "ids": [], "engine": {}}

    arena_default = req.arena or "general"

    # Validate and resolve per-record routing fields first. Doing this
    # before any network call turns malformed input into a 400 instead of
    # an unhandled KeyError/AttributeError (500) part-way through.
    resolved: list[tuple[str, str, str | None, str, str, dict]] = []
    for idx, record in enumerate(req.records):
        content = record.get("content") if isinstance(record, dict) else None
        if not isinstance(content, str):
            raise HTTPException(400, f"records[{idx}].content must be a string")
        meta = record.get("metadata") or {}
        if not isinstance(meta, dict):
            raise HTTPException(400, f"records[{idx}].metadata must be an object")
        arena = _arena_of(meta, fallback=arena_default)
        clientId = meta.get("clientId") or arena.split(":")[0]
        userId = meta.get("user_id") or (arena.split(":", 1)[1] if ":" in arena else None)
        source_kind = _source_kind_of(meta)
        resolved.append((arena, clientId, userId, source_kind, content, meta))

    texts = [fields[4] for fields in resolved]
    embeddings = await _embed_batch(texts)
    if len(embeddings) != len(texts):
        raise HTTPException(500, f"embed count mismatch: {len(embeddings)} vs {len(texts)}")

    # Fan out the extractor-sync calls concurrently. Each _extract is a
    # network round-trip; serialising them was the dominant cost in
    # /store-batch latency. gather() preserves input order, so
    # event_ids[i] belongs to resolved[i]. The tuple layout matches the
    # positional parameters of _extract.
    event_ids = await asyncio.gather(*(_extract(*fields) for fields in resolved))

    ids: list[str] = []
    points: list[qmodels.PointStruct] = []
    provenance_rows: list[tuple] = []
    for (arena, clientId, userId, source_kind, content, meta), vec, event_id in zip(
        resolved, embeddings, event_ids
    ):
        vector_id = str(uuid.uuid4())
        provenance_rows.append((vector_id, event_id, "nv-embed-v2", EMBED_DIM))
        # Issue #345: spread the caller's metadata into the payload so
        # downstream metadata_filter / sort / timestamp resolution have
        # something to work with. Structural keys are listed last so
        # they override on collision.
        points.append(qmodels.PointStruct(
            id=vector_id,
            vector=vec,
            payload={
                **meta,
                "event_id": event_id,
                "arena": arena,
                "clientId": clientId,
                "userId": userId,
                "source_kind": source_kind,
                "content_preview": content[:300],
            },
        ))
        ids.append(event_id)

    # Provenance rows and the Qdrant upsert share one connection context:
    # if the upsert raises, the exception leaves the context and the
    # provenance inserts are rolled back.
    async with _pool.connection() as conn:
        async with conn.cursor() as cur:
            await cur.executemany(
                "INSERT INTO vector_provenance (vector_id, event_id, embedding_model, embedding_dim) "
                "VALUES (%s, %s, %s, %s)",
                provenance_rows,
            )
        await _qdrant.upsert(collection_name=COLLECTION_NAME, points=points)

    return {
        "status": "ok",
        "inserted": len(ids),
        "ids": ids,
        "engine": {"vector_index": len(ids), "org_model": len(ids)},
    }
654
+
655
+
656
+ # ----------------------------------------------------------------------
657
+ # /search
658
+ # ----------------------------------------------------------------------
659
+
660
+
661
+ # ----------------------------------------------------------------------
662
+ # Structured graph queries
663
+ # ----------------------------------------------------------------------
664
+ # Lightweight read endpoints over the org-model graph tables. Bypass the
665
+ # vector index — these are facet/filter queries (find entities by name,
666
+ # facts by subject, relationships by edge type), not similarity ranking.
667
+ # Arena is required on every query (multi-tenancy boundary). userId is
668
+ # optional; when present we span both [clientId, clientId:userId] arenas
669
+ # the same way semantic search does.
670
+
671
+
672
class GraphQueryRequest(BaseModel):
    """Common envelope for the graph read endpoints (/entities, /facts,
    /relationships).

    Scope: pass `arena` (single string) or `arenas` (list); when both are
    set, a non-empty `arenas` takes precedence, and a request with
    neither is rejected with HTTP 400. All filter fields are optional and
    each endpoint reads only the subset relevant to it. Results are
    ordered most-recent first and capped at `limit`.
    """
    # --- scope ---
    arena: str | None = None
    arenas: list[str] | None = None
    # --- /entities filters ---
    entity_type: str | None = None  # exact match on entities.entity_type
    name: str | None = None  # substring (ILIKE) on canonical_name or any alias
    # --- /facts filters ---
    subject: str | None = None  # subject entity: canonical_name substring (ILIKE) OR exact alias
    predicate: str | None = None  # substring (ILIKE) on facts.predicate
    category: str | None = None  # exact match on facts.category
    # --- /relationships filters ---
    from_name: str | None = None  # from-entity: canonical_name substring (ILIKE) OR exact alias
    to_name: str | None = None  # to-entity: canonical_name substring (ILIKE) OR exact alias
    relationship_type: str | None = None  # substring (ILIKE) on relationships.relationship_type
    # Passed straight to SQL LIMIT; no bounds are enforced on this model.
    limit: int = 50
688
+
689
+
690
def _resolve_arenas(req: GraphQueryRequest) -> list[str]:
    """Return the arena scope for a graph query.

    A non-empty ``req.arenas`` is returned as-is; otherwise a non-empty
    ``req.arena`` is wrapped in a one-element list.

    Raises:
        HTTPException(400): neither field carries a value.
    """
    if req.arenas:
        return req.arenas
    if req.arena:
        return [req.arena]
    raise HTTPException(400, "arena or arenas required")
695
+
696
+
697
@app.post("/entities")
async def list_entities(req: GraphQueryRequest):
    """List entities in the requested arena(s), newest ``last_seen`` first.

    Optional filters: ``entity_type`` (exact) and ``name`` (case-insensitive
    substring). The name pattern is tested against canonical_name and
    against every element of ``aliases``, so ``name='Mastercard'`` matches
    rows where it appears in either place.
    """
    where: list[str] = ["arena = ANY(%s)"]
    args: list[Any] = [_resolve_arenas(req)]

    if req.entity_type:
        where.append("entity_type = %s")
        args.append(req.entity_type)

    if req.name:
        like = f"%{req.name}%"
        where.append("(canonical_name ILIKE %s OR EXISTS (SELECT 1 FROM UNNEST(aliases) AS a WHERE a ILIKE %s))")
        # Same pattern twice: once for canonical_name, once for aliases.
        args += [like, like]

    # LIMIT is the final placeholder, so its value goes last.
    args.append(req.limit)
    where_clause = " AND ".join(where)
    query = f"""
        SELECT id, arena, entity_type, canonical_name, aliases,
        provenance_event_ids, last_seen
        FROM entities
        WHERE {where_clause}
        ORDER BY last_seen DESC
        LIMIT %s
        """
    async with _pool.connection() as conn, conn.cursor() as cur:
        await cur.execute(query, args)
        found = await cur.fetchall()
    return {"results": list(map(dict, found))}
726
+
727
+
728
@app.post("/facts")
async def list_facts(req: GraphQueryRequest):
    """List facts in the requested arena(s), newest ``asserted_at`` first.

    Optional filters: ``category`` (exact), ``predicate`` (case-insensitive
    substring) and ``subject``. The subject filter keeps facts whose
    ``subject_entity_id`` points at an entity whose canonical_name
    contains the value (ILIKE) or whose aliases contain it exactly.
    """
    where: list[str] = ["f.arena = ANY(%s)"]
    args: list[Any] = [_resolve_arenas(req)]

    if req.category:
        where.append("f.category = %s")
        args.append(req.category)

    if req.predicate:
        where.append("f.predicate ILIKE %s")
        args.append(f"%{req.predicate}%")

    if req.subject:
        where.append("EXISTS (SELECT 1 FROM entities e WHERE e.id = f.subject_entity_id AND (e.canonical_name ILIKE %s OR %s = ANY(e.aliases)))")
        # First placeholder is the ILIKE pattern, second the exact alias.
        args += [f"%{req.subject}%", req.subject]

    # LIMIT is the final placeholder, so its value goes last.
    args.append(req.limit)
    where_clause = " AND ".join(where)
    query = f"""
        SELECT f.id, f.arena, f.category, f.predicate, f.statement,
        f.subject_entity_id, f.object_entity_id,
        f.confidence, f.stage, f.asserted_at,
        f.provenance_event_ids
        FROM facts f
        WHERE {where_clause}
        ORDER BY f.asserted_at DESC
        LIMIT %s
        """
    async with _pool.connection() as conn, conn.cursor() as cur:
        await cur.execute(query, args)
        found = await cur.fetchall()
    return {"results": list(map(dict, found))}
761
+
762
+
763
@app.post("/relationships")
async def list_relationships(req: GraphQueryRequest):
    """List relationship edges in the requested arena(s), newest
    ``last_seen`` first.

    Optional filters: ``relationship_type`` (case-insensitive substring)
    and ``from_name`` / ``to_name``, each matching the endpoint entity by
    canonical_name substring (ILIKE) or exact alias. Every row carries
    the joined ``from_name`` / ``to_name`` canonical names so the caller
    doesn't need to round-trip back to /entities.
    """
    where: list[str] = ["r.arena = ANY(%s)"]
    args: list[Any] = [_resolve_arenas(req)]

    if req.relationship_type:
        where.append("r.relationship_type ILIKE %s")
        args.append(f"%{req.relationship_type}%")

    if req.from_name:
        where.append("(ef.canonical_name ILIKE %s OR %s = ANY(ef.aliases))")
        args += [f"%{req.from_name}%", req.from_name]

    if req.to_name:
        where.append("(et.canonical_name ILIKE %s OR %s = ANY(et.aliases))")
        args += [f"%{req.to_name}%", req.to_name]

    # LIMIT is the final placeholder, so its value goes last.
    args.append(req.limit)
    where_clause = " AND ".join(where)
    # ef / et alias the entities table for the edge's two endpoints.
    query = f"""
        SELECT r.id, r.arena, r.relationship_type, r.weight,
        r.from_entity_id, r.to_entity_id,
        ef.canonical_name AS from_name,
        et.canonical_name AS to_name,
        r.first_seen, r.last_seen,
        r.provenance_event_ids
        FROM relationships r
        JOIN entities ef ON ef.id = r.from_entity_id
        JOIN entities et ON et.id = r.to_entity_id
        WHERE {where_clause}
        ORDER BY r.last_seen DESC
        LIMIT %s
        """
    async with _pool.connection() as conn, conn.cursor() as cur:
        await cur.execute(query, args)
        found = await cur.fetchall()
    return {"results": list(map(dict, found))}
800
+
801
+
802
@app.post("/search")
async def search(req: SearchRequest):
    """Search the evidence index. Filtered by arena (single or list).

    Retrieval is vector-only (no typed router, no org-model fact lookup,
    no L0 BM25); the candidates are then post-processed in this order:

      1. over-fetch from Qdrant with the arena + metadata filter
      2. collapse to one hit per event_id (first occurrence wins)
      3. load full content + attributes for the surviving events
      4. optional intent boost
      5. temporal sort OR MMR OR plain truncation
      6. per-source_kind quota with overflow backfill
      7. project to the v1 response shape

    The output shape matches v1 so callers don't break:
    { results: [{id, content, similarity, metadata}] }.

    Raises:
        HTTPException(400): neither `arena` nor `arenas` was supplied.
    """
    arenas = req.arenas or ([req.arena] if req.arena else [])
    if not arenas:
        # No arena scope = unsafe. v1 silently scoped to 'general'; v2
        # rejects to force callers to be explicit.
        raise HTTPException(400, "arena or arenas required")

    qvec = (await _embed_batch([req.query]))[0]
    # Compose the Qdrant filter: arena scope is always required, plus any
    # caller-supplied metadata_filter keys ANDed in (issue #342 —
    # `metadata_filter` used to be accepted but never applied).
    #
    # Filter shape:
    #   - list value -> MatchAny (`source_kind IN ('note','event')`)
    #   - scalar     -> MatchValue (exact)
    #   - None / "" / empty list -> skipped, so {"source_kind": null} is
    #     not turned into a clause that matches nothing
    #
    # MAX_META_FILTER_KEYS caps the number of extra clauses so a caller
    # passing 100 keys can't blow up Qdrant's query plan.
    # NOTE(review): keys beyond the cap are dropped silently (in dict
    # iteration order), which widens the result set rather than erroring
    # — confirm that is the intended contract.
    MAX_META_FILTER_KEYS = 16
    must: list[Any] = [
        qmodels.FieldCondition(key="arena", match=qmodels.MatchAny(any=arenas))
    ]
    if req.metadata_filter:
        for k, v in list(req.metadata_filter.items())[:MAX_META_FILTER_KEYS]:
            if v is None or v == "":
                continue
            if isinstance(v, list):
                if not v:
                    continue
                must.append(qmodels.FieldCondition(key=k, match=qmodels.MatchAny(any=v)))
            else:
                must.append(qmodels.FieldCondition(key=k, match=qmodels.MatchValue(value=v)))
    filter_ = qmodels.Filter(must=must)
    # Step 1 — over-fetch (issue #343).
    #
    # The producer's chunker stores several overlapping chunks per source
    # event, so raw Qdrant top-k can be dominated by vectors that all
    # share one event_id. Fetch (limit × SEARCH_OVERFETCH_MULT)
    # candidates, capped at SEARCH_OVERFETCH_MAX, so the dedup and quota
    # steps below still have enough distinct events to choose from.
    target_limit = req.limit or 10  # a falsy limit (None / 0) means 10
    overfetch = min(target_limit * SEARCH_OVERFETCH_MULT, SEARCH_OVERFETCH_MAX)
    # Phase 4 (#343): classify the query intent once, only when the
    # intent boost is enabled.
    intent = _classify_intent(req.query) if SEARCH_INTENT_BOOST else None
    # Issue #350: temporal-intent queries are sorted by timestamp instead
    # of going through MMR, so they don't need the vectors back; skip the
    # per-hit vector payload in that case.
    temporal_active = (intent == "temporal") and SEARCH_TEMPORAL_RERANK
    raw_results = await _qdrant.search(
        collection_name=COLLECTION_NAME,
        query_vector=qvec,
        query_filter=filter_,
        # Never ask for fewer than the caller's limit, even if the
        # over-fetch cap is smaller.
        limit=max(overfetch, target_limit),
        score_threshold=req.min_score,
        with_payload=True,
        # Phase 3 (#343): MMR needs the actual vectors to score pairwise
        # similarity. Only pull them when MMR is enabled AND we aren't
        # about to skip MMR for a temporal re-rank.
        with_vectors=SEARCH_MMR_ENABLED and not temporal_active,
    )

    # Step 2 — dedup by event_id; first occurrence wins (assumes Qdrant
    # returns hits score-descending, so that is the best-scoring chunk).
    # Hits whose payload has no event_id are dropped.
    seen_eids: set[str] = set()
    deduped: list[Any] = []
    for r in raw_results:
        eid = r.payload.get("event_id")
        if not eid or eid in seen_eids:
            continue
        seen_eids.add(eid)
        deduped.append(r)

    # Step 3 — (#350) fetch content + attributes for the whole deduped
    # candidate pool BEFORE re-ranking, because the temporal sort is
    # handed `full_attrs` and the final projection reads both maps.
    candidate_event_ids = [r.payload["event_id"] for r in deduped if r.payload.get("event_id")]
    full_content: dict[str, str] = {}
    full_attrs: dict[str, dict[str, Any]] = {}
    if candidate_event_ids:
        async with _pool.connection() as conn:
            async with conn.cursor() as cur:
                await cur.execute(
                    "SELECT id, content, attributes FROM events WHERE id = ANY(%s)",
                    (candidate_event_ids,),
                )
                for row in await cur.fetchall():
                    full_content[row["id"]] = row["content"]
                    full_attrs[row["id"]] = row["attributes"] or {}

    # Step 4 — Phase 4: intent-aware boost. Runs before the temporal sort
    # so that sort starts from the boosted ordering.
    # NOTE(review): _apply_intent_boost is defined elsewhere; presumably
    # it re-sorts by an adjusted score — confirm against its definition.
    if intent:
        deduped = _apply_intent_boost(deduped, intent)

    # Step 5 — Phase 3 / issue #350: pick the working pool (at most
    # 2 × target_limit hits).
    #
    #   - temporal intent (`last meeting`, `most recent`, ...) → temporal
    #     sort, then truncate. Recency IS the diversification axis for
    #     this class of query; MMR would un-sort the chronological order.
    #   - MMR enabled → MMR selection over the deduped pool.
    #   - otherwise → plain truncation.
    mmr_target = min(target_limit * 2, len(deduped))
    if temporal_active:
        deduped = _apply_temporal_sort(deduped, full_attrs)
        mmr_pool = deduped[:mmr_target]
    elif SEARCH_MMR_ENABLED:
        mmr_pool = _mmr_select(deduped, target=mmr_target, lambda_=SEARCH_MMR_LAMBDA)
    else:
        mmr_pool = deduped[:mmr_target]

    # Step 6 — source-type quota. max_per_kind floors at 1 so a
    # single-kind corpus still returns results; quota >= 1.0 disables.
    # Hits rejected by the quota are parked in `overflow`.
    max_per_kind = max(1, int(target_limit * SEARCH_SOURCE_TYPE_QUOTA))
    selected: list[Any] = []
    overflow: list[Any] = []
    counts: dict[str, int] = {}
    for r in mmr_pool:
        if len(selected) >= target_limit:
            break
        kind = r.payload.get("source_kind") or "unknown"
        if counts.get(kind, 0) < max_per_kind or SEARCH_SOURCE_TYPE_QUOTA >= 1.0:
            selected.append(r)
            counts[kind] = counts.get(kind, 0) + 1
        else:
            overflow.append(r)
    # Backfill from overflow if the quota was over-restrictive (e.g. the
    # corpus is mostly one source_kind). Better to return slightly skewed
    # top-k than fewer results than the caller asked for.
    if len(selected) < target_limit and overflow:
        selected.extend(overflow[: target_limit - len(selected)])

    results = selected
    # `full_content` and `full_attrs` were populated above for the whole
    # candidate pool, of which `results` is a subset. The projection
    # still uses .get() with fallbacks, so an event with no row in
    # `events` degrades to its content_preview and payload-only metadata.

    # Step 7 — issue #345: surface the rich metadata bag in the response.
    # Composition: Qdrant payload first, then `events.attributes`, so the
    # attributes value wins on key collision. `content_preview` and
    # `content` are excluded from metadata — the top-level `content`
    # already carries the text.
    METADATA_INTERNAL_KEYS = {"content_preview", "content"}
    out = []
    for r in results:
        eid = r.payload["event_id"]
        merged_meta = {
            **{k: v for k, v in r.payload.items() if k not in METADATA_INTERNAL_KEYS},
            **{k: v for k, v in full_attrs.get(eid, {}).items() if k not in METADATA_INTERNAL_KEYS},
        }
        out.append({
            "id": eid,
            "content": full_content.get(eid, r.payload.get("content_preview", "")),
            # The score attached to the Qdrant hit object.
            "similarity": r.score,
            "metadata": merged_meta,
        })
    return {"results": out}
984
+
985
+
986
+ # ----------------------------------------------------------------------
987
+ # /forget
988
+ # ----------------------------------------------------------------------
989
+
990
+
991
@app.post("/forget")
async def forget(req: ForgetRequest):
    """Delete events by ID or by metadata filter, then drop their vectors.

    With ``req.id`` set, that single event is deleted and
    ``metadata_contains`` is not consulted. Otherwise
    ``metadata_contains.arena`` becomes an equality filter on
    ``events.arena`` and the remaining keys become a JSONB containment
    filter on ``events.attributes``. The rows are removed from
    ``events`` first (the schema, defined outside this file, is expected
    to cascade to provenance/facts/relationships); the vectors whose
    payload ``event_id`` matches a deleted row are then removed from
    Qdrant.

    Raises:
        HTTPException(400): neither ``id`` nor ``metadata_contains`` was
            given, or ``metadata_contains`` yields no usable filter.
    """
    if not (req.id or req.metadata_contains):
        raise HTTPException(400, "id or metadata_contains required")

    deleted_events: list[str] = []

    async with _pool.connection() as conn, conn.cursor() as cur:
        if req.id:
            await cur.execute(
                "DELETE FROM events WHERE id = %s RETURNING id",
                (req.id,),
            )
        else:
            criteria = req.metadata_contains
            # Engine arena is read from metadata_contains.arena for v1
            # wire compatibility (see PR #327 history); every other key
            # is matched via attributes-JSONB containment.
            arena = criteria.get("arena")
            extra = {key: val for key, val in criteria.items() if key != "arena"}
            clauses: list[str] = []
            args: list = []
            if arena:
                clauses.append("arena = %s")
                args.append(arena)
            if extra:
                clauses.append("attributes @> %s::jsonb")
                args.append(psycopg.types.json.Json(extra))
            if not clauses:
                # e.g. {"arena": ""} — refuse rather than build an
                # unfiltered DELETE.
                raise HTTPException(400, "metadata_contains must specify arena or other filters")
            statement = "DELETE FROM events WHERE " + " AND ".join(clauses) + " RETURNING id"
            await cur.execute(statement, args)
        returned = await cur.fetchall()
        deleted_events = [row["id"] for row in returned]

    # Drop the vectors with a payload-filter delete on event_id. Skipped
    # when nothing was deleted.
    if deleted_events:
        by_event = qmodels.FieldCondition(
            key="event_id",
            match=qmodels.MatchAny(any=deleted_events),
        )
        await _qdrant.delete(
            collection_name=COLLECTION_NAME,
            points_selector=qmodels.FilterSelector(
                filter=qmodels.Filter(must=[by_event])
            ),
        )

    return {"deleted": len(deleted_events), "engine": "pme2"}