agentforge-core 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58):
  1. agentforge_core/__init__.py +228 -0
  2. agentforge_core/_bm25.py +132 -0
  3. agentforge_core/config/__init__.py +62 -0
  4. agentforge_core/config/loader.py +239 -0
  5. agentforge_core/config/module_schemas.py +208 -0
  6. agentforge_core/config/schema.py +424 -0
  7. agentforge_core/contracts/__init__.py +52 -0
  8. agentforge_core/contracts/auth.py +33 -0
  9. agentforge_core/contracts/chat.py +118 -0
  10. agentforge_core/contracts/embedding.py +71 -0
  11. agentforge_core/contracts/evaluator.py +56 -0
  12. agentforge_core/contracts/finding.py +39 -0
  13. agentforge_core/contracts/graph_store.py +180 -0
  14. agentforge_core/contracts/guardrails.py +129 -0
  15. agentforge_core/contracts/llm.py +152 -0
  16. agentforge_core/contracts/memory.py +113 -0
  17. agentforge_core/contracts/migrator.py +120 -0
  18. agentforge_core/contracts/renderer.py +57 -0
  19. agentforge_core/contracts/reranker.py +91 -0
  20. agentforge_core/contracts/strategy.py +70 -0
  21. agentforge_core/contracts/task.py +73 -0
  22. agentforge_core/contracts/tool.py +71 -0
  23. agentforge_core/contracts/vector_store.py +151 -0
  24. agentforge_core/migrations/__init__.py +14 -0
  25. agentforge_core/migrations/discover.py +77 -0
  26. agentforge_core/migrations/template.py +34 -0
  27. agentforge_core/observability/__init__.py +18 -0
  28. agentforge_core/observability/tracing.py +37 -0
  29. agentforge_core/production/__init__.py +77 -0
  30. agentforge_core/production/budget.py +134 -0
  31. agentforge_core/production/exceptions.py +136 -0
  32. agentforge_core/production/fallback.py +321 -0
  33. agentforge_core/production/log_filter.py +49 -0
  34. agentforge_core/production/log_format.py +117 -0
  35. agentforge_core/production/run_context.py +108 -0
  36. agentforge_core/py.typed +0 -0
  37. agentforge_core/resolver/__init__.py +38 -0
  38. agentforge_core/resolver/discover.py +145 -0
  39. agentforge_core/resolver/resolve.py +168 -0
  40. agentforge_core/testing/__init__.py +45 -0
  41. agentforge_core/testing/conformance.py +1138 -0
  42. agentforge_core/values/__init__.py +103 -0
  43. agentforge_core/values/auth.py +20 -0
  44. agentforge_core/values/chat.py +131 -0
  45. agentforge_core/values/claim.py +30 -0
  46. agentforge_core/values/graph.py +136 -0
  47. agentforge_core/values/guardrails.py +49 -0
  48. agentforge_core/values/manifest.py +129 -0
  49. agentforge_core/values/messages.py +153 -0
  50. agentforge_core/values/module.py +40 -0
  51. agentforge_core/values/pipeline.py +43 -0
  52. agentforge_core/values/retrieval.py +53 -0
  53. agentforge_core/values/state.py +118 -0
  54. agentforge_core/values/vector.py +59 -0
  55. agentforge_core-0.2.1.dist-info/METADATA +66 -0
  56. agentforge_core-0.2.1.dist-info/RECORD +58 -0
  57. agentforge_core-0.2.1.dist-info/WHEEL +4 -0
  58. agentforge_core-0.2.1.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,1138 @@
1
+ """Conformance suites for `agentforge-core` ABCs.
2
+
3
+ Every shipped or third-party driver must pass these suites. They are
4
+ exposed as functions (not pytest collections) so they can be invoked
5
+ from any test runner by passing in a ready-to-use store / client.
6
+
7
+ Usage in a driver's tests:
8
+
9
+ import pytest
10
+ from agentforge_core.testing import run_memory_conformance
11
+ from my_pkg import MyMemoryStore
12
+
13
+ @pytest.mark.asyncio
14
+ async def test_my_driver_conforms() -> None:
15
+ async with MyMemoryStore.from_url("...") as store:
16
+ await run_memory_conformance(store)
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import itertools
22
+ import math
23
+ import typing
24
+ from collections.abc import AsyncIterator, Awaitable, Callable
25
+ from typing import Any
26
+
27
+ from agentforge_core.contracts.chat import ChatHistoryStore, HistoryTruncationStrategy
28
+ from agentforge_core.contracts.embedding import EmbeddingClient
29
+ from agentforge_core.contracts.graph_store import GraphStore
30
+ from agentforge_core.contracts.guardrails import (
31
+ InputValidator,
32
+ OutputValidator,
33
+ ToolCallGate,
34
+ )
35
+ from agentforge_core.contracts.memory import MemoryStore
36
+ from agentforge_core.contracts.reranker import Reranker
37
+ from agentforge_core.contracts.strategy import ReasoningStrategy
38
+ from agentforge_core.contracts.task import Task
39
+ from agentforge_core.contracts.tool import Tool
40
+ from agentforge_core.contracts.vector_store import VectorStore
41
+ from agentforge_core.values.claim import Claim
42
+ from agentforge_core.values.graph import (
43
+ GraphEdge,
44
+ GraphNode,
45
+ GraphPattern,
46
+ GraphSegment,
47
+ )
48
+ from agentforge_core.values.guardrails import ValidationResult
49
+ from agentforge_core.values.state import AgentState, StepKind
50
+ from agentforge_core.values.vector import VectorItem, VectorMatch
51
+
52
# Closed-set mirror of the type arguments of `StepKind`.
# `run_strategy_conformance` checks every emitted step's `kind` against it.
_VALID_STEP_KINDS: frozenset[str] = frozenset(typing.get_args(StepKind))
54
+
55
+
56
def _claim(
    *,
    project: str = "p1",
    agent: str = "a1",
    run_id: str = "run-x",
    category: str = "finding",
    payload: dict[str, object] | None = None,
) -> Claim:
    """Build a `Claim` for the conformance cases.

    Every field has a fixed default so each case only spells out the one
    field it is exercising. When `payload` is omitted (`None`) the claim
    carries `{"v": 1}`.
    """
    if payload is None:
        payload = {"v": 1}
    return Claim(
        run_id=run_id,
        project=project,
        agent=agent,
        category=category,
        payload=payload,
    )
71
+
72
+
73
async def _collect(it: AsyncIterator[Claim]) -> list[Claim]:
    """Drain the async iterator `it` into a list, preserving yield order."""
    gathered: list[Claim] = []
    async for claim in it:
        gathered.append(claim)
    return gathered
75
+
76
+
77
# Magic-number constant for the `delete()` conformance cases: each case
# writes this many matching claims and expects `delete()` to report them.
_EXPECTED_DELETE_COUNT = 2

# Magic-number constants for the chat-history conformance cases.
_EXPECTED_CHAT_TURNS_SID = 2
_EXPECTED_CHAT_TURNS_SID_B = 1
85
+
86
+
87
async def _run_delete_conformance(store: MemoryStore) -> None:
    """Run the feat-017 `delete()` conformance cases against `store`.

    Kept separate from the main memory suite so the parent function stays
    under ruff's PLR0915 statement cap.

    Cases:

    * `delete()` with no filters raises `ModuleError` (defence against a
      silent total wipe).
    * `delete(run_id=...)` returns the number of claims removed, removes
      every matching claim, and leaves claims from other runs in place.
    * `delete(category=...)` returns the number of claims removed, clears
      that category, and leaves other categories in place.

    The non-matching claims written here (the `run-keep` claim and the
    `something-else` claim) are not removed afterwards.

    Args:
        store: The `MemoryStore` under test.

    Raises:
        AssertionError: a contract was violated.
    """
    from agentforge_core.production.exceptions import ModuleError  # noqa: PLC0415

    # No-filter refuses (defence against silent total wipe).
    try:
        await store.delete()
    except ModuleError:
        pass
    else:
        raise AssertionError("delete() with no filters must raise ModuleError")

    # delete(run_id=...) only removes matching claims; accurate count.
    purge_run = "run-purge"
    keeper_run = "run-keep"
    await store.put(_claim(run_id=purge_run, category="purge-me"))
    await store.put(_claim(run_id=purge_run, category="purge-me"))
    await store.put(_claim(run_id=keeper_run, category="purge-me"))
    removed_run = await store.delete(run_id=purge_run)
    assert removed_run == _EXPECTED_DELETE_COUNT, (
        f"delete(run_id={purge_run!r}) must return count; got {removed_run}"
    )
    remaining = await store.query(category="purge-me")
    # An empty result would satisfy an `all(...)` check vacuously, so the
    # keeper's survival is asserted on its own before checking that no
    # purged claim is still present.
    assert any(c.run_id == keeper_run for c in remaining), (
        "delete(run_id=...) must leave non-matching claims behind"
    )
    assert all(c.run_id == keeper_run for c in remaining), (
        f"delete(run_id={purge_run!r}) must remove every matching claim"
    )

    # delete(category=...) clears the whole category and nothing else.
    cat_marker = "ephemeral-step"
    await store.put(_claim(category=cat_marker))
    await store.put(_claim(category=cat_marker))
    await store.put(_claim(category="something-else"))
    removed_cat = await store.delete(category=cat_marker)
    assert removed_cat == _EXPECTED_DELETE_COUNT, (
        f"delete(category={cat_marker!r}) must report accurate count; got {removed_cat}"
    )
    after_cat = await store.query(category=cat_marker)
    assert after_cat == [], "delete(category=...) must clear all claims of that category"
    survivors = await store.query(category="something-else")
    assert survivors, "delete(category=...) must leave other categories behind"
126
+
127
+
128
async def run_memory_conformance(store: MemoryStore) -> None:
    """Run the full MemoryStore conformance suite against `store`.

    The store must be empty when this is called. It is NOT empty when the
    function returns: the claims written by cases 1-10 are never deleted,
    and the `delete()` cases leave their non-matching claims behind. Run
    the suite against a throwaway store, or clean up afterwards.

    Cases:

    1. `put()` returns the claim's id and `get()` returns the claim.
    2. `get()` of an unknown id returns `None`.
    3. `query()` with no filters includes the stored claim.
    4-7. `query()` filters by project, agent, category and run_id; each
       filtered result must contain the matching claim and nothing that
       fails the filter.
    8. `query(limit=N)` returns at most N claims.
    9. `supersede()` stores the new claim with a `supersedes` link.
    10. `stream()` yields matching claims.
    11-12. `capabilities()` returns a set and `supports()` agrees with it.
    13-15. `delete()` cases (see `_run_delete_conformance`).

    Args:
        store: The `MemoryStore` under test.

    Raises:
        AssertionError: a contract was violated.
    """
    # 1. put + get roundtrip
    c1 = _claim(category="finding")
    cid = await store.put(c1)
    assert cid == c1.id, "put() must return the claim's id"
    fetched = await store.get(cid)
    assert fetched is not None, "get() must return the persisted claim"
    assert fetched.id == c1.id

    # 2. get returns None for unknown id
    missing = await store.get("01HX-NONEXISTENT")
    assert missing is None, "get() of an unknown id must return None"

    # 3. query with no filters returns at least the claim we put
    all_results = await store.query()
    assert any(c.id == cid for c in all_results), (
        "query() with no filters must include the put claim"
    )

    # 4. query filters by project
    other_project = _claim(project="other-project")
    await store.put(other_project)
    only_p1 = await store.query(project="p1")
    assert any(c.id == cid for c in only_p1)
    assert all(c.project == "p1" for c in only_p1), (
        "query(project=...) must filter results to that project"
    )

    # 5. query filters by agent. The presence check keeps an empty result
    # from passing the `all(...)` assertion vacuously (same in 6 and 7).
    other_agent = _claim(agent="other-agent")
    await store.put(other_agent)
    only_a1 = await store.query(agent="a1")
    assert any(c.id == cid for c in only_a1), (
        "query(agent=...) must include claims written by that agent"
    )
    assert all(c.agent == "a1" for c in only_a1), (
        "query(agent=...) must filter results to that agent"
    )

    # 6. query filters by category
    decision = _claim(category="decision")
    await store.put(decision)
    only_findings = await store.query(category="finding")
    assert any(c.id == cid for c in only_findings), (
        "query(category=...) must include claims of that category"
    )
    assert all(c.category == "finding" for c in only_findings), (
        "query(category=...) must filter results to that category"
    )

    # 7. query filters by run_id
    other_run = _claim(run_id="run-y")
    await store.put(other_run)
    only_run_x = await store.query(run_id="run-x")
    assert any(c.id == cid for c in only_run_x), (
        "query(run_id=...) must include claims of that run_id"
    )
    assert all(c.run_id == "run-x" for c in only_run_x), (
        "query(run_id=...) must filter results to that run_id"
    )

    # 8. query respects limit
    limited = await store.query(limit=1)
    assert len(limited) <= 1, "query(limit=N) must return at most N claims"

    # 9. supersede chains old → new
    new_claim = _claim(payload={"v": 2})
    new_id = await store.supersede(cid, new_claim)
    assert new_id == new_claim.id
    refetched = await store.get(new_id)
    assert refetched is not None
    assert refetched.supersedes == cid, "supersede() must set supersedes link on the new claim"

    # 10. stream yields claims
    streamed = await _collect(store.stream(project="p1"))
    assert len(streamed) >= 1, "stream() must yield matching claims"
    assert all(c.project == "p1" for c in streamed)

    # 11. capabilities() returns a set
    caps = store.capabilities()
    assert isinstance(caps, set)

    # 12. supports() reflects capabilities(); the unknown-name check runs
    # whether or not any capability is declared.
    if caps:
        sample = next(iter(caps))
        assert store.supports(sample) is True
    assert store.supports("definitely-not-a-capability-2026") is False

    # 13-15. delete() — feat-017. Tested separately so the main
    # conformance function stays under PLR0915's statement cap.
    await _run_delete_conformance(store)
219
+
220
+
221
+ # ----------------------------------------------------------------------
222
+ # Strategy conformance — feat-002.
223
+ # ----------------------------------------------------------------------
224
+
225
+
226
async def run_strategy_conformance(
    strategy: ReasoningStrategy,
    *,
    state_factory: Callable[[], AgentState],
    pre_run: Callable[[AgentState], None | Awaitable[None]] | None = None,
) -> None:
    """Exercise the shared `ReasoningStrategy` conformance checks.

    Args:
        strategy: A constructed strategy instance.
        state_factory: Produces the fresh `AgentState` the strategy is run
            on (with `RuntimeContext` bound on `state.metadata` if the
            strategy needs one — the framework runtime does this; tests
            must do it explicitly).
        pre_run: Optional hook, sync or async, called with the freshly
            built `AgentState` before `strategy.run()` (e.g. to seed
            findings or steps). May be omitted.

    Checks, in order, the locked invariants of `ReasoningStrategy.run`:

    1. The returned object is the very `AgentState` instance passed in.
    2. `state.steps` holds at least one step.
    3. Each step's `kind` is a valid `StepKind` value.
    4. `step.iteration` never decreases from one step to the next.
    5. Each step's `tokens_in`, `tokens_out`, `cost_usd` and
       `duration_ms` are non-negative (Pydantic enforces this already;
       the assertions here are defence-in-depth).

    Raises:
        AssertionError: a contract was violated.
    """
    state = state_factory()
    if pre_run is not None:
        pending = pre_run(state)
        # A sync hook returns None (no `__await__`); an async hook hands
        # back something awaitable that still has to be driven.
        if hasattr(pending, "__await__"):
            await pending

    returned = await strategy.run(state)

    # 1. Same instance comes back.
    assert returned is state, (
        "ReasoningStrategy.run must return the same AgentState instance "
        "it received (state mutation, not replacement)."
    )

    # 2. At least one step was recorded.
    assert len(state.steps) >= 1, (
        "ReasoningStrategy.run must append at least one Step to state.steps before returning."
    )

    # 3. Each kind belongs to the closed StepKind set.
    for kind in (entry.kind for entry in state.steps):
        assert kind in _VALID_STEP_KINDS, (
            f"step.kind={kind!r} is not a valid StepKind. "
            f"Valid kinds: {sorted(_VALID_STEP_KINDS)}"
        )

    # 4. Iteration numbers never go backwards.
    previous = -1
    for current in (entry.iteration for entry in state.steps):
        assert current >= previous, (
            "step.iteration must be monotonically non-decreasing; "
            f"saw {current} after {previous}."
        )
        previous = current

    # 5. Accounting fields are non-negative (Pydantic already enforces
    # ge=0; this is defence-in-depth).
    for entry in state.steps:
        assert entry.tokens_in >= 0, "step.tokens_in must be non-negative"
        assert entry.tokens_out >= 0, "step.tokens_out must be non-negative"
        assert entry.cost_usd >= 0.0, "step.cost_usd must be non-negative"
        assert entry.duration_ms >= 0, "step.duration_ms must be non-negative"
299
+
300
+
301
+ # ----------------------------------------------------------------------
302
+ # Embedding conformance — feat-003.
303
+ # ----------------------------------------------------------------------
304
+
305
+
306
async def run_embedding_conformance(client: EmbeddingClient) -> None:
    """Exercise the shared `EmbeddingClient` conformance checks.

    Checks the locked invariants of `EmbeddingClient.embed`:

    1. `dimensions()` returns a positive integer without a network
       round-trip (callers rely on this for storage sizing).
    2. `embed(texts)` raises `ValueError` on an empty input list
       (no provider supports zero-length batches).
    3. The returned `EmbeddingResponse` has one vector per input
       text in input order.
    4. Every vector has length `dimensions()`.
    5. `usage.input_tokens >= 0` and `usage.output_tokens == 0`
       (embeddings have no output tokens).
    6. `cost_usd >= 0`.
    7. `model` and `provider` are non-empty strings.
    8. `supports("not-a-real-capability")` returns False (the
       capability check is honest about unknown names).

    The helper is async because drivers may issue a real (or mocked)
    network call. Arranging fixtures (e.g. injecting a fake AWS session)
    is the calling test's job.

    Args:
        client: A constructed `EmbeddingClient` instance, ready to use.

    Raises:
        AssertionError: a contract was violated.
    """
    # 1. Declared dimensionality is a positive int.
    declared_dim = client.dimensions()
    assert isinstance(declared_dim, int), "dimensions() must return an int"
    assert declared_dim >= 1, f"dimensions() must be >= 1, got {declared_dim}"

    # 2. An empty batch is rejected with ValueError.
    try:
        await client.embed([])
    except ValueError:
        rejected_empty = True
    else:
        rejected_empty = False
    assert rejected_empty, "embed([]) must raise ValueError on empty input"

    # 3-7. Round-trip a small batch and inspect the response.
    inputs = ["hello", "world", "agentforge"]
    reply = await client.embed(inputs)
    vector_count = len(reply.vectors)
    assert vector_count == len(inputs), (
        "embed() must return one vector per input text; "
        f"got {vector_count} vectors for {len(inputs)} texts."
    )
    for position, vector in enumerate(reply.vectors):
        assert len(vector) == declared_dim, (
            f"vector {position} has length {len(vector)} but dimensions() declared {declared_dim}"
        )
    assert reply.dimensions == declared_dim, (
        f"response.dimensions ({reply.dimensions}) must match client.dimensions() ({declared_dim})"
    )
    usage = reply.usage
    assert usage.input_tokens >= 0
    assert usage.output_tokens == 0, (
        f"embedding responses must report output_tokens=0; got {usage.output_tokens}."
    )
    assert reply.cost_usd >= 0.0
    assert reply.model, "EmbeddingResponse.model must be non-empty"
    assert reply.provider, "EmbeddingResponse.provider must be non-empty"

    # 8. Unknown capability names are reported as unsupported.
    assert client.supports("definitely-not-a-capability-2026") is False
371
+
372
+
373
+ # ----------------------------------------------------------------------
374
+ # Vector store conformance — feat-007.
375
+ # ----------------------------------------------------------------------
376
+
377
+
378
async def run_vector_conformance(store: VectorStore) -> None:
    """Run the shared `VectorStore` conformance suite.

    The store must be empty when this is called and, when every check
    passes, is left empty on return (every item upserted is also
    deleted).

    Verifies the locked invariants of `VectorStore`:

    1. `dimensions()` returns a positive int with no network call.
    2. `upsert` accepts items whose vectors match `dimensions()`;
       dimension mismatch raises `ValueError`.
    3. `search` returns at most `limit` matches sorted by score
       descending, with scores in `[0, 1]`.
    4. `search`'s top hit on a query identical to an upserted
       vector returns that item with score ≈ 1.0.
    5. `upsert` is write-through: re-upserting an existing id
       replaces the prior record (no duplicate ids in results).
    6. `delete` returns the count of items actually removed; unknown
       ids are silently dropped (no exception).
    7. `filter_metadata` restricts results to items carrying the given
       key/value, and still returns the items that do match.
    8. `search(limit=0)` raises `ValueError`.
    9. `supports("not-a-real-capability")` returns False.

    NOTE: the test vectors come from `_unit_vector`, which places the
    dominant component at `seed % dim`. For `dimensions() < 3` the three
    seeded items are therefore not all distinct, so the "top result must
    be id-0" check is only meaningful for stores with 3+ dimensions.

    Drivers may issue real network calls; the suite is async. Tests are
    responsible for arranging fixtures (e.g. running Postgres) before
    calling this helper.

    Args:
        store: The `VectorStore` under test.

    Raises:
        AssertionError: a contract was violated.
    """
    _ITEM_COUNT = 3  # noqa: N806 — local constant in this function only

    # 1. dimensions() is a positive int
    dim = store.dimensions()
    assert isinstance(dim, int), "dimensions() must return an int"
    assert dim >= 1, f"dimensions() must be >= 1, got {dim}"

    # 2. dimension-mismatch on upsert
    bad = VectorItem(id="bad", vector=tuple([0.1] * (dim + 1)), text="bad", metadata={})
    raised_dim_error = False
    try:
        await store.upsert([bad])
    except ValueError:
        raised_dim_error = True
    assert raised_dim_error, "upsert with mismatched vector length must raise ValueError"

    # 3-5. happy-path upsert + search
    items = [
        VectorItem(
            id=f"id-{i}",
            vector=tuple(_unit_vector(dim, seed=i)),
            text=f"text {i}",
            metadata={"category": "doc" if i < 2 else "note", "n": i},  # noqa: PLR2004
        )
        for i in range(_ITEM_COUNT)
    ]
    await store.upsert(items)

    # Searching with the same vector as item-0 should put item-0 first.
    results = await store.search(items[0].vector, limit=_ITEM_COUNT)
    assert len(results) == _ITEM_COUNT, f"expected {_ITEM_COUNT} results, got {len(results)}"
    # Sorted by score descending, all in [0, 1]
    for prev, nxt in itertools.pairwise(results):
        assert prev.score >= nxt.score, f"results not sorted desc: {prev.score} before {nxt.score}"
    for r in results:
        assert 0.0 <= r.score <= 1.0, f"score out of range: {r.score}"
    assert results[0].id == "id-0", (
        f"top result must be the exact-match upsert, got {results[0].id!r}"
    )
    score_tolerance = 1e-3
    assert abs(results[0].score - 1.0) < score_tolerance, (
        f"exact-match score must be ~1.0, got {results[0].score}"
    )

    # 5. write-through: replace id-0 and search again
    replacement = VectorItem(
        id="id-0",
        vector=tuple(_unit_vector(dim, seed=99)),
        text="replaced",
        metadata={"category": "doc", "n": 0},
    )
    await store.upsert([replacement])
    after = await store.search(items[0].vector, limit=10)
    # No two results may share an id.
    seen_ids = [r.id for r in after]
    assert len(seen_ids) == len(set(seen_ids)), (
        f"upsert must replace prior records, but got duplicate ids: {seen_ids}"
    )

    # 7. metadata filtering. id-0 and id-1 both carry category "doc", so
    # an empty result is a failure — without this check the loop below
    # would pass vacuously for a store that filters everything out.
    filtered = await store.search(items[0].vector, limit=10, filter_metadata={"category": "doc"})
    assert filtered, "filter_metadata must still return the items that match the filter"
    for r in filtered:
        assert r.metadata.get("category") == "doc", (
            f"filter_metadata broken: returned {r.metadata!r}"
        )

    # 8. limit < 1 raises
    raised_limit_error = False
    try:
        await store.search(items[0].vector, limit=0)
    except ValueError:
        raised_limit_error = True
    assert raised_limit_error, "search(limit=0) must raise ValueError"

    # 6. delete: known + unknown ids
    deleted = await store.delete([item.id for item in items] + ["never-existed"])
    assert deleted == _ITEM_COUNT, (
        f"delete should report {_ITEM_COUNT} actual removals "
        f"(the {_ITEM_COUNT} we upserted), got {deleted}"
    )
    # Empty list returns 0
    assert await store.delete([]) == 0

    # 9. supports honesty
    assert store.supports("definitely-not-a-capability-2026") is False
492
+
493
+
494
def _unit_vector(dim: int, *, seed: int) -> list[float]:
    """Return a deterministic, L2-normalised vector of length `dim`.

    Every component starts at a small uniform background (0.01); the
    component at index `seed % dim` is raised to 1.0, giving a
    one-hot-like shape. The result is then scaled to unit length so
    cosine-similarity computations are stable across drivers.
    """
    components = [0.01 for _ in range(dim)]
    components[seed % dim] = 1.0  # the single dominant axis
    magnitude = math.sqrt(sum([value * value for value in components]))
    return [value / magnitude for value in components]
505
+
506
+
507
+ # ----------------------------------------------------------------------
508
+ # Graph store conformance — feat-009.
509
+ # ----------------------------------------------------------------------
510
+
511
# Module-private named constants for the graph conformance suite; they
# are only meaningful inside the assertions below.

# Node count of a one-segment path: (n0)-[e]->(n1).
_GRAPH_PATH_LEN_TWO = 2
# Depth of the seeded chain: paper:3 -> paper:2 -> paper:1.
_GRAPH_DEPTH_TWO = 2
# `year` property value used by the idempotent-upsert check.
_EXPECTED_YEAR = 2017
516
+
517
+
518
async def run_graph_conformance(store: GraphStore) -> None:
    """Exercise the shared `GraphStore` conformance checks.

    The store must be empty when this is called and is left empty when
    the function returns (every node and edge created here is also
    deleted).

    Covers the locked invariants of `GraphStore`:

    1. `add_node` is idempotent (re-adding the same id replaces the
       prior `properties` rather than appending or erroring).
    2. `get_node(id)` returns the most-recent node, or `None` if
       absent.
    3. `add_edge` rejects edges referencing unknown nodes
       (`ValueError`).
    4. `add_edge` is idempotent on `(src, dst, edge_type)`.
    5. `get_edges(id, direction=...)` honours the direction filter
       and the optional `edge_type` filter.
    6. `match()` finds a single-segment pattern and returns paths of
       length 2 (one edge, two nodes).
    7. `match(limit=...)` caps results.
    8. `traverse()` respects `max_depth` and never returns paths
       longer than `max_depth` edges.
    9. `delete_node(cascade=False)` raises if the node has incident
       edges; `cascade=True` removes them.
    10. `delete_edge` returns False on unknown triples and True on
        known ones.
    11. `supports()` is honest about unknown capabilities.

    Args:
        store: The `GraphStore` under test.

    Raises:
        AssertionError: a contract was violated.
    """
    # The phases build on each other (seed -> query -> delete), so they
    # run strictly in this order.
    async_phases = (
        _graph_round_trip_invariants,
        _graph_seed_citation_chain,
        _graph_query_invariants,
        _graph_delete_invariants,
    )
    for phase in async_phases:
        await phase(store)
    _graph_capability_invariants(store)
555
+
556
+
557
async def _graph_round_trip_invariants(store: GraphStore) -> None:
    """Check node round-trip, idempotent upsert and unknown-endpoint rejection.

    Leaves node `paper:1` in the store.
    """
    node_id = "paper:1"
    await store.add_node(GraphNode(id=node_id, labels=("Doc",), properties={"topic": "ml"}))

    stored = await store.get_node(node_id)
    assert stored is not None, "get_node must return the persisted node"
    assert stored.id == "paper:1"
    assert stored.properties.get("topic") == "ml"

    # An unknown id yields None rather than an exception.
    absent = await store.get_node("paper:never")
    assert absent is None, "get_node of an unknown id must return None"

    # Re-adding the same id with an extra property must replace the node.
    updated = GraphNode(
        id=node_id, labels=("Doc",), properties={"topic": "ml", "year": _EXPECTED_YEAR}
    )
    await store.add_node(updated)
    latest = await store.get_node(node_id)
    assert latest is not None
    assert latest.properties.get("year") == _EXPECTED_YEAR, (
        "add_node must replace properties on idempotent upsert"
    )

    # An edge whose source node does not exist must be refused.
    try:
        await store.add_edge(GraphEdge(src="ghost", dst="paper:1", edge_type="CITES"))
    except ValueError:
        rejected = True
    else:
        rejected = False
    assert rejected, "add_edge must raise ValueError on unknown endpoint"
589
+
590
+
591
async def _graph_seed_citation_chain(store: GraphStore) -> None:
    """Seed the chain paper:3 -> paper:2 -> paper:1 via CITES edges.

    Assumes `paper:1` already exists. Also re-adds the paper:2 -> paper:1
    edge to check that `add_edge` does not create a duplicate.
    """
    for node_id, topic in (("paper:2", "ml"), ("paper:3", "bio")):
        await store.add_node(GraphNode(id=node_id, labels=("Doc",), properties={"topic": topic}))
    for src, dst in (("paper:2", "paper:1"), ("paper:3", "paper:2")):
        await store.add_edge(GraphEdge(src=src, dst=dst, edge_type="CITES"))

    # Same (src, dst, edge_type) triple again, this time with a property.
    weighted = GraphEdge(
        src="paper:2", dst="paper:1", edge_type="CITES", properties={"weight": 0.9}
    )
    await store.add_edge(weighted)
    outgoing = await store.get_edges("paper:2", direction="out")
    to_paper_one = sum(1 for edge in outgoing if edge.dst == "paper:1")
    assert to_paper_one == 1, (
        "add_edge must be idempotent on (src, dst, edge_type)"
    )
606
+
607
+
608
async def _graph_query_invariants(store: GraphStore) -> None:
    """Check `get_edges`, `match` and `traverse` against the seeded chain.

    Expects the paper:3 -> paper:2 -> paper:1 CITES chain to be present.
    """
    # --- get_edges: direction and edge_type filters ---
    outgoing = await store.get_edges("paper:2", direction="out")
    assert all(edge.src == "paper:2" for edge in outgoing), "direction=out filter broken"

    incoming = await store.get_edges("paper:1", direction="in")
    assert all(edge.dst == "paper:1" for edge in incoming), "direction=in filter broken"
    assert any(edge.src == "paper:2" for edge in incoming)

    typed = await store.get_edges("paper:2", edge_type="CITES", direction="out")
    assert all(edge.edge_type == "CITES" for edge in typed)

    # --- match: one-segment pattern, then the limit cap ---
    segment = GraphSegment(src_label="Doc", edge_type="CITES", dst_label="Doc")
    pattern = GraphPattern(segments=(segment,))
    found = await store.match(pattern, limit=10)
    assert len(found) >= 1, "match should find at least one CITES edge"
    for hit in found:
        assert len(hit.nodes) == _GRAPH_PATH_LEN_TWO, (
            "single-segment match must return length-2 paths"
        )
        assert len(hit.edges) == 1
        assert hit.edges[0].edge_type == "CITES"

    single = await store.match(pattern, limit=1)
    assert len(single) <= 1

    # --- traverse: depth cap, reachability, unknown start node ---
    shallow = await store.traverse("paper:3", max_depth=1)
    for route in shallow:
        assert len(route.edges) <= 1, (
            f"max_depth=1 must not return path with {len(route.edges)} edges"
        )

    deep = await store.traverse("paper:3", max_depth=_GRAPH_DEPTH_TWO)
    reaches_paper_one = any(route.nodes[-1].id == "paper:1" for route in deep)
    assert reaches_paper_one, "traverse(max_depth=2) from paper:3 must reach paper:1"
    for route in deep:
        assert len(route.edges) <= _GRAPH_DEPTH_TWO

    from_ghost = await store.traverse("ghost-node", max_depth=_GRAPH_DEPTH_TWO)
    assert from_ghost == [], "traverse from unknown node must return empty list"
647
+
648
+
649
async def _graph_delete_invariants(store: GraphStore) -> None:
    """Check the node/edge delete invariants, then empty the store.

    Expects the paper:3 -> paper:2 -> paper:1 CITES chain to be present.

    Checks:

    * `delete_node(cascade=False)` raises `ValueError` for a node that
      still has incident edges.
    * `delete_edge` returns True for an existing triple.
    * `delete_node(cascade=True)` returns True, removes the node and its
      remaining incident edges.
    * `delete_edge` returns False (no exception) for an unknown triple.

    Raises:
        AssertionError: a contract was violated.
    """
    try:
        await store.delete_node("paper:2", cascade=False)
    except ValueError:
        refused = True
    else:
        refused = False
    assert refused, "delete_node with cascade=False must raise on connected node"

    # Known triple returns True. This has to run before the cascading
    # delete below, which removes paper:2's remaining edges and would
    # leave no known edge to test with. paper:2 -> paper:1 is left in
    # place so the cascade still has an edge to remove.
    assert await store.delete_edge("paper:3", "paper:2", edge_type="CITES") is True, (
        "delete_edge must return True for an existing (src, dst, edge_type) triple"
    )

    deleted = await store.delete_node("paper:2", cascade=True)
    assert deleted is True
    assert await store.get_node("paper:2") is None
    assert (await store.get_edges("paper:1", direction="in")) == []

    # Unknown triple returns False, not raise.
    assert await store.delete_edge("paper:3", "paper:never", edge_type="CITES") is False

    # Empty the store fully.
    await store.delete_node("paper:1", cascade=True)
    await store.delete_node("paper:3", cascade=True)
669
+
670
+
671
def _graph_capability_invariants(store: GraphStore) -> None:
    """Check that `capabilities()` and `supports()` agree with each other.

    `capabilities()` must return a set, an unknown name must be reported
    as unsupported, and — when any capability is declared — one declared
    capability must be reported as supported.
    """
    declared = store.capabilities()
    assert isinstance(declared, set)
    assert store.supports("definitely-not-a-capability-2026") is False
    if not declared:
        return
    assert store.supports(next(iter(declared))) is True
679
+
680
+
681
+ # ======================================================================
682
+ # Guardrail conformance — feat-018.
683
+ # ======================================================================
684
+
685
+
686
async def run_input_validator_conformance(
    validator: InputValidator,
    *,
    benign: str = "What is the weather today?",
    obvious_violation: str | None = None,
) -> None:
    """Check that an `InputValidator` honours the locked contract.

    - The validator exposes `name` and `description` as strings.
    - `.validate(...)` returns a `ValidationResult`.
    - The `benign` input yields `passed=True`.
    - If `obvious_violation` is given, the validator must flag it
      (`passed=False`).

    Raises:
        AssertionError: a contract was violated.
    """
    declared_name = getattr(validator, "name", None)
    assert isinstance(declared_name, str), "name must be a str ClassVar"
    declared_description = getattr(validator, "description", None)
    assert isinstance(declared_description, str), "description must be a str ClassVar"

    benign_outcome = await validator.validate(benign, {"run_id": "conformance"})
    assert isinstance(benign_outcome, ValidationResult), "validate() must return ValidationResult"
    assert benign_outcome.passed, (
        f"benign input must pass; got violations {list(benign_outcome.violations)!r}"
    )

    # The violation case is optional; nothing more to check without it.
    if obvious_violation is None:
        return

    flagged = await validator.validate(obvious_violation, {"run_id": "conformance"})
    assert isinstance(flagged, ValidationResult)
    assert not flagged.passed, (
        f"obvious-violation input must fail; validator {validator.name!r} returned passed=True"
    )
717
+
718
+
719
async def run_output_validator_conformance(
    validator: OutputValidator,
    *,
    benign: str = "The weather is nice today.",
    obvious_violation: str | None = None,
) -> None:
    """Validate that an OutputValidator honours the locked contract.

    Mirrors `run_input_validator_conformance`:

    - `name` and `description` are present and are strings.
    - `.validate(...)` returns a `ValidationResult`.
    - The `benign` output yields `passed=True`.
    - When `obvious_violation` is supplied, validating it yields
      `passed=False`.

    Redaction behaviour (`redacted_content`) is NOT checked here; drivers
    that redact need their own assertions for it.

    Raises:
        AssertionError: one of the checks above failed.
    """
    assert isinstance(getattr(validator, "name", None), str), "name must be a str ClassVar"
    assert isinstance(
        getattr(validator, "description", None),
        str,
    ), "description must be a str ClassVar"

    result = await validator.validate(benign, {"run_id": "conformance"})
    assert isinstance(result, ValidationResult), "validate() must return ValidationResult"
    assert result.passed, f"benign output must pass; got {list(result.violations)!r}"

    if obvious_violation is not None:
        bad = await validator.validate(obvious_violation, {"run_id": "conformance"})
        assert isinstance(bad, ValidationResult), "validate() must return ValidationResult"
        assert not bad.passed, "obvious-violation output must fail"
739
+
740
+
741
async def run_tool_gate_conformance(
    gate: ToolCallGate,
    *,
    benign_tool: Tool,
    benign_tool_name: str,
    forbidden_tool: Tool | None = None,
    forbidden_tool_name: str | None = None,
) -> None:
    """Exercise a ``ToolCallGate`` against the locked contract.

    Checks performed:

    - ``name`` and ``description`` are present and are strings.
    - ``authorize(...)`` for the benign tool returns a
      ``ValidationResult`` (its verdict is not inspected).
    - When both ``forbidden_tool`` and ``forbidden_tool_name`` are
      supplied, ``authorize(...)`` for that tool yields ``passed=False``.
      If either one is missing the denial check is skipped.

    Raises:
        AssertionError: one of the checks above failed.
    """
    for attribute in ("name", "description"):
        assert isinstance(getattr(gate, attribute, None), str)

    allowed = await gate.authorize(benign_tool_name, benign_tool, {}, {})
    assert isinstance(allowed, ValidationResult)

    if forbidden_tool is None or forbidden_tool_name is None:
        return

    verdict = await gate.authorize(forbidden_tool_name, forbidden_tool, {}, {})
    assert isinstance(verdict, ValidationResult)
    assert not verdict.passed, (
        f"gate {gate.name!r} must deny {forbidden_tool_name!r}; got passed=True"
    )
762
+
763
+
764
+ # ----------------------------------------------------------------------
765
+ # Task conformance — feat-015.
766
+ # ----------------------------------------------------------------------
767
+
768
+
769
async def run_task_conformance(
    task: Task,
    *,
    context: dict[str, object] | None = None,
) -> None:
    """Validate that a Task honours the locked contract.

    Class-level attributes are read from ``type(task)``.

    Asserts:
      1. ``name`` is a non-empty string.
      2. ``cost_estimate_usd`` is a non-negative number (``int`` or
         ``float``; ``bool`` is rejected).
      3. ``timeout_s`` is a positive number (``int`` or ``float``;
         ``bool`` is rejected).
      4. ``depends_on`` is a tuple of strings (possibly empty).
      5. ``run(context)`` returns a list (the engine treats an empty
         list as a valid no-finding result).
      6. Every emitted finding has the three required ``Finding``
         attributes (``severity``, ``category``, ``message``) and each
         of them is a string.

    Args:
        task: The task instance under test.
        context: Context passed to ``task.run``; an empty dict is used
            when omitted.

    Raises:
        AssertionError: a contract was violated.
    """
    task_cls = type(task)

    name = task_cls.name
    assert isinstance(name, str), "Task.name must be a string"
    assert name, "Task.name must be non-empty"

    # bool is a subclass of int, so True/False would otherwise slip through
    # the numeric checks below; exclude it explicitly.
    cost = task_cls.cost_estimate_usd
    assert isinstance(cost, (int, float)) and not isinstance(cost, bool), (
        "Task.cost_estimate_usd must be numeric"
    )
    assert float(cost) >= 0.0, "Task.cost_estimate_usd must be non-negative"

    timeout = task_cls.timeout_s
    assert isinstance(timeout, (int, float)) and not isinstance(timeout, bool), (
        "Task.timeout_s must be numeric"
    )
    assert float(timeout) > 0.0, "Task.timeout_s must be positive"

    deps = task_cls.depends_on
    assert isinstance(deps, tuple), f"Task.depends_on must be a tuple, got {type(deps).__name__}"
    for dep in deps:
        assert isinstance(dep, str), "Task.depends_on entries must be strings"

    findings = await task.run(context if context is not None else {})
    assert isinstance(findings, list), (
        f"Task.run must return a list of findings, got {type(findings).__name__}"
    )
    for finding in findings:
        for attribute in ("severity", "category", "message"):
            assert hasattr(finding, attribute), f"finding must have a {attribute!r} attribute"
            assert isinstance(getattr(finding, attribute), str), (
                f"finding.{attribute} must be a string"
            )
811
+
812
+
813
+ # ----------------------------------------------------------------------
814
+ # Chat conformance — feat-020.
815
+ # ----------------------------------------------------------------------
816
+
817
+
818
async def run_chat_history_conformance(store: ChatHistoryStore) -> None:
    """Validate that a `ChatHistoryStore` honours the locked contract.

    The store must be empty when this is called. The sessions created
    here are deleted again in a ``finally`` block, so they are cleaned up
    even when one of the assertions fails part-way through.

    Checks, in order: append/load round-trip and ordering, `count`,
    session isolation, role filter, `limit`, `list_sessions`,
    `update_session_metadata`, owner filter, `delete_session`,
    `expire_before` return type, and `capabilities` / `supports`.

    Raises:
        AssertionError: a contract was violated.
    """
    from datetime import UTC, datetime  # noqa: PLC0415
    from uuid import uuid4  # noqa: PLC0415

    from agentforge_core.values.chat import ChatTurn  # noqa: PLC0415

    # Random suffixes keep the two sessions distinct from each other.
    sid = f"conf-{uuid4().hex[:8]}"
    sid_b = f"conf-{uuid4().hex[:8]}"

    def _turn(session: str, role: str, content: str, **kw: Any) -> ChatTurn:
        return ChatTurn(
            id=uuid4().hex,
            session_id=session,
            role=role,  # type: ignore[arg-type]
            content=content,
            timestamp=datetime.now(UTC),
            **kw,
        )

    far_future = datetime(2099, 1, 1, tzinfo=UTC)
    # Sessions this run may have written to and has not yet deleted.
    pending: list[str] = []

    try:
        # 1. append + load round-trip.
        t1 = _turn(sid, "user", "hello")
        t2 = _turn(sid, "assistant", "hi there", run_id="run-1")
        pending.append(sid)
        await store.append(t1)
        await store.append(t2)
        loaded = await store.load(sid)
        assert len(loaded) == _EXPECTED_CHAT_TURNS_SID, (
            f"load() must return both turns; got {len(loaded)}"
        )
        assert loaded[0].id == t1.id, "load() must return turns in chronological order"
        assert loaded[1].id == t2.id

        # 2. count.
        n = await store.count(sid)
        assert n == _EXPECTED_CHAT_TURNS_SID, f"count() must reflect appended turns; got {n}"

        # 3. session isolation — a different session_id sees nothing.
        other_turn = _turn(sid_b, "user", "different session")
        pending.append(sid_b)
        await store.append(other_turn)
        only_a = await store.load(sid)
        assert all(t.session_id == sid for t in only_a), (
            "load(session_id) must not bleed across sessions"
        )
        assert await store.count(sid_b) == _EXPECTED_CHAT_TURNS_SID_B

        # 4. role filter. The session holds one assistant turn, so an empty
        #    result is a failure rather than a vacuous pass.
        only_assistant = await store.load(sid, roles=["assistant"])
        assert only_assistant, "load(roles=...) must still return the matching turns"
        assert all(t.role == "assistant" for t in only_assistant), (
            "load(roles=...) must filter to those roles"
        )

        # 5. limit.
        limited = await store.load(sid, limit=1)
        assert len(limited) == 1, "load(limit=N) must return at most N turns"

        # 6. list_sessions returns info for every active session.
        sessions = await store.list_sessions()
        ids = {s.id for s in sessions}
        assert sid in ids, "list_sessions() must include the first session"
        assert sid_b in ids, "list_sessions() must include the second session"

        # 7. update_session_metadata keeps the session listed.
        await store.update_session_metadata(sid, {"owner": "alice", "tag": "x"})
        after = await store.list_sessions()
        matched = [s for s in after if s.id == sid]
        assert matched, "session must still appear after update_session_metadata()"

        # 8. owner filter on list_sessions.
        owned = await store.list_sessions(owner="alice")
        assert all(s.owner == "alice" for s in owned), (
            "list_sessions(owner=X) must filter to that owner"
        )

        # 9. delete_session removes turns + returns count.
        removed = await store.delete_session(sid)
        pending.remove(sid)
        assert removed == _EXPECTED_CHAT_TURNS_SID, (
            f"delete_session must return turns removed; got {removed}"
        )
        assert await store.count(sid) == 0
        after_other = await store.count(sid_b)
        assert after_other == _EXPECTED_CHAT_TURNS_SID_B, (
            "delete_session must not touch other sessions"
        )

        # 10. expire_before — drivers without TTL may return 0.
        removed_b = await store.expire_before(far_future)
        assert isinstance(removed_b, int), "expire_before must return an int"

        # 11. capabilities is a set.
        caps = store.capabilities()
        assert isinstance(caps, set)
        assert store.supports("definitely-not-a-real-capability") is False
    finally:
        # Clean up whatever is still there, on success and on failure alike.
        for leftover in pending:
            await store.delete_session(leftover)
        await store.expire_before(far_future)
917
+
918
+
919
async def run_truncation_conformance(strategy: HistoryTruncationStrategy) -> None:
    """Exercise a `HistoryTruncationStrategy` against its locked invariants.

    Checks performed:
      1. Selecting from an empty history yields an empty list.
      2. For a four-turn history, the selected non-summary turns appear
         in the same relative order as in the input, and none of them
         is a turn that was not in the input. Turns whose metadata has
         ``"agentforge_chat.summary"`` set to ``True`` are exempt.

    Raises:
        AssertionError: an invariant was violated.
    """
    from datetime import UTC, datetime  # noqa: PLC0415
    from uuid import uuid4  # noqa: PLC0415

    from agentforge_core.values.chat import ChatTurn  # noqa: PLC0415

    def _turn(role: str, content: str) -> ChatTurn:
        return ChatTurn(
            id=uuid4().hex,
            session_id="conf",
            role=role,  # type: ignore[arg-type]
            content=content,
            timestamp=datetime.now(UTC),
        )

    no_turns: list[ChatTurn] = []
    from_empty = await strategy.select(no_turns, "msg", {})
    assert from_empty == [], "empty input must yield empty output"

    history = [
        _turn(role, text)
        for role, text in (
            ("user", "a"),
            ("assistant", "b"),
            ("user", "c"),
            ("assistant", "d"),
        )
    ]
    selected = await strategy.select(history, "next msg", {})

    # Subsequence check: walk one shared iterator over the input ids. Each
    # selected turn consumes ids until it finds its own, so a turn that is
    # out of order, duplicated or foreign exhausts the iterator and fails.
    # Summary turns (as produced by `SummariseOldest`) are skipped.
    unmatched_ids = iter([turn.id for turn in history])
    for turn in selected:
        if turn.metadata.get("agentforge_chat.summary") is True:
            continue
        assert any(turn.id == candidate for candidate in unmatched_ids), (
            "truncation output must preserve input order "
            "(no reordered or inserted non-summary turns)"
        )
966
+
967
+
968
async def run_reranker_conformance(reranker: Reranker) -> None:
    """Run the shared `Reranker` conformance suite (feat-021).

    The reranker must be ready to call when this is passed in and is
    not touched after the function returns (callers manage the
    reranker's lifecycle).

    Verifies the locked invariants of `Reranker`:

    1. Empty candidate list returns an empty list.
    2. ``top_k < 1`` (when not None) raises `ValueError`
       (exercised with ``top_k=0``).
    3. ``rerank(query, candidates, top_k=None)`` returns a list of
       the same length as `candidates`.
    4. ``rerank(query, candidates, top_k=K)`` returns at most
       `K` items (exactly 2 for ``top_k=2`` over three candidates).
    5. Returned scores are in `[0, 1]`.
    6. Results are sorted descending by score.
    7. Every input `id` is present in the output and carries the
       input's `text` / `metadata` values unchanged (only `score`
       may change).
    8. Input list is not mutated.
    9. ``supports("not-a-real-capability")`` returns False.

    Raises:
        AssertionError: a contract was violated.
    """

    # 1. empty input → empty output
    empty = await reranker.rerank("any query", [])
    assert empty == [], f"rerank([]) must return [], got {empty!r}"

    # 2. top_k < 1 must raise
    candidates = [
        VectorMatch(id="a", text="alpha", score=0.9, metadata={"k": 1}),
        VectorMatch(id="b", text="beta", score=0.5, metadata={"k": 2}),
        VectorMatch(id="c", text="gamma", score=0.3, metadata={"k": 3}),
    ]
    raised_topk = False
    try:
        await reranker.rerank("q", candidates, top_k=0)
    except ValueError:
        raised_topk = True
    assert raised_topk, "rerank(top_k=0) must raise ValueError"

    # 3-7. happy path
    # Shallow snapshot: enough to detect reordering, insertion or removal
    # in the input list (check 8 below).
    original = list(candidates)
    full = await reranker.rerank("q", candidates)
    assert len(full) == len(candidates), (
        f"rerank(top_k=None) must return all candidates, got {len(full)} vs {len(candidates)}"
    )
    for r in full:
        assert 0.0 <= r.score <= 1.0, f"score out of range: {r.score}"
    for prev, nxt in itertools.pairwise(full):
        assert prev.score >= nxt.score, f"results not sorted desc: {prev.score} before {nxt.score}"
    by_id = {r.id: r for r in full}
    for orig in original:
        # A dropped or renamed id must surface as a contract failure, not
        # as a KeyError from the lookup below.
        assert orig.id in by_id, (
            f"id {orig.id!r} missing from rerank output; got ids {sorted(by_id)!r}"
        )
        out = by_id[orig.id]
        assert out.text == orig.text, (
            f"text field mutated for id={orig.id}: {orig.text!r} → {out.text!r}"
        )
        assert out.metadata == orig.metadata, (
            f"metadata mutated for id={orig.id}: {orig.metadata!r} → {out.metadata!r}"
        )

    # 4. top_k truncates
    truncated = await reranker.rerank("q", candidates, top_k=2)
    assert len(truncated) == 2, f"top_k=2 must return 2 items, got {len(truncated)}"  # noqa: PLR2004
    for prev, nxt in itertools.pairwise(truncated):
        assert prev.score >= nxt.score

    # 8. input not mutated
    assert candidates == original, "rerank must not mutate its input list"

    # 9. unknown capability check
    assert reranker.supports("not-a-real-capability") is False, (
        "supports() must return False for unknown capability"
    )
1046
+
1047
+
1048
+ # ----------------------------------------------------------------------
1049
+ # Hybrid-search conformance — feat-022 (opt-in).
1050
+ # ----------------------------------------------------------------------
1051
+
1052
+
1053
async def run_hybrid_search_conformance(store: VectorStore) -> None:
    """Verify the `lexical_search` contract for hybrid-capable drivers.

    Only call this on stores that declare the ``"hybrid_search"``
    capability — the function asserts the precondition. Empty store
    in, empty store out (every item upserted is deleted, also when an
    assertion fails after the upsert).

    Verifies:

    1. ``store.supports("hybrid_search")`` is True.
    2. Empty corpus returns ``[]``.
    3. ``lexical_search`` returns at most ``limit`` matches sorted
       by score desc, with scores in ``[0, 1]`` (max-normalised).
    4. The top hit on an exact-token query is the document that
       contains the rarest matching token.
    5. ``filter_metadata`` AND-matches on the lexical path: the
       non-matching document is excluded and the matching one is kept.
    6. ``limit=0`` raises ``ValueError``.
    7. Re-upserting an existing id with new text invalidates the
       prior lexical hit (the new text wins).

    Raises:
        AssertionError: a contract was violated.
    """
    assert store.supports("hybrid_search"), (
        "run_hybrid_search_conformance called on a store that does not declare 'hybrid_search'"
    )

    # 2. empty corpus
    empty = await store.lexical_search("anything", limit=5)
    assert empty == [], f"empty store must return [], got {empty!r}"

    # All three documents share one vector, so only the text tells them apart.
    dim = store.dimensions()
    base_vec = tuple([0.1] * dim)
    items = [
        VectorItem(
            id="a", vector=base_vec, text="Paris is the capital of France", metadata={"k": 1}
        ),
        VectorItem(
            id="b", vector=base_vec, text="Berlin is the capital of Germany", metadata={"k": 2}
        ),
        VectorItem(id="c", vector=base_vec, text="The Eiffel Tower is in Paris", metadata={"k": 1}),
    ]
    await store.upsert(items)

    try:
        # 6. limit < 1 must raise
        raised_limit = False
        try:
            await store.lexical_search("Paris", limit=0)
        except ValueError:
            raised_limit = True
        assert raised_limit, "lexical_search(limit=0) must raise ValueError"

        # 3. ordering + score range
        eiffel = await store.lexical_search("Eiffel Tower", limit=5)
        assert len(eiffel) >= 1, "Eiffel Tower query must match at least one doc"
        assert eiffel[0].id == "c", f"top hit on 'Eiffel Tower' must be doc c, got {eiffel[0].id}"
        for prev, nxt in itertools.pairwise(eiffel):
            assert prev.score >= nxt.score, "lexical_search results not sorted desc"
        for m in eiffel:
            assert 0.0 <= m.score <= 1.0, f"lexical score out of [0, 1]: {m.score}"

        # 3 (cont.). limit caps the result count: "Paris" occurs in docs
        # a and c, so limit=1 has something to truncate.
        capped = await store.lexical_search("Paris", limit=1)
        assert len(capped) <= 1, (
            f"lexical_search(limit=1) must return at most 1 match, got {len(capped)}"
        )

        # 5. metadata filter
        filtered = await store.lexical_search("capital", limit=5, filter_metadata={"k": 1})
        ids = {m.id for m in filtered}
        # docs with k=1 are a (Paris/capital) and c (Eiffel/Paris)
        assert "b" not in ids, "filter_metadata={k: 1} must exclude doc b"
        # Without this an empty result would satisfy the exclusion check.
        assert "a" in ids, "filter_metadata={k: 1} must keep doc a, which contains 'capital'"

        # 7. re-upsert invalidates prior text
        await store.upsert(
            [
                VectorItem(
                    id="a",
                    vector=base_vec,
                    text="Madrid is the capital of Spain",
                    metadata={"k": 1},
                ),
            ]
        )
        paris_after = await store.lexical_search("Paris", limit=5)
        paris_ids = {m.id for m in paris_after}
        assert "a" not in paris_ids, (
            "re-upserting doc a with new text must drop it from a 'Paris' query"
        )
    finally:
        await store.delete(["a", "b", "c"])