@pentatonic-ai/ai-agent-sdk 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.8.1",
3
+ "version": "0.8.3",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -416,8 +416,14 @@ async def health():
416
416
  "engine": "pentatonic-memory-engine",
417
417
  "layers": {},
418
418
  }
419
- # NV-Embed exposes /health alongside /v1/embeddings.
420
- nv_embed_health = NV_EMBED_URL.replace("/v1/embeddings", "/health")
419
+ # NV-Embed (or the upstream gateway) exposes /health at the host root.
420
+ # Use urlparse so we swap *just* the path component instead of doing a
421
+ # string replace — replace() only matches .../v1/embeddings and silently
422
+ # leaves /v1/embed or a bare host untouched. The probe is informational only; gateways
423
+ # that return non-200 on root-/health don't block engine operation.
424
+ from urllib.parse import urlparse, urlunparse
425
+ _u = urlparse(NV_EMBED_URL)
426
+ nv_embed_health = urlunparse((_u.scheme, _u.netloc, "/health", "", "", ""))
421
427
 
422
428
  import asyncio
423
429
  l2_v, l4_v, l5_v, l6_v, nv_v, l3_v = await asyncio.gather(
@@ -86,7 +86,12 @@ log = logging.getLogger("l6-document-store")
86
86
  # Embedding
87
87
  # ---------------------------------------------------------------------------
88
88
 
89
- _embed_client = httpx.Client(timeout=60)
89
+ # HTTP client for Ollama entity extraction (extract_entities below). Named
90
+ # `_ollama_http`, not `_embed_client`, because the embedding HTTP client now
91
+ # lives behind the EmbedClient factory above — sharing the `_embed_client`
92
+ # identifier caused a TypeError in v0.8.0–0.8.2 where the legacy module-level
93
+ # binding shadowed the factory function.
94
+ _ollama_http = httpx.Client(timeout=60)
90
95
 
91
96
  def embed_text(text: str) -> List[float]:
92
97
  """Single-text embed via _embed_post (OpenAI-compat first, lambda-gateway fallback)."""
@@ -141,7 +146,7 @@ def rerank(query: str, results: List[Dict], top_k: int = 10) -> List[Dict]:
141
146
  def extract_entities(text: str) -> List[str]:
142
147
  """Extract entities from text using Ollama graph-preflexor."""
143
148
  try:
144
- resp = _embed_client.post(
149
+ resp = _ollama_http.post(
145
150
  f"{OLLAMA_URL}/api/generate",
146
151
  json={
147
152
  "model": "graph-preflexor",
@@ -0,0 +1,48 @@
1
+ """Regression test for the nv_embed /health probe URL construction.
2
+
3
+ Through v0.8.1 the probe was built via str.replace("/v1/embeddings", "/health"),
4
+ which silently produced the wrong URL when NV_EMBED_URL was anything other
5
+ than ".../v1/embeddings" (e.g. /v1/embed for the Pentatonic AI Gateway, or
6
+ a bare host). The probe would end up GET-ing the embed endpoint itself and
7
+ the gateway would return 401 — making /health falsely report a degraded
8
+ nv_embed layer even when embeddings worked fine.
9
+
10
+ The fix replaces the substring replace with a urlparse-based path swap.
11
+ """
12
+
13
+ from urllib.parse import urlparse, urlunparse
14
+
15
+
16
+ def probe_url(nv_embed_url: str) -> str:
17
+ """Re-implementation of the probe URL builder from compat/server.py.
18
+ Kept tiny + isolated so it can be unit-tested without booting FastAPI."""
19
+ u = urlparse(nv_embed_url)
20
+ return urlunparse((u.scheme, u.netloc, "/health", "", "", ""))
21
+
22
+
23
+ def test_probe_with_v1_embeddings_path():
24
+ assert probe_url("https://nv-embed:8041/v1/embeddings") == "https://nv-embed:8041/health"
25
+
26
+
27
+ def test_probe_with_v1_embed_path():
28
+ """The bug case — gateway moved to /v1/embed and old replace() left URL untouched."""
29
+ assert probe_url("https://lambda-gateway.pentatonic.com/v1/embed") == (
30
+ "https://lambda-gateway.pentatonic.com/health"
31
+ )
32
+
33
+
34
+ def test_probe_with_bare_host():
35
+ """No path at all — provider supplies path_default, but probe just wants /health."""
36
+ assert probe_url("https://lambda-gateway.pentatonic.com") == (
37
+ "https://lambda-gateway.pentatonic.com/health"
38
+ )
39
+
40
+
41
+ def test_probe_strips_query_and_fragment():
42
+ assert probe_url("https://gw.example.com/v1/embeddings?token=x#frag") == (
43
+ "https://gw.example.com/health"
44
+ )
45
+
46
+
47
+ def test_probe_preserves_port():
48
+ assert probe_url("http://nv-embed:8041/v1/embeddings") == "http://nv-embed:8041/health"
@@ -0,0 +1,84 @@
1
+ """Regression test for the L6 _embed_client shadowing bug introduced in v0.8.0.
2
+
3
+ When the EmbedClient refactor landed in 0.8.0, the new `def _embed_client()`
4
+ factory function was added at module top, but the legacy module-level
5
+ `_embed_client = httpx.Client(timeout=60)` binding (used by Ollama entity
6
+ extraction) was left in place. Python's top-to-bottom evaluation rebound
7
+ the name to the httpx.Client instance, so any subsequent call to
8
+ `_embed_client()` raised `TypeError: 'Client' object is not callable`.
9
+
10
+ This silently 500'd every L6 /index-batch and /search request from 0.8.0
11
+ through 0.8.2 — the bug couldn't be caught by /health because the process
12
+ itself stays up, only the request handlers fail.
13
+
14
+ This is a static-source test (parses the file) rather than an import-time
15
+ test because L6's heavy imports (pymilvus, spacy) aren't available in the
16
+ unit-test venv. The check: scan the AST for any non-function rebinding of
17
+ identifiers that are also defined as `def` in the same module. Catches
18
+ this exact bug shape across any service that uses the EmbedClient factory
19
+ pattern.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import ast
25
+ from pathlib import Path
26
+
27
+ import pytest
28
+
29
+ SERVICES_DIR = Path(__file__).parent.parent / "engine" / "services"
30
+
31
+ # Services that use the lazy EmbedClient factory pattern.
32
+ SERVICES_WITH_EMBED_FACTORY = [
33
+ SERVICES_DIR / "l4" / "server.py",
34
+ SERVICES_DIR / "l5" / "l5-comms-layer.py",
35
+ SERVICES_DIR / "l6" / "l6-document-store.py",
36
+ SERVICES_DIR / "l2" / "l2-hybridrag-proxy.py",
37
+ ]
38
+
39
+
40
+ def _module_level_defs_and_assigns(source: str) -> tuple[set[str], set[str]]:
41
+ """Return (function names, non-function-assigned names) at module level."""
42
+ tree = ast.parse(source)
43
+ funcs: set[str] = set()
44
+ assigns: set[str] = set()
45
+ for node in tree.body:
46
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
47
+ funcs.add(node.name)
48
+ elif isinstance(node, ast.Assign):
49
+ for target in node.targets:
50
+ if isinstance(target, ast.Name):
51
+ assigns.add(target.id)
52
+ elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
53
+ assigns.add(node.target.id)
54
+ return funcs, assigns
55
+
56
+
57
+ @pytest.mark.parametrize("service_file", SERVICES_WITH_EMBED_FACTORY, ids=lambda p: f"{p.parent.name}/{p.name}")
58
+ def test_no_module_level_shadowing_of_factory_functions(service_file: Path):
59
+ """A module-level `def foo()` must not also have a module-level `foo = ...`
60
+ anywhere in the file (order is not checked). A later assignment is exactly the shape that caused the v0.8.0 L6 bug."""
61
+ source = service_file.read_text()
62
+ funcs, assigns = _module_level_defs_and_assigns(source)
63
+ overlap = funcs & assigns
64
+ assert not overlap, (
65
+ f"{service_file.relative_to(SERVICES_DIR.parent.parent)} has module-level "
66
+ f"identifier(s) defined as both `def` and `name = ...`: {sorted(overlap)}. "
67
+ f"This causes silent name shadowing — the assignment wins and any call "
68
+ f"to {sorted(overlap)[0]}() raises TypeError at runtime."
69
+ )
70
+
71
+
72
+ def test_l6_uses_renamed_ollama_http_not_embed_client():
73
+ """Belt-and-suspenders: explicitly assert L6's Ollama HTTP client is at
74
+ `_ollama_http`, not `_embed_client`. If someone reintroduces the original
75
+ binding by accident, this test catches it without depending on AST traversal."""
76
+ source = (SERVICES_DIR / "l6" / "l6-document-store.py").read_text()
77
+ assert "_embed_client = httpx.Client" not in source, (
78
+ "L6 reintroduced the legacy `_embed_client = httpx.Client(...)` binding "
79
+ "that shadowed the EmbedClient factory in v0.8.0. Rename to _ollama_http."
80
+ )
81
+ assert "_ollama_http = httpx.Client" in source, (
82
+ "L6 is missing the renamed Ollama HTTP client (`_ollama_http = httpx.Client(...)`). "
83
+ "The Ollama entity-extraction call path needs an httpx.Client somewhere."
84
+ )