@pentatonic-ai/ai-agent-sdk 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.8.1",
|
|
3
|
+
"version": "0.8.3",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -416,8 +416,14 @@ async def health():
|
|
|
416
416
|
"engine": "pentatonic-memory-engine",
|
|
417
417
|
"layers": {},
|
|
418
418
|
}
|
|
419
|
-
# NV-Embed exposes /health
|
|
420
|
-
nv_embed_health = NV_EMBED_URL.replace("/v1/embeddings", "/health")
|
|
419
|
+
# NV-Embed (or the upstream gateway) exposes /health at the host root.
|
|
420
|
+
# Use urlparse so we swap *just* the path component instead of doing a
|
|
421
|
+
# string replace — that breaks the moment NV_EMBED_URL is /v1/embed,
|
|
422
|
+
# /v1/embeddings, or bare-host. The probe is informational only; gateways
|
|
423
|
+
# that return non-200 on root-/health don't block engine operation.
|
|
424
|
+
from urllib.parse import urlparse, urlunparse
|
|
425
|
+
_u = urlparse(NV_EMBED_URL)
|
|
426
|
+
nv_embed_health = urlunparse((_u.scheme, _u.netloc, "/health", "", "", ""))
|
|
421
427
|
|
|
422
428
|
import asyncio
|
|
423
429
|
l2_v, l4_v, l5_v, l6_v, nv_v, l3_v = await asyncio.gather(
|
|
@@ -86,7 +86,12 @@ log = logging.getLogger("l6-document-store")
|
|
|
86
86
|
# Embedding
|
|
87
87
|
# ---------------------------------------------------------------------------
|
|
88
88
|
|
|
89
|
-
_embed_client = httpx.Client(timeout=60)
|
|
89
|
+
# HTTP client for Ollama entity extraction (extract_entities below). Named
|
|
90
|
+
# `_ollama_http`, not `_embed_client`, because the embedding HTTP client now
|
|
91
|
+
# lives behind the EmbedClient factory above — sharing the `_embed_client`
|
|
92
|
+
# identifier caused a TypeError in v0.8.0–0.8.2 where the legacy module-level
|
|
93
|
+
# binding shadowed the factory function.
|
|
94
|
+
_ollama_http = httpx.Client(timeout=60)
|
|
90
95
|
|
|
91
96
|
def embed_text(text: str) -> List[float]:
|
|
92
97
|
"""Single-text embed via _embed_post (OpenAI-compat first, lambda-gateway fallback)."""
|
|
@@ -141,7 +146,7 @@ def rerank(query: str, results: List[Dict], top_k: int = 10) -> List[Dict]:
|
|
|
141
146
|
def extract_entities(text: str) -> List[str]:
|
|
142
147
|
"""Extract entities from text using Ollama graph-preflexor."""
|
|
143
148
|
try:
|
|
144
|
-
resp = _embed_client.post(
|
|
149
|
+
resp = _ollama_http.post(
|
|
145
150
|
f"{OLLAMA_URL}/api/generate",
|
|
146
151
|
json={
|
|
147
152
|
"model": "graph-preflexor",
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Regression test for the nv_embed /health probe URL construction.
|
|
2
|
+
|
|
3
|
+
Before v0.8.1 the probe was built via str.replace("/v1/embeddings", "/health"),
|
|
4
|
+
which silently produced the wrong URL when NV_EMBED_URL was anything other
|
|
5
|
+
than ".../v1/embeddings" (e.g. /v1/embed for the Pentatonic AI Gateway, or
|
|
6
|
+
a bare host). The probe would end up GET-ing the embed endpoint itself and
|
|
7
|
+
the gateway would return 401 — making /health falsely report a degraded
|
|
8
|
+
nv_embed layer even when embeddings worked fine.
|
|
9
|
+
|
|
10
|
+
The fix replaces the substring replace with a urlparse-based path swap.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from urllib.parse import urlparse, urlunparse
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def probe_url(nv_embed_url: str) -> str:
|
|
17
|
+
"""Re-implementation of the probe URL builder from compat/server.py.
|
|
18
|
+
Kept tiny + isolated so it can be unit-tested without booting FastAPI."""
|
|
19
|
+
u = urlparse(nv_embed_url)
|
|
20
|
+
return urlunparse((u.scheme, u.netloc, "/health", "", "", ""))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_probe_with_v1_embeddings_path():
|
|
24
|
+
assert probe_url("https://nv-embed:8041/v1/embeddings") == "https://nv-embed:8041/health"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_probe_with_v1_embed_path():
|
|
28
|
+
"""The bug case — gateway moved to /v1/embed and old replace() left URL untouched."""
|
|
29
|
+
assert probe_url("https://lambda-gateway.pentatonic.com/v1/embed") == (
|
|
30
|
+
"https://lambda-gateway.pentatonic.com/health"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_probe_with_bare_host():
|
|
35
|
+
"""No path at all — provider supplies path_default, but probe just wants /health."""
|
|
36
|
+
assert probe_url("https://lambda-gateway.pentatonic.com") == (
|
|
37
|
+
"https://lambda-gateway.pentatonic.com/health"
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_probe_strips_query_and_fragment():
|
|
42
|
+
assert probe_url("https://gw.example.com/v1/embeddings?token=x#frag") == (
|
|
43
|
+
"https://gw.example.com/health"
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_probe_preserves_port():
|
|
48
|
+
assert probe_url("http://nv-embed:8041/v1/embeddings") == "http://nv-embed:8041/health"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Regression test for the L6 _embed_client shadowing bug introduced in v0.8.0.
|
|
2
|
+
|
|
3
|
+
When the EmbedClient refactor landed in 0.8.0, the new `def _embed_client()`
|
|
4
|
+
factory function was added at module top, but the legacy module-level
|
|
5
|
+
`_embed_client = httpx.Client(timeout=60)` binding (used by Ollama entity
|
|
6
|
+
extraction) was left in place. Python's top-to-bottom evaluation rebound
|
|
7
|
+
the name to the httpx.Client instance, so any subsequent call to
|
|
8
|
+
`_embed_client()` raised `TypeError: 'Client' object is not callable`.
|
|
9
|
+
|
|
10
|
+
This silently 500'd every L6 /index-batch and /search request from 0.8.0
|
|
11
|
+
through 0.8.2 — the bug couldn't be caught by /health because the process
|
|
12
|
+
itself stays up, only the request handlers fail.
|
|
13
|
+
|
|
14
|
+
This is a static-source test (parses the file) rather than an import-time
|
|
15
|
+
test because L6's heavy imports (pymilvus, spacy) aren't available in the
|
|
16
|
+
unit-test venv. The check: scan the AST for any non-function rebinding of
|
|
17
|
+
identifiers that are also defined as `def` in the same module. Catches
|
|
18
|
+
this exact bug shape across any service that uses the EmbedClient factory
|
|
19
|
+
pattern.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import ast
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
import pytest
|
|
28
|
+
|
|
29
|
+
SERVICES_DIR = Path(__file__).parent.parent / "engine" / "services"
|
|
30
|
+
|
|
31
|
+
# Services that use the lazy EmbedClient factory pattern.
|
|
32
|
+
SERVICES_WITH_EMBED_FACTORY = [
|
|
33
|
+
SERVICES_DIR / "l4" / "server.py",
|
|
34
|
+
SERVICES_DIR / "l5" / "l5-comms-layer.py",
|
|
35
|
+
SERVICES_DIR / "l6" / "l6-document-store.py",
|
|
36
|
+
SERVICES_DIR / "l2" / "l2-hybridrag-proxy.py",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _module_level_defs_and_assigns(source: str) -> tuple[set[str], set[str]]:
|
|
41
|
+
"""Return (function names, non-function-assigned names) at module level."""
|
|
42
|
+
tree = ast.parse(source)
|
|
43
|
+
funcs: set[str] = set()
|
|
44
|
+
assigns: set[str] = set()
|
|
45
|
+
for node in tree.body:
|
|
46
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
47
|
+
funcs.add(node.name)
|
|
48
|
+
elif isinstance(node, ast.Assign):
|
|
49
|
+
for target in node.targets:
|
|
50
|
+
if isinstance(target, ast.Name):
|
|
51
|
+
assigns.add(target.id)
|
|
52
|
+
elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
|
|
53
|
+
assigns.add(node.target.id)
|
|
54
|
+
return funcs, assigns
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.parametrize("service_file", SERVICES_WITH_EMBED_FACTORY, ids=lambda p: f"{p.parent.name}/{p.name}")
|
|
58
|
+
def test_no_module_level_shadowing_of_factory_functions(service_file: Path):
|
|
59
|
+
"""A module-level `def foo()` must not also have a module-level `foo = ...`
|
|
60
|
+
later in the file. That's exactly the shape that caused the v0.8.0 L6 bug."""
|
|
61
|
+
source = service_file.read_text()
|
|
62
|
+
funcs, assigns = _module_level_defs_and_assigns(source)
|
|
63
|
+
overlap = funcs & assigns
|
|
64
|
+
assert not overlap, (
|
|
65
|
+
f"{service_file.relative_to(SERVICES_DIR.parent.parent)} has module-level "
|
|
66
|
+
f"identifier(s) defined as both `def` and `name = ...`: {sorted(overlap)}. "
|
|
67
|
+
f"This causes silent name shadowing — the assignment wins and any call "
|
|
68
|
+
f"to {sorted(overlap)[0]}() raises TypeError at runtime."
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_l6_uses_renamed_ollama_http_not_embed_client():
|
|
73
|
+
"""Belt-and-suspenders: explicitly assert L6's Ollama HTTP client is at
|
|
74
|
+
`_ollama_http`, not `_embed_client`. If someone reintroduces the original
|
|
75
|
+
binding by accident, this test catches it without depending on AST traversal."""
|
|
76
|
+
source = (SERVICES_DIR / "l6" / "l6-document-store.py").read_text()
|
|
77
|
+
assert "_embed_client = httpx.Client" not in source, (
|
|
78
|
+
"L6 reintroduced the legacy `_embed_client = httpx.Client(...)` binding "
|
|
79
|
+
"that shadowed the EmbedClient factory in v0.8.0. Rename to _ollama_http."
|
|
80
|
+
)
|
|
81
|
+
assert "_ollama_http = httpx.Client" in source, (
|
|
82
|
+
"L6 is missing the renamed Ollama HTTP client (`_ollama_http = httpx.Client(...)`). "
|
|
83
|
+
"The Ollama entity-extraction call path needs an httpx.Client somewhere."
|
|
84
|
+
)
|