agentmark-sdk 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/.gitignore +3 -1
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/CHANGELOG.md +64 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/PKG-INFO +1 -1
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/pyproject.toml +1 -1
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/__init__.py +2 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/config.py +1 -1
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/otlp_json_exporter.py +8 -0
- agentmark_sdk-0.3.0/src/agentmark_sdk/span_hooks.py +80 -0
- agentmark_sdk-0.3.0/tests/test_span_hooks.py +46 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/package.json +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/decorator.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/masking_processor.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/pii_masker.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/py.typed +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/sampler.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/sdk.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/serialize.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/src/agentmark_sdk/trace.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/__init__.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/test_decorator.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/test_masking_processor.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/test_otlp_json_exporter.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/test_pii_masker.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/test_sampler.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/test_sdk.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/test_serialize.py +0 -0
- {agentmark_sdk-0.2.0 → agentmark_sdk-0.3.0}/tests/test_trace.py +0 -0
|
@@ -1,3 +1,67 @@
|
|
|
1
|
+
## 0.3.0 (2026-06-09)
|
|
2
|
+
|
|
3
|
+
### 🚀 Features
|
|
4
|
+
|
|
5
|
+
- feat(webhook): the runner owns dispatch; evals reach the cloud on every path ([#717](https://github.com/agentmark-ai/agentmark/pull/717))
|
|
6
|
+
|
|
7
|
+
The New Experiment dialog showed *"No evals available"* for deployed apps even
|
|
8
|
+
when they registered evals. Root cause: no single object owned "what the
|
|
9
|
+
deployed app exposes," so the eval registry had to travel a hand-assembled chain
|
|
10
|
+
(client → executor → runner → handler → dispatch → transport) that every entry
|
|
11
|
+
path re-wired — and any path could drop it. The Python managed handler hand-rolled
|
|
12
|
+
dispatch and 400'd on `get-evals`; the TS managed server forwarded the dispatch
|
|
13
|
+
envelope raw; the BYO `createWebhookRunner` built a client with no `evals` input
|
|
14
|
+
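at all. This release makes the chain non-assemblable: the runner owns dispatch, so there is no hand-wired chain left for an entry path to get wrong.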
|
|
15
|
+
|
|
16
|
+
- **Dispatch lives on the runner.** `WebhookRunner.dispatch(event)` (TS + Python)
|
|
17
|
+
routes prompt-run / dataset-run / get-evals, sourcing evals from its OWN
|
|
18
|
+
client — no passable, omittable client argument. The canonical managed handler
|
|
19
|
+
is `handler = runner.dispatch` (or `adapterHandler.dispatch`). `runner.client`
|
|
20
|
+
/ `getEvalNames()` are public so a runner satisfies the control-plane contract.
|
|
21
|
+
|
|
22
|
+
- **`evals` is threaded through every builder.** TS `createWebhookRunner({ evals })`
|
|
23
|
+
and the new Python `create_webhook_runner(executor, evals=…)` register evals
|
|
24
|
+
once → they both run in experiments and list in the dialog. Adapter factories
|
|
25
|
+
already threaded evals; now the BYO path does too.
|
|
26
|
+
|
|
27
|
+
- **Adapters delegate, don't reimplement.** Pydantic / claude / ai-sdk-v4 / v5
|
|
28
|
+
webhook handlers expose `.dispatch` + `.client` by delegating to the shared
|
|
29
|
+
runner (both span hooks bundled at construction); no per-adapter dispatch code.
|
|
30
|
+
|
|
31
|
+
- **Anti-drift.** `conformance-vectors/protocol-catalog.json` gains a normative
|
|
32
|
+
`webhookJobs` section; both languages assert their REAL dispatch's job-type set
|
|
33
|
+
(`WEBHOOK_JOB_TYPES` / `WebhookRequest['type']`) is exhaustive over it, and the
|
|
34
|
+
get-evals payload stays pinned to `control-plane.json` on the dev AND managed
|
|
35
|
+
surfaces. Adding a job to one language without the other fails the other's CI.
|
|
36
|
+
|
|
37
|
+
New public API (minor) across prompt-core (TS + Python), the SDK
|
|
38
|
+
(`createWebhookRunner` `evals` option), and the adapters (`dispatch`/`client`).
|
|
39
|
+
Back-compat: `handleWebhookRequest(event, handler, client?)` still works; the
|
|
40
|
+
managed servers still accept legacy flat results. The managed Node server now
|
|
41
|
+
unwraps the dispatch envelope (the TS half of the empty dialog) — see
|
|
42
|
+
`apps/builder` machine-execute-contract test (monorepo, not released here).
|
|
43
|
+
|
|
44
|
+
## 0.2.1 (2026-04-16)
|
|
45
|
+
|
|
46
|
+
### 🩹 Fixes
|
|
47
|
+
|
|
48
|
+
- fix: set explicit User-Agent on OTLP span exports to bypass Cloudflare BIC ([#584](https://github.com/agentmark-ai/agentmark/pull/584))
|
|
49
|
+
|
|
50
|
+
Cloudflare's Browser Integrity Check rejects requests bearing the default
|
|
51
|
+
`Python-urllib/*` User-Agent with HTTP 403 (error code 1010). `JsonOtlpSpanExporter`
|
|
52
|
+
uses `urllib.request.urlopen` without setting a UA, so every trace export through
|
|
53
|
+
a Cloudflare-proxied zone (api.agentmark.co, api-stg.agentmark.co) was silently
|
|
54
|
+
rejected before reaching the gateway. Combined with the exporter's bare
|
|
55
|
+
`except Exception: return FAILURE`, the failure produced no logs, no metrics,
|
|
56
|
+
and no ClickHouse rows — just a complete absence of traces.
|
|
57
|
+
|
|
58
|
+
The `ApiLoader` path (`/v1/templates` etc.) wasn't affected because it uses
|
|
59
|
+
`httpx`, whose default UA `python-httpx/<version>` isn't on the BIC block list.
|
|
60
|
+
|
|
61
|
+
Set `User-Agent: agentmark-sdk-python/<version>` on every outbound POST. The
|
|
62
|
+
version is resolved at import time via `importlib.metadata.version("agentmark-sdk")`
|
|
63
|
+
so it stays in lockstep with the installed distribution.
|
|
64
|
+
|
|
1
65
|
## 0.2.0 (2026-04-13)
|
|
2
66
|
|
|
3
67
|
### 🚀 Features
|
|
@@ -46,6 +46,7 @@ from .pii_masker import CustomPattern, PiiMaskerConfig, create_pii_masker
|
|
|
46
46
|
from .sampler import AgentmarkSampler
|
|
47
47
|
from .sdk import AgentMarkSDK
|
|
48
48
|
from .serialize import serialize_value
|
|
49
|
+
from .span_hooks import create_agentmark_span_hooks
|
|
49
50
|
from .trace import SpanContext, SpanOptions, SpanResult, span, span_context, span_context_sync
|
|
50
51
|
|
|
51
52
|
__all__ = [
|
|
@@ -57,6 +58,7 @@ __all__ = [
|
|
|
57
58
|
"span",
|
|
58
59
|
"span_context",
|
|
59
60
|
"span_context_sync",
|
|
61
|
+
"create_agentmark_span_hooks",
|
|
60
62
|
"observe",
|
|
61
63
|
"SpanOptions",
|
|
62
64
|
"SpanContext",
|
|
@@ -15,12 +15,19 @@ import base64
|
|
|
15
15
|
import json
|
|
16
16
|
import urllib.request
|
|
17
17
|
from collections import defaultdict
|
|
18
|
+
from importlib.metadata import version as _pkg_version
|
|
18
19
|
from typing import Any, Sequence
|
|
19
20
|
|
|
20
21
|
from opentelemetry.sdk.trace import ReadableSpan
|
|
21
22
|
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
|
|
22
23
|
from opentelemetry.trace import SpanKind
|
|
23
24
|
|
|
25
|
+
# Cloudflare Browser Integrity Check rejects the default urllib User-Agent
# ("Python-urllib/x.y") with a 403 error code 1010. Set an explicit SDK UA so
# requests through proxied zones (api.agentmark.co et al) aren't blocked
# before reaching the gateway.
def _resolve_sdk_user_agent() -> str:
    """Build the ``agentmark-sdk-python/<version>`` User-Agent string.

    The version comes from the installed ``agentmark-sdk`` distribution
    metadata. When that metadata is missing (for example the package is run
    from a source checkout rather than installed), ``"unknown"`` is used so
    that importing this module does not fail.
    """
    from importlib.metadata import PackageNotFoundError

    try:
        resolved_version = _pkg_version("agentmark-sdk")
    except PackageNotFoundError:
        # No distribution metadata: still send a non-urllib UA rather than
        # crashing at import time.
        resolved_version = "unknown"
    return f"agentmark-sdk-python/{resolved_version}"


_SDK_USER_AGENT = _resolve_sdk_user_agent()
|
|
30
|
+
|
|
24
31
|
# OTLP wire format uses 1-indexed SpanKind values (UNSPECIFIED=0,
|
|
25
32
|
# INTERNAL=1, SERVER=2, ...) while the Python API uses 0-indexed
|
|
26
33
|
# (INTERNAL=0, SERVER=1, ...). This mapping is stable per the OTLP spec.
|
|
@@ -50,6 +57,7 @@ class JsonOtlpSpanExporter(SpanExporter):
|
|
|
50
57
|
data=payload,
|
|
51
58
|
headers={
|
|
52
59
|
"Content-Type": "application/json",
|
|
60
|
+
"User-Agent": _SDK_USER_AGENT,
|
|
53
61
|
**self._headers,
|
|
54
62
|
},
|
|
55
63
|
method="POST",
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""AgentMark span hooks for the shared ``WebhookRunner``.
|
|
2
|
+
|
|
3
|
+
``create_agentmark_span_hooks()`` is the Python counterpart of the TypeScript
|
|
4
|
+
``createAgentmarkSpanHooks()`` (``@agentmark-ai/sdk``): the one call that wires a
|
|
5
|
+
runner so every prompt run and every experiment item is traced to AgentMark. A
|
|
6
|
+
bring-your-own-SDK app passes it to ``create_webhook_runner`` (which also
|
|
7
|
+
defaults to it when this SDK is installed), so Python BYO tracing is as
|
|
8
|
+
turn-key as TypeScript.
|
|
9
|
+
|
|
10
|
+
The hooks map the runner's per-call params (``ExperimentItemParams`` /
|
|
11
|
+
``PromptSpanParams`` from ``agentmark.prompt_core``, duck-typed here so this SDK
|
|
12
|
+
stays prompt-core-free) onto ``span_context``. They are intentionally identical
|
|
13
|
+
to the per-adapter hooks the pydantic / claude adapters define inline today —
|
|
14
|
+
those should adopt this single source in a follow-up so the mapping lives once.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
from contextlib import asynccontextmanager, suppress
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from .trace import SpanOptions, span_context
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class _AgentMarkSpanCtx:
    """Thin wrapper exposing the two members the shared runner's hook
    Protocol expects: a ``trace_id`` attribute and ``set_attribute(key, value)``.
    """

    # Wrapped span context; only its ``set_attribute`` method is called here.
    _inner: Any
    # Trace identifier handed to the runner; empty string unless supplied.
    trace_id: str = ""

    def set_attribute(self, key: str, value: str) -> None:
        """Forward one attribute to the wrapped context, best-effort.

        Any ``Exception`` raised while looking up or calling the wrapped
        ``set_attribute`` is suppressed, so annotating a span never raises.
        """
        with suppress(Exception):
            forward = self._inner.set_attribute
            forward(key, value)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _json_or_none(value: Any) -> "str | None":
    """Serialize ``value`` to JSON text, passing ``None`` through unchanged.

    ``default=str`` makes objects ``json`` cannot encode natively fall back to
    their ``str()`` form instead of raising ``TypeError``.
    """
    if value is None:
        return None
    return json.dumps(value, default=str)


@asynccontextmanager
async def _item_span(params: Any) -> Any:
    """Per-item experiment span: maps the runner's params → SpanOptions.

    ``params`` is duck-typed. The attributes read are ``dataset_run_name``,
    ``index``, ``prompt_name``, ``experiment_run_id``, ``dataset_item_name``,
    ``dataset_expected_output``, ``dataset_input``, ``dataset_path`` and
    ``commit_sha``.

    Yields an ``_AgentMarkSpanCtx`` wrapping the context produced by
    ``span_context``, with ``trace_id`` copied from that context.
    """
    options = SpanOptions(
        name=f"experiment-{params.dataset_run_name}-{params.index}",
        prompt_name=params.prompt_name,
        dataset_run_id=params.experiment_run_id,
        dataset_run_name=params.dataset_run_name,
        dataset_item_name=params.dataset_item_name,
        # Both dataset payloads go through the same serializer so an expected
        # output holding a non-JSON-native value cannot abort the item with a
        # TypeError while the equivalent input would have been accepted.
        dataset_expected_output=_json_or_none(params.dataset_expected_output),
        dataset_input=_json_or_none(params.dataset_input),
        dataset_path=params.dataset_path,
        # Metadata is attached only when a truthy commit SHA is present.
        metadata={"commit_sha": params.commit_sha} if params.commit_sha else None,
    )
    async with span_context(options) as ctx:
        yield _AgentMarkSpanCtx(_inner=ctx, trace_id=ctx.trace_id)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@asynccontextmanager
async def _prompt_span(params: Any) -> Any:
    """Open the prompt-level span for one run.

    Reads ``params.name`` and ``params.prompt_name`` to build the
    ``SpanOptions``, then yields an ``_AgentMarkSpanCtx`` wrapping the context
    produced by ``span_context`` (with its ``trace_id`` copied across).
    """
    async with span_context(
        SpanOptions(name=params.name, prompt_name=params.prompt_name)
    ) as inner_ctx:
        wrapped = _AgentMarkSpanCtx(_inner=inner_ctx, trace_id=inner_ctx.trace_id)
        yield wrapped
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def create_agentmark_span_hooks() -> dict[str, Any]:
    """Build the span-hook mapping for a ``WebhookRunner``.

    Returns a dict with exactly two entries: ``"prompt_span_hook"`` (the
    prompt-level span factory) and ``"item_span_hook"`` (the per-item
    experiment span factory). Mirrors the TS ``createAgentmarkSpanHooks()``.
    """
    hooks: dict[str, Any] = dict(
        prompt_span_hook=_prompt_span,
        item_span_hook=_item_span,
    )
    return hooks
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""create_agentmark_span_hooks — the shared WebhookRunner span hooks.
|
|
2
|
+
|
|
3
|
+
Python counterpart of the TS ``createAgentmarkSpanHooks()``: the one call that
|
|
4
|
+
makes ``create_webhook_runner`` (and the adapters) trace every run and
|
|
5
|
+
experiment item. The hooks take the runner's per-call params duck-typed, so a
|
|
6
|
+
plain namespace stands in for ``PromptSpanParams`` / ``ExperimentItemParams``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from types import SimpleNamespace
|
|
12
|
+
|
|
13
|
+
from agentmark_sdk import create_agentmark_span_hooks
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_returns_both_runner_hooks() -> None:
    """The factory's mapping has exactly the two runner hook keys."""
    hook_names = set(create_agentmark_span_hooks())
    assert hook_names == {"prompt_span_hook", "item_span_hook"}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def test_prompt_span_yields_an_annotatable_ctx() -> None:
    """The prompt hook yields a ctx with ``trace_id`` that accepts attributes."""
    prompt_hook = create_agentmark_span_hooks()["prompt_span_hook"]
    run_params = SimpleNamespace(name="run", prompt_name="greeting")
    async with prompt_hook(run_params) as span_ctx:
        assert hasattr(span_ctx, "trace_id")
        # Annotating the span must not raise.
        span_ctx.set_attribute("k", "v")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def test_item_span_maps_dataset_params_without_error() -> None:
    """The item hook accepts a fully populated params namespace.

    Covers the dataset-field mapping: JSON serialization of the expected
    output and input, plus the branch that attaches ``commit_sha`` metadata.
    """
    item_fields = {
        "dataset_run_name": "exp",
        "index": 0,
        "prompt_name": "greeting",
        "experiment_run_id": "run-1",
        "dataset_item_name": "item-0",
        "dataset_expected_output": {"ok": True},
        "dataset_input": {"q": "hi"},
        "dataset_path": "data/x.jsonl",
        "commit_sha": "abc",
    }
    item_hook = create_agentmark_span_hooks()["item_span_hook"]
    async with item_hook(SimpleNamespace(**item_fields)) as span_ctx:
        assert hasattr(span_ctx, "trace_id")
        span_ctx.set_attribute("k", "v")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|