prefactor-core 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/.gitignore +1 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/PKG-INFO +1 -1
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/pyproject.toml +4 -1
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/__init__.py +6 -2
- prefactor_core-0.2.3/src/prefactor_core/_version.py +7 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/client.py +87 -2
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/exceptions.py +20 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/managers/agent_instance.py +29 -14
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/managers/span.py +8 -1
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/queue/executor.py +11 -2
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/queue/memory.py +4 -4
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/schema_registry.py +25 -1
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/span_context.py +27 -4
- prefactor_core-0.2.3/tests/test_failure_handling.py +322 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/tests/test_queue.py +31 -0
- prefactor_core-0.2.3/tests/test_sdk_header.py +78 -0
- prefactor_core-0.2.3/tests/test_span_context.py +42 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/tests/test_span_manager.py +14 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/README.md +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/examples/agent_e2e.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/config.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/context_stack.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/managers/__init__.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/models.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/operations.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/queue/__init__.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/queue/base.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/src/prefactor_core/utils.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/tests/test_client.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/tests/test_imports.py +0 -0
- {prefactor_core-0.2.2 → prefactor_core-0.2.3}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "prefactor-core"
|
|
3
|
-
|
|
3
|
+
dynamic = ["version"]
|
|
4
4
|
description = "Core Prefactor SDK with async queue-based operations"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "MIT" }
|
|
@@ -17,6 +17,9 @@ dependencies = [
|
|
|
17
17
|
requires = ["hatchling"]
|
|
18
18
|
build-backend = "hatchling.build"
|
|
19
19
|
|
|
20
|
+
[tool.hatch.version]
|
|
21
|
+
path = "src/prefactor_core/_version.py"
|
|
22
|
+
|
|
20
23
|
[tool.hatch.build.targets.wheel]
|
|
21
24
|
packages = ["src/prefactor_core"]
|
|
22
25
|
|
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
This module exports the main classes and functions for the prefactor-core SDK.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from ._version import __version__
|
|
6
9
|
from .client import PrefactorCoreClient
|
|
7
10
|
from .config import PrefactorCoreConfig, QueueConfig
|
|
8
11
|
from .context_stack import SpanContextStack
|
|
@@ -12,6 +15,7 @@ from .exceptions import (
|
|
|
12
15
|
InstanceNotFoundError,
|
|
13
16
|
OperationError,
|
|
14
17
|
PrefactorCoreError,
|
|
18
|
+
PrefactorTelemetryFailureError,
|
|
15
19
|
SpanNotFoundError,
|
|
16
20
|
)
|
|
17
21
|
from .managers.agent_instance import AgentInstanceHandle
|
|
@@ -22,8 +26,6 @@ from .schema_registry import SchemaRegistry
|
|
|
22
26
|
from .span_context import SpanContext
|
|
23
27
|
from .utils import generate_idempotency_key, validate_idempotency_key
|
|
24
28
|
|
|
25
|
-
__version__ = "0.2.1"
|
|
26
|
-
|
|
27
29
|
__all__ = [
|
|
28
30
|
# Client
|
|
29
31
|
"PrefactorCoreClient",
|
|
@@ -40,6 +42,7 @@ __all__ = [
|
|
|
40
42
|
"OperationError",
|
|
41
43
|
"InstanceNotFoundError",
|
|
42
44
|
"SpanNotFoundError",
|
|
45
|
+
"PrefactorTelemetryFailureError",
|
|
43
46
|
# Models
|
|
44
47
|
"AgentInstance",
|
|
45
48
|
"Span",
|
|
@@ -58,4 +61,5 @@ __all__ = [
|
|
|
58
61
|
# Utils
|
|
59
62
|
"generate_idempotency_key",
|
|
60
63
|
"validate_idempotency_key",
|
|
64
|
+
"__version__",
|
|
61
65
|
]
|
|
@@ -13,12 +13,16 @@ from contextlib import asynccontextmanager
|
|
|
13
13
|
from typing import TYPE_CHECKING, Any
|
|
14
14
|
|
|
15
15
|
from prefactor_http.client import PrefactorHttpClient
|
|
16
|
+
from prefactor_http.exceptions import is_permanent_http_error, is_transient_http_error
|
|
16
17
|
|
|
18
|
+
from ._version import PACKAGE_NAME as CORE_PACKAGE_NAME
|
|
19
|
+
from ._version import PACKAGE_VERSION as CORE_PACKAGE_VERSION
|
|
17
20
|
from .config import PrefactorCoreConfig
|
|
18
21
|
from .context_stack import SpanContextStack
|
|
19
22
|
from .exceptions import (
|
|
20
23
|
ClientAlreadyInitializedError,
|
|
21
24
|
ClientNotInitializedError,
|
|
25
|
+
PrefactorTelemetryFailureError,
|
|
22
26
|
)
|
|
23
27
|
from .managers.agent_instance import AgentInstanceManager
|
|
24
28
|
from .managers.span import SpanManager
|
|
@@ -31,6 +35,7 @@ if TYPE_CHECKING:
|
|
|
31
35
|
from .managers.agent_instance import AgentInstanceHandle
|
|
32
36
|
|
|
33
37
|
logger = logging.getLogger(__name__)
|
|
38
|
+
CORE_SDK_HEADER_ENTRY = f"{CORE_PACKAGE_NAME}@{CORE_PACKAGE_VERSION}"
|
|
34
39
|
|
|
35
40
|
|
|
36
41
|
class PrefactorCoreClient:
|
|
@@ -61,6 +66,7 @@ class PrefactorCoreClient:
|
|
|
61
66
|
self,
|
|
62
67
|
config: PrefactorCoreConfig,
|
|
63
68
|
queue: Queue[Operation] | None = None,
|
|
69
|
+
sdk_header_entry: str | None = None,
|
|
64
70
|
) -> None:
|
|
65
71
|
"""Initialize the client.
|
|
66
72
|
|
|
@@ -68,14 +74,30 @@ class PrefactorCoreClient:
|
|
|
68
74
|
config: Configuration for the client.
|
|
69
75
|
queue: Optional custom queue implementation. If not provided,
|
|
70
76
|
an InMemoryQueue is used.
|
|
77
|
+
sdk_header_entry: Optional upstream SDK header entry to prepend.
|
|
71
78
|
"""
|
|
72
79
|
self._config = config
|
|
73
80
|
self._queue = queue or InMemoryQueue()
|
|
81
|
+
self._sdk_header_entry = sdk_header_entry.strip() if sdk_header_entry else None
|
|
74
82
|
self._http: PrefactorHttpClient | None = None
|
|
75
83
|
self._executor: TaskExecutor | None = None
|
|
76
84
|
self._instance_manager: AgentInstanceManager | None = None
|
|
77
85
|
self._span_manager: SpanManager | None = None
|
|
78
86
|
self._initialized = False
|
|
87
|
+
self._telemetry_failure: PrefactorTelemetryFailureError | None = None
|
|
88
|
+
self._telemetry_failure_observed = False
|
|
89
|
+
|
|
90
|
+
def _build_http_sdk_header(self) -> str:
|
|
91
|
+
"""Build the effective SDK header for HTTP requests."""
|
|
92
|
+
if self._sdk_header_entry:
|
|
93
|
+
return f"{self._sdk_header_entry} {CORE_SDK_HEADER_ENTRY}"
|
|
94
|
+
return CORE_SDK_HEADER_ENTRY
|
|
95
|
+
|
|
96
|
+
def _set_sdk_header_entry(self, sdk_header_entry: str | None) -> None:
|
|
97
|
+
"""Set the upstream SDK header entry for this client lifetime."""
|
|
98
|
+
self._sdk_header_entry = sdk_header_entry.strip() if sdk_header_entry else None
|
|
99
|
+
if self._http is not None:
|
|
100
|
+
self._http._sdk_header = self._build_http_sdk_header()
|
|
79
101
|
|
|
80
102
|
async def __aenter__(self) -> "PrefactorCoreClient":
|
|
81
103
|
"""Enter async context manager."""
|
|
@@ -84,7 +106,11 @@ class PrefactorCoreClient:
|
|
|
84
106
|
|
|
85
107
|
async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
86
108
|
"""Exit async context manager."""
|
|
87
|
-
|
|
109
|
+
try:
|
|
110
|
+
await self.close()
|
|
111
|
+
except PrefactorTelemetryFailureError:
|
|
112
|
+
if exc_type is None:
|
|
113
|
+
raise
|
|
88
114
|
|
|
89
115
|
async def initialize(self) -> None:
|
|
90
116
|
"""Initialize the client and start processing.
|
|
@@ -101,13 +127,17 @@ class PrefactorCoreClient:
|
|
|
101
127
|
raise ClientAlreadyInitializedError("Client is already initialized")
|
|
102
128
|
|
|
103
129
|
# Initialize HTTP client
|
|
104
|
-
self._http = PrefactorHttpClient(
|
|
130
|
+
self._http = PrefactorHttpClient(
|
|
131
|
+
self._config.http_config,
|
|
132
|
+
sdk_header=self._build_http_sdk_header(),
|
|
133
|
+
)
|
|
105
134
|
await self._http.__aenter__()
|
|
106
135
|
|
|
107
136
|
# Initialize executor
|
|
108
137
|
self._executor = TaskExecutor(
|
|
109
138
|
queue=self._queue,
|
|
110
139
|
handler=self._process_operation,
|
|
140
|
+
is_retryable=self._is_retryable_operation_error,
|
|
111
141
|
num_workers=self._config.queue_config.num_workers,
|
|
112
142
|
max_retries=self._config.queue_config.max_retries,
|
|
113
143
|
)
|
|
@@ -144,6 +174,10 @@ class PrefactorCoreClient:
|
|
|
144
174
|
|
|
145
175
|
self._initialized = False
|
|
146
176
|
|
|
177
|
+
if self._telemetry_failure is not None and not self._telemetry_failure_observed:
|
|
178
|
+
self._telemetry_failure_observed = True
|
|
179
|
+
raise self._telemetry_failure
|
|
180
|
+
|
|
147
181
|
def _ensure_initialized(self) -> None:
|
|
148
182
|
"""Ensure the client is initialized.
|
|
149
183
|
|
|
@@ -156,12 +190,55 @@ class PrefactorCoreClient:
|
|
|
156
190
|
"use as context manager."
|
|
157
191
|
)
|
|
158
192
|
|
|
193
|
+
def _record_telemetry_failure(
|
|
194
|
+
self, cause: Exception, operation_type: OperationType | str
|
|
195
|
+
) -> None:
|
|
196
|
+
"""Latch the first permanent telemetry failure."""
|
|
197
|
+
if self._telemetry_failure is not None:
|
|
198
|
+
return
|
|
199
|
+
if isinstance(operation_type, OperationType):
|
|
200
|
+
operation_name = operation_type.name
|
|
201
|
+
else:
|
|
202
|
+
operation_name = str(operation_type)
|
|
203
|
+
self._telemetry_failure = PrefactorTelemetryFailureError(
|
|
204
|
+
f"Telemetry permanently failed during {operation_name}",
|
|
205
|
+
cause=cause,
|
|
206
|
+
operation_type=operation_name,
|
|
207
|
+
dropped_operations=0,
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
def _increment_dropped_operations(self) -> None:
|
|
211
|
+
"""Increment the dropped operation counter on the latched failure."""
|
|
212
|
+
if self._telemetry_failure is None:
|
|
213
|
+
return
|
|
214
|
+
self._telemetry_failure.dropped_operations += 1
|
|
215
|
+
|
|
216
|
+
def _raise_if_telemetry_failed(self) -> None:
|
|
217
|
+
"""Raise the latched telemetry failure for caller-visible operations."""
|
|
218
|
+
if self._telemetry_failure is None:
|
|
219
|
+
return
|
|
220
|
+
self._telemetry_failure_observed = True
|
|
221
|
+
raise self._telemetry_failure
|
|
222
|
+
|
|
223
|
+
def _is_retryable_operation_error(self, error: Exception) -> bool:
|
|
224
|
+
"""Return True when the worker should retry the operation."""
|
|
225
|
+
if isinstance(error, PrefactorTelemetryFailureError):
|
|
226
|
+
return False
|
|
227
|
+
if is_permanent_http_error(error):
|
|
228
|
+
return False
|
|
229
|
+
if is_transient_http_error(error):
|
|
230
|
+
return True
|
|
231
|
+
return True
|
|
232
|
+
|
|
159
233
|
async def _enqueue(self, operation: Operation) -> None:
|
|
160
234
|
"""Add an operation to the queue.
|
|
161
235
|
|
|
162
236
|
Args:
|
|
163
237
|
operation: The operation to queue.
|
|
164
238
|
"""
|
|
239
|
+
if self._telemetry_failure is not None:
|
|
240
|
+
self._increment_dropped_operations()
|
|
241
|
+
self._raise_if_telemetry_failed()
|
|
165
242
|
await self._queue.put(operation)
|
|
166
243
|
|
|
167
244
|
async def _process_operation(self, operation: Operation) -> None:
|
|
@@ -174,6 +251,9 @@ class PrefactorCoreClient:
|
|
|
174
251
|
"""
|
|
175
252
|
if not self._http:
|
|
176
253
|
return
|
|
254
|
+
if self._telemetry_failure is not None:
|
|
255
|
+
self._increment_dropped_operations()
|
|
256
|
+
return
|
|
177
257
|
|
|
178
258
|
try:
|
|
179
259
|
if operation.type == OperationType.REGISTER_AGENT_INSTANCE:
|
|
@@ -217,6 +297,8 @@ class PrefactorCoreClient:
|
|
|
217
297
|
)
|
|
218
298
|
|
|
219
299
|
except Exception as e:
|
|
300
|
+
if is_permanent_http_error(e):
|
|
301
|
+
self._record_telemetry_failure(e, operation.type)
|
|
220
302
|
# Log error and re-raise so TaskExecutor retries can run
|
|
221
303
|
logger.error(
|
|
222
304
|
f"Failed to process operation {operation.type}: {e}",
|
|
@@ -262,6 +344,7 @@ class PrefactorCoreClient:
|
|
|
262
344
|
ValueError: If no schema version provided and registry not configured.
|
|
263
345
|
"""
|
|
264
346
|
self._ensure_initialized()
|
|
347
|
+
self._raise_if_telemetry_failed()
|
|
265
348
|
assert self._instance_manager is not None
|
|
266
349
|
|
|
267
350
|
# Determine the agent_schema_version to use
|
|
@@ -318,6 +401,7 @@ class PrefactorCoreClient:
|
|
|
318
401
|
The span ID.
|
|
319
402
|
"""
|
|
320
403
|
self._ensure_initialized()
|
|
404
|
+
self._raise_if_telemetry_failed()
|
|
321
405
|
assert self._span_manager is not None
|
|
322
406
|
|
|
323
407
|
if parent_span_id is None:
|
|
@@ -380,6 +464,7 @@ class PrefactorCoreClient:
|
|
|
380
464
|
SpanContext for the created span.
|
|
381
465
|
"""
|
|
382
466
|
self._ensure_initialized()
|
|
467
|
+
self._raise_if_telemetry_failed()
|
|
383
468
|
assert self._span_manager is not None
|
|
384
469
|
|
|
385
470
|
# Import here to avoid circular import
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Custom exceptions for prefactor-core."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
|
|
4
6
|
class PrefactorCoreError(Exception):
|
|
5
7
|
"""Base exception for all prefactor-core errors."""
|
|
@@ -39,6 +41,23 @@ class SpanNotFoundError(PrefactorCoreError):
|
|
|
39
41
|
pass
|
|
40
42
|
|
|
41
43
|
|
|
44
|
+
class PrefactorTelemetryFailureError(PrefactorCoreError):
|
|
45
|
+
"""Raised when telemetry enters a permanent failure state."""
|
|
46
|
+
|
|
47
|
+
def __init__(
|
|
48
|
+
self,
|
|
49
|
+
message: str,
|
|
50
|
+
*,
|
|
51
|
+
cause: Exception,
|
|
52
|
+
operation_type: str | None = None,
|
|
53
|
+
dropped_operations: int = 0,
|
|
54
|
+
) -> None:
|
|
55
|
+
super().__init__(message)
|
|
56
|
+
self.cause = cause
|
|
57
|
+
self.operation_type = operation_type
|
|
58
|
+
self.dropped_operations = dropped_operations
|
|
59
|
+
|
|
60
|
+
|
|
42
61
|
__all__ = [
|
|
43
62
|
"PrefactorCoreError",
|
|
44
63
|
"ClientNotInitializedError",
|
|
@@ -46,4 +65,5 @@ __all__ = [
|
|
|
46
65
|
"OperationError",
|
|
47
66
|
"InstanceNotFoundError",
|
|
48
67
|
"SpanNotFoundError",
|
|
68
|
+
"PrefactorTelemetryFailureError",
|
|
49
69
|
]
|
|
@@ -96,11 +96,19 @@ class AgentInstanceManager:
|
|
|
96
96
|
Args:
|
|
97
97
|
instance_id: The ID of the instance to start.
|
|
98
98
|
"""
|
|
99
|
+
await self.start_with_idempotency_key(instance_id, generate_idempotency_key())
|
|
100
|
+
|
|
101
|
+
async def start_with_idempotency_key(
|
|
102
|
+
self,
|
|
103
|
+
instance_id: str,
|
|
104
|
+
idempotency_key: str,
|
|
105
|
+
) -> None:
|
|
106
|
+
"""Queue a start operation using a stable idempotency key."""
|
|
99
107
|
operation = Operation(
|
|
100
108
|
type=OperationType.START_AGENT_INSTANCE,
|
|
101
109
|
payload={
|
|
102
110
|
"instance_id": instance_id,
|
|
103
|
-
"idempotency_key":
|
|
111
|
+
"idempotency_key": idempotency_key,
|
|
104
112
|
},
|
|
105
113
|
timestamp=datetime.now(timezone.utc),
|
|
106
114
|
)
|
|
@@ -115,11 +123,19 @@ class AgentInstanceManager:
|
|
|
115
123
|
Args:
|
|
116
124
|
instance_id: The ID of the instance to finish.
|
|
117
125
|
"""
|
|
126
|
+
await self.finish_with_idempotency_key(instance_id, generate_idempotency_key())
|
|
127
|
+
|
|
128
|
+
async def finish_with_idempotency_key(
|
|
129
|
+
self,
|
|
130
|
+
instance_id: str,
|
|
131
|
+
idempotency_key: str,
|
|
132
|
+
) -> None:
|
|
133
|
+
"""Queue a finish operation using a stable idempotency key."""
|
|
118
134
|
operation = Operation(
|
|
119
135
|
type=OperationType.FINISH_AGENT_INSTANCE,
|
|
120
136
|
payload={
|
|
121
137
|
"instance_id": instance_id,
|
|
122
|
-
"idempotency_key":
|
|
138
|
+
"idempotency_key": idempotency_key,
|
|
123
139
|
},
|
|
124
140
|
timestamp=datetime.now(timezone.utc),
|
|
125
141
|
)
|
|
@@ -159,8 +175,8 @@ class AgentInstanceHandle:
|
|
|
159
175
|
"""
|
|
160
176
|
self._instance_id = instance_id
|
|
161
177
|
self._client = client
|
|
162
|
-
self.
|
|
163
|
-
self.
|
|
178
|
+
self._start_idempotency_key = generate_idempotency_key()
|
|
179
|
+
self._finish_idempotency_key = generate_idempotency_key()
|
|
164
180
|
|
|
165
181
|
@property
|
|
166
182
|
def id(self) -> str:
|
|
@@ -176,26 +192,24 @@ class AgentInstanceHandle:
|
|
|
176
192
|
|
|
177
193
|
This queues a start operation for the instance.
|
|
178
194
|
"""
|
|
179
|
-
if self._started:
|
|
180
|
-
return
|
|
181
|
-
|
|
182
195
|
manager = self._client.instance_manager
|
|
183
196
|
assert manager is not None
|
|
184
|
-
await manager.
|
|
185
|
-
|
|
197
|
+
await manager.start_with_idempotency_key(
|
|
198
|
+
self._instance_id,
|
|
199
|
+
self._start_idempotency_key,
|
|
200
|
+
)
|
|
186
201
|
|
|
187
202
|
async def finish(self) -> None:
|
|
188
203
|
"""Mark the instance as finished.
|
|
189
204
|
|
|
190
205
|
This queues a finish operation for the instance.
|
|
191
206
|
"""
|
|
192
|
-
if self._finished:
|
|
193
|
-
return
|
|
194
|
-
|
|
195
207
|
manager = self._client.instance_manager
|
|
196
208
|
assert manager is not None
|
|
197
|
-
await manager.
|
|
198
|
-
|
|
209
|
+
await manager.finish_with_idempotency_key(
|
|
210
|
+
self._instance_id,
|
|
211
|
+
self._finish_idempotency_key,
|
|
212
|
+
)
|
|
199
213
|
|
|
200
214
|
async def create_span(
|
|
201
215
|
self,
|
|
@@ -215,6 +229,7 @@ class AgentInstanceHandle:
|
|
|
215
229
|
Returns:
|
|
216
230
|
The span ID.
|
|
217
231
|
"""
|
|
232
|
+
self._client._raise_if_telemetry_failed()
|
|
218
233
|
return await self._client.create_span(
|
|
219
234
|
instance_id=self._instance_id,
|
|
220
235
|
schema_name=schema_name,
|
|
@@ -151,6 +151,10 @@ class SpanManager:
|
|
|
151
151
|
del self._spans[temp_id]
|
|
152
152
|
self._spans[api_id] = span
|
|
153
153
|
|
|
154
|
+
for child_span in self._spans.values():
|
|
155
|
+
if child_span.parent_span_id == temp_id:
|
|
156
|
+
child_span.parent_span_id = api_id
|
|
157
|
+
|
|
154
158
|
# Replace temp ID on the context stack
|
|
155
159
|
stack = SpanContextStack.get_stack()
|
|
156
160
|
new_stack = [api_id if s == temp_id else s for s in stack]
|
|
@@ -242,6 +246,7 @@ class SpanManager:
|
|
|
242
246
|
span_id: str,
|
|
243
247
|
result_payload: dict[str, Any] | None = None,
|
|
244
248
|
status: "FinishStatus" = "complete",
|
|
249
|
+
idempotency_key: str | None = None,
|
|
245
250
|
) -> None:
|
|
246
251
|
"""Mark a span as finished.
|
|
247
252
|
|
|
@@ -254,6 +259,8 @@ class SpanManager:
|
|
|
254
259
|
``"cancelled"`` (default: ``"complete"``). The span must be
|
|
255
260
|
``active`` for this to succeed; use ``cancel_unstarted()``
|
|
256
261
|
to cancel a span that was never started.
|
|
262
|
+
idempotency_key: Optional key to make repeated finish requests
|
|
263
|
+
duplicate-safe. When omitted, a new key is generated.
|
|
257
264
|
|
|
258
265
|
Raises:
|
|
259
266
|
KeyError: If the span ID is not known.
|
|
@@ -273,7 +280,7 @@ class SpanManager:
|
|
|
273
280
|
op_payload: dict[str, Any] = {
|
|
274
281
|
"span_id": span_id,
|
|
275
282
|
"status": status,
|
|
276
|
-
"idempotency_key": generate_idempotency_key(),
|
|
283
|
+
"idempotency_key": idempotency_key or generate_idempotency_key(),
|
|
277
284
|
}
|
|
278
285
|
if result_payload is not None:
|
|
279
286
|
op_payload["result_payload"] = result_payload
|
|
@@ -42,6 +42,8 @@ class TaskExecutor:
|
|
|
42
42
|
handler: Callable[[Any], Awaitable[None]],
|
|
43
43
|
num_workers: int = 3,
|
|
44
44
|
max_retries: int = 3,
|
|
45
|
+
*,
|
|
46
|
+
is_retryable: Callable[[Exception], bool] | None = None,
|
|
45
47
|
) -> None:
|
|
46
48
|
"""Initialize the task executor.
|
|
47
49
|
|
|
@@ -50,9 +52,12 @@ class TaskExecutor:
|
|
|
50
52
|
handler: Async function to process each item.
|
|
51
53
|
num_workers: Number of concurrent worker tasks.
|
|
52
54
|
max_retries: Maximum retry attempts per item.
|
|
55
|
+
is_retryable: Optional predicate that decides whether a
|
|
56
|
+
handler failure should be retried.
|
|
53
57
|
"""
|
|
54
58
|
self._queue = queue
|
|
55
59
|
self._handler = handler
|
|
60
|
+
self._is_retryable = is_retryable or (lambda exc: True)
|
|
56
61
|
self._num_workers = num_workers
|
|
57
62
|
self._max_retries = max_retries
|
|
58
63
|
self._workers: list[Task] = []
|
|
@@ -162,13 +167,17 @@ class TaskExecutor:
|
|
|
162
167
|
"""
|
|
163
168
|
last_error: Exception | None = None
|
|
164
169
|
|
|
165
|
-
|
|
170
|
+
total_attempts = self._max_retries + 1
|
|
171
|
+
|
|
172
|
+
for attempt in range(total_attempts):
|
|
166
173
|
try:
|
|
167
174
|
await self._handler(item)
|
|
168
175
|
return
|
|
169
176
|
except Exception as e:
|
|
170
177
|
last_error = e
|
|
171
|
-
if
|
|
178
|
+
if not self._is_retryable(e):
|
|
179
|
+
raise
|
|
180
|
+
if attempt < total_attempts - 1:
|
|
172
181
|
delay = 2**attempt # 1s, 2s, 4s
|
|
173
182
|
logger.warning(
|
|
174
183
|
f"Attempt {attempt + 1} failed, retrying in {delay}s: {e}"
|
|
@@ -6,7 +6,7 @@ for durability requirements.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
from asyncio import Queue as AsyncQueue
|
|
9
|
-
from typing import TypeVar
|
|
9
|
+
from typing import TypeVar, cast
|
|
10
10
|
|
|
11
11
|
from .base import Queue, QueueClosedError
|
|
12
12
|
|
|
@@ -34,7 +34,7 @@ class InMemoryQueue(Queue[T]):
|
|
|
34
34
|
|
|
35
35
|
def __init__(self) -> None:
|
|
36
36
|
"""Initialize an empty in-memory queue."""
|
|
37
|
-
self._queue: AsyncQueue[T] = AsyncQueue()
|
|
37
|
+
self._queue: AsyncQueue[T | object] = AsyncQueue()
|
|
38
38
|
self._closed = False
|
|
39
39
|
|
|
40
40
|
async def put(self, item: T) -> None:
|
|
@@ -66,7 +66,7 @@ class InMemoryQueue(Queue[T]):
|
|
|
66
66
|
# Re-raise as closed so the worker exits cleanly.
|
|
67
67
|
if item is self._SENTINEL:
|
|
68
68
|
raise QueueClosedError("Queue is closed and empty")
|
|
69
|
-
return item
|
|
69
|
+
return cast(T, item)
|
|
70
70
|
|
|
71
71
|
def size(self) -> int:
|
|
72
72
|
"""Return the current number of items in the queue.
|
|
@@ -89,7 +89,7 @@ class InMemoryQueue(Queue[T]):
|
|
|
89
89
|
self._closed = True
|
|
90
90
|
# Wake any workers blocked in asyncio.Queue.get() so they can exit.
|
|
91
91
|
for _ in range(num_waiters):
|
|
92
|
-
await self._queue.put(self._SENTINEL)
|
|
92
|
+
await self._queue.put(self._SENTINEL)
|
|
93
93
|
|
|
94
94
|
@property
|
|
95
95
|
def closed(self) -> bool:
|
|
@@ -4,6 +4,8 @@ This module provides a SchemaRegistry that allows registration of span schemas
|
|
|
4
4
|
from multiple packages before agent instances are created.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
7
9
|
from typing import Any
|
|
8
10
|
|
|
9
11
|
|
|
@@ -131,12 +133,13 @@ class SchemaRegistry:
|
|
|
131
133
|
title: str | None = None,
|
|
132
134
|
description: str | None = None,
|
|
133
135
|
template: str | None = None,
|
|
136
|
+
data_risk: dict[str, Any] | None = None,
|
|
134
137
|
) -> None:
|
|
135
138
|
"""Register a full structured span type schema.
|
|
136
139
|
|
|
137
140
|
Adds to ``span_type_schemas``. This is the richest form and supports
|
|
138
141
|
all API fields: params schema, result schema, human-readable title,
|
|
139
|
-
description, and
|
|
142
|
+
description, template, and data risk classification.
|
|
140
143
|
|
|
141
144
|
Args:
|
|
142
145
|
name: Span type name (e.g., "agent:llm")
|
|
@@ -145,6 +148,25 @@ class SchemaRegistry:
|
|
|
145
148
|
title: Optional human-readable title (defaults to name on the API)
|
|
146
149
|
description: Optional description of the span type
|
|
147
150
|
template: Optional display template using ``{{field}}`` interpolation
|
|
151
|
+
data_risk: Optional data risk classification dict. See DataRisk model
|
|
152
|
+
in prefactor_http.models.agent_instance for structure. Must include:
|
|
153
|
+
- action_profile (object): Permitted actions with keys:
|
|
154
|
+
create_data, read_data, update_data, destroy_data,
|
|
155
|
+
financial_transactions, external_communication (values:
|
|
156
|
+
"unknown" | "allowed" | "disallowed")
|
|
157
|
+
- params_data_categories (object): Input data categories with keys
|
|
158
|
+
like personal_identifiers, contact_information,
|
|
159
|
+
financial_information, etc. (values: "unknown" | "included"
|
|
160
|
+
| "excluded")
|
|
161
|
+
- result_data_categories (object): Output data categories,
|
|
162
|
+
same structure as params_data_categories
|
|
163
|
+
All three top-level keys are required; fields within each default
|
|
164
|
+
to "unknown" when omitted.
|
|
165
|
+
Example: {
|
|
166
|
+
"action_profile": {"read_data": "allowed"},
|
|
167
|
+
"params_data_categories": {"personal_identifiers": "included"},
|
|
168
|
+
"result_data_categories": {},
|
|
169
|
+
}
|
|
148
170
|
|
|
149
171
|
Raises:
|
|
150
172
|
ValueError: If name is already registered as a span type schema.
|
|
@@ -161,6 +183,8 @@ class SchemaRegistry:
|
|
|
161
183
|
entry["description"] = description
|
|
162
184
|
if template is not None:
|
|
163
185
|
entry["template"] = template
|
|
186
|
+
if data_risk is not None:
|
|
187
|
+
entry["data_risk"] = data_risk
|
|
164
188
|
|
|
165
189
|
self._span_type_schemas[name] = entry
|
|
166
190
|
|
|
@@ -4,9 +4,15 @@ The SpanContext provides an interface for updating span data during execution
|
|
|
4
4
|
and ensures proper cleanup when the span completes.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
7
9
|
from typing import TYPE_CHECKING, Any
|
|
8
10
|
|
|
11
|
+
from .utils import generate_idempotency_key
|
|
12
|
+
|
|
9
13
|
if TYPE_CHECKING:
|
|
14
|
+
from prefactor_http.models.types import FinishStatus
|
|
15
|
+
|
|
10
16
|
from .managers.span import SpanManager
|
|
11
17
|
|
|
12
18
|
|
|
@@ -67,9 +73,13 @@ class SpanContext:
|
|
|
67
73
|
self._span_manager = span_manager
|
|
68
74
|
self._default_payload = default_payload
|
|
69
75
|
self._result_payload: dict[str, Any] = {}
|
|
70
|
-
self._finish_status:
|
|
76
|
+
self._finish_status: FinishStatus = "complete"
|
|
71
77
|
self._started = False
|
|
72
78
|
self._finished = False
|
|
79
|
+
self._finish_request: (
|
|
80
|
+
tuple[FinishStatus, tuple[tuple[str, Any], ...]] | None
|
|
81
|
+
) = None
|
|
82
|
+
self._finish_idempotency_key: str | None = None
|
|
73
83
|
|
|
74
84
|
@property
|
|
75
85
|
def id(self) -> str:
|
|
@@ -167,24 +177,37 @@ class SpanContext:
|
|
|
167
177
|
valid pre-active cancellation path. For all other statuses the span
|
|
168
178
|
is auto-started as ``active`` first.
|
|
169
179
|
"""
|
|
180
|
+
request = (
|
|
181
|
+
self._finish_status,
|
|
182
|
+
tuple(sorted(self._result_payload.items())),
|
|
183
|
+
)
|
|
170
184
|
if self._finished:
|
|
171
185
|
return
|
|
172
|
-
|
|
173
|
-
|
|
186
|
+
if self._finish_request != request:
|
|
187
|
+
self._finish_request = (
|
|
188
|
+
self._finish_status,
|
|
189
|
+
tuple(sorted(self._result_payload.items())),
|
|
190
|
+
)
|
|
191
|
+
self._finish_idempotency_key = None
|
|
174
192
|
|
|
175
193
|
try:
|
|
176
194
|
if not self._started and self._finish_status == "cancelled":
|
|
177
195
|
await self._span_manager.cancel_unstarted(self._span_id)
|
|
196
|
+
self._finished = True
|
|
178
197
|
return
|
|
179
198
|
|
|
180
199
|
if not self._started:
|
|
181
200
|
await self.start(self._default_payload)
|
|
182
201
|
|
|
202
|
+
if self._finish_idempotency_key is None:
|
|
203
|
+
self._finish_idempotency_key = generate_idempotency_key()
|
|
183
204
|
await self._span_manager.finish(
|
|
184
205
|
self._span_id,
|
|
185
206
|
result_payload=self._result_payload or None,
|
|
186
|
-
status=self._finish_status,
|
|
207
|
+
status=self._finish_status,
|
|
208
|
+
idempotency_key=self._finish_idempotency_key,
|
|
187
209
|
)
|
|
210
|
+
self._finished = True
|
|
188
211
|
except Exception:
|
|
189
212
|
raise
|
|
190
213
|
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"""Tests for permanent telemetry failure handling in prefactor-core."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from types import SimpleNamespace
|
|
7
|
+
from unittest.mock import patch
|
|
8
|
+
|
|
9
|
+
import aiohttp
|
|
10
|
+
import pytest
|
|
11
|
+
from prefactor_core import PrefactorCoreClient
|
|
12
|
+
from prefactor_core.config import PrefactorCoreConfig, QueueConfig
|
|
13
|
+
from prefactor_core.exceptions import PrefactorTelemetryFailureError
|
|
14
|
+
from prefactor_http.config import HttpClientConfig
|
|
15
|
+
from prefactor_http.exceptions import (
|
|
16
|
+
PrefactorAuthError,
|
|
17
|
+
PrefactorResponseContractError,
|
|
18
|
+
PrefactorRetryExhaustedError,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class _StubAgentInstances:
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
*,
|
|
26
|
+
start_side_effect: Exception | None = None,
|
|
27
|
+
finish_side_effect: Exception | None = None,
|
|
28
|
+
) -> None:
|
|
29
|
+
self.start_side_effect = start_side_effect
|
|
30
|
+
self.finish_side_effect = finish_side_effect
|
|
31
|
+
self.start_calls = 0
|
|
32
|
+
self.finish_calls = 0
|
|
33
|
+
|
|
34
|
+
async def register(self, **kwargs):
|
|
35
|
+
return SimpleNamespace(id=kwargs.get("id") or "inst-1")
|
|
36
|
+
|
|
37
|
+
async def start(self, **kwargs):
|
|
38
|
+
self.start_calls += 1
|
|
39
|
+
if self.start_side_effect is not None:
|
|
40
|
+
raise self.start_side_effect
|
|
41
|
+
return SimpleNamespace(id=kwargs["agent_instance_id"])
|
|
42
|
+
|
|
43
|
+
async def finish(self, **kwargs):
|
|
44
|
+
self.finish_calls += 1
|
|
45
|
+
if self.finish_side_effect is not None:
|
|
46
|
+
raise self.finish_side_effect
|
|
47
|
+
return SimpleNamespace(id=kwargs["agent_instance_id"])
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class _StubAgentSpans:
|
|
51
|
+
def __init__(self) -> None:
|
|
52
|
+
self.create_calls = []
|
|
53
|
+
self.finish_calls = []
|
|
54
|
+
|
|
55
|
+
async def create(self, **kwargs):
|
|
56
|
+
self.create_calls.append(kwargs)
|
|
57
|
+
return SimpleNamespace(id=f"span-{len(self.create_calls)}")
|
|
58
|
+
|
|
59
|
+
async def finish(self, **kwargs):
|
|
60
|
+
self.finish_calls.append(kwargs)
|
|
61
|
+
return SimpleNamespace(id=kwargs["agent_span_id"])
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class _StubHttpClient:
|
|
65
|
+
def __init__(
|
|
66
|
+
self,
|
|
67
|
+
*_args,
|
|
68
|
+
agent_instances: _StubAgentInstances | None = None,
|
|
69
|
+
agent_spans: _StubAgentSpans | None = None,
|
|
70
|
+
**_kwargs,
|
|
71
|
+
) -> None:
|
|
72
|
+
self.agent_instances = agent_instances or _StubAgentInstances()
|
|
73
|
+
self.agent_spans = agent_spans or _StubAgentSpans()
|
|
74
|
+
|
|
75
|
+
async def __aenter__(self):
|
|
76
|
+
return self
|
|
77
|
+
|
|
78
|
+
async def __aexit__(self, exc_type, exc, tb):
|
|
79
|
+
return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _make_client_config(max_retries: int = 0) -> PrefactorCoreConfig:
|
|
83
|
+
return PrefactorCoreConfig(
|
|
84
|
+
http_config=HttpClientConfig(
|
|
85
|
+
api_url="https://api.test.com",
|
|
86
|
+
api_token="test-token",
|
|
87
|
+
max_retries=0,
|
|
88
|
+
initial_retry_delay=0.01,
|
|
89
|
+
max_retry_delay=0.02,
|
|
90
|
+
),
|
|
91
|
+
queue_config=QueueConfig(num_workers=1, max_retries=max_retries),
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def _wait_until(
|
|
96
|
+
predicate, *, timeout: float = 1.0, interval: float = 0.01
|
|
97
|
+
) -> None:
|
|
98
|
+
deadline = asyncio.get_running_loop().time() + timeout
|
|
99
|
+
while asyncio.get_running_loop().time() < deadline:
|
|
100
|
+
if predicate():
|
|
101
|
+
return
|
|
102
|
+
await asyncio.sleep(interval)
|
|
103
|
+
raise AssertionError("Timed out waiting for expected condition")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@pytest.mark.asyncio
async def test_permanent_worker_failure_latches_and_rejects_future_operations():
    """A latched auth failure on start makes the next operation raise.

    The raised error must name the failed operation, carry the original
    cause, and report the rejected operation as dropped.
    """
    auth_error = PrefactorAuthError("bad token", "unauthorized", 401)
    failing_instances = _StubAgentInstances(start_side_effect=auth_error)
    fake_http = _StubHttpClient(agent_instances=failing_instances)

    with patch("prefactor_core.client.PrefactorHttpClient", return_value=fake_http):
        core = PrefactorCoreClient(_make_client_config())
        await core.initialize()
        agent = await core.create_agent_instance(
            agent_id="agent-1",
            agent_version={"name": "v1"},
            agent_schema_version={"span_schemas": {}},
        )

        # start() itself is expected to return normally; poll until the
        # client has recorded the failure.
        await agent.start()
        await _wait_until(lambda: core._telemetry_failure is not None)

        with pytest.raises(PrefactorTelemetryFailureError) as caught:
            await agent.finish()

        failure = caught.value
        assert failure.operation_type == "START_AGENT_INSTANCE"
        assert isinstance(failure.cause, PrefactorAuthError)
        assert failure.dropped_operations == 1

        await core.close()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@pytest.mark.asyncio
async def test_close_raises_latched_failure_when_not_previously_observed():
    """close() must raise when a latched failure was never raised to the caller."""
    auth_error = PrefactorAuthError("bad token", "unauthorized", 401)
    failing_instances = _StubAgentInstances(start_side_effect=auth_error)
    fake_http = _StubHttpClient(agent_instances=failing_instances)

    with patch("prefactor_core.client.PrefactorHttpClient", return_value=fake_http):
        core = PrefactorCoreClient(_make_client_config())
        await core.initialize()
        agent = await core.create_agent_instance(
            agent_id="agent-1",
            agent_version={"name": "v1"},
            agent_schema_version={"span_schemas": {}},
        )

        await agent.start()
        # Wait for the failure to be recorded before closing.
        await _wait_until(lambda: core._telemetry_failure is not None)

        with pytest.raises(PrefactorTelemetryFailureError):
            await core.close()
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@pytest.mark.asyncio
async def test_close_raises_permanent_failure_first_latched_during_shutdown():
    """close() must raise a failure that was not awaited before shutdown.

    Here the finish call fails with an auth error and the test goes
    straight to close() without polling for the latched failure first.
    """
    auth_error = PrefactorAuthError("bad token", "unauthorized", 401)
    failing_instances = _StubAgentInstances(finish_side_effect=auth_error)
    fake_http = _StubHttpClient(agent_instances=failing_instances)

    with patch("prefactor_core.client.PrefactorHttpClient", return_value=fake_http):
        core = PrefactorCoreClient(_make_client_config())
        await core.initialize()
        agent = await core.create_agent_instance(
            agent_id="agent-1",
            agent_version={"name": "v1"},
            agent_schema_version={"span_schemas": {}},
        )

        await agent.finish()

        with pytest.raises(PrefactorTelemetryFailureError) as caught:
            await core.close()

        failure = caught.value
        assert failure.operation_type == "FINISH_AGENT_INSTANCE"
        assert isinstance(failure.cause, PrefactorAuthError)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@pytest.mark.asyncio
async def test_transient_retry_exhaustion_does_not_latch_permanent_failure():
    """Retry exhaustion caused by a network error must leave the client usable.

    After the failing start has been attempted, finish() still reaches the
    HTTP stub and close() completes without raising.
    """
    network_error = aiohttp.ClientError("network down")
    exhausted = PrefactorRetryExhaustedError(
        "network exhausted",
        last_error=network_error,
    )
    fake_http = _StubHttpClient(
        agent_instances=_StubAgentInstances(start_side_effect=exhausted)
    )

    with patch("prefactor_core.client.PrefactorHttpClient", return_value=fake_http):
        core = PrefactorCoreClient(_make_client_config())
        await core.initialize()
        agent = await core.create_agent_instance(
            agent_id="agent-1",
            agent_version={"name": "v1"},
            agent_schema_version={"span_schemas": {}},
        )

        await agent.start()
        await _wait_until(lambda: fake_http.agent_instances.start_calls == 1)

        # A later operation must still be dispatched to the HTTP stub.
        await agent.finish()
        await _wait_until(lambda: fake_http.agent_instances.finish_calls == 1)
        await core.close()
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@pytest.mark.asyncio
async def test_malformed_503_retry_exhaustion_does_not_latch_permanent_failure():
    """Retry exhaustion on an unparseable 503 body must leave the client usable.

    After the failing start has been attempted, finish() still reaches the
    HTTP stub and close() completes without raising.
    """
    contract_error = PrefactorResponseContractError(
        "invalid JSON",
        status_code=503,
        body_snippet="<html>temporary outage</html>",
    )
    exhausted = PrefactorRetryExhaustedError(
        "server exhausted",
        last_error=contract_error,
    )
    fake_http = _StubHttpClient(
        agent_instances=_StubAgentInstances(start_side_effect=exhausted)
    )

    with patch("prefactor_core.client.PrefactorHttpClient", return_value=fake_http):
        core = PrefactorCoreClient(_make_client_config())
        await core.initialize()
        agent = await core.create_agent_instance(
            agent_id="agent-1",
            agent_version={"name": "v1"},
            agent_schema_version={"span_schemas": {}},
        )

        await agent.start()
        await _wait_until(lambda: fake_http.agent_instances.start_calls == 1)

        # A later operation must still be dispatched to the HTTP stub.
        await agent.finish()
        await _wait_until(lambda: fake_http.agent_instances.finish_calls == 1)
        await core.close()
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
@pytest.mark.asyncio
async def test_async_context_preserves_user_exception_when_telemetry_failed():
    """An exception raised in the ``async with`` body must win over telemetry.

    Even with a telemetry failure latched, leaving the context manager has
    to propagate the caller's ``ValueError`` rather than replace it.
    """
    auth_error = PrefactorAuthError("bad token", "unauthorized", 401)
    fake_http = _StubHttpClient(
        agent_instances=_StubAgentInstances(start_side_effect=auth_error)
    )
    http_patch = patch(
        "prefactor_core.client.PrefactorHttpClient", return_value=fake_http
    )
    expect_user_error = pytest.raises(ValueError, match="user boom")

    with http_patch, expect_user_error:
        async with PrefactorCoreClient(_make_client_config()) as core:
            agent = await core.create_agent_instance(
                agent_id="agent-1",
                agent_version={"name": "v1"},
                agent_schema_version={"span_schemas": {}},
            )

            await agent.start()
            await _wait_until(lambda: core._telemetry_failure is not None)
            raise ValueError("user boom")
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
@pytest.mark.asyncio
async def test_async_context_raises_latched_failure_when_body_succeeds():
    """Leaving the context cleanly must still raise a latched telemetry failure."""
    auth_error = PrefactorAuthError("bad token", "unauthorized", 401)
    fake_http = _StubHttpClient(
        agent_instances=_StubAgentInstances(start_side_effect=auth_error)
    )
    http_patch = patch(
        "prefactor_core.client.PrefactorHttpClient", return_value=fake_http
    )
    expect_telemetry_error = pytest.raises(PrefactorTelemetryFailureError)

    with http_patch, expect_telemetry_error:
        async with PrefactorCoreClient(_make_client_config()) as core:
            agent = await core.create_agent_instance(
                agent_id="agent-1",
                agent_version={"name": "v1"},
                agent_schema_version={"span_schemas": {}},
            )

            await agent.start()
            # The body ends normally once the failure has been recorded.
            await _wait_until(lambda: core._telemetry_failure is not None)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@pytest.mark.asyncio
async def test_latched_failure_drops_already_queued_backlog():
    """Work queued behind a permanently failing operation must never be sent.

    start() and finish() are both issued before the failure is observed;
    only start may reach the HTTP stub, and close() must report one
    dropped operation.
    """
    auth_error = PrefactorAuthError("bad token", "unauthorized", 401)
    fake_http = _StubHttpClient(
        agent_instances=_StubAgentInstances(start_side_effect=auth_error)
    )

    with patch("prefactor_core.client.PrefactorHttpClient", return_value=fake_http):
        core = PrefactorCoreClient(_make_client_config())
        await core.initialize()
        agent = await core.create_agent_instance(
            agent_id="agent-1",
            agent_version={"name": "v1"},
            agent_schema_version={"span_schemas": {}},
        )

        await agent.start()
        await agent.finish()
        await _wait_until(lambda: core._telemetry_failure is not None)

        sent = fake_http.agent_instances
        assert sent.start_calls == 1
        assert sent.finish_calls == 0

        with pytest.raises(PrefactorTelemetryFailureError) as caught:
            await core.close()

        assert caught.value.dropped_operations == 1
|
|
@@ -183,3 +183,34 @@ class TestTaskExecutor:
|
|
|
183
183
|
|
|
184
184
|
# Should have attempted 3 times
|
|
185
185
|
assert attempts.count("item1") == 3
|
|
186
|
+
|
|
187
|
+
async def test_zero_retries_still_attempts_once(self):
    """With ``max_retries=0`` the handler must run exactly once per item."""
    seen = []

    async def failing_handler(item):
        # Record the attempt before failing so it can be counted.
        seen.append(item)
        raise Exception("boom")

    work_queue = InMemoryQueue()
    runner = TaskExecutor(
        work_queue,
        failing_handler,
        num_workers=1,
        max_retries=0,
    )
    runner.start()

    await work_queue.put("item1")
    # Give the worker time to pick up the item before stopping.
    await asyncio.sleep(0.1)
    await runner.stop()

    assert seen == ["item1"]
|
|
204
|
+
|
|
205
|
+
async def test_positional_constructor_remains_compatible(self):
    """The third and fourth positional args map to workers and retries.

    The executor must also expose a callable ``_is_retryable`` when none
    is passed.
    """

    async def handler(_item):
        return None

    work_queue = InMemoryQueue()
    runner = TaskExecutor(work_queue, handler, 1, 0)

    workers, retries = runner._num_workers, runner._max_retries
    assert workers == 1
    assert retries == 0
    assert callable(runner._is_retryable)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Tests for core SDK header composition."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import AsyncMock, patch
|
|
6
|
+
|
|
7
|
+
from prefactor_core import PrefactorCoreClient
|
|
8
|
+
from prefactor_core._version import PACKAGE_VERSION
|
|
9
|
+
from prefactor_core.client import CORE_SDK_HEADER_ENTRY
|
|
10
|
+
from prefactor_core.config import PrefactorCoreConfig
|
|
11
|
+
from prefactor_http.config import HttpClientConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_config() -> PrefactorCoreConfig:
    """Return a core config carrying only an API URL and token for HTTP."""
    http_settings = HttpClientConfig(
        api_url="https://api.test.com",
        api_token="test-token",
    )
    return PrefactorCoreConfig(http_config=http_settings)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestPrefactorCoreSdkHeader:
    """Checks the SDK header value handed to the HTTP client on initialize."""

    async def test_initialize_sets_core_sdk_header(self):
        """Without an adapter entry the header is exactly the core entry."""
        core = PrefactorCoreClient(create_config())

        # Replace the HTTP client's async context hooks with mocks.
        enter_patch = patch(
            "prefactor_http.client.PrefactorHttpClient.__aenter__",
            AsyncMock(return_value=None),
        )
        exit_patch = patch(
            "prefactor_http.client.PrefactorHttpClient.__aexit__",
            AsyncMock(return_value=None),
        )
        with enter_patch, exit_patch:
            await core.initialize()
            http = core._http
            assert http is not None
            assert http._sdk_header == CORE_SDK_HEADER_ENTRY
            await core.close()

    async def test_initialize_prepends_adapter_sdk_header(self):
        """An adapter entry is placed before the core entry, space-separated."""
        core = PrefactorCoreClient(
            create_config(),
            sdk_header_entry="prefactor-langchain@0.2.4",
        )

        # Replace the HTTP client's async context hooks with mocks.
        enter_patch = patch(
            "prefactor_http.client.PrefactorHttpClient.__aenter__",
            AsyncMock(return_value=None),
        )
        exit_patch = patch(
            "prefactor_http.client.PrefactorHttpClient.__aexit__",
            AsyncMock(return_value=None),
        )
        with enter_patch, exit_patch:
            await core.initialize()
            http = core._http
            assert http is not None
            expected = f"prefactor-langchain@0.2.4 {CORE_SDK_HEADER_ENTRY}"
            assert http._sdk_header == expected
            await core.close()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TestCoreVersionHelpers:
    """Checks the version value exported by the package."""

    def test_package_version_matches_public_export(self):
        """``prefactor_core.__version__`` must equal ``PACKAGE_VERSION``."""
        import prefactor_core

        exported = prefactor_core.__version__
        assert exported == PACKAGE_VERSION
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Tests for SpanContext finish behavior."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import AsyncMock
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
from prefactor_core.span_context import SpanContext
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.mark.asyncio
async def test_explicit_complete_then_finish_is_a_no_op():
    """complete() followed by finish() must finish the span only once.

    The span manager's ``start`` and ``finish`` are each expected to be
    awaited exactly one time.
    """
    manager = AsyncMock()
    manager.start = AsyncMock(return_value="api-span-id")
    manager.finish = AsyncMock()
    span = SpanContext("temp-span-id", manager)

    await span.complete({"ok": True})
    await span.finish()

    started = manager.start.await_count
    finished = manager.finish.await_count
    assert started == 1
    assert finished == 1
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@pytest.mark.asyncio
async def test_failed_finish_can_retry_same_request():
    """A finish that raised must be attempted again by a later finish().

    The manager's ``finish`` fails on the first await and succeeds on the
    second; ``start`` must still be awaited only once.
    """
    manager = AsyncMock()
    manager.start = AsyncMock(return_value="api-span-id")
    # First await raises, second returns None.
    manager.finish = AsyncMock(side_effect=[RuntimeError("boom"), None])
    span = SpanContext("temp-span-id", manager)

    with pytest.raises(RuntimeError, match="boom"):
        await span.complete({"ok": True})

    await span.finish()

    started = manager.start.await_count
    finished = manager.finish.await_count
    assert started == 1
    assert finished == 2
|
|
@@ -83,3 +83,17 @@ class TestSpanManagerIdempotencyKeys:
|
|
|
83
83
|
assert key is not None
|
|
84
84
|
assert len(key) <= 64
|
|
85
85
|
uuid.UUID(key)
|
|
86
|
+
|
|
87
|
+
async def test_child_spans_remap_temp_parent_id_to_api_id(
    self, manager, http_client
):
    """A child prepared before its parent starts must be created under the API ID.

    Both spans are prepared first and then started parent-first; the most
    recent create call must carry the ID returned for the parent as
    ``parent_span_id``.
    """
    root_temp = manager.prepare(instance_id="inst-1", schema_name="agent:root")
    leaf_temp = manager.prepare(instance_id="inst-1", schema_name="agent:child")

    root_api_id = await manager.start(root_temp)
    await manager.start(leaf_temp)

    # call_args holds the most recent create call.
    last_create = http_client.agent_spans.create.call_args
    assert root_api_id == "api-span-id"
    assert last_create.kwargs["parent_span_id"] == root_api_id
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|