prefactor-core 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/.gitignore +1 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/PKG-INFO +2 -2
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/pyproject.toml +5 -2
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/__init__.py +10 -2
- prefactor_core-0.2.3/src/prefactor_core/_version.py +7 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/client.py +96 -5
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/exceptions.py +20 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/managers/agent_instance.py +42 -17
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/managers/span.py +22 -5
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/operations.py +2 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/queue/executor.py +11 -2
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/queue/memory.py +4 -4
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/schema_registry.py +25 -1
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/span_context.py +27 -4
- prefactor_core-0.2.3/src/prefactor_core/utils.py +41 -0
- prefactor_core-0.2.3/tests/test_failure_handling.py +322 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/tests/test_queue.py +31 -0
- prefactor_core-0.2.3/tests/test_sdk_header.py +78 -0
- prefactor_core-0.2.3/tests/test_span_context.py +42 -0
- prefactor_core-0.2.3/tests/test_span_manager.py +99 -0
- prefactor_core-0.2.3/tests/test_utils.py +46 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/README.md +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/examples/agent_e2e.py +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/config.py +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/context_stack.py +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/managers/__init__.py +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/models.py +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/queue/__init__.py +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/queue/base.py +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/tests/test_client.py +0 -0
- {prefactor_core-0.2.1 → prefactor_core-0.2.3}/tests/test_imports.py +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: prefactor-core
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Core Prefactor SDK with async queue-based operations
|
|
5
5
|
Author-email: Prefactor Pty Ltd <josh@prefactor.tech>
|
|
6
6
|
License: MIT
|
|
7
7
|
Requires-Python: <4.0.0,>=3.11.0
|
|
8
|
-
Requires-Dist: prefactor-http>=0.1.
|
|
8
|
+
Requires-Dist: prefactor-http>=0.1.1
|
|
9
9
|
Requires-Dist: pydantic>=2.0.0
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "prefactor-core"
|
|
3
|
-
|
|
3
|
+
dynamic = ["version"]
|
|
4
4
|
description = "Core Prefactor SDK with async queue-based operations"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "MIT" }
|
|
@@ -9,7 +9,7 @@ authors = [
|
|
|
9
9
|
]
|
|
10
10
|
requires-python = ">=3.11.0, <4.0.0"
|
|
11
11
|
dependencies = [
|
|
12
|
-
"prefactor-http>=0.1.
|
|
12
|
+
"prefactor-http>=0.1.1",
|
|
13
13
|
"pydantic>=2.0.0",
|
|
14
14
|
]
|
|
15
15
|
|
|
@@ -17,6 +17,9 @@ dependencies = [
|
|
|
17
17
|
requires = ["hatchling"]
|
|
18
18
|
build-backend = "hatchling.build"
|
|
19
19
|
|
|
20
|
+
[tool.hatch.version]
|
|
21
|
+
path = "src/prefactor_core/_version.py"
|
|
22
|
+
|
|
20
23
|
[tool.hatch.build.targets.wheel]
|
|
21
24
|
packages = ["src/prefactor_core"]
|
|
22
25
|
|
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
This module exports the main classes and functions for the prefactor-core SDK.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from ._version import __version__
|
|
6
9
|
from .client import PrefactorCoreClient
|
|
7
10
|
from .config import PrefactorCoreConfig, QueueConfig
|
|
8
11
|
from .context_stack import SpanContextStack
|
|
@@ -12,6 +15,7 @@ from .exceptions import (
|
|
|
12
15
|
InstanceNotFoundError,
|
|
13
16
|
OperationError,
|
|
14
17
|
PrefactorCoreError,
|
|
18
|
+
PrefactorTelemetryFailureError,
|
|
15
19
|
SpanNotFoundError,
|
|
16
20
|
)
|
|
17
21
|
from .managers.agent_instance import AgentInstanceHandle
|
|
@@ -20,8 +24,7 @@ from .operations import Operation, OperationType
|
|
|
20
24
|
from .queue import InMemoryQueue, Queue, QueueClosedError, TaskExecutor
|
|
21
25
|
from .schema_registry import SchemaRegistry
|
|
22
26
|
from .span_context import SpanContext
|
|
23
|
-
|
|
24
|
-
__version__ = "0.2.1"
|
|
27
|
+
from .utils import generate_idempotency_key, validate_idempotency_key
|
|
25
28
|
|
|
26
29
|
__all__ = [
|
|
27
30
|
# Client
|
|
@@ -39,6 +42,7 @@ __all__ = [
|
|
|
39
42
|
"OperationError",
|
|
40
43
|
"InstanceNotFoundError",
|
|
41
44
|
"SpanNotFoundError",
|
|
45
|
+
"PrefactorTelemetryFailureError",
|
|
42
46
|
# Models
|
|
43
47
|
"AgentInstance",
|
|
44
48
|
"Span",
|
|
@@ -54,4 +58,8 @@ __all__ = [
|
|
|
54
58
|
"AgentInstanceHandle",
|
|
55
59
|
# Schema Registry
|
|
56
60
|
"SchemaRegistry",
|
|
61
|
+
# Utils
|
|
62
|
+
"generate_idempotency_key",
|
|
63
|
+
"validate_idempotency_key",
|
|
64
|
+
"__version__",
|
|
57
65
|
]
|
|
@@ -13,12 +13,16 @@ from contextlib import asynccontextmanager
|
|
|
13
13
|
from typing import TYPE_CHECKING, Any
|
|
14
14
|
|
|
15
15
|
from prefactor_http.client import PrefactorHttpClient
|
|
16
|
+
from prefactor_http.exceptions import is_permanent_http_error, is_transient_http_error
|
|
16
17
|
|
|
18
|
+
from ._version import PACKAGE_NAME as CORE_PACKAGE_NAME
|
|
19
|
+
from ._version import PACKAGE_VERSION as CORE_PACKAGE_VERSION
|
|
17
20
|
from .config import PrefactorCoreConfig
|
|
18
21
|
from .context_stack import SpanContextStack
|
|
19
22
|
from .exceptions import (
|
|
20
23
|
ClientAlreadyInitializedError,
|
|
21
24
|
ClientNotInitializedError,
|
|
25
|
+
PrefactorTelemetryFailureError,
|
|
22
26
|
)
|
|
23
27
|
from .managers.agent_instance import AgentInstanceManager
|
|
24
28
|
from .managers.span import SpanManager
|
|
@@ -31,6 +35,7 @@ if TYPE_CHECKING:
|
|
|
31
35
|
from .managers.agent_instance import AgentInstanceHandle
|
|
32
36
|
|
|
33
37
|
logger = logging.getLogger(__name__)
|
|
38
|
+
CORE_SDK_HEADER_ENTRY = f"{CORE_PACKAGE_NAME}@{CORE_PACKAGE_VERSION}"
|
|
34
39
|
|
|
35
40
|
|
|
36
41
|
class PrefactorCoreClient:
|
|
@@ -61,6 +66,7 @@ class PrefactorCoreClient:
|
|
|
61
66
|
self,
|
|
62
67
|
config: PrefactorCoreConfig,
|
|
63
68
|
queue: Queue[Operation] | None = None,
|
|
69
|
+
sdk_header_entry: str | None = None,
|
|
64
70
|
) -> None:
|
|
65
71
|
"""Initialize the client.
|
|
66
72
|
|
|
@@ -68,14 +74,30 @@ class PrefactorCoreClient:
|
|
|
68
74
|
config: Configuration for the client.
|
|
69
75
|
queue: Optional custom queue implementation. If not provided,
|
|
70
76
|
an InMemoryQueue is used.
|
|
77
|
+
sdk_header_entry: Optional upstream SDK header entry to prepend.
|
|
71
78
|
"""
|
|
72
79
|
self._config = config
|
|
73
80
|
self._queue = queue or InMemoryQueue()
|
|
81
|
+
self._sdk_header_entry = sdk_header_entry.strip() if sdk_header_entry else None
|
|
74
82
|
self._http: PrefactorHttpClient | None = None
|
|
75
83
|
self._executor: TaskExecutor | None = None
|
|
76
84
|
self._instance_manager: AgentInstanceManager | None = None
|
|
77
85
|
self._span_manager: SpanManager | None = None
|
|
78
86
|
self._initialized = False
|
|
87
|
+
self._telemetry_failure: PrefactorTelemetryFailureError | None = None
|
|
88
|
+
self._telemetry_failure_observed = False
|
|
89
|
+
|
|
90
|
+
def _build_http_sdk_header(self) -> str:
|
|
91
|
+
"""Build the effective SDK header for HTTP requests."""
|
|
92
|
+
if self._sdk_header_entry:
|
|
93
|
+
return f"{self._sdk_header_entry} {CORE_SDK_HEADER_ENTRY}"
|
|
94
|
+
return CORE_SDK_HEADER_ENTRY
|
|
95
|
+
|
|
96
|
+
def _set_sdk_header_entry(self, sdk_header_entry: str | None) -> None:
|
|
97
|
+
"""Set the upstream SDK header entry for this client lifetime."""
|
|
98
|
+
self._sdk_header_entry = sdk_header_entry.strip() if sdk_header_entry else None
|
|
99
|
+
if self._http is not None:
|
|
100
|
+
self._http._sdk_header = self._build_http_sdk_header()
|
|
79
101
|
|
|
80
102
|
async def __aenter__(self) -> "PrefactorCoreClient":
|
|
81
103
|
"""Enter async context manager."""
|
|
@@ -84,7 +106,11 @@ class PrefactorCoreClient:
|
|
|
84
106
|
|
|
85
107
|
async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
86
108
|
"""Exit async context manager."""
|
|
87
|
-
|
|
109
|
+
try:
|
|
110
|
+
await self.close()
|
|
111
|
+
except PrefactorTelemetryFailureError:
|
|
112
|
+
if exc_type is None:
|
|
113
|
+
raise
|
|
88
114
|
|
|
89
115
|
async def initialize(self) -> None:
|
|
90
116
|
"""Initialize the client and start processing.
|
|
@@ -101,13 +127,17 @@ class PrefactorCoreClient:
|
|
|
101
127
|
raise ClientAlreadyInitializedError("Client is already initialized")
|
|
102
128
|
|
|
103
129
|
# Initialize HTTP client
|
|
104
|
-
self._http = PrefactorHttpClient(
|
|
130
|
+
self._http = PrefactorHttpClient(
|
|
131
|
+
self._config.http_config,
|
|
132
|
+
sdk_header=self._build_http_sdk_header(),
|
|
133
|
+
)
|
|
105
134
|
await self._http.__aenter__()
|
|
106
135
|
|
|
107
136
|
# Initialize executor
|
|
108
137
|
self._executor = TaskExecutor(
|
|
109
138
|
queue=self._queue,
|
|
110
139
|
handler=self._process_operation,
|
|
140
|
+
is_retryable=self._is_retryable_operation_error,
|
|
111
141
|
num_workers=self._config.queue_config.num_workers,
|
|
112
142
|
max_retries=self._config.queue_config.max_retries,
|
|
113
143
|
)
|
|
@@ -144,6 +174,10 @@ class PrefactorCoreClient:
|
|
|
144
174
|
|
|
145
175
|
self._initialized = False
|
|
146
176
|
|
|
177
|
+
if self._telemetry_failure is not None and not self._telemetry_failure_observed:
|
|
178
|
+
self._telemetry_failure_observed = True
|
|
179
|
+
raise self._telemetry_failure
|
|
180
|
+
|
|
147
181
|
def _ensure_initialized(self) -> None:
|
|
148
182
|
"""Ensure the client is initialized.
|
|
149
183
|
|
|
@@ -156,12 +190,55 @@ class PrefactorCoreClient:
|
|
|
156
190
|
"use as context manager."
|
|
157
191
|
)
|
|
158
192
|
|
|
193
|
+
def _record_telemetry_failure(
|
|
194
|
+
self, cause: Exception, operation_type: OperationType | str
|
|
195
|
+
) -> None:
|
|
196
|
+
"""Latch the first permanent telemetry failure."""
|
|
197
|
+
if self._telemetry_failure is not None:
|
|
198
|
+
return
|
|
199
|
+
if isinstance(operation_type, OperationType):
|
|
200
|
+
operation_name = operation_type.name
|
|
201
|
+
else:
|
|
202
|
+
operation_name = str(operation_type)
|
|
203
|
+
self._telemetry_failure = PrefactorTelemetryFailureError(
|
|
204
|
+
f"Telemetry permanently failed during {operation_name}",
|
|
205
|
+
cause=cause,
|
|
206
|
+
operation_type=operation_name,
|
|
207
|
+
dropped_operations=0,
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
def _increment_dropped_operations(self) -> None:
|
|
211
|
+
"""Increment the dropped operation counter on the latched failure."""
|
|
212
|
+
if self._telemetry_failure is None:
|
|
213
|
+
return
|
|
214
|
+
self._telemetry_failure.dropped_operations += 1
|
|
215
|
+
|
|
216
|
+
def _raise_if_telemetry_failed(self) -> None:
|
|
217
|
+
"""Raise the latched telemetry failure for caller-visible operations."""
|
|
218
|
+
if self._telemetry_failure is None:
|
|
219
|
+
return
|
|
220
|
+
self._telemetry_failure_observed = True
|
|
221
|
+
raise self._telemetry_failure
|
|
222
|
+
|
|
223
|
+
def _is_retryable_operation_error(self, error: Exception) -> bool:
|
|
224
|
+
"""Return True when the worker should retry the operation."""
|
|
225
|
+
if isinstance(error, PrefactorTelemetryFailureError):
|
|
226
|
+
return False
|
|
227
|
+
if is_permanent_http_error(error):
|
|
228
|
+
return False
|
|
229
|
+
if is_transient_http_error(error):
|
|
230
|
+
return True
|
|
231
|
+
return True
|
|
232
|
+
|
|
159
233
|
async def _enqueue(self, operation: Operation) -> None:
|
|
160
234
|
"""Add an operation to the queue.
|
|
161
235
|
|
|
162
236
|
Args:
|
|
163
237
|
operation: The operation to queue.
|
|
164
238
|
"""
|
|
239
|
+
if self._telemetry_failure is not None:
|
|
240
|
+
self._increment_dropped_operations()
|
|
241
|
+
self._raise_if_telemetry_failed()
|
|
165
242
|
await self._queue.put(operation)
|
|
166
243
|
|
|
167
244
|
async def _process_operation(self, operation: Operation) -> None:
|
|
@@ -174,6 +251,9 @@ class PrefactorCoreClient:
|
|
|
174
251
|
"""
|
|
175
252
|
if not self._http:
|
|
176
253
|
return
|
|
254
|
+
if self._telemetry_failure is not None:
|
|
255
|
+
self._increment_dropped_operations()
|
|
256
|
+
return
|
|
177
257
|
|
|
178
258
|
try:
|
|
179
259
|
if operation.type == OperationType.REGISTER_AGENT_INSTANCE:
|
|
@@ -188,12 +268,14 @@ class PrefactorCoreClient:
|
|
|
188
268
|
await self._http.agent_instances.start(
|
|
189
269
|
agent_instance_id=operation.payload["instance_id"],
|
|
190
270
|
timestamp=operation.timestamp,
|
|
271
|
+
idempotency_key=operation.payload.get("idempotency_key"),
|
|
191
272
|
)
|
|
192
273
|
|
|
193
274
|
elif operation.type == OperationType.FINISH_AGENT_INSTANCE:
|
|
194
275
|
await self._http.agent_instances.finish(
|
|
195
276
|
agent_instance_id=operation.payload["instance_id"],
|
|
196
277
|
timestamp=operation.timestamp,
|
|
278
|
+
idempotency_key=operation.payload.get("idempotency_key"),
|
|
197
279
|
)
|
|
198
280
|
elif operation.type == OperationType.CREATE_SPAN:
|
|
199
281
|
await self._http.agent_spans.create(
|
|
@@ -211,16 +293,24 @@ class PrefactorCoreClient:
|
|
|
211
293
|
status=operation.payload.get("status", "complete"),
|
|
212
294
|
result_payload=operation.payload.get("result_payload"),
|
|
213
295
|
timestamp=operation.timestamp,
|
|
296
|
+
idempotency_key=operation.payload.get("idempotency_key"),
|
|
214
297
|
)
|
|
215
298
|
|
|
216
299
|
except Exception as e:
|
|
217
|
-
|
|
300
|
+
if is_permanent_http_error(e):
|
|
301
|
+
self._record_telemetry_failure(e, operation.type)
|
|
302
|
+
# Log error and re-raise so TaskExecutor retries can run
|
|
218
303
|
logger.error(
|
|
219
304
|
f"Failed to process operation {operation.type}: {e}",
|
|
220
305
|
exc_info=True,
|
|
221
306
|
)
|
|
222
307
|
raise
|
|
223
308
|
|
|
309
|
+
@property
|
|
310
|
+
def instance_manager(self) -> AgentInstanceManager | None:
|
|
311
|
+
"""Public accessor for the agent instance manager."""
|
|
312
|
+
return self._instance_manager
|
|
313
|
+
|
|
224
314
|
async def create_agent_instance(
|
|
225
315
|
self,
|
|
226
316
|
agent_id: str,
|
|
@@ -254,6 +344,7 @@ class PrefactorCoreClient:
|
|
|
254
344
|
ValueError: If no schema version provided and registry not configured.
|
|
255
345
|
"""
|
|
256
346
|
self._ensure_initialized()
|
|
347
|
+
self._raise_if_telemetry_failed()
|
|
257
348
|
assert self._instance_manager is not None
|
|
258
349
|
|
|
259
350
|
# Determine the agent_schema_version to use
|
|
@@ -310,6 +401,7 @@ class PrefactorCoreClient:
|
|
|
310
401
|
The span ID.
|
|
311
402
|
"""
|
|
312
403
|
self._ensure_initialized()
|
|
404
|
+
self._raise_if_telemetry_failed()
|
|
313
405
|
assert self._span_manager is not None
|
|
314
406
|
|
|
315
407
|
if parent_span_id is None:
|
|
@@ -344,7 +436,6 @@ class PrefactorCoreClient:
|
|
|
344
436
|
instance_id: str,
|
|
345
437
|
schema_name: str,
|
|
346
438
|
parent_span_id: str | None = None,
|
|
347
|
-
span_id: str | None = None,
|
|
348
439
|
payload: dict[str, Any] | None = None,
|
|
349
440
|
):
|
|
350
441
|
"""Context manager for creating and finishing a span.
|
|
@@ -366,7 +457,6 @@ class PrefactorCoreClient:
|
|
|
366
457
|
instance_id: ID of the agent instance this span belongs to.
|
|
367
458
|
schema_name: Name of the schema for this span.
|
|
368
459
|
parent_span_id: Optional explicit parent span ID.
|
|
369
|
-
span_id: Ignored (API generates IDs).
|
|
370
460
|
payload: Optional initial payload sent via auto-start on exit
|
|
371
461
|
if ``start()`` is never called explicitly.
|
|
372
462
|
|
|
@@ -374,6 +464,7 @@ class PrefactorCoreClient:
|
|
|
374
464
|
SpanContext for the created span.
|
|
375
465
|
"""
|
|
376
466
|
self._ensure_initialized()
|
|
467
|
+
self._raise_if_telemetry_failed()
|
|
377
468
|
assert self._span_manager is not None
|
|
378
469
|
|
|
379
470
|
# Import here to avoid circular import
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Custom exceptions for prefactor-core."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
|
|
4
6
|
class PrefactorCoreError(Exception):
|
|
5
7
|
"""Base exception for all prefactor-core errors."""
|
|
@@ -39,6 +41,23 @@ class SpanNotFoundError(PrefactorCoreError):
|
|
|
39
41
|
pass
|
|
40
42
|
|
|
41
43
|
|
|
44
|
+
class PrefactorTelemetryFailureError(PrefactorCoreError):
|
|
45
|
+
"""Raised when telemetry enters a permanent failure state."""
|
|
46
|
+
|
|
47
|
+
def __init__(
|
|
48
|
+
self,
|
|
49
|
+
message: str,
|
|
50
|
+
*,
|
|
51
|
+
cause: Exception,
|
|
52
|
+
operation_type: str | None = None,
|
|
53
|
+
dropped_operations: int = 0,
|
|
54
|
+
) -> None:
|
|
55
|
+
super().__init__(message)
|
|
56
|
+
self.cause = cause
|
|
57
|
+
self.operation_type = operation_type
|
|
58
|
+
self.dropped_operations = dropped_operations
|
|
59
|
+
|
|
60
|
+
|
|
42
61
|
__all__ = [
|
|
43
62
|
"PrefactorCoreError",
|
|
44
63
|
"ClientNotInitializedError",
|
|
@@ -46,4 +65,5 @@ __all__ = [
|
|
|
46
65
|
"OperationError",
|
|
47
66
|
"InstanceNotFoundError",
|
|
48
67
|
"SpanNotFoundError",
|
|
68
|
+
"PrefactorTelemetryFailureError",
|
|
49
69
|
]
|
|
@@ -10,6 +10,7 @@ from datetime import datetime, timezone
|
|
|
10
10
|
from typing import TYPE_CHECKING, Any, Awaitable, Callable
|
|
11
11
|
|
|
12
12
|
from ..operations import Operation, OperationType
|
|
13
|
+
from ..utils import generate_idempotency_key
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
15
16
|
from prefactor_http.client import PrefactorHttpClient
|
|
@@ -71,7 +72,9 @@ class AgentInstanceManager:
|
|
|
71
72
|
agent_id: ID of the agent to create an instance for.
|
|
72
73
|
agent_version: Version information (name, external_identifier, etc.).
|
|
73
74
|
agent_schema_version: Schema version information.
|
|
74
|
-
instance_id:
|
|
75
|
+
instance_id: Optional ID to forward to the API as ``id``. When
|
|
76
|
+
provided, the API uses it as the instance ID; when omitted,
|
|
77
|
+
the API generates one.
|
|
75
78
|
|
|
76
79
|
Returns:
|
|
77
80
|
The instance ID (API-generated).
|
|
@@ -81,6 +84,7 @@ class AgentInstanceManager:
|
|
|
81
84
|
agent_version=agent_version,
|
|
82
85
|
agent_schema_version=agent_schema_version,
|
|
83
86
|
id=instance_id,
|
|
87
|
+
idempotency_key=generate_idempotency_key(),
|
|
84
88
|
)
|
|
85
89
|
return result.id
|
|
86
90
|
|
|
@@ -92,9 +96,20 @@ class AgentInstanceManager:
|
|
|
92
96
|
Args:
|
|
93
97
|
instance_id: The ID of the instance to start.
|
|
94
98
|
"""
|
|
99
|
+
await self.start_with_idempotency_key(instance_id, generate_idempotency_key())
|
|
100
|
+
|
|
101
|
+
async def start_with_idempotency_key(
|
|
102
|
+
self,
|
|
103
|
+
instance_id: str,
|
|
104
|
+
idempotency_key: str,
|
|
105
|
+
) -> None:
|
|
106
|
+
"""Queue a start operation using a stable idempotency key."""
|
|
95
107
|
operation = Operation(
|
|
96
108
|
type=OperationType.START_AGENT_INSTANCE,
|
|
97
|
-
payload={
|
|
109
|
+
payload={
|
|
110
|
+
"instance_id": instance_id,
|
|
111
|
+
"idempotency_key": idempotency_key,
|
|
112
|
+
},
|
|
98
113
|
timestamp=datetime.now(timezone.utc),
|
|
99
114
|
)
|
|
100
115
|
|
|
@@ -108,9 +123,20 @@ class AgentInstanceManager:
|
|
|
108
123
|
Args:
|
|
109
124
|
instance_id: The ID of the instance to finish.
|
|
110
125
|
"""
|
|
126
|
+
await self.finish_with_idempotency_key(instance_id, generate_idempotency_key())
|
|
127
|
+
|
|
128
|
+
async def finish_with_idempotency_key(
|
|
129
|
+
self,
|
|
130
|
+
instance_id: str,
|
|
131
|
+
idempotency_key: str,
|
|
132
|
+
) -> None:
|
|
133
|
+
"""Queue a finish operation using a stable idempotency key."""
|
|
111
134
|
operation = Operation(
|
|
112
135
|
type=OperationType.FINISH_AGENT_INSTANCE,
|
|
113
|
-
payload={
|
|
136
|
+
payload={
|
|
137
|
+
"instance_id": instance_id,
|
|
138
|
+
"idempotency_key": idempotency_key,
|
|
139
|
+
},
|
|
114
140
|
timestamp=datetime.now(timezone.utc),
|
|
115
141
|
)
|
|
116
142
|
|
|
@@ -149,8 +175,8 @@ class AgentInstanceHandle:
|
|
|
149
175
|
"""
|
|
150
176
|
self._instance_id = instance_id
|
|
151
177
|
self._client = client
|
|
152
|
-
self.
|
|
153
|
-
self.
|
|
178
|
+
self._start_idempotency_key = generate_idempotency_key()
|
|
179
|
+
self._finish_idempotency_key = generate_idempotency_key()
|
|
154
180
|
|
|
155
181
|
@property
|
|
156
182
|
def id(self) -> str:
|
|
@@ -166,26 +192,24 @@ class AgentInstanceHandle:
|
|
|
166
192
|
|
|
167
193
|
This queues a start operation for the instance.
|
|
168
194
|
"""
|
|
169
|
-
|
|
170
|
-
return
|
|
171
|
-
|
|
172
|
-
manager = self._client._instance_manager
|
|
195
|
+
manager = self._client.instance_manager
|
|
173
196
|
assert manager is not None
|
|
174
|
-
await manager.
|
|
175
|
-
|
|
197
|
+
await manager.start_with_idempotency_key(
|
|
198
|
+
self._instance_id,
|
|
199
|
+
self._start_idempotency_key,
|
|
200
|
+
)
|
|
176
201
|
|
|
177
202
|
async def finish(self) -> None:
|
|
178
203
|
"""Mark the instance as finished.
|
|
179
204
|
|
|
180
205
|
This queues a finish operation for the instance.
|
|
181
206
|
"""
|
|
182
|
-
|
|
183
|
-
return
|
|
184
|
-
|
|
185
|
-
manager = self._client._instance_manager
|
|
207
|
+
manager = self._client.instance_manager
|
|
186
208
|
assert manager is not None
|
|
187
|
-
await manager.
|
|
188
|
-
|
|
209
|
+
await manager.finish_with_idempotency_key(
|
|
210
|
+
self._instance_id,
|
|
211
|
+
self._finish_idempotency_key,
|
|
212
|
+
)
|
|
189
213
|
|
|
190
214
|
async def create_span(
|
|
191
215
|
self,
|
|
@@ -205,6 +229,7 @@ class AgentInstanceHandle:
|
|
|
205
229
|
Returns:
|
|
206
230
|
The span ID.
|
|
207
231
|
"""
|
|
232
|
+
self._client._raise_if_telemetry_failed()
|
|
208
233
|
return await self._client.create_span(
|
|
209
234
|
instance_id=self._instance_id,
|
|
210
235
|
schema_name=schema_name,
|
|
@@ -5,13 +5,13 @@ calls into Operation objects that are queued for processing. It also manages
|
|
|
5
5
|
the span stack for automatic parent detection.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import uuid
|
|
9
8
|
from datetime import datetime, timezone
|
|
10
9
|
from typing import TYPE_CHECKING, Any, Awaitable, Callable
|
|
11
10
|
|
|
12
11
|
from ..context_stack import SpanContextStack
|
|
13
12
|
from ..models import Span
|
|
14
13
|
from ..operations import Operation, OperationType
|
|
14
|
+
from ..utils import generate_idempotency_key
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
17
|
from prefactor_http.client import PrefactorHttpClient
|
|
@@ -90,7 +90,7 @@ class SpanManager:
|
|
|
90
90
|
if parent_span_id is None:
|
|
91
91
|
parent_span_id = SpanContextStack.peek()
|
|
92
92
|
|
|
93
|
-
temp_id =
|
|
93
|
+
temp_id = generate_idempotency_key()
|
|
94
94
|
|
|
95
95
|
span = Span(
|
|
96
96
|
id=temp_id,
|
|
@@ -139,6 +139,7 @@ class SpanManager:
|
|
|
139
139
|
status="active",
|
|
140
140
|
payload=payload or {},
|
|
141
141
|
parent_span_id=span.parent_span_id,
|
|
142
|
+
idempotency_key=generate_idempotency_key(),
|
|
142
143
|
)
|
|
143
144
|
|
|
144
145
|
api_id = result.id
|
|
@@ -150,6 +151,10 @@ class SpanManager:
|
|
|
150
151
|
del self._spans[temp_id]
|
|
151
152
|
self._spans[api_id] = span
|
|
152
153
|
|
|
154
|
+
for child_span in self._spans.values():
|
|
155
|
+
if child_span.parent_span_id == temp_id:
|
|
156
|
+
child_span.parent_span_id = api_id
|
|
157
|
+
|
|
153
158
|
# Replace temp ID on the context stack
|
|
154
159
|
stack = SpanContextStack.get_stack()
|
|
155
160
|
new_stack = [api_id if s == temp_id else s for s in stack]
|
|
@@ -185,12 +190,14 @@ class SpanManager:
|
|
|
185
190
|
status="pending",
|
|
186
191
|
payload={},
|
|
187
192
|
parent_span_id=span.parent_span_id,
|
|
193
|
+
idempotency_key=generate_idempotency_key(),
|
|
188
194
|
)
|
|
189
195
|
api_id = result.id
|
|
190
196
|
|
|
191
197
|
await self._http.agent_spans.finish(
|
|
192
198
|
agent_span_id=api_id,
|
|
193
199
|
status="cancelled",
|
|
200
|
+
idempotency_key=generate_idempotency_key(),
|
|
194
201
|
)
|
|
195
202
|
|
|
196
203
|
span.status = "cancelled"
|
|
@@ -239,6 +246,7 @@ class SpanManager:
|
|
|
239
246
|
span_id: str,
|
|
240
247
|
result_payload: dict[str, Any] | None = None,
|
|
241
248
|
status: "FinishStatus" = "complete",
|
|
249
|
+
idempotency_key: str | None = None,
|
|
242
250
|
) -> None:
|
|
243
251
|
"""Mark a span as finished.
|
|
244
252
|
|
|
@@ -251,6 +259,8 @@ class SpanManager:
|
|
|
251
259
|
``"cancelled"`` (default: ``"complete"``). The span must be
|
|
252
260
|
``active`` for this to succeed; use ``cancel_unstarted()``
|
|
253
261
|
to cancel a span that was never started.
|
|
262
|
+
idempotency_key: Optional key to make repeated finish requests
|
|
263
|
+
duplicate-safe. When omitted, a new key is generated.
|
|
254
264
|
|
|
255
265
|
Raises:
|
|
256
266
|
KeyError: If the span ID is not known.
|
|
@@ -261,10 +271,17 @@ class SpanManager:
|
|
|
261
271
|
self._spans[span_id].status = status
|
|
262
272
|
self._spans[span_id].finished_at = datetime.now(timezone.utc)
|
|
263
273
|
|
|
264
|
-
|
|
265
|
-
|
|
274
|
+
stack = SpanContextStack.get_stack()
|
|
275
|
+
if span_id in stack:
|
|
276
|
+
from ..context_stack import _current_span_stack
|
|
277
|
+
|
|
278
|
+
_current_span_stack.set([s for s in stack if s != span_id])
|
|
266
279
|
|
|
267
|
-
op_payload: dict[str, Any] = {
|
|
280
|
+
op_payload: dict[str, Any] = {
|
|
281
|
+
"span_id": span_id,
|
|
282
|
+
"status": status,
|
|
283
|
+
"idempotency_key": idempotency_key or generate_idempotency_key(),
|
|
284
|
+
}
|
|
268
285
|
if result_payload is not None:
|
|
269
286
|
op_payload["result_payload"] = result_payload
|
|
270
287
|
|
|
@@ -42,6 +42,8 @@ class TaskExecutor:
|
|
|
42
42
|
handler: Callable[[Any], Awaitable[None]],
|
|
43
43
|
num_workers: int = 3,
|
|
44
44
|
max_retries: int = 3,
|
|
45
|
+
*,
|
|
46
|
+
is_retryable: Callable[[Exception], bool] | None = None,
|
|
45
47
|
) -> None:
|
|
46
48
|
"""Initialize the task executor.
|
|
47
49
|
|
|
@@ -50,9 +52,12 @@ class TaskExecutor:
|
|
|
50
52
|
handler: Async function to process each item.
|
|
51
53
|
num_workers: Number of concurrent worker tasks.
|
|
52
54
|
max_retries: Maximum retry attempts per item.
|
|
55
|
+
is_retryable: Optional predicate that decides whether a
|
|
56
|
+
handler failure should be retried.
|
|
53
57
|
"""
|
|
54
58
|
self._queue = queue
|
|
55
59
|
self._handler = handler
|
|
60
|
+
self._is_retryable = is_retryable or (lambda exc: True)
|
|
56
61
|
self._num_workers = num_workers
|
|
57
62
|
self._max_retries = max_retries
|
|
58
63
|
self._workers: list[Task] = []
|
|
@@ -162,13 +167,17 @@ class TaskExecutor:
|
|
|
162
167
|
"""
|
|
163
168
|
last_error: Exception | None = None
|
|
164
169
|
|
|
165
|
-
|
|
170
|
+
total_attempts = self._max_retries + 1
|
|
171
|
+
|
|
172
|
+
for attempt in range(total_attempts):
|
|
166
173
|
try:
|
|
167
174
|
await self._handler(item)
|
|
168
175
|
return
|
|
169
176
|
except Exception as e:
|
|
170
177
|
last_error = e
|
|
171
|
-
if
|
|
178
|
+
if not self._is_retryable(e):
|
|
179
|
+
raise
|
|
180
|
+
if attempt < total_attempts - 1:
|
|
172
181
|
delay = 2**attempt # 1s, 2s, 4s
|
|
173
182
|
logger.warning(
|
|
174
183
|
f"Attempt {attempt + 1} failed, retrying in {delay}s: {e}"
|