prefactor-core 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/.gitignore +1 -0
  2. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/PKG-INFO +2 -2
  3. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/pyproject.toml +5 -2
  4. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/__init__.py +10 -2
  5. prefactor_core-0.2.3/src/prefactor_core/_version.py +7 -0
  6. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/client.py +96 -5
  7. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/exceptions.py +20 -0
  8. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/managers/agent_instance.py +42 -17
  9. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/managers/span.py +22 -5
  10. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/operations.py +2 -0
  11. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/queue/executor.py +11 -2
  12. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/queue/memory.py +4 -4
  13. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/schema_registry.py +25 -1
  14. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/span_context.py +27 -4
  15. prefactor_core-0.2.3/src/prefactor_core/utils.py +41 -0
  16. prefactor_core-0.2.3/tests/test_failure_handling.py +322 -0
  17. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/tests/test_queue.py +31 -0
  18. prefactor_core-0.2.3/tests/test_sdk_header.py +78 -0
  19. prefactor_core-0.2.3/tests/test_span_context.py +42 -0
  20. prefactor_core-0.2.3/tests/test_span_manager.py +99 -0
  21. prefactor_core-0.2.3/tests/test_utils.py +46 -0
  22. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/README.md +0 -0
  23. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/examples/agent_e2e.py +0 -0
  24. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/config.py +0 -0
  25. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/context_stack.py +0 -0
  26. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/managers/__init__.py +0 -0
  27. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/models.py +0 -0
  28. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/queue/__init__.py +0 -0
  29. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/src/prefactor_core/queue/base.py +0 -0
  30. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/tests/test_client.py +0 -0
  31. {prefactor_core-0.2.1 → prefactor_core-0.2.3}/tests/test_imports.py +0 -0
@@ -69,3 +69,4 @@ htmlcov/
69
69
  # Env
70
70
  .env
71
71
  mise.local.toml
72
+ prefactor.json
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: prefactor-core
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Core Prefactor SDK with async queue-based operations
5
5
  Author-email: Prefactor Pty Ltd <josh@prefactor.tech>
6
6
  License: MIT
7
7
  Requires-Python: <4.0.0,>=3.11.0
8
- Requires-Dist: prefactor-http>=0.1.0
8
+ Requires-Dist: prefactor-http>=0.1.1
9
9
  Requires-Dist: pydantic>=2.0.0
10
10
  Description-Content-Type: text/markdown
11
11
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "prefactor-core"
3
- version = "0.2.1"
3
+ dynamic = ["version"]
4
4
  description = "Core Prefactor SDK with async queue-based operations"
5
5
  readme = "README.md"
6
6
  license = { text = "MIT" }
@@ -9,7 +9,7 @@ authors = [
9
9
  ]
10
10
  requires-python = ">=3.11.0, <4.0.0"
11
11
  dependencies = [
12
- "prefactor-http>=0.1.0",
12
+ "prefactor-http>=0.1.1",
13
13
  "pydantic>=2.0.0",
14
14
  ]
15
15
 
@@ -17,6 +17,9 @@ dependencies = [
17
17
  requires = ["hatchling"]
18
18
  build-backend = "hatchling.build"
19
19
 
20
+ [tool.hatch.version]
21
+ path = "src/prefactor_core/_version.py"
22
+
20
23
  [tool.hatch.build.targets.wheel]
21
24
  packages = ["src/prefactor_core"]
22
25
 
@@ -3,6 +3,9 @@
3
3
  This module exports the main classes and functions for the prefactor-core SDK.
4
4
  """
5
5
 
6
+ from __future__ import annotations
7
+
8
+ from ._version import __version__
6
9
  from .client import PrefactorCoreClient
7
10
  from .config import PrefactorCoreConfig, QueueConfig
8
11
  from .context_stack import SpanContextStack
@@ -12,6 +15,7 @@ from .exceptions import (
12
15
  InstanceNotFoundError,
13
16
  OperationError,
14
17
  PrefactorCoreError,
18
+ PrefactorTelemetryFailureError,
15
19
  SpanNotFoundError,
16
20
  )
17
21
  from .managers.agent_instance import AgentInstanceHandle
@@ -20,8 +24,7 @@ from .operations import Operation, OperationType
20
24
  from .queue import InMemoryQueue, Queue, QueueClosedError, TaskExecutor
21
25
  from .schema_registry import SchemaRegistry
22
26
  from .span_context import SpanContext
23
-
24
- __version__ = "0.2.1"
27
+ from .utils import generate_idempotency_key, validate_idempotency_key
25
28
 
26
29
  __all__ = [
27
30
  # Client
@@ -39,6 +42,7 @@ __all__ = [
39
42
  "OperationError",
40
43
  "InstanceNotFoundError",
41
44
  "SpanNotFoundError",
45
+ "PrefactorTelemetryFailureError",
42
46
  # Models
43
47
  "AgentInstance",
44
48
  "Span",
@@ -54,4 +58,8 @@ __all__ = [
54
58
  "AgentInstanceHandle",
55
59
  # Schema Registry
56
60
  "SchemaRegistry",
61
+ # Utils
62
+ "generate_idempotency_key",
63
+ "validate_idempotency_key",
64
+ "__version__",
57
65
  ]
@@ -0,0 +1,7 @@
1
+ """Package version for prefactor-core."""
2
+
3
+ from __future__ import annotations
4
+
5
+ PACKAGE_NAME = "prefactor-core"
6
+ __version__ = "0.2.3"
7
+ PACKAGE_VERSION = __version__
@@ -13,12 +13,16 @@ from contextlib import asynccontextmanager
13
13
  from typing import TYPE_CHECKING, Any
14
14
 
15
15
  from prefactor_http.client import PrefactorHttpClient
16
+ from prefactor_http.exceptions import is_permanent_http_error, is_transient_http_error
16
17
 
18
+ from ._version import PACKAGE_NAME as CORE_PACKAGE_NAME
19
+ from ._version import PACKAGE_VERSION as CORE_PACKAGE_VERSION
17
20
  from .config import PrefactorCoreConfig
18
21
  from .context_stack import SpanContextStack
19
22
  from .exceptions import (
20
23
  ClientAlreadyInitializedError,
21
24
  ClientNotInitializedError,
25
+ PrefactorTelemetryFailureError,
22
26
  )
23
27
  from .managers.agent_instance import AgentInstanceManager
24
28
  from .managers.span import SpanManager
@@ -31,6 +35,7 @@ if TYPE_CHECKING:
31
35
  from .managers.agent_instance import AgentInstanceHandle
32
36
 
33
37
  logger = logging.getLogger(__name__)
38
+ CORE_SDK_HEADER_ENTRY = f"{CORE_PACKAGE_NAME}@{CORE_PACKAGE_VERSION}"
34
39
 
35
40
 
36
41
  class PrefactorCoreClient:
@@ -61,6 +66,7 @@ class PrefactorCoreClient:
61
66
  self,
62
67
  config: PrefactorCoreConfig,
63
68
  queue: Queue[Operation] | None = None,
69
+ sdk_header_entry: str | None = None,
64
70
  ) -> None:
65
71
  """Initialize the client.
66
72
 
@@ -68,14 +74,30 @@ class PrefactorCoreClient:
68
74
  config: Configuration for the client.
69
75
  queue: Optional custom queue implementation. If not provided,
70
76
  an InMemoryQueue is used.
77
+ sdk_header_entry: Optional upstream SDK header entry to prepend.
71
78
  """
72
79
  self._config = config
73
80
  self._queue = queue or InMemoryQueue()
81
+ self._sdk_header_entry = sdk_header_entry.strip() if sdk_header_entry else None
74
82
  self._http: PrefactorHttpClient | None = None
75
83
  self._executor: TaskExecutor | None = None
76
84
  self._instance_manager: AgentInstanceManager | None = None
77
85
  self._span_manager: SpanManager | None = None
78
86
  self._initialized = False
87
+ self._telemetry_failure: PrefactorTelemetryFailureError | None = None
88
+ self._telemetry_failure_observed = False
89
+
90
+ def _build_http_sdk_header(self) -> str:
91
+ """Build the effective SDK header for HTTP requests."""
92
+ if self._sdk_header_entry:
93
+ return f"{self._sdk_header_entry} {CORE_SDK_HEADER_ENTRY}"
94
+ return CORE_SDK_HEADER_ENTRY
95
+
96
+ def _set_sdk_header_entry(self, sdk_header_entry: str | None) -> None:
97
+ """Set the upstream SDK header entry for this client lifetime."""
98
+ self._sdk_header_entry = sdk_header_entry.strip() if sdk_header_entry else None
99
+ if self._http is not None:
100
+ self._http._sdk_header = self._build_http_sdk_header()
79
101
 
80
102
  async def __aenter__(self) -> "PrefactorCoreClient":
81
103
  """Enter async context manager."""
@@ -84,7 +106,11 @@ class PrefactorCoreClient:
84
106
 
85
107
  async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
86
108
  """Exit async context manager."""
87
- await self.close()
109
+ try:
110
+ await self.close()
111
+ except PrefactorTelemetryFailureError:
112
+ if exc_type is None:
113
+ raise
88
114
 
89
115
  async def initialize(self) -> None:
90
116
  """Initialize the client and start processing.
@@ -101,13 +127,17 @@ class PrefactorCoreClient:
101
127
  raise ClientAlreadyInitializedError("Client is already initialized")
102
128
 
103
129
  # Initialize HTTP client
104
- self._http = PrefactorHttpClient(self._config.http_config)
130
+ self._http = PrefactorHttpClient(
131
+ self._config.http_config,
132
+ sdk_header=self._build_http_sdk_header(),
133
+ )
105
134
  await self._http.__aenter__()
106
135
 
107
136
  # Initialize executor
108
137
  self._executor = TaskExecutor(
109
138
  queue=self._queue,
110
139
  handler=self._process_operation,
140
+ is_retryable=self._is_retryable_operation_error,
111
141
  num_workers=self._config.queue_config.num_workers,
112
142
  max_retries=self._config.queue_config.max_retries,
113
143
  )
@@ -144,6 +174,10 @@ class PrefactorCoreClient:
144
174
 
145
175
  self._initialized = False
146
176
 
177
+ if self._telemetry_failure is not None and not self._telemetry_failure_observed:
178
+ self._telemetry_failure_observed = True
179
+ raise self._telemetry_failure
180
+
147
181
  def _ensure_initialized(self) -> None:
148
182
  """Ensure the client is initialized.
149
183
 
@@ -156,12 +190,55 @@ class PrefactorCoreClient:
156
190
  "use as context manager."
157
191
  )
158
192
 
193
+ def _record_telemetry_failure(
194
+ self, cause: Exception, operation_type: OperationType | str
195
+ ) -> None:
196
+ """Latch the first permanent telemetry failure."""
197
+ if self._telemetry_failure is not None:
198
+ return
199
+ if isinstance(operation_type, OperationType):
200
+ operation_name = operation_type.name
201
+ else:
202
+ operation_name = str(operation_type)
203
+ self._telemetry_failure = PrefactorTelemetryFailureError(
204
+ f"Telemetry permanently failed during {operation_name}",
205
+ cause=cause,
206
+ operation_type=operation_name,
207
+ dropped_operations=0,
208
+ )
209
+
210
+ def _increment_dropped_operations(self) -> None:
211
+ """Increment the dropped operation counter on the latched failure."""
212
+ if self._telemetry_failure is None:
213
+ return
214
+ self._telemetry_failure.dropped_operations += 1
215
+
216
+ def _raise_if_telemetry_failed(self) -> None:
217
+ """Raise the latched telemetry failure for caller-visible operations."""
218
+ if self._telemetry_failure is None:
219
+ return
220
+ self._telemetry_failure_observed = True
221
+ raise self._telemetry_failure
222
+
223
+ def _is_retryable_operation_error(self, error: Exception) -> bool:
224
+ """Return True when the worker should retry the operation."""
225
+ if isinstance(error, PrefactorTelemetryFailureError):
226
+ return False
227
+ if is_permanent_http_error(error):
228
+ return False
229
+ if is_transient_http_error(error):
230
+ return True
231
+ return True
232
+
159
233
  async def _enqueue(self, operation: Operation) -> None:
160
234
  """Add an operation to the queue.
161
235
 
162
236
  Args:
163
237
  operation: The operation to queue.
164
238
  """
239
+ if self._telemetry_failure is not None:
240
+ self._increment_dropped_operations()
241
+ self._raise_if_telemetry_failed()
165
242
  await self._queue.put(operation)
166
243
 
167
244
  async def _process_operation(self, operation: Operation) -> None:
@@ -174,6 +251,9 @@ class PrefactorCoreClient:
174
251
  """
175
252
  if not self._http:
176
253
  return
254
+ if self._telemetry_failure is not None:
255
+ self._increment_dropped_operations()
256
+ return
177
257
 
178
258
  try:
179
259
  if operation.type == OperationType.REGISTER_AGENT_INSTANCE:
@@ -188,12 +268,14 @@ class PrefactorCoreClient:
188
268
  await self._http.agent_instances.start(
189
269
  agent_instance_id=operation.payload["instance_id"],
190
270
  timestamp=operation.timestamp,
271
+ idempotency_key=operation.payload.get("idempotency_key"),
191
272
  )
192
273
 
193
274
  elif operation.type == OperationType.FINISH_AGENT_INSTANCE:
194
275
  await self._http.agent_instances.finish(
195
276
  agent_instance_id=operation.payload["instance_id"],
196
277
  timestamp=operation.timestamp,
278
+ idempotency_key=operation.payload.get("idempotency_key"),
197
279
  )
198
280
  elif operation.type == OperationType.CREATE_SPAN:
199
281
  await self._http.agent_spans.create(
@@ -211,16 +293,24 @@ class PrefactorCoreClient:
211
293
  status=operation.payload.get("status", "complete"),
212
294
  result_payload=operation.payload.get("result_payload"),
213
295
  timestamp=operation.timestamp,
296
+ idempotency_key=operation.payload.get("idempotency_key"),
214
297
  )
215
298
 
216
299
  except Exception as e:
217
- # Log error but don't re-raise - we don't want to crash the worker
300
+ if is_permanent_http_error(e):
301
+ self._record_telemetry_failure(e, operation.type)
302
+ # Log error and re-raise so TaskExecutor retries can run
218
303
  logger.error(
219
304
  f"Failed to process operation {operation.type}: {e}",
220
305
  exc_info=True,
221
306
  )
222
307
  raise
223
308
 
309
+ @property
310
+ def instance_manager(self) -> AgentInstanceManager | None:
311
+ """Public accessor for the agent instance manager."""
312
+ return self._instance_manager
313
+
224
314
  async def create_agent_instance(
225
315
  self,
226
316
  agent_id: str,
@@ -254,6 +344,7 @@ class PrefactorCoreClient:
254
344
  ValueError: If no schema version provided and registry not configured.
255
345
  """
256
346
  self._ensure_initialized()
347
+ self._raise_if_telemetry_failed()
257
348
  assert self._instance_manager is not None
258
349
 
259
350
  # Determine the agent_schema_version to use
@@ -310,6 +401,7 @@ class PrefactorCoreClient:
310
401
  The span ID.
311
402
  """
312
403
  self._ensure_initialized()
404
+ self._raise_if_telemetry_failed()
313
405
  assert self._span_manager is not None
314
406
 
315
407
  if parent_span_id is None:
@@ -344,7 +436,6 @@ class PrefactorCoreClient:
344
436
  instance_id: str,
345
437
  schema_name: str,
346
438
  parent_span_id: str | None = None,
347
- span_id: str | None = None,
348
439
  payload: dict[str, Any] | None = None,
349
440
  ):
350
441
  """Context manager for creating and finishing a span.
@@ -366,7 +457,6 @@ class PrefactorCoreClient:
366
457
  instance_id: ID of the agent instance this span belongs to.
367
458
  schema_name: Name of the schema for this span.
368
459
  parent_span_id: Optional explicit parent span ID.
369
- span_id: Ignored (API generates IDs).
370
460
  payload: Optional initial payload sent via auto-start on exit
371
461
  if ``start()`` is never called explicitly.
372
462
 
@@ -374,6 +464,7 @@ class PrefactorCoreClient:
374
464
  SpanContext for the created span.
375
465
  """
376
466
  self._ensure_initialized()
467
+ self._raise_if_telemetry_failed()
377
468
  assert self._span_manager is not None
378
469
 
379
470
  # Import here to avoid circular import
@@ -1,5 +1,7 @@
1
1
  """Custom exceptions for prefactor-core."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
 
4
6
  class PrefactorCoreError(Exception):
5
7
  """Base exception for all prefactor-core errors."""
@@ -39,6 +41,23 @@ class SpanNotFoundError(PrefactorCoreError):
39
41
  pass
40
42
 
41
43
 
44
+ class PrefactorTelemetryFailureError(PrefactorCoreError):
45
+ """Raised when telemetry enters a permanent failure state."""
46
+
47
+ def __init__(
48
+ self,
49
+ message: str,
50
+ *,
51
+ cause: Exception,
52
+ operation_type: str | None = None,
53
+ dropped_operations: int = 0,
54
+ ) -> None:
55
+ super().__init__(message)
56
+ self.cause = cause
57
+ self.operation_type = operation_type
58
+ self.dropped_operations = dropped_operations
59
+
60
+
42
61
  __all__ = [
43
62
  "PrefactorCoreError",
44
63
  "ClientNotInitializedError",
@@ -46,4 +65,5 @@ __all__ = [
46
65
  "OperationError",
47
66
  "InstanceNotFoundError",
48
67
  "SpanNotFoundError",
68
+ "PrefactorTelemetryFailureError",
49
69
  ]
@@ -10,6 +10,7 @@ from datetime import datetime, timezone
10
10
  from typing import TYPE_CHECKING, Any, Awaitable, Callable
11
11
 
12
12
  from ..operations import Operation, OperationType
13
+ from ..utils import generate_idempotency_key
13
14
 
14
15
  if TYPE_CHECKING:
15
16
  from prefactor_http.client import PrefactorHttpClient
@@ -71,7 +72,9 @@ class AgentInstanceManager:
71
72
  agent_id: ID of the agent to create an instance for.
72
73
  agent_version: Version information (name, external_identifier, etc.).
73
74
  agent_schema_version: Schema version information.
74
- instance_id: Ignored (API generates IDs with correct partition).
75
+ instance_id: Optional ID to forward to the API as ``id``. When
76
+ provided, the API uses it as the instance ID; when omitted,
77
+ the API generates one.
75
78
 
76
79
  Returns:
77
80
  The instance ID (API-generated).
@@ -81,6 +84,7 @@ class AgentInstanceManager:
81
84
  agent_version=agent_version,
82
85
  agent_schema_version=agent_schema_version,
83
86
  id=instance_id,
87
+ idempotency_key=generate_idempotency_key(),
84
88
  )
85
89
  return result.id
86
90
 
@@ -92,9 +96,20 @@ class AgentInstanceManager:
92
96
  Args:
93
97
  instance_id: The ID of the instance to start.
94
98
  """
99
+ await self.start_with_idempotency_key(instance_id, generate_idempotency_key())
100
+
101
+ async def start_with_idempotency_key(
102
+ self,
103
+ instance_id: str,
104
+ idempotency_key: str,
105
+ ) -> None:
106
+ """Queue a start operation using a stable idempotency key."""
95
107
  operation = Operation(
96
108
  type=OperationType.START_AGENT_INSTANCE,
97
- payload={"instance_id": instance_id},
109
+ payload={
110
+ "instance_id": instance_id,
111
+ "idempotency_key": idempotency_key,
112
+ },
98
113
  timestamp=datetime.now(timezone.utc),
99
114
  )
100
115
 
@@ -108,9 +123,20 @@ class AgentInstanceManager:
108
123
  Args:
109
124
  instance_id: The ID of the instance to finish.
110
125
  """
126
+ await self.finish_with_idempotency_key(instance_id, generate_idempotency_key())
127
+
128
+ async def finish_with_idempotency_key(
129
+ self,
130
+ instance_id: str,
131
+ idempotency_key: str,
132
+ ) -> None:
133
+ """Queue a finish operation using a stable idempotency key."""
111
134
  operation = Operation(
112
135
  type=OperationType.FINISH_AGENT_INSTANCE,
113
- payload={"instance_id": instance_id},
136
+ payload={
137
+ "instance_id": instance_id,
138
+ "idempotency_key": idempotency_key,
139
+ },
114
140
  timestamp=datetime.now(timezone.utc),
115
141
  )
116
142
 
@@ -149,8 +175,8 @@ class AgentInstanceHandle:
149
175
  """
150
176
  self._instance_id = instance_id
151
177
  self._client = client
152
- self._started = False
153
- self._finished = False
178
+ self._start_idempotency_key = generate_idempotency_key()
179
+ self._finish_idempotency_key = generate_idempotency_key()
154
180
 
155
181
  @property
156
182
  def id(self) -> str:
@@ -166,26 +192,24 @@ class AgentInstanceHandle:
166
192
 
167
193
  This queues a start operation for the instance.
168
194
  """
169
- if self._started:
170
- return
171
-
172
- manager = self._client._instance_manager
195
+ manager = self._client.instance_manager
173
196
  assert manager is not None
174
- await manager.start(self._instance_id)
175
- self._started = True
197
+ await manager.start_with_idempotency_key(
198
+ self._instance_id,
199
+ self._start_idempotency_key,
200
+ )
176
201
 
177
202
  async def finish(self) -> None:
178
203
  """Mark the instance as finished.
179
204
 
180
205
  This queues a finish operation for the instance.
181
206
  """
182
- if self._finished:
183
- return
184
-
185
- manager = self._client._instance_manager
207
+ manager = self._client.instance_manager
186
208
  assert manager is not None
187
- await manager.finish(self._instance_id)
188
- self._finished = True
209
+ await manager.finish_with_idempotency_key(
210
+ self._instance_id,
211
+ self._finish_idempotency_key,
212
+ )
189
213
 
190
214
  async def create_span(
191
215
  self,
@@ -205,6 +229,7 @@ class AgentInstanceHandle:
205
229
  Returns:
206
230
  The span ID.
207
231
  """
232
+ self._client._raise_if_telemetry_failed()
208
233
  return await self._client.create_span(
209
234
  instance_id=self._instance_id,
210
235
  schema_name=schema_name,
@@ -5,13 +5,13 @@ calls into Operation objects that are queued for processing. It also manages
5
5
  the span stack for automatic parent detection.
6
6
  """
7
7
 
8
- import uuid
9
8
  from datetime import datetime, timezone
10
9
  from typing import TYPE_CHECKING, Any, Awaitable, Callable
11
10
 
12
11
  from ..context_stack import SpanContextStack
13
12
  from ..models import Span
14
13
  from ..operations import Operation, OperationType
14
+ from ..utils import generate_idempotency_key
15
15
 
16
16
  if TYPE_CHECKING:
17
17
  from prefactor_http.client import PrefactorHttpClient
@@ -90,7 +90,7 @@ class SpanManager:
90
90
  if parent_span_id is None:
91
91
  parent_span_id = SpanContextStack.peek()
92
92
 
93
- temp_id = str(uuid.uuid4())
93
+ temp_id = generate_idempotency_key()
94
94
 
95
95
  span = Span(
96
96
  id=temp_id,
@@ -139,6 +139,7 @@ class SpanManager:
139
139
  status="active",
140
140
  payload=payload or {},
141
141
  parent_span_id=span.parent_span_id,
142
+ idempotency_key=generate_idempotency_key(),
142
143
  )
143
144
 
144
145
  api_id = result.id
@@ -150,6 +151,10 @@ class SpanManager:
150
151
  del self._spans[temp_id]
151
152
  self._spans[api_id] = span
152
153
 
154
+ for child_span in self._spans.values():
155
+ if child_span.parent_span_id == temp_id:
156
+ child_span.parent_span_id = api_id
157
+
153
158
  # Replace temp ID on the context stack
154
159
  stack = SpanContextStack.get_stack()
155
160
  new_stack = [api_id if s == temp_id else s for s in stack]
@@ -185,12 +190,14 @@ class SpanManager:
185
190
  status="pending",
186
191
  payload={},
187
192
  parent_span_id=span.parent_span_id,
193
+ idempotency_key=generate_idempotency_key(),
188
194
  )
189
195
  api_id = result.id
190
196
 
191
197
  await self._http.agent_spans.finish(
192
198
  agent_span_id=api_id,
193
199
  status="cancelled",
200
+ idempotency_key=generate_idempotency_key(),
194
201
  )
195
202
 
196
203
  span.status = "cancelled"
@@ -239,6 +246,7 @@ class SpanManager:
239
246
  span_id: str,
240
247
  result_payload: dict[str, Any] | None = None,
241
248
  status: "FinishStatus" = "complete",
249
+ idempotency_key: str | None = None,
242
250
  ) -> None:
243
251
  """Mark a span as finished.
244
252
 
@@ -251,6 +259,8 @@ class SpanManager:
251
259
  ``"cancelled"`` (default: ``"complete"``). The span must be
252
260
  ``active`` for this to succeed; use ``cancel_unstarted()``
253
261
  to cancel a span that was never started.
262
+ idempotency_key: Optional key to make repeated finish requests
263
+ duplicate-safe. When omitted, a new key is generated.
254
264
 
255
265
  Raises:
256
266
  KeyError: If the span ID is not known.
@@ -261,10 +271,17 @@ class SpanManager:
261
271
  self._spans[span_id].status = status
262
272
  self._spans[span_id].finished_at = datetime.now(timezone.utc)
263
273
 
264
- if SpanContextStack.peek() == span_id:
265
- SpanContextStack.pop()
274
+ stack = SpanContextStack.get_stack()
275
+ if span_id in stack:
276
+ from ..context_stack import _current_span_stack
277
+
278
+ _current_span_stack.set([s for s in stack if s != span_id])
266
279
 
267
- op_payload: dict[str, Any] = {"span_id": span_id, "status": status}
280
+ op_payload: dict[str, Any] = {
281
+ "span_id": span_id,
282
+ "status": status,
283
+ "idempotency_key": idempotency_key or generate_idempotency_key(),
284
+ }
268
285
  if result_payload is not None:
269
286
  op_payload["result_payload"] = result_payload
270
287
 
@@ -38,6 +38,8 @@ class Operation:
38
38
  metadata: Optional additional metadata.
39
39
 
40
40
  Example:
41
+ from datetime import datetime, timezone
42
+
41
43
  operation = Operation(
42
44
  type=OperationType.CREATE_SPAN,
43
45
  payload={
@@ -42,6 +42,8 @@ class TaskExecutor:
42
42
  handler: Callable[[Any], Awaitable[None]],
43
43
  num_workers: int = 3,
44
44
  max_retries: int = 3,
45
+ *,
46
+ is_retryable: Callable[[Exception], bool] | None = None,
45
47
  ) -> None:
46
48
  """Initialize the task executor.
47
49
 
@@ -50,9 +52,12 @@ class TaskExecutor:
50
52
  handler: Async function to process each item.
51
53
  num_workers: Number of concurrent worker tasks.
52
54
  max_retries: Maximum retry attempts per item.
55
+ is_retryable: Optional predicate that decides whether a
56
+ handler failure should be retried.
53
57
  """
54
58
  self._queue = queue
55
59
  self._handler = handler
60
+ self._is_retryable = is_retryable or (lambda exc: True)
56
61
  self._num_workers = num_workers
57
62
  self._max_retries = max_retries
58
63
  self._workers: list[Task] = []
@@ -162,13 +167,17 @@ class TaskExecutor:
162
167
  """
163
168
  last_error: Exception | None = None
164
169
 
165
- for attempt in range(self._max_retries):
170
+ total_attempts = self._max_retries + 1
171
+
172
+ for attempt in range(total_attempts):
166
173
  try:
167
174
  await self._handler(item)
168
175
  return
169
176
  except Exception as e:
170
177
  last_error = e
171
- if attempt < self._max_retries - 1:
178
+ if not self._is_retryable(e):
179
+ raise
180
+ if attempt < total_attempts - 1:
172
181
  delay = 2**attempt # 1s, 2s, 4s
173
182
  logger.warning(
174
183
  f"Attempt {attempt + 1} failed, retrying in {delay}s: {e}"