agnt5-0.3.0a8-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


agnt5/workflow.py ADDED
@@ -0,0 +1,1584 @@
1
+ """Workflow component implementation for AGNT5 SDK."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import functools
7
+ import inspect
8
+ import logging
9
+ import time
10
+ import uuid
11
+ from typing import Any, Awaitable, Callable, Dict, List, Optional, TypeVar, Union, cast
12
+
13
+ from ._schema_utils import extract_function_metadata, extract_function_schemas
14
+ from .context import Context, set_current_context
15
+ from .entity import Entity, EntityState, _get_state_adapter
16
+ from .function import FunctionContext
17
+ from .types import HandlerFunc, WorkflowConfig
18
+ from ._telemetry import setup_module_logger
19
+
20
+ logger = setup_module_logger(__name__)
21
+
22
+ T = TypeVar("T")
23
+
24
+ # Global workflow registry
25
+ _WORKFLOW_REGISTRY: Dict[str, WorkflowConfig] = {}
26
+
27
+
28
+ class WorkflowContext(Context):
29
+ """
30
+ Context for durable workflows.
31
+
32
+ Extends base Context with:
33
+ - State management via WorkflowEntity.state
34
+ - Step tracking and replay
35
+ - Orchestration (task, parallel, gather)
36
+ - Checkpointing (step)
37
+ - Memory scoping (session_id, user_id for multi-level memory)
38
+
39
+ WorkflowContext delegates state to the underlying WorkflowEntity,
40
+ which provides durability and state change tracking for AI workflows.
41
+
42
+ Memory Scoping:
43
+ - run_id: Unique workflow run identifier
44
+ - session_id: For multi-turn conversations (optional)
45
+ - user_id: For user-scoped long-term memory (optional)
46
+ These identifiers enable agents to automatically select the appropriate
47
+ memory scope (run/session/user) via context propagation.
48
+ """
49
+
50
+ def __init__(
51
+ self,
52
+ workflow_entity: "WorkflowEntity", # Forward reference
53
+ run_id: str,
54
+ session_id: Optional[str] = None,
55
+ user_id: Optional[str] = None,
56
+ attempt: int = 0,
57
+ runtime_context: Optional[Any] = None,
58
+ checkpoint_callback: Optional[Callable[[dict], None]] = None,
59
+ checkpoint_client: Optional[Any] = None,
60
+ is_streaming: bool = False,
61
+ tenant_id: Optional[str] = None,
62
+ delta_callback: Optional[Callable[[str, str, int, int], None]] = None,
63
+ ) -> None:
64
+ """
65
+ Initialize workflow context.
66
+
67
+ Args:
68
+ workflow_entity: WorkflowEntity instance managing workflow state
69
+ run_id: Unique workflow run identifier
70
+ session_id: Session identifier for multi-turn conversations (default: run_id)
71
+ user_id: User identifier for user-scoped memory (optional)
72
+ attempt: Retry attempt number (0-indexed)
73
+ runtime_context: RuntimeContext for trace correlation
74
+ checkpoint_callback: Optional callback for sending real-time checkpoints
75
+ checkpoint_client: Optional CheckpointClient for platform-side memoization
76
+ is_streaming: Whether this is a streaming request (for real-time SSE log delivery)
77
+ tenant_id: Tenant identifier for multi-tenant deployments
78
+ delta_callback: Optional callback for forwarding streaming events from nested components
79
+ (event_type, output_data, content_index, sequence) -> None
80
+ """
81
+ super().__init__(run_id, attempt, runtime_context, is_streaming, tenant_id)
82
+ self._workflow_entity = workflow_entity
83
+ self._step_counter: int = 0 # Track step sequence
84
+ self._sequence_number: int = 0 # Global sequence for checkpoints
85
+ self._checkpoint_callback = checkpoint_callback
86
+ self._checkpoint_client = checkpoint_client
87
+ self._delta_callback = delta_callback
88
+ self._delta_sequence: int = 0 # Sequence for delta events (separate from checkpoint sequence)
89
+
90
+ # Memory scoping identifiers
91
+ self.session_id = session_id or run_id # Default: session = run (ephemeral)
92
+ self.user_id = user_id # Optional: user-scoped memory
93
+
94
+ # Step hierarchy tracking - for nested step visualization
95
+ # Stack of event IDs for currently executing steps
96
+ self._step_event_stack: List[str] = []
97
+
98
+ # === State Management ===
99
+
100
+ def _forward_delta(self, event_type: str, output_data: str, content_index: int = 0) -> None:
101
+ """
102
+ Forward a streaming delta event from a nested component.
103
+
104
+ Used by step executors to forward events from streaming agents/functions
105
+ to the client via the delta queue.
106
+
107
+ Args:
108
+ event_type: Event type (e.g., "agent.started", "lm.message.delta")
109
+ output_data: JSON-serialized event data
110
+ content_index: Content index for parallel events (default: 0)
111
+ """
112
+ if self._delta_callback:
113
+ self._delta_callback(event_type, output_data, content_index, self._delta_sequence)
114
+ self._delta_sequence += 1
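
For reference, a toy `delta_callback` matching the four-argument signature documented in `__init__`. This is a sketch, not the SDK's actual sink; a real worker would enqueue each event for SSE delivery instead of printing:

```python
def print_delta(event_type: str, output_data: str, content_index: int, sequence: int) -> None:
    # Minimal sink for forwarded deltas; a real implementation pushes these
    # onto the client's delta queue.
    print(f"[{sequence}] {event_type} (content_index={content_index}): {output_data}")
```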
115
+
116
+ async def _consume_streaming_result(self, async_gen: Any, step_name: str) -> Any:
117
+ """
118
+ Consume an async generator while forwarding streaming events to the client.
119
+
120
+ This method handles streaming from nested agents and functions within
121
+ workflow steps. Events are forwarded via the delta queue while the
122
+ final result is collected and returned for the next step.
123
+
124
+ For agents, the final output is extracted from the agent.completed event.
125
+ For functions, the last yielded value (or collected output) is returned.
126
+
127
+ Args:
128
+ async_gen: Async generator yielding Event objects or raw values
129
+ step_name: Name of the current step (for logging)
130
+
131
+ Returns:
132
+ The final result to pass to the next step:
133
+ - For agents: The output from agent.completed event
134
+ - For functions: The last yielded value or collected output
135
+ """
136
+ import json
137
+ from .events import Event, EventType
138
+
139
+ final_result = None
140
+ collected_output = [] # For streaming functions that yield chunks
141
+
142
+ async for item in async_gen:
143
+ if isinstance(item, Event):
144
+ # Forward typed Event via delta queue
145
+ event_data = item.to_response_fields()
146
+ output_data = event_data.get("output_data", b"")
147
+ output_str = output_data.decode("utf-8") if isinstance(output_data, bytes) else str(output_data or "{}")
148
+
149
+ self._forward_delta(
150
+ event_type=event_data.get("event_type", ""),
151
+ output_data=output_str,
152
+ content_index=event_data.get("content_index", 0),
153
+ )
154
+
155
+ # Capture final result from specific event types
156
+ if item.event_type == EventType.AGENT_COMPLETED:
157
+ # For agents, extract the output from completed event
158
+ final_result = item.data.get("output", "")
159
+ logger.debug(f"Step '{step_name}': Captured agent output from agent.completed")
160
+ elif item.event_type == EventType.OUTPUT_STOP:
161
+ # For streaming functions, the collected output is the result
162
+ # (already collected from delta events)
163
+ pass
164
+
165
+ else:
166
+ # Raw value (non-Event) - streaming function output
167
+ # Forward as output.delta and collect for final result
168
+ try:
169
+ chunk_json = json.dumps(item)
170
+ except (TypeError, ValueError):
171
+ chunk_json = str(item)
172
+
173
+ self._forward_delta(
174
+ event_type="output.delta",
175
+ output_data=chunk_json,
176
+ )
177
+ collected_output.append(item)
178
+
179
+ # Determine final result
180
+ if final_result is not None:
181
+ # Agent result was captured from agent.completed event
182
+ return final_result
183
+ elif collected_output:
184
+ # Streaming function - return collected chunks
185
+ # If single item, return it directly; otherwise return list
186
+ if len(collected_output) == 1:
187
+ return collected_output[0]
188
+ return collected_output
189
+ else:
190
+ # Empty generator
191
+ return None
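
The collection rules for plain (non-`Event`) generators can be seen in a self-contained sketch: a single yielded chunk is returned unwrapped, several come back as a list. `_consume_streaming_result` is internal, so this is for illustration only:

```python
async def one_chunk():
    yield "only"

async def many_chunks():
    for c in ("a", "b"):
        yield c

async def demo(ctx: "WorkflowContext") -> None:
    # Each yielded chunk is also forwarded to the client as an output.delta event.
    assert await ctx._consume_streaming_result(one_chunk(), "demo") == "only"
    assert await ctx._consume_streaming_result(many_chunks(), "demo") == ["a", "b"]
```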
192
+
193
+ def _send_checkpoint(self, checkpoint_type: str, checkpoint_data: dict) -> None:
194
+ """
195
+ Send a checkpoint via the checkpoint callback.
196
+
197
+ Automatically adds parent_event_id from the step event stack if we're
198
+ currently executing inside a nested step call.
199
+
200
+ Args:
201
+ checkpoint_type: Type of checkpoint (e.g., "workflow.state.changed")
202
+ checkpoint_data: Checkpoint payload (should include event_id if needed)
203
+ """
204
+ if self._checkpoint_callback:
205
+ self._sequence_number += 1
206
+
207
+ # Add parent_event_id if we're in a nested step
208
+ if self._step_event_stack:
209
+ checkpoint_data = {
210
+ **checkpoint_data,
211
+ "parent_event_id": self._step_event_stack[-1],
212
+ }
213
+
214
+ checkpoint = {
215
+ "checkpoint_type": checkpoint_type,
216
+ "checkpoint_data": checkpoint_data,
217
+ "sequence_number": self._sequence_number,
218
+ "source_timestamp_ns": time.time_ns(), # Nanosecond timestamp for correct logical ordering
219
+ }
220
+ self._checkpoint_callback(checkpoint)
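
Putting the pieces together, the envelope delivered to the callback looks like this (field values illustrative):

```python
checkpoint = {
    "checkpoint_type": "workflow.step.started",
    "checkpoint_data": {
        "step_name": "process_data_0",
        "handler_name": "process_data",
        "event_id": "5b0c...",         # this step's own event id
        "parent_event_id": "9a2e...",  # only present inside a nested step
    },
    "sequence_number": 7,
    "source_timestamp_ns": 1_700_000_000_000_000_000,
}
```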
221
+
222
+ @property
223
+ def state(self):
224
+ """
225
+ Delegate to WorkflowEntity.state for durable state management.
226
+
227
+ Returns:
228
+ WorkflowState instance from the workflow entity
229
+
230
+ Example:
231
+ ctx.state.set("status", "processing")
232
+ status = ctx.state.get("status")
233
+ """
234
+ state = self._workflow_entity.state
235
+ # Pass checkpoint callback to state for real-time streaming
236
+ if hasattr(state, "_set_checkpoint_callback"):
237
+ state._set_checkpoint_callback(self._send_checkpoint)
238
+ return state
239
+
240
+ # === Orchestration ===
241
+
242
+ async def step(
243
+ self,
244
+ name_or_handler: Union[str, Callable, Awaitable[T]],
245
+ func_or_awaitable: Union[Callable[..., Awaitable[T]], Awaitable[T], Any] = None,
246
+ *args: Any,
247
+ **kwargs: Any,
248
+ ) -> T:
249
+ """
250
+ Execute a durable step with automatic checkpointing.
251
+
252
+ Steps are the primary building block for durable workflows. Results are
253
+ automatically persisted, so if the workflow crashes and restarts, completed
254
+ steps return their cached result without re-executing.
255
+
256
+ Supports multiple calling patterns:
257
+
258
+ 1. **Call a @function (recommended)**:
259
+ ```python
260
+ result = await ctx.step(process_data, arg1, arg2, kwarg=value)
261
+ ```
262
+ Auto-generates step name from function. Full IDE support.
263
+
264
+ 2. **Checkpoint an awaitable with explicit name**:
265
+ ```python
266
+ result = await ctx.step("load_data", fetch_expensive_data())
267
+ ```
268
+ For arbitrary async operations that aren't @functions.
269
+
270
+ 3. **Checkpoint a callable with explicit name**:
271
+ ```python
272
+ result = await ctx.step("compute", my_function, arg1, arg2)
273
+ ```
274
+
275
+ 4. **Legacy string-based @function call**:
276
+ ```python
277
+ result = await ctx.step("function_name", input=data)
278
+ ```
279
+
280
+ Args:
281
+ name_or_handler: Step name (str), @function reference, or awaitable
282
+ func_or_awaitable: Function/awaitable when name is provided, or first arg
283
+ *args: Additional arguments for the function
284
+ **kwargs: Keyword arguments for the function
285
+
286
+ Returns:
287
+ The step result (cached on replay)
288
+
289
+ Example (@function call):
290
+ ```python
291
+ @function
292
+ async def process_data(ctx: FunctionContext, data: list, multiplier: int = 2):
293
+ return [x * multiplier for x in data]
294
+
295
+ @workflow
296
+ async def my_workflow(ctx: WorkflowContext):
297
+ result = await ctx.step(process_data, [1, 2, 3], multiplier=3)
298
+ return result
299
+ ```
300
+
301
+ Example (checkpoint awaitable):
302
+ ```python
303
+ @workflow
304
+ async def my_workflow(ctx: WorkflowContext):
305
+ # Checkpoint expensive external call
306
+ data = await ctx.step("fetch_api", fetch_from_external_api())
307
+ return data
308
+ ```
309
+ """
310
+
312
+ # Determine which calling pattern is being used
313
+ if callable(name_or_handler) and hasattr(name_or_handler, "_agnt5_config"):
314
+ # Pattern 1: step(handler, *args, **kwargs) - @function call
315
+ return await self._step_function(name_or_handler, func_or_awaitable, *args, **kwargs)
316
+ elif isinstance(name_or_handler, str):
317
+ # Check if it's a registered function name (legacy pattern)
318
+ from .function import FunctionRegistry
319
+ if FunctionRegistry.get(name_or_handler) is not None:
320
+ # Pattern 4: Legacy string-based function call
321
+ return await self._step_function(name_or_handler, func_or_awaitable, *args, **kwargs)
322
+ elif func_or_awaitable is not None:
323
+ # Pattern 2/3: step("name", awaitable) or step("name", callable, *args)
324
+ return await self._step_checkpoint(name_or_handler, func_or_awaitable, *args, **kwargs)
325
+ else:
326
+ # String without second arg and not a registered function
327
+ raise ValueError(
328
+ f"Function '{name_or_handler}' not found in registry. "
329
+ f"Either register it with @function decorator, or use "
330
+ f"ctx.step('{name_or_handler}', awaitable) to checkpoint an arbitrary operation."
331
+ )
332
+ elif inspect.iscoroutine(name_or_handler) or inspect.isawaitable(name_or_handler):
333
+ # Awaitable passed directly - auto-generate name
334
+ coro_name = getattr(name_or_handler, '__name__', 'awaitable')
335
+ return await self._step_checkpoint(coro_name, name_or_handler)
336
+ elif callable(name_or_handler):
337
+ # Callable without @function decorator
338
+ raise ValueError(
339
+ f"Function '{name_or_handler.__name__}' is not a registered @function. "
340
+ f"Did you forget to add the @function decorator? "
341
+ f"Or use ctx.step('name', callable) for non-decorated functions."
342
+ )
343
+ else:
344
+ raise ValueError(
345
+ f"step() first argument must be a @function, string name, or awaitable. "
346
+ f"Got: {type(name_or_handler)}"
347
+ )
348
+
349
+ async def _step_function(
350
+ self,
351
+ handler: Union[str, Callable],
352
+ first_arg: Any = None,
353
+ *args: Any,
354
+ **kwargs: Any,
355
+ ) -> Any:
356
+ """
357
+ Internal: Execute a @function as a durable step.
358
+
359
+ This handles both function references and legacy string-based calls.
360
+ """
361
+ from .function import FunctionRegistry
362
+
363
+ # Reconstruct args tuple (first_arg may have been split out by step())
364
+ if first_arg is not None:
365
+ args = (first_arg,) + args
366
+
367
+ # Extract handler name from function reference or use string
368
+ if callable(handler):
369
+ handler_name = handler.__name__
370
+ if not hasattr(handler, "_agnt5_config"):
371
+ raise ValueError(
372
+ f"Function '{handler_name}' is not a registered @function. "
373
+ f"Did you forget to add the @function decorator?"
374
+ )
375
+ else:
376
+ handler_name = handler
377
+
378
+ # Generate unique step name for durability
379
+ step_name = f"{handler_name}_{self._step_counter}"
380
+ self._step_counter += 1
381
+
382
+ # Generate unique event_id for this step (for hierarchy tracking)
383
+ step_event_id = str(uuid.uuid4())
384
+
385
+ # Check if step already completed (for replay)
386
+ if self._workflow_entity.has_completed_step(step_name):
387
+ result = self._workflow_entity.get_completed_step(step_name)
388
+ self._logger.info(f"🔄 Replaying cached step: {step_name}")
389
+ return result
390
+
391
+ # Emit workflow.step.started checkpoint
392
+ self._send_checkpoint(
393
+ "workflow.step.started",
394
+ {
395
+ "step_name": step_name,
396
+ "handler_name": handler_name,
397
+ "input": args or kwargs,
398
+ "event_id": step_event_id, # Include for hierarchy tracking
399
+ },
400
+ )
401
+
402
+ # Push this step's event_id onto the stack for nested calls
403
+ self._step_event_stack.append(step_event_id)
404
+
405
+ # Execute function with OpenTelemetry span
406
+ self._logger.info(f"▶️ Executing new step: {step_name}")
407
+ func_config = FunctionRegistry.get(handler_name)
408
+ if func_config is None:
409
+ raise ValueError(f"Function '{handler_name}' not found in registry")
410
+
411
+ # Import span creation utility and JSON serialization
412
+ from ._core import create_span
413
+ import json
414
+
415
+ # Serialize input data for span attributes (sanitized first so
+ # non-JSON-serializable arguments cannot crash the step)
416
+ input_repr = json.dumps(_sanitize_for_json({"args": args, "kwargs": kwargs})) if args or kwargs else "{}"
417
+
418
+ # Create span for task execution
419
+ with create_span(
420
+ f"workflow.task.{handler_name}",
421
+ "function",
422
+ self._runtime_context,
423
+ {
424
+ "step_name": step_name,
425
+ "handler_name": handler_name,
426
+ "run_id": self.run_id,
427
+ "input.data": input_repr,
428
+ },
429
+ ) as span:
430
+ # Create FunctionContext for the function execution
431
+ func_ctx = FunctionContext(
432
+ run_id=f"{self.run_id}:task:{handler_name}",
433
+ runtime_context=self._runtime_context,
434
+ )
435
+
436
+ try:
437
+ # Execute function with arguments
438
+ # Support legacy pattern: ctx.task("func_name", input=data) or ctx.task(func_ref, input=data)
439
+ if len(args) == 0 and "input" in kwargs:
440
+ # Legacy pattern - single input parameter
441
+ input_data = kwargs.pop("input") # Remove from kwargs
442
+ handler_result = func_config.handler(func_ctx, input_data, **kwargs)
443
+ else:
444
+ # Type-safe pattern - pass all args/kwargs
445
+ handler_result = func_config.handler(func_ctx, *args, **kwargs)
446
+
447
+ # Check if result is an async generator (streaming function or agent)
448
+ # If so, consume it while forwarding events via delta queue
449
+ if inspect.isasyncgen(handler_result):
450
+ result = await self._consume_streaming_result(handler_result, step_name)
451
+ elif inspect.iscoroutine(handler_result):
452
+ result = await handler_result
453
+ else:
454
+ result = handler_result
455
+
456
+ # Add output data to span
457
+ try:
458
+ output_repr = json.dumps(result)
459
+ span.set_attribute("output.data", output_repr)
460
+ except (TypeError, ValueError):
461
+ # If result is not JSON serializable, use repr
462
+ span.set_attribute("output.data", repr(result))
463
+
464
+ # Record step completion in WorkflowEntity
465
+ self._workflow_entity.record_step_completion(
466
+ step_name, handler_name, args or kwargs, result
467
+ )
468
+
469
+ # Pop this step's event_id from the stack (execution complete)
470
+ if self._step_event_stack:
471
+ popped_id = self._step_event_stack.pop()
472
+ if popped_id != step_event_id:
473
+ self._logger.warning(
474
+ f"Step event stack mismatch in task(): expected {step_event_id}, got {popped_id}"
475
+ )
476
+
477
+ # Emit workflow.step.completed checkpoint
478
+ self._send_checkpoint(
479
+ "workflow.step.completed",
480
+ {
481
+ "step_name": step_name,
482
+ "handler_name": handler_name,
483
+ "input": args or kwargs,
484
+ "result": result,
485
+ "event_id": step_event_id, # Include for consistency
486
+ },
487
+ )
488
+
489
+ return result
490
+
491
+ except Exception as e:
492
+ # Pop this step's event_id from the stack (execution failed)
493
+ if self._step_event_stack:
494
+ popped_id = self._step_event_stack.pop()
495
+ if popped_id != step_event_id:
496
+ self._logger.warning(
497
+ f"Step event stack mismatch in task() error path: expected {step_event_id}, got {popped_id}"
498
+ )
499
+
500
+ # Emit workflow.step.failed checkpoint
501
+ self._send_checkpoint(
502
+ "workflow.step.failed",
503
+ {
504
+ "step_name": step_name,
505
+ "handler_name": handler_name,
506
+ "input": args or kwargs,
507
+ "error": str(e),
508
+ "error_type": type(e).__name__,
509
+ "event_id": step_event_id, # Include for consistency
510
+ },
511
+ )
512
+
513
+ # Record error in span
514
+ span.set_attribute("error", "true")
515
+ span.set_attribute("error.message", str(e))
516
+ span.set_attribute("error.type", type(e).__name__)
517
+
518
+ # Re-raise to propagate failure
519
+ raise
520
+
521
+ async def parallel(self, *tasks: Awaitable[T]) -> List[T]:
522
+ """
523
+ Run multiple tasks in parallel.
524
+
525
+ Args:
526
+ *tasks: Async tasks to run in parallel
527
+
528
+ Returns:
529
+ List of results in the same order as tasks
530
+
531
+ Example:
532
+ result1, result2 = await ctx.parallel(
533
+ fetch_data(source1),
534
+ fetch_data(source2)
535
+ )
536
+ """
539
+ return list(await asyncio.gather(*tasks))
540
+
541
+ async def gather(self, **tasks: Awaitable[T]) -> Dict[str, T]:
542
+ """
543
+ Run tasks in parallel with named results.
544
+
545
+ Args:
546
+ **tasks: Named async tasks to run in parallel
547
+
548
+ Returns:
549
+ Dictionary mapping names to results
550
+
551
+ Example:
552
+ results = await ctx.gather(
553
+ db=query_database(),
554
+ api=fetch_api()
555
+ )
556
+ """
559
+ keys = list(tasks.keys())
560
+ values = list(tasks.values())
561
+ results = await asyncio.gather(*values)
562
+ return dict(zip(keys, results))
563
+
564
+ async def task(
565
+ self,
566
+ handler: Union[str, Callable],
567
+ *args: Any,
568
+ **kwargs: Any,
569
+ ) -> Any:
570
+ """
571
+ Execute a function and wait for result.
572
+
573
+ .. deprecated::
574
+ Use :meth:`step` instead. ``task()`` will be removed in a future version.
575
+
576
+ This method is an alias for :meth:`step` for backward compatibility.
577
+ New code should use ``ctx.step()`` directly.
578
+
579
+ Args:
580
+ handler: Either a @function reference or string name
581
+ *args: Positional arguments to pass to the function
582
+ **kwargs: Keyword arguments to pass to the function
583
+
584
+ Returns:
585
+ Function result
586
+ """
587
+ import warnings
588
+
589
+ warnings.warn(
590
+ "ctx.task() is deprecated, use ctx.step() instead. "
591
+ "task() will be removed in a future version.",
592
+ DeprecationWarning,
593
+ stacklevel=2,
594
+ )
595
+ return await self.step(handler, *args, **kwargs)
596
+
597
+ async def _step_checkpoint(
598
+ self,
599
+ name: str,
600
+ func_or_awaitable: Union[Callable[..., Awaitable[T]], Awaitable[T]],
601
+ *args: Any,
602
+ **kwargs: Any,
603
+ ) -> T:
604
+ """
605
+ Internal: Checkpoint an arbitrary awaitable or callable for durability.
606
+
607
+ If the workflow crashes, this step won't re-execute on retry.
608
+ The step result is persisted to the platform for crash recovery.
609
+
610
+ When a CheckpointClient is available, this method uses platform-side
611
+ memoization via gRPC. The platform stores step results in the run_steps
612
+ table, enabling replay even after worker crashes.
613
+
614
+ Args:
615
+ name: Unique name for this checkpoint (used as step_key for memoization)
616
+ func_or_awaitable: Either an async function or awaitable
617
+ *args: Arguments to pass if func_or_awaitable is callable
618
+ **kwargs: Keyword arguments to pass if func_or_awaitable is callable
619
+
620
+ Returns:
621
+ The result of the function/awaitable
622
+ """
623
+ import json
626
+
627
+ # Generate step key for platform memoization
628
+ step_key = f"step:{name}:{self._step_counter}"
629
+ self._step_counter += 1
630
+
631
+ # Generate unique event_id for this step (for hierarchy tracking)
632
+ step_event_id = str(uuid.uuid4())
633
+
634
+ # Check platform-side memoization first (Phase 3)
635
+ if self._checkpoint_client:
636
+ try:
637
+ result = await self._checkpoint_client.step_started(
638
+ self.run_id,
639
+ step_key,
640
+ name,
641
+ "checkpoint",
642
+ )
643
+ if result.memoized and result.cached_output:
644
+ # Deserialize cached output
645
+ cached_value = json.loads(result.cached_output.decode("utf-8"))
646
+ self._logger.info(f"🔄 Replaying memoized step from platform: {name}")
647
+ # Also record locally for consistency
648
+ self._workflow_entity.record_step_completion(name, "checkpoint", None, cached_value)
649
+ return cached_value
650
+ except Exception as e:
651
+ self._logger.warning(f"Platform memoization check failed, falling back to local: {e}")
652
+
653
+ # Fall back to local memoization (for backward compatibility)
654
+ if self._workflow_entity.has_completed_step(name):
655
+ result = self._workflow_entity.get_completed_step(name)
656
+ self._logger.info(f"🔄 Replaying checkpoint from local cache: {name}")
657
+ return result
658
+
659
+ # Emit workflow.step.started checkpoint for observability
660
+ self._send_checkpoint(
661
+ "workflow.step.started",
662
+ {
663
+ "step_name": name,
664
+ "handler_name": "checkpoint",
665
+ "event_id": step_event_id, # Include for hierarchy tracking
666
+ },
667
+ )
668
+
669
+ # Push this step's event_id onto the stack for nested calls
670
+ self._step_event_stack.append(step_event_id)
671
+
672
+ start_time = time.time()
673
+ try:
674
+ # Execute and checkpoint
675
+ if inspect.isasyncgen(func_or_awaitable):
676
+ # Direct async generator - consume while forwarding events
677
+ result = await self._consume_streaming_result(func_or_awaitable, name)
678
+ elif inspect.iscoroutine(func_or_awaitable) or inspect.isawaitable(func_or_awaitable):
679
+ result = await func_or_awaitable
680
+ elif callable(func_or_awaitable):
681
+ # Call with args/kwargs if provided
682
+ call_result = func_or_awaitable(*args, **kwargs)
683
+ if inspect.isasyncgen(call_result):
684
+ # Callable returned async generator - consume while forwarding events
685
+ result = await self._consume_streaming_result(call_result, name)
686
+ elif inspect.iscoroutine(call_result) or inspect.isawaitable(call_result):
687
+ result = await call_result
688
+ else:
689
+ result = call_result
690
+ else:
691
+ raise ValueError(f"step() second argument must be awaitable or callable, got {type(func_or_awaitable)}")
692
+
693
+ latency_ms = int((time.time() - start_time) * 1000)
694
+
695
+ # Record step completion locally for in-memory replay
696
+ self._workflow_entity.record_step_completion(name, "checkpoint", None, result)
697
+
698
+ # Record to platform for persistent memoization (Phase 3)
699
+ if self._checkpoint_client:
700
+ try:
701
+ output_bytes = json.dumps(result).encode("utf-8")
702
+ await self._checkpoint_client.step_completed(
703
+ self.run_id,
704
+ step_key,
705
+ name,
706
+ "checkpoint",
707
+ output_bytes,
708
+ latency_ms,
709
+ )
710
+ except Exception as e:
711
+ self._logger.warning(f"Failed to record step completion to platform: {e}")
712
+
713
+ # Pop this step's event_id from the stack (execution complete)
714
+ if self._step_event_stack:
715
+ popped_id = self._step_event_stack.pop()
716
+ if popped_id != step_event_id:
717
+ self._logger.warning(
718
+ f"Step event stack mismatch in step(): expected {step_event_id}, got {popped_id}"
719
+ )
720
+
721
+ # Emit workflow.step.completed checkpoint to journal for crash recovery
722
+ self._send_checkpoint(
723
+ "workflow.step.completed",
724
+ {
725
+ "step_name": name,
726
+ "handler_name": "checkpoint",
727
+ "result": result,
728
+ "event_id": step_event_id, # Include for consistency
729
+ },
730
+ )
731
+
732
+ self._logger.info(f"✅ Checkpoint completed: {name} ({latency_ms}ms)")
733
+ return result
734
+
735
+ except Exception as e:
736
+ # Pop this step's event_id from the stack (execution failed)
737
+ if self._step_event_stack:
738
+ popped_id = self._step_event_stack.pop()
739
+ if popped_id != step_event_id:
740
+ self._logger.warning(
741
+ f"Step event stack mismatch in step() error path: expected {step_event_id}, got {popped_id}"
742
+ )
743
+
744
+ # Record failure to platform (Phase 3)
745
+ if self._checkpoint_client:
746
+ try:
747
+ await self._checkpoint_client.step_failed(
748
+ self.run_id,
749
+ step_key,
750
+ name,
751
+ "checkpoint",
752
+ str(e),
753
+ type(e).__name__,
754
+ )
755
+ except Exception as cp_err:
756
+ self._logger.warning(f"Failed to record step failure to platform: {cp_err}")
757
+
758
+ # Emit workflow.step.failed checkpoint
759
+ self._send_checkpoint(
760
+ "workflow.step.failed",
761
+ {
762
+ "step_name": name,
763
+ "handler_name": "checkpoint",
764
+ "error": str(e),
765
+ "error_type": type(e).__name__,
766
+ "event_id": step_event_id, # Include for consistency
767
+ },
768
+ )
769
+ raise
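
From the calls above, the checkpoint client needs roughly the following surface. This `Protocol` is inferred from usage in this file, not the SDK's published interface, and the parameter names are assumptions:

```python
from typing import Optional, Protocol

class StepStartedResult(Protocol):
    memoized: bool
    cached_output: Optional[bytes]  # JSON-encoded prior result when memoized

class CheckpointClientLike(Protocol):
    async def step_started(
        self, run_id: str, step_key: str, step_name: str, step_type: str
    ) -> StepStartedResult: ...
    async def step_completed(
        self, run_id: str, step_key: str, step_name: str, step_type: str,
        output: bytes, latency_ms: int,
    ) -> None: ...
    async def step_failed(
        self, run_id: str, step_key: str, step_name: str, step_type: str,
        error: str, error_type: str,
    ) -> None: ...
```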
770
+
771
+ async def sleep(self, seconds: float, name: Optional[str] = None) -> None:
772
+ """
773
+ Durable sleep that survives workflow restarts.
774
+
775
+ Unlike regular `asyncio.sleep()`, this sleep is checkpointed. If the
776
+ workflow crashes and restarts, it will only sleep for the remaining
777
+ duration (or skip entirely if the sleep period has already elapsed).
778
+
779
+ Args:
780
+ seconds: Duration to sleep in seconds
781
+ name: Optional name for the sleep checkpoint (auto-generated if not provided)
782
+
783
+ Example:
784
+ ```python
785
+ @workflow
786
+ async def delayed_notification(ctx: WorkflowContext, user_id: str):
787
+ # Send immediate acknowledgment
788
+ await ctx.step(send_ack, user_id)
789
+
790
+ # Wait 24 hours (survives restarts!)
791
+ await ctx.sleep(24 * 60 * 60, name="wait_24h")
792
+
793
+ # Send follow-up
794
+ await ctx.step(send_followup, user_id)
795
+ ```
796
+ """
799
+ # Generate unique step name for this sleep
800
+ sleep_name = name or f"sleep_{self._step_counter}"
801
+ self._step_counter += 1
802
+ step_key = f"sleep:{sleep_name}"
803
+
804
+ # Check if sleep was already started (replay scenario)
805
+ if self._workflow_entity.has_completed_step(step_key):
806
+ sleep_record = self._workflow_entity.get_completed_step(step_key)
807
+ start_time = sleep_record.get("start_time", 0)
808
+ duration = sleep_record.get("duration", seconds)
809
+ elapsed = time.time() - start_time
810
+
811
+ if elapsed >= duration:
812
+ # Sleep period already elapsed
813
+ self._logger.info(f"🔄 Sleep '{sleep_name}' already completed (elapsed: {elapsed:.1f}s)")
814
+ return
815
+
816
+ # Sleep for remaining duration
817
+ remaining = duration - elapsed
818
+ self._logger.info(f"⏰ Resuming sleep '{sleep_name}': {remaining:.1f}s remaining")
819
+ await asyncio.sleep(remaining)
820
+ return
821
+
822
+ # Record sleep start time for replay
823
+ sleep_record = {
824
+ "start_time": time.time(),
825
+ "duration": seconds,
826
+ }
827
+ self._workflow_entity.record_step_completion(step_key, "sleep", None, sleep_record)
828
+
829
+ # Emit checkpoint for observability
830
+ step_event_id = str(uuid.uuid4())
831
+ self._send_checkpoint(
832
+ "workflow.step.started",
833
+ {
834
+ "step_name": sleep_name,
835
+ "handler_name": "sleep",
836
+ "duration_seconds": seconds,
837
+ "event_id": step_event_id,
838
+ },
839
+ )
840
+
841
+ self._logger.info(f"💤 Starting durable sleep '{sleep_name}': {seconds}s")
842
+ await asyncio.sleep(seconds)
843
+
844
+ # Emit completion checkpoint
845
+ self._send_checkpoint(
846
+ "workflow.step.completed",
847
+ {
848
+ "step_name": sleep_name,
849
+ "handler_name": "sleep",
850
+ "duration_seconds": seconds,
851
+ "event_id": step_event_id,
852
+ },
853
+ )
854
+ self._logger.info(f"⏰ Sleep '{sleep_name}' completed")
855
+
856
+ async def wait_for_user(
857
+ self, question: str, input_type: str = "text", options: Optional[List[Dict]] = None
858
+ ) -> str:
859
+ """
860
+ Pause workflow execution and wait for user input.
861
+
862
+ On replay (even after worker crash), resumes from this point
863
+ with the user's response. This method enables human-in-the-loop
864
+ workflows by pausing execution and waiting for user interaction.
865
+
866
+ Args:
867
+ question: Question to ask the user
868
+ input_type: Type of input - "text", "approval", or "choice"
869
+ options: For approval/choice, list of option dicts with 'id' and 'label'
870
+
871
+ Returns:
872
+ User's response string
873
+
874
+ Raises:
875
+ WaitingForUserInputException: When no cached response exists (first call)
876
+
877
+ Example (text input):
878
+ ```python
879
+ city = await ctx.wait_for_user("Which city?")
880
+ ```
881
+
882
+ Example (approval):
883
+ ```python
884
+ decision = await ctx.wait_for_user(
885
+ "Approve this action?",
886
+ input_type="approval",
887
+ options=[
888
+ {"id": "approve", "label": "Approve"},
889
+ {"id": "reject", "label": "Reject"}
890
+ ]
891
+ )
892
+ ```
893
+
894
+ Example (choice):
895
+ ```python
896
+ model = await ctx.wait_for_user(
897
+ "Which model?",
898
+ input_type="choice",
899
+ options=[
900
+ {"id": "gpt4", "label": "GPT-4"},
901
+ {"id": "claude", "label": "Claude"}
902
+ ]
903
+ )
904
+ ```
905
+ """
906
+ from .exceptions import WaitingForUserInputException
907
+
908
+ # Response key for this user input request. Keyed by run_id, so there is
908
+ # exactly one pending response per run; inject_user_response() uses the same key.
910
+ response_key = f"user_response:{self.run_id}"
911
+
912
+ # Check if we already have the user's response (replay scenario)
913
+ if self._workflow_entity.has_completed_step(response_key):
914
+ response = self._workflow_entity.get_completed_step(response_key)
915
+ self._logger.info("🔄 Replaying user response from checkpoint")
916
+ return response
917
+
918
+ # No response yet - pause execution
919
+ # Collect current workflow state for checkpoint
920
+ checkpoint_state = {}
921
+ if hasattr(self._workflow_entity, "_state") and self._workflow_entity._state is not None:
922
+ checkpoint_state = self._workflow_entity._state.get_state_snapshot()
923
+
924
+ self._logger.info(f"⏸️ Pausing workflow for user input: {question}")
925
+
926
+ raise WaitingForUserInputException(
927
+ question=question,
928
+ input_type=input_type,
929
+ options=options,
930
+ checkpoint_state=checkpoint_state,
931
+ )
932
+
933
+
934
+ # ============================================================================
935
+ # Helper functions for workflow execution
936
+ # ============================================================================
937
+
938
+
939
+ def _sanitize_for_json(obj: Any) -> Any:
940
+ """
941
+ Sanitize data for JSON serialization by removing or converting non-serializable objects.
942
+
943
+ Specifically handles:
944
+ - WorkflowContext objects (replaced with placeholder)
945
+ - Nested structures (recursively sanitized)
946
+
947
+ Args:
948
+ obj: Object to sanitize
949
+
950
+ Returns:
951
+ JSON-serializable version of the object
952
+ """
953
+ # Handle None, primitives
954
+ if obj is None or isinstance(obj, (str, int, float, bool)):
955
+ return obj
956
+
957
+ # Handle WorkflowContext - replace with placeholder
958
+ if isinstance(obj, WorkflowContext):
959
+ return "<WorkflowContext>"
960
+
961
+ # Handle tuples/lists - recursively sanitize
962
+ if isinstance(obj, (tuple, list)):
963
+ sanitized = [_sanitize_for_json(item) for item in obj]
964
+ return sanitized if isinstance(obj, list) else tuple(sanitized)
965
+
966
+ # Handle dicts - recursively sanitize values
967
+ if isinstance(obj, dict):
968
+ return {k: _sanitize_for_json(v) for k, v in obj.items()}
969
+
970
+ # For other objects, try to serialize or convert to string
971
+ try:
972
+ import json
973
+ json.dumps(obj)
974
+ return obj
975
+ except (TypeError, ValueError):
976
+ # Not JSON serializable, use string representation
977
+ return repr(obj)
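
For example (a `WorkflowContext` instance would likewise be replaced, with the literal string `"<WorkflowContext>"`):

```python
import json

payload = {"items": (1, 2.5, object()), "ok": True}
clean = _sanitize_for_json(payload)
# -> {"items": (1, 2.5, "<object object at 0x...>"), "ok": True}
json.dumps(clean)  # now succeeds; tuples serialize as JSON arrays
```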
978
+
979
+
980
+ # ============================================================================
981
+ # WorkflowEntity: Entity specialized for workflow execution state
982
+ # ============================================================================
983
+
984
+
985
+ class WorkflowEntity(Entity):
986
+ """
987
+ Entity specialized for workflow execution state.
988
+
989
+ Extends Entity with workflow-specific capabilities:
990
+ - Step tracking for replay and crash recovery
991
+ - State change tracking for debugging and audit (AI workflows)
992
+ - Completed step cache for efficient replay
993
+ - Automatic state persistence after workflow execution
994
+
995
+ Workflow state is persisted to the database after successful execution,
996
+ enabling crash recovery, replay, and cross-invocation state management.
997
+ The workflow decorator automatically calls _persist_state() to ensure
998
+ durability.
999
+ """
1000
+
1001
+ def __init__(
1002
+ self,
1003
+ run_id: str,
1004
+ session_id: Optional[str] = None,
1005
+ user_id: Optional[str] = None,
1006
+ component_name: Optional[str] = None,
1007
+ ):
1008
+ """
1009
+ Initialize workflow entity with memory scope.
1010
+
1011
+ Args:
1012
+ run_id: Unique workflow run identifier
1013
+ session_id: Session identifier for multi-turn conversations (optional)
1014
+ user_id: User identifier for user-scoped memory (optional)
1015
+ component_name: Workflow component name for session-scoped entities (optional)
1016
+
1017
+ Memory Scope Priority:
1018
+ - user_id present → key: user:{user_id} (shared across workflows)
1019
+ - session_id present (and != run_id) → key: workflow:{component_name}:session:{session_id}
1020
+ - else → key: run:{run_id}
1021
+
1022
+ Note: For session scope, component_name enables listing sessions by workflow name.
1023
+ User scope is shared across all workflows (not per-workflow).
1024
+ """
1025
+ # Determine entity key based on memory scope priority
1026
+ if user_id:
1027
+ # User scope: shared across all workflows (not per-workflow)
1028
+ entity_key = f"user:{user_id}"
1029
+ memory_scope = "user"
1030
+ elif session_id and session_id != run_id:
1031
+ # Session scope: include workflow name for queryability
1032
+ if component_name:
1033
+ entity_key = f"workflow:{component_name}:session:{session_id}"
1034
+ else:
1035
+ # Fallback for backward compatibility
1036
+ entity_key = f"session:{session_id}"
1037
+ memory_scope = "session"
1038
+ else:
1039
+ entity_key = f"run:{run_id}"
1040
+ memory_scope = "run"
1041
+
1042
+ # Initialize as entity with scoped key pattern
1043
+ super().__init__(key=entity_key)
1044
+
1045
+ # Store run_id separately for tracking (even if key is session/user scoped)
1046
+ self._run_id = run_id
1047
+ self._memory_scope = memory_scope
1048
+ self._component_name = component_name
1049
+
1050
+ # Step tracking for replay and recovery
1051
+ self._step_events: list[Dict[str, Any]] = []
1052
+ self._completed_steps: Dict[str, Any] = {}
1053
+
1054
+ # State change tracking for debugging/audit (AI workflows)
1055
+ self._state_changes: list[Dict[str, Any]] = []
1056
+
1057
+ logger.debug(f"Created WorkflowEntity: run={run_id}, scope={memory_scope}, key={entity_key}, component={component_name}")
1058
+
1059
+ @property
1060
+ def run_id(self) -> str:
1061
+ """Get run_id for this workflow execution."""
1062
+ return self._run_id
1063
+
1064
+ def record_step_completion(
1065
+ self, step_name: str, handler_name: str, input_data: Any, result: Any
1066
+ ) -> None:
1067
+ """
1068
+ Record completed step for replay and recovery.
1069
+
1070
+ Args:
1071
+ step_name: Unique step identifier
1072
+ handler_name: Function handler name
1073
+ input_data: Input data passed to function
1074
+ result: Function result
1075
+ """
1076
+ # Sanitize input_data and result to ensure JSON serializability
1077
+ # This removes WorkflowContext objects and other non-serializable types
1078
+ sanitized_input = _sanitize_for_json(input_data)
1079
+ sanitized_result = _sanitize_for_json(result)
1080
+
1081
+ self._step_events.append(
1082
+ {
1083
+ "step_name": step_name,
1084
+ "handler_name": handler_name,
1085
+ "input": sanitized_input,
1086
+ "result": sanitized_result,
1087
+ }
1088
+ )
1089
+ self._completed_steps[step_name] = result
1090
+ logger.debug(f"Recorded step completion: {step_name}")
1091
+
1092
+ def get_completed_step(self, step_name: str) -> Optional[Any]:
1093
+ """
1094
+ Get result of completed step (for replay).
1095
+
1096
+ Args:
1097
+ step_name: Step identifier
1098
+
1099
+ Returns:
1100
+ Step result if completed, None otherwise
1101
+ """
1102
+ return self._completed_steps.get(step_name)
1103
+
1104
+ def has_completed_step(self, step_name: str) -> bool:
1105
+ """Check if step has been completed."""
1106
+ return step_name in self._completed_steps
1107
+
1108
+ def inject_user_response(self, response: str) -> None:
1109
+ """
1110
+ Inject user response as a completed step for workflow resume.
1111
+
1112
+ This method is called by the worker when resuming a paused workflow
1113
+ with the user's response. It stores the response as if it was a
1114
+ completed step, allowing wait_for_user() to retrieve it on replay.
1115
+
1116
+ Args:
1117
+ response: User's response to inject
1118
+
1119
+ Example:
1120
+ # Platform resumes workflow with user response
1121
+ workflow_entity.inject_user_response("yes")
1122
+ # On replay, wait_for_user() returns "yes" from cache
1123
+ """
1124
+ response_key = f"user_response:{self.run_id}"
1125
+ self._completed_steps[response_key] = response
1126
+ logger.info(f"Injected user response for {self.run_id}: {response}")
1127
+
1128
+ def get_agent_data(self, agent_name: str) -> Dict[str, Any]:
1129
+ """
1130
+ Get agent conversation data from workflow state.
1131
+
1132
+ Args:
1133
+ agent_name: Name of the agent
1134
+
1135
+ Returns:
1136
+ Dictionary containing agent conversation data (messages, metadata)
1137
+ or empty dict if agent has no data yet
1138
+
1139
+ Example:
1140
+ ```python
1141
+ agent_data = workflow_entity.get_agent_data("ResearchAgent")
1142
+ messages = agent_data.get("messages", [])
1143
+ ```
1144
+ """
1145
+ return self.state.get(f"agent.{agent_name}", {})
1146
+
1147
+ def get_agent_messages(self, agent_name: str) -> list[Dict[str, Any]]:
1148
+ """
1149
+ Get agent messages from workflow state.
1150
+
1151
+ Args:
1152
+ agent_name: Name of the agent
1153
+
1154
+ Returns:
1155
+ List of message dictionaries
1156
+
1157
+ Example:
1158
+ ```python
1159
+ messages = workflow_entity.get_agent_messages("ResearchAgent")
1160
+ for msg in messages:
1161
+ print(f"{msg['role']}: {msg['content']}")
1162
+ ```
1163
+ """
1164
+ agent_data = self.get_agent_data(agent_name)
1165
+ return agent_data.get("messages", [])
1166
+
1167
+ def list_agents(self) -> list[str]:
1168
+ """
1169
+ List all agents with data in this workflow.
1170
+
1171
+ Returns:
1172
+ List of agent names that have stored conversation data
1173
+
1174
+ Example:
1175
+ ```python
1176
+ agents = workflow_entity.list_agents()
1177
+ # ['ResearchAgent', 'AnalysisAgent', 'SynthesisAgent']
1178
+ ```
1179
+ """
1180
+ agents = []
1181
+ for key in self.state._state.keys():
1182
+ if key.startswith("agent."):
1183
+ agents.append(key.replace("agent.", "", 1))
1184
+ return agents
1185
+
1186
+ async def _persist_state(self) -> None:
1187
+ """
1188
+ Internal method to persist workflow state to entity storage.
1189
+
1190
+ This is prefixed with _ so it won't be wrapped by the entity method wrapper.
1191
+ Called after workflow execution completes to ensure state is durable.
1192
+ """
1193
+ logger.info(f"🔍 DEBUG: _persist_state() CALLED for workflow {self.run_id}")
1194
+
1195
+ try:
1196
+ from .entity import _get_state_adapter
1197
+
1198
+ logger.info(f"🔍 DEBUG: Getting state adapter...")
1199
+ # Get the state adapter (must be in Worker context)
1200
+ adapter = _get_state_adapter()
1201
+ logger.info(f"🔍 DEBUG: Got state adapter: {type(adapter).__name__}")
1202
+
1203
+ logger.info(f"🔍 DEBUG: Getting state snapshot...")
1204
+ # Get current state snapshot
1205
+ state_dict = self.state.get_state_snapshot()
1206
+ logger.info(f"🔍 DEBUG: State snapshot has {len(state_dict)} keys: {list(state_dict.keys())}")
1207
+
1208
+ logger.info(f"🔍 DEBUG: Loading current version for optimistic locking...")
1209
+ # Load current version (for optimistic locking)
1210
+ _, current_version = await adapter.load_with_version(self._entity_type, self._key)
1211
+ logger.info(f"🔍 DEBUG: Current version: {current_version}")
1212
+
1213
+ logger.info(f"🔍 DEBUG: Saving state to database...")
1214
+ # Save state with version check
1215
+ new_version = await adapter.save_state(
1216
+ self._entity_type, self._key, state_dict, current_version
1217
+ )
1218
+
1219
+ logger.info(
1220
+ f"✅ SUCCESS: Persisted WorkflowEntity state for {self.run_id} "
1221
+ f"(version {current_version} -> {new_version}, {len(state_dict)} keys)"
1222
+ )
1223
+ except Exception as e:
1224
+ logger.error(
1225
+ f"❌ ERROR: Failed to persist workflow state for {self.run_id}: {e}",
1226
+ exc_info=True
1227
+ )
1228
+ # Re-raise to let caller handle
1229
+ raise
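
Inferred from the two calls above, the state adapter exposes roughly this surface (parameter names are assumptions; the real adapter lives in `.entity`):

```python
from typing import Any, Dict, Protocol, Tuple

class StateAdapterLike(Protocol):
    async def load_with_version(
        self, entity_type: str, key: str
    ) -> Tuple[Dict[str, Any], int]: ...
    async def save_state(
        self, entity_type: str, key: str, state: Dict[str, Any], expected_version: int
    ) -> int: ...
```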
1230
+
1231
+ @property
1232
+ def state(self) -> "WorkflowState":
1233
+ """
1234
+ Get workflow state with change tracking.
1235
+
1236
+ Returns WorkflowState which tracks all state mutations
1237
+ for debugging and replay of AI workflows.
1238
+ """
1239
+ if self._state is None:
1240
+ # Initialize with empty state dict - will be populated by entity system
1241
+ self._state = WorkflowState({}, self)
1242
+ return self._state
1243
+
1244
+
1245
+ class WorkflowState(EntityState):
1246
+ """
1247
+ State interface for WorkflowEntity with change tracking.
1248
+
1249
+ Extends EntityState to track all state mutations for:
1250
+ - AI workflow debugging
1251
+ - Audit trail
1252
+ - Replay capabilities
1253
+ """
1254
+
1255
+ def __init__(self, state_dict: Dict[str, Any], workflow_entity: WorkflowEntity):
1256
+ """
1257
+ Initialize workflow state.
1258
+
1259
+ Args:
1260
+ state_dict: Dictionary to use for state storage
1261
+ workflow_entity: Parent workflow entity for tracking
1262
+ """
1263
+ super().__init__(state_dict)
1264
+ self._workflow_entity = workflow_entity
1265
+ self._checkpoint_callback: Optional[Callable[[str, dict], None]] = None
1266
+
1267
+ def _set_checkpoint_callback(self, callback: Callable[[str, dict], None]) -> None:
1268
+ """
1269
+ Set the checkpoint callback for real-time state change streaming.
1270
+
1271
+ Args:
1272
+ callback: Function to call when state changes
1273
+ """
1274
+ self._checkpoint_callback = callback
1275
+
1276
+ def set(self, key: str, value: Any) -> None:
1277
+ """Set value and track change."""
1278
+ super().set(key, value)
1279
+ # Track change for debugging/audit
1282
+ change_record = {"key": key, "value": value, "timestamp": time.time(), "deleted": False}
1283
+ self._workflow_entity._state_changes.append(change_record)
1284
+
1285
+ # Emit checkpoint for real-time state streaming
1286
+ if self._checkpoint_callback:
1287
+ self._checkpoint_callback(
1288
+ "workflow.state.changed", {"key": key, "value": value, "operation": "set"}
1289
+ )
1290
+
1291
+ def delete(self, key: str) -> None:
1292
+ """Delete key and track change."""
1293
+ super().delete(key)
1294
+ # Track deletion
1297
+ change_record = {"key": key, "value": None, "timestamp": time.time(), "deleted": True}
1298
+ self._workflow_entity._state_changes.append(change_record)
1299
+
1300
+ # Emit checkpoint for real-time state streaming
1301
+ if self._checkpoint_callback:
1302
+ self._checkpoint_callback("workflow.state.changed", {"key": key, "operation": "delete"})
1303
+
1304
+ def clear(self) -> None:
1305
+ """Clear all state and track change."""
1306
+ super().clear()
1307
+ # Track clear operation
1310
+ change_record = {
1311
+ "key": "__clear__",
1312
+ "value": None,
1313
+ "timestamp": time.time(),
1314
+ "deleted": True,
1315
+ }
1316
+ self._workflow_entity._state_changes.append(change_record)
1317
+
1318
+ # Emit checkpoint for real-time state streaming
1319
+ if self._checkpoint_callback:
1320
+ self._checkpoint_callback("workflow.state.changed", {"operation": "clear"})
1321
+
1322
+ def has_changes(self) -> bool:
1323
+ """Check if any state changes have been tracked."""
1324
+ return len(self._workflow_entity._state_changes) > 0
1325
+
1326
+ def get_state_snapshot(self) -> Dict[str, Any]:
1327
+ """Get current state as a snapshot dictionary."""
1328
+ return dict(self._state)
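
Change tracking in action (illustrative; entity construction normally happens inside the worker runtime):

```python
entity = WorkflowEntity(run_id="run-1")
state = entity.state
state.set("status", "processing")
state.set("progress", 0.5)
state.delete("progress")

# Every mutation is recorded for audit/debugging:
for change in entity._state_changes:
    print(change["key"], "(deleted)" if change["deleted"] else change["value"])
# status processing
# progress 0.5
# progress (deleted)
```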
1329
+
1330
+
1331
+ class WorkflowRegistry:
1332
+ """Registry for workflow handlers."""
1333
+
1334
+ @staticmethod
1335
+ def register(config: WorkflowConfig) -> None:
1336
+ """
1337
+ Register a workflow handler.
1338
+
1339
+ Raises:
1340
+ ValueError: If a workflow with this name is already registered
1341
+ """
1342
+ if config.name in _WORKFLOW_REGISTRY:
1343
+ existing_workflow = _WORKFLOW_REGISTRY[config.name]
1344
+ logger.error(
1345
+ f"Workflow name collision detected: '{config.name}'\n"
1346
+ f" First defined in: {existing_workflow.handler.__module__}\n"
1347
+ f" Also defined in: {config.handler.__module__}\n"
1348
+ f" This is a bug - workflows must have unique names."
1349
+ )
1350
+ raise ValueError(
1351
+ f"Workflow '{config.name}' is already registered. "
1352
+ f"Use @workflow(name='unique_name') to specify a different name."
1353
+ )
1354
+
1355
+ _WORKFLOW_REGISTRY[config.name] = config
1356
+ logger.debug(f"Registered workflow '{config.name}'")
1357
+
1358
+ @staticmethod
1359
+ def get(name: str) -> Optional[WorkflowConfig]:
1360
+ """Get workflow configuration by name."""
1361
+ return _WORKFLOW_REGISTRY.get(name)
1362
+
1363
+ @staticmethod
1364
+ def all() -> Dict[str, WorkflowConfig]:
1365
+ """Get all registered workflows."""
1366
+ return _WORKFLOW_REGISTRY.copy()
1367
+
1368
+ @staticmethod
1369
+ def list_names() -> list[str]:
1370
+ """List all registered workflow names."""
1371
+ return list(_WORKFLOW_REGISTRY.keys())
1372
+
1373
+ @staticmethod
1374
+ def clear() -> None:
1375
+ """Clear all registered workflows."""
1376
+ _WORKFLOW_REGISTRY.clear()
1377
+
1378
+
1379
+ def workflow(
1380
+ _func: Optional[Callable[..., Any]] = None,
1381
+ *,
1382
+ name: Optional[str] = None,
1383
+ chat: bool = False,
1384
+ cron: Optional[str] = None,
1385
+ webhook: bool = False,
1386
+ webhook_secret: Optional[str] = None,
1387
+ ) -> Callable[..., Any]:
1388
+ """
1389
+ Decorator to mark a function as an AGNT5 durable workflow.
1390
+
1391
+ Workflows use WorkflowEntity for state management and WorkflowContext
1392
+ for orchestration. State changes are automatically tracked for replay.
1393
+
1394
+ Args:
1395
+ name: Custom workflow name (default: function's __name__)
1396
+ chat: Enable chat mode for multi-turn conversation workflows (default: False)
1397
+ cron: Cron expression for scheduled execution (e.g., "0 9 * * *" for daily at 9am)
1398
+ webhook: Enable webhook triggering for this workflow (default: False)
1399
+ webhook_secret: Optional secret for HMAC-SHA256 signature verification
1400
+
1401
+ Example (standard workflow):
1402
+ @workflow
1403
+ async def process_order(ctx: WorkflowContext, order_id: str) -> dict:
1404
+ # Durable state - survives crashes
1405
+ ctx.state.set("status", "processing")
1406
+ ctx.state.set("order_id", order_id)
1407
+
1408
+ # Validate order
1409
+ order = await ctx.task(validate_order, input={"order_id": order_id})
1410
+
1411
+ # Process payment (checkpointed - won't re-execute on crash)
1412
+ payment = await ctx.step("payment", process_payment(order["total"]))
1413
+
1414
+ # Fulfill order
1415
+ await ctx.task(ship_order, input={"order_id": order_id})
1416
+
1417
+ ctx.state.set("status", "completed")
1418
+ return {"status": ctx.state.get("status")}
1419
+
1420
+ Example (chat workflow):
1421
+ @workflow(chat=True)
1422
+ async def customer_support(ctx: WorkflowContext, message: str) -> dict:
1423
+ # Initialize conversation state
1424
+ if not ctx.state.get("messages"):
1425
+ ctx.state.set("messages", [])
1426
+
1427
+ # Add user message
1428
+ messages = ctx.state.get("messages")
1429
+ messages.append({"role": "user", "content": message})
1430
+ ctx.state.set("messages", messages)
1431
+
1432
+ # Generate AI response
1433
+ response = await ctx.task(generate_response, messages=messages)
1434
+
1435
+ # Add assistant response
1436
+ messages.append({"role": "assistant", "content": response})
1437
+ ctx.state.set("messages", messages)
1438
+
1439
+ return {"response": response, "turn_count": len(messages) // 2}
1440
+
1441
+ Example (scheduled workflow):
1442
+ @workflow(name="daily_report", cron="0 9 * * *")
1443
+ async def daily_report(ctx: WorkflowContext) -> dict:
1444
+ # Runs automatically every day at 9am
1445
+ sales = await ctx.task(get_sales_data, report_type="sales")
1446
+ report = await ctx.task(generate_pdf, input=sales)
1447
+ await ctx.task(send_email, to="team@company.com", attachment=report)
1448
+ return {"status": "sent", "report_id": report["id"]}
1449
+
1450
+ Example (webhook workflow):
1451
+ @workflow(name="on_payment", webhook=True, webhook_secret="your_secret_key")
1452
+ async def on_payment(ctx: WorkflowContext, webhook_data: dict) -> dict:
1453
+ # Triggered by webhook POST /v1/webhooks/on_payment
1454
+ # webhook_data contains: payload, headers, source_ip, timestamp
1455
+ payment = webhook_data["payload"]
1456
+
1457
+ if payment.get("status") == "succeeded":
1458
+ await ctx.task(fulfill_order, order_id=payment["order_id"])
1459
+ await ctx.task(send_receipt, customer_email=payment["email"])
1460
+ return {"status": "processed", "order_id": payment["order_id"]}
1461
+
1462
+ return {"status": "skipped", "reason": "payment not successful"}
1463
+ """
1464
+
1465
+ def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
1466
+ # Get workflow name
1467
+ workflow_name = name or func.__name__
1468
+
1469
+ # Validate function signature
1470
+ sig = inspect.signature(func)
1471
+ params = list(sig.parameters.values())
1472
+
1473
+ if not params or params[0].name != "ctx":
1474
+ raise ValueError(
1475
+ f"Workflow '{workflow_name}' must have 'ctx: WorkflowContext' as first parameter"
1476
+ )
1477
+
1478
+ # Convert sync to async if needed
1479
+ if inspect.iscoroutinefunction(func):
1480
+ handler_func = cast(HandlerFunc, func)
1481
+ else:
1482
+ # Wrap sync function in async
1483
+ @functools.wraps(func)
1484
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
1485
+ return func(*args, **kwargs)
1486
+
1487
+ handler_func = cast(HandlerFunc, async_wrapper)
1488
+
1489
+ # Extract schemas from type hints
1490
+ input_schema, output_schema = extract_function_schemas(func)
1491
+
1492
+ # Extract metadata (description, etc.)
1493
+ metadata = extract_function_metadata(func)
1494
+
1495
+ # Add chat metadata if chat mode is enabled
1496
+ if chat:
1497
+ metadata["chat"] = "true"
1498
+
1499
+ # Add cron metadata if cron schedule is provided
1500
+ if cron:
1501
+ metadata["cron"] = cron
1502
+
1503
+ # Add webhook metadata if webhook is enabled
1504
+ if webhook:
1505
+ metadata["webhook"] = "true"
1506
+ if webhook_secret:
1507
+ metadata["webhook_secret"] = webhook_secret
1508
+
1509
+ # Register workflow
1510
+ config = WorkflowConfig(
1511
+ name=workflow_name,
1512
+ handler=handler_func,
1513
+ input_schema=input_schema,
1514
+ output_schema=output_schema,
1515
+ metadata=metadata,
1516
+ )
1517
+ WorkflowRegistry.register(config)
1518
+
1519
+ # Create wrapper that provides context
1520
+ @functools.wraps(func)
1521
+ async def wrapper(*args: Any, **kwargs: Any) -> Any:
1522
+ # Create WorkflowEntity and WorkflowContext if not provided
1523
+ if not args or not isinstance(args[0], WorkflowContext):
1524
+ # Auto-create workflow entity and context for direct workflow calls
1525
+ run_id = f"workflow-{uuid.uuid4().hex[:8]}"
1526
+
1527
+ # Create WorkflowEntity to manage state
1528
+ workflow_entity = WorkflowEntity(run_id=run_id)
1529
+
1530
+ # Create WorkflowContext that wraps the entity
1531
+ ctx = WorkflowContext(
1532
+ workflow_entity=workflow_entity,
1533
+ run_id=run_id,
1534
+ )
1535
+
1536
+ # Set context in task-local storage for automatic propagation
1537
+ token = set_current_context(ctx)
1538
+ try:
1539
+ # Execute workflow
1540
+ result = await handler_func(ctx, *args, **kwargs)
1541
+
1542
+ # Persist workflow state after successful execution
1543
+ try:
1544
+ await workflow_entity._persist_state()
1545
+ except Exception as e:
1546
+ logger.error(f"Failed to persist workflow state (non-fatal): {e}", exc_info=True)
1547
+ # Don't fail the workflow - persistence failure shouldn't break execution
1548
+
1549
+ return result
1550
+ finally:
1551
+ # Always reset context to prevent leakage
1552
+ from .context import _current_context
1553
+
1554
+ _current_context.reset(token)
1555
+ else:
1556
+ # WorkflowContext provided - use it and set in contextvar
1557
+ ctx = args[0]
1558
+ token = set_current_context(ctx)
1559
+ try:
1560
+ result = await handler_func(*args, **kwargs)
1561
+
1562
+ # Persist workflow state after successful execution
1563
+ try:
1564
+ await ctx._workflow_entity._persist_state()
1565
+ except Exception as e:
1566
+ logger.error(f"Failed to persist workflow state (non-fatal): {e}", exc_info=True)
1567
+ # Don't fail the workflow - persistence failure shouldn't break execution
1568
+
1569
+ return result
1570
+ finally:
1571
+ # Always reset context to prevent leakage
1572
+ from .context import _current_context
1573
+
1574
+ _current_context.reset(token)
1575
+
1576
+ # Store config on wrapper for introspection
1577
+ wrapper._agnt5_config = config # type: ignore
1578
+ return wrapper
1579
+
1580
+ # Handle both @workflow and @workflow(...) syntax
1581
+ if _func is None:
1582
+ return decorator
1583
+ else:
1584
+ return decorator(_func)
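
Finally, a minimal end-to-end sketch of the decorator's direct-call path: with no `WorkflowContext` passed in, the wrapper fabricates a run_id, a `WorkflowEntity`, and a `WorkflowContext`, runs the handler, then persists state. Note that persistence needs a worker-side state adapter; outside a worker it logs a non-fatal error:

```python
import asyncio

@workflow
async def greet(ctx: WorkflowContext, name: str) -> str:
    ctx.state.set("greeted", name)
    return f"hello {name}"

result = asyncio.run(greet("ada"))  # wrapper auto-creates entity + context
assert result == "hello ada"
```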