penguiflow 2.2.4-py3-none-any.whl → 2.2.6-py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported public registry, and is provided for informational purposes only.
- examples/planner_enterprise_agent/__init__.py +30 -0
- examples/planner_enterprise_agent/config.py +93 -0
- examples/planner_enterprise_agent/main.py +709 -0
- examples/planner_enterprise_agent/nodes.py +882 -0
- examples/planner_enterprise_agent/telemetry.py +245 -0
- examples/quickstart/flow.py +3 -6
- examples/trace_cancel/flow.py +9 -8
- penguiflow/__init__.py +1 -1
- penguiflow/planner/__init__.py +6 -0
- penguiflow/planner/dspy_client.py +327 -0
- penguiflow/planner/react.py +465 -52
- penguiflow/remote.py +2 -2
- penguiflow/state.py +1 -1
- {penguiflow-2.2.4.dist-info → penguiflow-2.2.6.dist-info}/METADATA +2 -1
- {penguiflow-2.2.4.dist-info → penguiflow-2.2.6.dist-info}/RECORD +19 -13
- {penguiflow-2.2.4.dist-info → penguiflow-2.2.6.dist-info}/WHEEL +0 -0
- {penguiflow-2.2.4.dist-info → penguiflow-2.2.6.dist-info}/entry_points.txt +0 -0
- {penguiflow-2.2.4.dist-info → penguiflow-2.2.6.dist-info}/licenses/LICENSE +0 -0
- {penguiflow-2.2.4.dist-info → penguiflow-2.2.6.dist-info}/top_level.txt +0 -0
penguiflow/planner/react.py
CHANGED
@@ -5,6 +5,7 @@ from __future__ import annotations
 import asyncio
 import inspect
 import json
+import logging
 import time
 from collections.abc import Callable, Mapping, Sequence
 from dataclasses import dataclass, field
@@ -18,6 +19,49 @@ from ..node import Node
 from ..registry import ModelRegistry
 from . import prompts
 
+# Planner-specific logger
+logger = logging.getLogger("penguiflow.planner")
+
+
+@dataclass(frozen=True, slots=True)
+class PlannerEvent:
+    """Structured event emitted during planner execution for observability."""
+
+    event_type: str  # step_start, step_complete, llm_call, pause, resume, finish
+    ts: float
+    trajectory_step: int
+    thought: str | None = None
+    node_name: str | None = None
+    latency_ms: float | None = None
+    token_estimate: int | None = None
+    error: str | None = None
+    extra: Mapping[str, Any] = field(default_factory=dict)
+
+    def to_payload(self) -> dict[str, Any]:
+        """Render a dictionary payload suitable for structured logging."""
+        payload: dict[str, Any] = {
+            "event": self.event_type,
+            "ts": self.ts,
+            "step": self.trajectory_step,
+        }
+        if self.thought is not None:
+            payload["thought"] = self.thought
+        if self.node_name is not None:
+            payload["node_name"] = self.node_name
+        if self.latency_ms is not None:
+            payload["latency_ms"] = self.latency_ms
+        if self.token_estimate is not None:
+            payload["token_estimate"] = self.token_estimate
+        if self.error is not None:
+            payload["error"] = self.error
+        if self.extra:
+            payload.update(self.extra)
+        return payload
+
+
+# Observability callback type
+PlannerEventCallback = Callable[[PlannerEvent], None]
+
 
 class JSONLLMClient(Protocol):
     async def complete(
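
The PlannerEvent dataclass and PlannerEventCallback alias added above form the whole observability contract: events are frozen dataclasses, and a callback is any callable accepting one event. A minimal consumer sketch, assuming PlannerEvent is importable from this module; the EventCollector class is illustrative, not part of the package:

from penguiflow.planner.react import PlannerEvent

class EventCollector:
    """Accumulate planner events, e.g. to forward to a metrics backend."""

    def __init__(self) -> None:
        self.events: list[PlannerEvent] = []

    def __call__(self, event: PlannerEvent) -> None:
        self.events.append(event)
        # to_payload() flattens the event into a dict for structured logging
        print(event.to_payload())

collector = EventCollector()
# later passed as ReactPlanner(..., event_callback=collector)
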
@@ -372,10 +416,14 @@ class _LiteLLMJSONClient:
         *,
         temperature: float,
         json_schema_mode: bool,
+        max_retries: int = 3,
+        timeout_s: float = 60.0,
     ) -> None:
         self._llm = llm
         self._temperature = temperature
         self._json_schema_mode = json_schema_mode
+        self._max_retries = max_retries
+        self._timeout_s = timeout_s
 
     async def complete(
         self,
@@ -401,12 +449,59 @@ class _LiteLLMJSONClient:
         if self._json_schema_mode and response_format is not None:
             params["response_format"] = response_format
 
-        [6 lines removed; content not captured in this diff view]
+        last_error: Exception | None = None
+        for attempt in range(self._max_retries):
+            try:
+                # Add timeout protection
+                async with asyncio.timeout(self._timeout_s):
+                    response = await litellm.acompletion(**params)
+                choice = response["choices"][0]
+                content = choice["message"]["content"]
+                if content is None:
+                    raise RuntimeError("LiteLLM returned empty content")
+
+                # Log successful LLM call with cost if available
+                cost = response.get("_hidden_params", {}).get("response_cost", 0)
+                if cost and cost > 0:
+                    logger.debug(
+                        "llm_call_success",
+                        extra={"attempt": attempt + 1, "cost_usd": cost},
+                    )
+
+                return content
+            except TimeoutError as exc:
+                last_error = exc
+                logger.warning(
+                    "llm_timeout",
+                    extra={"attempt": attempt + 1, "timeout_s": self._timeout_s},
+                )
+            except Exception as exc:
+                last_error = exc
+                # Check if it's a retryable error (network, rate limit, etc.)
+                error_type = exc.__class__.__name__
+                if "RateLimit" in error_type or "ServiceUnavailable" in error_type:
+                    backoff_s = 2 ** attempt
+                    logger.warning(
+                        "llm_retry",
+                        extra={
+                            "attempt": attempt + 1,
+                            "error": str(exc),
+                            "backoff_s": backoff_s,
+                        },
+                    )
+                    if attempt < self._max_retries - 1:
+                        await asyncio.sleep(backoff_s)
+                    continue
+                # Non-retryable error, raise immediately
+                raise
+
+        # All retries exhausted
+        logger.error(
+            "llm_retries_exhausted",
+            extra={"max_retries": self._max_retries, "last_error": str(last_error)},
+        )
+        msg = f"LLM call failed after {self._max_retries} retries"
+        raise RuntimeError(msg) from last_error
 
 
 class _PlannerContext:
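
The new completion path wraps each litellm.acompletion call in asyncio.timeout and retries only errors whose class name contains "RateLimit" or "ServiceUnavailable", sleeping 1 s, 2 s, 4 s between attempts; timeouts are retried without a sleep, and anything else raises immediately. The same pattern in isolation, as a sketch (call_with_retries and fn are hypothetical names; asyncio.timeout requires Python 3.11+):

import asyncio

async def call_with_retries(fn, *, max_retries: int = 3, timeout_s: float = 60.0):
    last_error: Exception | None = None
    for attempt in range(max_retries):
        try:
            async with asyncio.timeout(timeout_s):
                return await fn()
        except TimeoutError as exc:
            last_error = exc  # retried immediately, as in the client above
        except Exception as exc:
            last_error = exc
            retryable = ("RateLimit", "ServiceUnavailable")
            if not any(k in exc.__class__.__name__ for k in retryable):
                raise  # non-retryable: fail fast
            if attempt < max_retries - 1:
                await asyncio.sleep(2 ** attempt)  # 1 s, 2 s, 4 s, ...
    raise RuntimeError(f"failed after {max_retries} retries") from last_error
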
@@ -430,7 +525,93 @@ class _PlannerContext:
 
 
 class ReactPlanner:
-    """
+    """JSON-only ReAct planner for autonomous multi-step workflows.
+
+    The ReactPlanner orchestrates a loop where an LLM selects and sequences
+    PenguiFlow nodes/tools based on structured JSON contracts. It supports
+    pause/resume for approvals, adaptive re-planning on failures, parallel
+    execution, and trajectory compression for long-running sessions.
+
+    Thread Safety
+    -------------
+    NOT thread-safe. Create separate planner instances per task.
+
+    Parameters
+    ----------
+    llm : str | Mapping[str, Any] | None
+        LiteLLM model name (e.g., "gpt-4") or config dict. Required if
+        llm_client is not provided.
+    nodes : Sequence[Node] | None
+        Sequence of PenguiFlow nodes to make available as tools. Either
+        (nodes + registry) or catalog must be provided.
+    catalog : Sequence[NodeSpec] | None
+        Pre-built tool catalog. If provided, nodes and registry are ignored.
+    registry : ModelRegistry | None
+        Model registry for type resolution. Required if nodes is provided.
+    llm_client : JSONLLMClient | None
+        Custom LLM client implementation. If provided, llm is ignored.
+    max_iters : int
+        Maximum planning iterations before returning no_path. Default: 8.
+    temperature : float
+        LLM sampling temperature. Default: 0.0 for deterministic output.
+    json_schema_mode : bool
+        Enable strict JSON schema enforcement via LLM response_format.
+        Default: True.
+    system_prompt_extra : str | None
+        Additional guidance appended to system prompt.
+    token_budget : int | None
+        If set, triggers trajectory summarization when history exceeds limit.
+        Token count is estimated by character length (approx).
+    pause_enabled : bool
+        Allow nodes to trigger pause/resume flow. Default: True.
+    state_store : StateStore | None
+        Optional durable state adapter for pause/resume persistence.
+    summarizer_llm : str | Mapping[str, Any] | None
+        Separate (cheaper) LLM for trajectory compression. Falls back to
+        main LLM if not set.
+    planning_hints : Mapping[str, Any] | None
+        Structured constraints and preferences (ordering, disallowed nodes,
+        max_parallel, etc.). See plan.md for schema.
+    repair_attempts : int
+        Max attempts to repair invalid JSON from LLM. Default: 3.
+    deadline_s : float | None
+        Wall-clock deadline for planning session (seconds from start).
+    hop_budget : int | None
+        Maximum tool invocations allowed.
+    time_source : Callable[[], float] | None
+        Override time.monotonic for testing.
+    event_callback : PlannerEventCallback | None
+        Optional callback receiving PlannerEvent instances for observability.
+    llm_timeout_s : float
+        Per-LLM-call timeout in seconds. Default: 60.0.
+    llm_max_retries : int
+        Max retry attempts for transient LLM failures. Default: 3.
+    absolute_max_parallel : int
+        System-level safety limit on parallel execution regardless of hints.
+        Default: 50.
+
+    Raises
+    ------
+    ValueError
+        If neither (nodes + registry) nor catalog is provided, or if neither
+        llm nor llm_client is provided.
+    RuntimeError
+        If LiteLLM is not installed and llm_client is not provided.
+
+    Examples
+    --------
+    >>> planner = ReactPlanner(
+    ...     llm="gpt-4",
+    ...     nodes=[triage_node, retrieve_node, summarize_node],
+    ...     registry=my_registry,
+    ...     max_iters=10,
+    ... )
+    >>> result = await planner.run("Explain PenguiFlow's architecture")
+    >>> print(result.reason)  # "answer_complete", "no_path", or "budget_exhausted"
+    """
+
+    # Default system-level safety limit for parallel execution
+    DEFAULT_MAX_PARALLEL = 50
 
     def __init__(
         self,
@@ -453,6 +634,10 @@ class ReactPlanner:
         deadline_s: float | None = None,
         hop_budget: int | None = None,
         time_source: Callable[[], float] | None = None,
+        event_callback: PlannerEventCallback | None = None,
+        llm_timeout_s: float = 60.0,
+        llm_max_retries: int = 3,
+        absolute_max_parallel: int = 50,
     ) -> None:
         if catalog is None:
             if nodes is None or registry is None:
@@ -487,17 +672,17 @@ class ReactPlanner:
         self._deadline_s = deadline_s
         self._hop_budget = hop_budget
         self._time_source = time_source or time.monotonic
-        [11 lines removed; content truncated in this diff view]
+        self._event_callback = event_callback
+        self._absolute_max_parallel = absolute_max_parallel
+        action_schema = {
+            "type": "json_schema",
+            "json_schema": {
+                "name": "planner_action",
+                "schema": PlannerAction.model_json_schema(),
+            },
+        }
+        self._action_schema: Mapping[str, Any] = action_schema
+        self._response_format = action_schema if json_schema_mode else None
         self._summarizer_client: JSONLLMClient | None = None
         if llm_client is not None:
             self._client = llm_client
@@ -508,12 +693,16 @@ class ReactPlanner:
                 llm,
                 temperature=temperature,
                 json_schema_mode=json_schema_mode,
+                max_retries=llm_max_retries,
+                timeout_s=llm_timeout_s,
             )
         if summarizer_llm is not None:
             self._summarizer_client = _LiteLLMJSONClient(
                 summarizer_llm,
                 temperature=temperature,
                 json_schema_mode=True,
+                max_retries=llm_max_retries,
+                timeout_s=llm_timeout_s,
             )
 
     async def run(
@@ -522,6 +711,27 @@ class ReactPlanner:
         *,
         context_meta: Mapping[str, Any] | None = None,
     ) -> PlannerFinish | PlannerPause:
+        """Execute planner on a query until completion or pause.
+
+        Parameters
+        ----------
+        query : str
+            Natural language task description.
+        context_meta : Mapping[str, Any] | None
+            Optional metadata passed to nodes via ctx.meta.
+
+        Returns
+        -------
+        PlannerFinish | PlannerPause
+            PlannerFinish if task completed/failed, PlannerPause if paused
+            for human intervention.
+
+        Raises
+        ------
+        RuntimeError
+            If LLM client fails after all retries.
+        """
+        logger.info("planner_run_start", extra={"query": query})
         trajectory = Trajectory(query=query, context_meta=context_meta)
         return await self._run_loop(trajectory, tracker=None)
 
@@ -530,6 +740,26 @@ class ReactPlanner:
         token: str,
         user_input: str | None = None,
     ) -> PlannerFinish | PlannerPause:
+        """Resume a paused planning session.
+
+        Parameters
+        ----------
+        token : str
+            Resume token from a previous PlannerPause.
+        user_input : str | None
+            Optional user response to the pause (e.g., approval decision).
+
+        Returns
+        -------
+        PlannerFinish | PlannerPause
+            Updated result after resuming execution.
+
+        Raises
+        ------
+        KeyError
+            If resume token is invalid or expired.
+        """
+        logger.info("planner_resume", extra={"token": token[:8] + "..."})
         record = await self._load_pause_record(token)
         trajectory = record.trajectory
         trajectory.context_meta = trajectory.context_meta or {}
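
Together, run() and resume() give the full pause/resume cycle described in the docstrings above. A hedged usage sketch (it assumes the PlannerPause object exposes its resume token as a token attribute, which this diff does not show):

result = await planner.run("Draft and send the quarterly report")
if isinstance(result, PlannerPause):
    # Surface result to a human reviewer, then continue with their decision
    result = await planner.resume(result.token, user_input="approved")
print(result.reason)  # e.g. "answer_complete"
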
@@ -541,6 +771,17 @@ class ReactPlanner:
             record.constraints,
             time_source=self._time_source,
         )
+
+        # Emit resume event
+        self._emit_event(
+            PlannerEvent(
+                event_type="resume",
+                ts=self._time_source(),
+                trajectory_step=len(trajectory.steps),
+                extra={"user_input": user_input} if user_input else {},
+            )
+        )
+
         return await self._run_loop(trajectory, tracker=tracker)
 
     async def _run_loop(
@@ -562,6 +803,10 @@ class ReactPlanner:
         while len(trajectory.steps) < self._max_iters:
             deadline_message = tracker.check_deadline()
             if deadline_message is not None:
+                logger.warning(
+                    "deadline_exhausted",
+                    extra={"step": len(trajectory.steps)},
+                )
                 return self._finish(
                     trajectory,
                     reason="budget_exhausted",
@@ -570,8 +815,40 @@ class ReactPlanner:
                     constraints=tracker,
                 )
 
+            # Emit step start event
+            step_start_ts = self._time_source()
+            self._emit_event(
+                PlannerEvent(
+                    event_type="step_start",
+                    ts=step_start_ts,
+                    trajectory_step=len(trajectory.steps),
+                )
+            )
+
             action = await self.step(trajectory)
 
+            # Log the action received from LLM
+            logger.info(
+                "planner_action",
+                extra={
+                    "step": len(trajectory.steps),
+                    "thought": action.thought,
+                    "next_node": action.next_node,
+                    "has_plan": action.plan is not None,
+                },
+            )
+
+            # Check constraints BEFORE executing parallel plan or any action
+            constraint_error = self._check_action_constraints(
+                action, trajectory, tracker
+            )
+            if constraint_error is not None:
+                trajectory.steps.append(
+                    TrajectoryStep(action=action, error=constraint_error)
+                )
+                trajectory.summary = None
+                continue
+
             if action.plan:
                 parallel_observation, pause = await self._execute_parallel_plan(
                     action, trajectory, tracker
@@ -593,16 +870,6 @@ class ReactPlanner:
                     constraints=tracker,
                 )
 
-            constraint_error = self._check_action_constraints(
-                action, trajectory, tracker
-            )
-            if constraint_error is not None:
-                trajectory.steps.append(
-                    TrajectoryStep(action=action, error=constraint_error)
-                )
-                trajectory.summary = None
-                continue
-
             spec = self._spec_by_name.get(action.next_node)
             if spec is None:
                 error = prompts.render_invalid_node(
@@ -682,6 +949,19 @@ class ReactPlanner:
             self._record_hint_progress(spec.name, trajectory)
             trajectory.resume_user_input = None
 
+            # Emit step complete event
+            step_latency = (self._time_source() - step_start_ts) * 1000  # ms
+            self._emit_event(
+                PlannerEvent(
+                    event_type="step_complete",
+                    ts=self._time_source(),
+                    trajectory_step=len(trajectory.steps) - 1,
+                    thought=action.thought,
+                    node_name=spec.name,
+                    latency_ms=step_latency,
+                )
+            )
+
             if tracker.deadline_triggered or tracker.hop_exhausted:
                 thought = (
                     prompts.render_deadline_exhausted()
@@ -720,9 +1000,15 @@ class ReactPlanner:
             }
         ]
 
+        response_format: Mapping[str, Any] | None = self._response_format
+        if response_format is None and getattr(
+            self._client, "expects_json_schema", False
+        ):
+            response_format = self._action_schema
+
         raw = await self._client.complete(
             messages=messages,
-            response_format=
+            response_format=response_format,
         )
 
         try:
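
With this change a custom JSONLLMClient can opt into receiving the planner_action schema even when json_schema_mode is disabled, by exposing a truthy expects_json_schema attribute that step() probes via getattr. A sketch of such a client (the attribute name and the complete keyword signature come from this diff; the body and return value are placeholders):

class SchemaAwareClient:
    expects_json_schema = True  # picked up by getattr(..., "expects_json_schema", False)

    async def complete(self, *, messages, response_format=None) -> str:
        # response_format arrives here as the planner_action JSON-schema mapping
        ...  # call the underlying model
        return '{"thought": "..."}'  # must be a PlannerAction-shaped JSON string
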
@@ -1095,7 +1381,33 @@ class ReactPlanner:
         return condensed
 
     def _estimate_size(self, messages: Sequence[Mapping[str, str]]) -> int:
-        [1 line removed; content not captured in this diff view]
+        """Estimate token count for messages.
+
+        Uses a heuristic formula that accounts for JSON structure and
+        typical token-to-character ratios for English text with JSON.
+
+        Returns approximately 4 characters = 1 token for GPT models.
+        This is conservative to avoid context overflow.
+        """
+        total_chars = 0
+        for item in messages:
+            content = item.get("content", "")
+            role = item.get("role", "")
+            # Count content characters
+            total_chars += len(content)
+            # Add overhead for message structure (role, JSON wrapping, etc.)
+            total_chars += len(role) + 20  # Approx overhead per message
+
+        # Conservative estimate: 3.5 chars = 1 token (slightly aggressive)
+        # This ensures we trigger summarization before hitting actual limits
+        estimated_tokens = int(total_chars / 3.5)
+
+        logger.debug(
+            "token_estimate",
+            extra={"chars": total_chars, "estimated_tokens": estimated_tokens},
+        )
+
+        return estimated_tokens
 
     async def _summarise_trajectory(
         self, trajectory: Trajectory
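
Worked through on a concrete input, the heuristic behaves like this (message contents are illustrative):

messages = [
    {"role": "system", "content": "x" * 400},
    {"role": "user", "content": "y" * 283},
]
# content chars: 400 + 283 = 683
# per-message overhead: (len("system") + 20) + (len("user") + 20) = 26 + 24 = 50
# estimate: int((683 + 50) / 3.5) = int(209.4...) = 209 tokens
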
@@ -1130,10 +1442,18 @@ class ReactPlanner:
             summary = TrajectorySummary.model_validate_json(raw)
             summary.note = summary.note or "llm"
             trajectory.summary = summary
+            logger.debug("trajectory_summarized", extra={"method": "llm"})
             return summary
-        except Exception:
+        except Exception as exc:
+            # Catch all exceptions to prevent summarizer failures from crashing
+            # the planner. Summarization is non-critical; always fall back.
+            logger.warning(
+                "summarizer_failed_fallback",
+                extra={"error": str(exc), "error_type": exc.__class__.__name__},
+            )
             base_summary.note = "rule_based_fallback"
             trajectory.summary = base_summary
+            logger.debug("trajectory_summarized", extra={"method": "rule_based"})
             return base_summary
 
     def _check_action_constraints(
@@ -1149,8 +1469,21 @@ class ReactPlanner:
             return prompts.render_hop_budget_violation(limit)
         if node_name and node_name in hints.disallow_nodes:
             return prompts.render_disallowed_node(node_name)
-        [2 lines removed; content not captured in this diff view]
+
+        # Check parallel execution limits
+        if action.plan:
+            # Absolute system-level safety limit
+            if len(action.plan) > self._absolute_max_parallel:
+                logger.warning(
+                    "parallel_limit_absolute",
+                    extra={
+                        "requested": len(action.plan),
+                        "limit": self._absolute_max_parallel,
+                    },
+                )
+                return prompts.render_parallel_limit(self._absolute_max_parallel)
+            # Hint-based limit
+            if hints.max_parallel is not None and len(action.plan) > hints.max_parallel:
                 return prompts.render_parallel_limit(hints.max_parallel)
         if hints.sequential_only and action.plan:
             for item in action.plan:
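
The two limits are layered: hints.max_parallel is a soft, per-planner preference, while absolute_max_parallel is a hard system cap that is checked first. A construction sketch (the "max_parallel" hint key follows the docstring's mention of it; the full planning_hints schema lives in plan.md):

planner = ReactPlanner(
    llm="gpt-4",
    nodes=nodes,
    registry=registry,
    planning_hints={"max_parallel": 4},  # soft, hint-level limit
    absolute_max_parallel=50,            # hard safety cap (the default)
)
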
@@ -1263,34 +1596,70 @@ class ReactPlanner:
             return
         saver = getattr(self._state_store, "save_planner_state", None)
         if saver is None:
+            logger.debug(
+                "state_store_no_save_method",
+                extra={"token": token[:8] + "..."},
+            )
             return
-        [4 lines removed; content not captured in this diff view]
+
+        try:
+            payload = self._serialise_pause_record(record)
+            result = saver(token, payload)
+            if inspect.isawaitable(result):
+                await result
+            logger.debug("pause_record_saved", extra={"token": token[:8] + "..."})
+        except Exception as exc:
+            # Log error but don't fail the pause operation
+            # In-memory fallback already succeeded
+            logger.error(
+                "state_store_save_failed",
+                extra={
+                    "token": token[:8] + "...",
+                    "error": str(exc),
+                    "error_type": exc.__class__.__name__,
+                },
+            )
 
     async def _load_pause_record(self, token: str) -> _PauseRecord:
         record = self._pause_records.pop(token, None)
         if record is not None:
+            logger.debug("pause_record_loaded", extra={"source": "memory"})
             return record
+
         if self._state_store is not None:
             loader = getattr(self._state_store, "load_planner_state", None)
             if loader is not None:
-                [15 lines removed; content not captured in this diff view]
+                try:
+                    result = loader(token)
+                    if inspect.isawaitable(result):
+                        result = await result
+                    if result is None:
+                        raise KeyError(token)
+                    trajectory = Trajectory.from_serialised(result["trajectory"])
+                    payload = dict(result.get("payload", {}))
+                    reason = result.get("reason", "await_input")
+                    constraints = result.get("constraints")
+                    logger.debug("pause_record_loaded", extra={"source": "state_store"})
+                    return _PauseRecord(
+                        trajectory=trajectory,
+                        reason=reason,
+                        payload=payload,
+                        constraints=constraints,
+                    )
+                except KeyError:
+                    raise
+                except Exception as exc:
+                    # Log error and re-raise as KeyError with context
+                    logger.error(
+                        "state_store_load_failed",
+                        extra={
+                            "token": token[:8] + "...",
+                            "error": str(exc),
+                            "error_type": exc.__class__.__name__,
+                        },
+                    )
+                    raise KeyError(f"Failed to load pause record: {exc}") from exc
+
         raise KeyError(token)
 
     def _serialise_pause_record(self, record: _PauseRecord) -> dict[str, Any]:
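
The persistence hooks are duck-typed: the planner looks up save_planner_state and load_planner_state with getattr and awaits the result only when it is awaitable, so both sync and async stores work. A minimal in-memory sketch matching that shape (the class itself is illustrative; the payload keys mirror the loading code above):

class InMemoryPlannerStateStore:
    def __init__(self) -> None:
        self._records: dict[str, dict] = {}

    async def save_planner_state(self, token: str, payload: dict) -> None:
        self._records[token] = payload

    async def load_planner_state(self, token: str) -> dict | None:
        # Returning None makes the planner raise KeyError(token)
        return self._records.get(token)
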
@@ -1303,6 +1672,24 @@ class ReactPlanner:
             else None,
         }
 
+    def _emit_event(self, event: PlannerEvent) -> None:
+        """Emit a planner event for observability."""
+        # Log the event
+        logger.info(event.event_type, extra=event.to_payload())
+
+        # Invoke callback if provided
+        if self._event_callback is not None:
+            try:
+                self._event_callback(event)
+            except Exception:
+                logger.exception(
+                    "event_callback_error",
+                    extra={
+                        "event_type": event.event_type,
+                        "step": event.trajectory_step,
+                    },
+                )
+
     def _finish(
         self,
         trajectory: Trajectory,
@@ -1323,6 +1710,30 @@ class ReactPlanner:
             metadata["constraints"] = constraints.snapshot()
         if error is not None:
             metadata["error"] = error
+
+        # Emit finish event
+        extra_data: dict[str, Any] = {"reason": reason}
+        if error:
+            extra_data["error"] = error
+        self._emit_event(
+            PlannerEvent(
+                event_type="finish",
+                ts=self._time_source(),
+                trajectory_step=len(trajectory.steps),
+                thought=thought,
+                extra=extra_data,
+            )
+        )
+
+        logger.info(
+            "planner_finish",
+            extra={
+                "reason": reason,
+                "step_count": len(trajectory.steps),
+                "thought": thought,
+            },
+        )
+
         return PlannerFinish(reason=reason, payload=payload, metadata=metadata)
@@ -1330,6 +1741,8 @@ __all__ = [
     "ParallelCall",
     "ParallelJoin",
     "PlannerAction",
+    "PlannerEvent",
+    "PlannerEventCallback",
     "PlannerFinish",
     "PlannerPause",
     "ReactPlanner",