by-framework 0.2.2.dev0__py3-none-any.whl → 0.2.2.dev2__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (65)
  1. by_framework/__init__.py +2 -0
  2. by_framework/client/__init__.py +2 -1
  3. by_framework/client/client.py +406 -5
  4. by_framework/common/constants.py +30 -0
  5. by_framework/common/logger.py +71 -10
  6. by_framework/metrics/__init__.py +218 -0
  7. by_framework/metrics/read_client.py +257 -0
  8. by_framework/metrics/snapshot.py +2294 -0
  9. by_framework/trace/__init__.py +55 -0
  10. by_framework/trace/external_trace.py +148 -0
  11. by_framework/trace/span_recorder.py +901 -0
  12. by_framework/trace/trace_schema.py +329 -0
  13. by_framework/trace/trace_writer.py +157 -0
  14. by_framework/worker/_control_handling.py +30 -0
  15. by_framework/worker/context.py +433 -17
  16. by_framework/worker/runner.py +258 -10
  17. by_framework/worker/worker.py +135 -11
  18. {by_framework-0.2.2.dev0.dist-info → by_framework-0.2.2.dev2.dist-info}/METADATA +34 -2
  19. by_framework-0.2.2.dev2.dist-info/RECORD +49 -0
  20. {by_framework-0.2.2.dev0.dist-info → by_framework-0.2.2.dev2.dist-info}/WHEEL +1 -1
  21. by_framework/core/__init__.py +0 -95
  22. by_framework/core/availability.py +0 -495
  23. by_framework/core/delivery_gate.py +0 -60
  24. by_framework/core/discovery.py +0 -359
  25. by_framework/core/extensions/__init__.py +0 -35
  26. by_framework/core/extensions/agent_config.py +0 -64
  27. by_framework/core/extensions/plugin.py +0 -282
  28. by_framework/core/extensions/registry.py +0 -653
  29. by_framework/core/extensions/trace_provider.py +0 -20
  30. by_framework/core/protocol/__init__.py +0 -133
  31. by_framework/core/protocol/action_type.py +0 -33
  32. by_framework/core/protocol/agent_state.py +0 -78
  33. by_framework/core/protocol/byai_codec.py +0 -101
  34. by_framework/core/protocol/byai_command.py +0 -53
  35. by_framework/core/protocol/byai_types.py +0 -7
  36. by_framework/core/protocol/commands.py +0 -285
  37. by_framework/core/protocol/content_codec.py +0 -17
  38. by_framework/core/protocol/content_type.py +0 -38
  39. by_framework/core/protocol/data_message.py +0 -45
  40. by_framework/core/protocol/data_shapes.py +0 -83
  41. by_framework/core/protocol/event_type.py +0 -34
  42. by_framework/core/protocol/events.py +0 -69
  43. by_framework/core/protocol/message.py +0 -99
  44. by_framework/core/protocol/message_header.py +0 -68
  45. by_framework/core/protocol/responses.py +0 -94
  46. by_framework/core/protocol/results.py +0 -149
  47. by_framework/core/registry.py +0 -1025
  48. by_framework/core/runtime/__init__.py +0 -29
  49. by_framework/core/runtime/agent_config_manager.py +0 -283
  50. by_framework/core/runtime/agent_runtime_state.py +0 -75
  51. by_framework/core/runtime/file_manager.py +0 -437
  52. by_framework/core/runtime/file_paths.py +0 -76
  53. by_framework/core/runtime/file_permissions.py +0 -71
  54. by_framework/core/runtime/filestore/__init__.py +0 -15
  55. by_framework/core/runtime/filestore/base.py +0 -140
  56. by_framework/core/runtime/filestore/local.py +0 -321
  57. by_framework/core/runtime/history/__init__.py +0 -10
  58. by_framework/core/runtime/history/base.py +0 -57
  59. by_framework/core/runtime/history/history_manager.py +0 -55
  60. by_framework/core/runtime/history/in_memory.py +0 -58
  61. by_framework/core/runtime/session_manager.py +0 -118
  62. by_framework/core/wakeup_controller.py +0 -151
  63. by_framework/core/workspace.py +0 -126
  64. by_framework-0.2.2.dev0.dist-info/RECORD +0 -84
  65. {by_framework-0.2.2.dev0.dist-info → by_framework-0.2.2.dev2.dist-info}/licenses/LICENSE +0 -0
by_framework/__init__.py CHANGED
@@ -8,6 +8,7 @@ from `GatewayWorker` and running `run_worker`.
8
8
  from .client.byai_client import ByaiGatewayClient
9
9
  from .client.client import (
10
10
  CancelTaskResponse,
11
+ DataStreamEntry,
11
12
  GatewayClient,
12
13
  GatewayInterceptor,
13
14
  SendMessageResponse,
@@ -140,6 +141,7 @@ __all__ = [
140
141
  "GatewayClient",
141
142
  "ByaiGatewayClient",
142
143
  "GatewayInterceptor",
144
+ "DataStreamEntry",
143
145
  "SendMessageResponse",
144
146
  "CancelTaskResponse",
145
147
  "run_worker",
by_framework/client/__init__.py CHANGED
@@ -1,10 +1,11 @@
1
1
  """Client module for Gateway communication."""
2
2
 
3
3
  from .byai_client import ByaiGatewayClient
4
- from .client import GatewayClient, GatewayInterceptor
4
+ from .client import DataStreamEntry, GatewayClient, GatewayInterceptor
5
5
 
6
6
  __all__ = [
7
7
  "GatewayClient",
8
8
  "ByaiGatewayClient",
9
9
  "GatewayInterceptor",
10
+ "DataStreamEntry",
10
11
  ]
by_framework/client/client.py CHANGED
@@ -9,7 +9,8 @@ import json
9
9
  import time
10
10
  import uuid
11
11
  from dataclasses import dataclass
12
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Protocol
12
+ from dataclasses import fields as dataclass_fields
13
+ from typing import (TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional, Protocol)
13
14
 
14
15
  from by_framework.common.constants import (
15
16
  CANCEL_MESSAGE_ID_PREFIX,
@@ -17,6 +18,7 @@ from by_framework.common.constants import (
17
18
  MESSAGE_ID_PREFIX,
18
19
  RedisKeys,
19
20
  )
21
+ from by_framework.common.logger import logger
20
22
  from by_framework.common.redis_client import Redis, get_redis
21
23
  from by_framework.core.availability import (
22
24
  AvailabilityRouter,
@@ -32,6 +34,7 @@ from by_framework.core.protocol.commands import (
32
34
  ReloadPluginsCommand,
33
35
  ResumeCommand,
34
36
  )
37
+ from by_framework.core.protocol.data_message import DataMessage
35
38
  from by_framework.core.protocol.message_header import MessageHeader
36
39
  from by_framework.core.protocol.responses import (
37
40
  CancelTaskResponse,
@@ -40,6 +43,7 @@ from by_framework.core.protocol.responses import (
40
43
  )
41
44
  from by_framework.core.registry import WorkerRegistry
42
45
  from by_framework.errors import WorkerRegistryNotSetError
46
+ from by_framework.trace.span_recorder import (SpanRecorder, TraceSpan, str_to_uint64)
43
47
 
44
48
  if TYPE_CHECKING:
45
49
  pass
@@ -59,6 +63,14 @@ class RouteResolution:
59
63
  target_worker_id: str = ""
60
64
 
61
65
 
66
+ @dataclass(frozen=True)
67
+ class DataStreamEntry:
68
+ """A decoded entry from a session data stream."""
69
+
70
+ stream_id: str
71
+ message: DataMessage
72
+
73
+
62
74
  class GatewayClient:
63
75
  """Gateway client for sending messages and cancel requests to Gateway workers.
64
76
 
@@ -76,16 +88,183 @@ class GatewayClient:
76
88
  registry: Optional[WorkerRegistry] = None,
77
89
  redis_client: Optional[Redis] = None,
78
90
  interceptors: Optional[List[GatewayInterceptor]] = None,
91
+ span_recorder: Optional[SpanRecorder] = None,
79
92
  ):
80
93
  self.registry = registry
81
94
  self.redis = (
82
95
  redis_client or (registry.redis if registry else None) or get_redis()
83
96
  )
84
97
  self.interceptors = interceptors or []
98
+ self.span_recorder = span_recorder or SpanRecorder(self.redis)
85
99
 
86
100
  def add_interceptor(self, interceptor: GatewayInterceptor):
87
101
  self.interceptors.append(interceptor)
88
102
 
103
+ @staticmethod
104
+ def _decode_redis_value(value: Any) -> Any:
105
+ """Decode Redis bytes values while preserving already-decoded clients."""
106
+ if isinstance(value, bytes):
107
+ return value.decode("utf-8")
108
+ return value
109
+
110
+ @classmethod
111
+ def _decode_data_stream_entry(
112
+ cls, stream_id: Any, fields: Dict[Any, Any]
113
+ ) -> DataStreamEntry:
114
+ raw = fields.get(b"data")
115
+ if raw is None:
116
+ raw = fields.get("data")
117
+ if raw is None:
118
+ raise ValueError("data stream entry missing 'data' field")
119
+
120
+ payload = json.loads(cls._decode_redis_value(raw))
121
+ data_message_fields = {field.name for field in dataclass_fields(DataMessage)}
122
+ return DataStreamEntry(
123
+ stream_id=cls._decode_redis_value(stream_id),
124
+ message=DataMessage(
125
+ **{
126
+ key: value
127
+ for key, value in payload.items()
128
+ if key in data_message_fields
129
+ }
130
+ ),
131
+ )
132
+
133
+ async def read_data_messages(
134
+ self,
135
+ session_id: str,
136
+ last_id: str = "0-0",
137
+ block_ms: int = 0,
138
+ count: int = 100,
139
+ ) -> List[DataStreamEntry]:
140
+ """Read decoded messages from the session data stream.
141
+
142
+ Pass the last returned ``stream_id`` as ``last_id`` to continue from
143
+ the next entry. ``block_ms`` is passed to Redis XREAD; ``0`` means
144
+ block indefinitely on standard Redis clients.
145
+ """
146
+ stream_name = RedisKeys.session_data_stream(session_id)
147
+ messages = await self.redis.xread(
148
+ streams={stream_name: last_id},
149
+ count=count,
150
+ block=block_ms,
151
+ )
152
+
153
+ results: List[DataStreamEntry] = []
154
+ for _, msg_list in messages or []:
155
+ for stream_id, fields in msg_list:
156
+ results.append(self._decode_data_stream_entry(stream_id, fields))
157
+ return results
158
+
159
+ async def get_data_message_checkpoint(
160
+ self,
161
+ session_id: str,
162
+ consumer_name: str,
163
+ ) -> str:
164
+ """Return the last committed data stream ID for a named consumer."""
165
+ checkpoint = await self.redis.get(
166
+ RedisKeys.session_data_checkpoint(session_id, consumer_name)
167
+ )
168
+ if checkpoint is None:
169
+ return "0-0"
170
+ return self._decode_redis_value(checkpoint)
171
+
172
+ async def commit_data_message(
173
+ self,
174
+ session_id: str,
175
+ stream_id: str,
176
+ consumer_name: str,
177
+ ) -> None:
178
+ """Commit a data stream ID as processed for a named consumer."""
179
+ await self.redis.set(
180
+ RedisKeys.session_data_checkpoint(session_id, consumer_name),
181
+ stream_id,
182
+ ex=RedisKeys.DEFAULT_SESSION_TTL,
183
+ )
184
+
185
+ async def read_data_messages_from_checkpoint(
186
+ self,
187
+ session_id: str,
188
+ consumer_name: str,
189
+ block_ms: int = 0,
190
+ count: int = 100,
191
+ auto_commit: bool = False,
192
+ ) -> List[DataStreamEntry]:
193
+ """Read messages starting after a named consumer's committed checkpoint."""
194
+ last_id = await self.get_data_message_checkpoint(session_id, consumer_name)
195
+ entries = await self.read_data_messages(
196
+ session_id=session_id,
197
+ last_id=last_id,
198
+ block_ms=block_ms,
199
+ count=count,
200
+ )
201
+ if auto_commit and entries:
202
+ await self.commit_data_message(
203
+ session_id=session_id,
204
+ stream_id=entries[-1].stream_id,
205
+ consumer_name=consumer_name,
206
+ )
207
+ return entries
208
+
209
+ async def iter_data_messages(
210
+ self,
211
+ session_id: str,
212
+ last_id: str = "$",
213
+ block_ms: int = 5000,
214
+ count: int = 100,
215
+ ) -> AsyncIterator[DataStreamEntry]:
216
+ """Continuously consume decoded messages from the session data stream.
217
+
218
+ The iterator does not stop on its own. Callers should break when their
219
+ business-level terminal event is observed.
220
+ """
221
+ current_id = last_id
222
+ while True:
223
+ entries = await self.read_data_messages(
224
+ session_id=session_id,
225
+ last_id=current_id,
226
+ block_ms=block_ms,
227
+ count=count,
228
+ )
229
+ for entry in entries:
230
+ current_id = entry.stream_id
231
+ yield entry
232
+
233
+ async def consume_data_messages(
234
+ self,
235
+ session_id: str,
236
+ consumer_name: str,
237
+ block_ms: int = 5000,
238
+ count: int = 100,
239
+ ) -> AsyncIterator[DataStreamEntry]:
240
+ """Continuously consume data stream messages with checkpoint commits.
241
+
242
+ Each entry is committed after the caller's loop body completes and asks
243
+ for the next item. If processing fails or the iterator is closed before
244
+ the next item, the current entry is not committed and will be retried
245
+ from the checkpoint on the next consumer run. The iterator does not stop
246
+ on its own; callers should break on their terminal event.
247
+ """
248
+ current_id = await self.get_data_message_checkpoint(
249
+ session_id=session_id,
250
+ consumer_name=consumer_name,
251
+ )
252
+ while True:
253
+ entries = await self.read_data_messages(
254
+ session_id=session_id,
255
+ last_id=current_id,
256
+ block_ms=block_ms,
257
+ count=count,
258
+ )
259
+ for entry in entries:
260
+ yield entry
261
+ await self.commit_data_message(
262
+ session_id=session_id,
263
+ stream_id=entry.stream_id,
264
+ consumer_name=consumer_name,
265
+ )
266
+ current_id = entry.stream_id
267
+
89
268
  async def reload_plugins_for_agent_type(
90
269
  self,
91
270
  agent_type: str,
@@ -341,11 +520,16 @@ class GatewayClient:
341
520
  )
342
521
 
343
522
  if node_worker_id:
523
+ node_trace_id = (
524
+ node.get("trace_id")
525
+ or execution.get("trace_id")
526
+ or uuid.uuid4().hex
527
+ )
344
528
  cancel_command = CancelTaskCommand(
345
529
  header=MessageHeader(
346
530
  message_id=f"{CANCEL_MESSAGE_ID_PREFIX}{uuid.uuid4().hex[:8]}",
347
531
  session_id=session_id,
348
- trace_id=uuid.uuid4().hex,
532
+ trace_id=node_trace_id,
349
533
  target_agent_type=node.get("target_agent_type", ""),
350
534
  parent_message_id=node_message_id,
351
535
  ),
@@ -424,6 +608,32 @@ class GatewayClient:
424
608
  if not trace_id:
425
609
  trace_id = uuid.uuid4().hex
426
610
 
611
+ metadata = dict(params.get("metadata", {}) or {})
612
+ trace_parent_span_id = metadata.pop("trace_parent_span_id", "")
613
+ langfuse_parent_observation_id = metadata.pop(
614
+ "langfuse_parent_observation_id", ""
615
+ )
616
+ if not trace_parent_span_id:
617
+ trace_parent_span_id = (
618
+ f"{str_to_uint64(f'{message_id}:client.dispatch'):016x}"
619
+ )
620
+
621
+ langfuse_client_dispatch = None
622
+ if not params["parent_message_id"]:
623
+ langfuse_client_dispatch = self._start_langfuse_client_dispatch_observation(
624
+ trace_id=trace_id,
625
+ message_id=message_id,
626
+ target_agent_type=params["target_agent_type"],
627
+ session_id=params["session_id"],
628
+ user_code=params["user_code"],
629
+ user_name=params["user_name"],
630
+ content=params["content"],
631
+ metadata=metadata,
632
+ )
633
+ observation_id = getattr(langfuse_client_dispatch, "id", "")
634
+ if observation_id:
635
+ langfuse_parent_observation_id = observation_id
636
+
427
637
  header = MessageHeader(
428
638
  message_id=message_id,
429
639
  session_id=params["session_id"],
@@ -432,7 +642,9 @@ class GatewayClient:
432
642
  parent_message_id=params["parent_message_id"],
433
643
  user_code=params["user_code"],
434
644
  user_name=params["user_name"],
435
- metadata=params["metadata"],
645
+ metadata=metadata,
646
+ trace_parent_span_id=trace_parent_span_id,
647
+ langfuse_parent_observation_id=langfuse_parent_observation_id,
436
648
  )
437
649
  command = self._build_gateway_command(
438
650
  action_type=params["action_type"],
@@ -451,6 +663,7 @@ class GatewayClient:
451
663
  route_policy != RoutePolicy.SEND_ANYWAY,
452
664
  )
453
665
  else:
666
+ avail_start_ms = int(time.time() * 1000)
454
667
  availability = await AvailabilityRouter(
455
668
  self.redis, self.registry
456
669
  ).prepare_delivery(
@@ -470,13 +683,42 @@ class GatewayClient:
470
683
  metadata=params["metadata"],
471
684
  )
472
685
  )
686
+ try:
687
+ from by_framework.metrics import record_availability_metrics
688
+
689
+ record_availability_metrics(
690
+ agent_type=params["target_agent_type"],
691
+ policy=route_policy,
692
+ status=availability.status,
693
+ routing_ms=float(int(time.time() * 1000) - avail_start_ms),
694
+ )
695
+ except Exception: # pylint: disable=broad-exception-caught
696
+ pass
473
697
  if availability.status not in (
474
698
  AvailabilityStatus.DELIVER_NOW,
475
699
  AvailabilityStatus.WAIT_AND_DELIVER,
476
700
  AvailabilityStatus.FALLBACK_TO_OTHER_AGENT_TYPE,
477
701
  AvailabilityStatus.QUEUE_PENDING,
478
702
  ):
479
- return SendMessageResponse(
703
+ if self.registry and hasattr(
704
+ self.registry, "record_failed_route_decision"
705
+ ):
706
+ await self.registry.record_failed_route_decision(
707
+ execution_id=execution_id,
708
+ message_id=message_id,
709
+ session_id=params["session_id"],
710
+ trace_id=trace_id,
711
+ target_agent_type=params["target_agent_type"],
712
+ parent_message_id=params["parent_message_id"] or "",
713
+ source_agent_type="client",
714
+ route_policy=route_policy,
715
+ route_status=availability.status,
716
+ stream_name=availability.stream_name or "",
717
+ selected_agent_type=availability.selected_agent_type or "",
718
+ availability_error_code=availability.error_code or "",
719
+ availability_error=availability.error or "",
720
+ )
721
+ response = SendMessageResponse(
480
722
  success=False,
481
723
  status=ExecutionStatus.FAILED,
482
724
  message_id="",
@@ -487,6 +729,12 @@ class GatewayClient:
487
729
  error_code=availability.error_code
488
730
  or ExecutionStatus.ERR_AGENT_TYPE_UNAVAILABLE,
489
731
  )
732
+ self._end_langfuse_client_dispatch_observation(
733
+ langfuse_client_dispatch,
734
+ output={"success": False, "error": availability.error},
735
+ error=availability.error,
736
+ )
737
+ return response
490
738
  if availability.status == AvailabilityStatus.QUEUE_PENDING:
491
739
  should_dispatch_control = False
492
740
  route = RouteResolution(
@@ -497,6 +745,11 @@ class GatewayClient:
497
745
  params["target_agent_type"] = availability.selected_agent_type
498
746
  command.header.target_agent_type = availability.selected_agent_type
499
747
  except LookupError as err:
748
+ self._end_langfuse_client_dispatch_observation(
749
+ langfuse_client_dispatch,
750
+ output={"success": False, "error": str(err)},
751
+ error=str(err),
752
+ )
500
753
  return SendMessageResponse(
501
754
  success=False,
502
755
  status=ExecutionStatus.FAILED,
@@ -508,6 +761,11 @@ class GatewayClient:
508
761
  error_code=ExecutionStatus.ERR_WORKER_NOT_ONLINE,
509
762
  )
510
763
  except ValueError as err:
764
+ self._end_langfuse_client_dispatch_observation(
765
+ langfuse_client_dispatch,
766
+ output={"success": False, "error": str(err)},
767
+ error=str(err),
768
+ )
511
769
  return SendMessageResponse(
512
770
  success=False,
513
771
  status=ExecutionStatus.FAILED,
@@ -533,16 +791,44 @@ class GatewayClient:
533
791
  "target_agent_type": params["target_agent_type"],
534
792
  "stream_name": route.stream_name,
535
793
  "status": "QUEUED",
794
+ "route_policy": route_policy,
795
+ "route_status": availability.status
796
+ if not target_worker_id
797
+ else "DIRECT_WORKER",
798
+ "selected_agent_type": availability.selected_agent_type
799
+ if not target_worker_id
800
+ else "",
801
+ "availability_error_code": availability.error_code
802
+ if not target_worker_id
803
+ else "",
804
+ "availability_error": availability.error
805
+ if not target_worker_id
806
+ else "",
536
807
  }
537
808
  )
538
809
  except Exception: # pylint: disable=broad-exception-caught
539
810
  pass # Fallback if registry fails
540
811
 
541
812
  # 4. Route to the appropriate stream
813
+ dispatch_started_at = int(time.time() * 1000)
542
814
  if should_dispatch_control:
543
815
  await self.redis.xadd(route.stream_name, command.to_redis_payload())
816
+ await self._record_client_dispatch_span(
817
+ trace_id=trace_id,
818
+ message_id=message_id,
819
+ session_id=params["session_id"],
820
+ parent_message_id=params["parent_message_id"] or "",
821
+ target_agent_type=params["target_agent_type"],
822
+ target_worker_id=route.target_worker_id,
823
+ route_policy=route_policy,
824
+ route_status=availability.status
825
+ if not target_worker_id
826
+ else "DIRECT_WORKER",
827
+ start_ts=dispatch_started_at,
828
+ end_ts=int(time.time() * 1000),
829
+ )
544
830
 
545
- return SendMessageResponse(
831
+ response = SendMessageResponse(
546
832
  success=True,
547
833
  message_id=message_id,
548
834
  trace_id=trace_id,
@@ -550,3 +836,118 @@ class GatewayClient:
550
836
  timestamp=int(time.time() * 1000),
551
837
  status=ExecutionStatus.QUEUED,
552
838
  )
839
+ self._end_langfuse_client_dispatch_observation(
840
+ langfuse_client_dispatch,
841
+ output={
842
+ "success": True,
843
+ "message_id": message_id,
844
+ "trace_id": trace_id,
845
+ "target_worker_id": route.target_worker_id,
846
+ "status": response.status,
847
+ },
848
+ )
849
+ return response
850
+
851
+ def _start_langfuse_client_dispatch_observation(
852
+ self,
853
+ *,
854
+ trace_id: str,
855
+ message_id: str,
856
+ target_agent_type: str,
857
+ session_id: str,
858
+ user_code: str,
859
+ user_name: str,
860
+ content: Any,
861
+ metadata: Dict[str, Any],
862
+ ) -> Any:
863
+ try:
864
+ from by_framework_trace_langfuse import start_client_dispatch_observation
865
+
866
+ return start_client_dispatch_observation(
867
+ trace_id=trace_id,
868
+ message_id=message_id,
869
+ target_agent_type=target_agent_type,
870
+ session_id=session_id,
871
+ user_code=user_code,
872
+ user_name=user_name,
873
+ content=content,
874
+ metadata=metadata,
875
+ )
876
+ except Exception as err: # pylint: disable=broad-exception-caught
877
+ logger.warning(
878
+ "Langfuse client.dispatch observation skipped: %s",
879
+ err,
880
+ exc_info=True,
881
+ )
882
+ return None
883
+
884
+ @staticmethod
885
+ def _end_langfuse_client_dispatch_observation(
886
+ observation: Any,
887
+ *,
888
+ output: Any,
889
+ error: str = "",
890
+ ) -> None:
891
+ if observation is None:
892
+ return
893
+ try:
894
+ if error and hasattr(observation, "update"):
895
+ observation.update(level="ERROR", status_message=error)
896
+ observation.end(output=output)
897
+ except TypeError:
898
+ try:
899
+ observation.update(output=output)
900
+ observation.end()
901
+ except Exception: # pylint: disable=broad-exception-caught
902
+ pass
903
+ except Exception: # pylint: disable=broad-exception-caught
904
+ pass
905
+
906
+ async def _record_client_dispatch_span(
907
+ self,
908
+ *,
909
+ trace_id: str,
910
+ message_id: str,
911
+ session_id: str,
912
+ parent_message_id: str,
913
+ target_agent_type: str,
914
+ target_worker_id: str,
915
+ route_policy: str,
916
+ route_status: str,
917
+ start_ts: int,
918
+ end_ts: int,
919
+ ) -> None:
920
+ try:
921
+ logger.info(
922
+ "Recording client dispatch span: message_id=%s, trace_id=%s",
923
+ message_id,
924
+ trace_id,
925
+ )
926
+ await self.span_recorder.record_span(
927
+ TraceSpan(
928
+ trace_id=trace_id,
929
+ span_id=f"{message_id}:client.dispatch",
930
+ parent_span_id="",
931
+ operation="client.dispatch",
932
+ component="client",
933
+ start_ts=start_ts,
934
+ end_ts=end_ts,
935
+ status="COMPLETED",
936
+ session_id=session_id,
937
+ message_id=message_id,
938
+ parent_message_id=parent_message_id,
939
+ worker_id=target_worker_id,
940
+ source_agent_type="client",
941
+ target_agent_type=target_agent_type,
942
+ route_policy=route_policy,
943
+ route_status=route_status,
944
+ )
945
+ )
946
+ logger.info(
947
+ "Client dispatch span recorded successfully for message_id=%s",
948
+ message_id,
949
+ )
950
+ except Exception as err: # pylint: disable=broad-exception-caught
951
+ logger.warning(
952
+ "Failed to record client dispatch span: %s", err, exc_info=True
953
+ )
by_framework/common/constants.py CHANGED
@@ -92,6 +92,36 @@ class RedisKeys:
92
92
  """Session-level data stream. Workers push streaming content here."""
93
93
  return f"byai_gateway:session:{session_id}:data_stream"
94
94
 
95
+ @staticmethod
96
+ def session_data_checkpoint(session_id: str, consumer_name: str) -> str:
97
+ """Checkpoint key storing a consumer's last processed data stream ID."""
98
+ return f"byai_gateway:session:{session_id}:consumer:{consumer_name}:checkpoint"
99
+
100
+ @staticmethod
101
+ def trace_meta(trace_id: str) -> str:
102
+ """Hash storing trace-level metadata for observability."""
103
+ return f"by_framework:trace:{trace_id}"
104
+
105
+ @staticmethod
106
+ def trace_spans(trace_id: str) -> str:
107
+ """List storing trace span JSON payloads ordered by write time."""
108
+ return f"by_framework:trace:spans:{trace_id}"
109
+
110
+ @staticmethod
111
+ def trace_index_session(session_id: str) -> str:
112
+ """Sorted Set index from session_id to trace IDs."""
113
+ return f"by_framework:trace:idx:session:{session_id}"
114
+
115
+ @staticmethod
116
+ def trace_index_worker(worker_id: str) -> str:
117
+ """Sorted Set index from worker_id to trace IDs."""
118
+ return f"by_framework:trace:idx:worker:{worker_id}"
119
+
120
+ @staticmethod
121
+ def trace_index_agent(agent_type: str) -> str:
122
+ """Sorted Set index from agent type to trace IDs."""
123
+ return f"by_framework:trace:idx:agent:{agent_type}"
124
+
95
125
  @staticmethod
96
126
  def task_group(group_id: str) -> str:
97
127
  """Task group progress tracking Hash Key."""