by-framework 0.2.2.dev1__py3-none-any.whl → 0.2.2.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. by_framework/client/client.py +2 -6
  2. by_framework/{observability/metrics.py → metrics/__init__.py} +24 -1
  3. by_framework/metrics/read_client.py +257 -0
  4. by_framework/{observability → metrics}/snapshot.py +3 -3
  5. by_framework/trace/__init__.py +55 -0
  6. by_framework/{observability → trace}/external_trace.py +1 -1
  7. by_framework/{observability → trace}/span_recorder.py +116 -55
  8. by_framework/trace/trace_schema.py +329 -0
  9. by_framework/trace/trace_writer.py +157 -0
  10. by_framework/worker/_control_handling.py +1 -1
  11. by_framework/worker/context.py +208 -129
  12. by_framework/worker/runner.py +29 -9
  13. by_framework/worker/worker.py +116 -9
  14. {by_framework-0.2.2.dev1.dist-info → by_framework-0.2.2.dev2.dist-info}/METADATA +5 -4
  15. {by_framework-0.2.2.dev1.dist-info → by_framework-0.2.2.dev2.dist-info}/RECORD +17 -24
  16. by_framework/observability/__init__.py +0 -62
  17. by_framework/observability/dashboard.py +0 -1145
  18. by_framework/observability/frontend/index.html +0 -12
  19. by_framework/observability/frontend/package-lock.json +0 -1696
  20. by_framework/observability/frontend/package.json +0 -18
  21. by_framework/observability/frontend/src/main.jsx +0 -1351
  22. by_framework/observability/frontend/src/styles.css +0 -1214
  23. by_framework/observability/frontend/vite.config.js +0 -18
  24. by_framework/observability/static/app.js +0 -115
  25. by_framework/observability/static/index.html +0 -13
  26. by_framework/observability/static/styles.css +0 -1
  27. {by_framework-0.2.2.dev1.dist-info → by_framework-0.2.2.dev2.dist-info}/WHEEL +0 -0
  28. {by_framework-0.2.2.dev1.dist-info → by_framework-0.2.2.dev2.dist-info}/licenses/LICENSE +0 -0
@@ -43,11 +43,7 @@ from by_framework.core.protocol.responses import (
43
43
  )
44
44
  from by_framework.core.registry import WorkerRegistry
45
45
  from by_framework.errors import WorkerRegistryNotSetError
46
- from by_framework.observability.span_recorder import (
47
- SpanRecorder,
48
- TraceSpan,
49
- str_to_uint64,
50
- )
46
+ from by_framework.trace.span_recorder import (SpanRecorder, TraceSpan, str_to_uint64)
51
47
 
52
48
  if TYPE_CHECKING:
53
49
  pass
@@ -688,7 +684,7 @@ class GatewayClient:
688
684
  )
689
685
  )
690
686
  try:
691
- from by_framework.observability.metrics import record_availability_metrics
687
+ from by_framework.metrics import record_availability_metrics
692
688
 
693
689
  record_availability_metrics(
694
690
  agent_type=params["target_agent_type"],
@@ -4,7 +4,12 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Any, Optional
6
6
 
7
- from by_framework.observability.snapshot import _escape_label
7
+ from by_framework.metrics.read_client import (
8
+ MetricsDiagnostic,
9
+ MetricsReadClient,
10
+ MetricsReadResult,
11
+ MetricsWindow,
12
+ )
8
13
 
9
14
  try:
10
15
  from prometheus_client import REGISTRY, Counter, Histogram # type: ignore
@@ -39,6 +44,10 @@ class DummyMetric:
39
44
  del value
40
45
 
41
46
 
47
+ def _escape_label(value: str) -> str:
48
+ return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
49
+
50
+
42
51
  def get_registry() -> Any:
43
52
  """Get the active Prometheus collector registry."""
44
53
  return REGISTRY
@@ -193,3 +202,17 @@ def build_observability_diagnostics_metrics(diagnostics: dict[str, Any]) -> str:
193
202
  f'{{exporter="{_escape_label(str(exporter))}"}} {int(count)}'
194
203
  )
195
204
  return "\n".join(lines) + "\n"
205
+
206
+
207
+ __all__ = [
208
+ "MetricsDiagnostic",
209
+ "MetricsReadClient",
210
+ "MetricsReadResult",
211
+ "MetricsWindow",
212
+ "PROMETHEUS_AVAILABLE",
213
+ "build_observability_diagnostics_metrics",
214
+ "generate_latest_metrics",
215
+ "get_registry",
216
+ "record_availability_metrics",
217
+ "record_execution_metrics",
218
+ ]
@@ -0,0 +1,257 @@
1
+ """Read SDK for correlating metrics history with trace time windows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import asdict, dataclass, field
7
+ from typing import Any, Optional
8
+
9
+ from by_framework.common.redis_client import Redis, get_redis
10
+ from by_framework.metrics.snapshot import (
11
+ REDIS_HISTORY_KEY,
12
+ build_history_point,
13
+ build_observability_snapshot,
14
+ load_history_from_redis,
15
+ )
16
+
17
+
18
@dataclass(frozen=True)
class MetricsDiagnostic:
    """One diagnostic note produced while reading metrics."""

    code: str
    message: str
    severity: str = "info"

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dict, omitting empty-string and ``None`` values."""
        compact: dict[str, Any] = {}
        for name, item in asdict(self).items():
            if item in ("", None):
                continue
            compact[name] = item
        return compact
30
+
31
+
32
@dataclass(frozen=True)
class MetricsWindow:
    """Time range used to line metrics up with trace spans."""

    start_ts: int
    end_ts: int
    buffer_ms: int = 0

    def expanded(self) -> "MetricsWindow":
        """Return a copy widened by ``buffer_ms`` at both ends.

        The new start is clamped at zero and the new end never precedes the
        original start. ``buffer_ms`` is carried over unchanged.
        """
        pad = self.buffer_ms
        lower = max(0, self.start_ts - pad)
        upper = max(self.start_ts, self.end_ts + pad)
        return MetricsWindow(start_ts=lower, end_ts=upper, buffer_ms=pad)

    def to_dict(self) -> dict[str, int]:
        """Return the three window fields as a plain dict."""
        return asdict(self)
50
+
51
+
52
@dataclass(frozen=True)
class MetricsReadResult:
    """Samples, summary and diagnostics gathered for one time window."""

    window: MetricsWindow
    samples: list[dict[str, int]] = field(default_factory=list)
    summary: dict[str, Any] = field(default_factory=dict)
    diagnostics: list[MetricsDiagnostic] = field(default_factory=list)
    status: str = "ok"

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the result.

        The window and each diagnostic are converted through their own
        ``to_dict``; ``samples`` and ``summary`` are passed through as-is.
        """
        rendered_diagnostics = []
        for entry in self.diagnostics:
            rendered_diagnostics.append(entry.to_dict())
        return dict(
            window=self.window.to_dict(),
            samples=self.samples,
            summary=self.summary,
            diagnostics=rendered_diagnostics,
            status=self.status,
        )
70
+
71
+
72
class MetricsReadClient:
    """Read metrics snapshots and history from by-framework Redis storage."""

    def __init__(self, redis_client: Optional[Redis] = None) -> None:
        """Bind to ``redis_client``, or to ``get_redis()`` when none is given."""
        self.redis = redis_client or get_redis()

    async def get_snapshot(self) -> dict[str, Any]:
        """Return the current observability snapshot."""
        return await build_observability_snapshot(self.redis)

    async def get_history(
        self,
        *,
        start_ts: int,
        end_ts: int,
        limit: int = 120,
    ) -> list[dict[str, int]]:
        """Return compact metrics history points that overlap a time window.

        Args:
            start_ts: Lower bound of the window.
            end_ts: Upper bound of the window.
            limit: Maximum number of points requested; values below 1 are
                raised to 1.

        Returns:
            The matching points sorted by ``generated_at`` ascending, or an
            empty list when the window ends at or before zero.
        """
        window = MetricsWindow(start_ts=start_ts, end_ts=end_ts).expanded()
        if window.end_ts <= 0:
            return []
        samples = await self._load_history_between(
            window.start_ts,
            window.end_ts,
            limit=max(1, limit),
        )
        return sorted(samples, key=lambda item: int(item.get("generated_at", 0) or 0))

    async def explain_window(
        self,
        *,
        start_ts: int,
        end_ts: int,
        buffer_ms: int = 5_000,
        limit: int = 120,
    ) -> MetricsReadResult:
        """Summarize metrics conditions around a trace/span time window.

        The window is widened by ``buffer_ms`` on both sides before history
        is read. When no history point is found, the current snapshot is
        used as a single sample if its ``generated_at`` falls inside the
        widened window.

        Returns:
            A ``MetricsReadResult`` whose ``status`` is ``"partial"`` when any
            diagnostic was recorded and ``"ok"`` otherwise.
        """
        window = MetricsWindow(
            start_ts=int(start_ts or 0),
            end_ts=max(int(start_ts or 0), int(end_ts or 0)),
            buffer_ms=max(0, int(buffer_ms or 0)),
        ).expanded()
        diagnostics: list[MetricsDiagnostic] = []
        samples = await self.get_history(
            start_ts=window.start_ts,
            end_ts=window.end_ts,
            limit=limit,
        )
        if not samples:
            diagnostics.append(
                MetricsDiagnostic(
                    code="metrics_history_missing",
                    message="No metrics history points were found for this window.",
                    severity="warning",
                )
            )
            # Best-effort fallback: a failing snapshot is reported as a
            # diagnostic instead of failing the whole read.
            try:
                current_point = build_history_point(await self.get_snapshot())
                if self._point_in_window(current_point, window):
                    samples = [current_point]
                    diagnostics.append(
                        MetricsDiagnostic(
                            code="metrics_current_snapshot_used",
                            message="Current metrics snapshot was used as fallback.",
                        )
                    )
            except Exception as err:  # pylint: disable=broad-exception-caught
                diagnostics.append(
                    MetricsDiagnostic(
                        code="metrics_snapshot_failed",
                        message=f"Current metrics snapshot fallback failed: {err}",
                        severity="warning",
                    )
                )
        summary = self._summarize(samples)
        diagnostics.extend(self._diagnose_summary(summary))
        status = "partial" if diagnostics else "ok"
        return MetricsReadResult(
            window=window,
            samples=samples,
            summary=summary,
            diagnostics=diagnostics,
            status=status,
        )

    async def _load_history_between(
        self,
        start_ts: int,
        end_ts: int,
        *,
        limit: int,
    ) -> list[dict[str, int]]:
        """Load history points whose timestamp lies in ``[start_ts, end_ts]``.

        Tries ``zrangebyscore`` on ``REDIS_HISTORY_KEY`` first, passing the
        bounds as score limits. If the client has no such callable, or the
        attempt raises ``TypeError``, it loads up to ``limit`` points through
        ``load_history_from_redis`` and filters them on ``generated_at``.
        """
        zrangebyscore = getattr(self.redis, "zrangebyscore", None)
        if callable(zrangebyscore):
            try:
                raw_entries = await zrangebyscore(
                    REDIS_HISTORY_KEY,
                    start_ts,
                    end_ts,
                    start=0,
                    num=limit,
                )
                return [
                    point for point in map(self._decode_point, raw_entries) if point
                ]
            except TypeError:
                # NOTE(review): presumably covers clients whose zrangebyscore
                # has a different signature or is not awaitable - confirm.
                pass
        points = await load_history_from_redis(self.redis, limit=limit)
        return [
            point
            for point in points
            if start_ts <= int(point.get("generated_at", 0) or 0) <= end_ts
        ]

    @staticmethod
    def _decode_point(raw: Any) -> dict[str, int]:
        """Decode one stored history entry into a dict.

        Accepts ``bytes``, ``str`` or an already-decoded object. Returns an
        empty dict when the entry is not valid UTF-8, is not valid JSON, or
        does not decode to a dict, so one bad entry cannot abort a read.
        """
        try:
            if isinstance(raw, bytes):
                raw = raw.decode("utf-8")
            if isinstance(raw, str):
                raw = json.loads(raw)
        except ValueError:
            # UnicodeDecodeError and json.JSONDecodeError both derive from
            # ValueError; either one marks the entry as unusable.
            return {}
        return raw if isinstance(raw, dict) else {}

    @staticmethod
    def _point_in_window(point: dict[str, int], window: MetricsWindow) -> bool:
        """Return whether the point's ``generated_at`` lies inside ``window``."""
        generated_at = int(point.get("generated_at", 0) or 0)
        return window.start_ts <= generated_at <= window.end_ts

    @staticmethod
    def _summarize(samples: list[dict[str, int]]) -> dict[str, Any]:
        """Reduce samples to ``min``/``max``/``last`` per tracked field.

        Missing or falsy field values count as 0. ``last`` is the value of
        the final sample in the given order. With no samples the result is
        ``{"sample_count": 0}``.
        """
        if not samples:
            return {"sample_count": 0}
        fields = (
            "workers_online",
            "active_executions",
            "queued_executions",
            "failed_executions",
            "queue_depth_total",
            "consumer_pending_total",
            "alert_count",
            "latency_p95_ms",
            "queue_latency_p95_ms",
            "total_latency_p95_ms",
        )
        summary: dict[str, Any] = {"sample_count": len(samples)}
        for field_name in fields:
            values = [int(sample.get(field_name, 0) or 0) for sample in samples]
            summary[field_name] = {
                "min": min(values),
                "max": max(values),
                "last": values[-1],
            }
        return summary

    @staticmethod
    def _diagnose_summary(summary: dict[str, Any]) -> list[MetricsDiagnostic]:
        """Return a warning for each checked field whose ``max`` is above zero.

        An empty summary, or one with a non-positive ``sample_count``, yields
        no diagnostics.
        """
        diagnostics: list[MetricsDiagnostic] = []
        if not summary or int(summary.get("sample_count", 0) or 0) <= 0:
            return diagnostics
        checks = (
            ("queue_depth_total", "metrics_queue_backlog", "Queue depth was non-zero."),
            (
                "consumer_pending_total",
                "metrics_consumer_pending",
                "Consumer pending messages were non-zero.",
            ),
            ("alert_count", "metrics_alerts_present", "System alerts were present."),
            (
                "failed_executions",
                "metrics_failures_present",
                "Failed executions were present.",
            ),
        )
        for field_name, code, message in checks:
            max_value = int(summary.get(field_name, {}).get("max", 0) or 0)
            if max_value > 0:
                diagnostics.append(
                    MetricsDiagnostic(
                        code=code,
                        message=message,
                        severity="warning",
                    )
                )
        return diagnostics
@@ -523,12 +523,12 @@ def build_prometheus_metrics(snapshot: dict[str, Any]) -> str:
523
523
  )
524
524
  lines.append(
525
525
  "by_framework_stream_pending_messages"
526
- f"{{{labels}}} {int(group.get("pending", 0) or 0)}"
526
+ f"{{{labels}}} {int(group.get('pending', 0) or 0)}"
527
527
  )
528
528
  lag = group.get("lag")
529
529
  if lag is not None:
530
530
  lines.append(
531
- "by_framework_stream_consumer_lag" f"{{{labels}}} {int(lag or 0)}"
531
+ f"by_framework_stream_consumer_lag{{{labels}}} {int(lag or 0)}"
532
532
  )
533
533
 
534
534
  lines.extend(
@@ -1424,7 +1424,7 @@ def _build_queue_alerts(
1424
1424
  "code": "QUEUE_BACKLOG",
1425
1425
  "severity": "warning",
1426
1426
  "message": (
1427
- f"{length} messages queued for agent type " f"{agent_type}."
1427
+ f"{length} messages queued for agent type {agent_type}."
1428
1428
  ),
1429
1429
  "value": length,
1430
1430
  "threshold": policy.queue_backlog_threshold,
@@ -0,0 +1,55 @@
1
+ """Trace helpers, context propagation, and write-side observability APIs."""
2
+
3
+ from .external_trace import (
4
+ ExternalTraceContext,
5
+ build_langfuse_trace_context,
6
+ build_otel_parent_context,
7
+ extract_external_trace_context,
8
+ start_langfuse_observation,
9
+ to_langfuse_trace_id,
10
+ )
11
+ from .span_recorder import (
12
+ LiveSpanHandle,
13
+ ObservabilityConfig,
14
+ SpanRecorder,
15
+ TraceSpan,
16
+ build_observability_config,
17
+ get_observability_diagnostics,
18
+ live_execution_otel_span,
19
+ reset_observability_diagnostics,
20
+ )
21
+ from .trace_schema import (
22
+ EventRecord,
23
+ ExecutionRecord,
24
+ SpanNode,
25
+ SpanRecord,
26
+ TraceDiagnostic,
27
+ TraceReadResult,
28
+ TraceRecord,
29
+ )
30
+ from .trace_writer import TraceWriteClient
31
+
32
+ __all__ = [
33
+ "ExternalTraceContext",
34
+ "EventRecord",
35
+ "ExecutionRecord",
36
+ "LiveSpanHandle",
37
+ "ObservabilityConfig",
38
+ "SpanNode",
39
+ "SpanRecord",
40
+ "SpanRecorder",
41
+ "TraceDiagnostic",
42
+ "TraceReadResult",
43
+ "TraceRecord",
44
+ "TraceSpan",
45
+ "TraceWriteClient",
46
+ "build_langfuse_trace_context",
47
+ "build_observability_config",
48
+ "build_otel_parent_context",
49
+ "extract_external_trace_context",
50
+ "get_observability_diagnostics",
51
+ "live_execution_otel_span",
52
+ "reset_observability_diagnostics",
53
+ "start_langfuse_observation",
54
+ "to_langfuse_trace_id",
55
+ ]
@@ -7,7 +7,7 @@ from typing import Any
7
7
 
8
8
  from by_framework.core.protocol.commands import BaseCommand
9
9
  from by_framework.core.protocol.message_header import MessageHeader
10
- from by_framework.observability.span_recorder import str_to_uint128
10
+ from by_framework.trace.span_recorder import str_to_uint128
11
11
 
12
12
 
13
13
  @dataclass(frozen=True)
@@ -6,6 +6,7 @@ import contextvars
6
6
  import hashlib
7
7
  import json
8
8
  import os
9
+ import threading
9
10
  import time
10
11
  from contextlib import asynccontextmanager
11
12
  from dataclasses import asdict, dataclass, field, replace
@@ -77,7 +78,27 @@ try:
77
78
  except ImportError:
78
79
 
79
80
  class ContextIdGenerator: # type: ignore
80
- pass
81
+ """Fallback ID generator used when OpenTelemetry is not installed."""
82
+
83
+ def generate_trace_id(self) -> int:
84
+ """Return a context-provided or random 128-bit trace id."""
85
+ val = current_trace_id_var.get()
86
+ if val is not None:
87
+ return val
88
+ import secrets
89
+
90
+ val_rand = secrets.randbits(128)
91
+ return val_rand if val_rand != 0 else 1
92
+
93
+ def generate_span_id(self) -> int:
94
+ """Return a context-provided or random 64-bit span id."""
95
+ val = current_span_id_var.get()
96
+ if val is not None:
97
+ return val
98
+ import secrets
99
+
100
+ val_rand = secrets.randbits(64)
101
+ return val_rand if val_rand != 0 else 1
81
102
 
82
103
 
83
104
  def configure_otel_id_generator() -> None:
@@ -125,6 +146,9 @@ _OBSERVABILITY_DIAGNOSTICS: dict[str, Any] = {
125
146
  "export_failures_total": 0,
126
147
  "export_failures_by_exporter": {},
127
148
  }
149
+ _OBSERVABILITY_DIAGNOSTICS_LOCK = threading.Lock()
150
+ _LANGFUSE_PROCESSOR_PROVIDER_IDS: set[int] = set()
151
+ _LANGFUSE_PROCESSOR_LOCK = threading.Lock()
128
152
 
129
153
 
130
154
  @dataclass(frozen=True)
@@ -171,38 +195,44 @@ def build_observability_config() -> ObservabilityConfig:
171
195
 
172
196
  def get_observability_diagnostics() -> dict[str, Any]:
173
197
  """Return trace exporter self-diagnostics."""
174
- return {
175
- "dropped_spans_total": int(_OBSERVABILITY_DIAGNOSTICS["dropped_spans_total"]),
176
- "dropped_spans_by_reason": dict(
177
- _OBSERVABILITY_DIAGNOSTICS["dropped_spans_by_reason"]
178
- ),
179
- "export_failures_total": int(
180
- _OBSERVABILITY_DIAGNOSTICS["export_failures_total"]
181
- ),
182
- "export_failures_by_exporter": dict(
183
- _OBSERVABILITY_DIAGNOSTICS["export_failures_by_exporter"]
184
- ),
185
- }
198
+ with _OBSERVABILITY_DIAGNOSTICS_LOCK:
199
+ return {
200
+ "dropped_spans_total": int(
201
+ _OBSERVABILITY_DIAGNOSTICS["dropped_spans_total"]
202
+ ),
203
+ "dropped_spans_by_reason": dict(
204
+ _OBSERVABILITY_DIAGNOSTICS["dropped_spans_by_reason"]
205
+ ),
206
+ "export_failures_total": int(
207
+ _OBSERVABILITY_DIAGNOSTICS["export_failures_total"]
208
+ ),
209
+ "export_failures_by_exporter": dict(
210
+ _OBSERVABILITY_DIAGNOSTICS["export_failures_by_exporter"]
211
+ ),
212
+ }
186
213
 
187
214
 
188
215
  def reset_observability_diagnostics() -> None:
189
216
  """Reset trace exporter self-diagnostics for tests."""
190
- _OBSERVABILITY_DIAGNOSTICS["dropped_spans_total"] = 0
191
- _OBSERVABILITY_DIAGNOSTICS["dropped_spans_by_reason"] = {}
192
- _OBSERVABILITY_DIAGNOSTICS["export_failures_total"] = 0
193
- _OBSERVABILITY_DIAGNOSTICS["export_failures_by_exporter"] = {}
217
+ with _OBSERVABILITY_DIAGNOSTICS_LOCK:
218
+ _OBSERVABILITY_DIAGNOSTICS["dropped_spans_total"] = 0
219
+ _OBSERVABILITY_DIAGNOSTICS["dropped_spans_by_reason"] = {}
220
+ _OBSERVABILITY_DIAGNOSTICS["export_failures_total"] = 0
221
+ _OBSERVABILITY_DIAGNOSTICS["export_failures_by_exporter"] = {}
194
222
 
195
223
 
196
224
  def _record_drop(reason: str) -> None:
197
- _OBSERVABILITY_DIAGNOSTICS["dropped_spans_total"] += 1
198
- by_reason = _OBSERVABILITY_DIAGNOSTICS["dropped_spans_by_reason"]
199
- by_reason[reason] = int(by_reason.get(reason, 0)) + 1
225
+ with _OBSERVABILITY_DIAGNOSTICS_LOCK:
226
+ _OBSERVABILITY_DIAGNOSTICS["dropped_spans_total"] += 1
227
+ by_reason = _OBSERVABILITY_DIAGNOSTICS["dropped_spans_by_reason"]
228
+ by_reason[reason] = int(by_reason.get(reason, 0)) + 1
200
229
 
201
230
 
202
231
  def _record_export_failure(exporter_name: str) -> None:
203
- _OBSERVABILITY_DIAGNOSTICS["export_failures_total"] += 1
204
- by_exporter = _OBSERVABILITY_DIAGNOSTICS["export_failures_by_exporter"]
205
- by_exporter[exporter_name] = int(by_exporter.get(exporter_name, 0)) + 1
232
+ with _OBSERVABILITY_DIAGNOSTICS_LOCK:
233
+ _OBSERVABILITY_DIAGNOSTICS["export_failures_total"] += 1
234
+ by_exporter = _OBSERVABILITY_DIAGNOSTICS["export_failures_by_exporter"]
235
+ by_exporter[exporter_name] = int(by_exporter.get(exporter_name, 0)) + 1
206
236
 
207
237
 
208
238
  def _clean_env(value: str | None) -> str:
@@ -287,6 +317,13 @@ class TraceSpan:
287
317
  start_ts: int
288
318
  end_ts: int
289
319
  status: str
320
+ name: str = ""
321
+ kind: str = ""
322
+ source: str = "redis"
323
+ input: Any = None
324
+ output: Any = None
325
+ tokens: dict[str, Any] = field(default_factory=dict)
326
+ cost: dict[str, Any] = field(default_factory=dict)
290
327
  session_id: str = ""
291
328
  execution_id: str = ""
292
329
  message_id: str = ""
@@ -312,17 +349,14 @@ class TraceSpan:
312
349
  payload["start_ts"] = int(self.start_ts or 0)
313
350
  payload["end_ts"] = max(payload["start_ts"], int(self.end_ts or 0))
314
351
  payload["duration_ms"] = max(0, payload["end_ts"] - payload["start_ts"])
352
+ payload["name"] = self.name or self.operation
315
353
  if payload.get("error_message"):
316
354
  payload["error_message"] = _sanitize_value(
317
355
  "error_message", payload["error_message"]
318
356
  )
319
357
  if payload.get("metadata"):
320
358
  payload["metadata"] = _sanitize_value("metadata", payload["metadata"])
321
- return {
322
- key: value
323
- for key, value in payload.items()
324
- if value not in ("", None) and value is not False
325
- }
359
+ return {key: value for key, value in payload.items() if value not in ("", None)}
326
360
 
327
361
 
328
362
  @runtime_checkable
@@ -351,8 +385,17 @@ class RedisSpanExporter:
351
385
  payload = span.to_payload()
352
386
  trace_id = str(payload["trace_id"])
353
387
  start_ts = int(payload.get("start_ts", 0) or 0)
388
+ end_ts = int(payload.get("end_ts", start_ts) or start_ts)
354
389
  meta_key = RedisKeys.trace_meta(trace_id)
355
390
  spans_key = RedisKeys.trace_spans(trace_id)
391
+ existing_start_ts = await self._read_hash_int(meta_key, "start_ts")
392
+ existing_updated_at = await self._read_hash_int(meta_key, "updated_at")
393
+ trace_start_ts = (
394
+ min(value for value in (existing_start_ts, start_ts) if value > 0)
395
+ if existing_start_ts or start_ts
396
+ else 0
397
+ )
398
+ updated_at = max(existing_updated_at, end_ts)
356
399
  pipe = self.redis.pipeline()
357
400
  if isawaitable(pipe):
358
401
  pipe = await pipe
@@ -363,14 +406,29 @@ class RedisSpanExporter:
363
406
  await self._call_pipeline(
364
407
  pipe, "hset", meta_key, "status", str(payload.get("status", ""))
365
408
  )
366
- await self._call_pipeline(pipe, "hset", meta_key, "start_ts", start_ts)
367
- await self._call_pipeline(
368
- pipe,
369
- "hset",
370
- meta_key,
371
- "updated_at",
372
- int(payload.get("end_ts", start_ts) or start_ts),
373
- )
409
+ operation = str(payload.get("operation", ""))
410
+ if payload.get("name") and operation.startswith("client.dispatch"):
411
+ await self._call_pipeline(
412
+ pipe, "hset", meta_key, "name", str(payload.get("name", ""))
413
+ )
414
+ if payload.get("target_agent_type") and operation.startswith("client.dispatch"):
415
+ await self._call_pipeline(
416
+ pipe,
417
+ "hset",
418
+ meta_key,
419
+ "root_agent_type",
420
+ str(payload.get("target_agent_type", "")),
421
+ )
422
+ if payload.get("message_id") and operation.startswith("client.dispatch"):
423
+ await self._call_pipeline(
424
+ pipe,
425
+ "hset",
426
+ meta_key,
427
+ "root_message_id",
428
+ str(payload.get("message_id", "")),
429
+ )
430
+ await self._call_pipeline(pipe, "hset", meta_key, "start_ts", trace_start_ts)
431
+ await self._call_pipeline(pipe, "hset", meta_key, "updated_at", updated_at)
374
432
  await self._call_pipeline(
375
433
  pipe, "rpush", spans_key, json.dumps(payload, ensure_ascii=False)
376
434
  )
@@ -413,6 +471,20 @@ class RedisSpanExporter:
413
471
  if isawaitable(result):
414
472
  await result
415
473
 
474
+ async def _read_hash_int(self, name: str, field_name: str) -> int:
475
+ hget = getattr(self.redis, "hget", None)
476
+ if not callable(hget):
477
+ return 0
478
+ try:
479
+ value = hget(name, field_name) # pylint: disable=not-callable
480
+ if isawaitable(value):
481
+ value = await value
482
+ if isinstance(value, bytes):
483
+ value = value.decode("utf-8")
484
+ return int(value or 0)
485
+ except (TypeError, ValueError):
486
+ return 0
487
+
416
488
 
417
489
  class OTelSpanExporter:
418
490
  """Export TraceSpan objects into OpenTelemetry's global tracer."""
@@ -667,30 +739,18 @@ def register_langfuse_span_processor() -> None:
667
739
  if secret_key and public_key and base_url:
668
740
  # 1. Ensure the global TracerProvider exists and patch the ID generator.
669
741
  provider = trace.get_tracer_provider()
670
- if hasattr(provider, "_delegate") and provider._delegate is not None:
671
- provider = provider._delegate
672
-
673
742
  if not isinstance(provider, TracerProvider):
674
743
  provider = TracerProvider()
675
744
  trace.set_tracer_provider(provider)
676
745
  configure_otel_id_generator()
677
746
 
678
- # 2. Avoid registering duplicate LangfuseSpanProcessor instances.
679
- has_processor = False
680
- active_processor = getattr(provider, "_active_span_processor", None)
681
- if active_processor is not None:
682
- processors = []
683
- if hasattr(active_processor, "_span_processors"):
684
- processors = active_processor._span_processors
685
- else:
686
- processors = [active_processor]
687
-
688
- for p in processors:
689
- if p.__class__.__name__ == "LangfuseSpanProcessor":
690
- has_processor = True
691
- break
692
-
693
- if not has_processor:
747
+ # 2. Avoid duplicate registration from this integration without
748
+ # relying on OpenTelemetry SDK private provider internals.
749
+ provider_id = id(provider)
750
+ with _LANGFUSE_PROCESSOR_LOCK:
751
+ if provider_id in _LANGFUSE_PROCESSOR_PROVIDER_IDS:
752
+ return
753
+
694
754
  # 3. Dynamically import and attach LangfuseSpanProcessor.
695
755
  langfuse_processor_mod = import_module(
696
756
  "langfuse._client.span_processor"
@@ -725,6 +785,7 @@ def register_langfuse_span_processor() -> None:
725
785
  should_export_span=should_export_span,
726
786
  )
727
787
  provider.add_span_processor(processor)
788
+ _LANGFUSE_PROCESSOR_PROVIDER_IDS.add(provider_id)
728
789
  logger.info(
729
790
  "LangfuseSpanProcessor registered successfully to global OTel "
730
791
  "TracerProvider. Base URL: %s",