ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ai_pipeline_core/__init__.py +70 -144
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +106 -81
  37. ai_pipeline_core/llm/client.py +267 -158
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/{debug/content.py → observability/_debug/_content.py}
@@ -1,6 +1,7 @@
 """Content writing and extraction for trace debugging V3.

 Uses hash-based artifact storage with automatic deduplication.
+Handles Document attachments by externalizing large binary/text attachments to the artifact store.
 """

 import base64
@@ -10,13 +11,13 @@ import re
 from datetime import datetime
 from enum import Enum
 from pathlib import Path
-from typing import Any
+from typing import Any, cast
 from uuid import UUID

 import yaml
 from pydantic import BaseModel, ConfigDict, SecretStr

-from .config import TraceDebugConfig
+from ._config import TraceDebugConfig


 class ContentRef(BaseModel):
@@ -54,9 +55,7 @@ class ArtifactStore:
             return self._known_hashes[content_hash]

         # Create sharded path: ab/cd/abcdef...1234.txt
-        file_path = (
-            self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}.txt"
-        )
+        file_path = self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}.txt"
         file_path.parent.mkdir(parents=True, exist_ok=True)

         if not file_path.exists():
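The sharded layout noted in the hunk above (ab/cd/abcdef...1234.txt) is what makes the artifact store deduplicating: the file name is derived from a hash of the content, so identical payloads map to the same path and are written only once. A minimal standalone sketch of the idea, assuming SHA-256 as the hash (the real ArtifactStore also caches known hashes and returns a ContentRef rather than a Path):

import hashlib
from pathlib import Path


def store_text(artifacts_path: Path, text: str) -> Path:
    # Content-addressed key: identical text always hashes to the same file name.
    content_hash = hashlib.sha256(text.encode("utf-8")).hexdigest()
    # Sharded path ab/cd/<hash>.txt keeps any single directory from growing unbounded.
    file_path = artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}.txt"
    file_path.parent.mkdir(parents=True, exist_ok=True)
    if not file_path.exists():
        # Write once; storing the same content again is a no-op (deduplication).
        file_path.write_text(text, encoding="utf-8")
    return file_path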
@@ -85,13 +84,12 @@ class ArtifactStore:
             "image/png": ".png",
             "image/jpeg": ".jpg",
             "image/gif": ".gif",
+            "image/webp": ".webp",
             "application/pdf": ".pdf",
         }
         ext = ext_map.get(mime_type, ".bin")

-        file_path = (
-            self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}{ext}"
-        )
+        file_path = self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}{ext}"
         file_path.parent.mkdir(parents=True, exist_ok=True)

         if not file_path.exists():
@@ -162,9 +160,7 @@ class ContentWriter:
         if size > self._config.max_file_bytes:
             # Reduce preview sizes to fit under limit
             structured = self._reduce_previews(structured)
-            serialized = yaml.dump(
-                structured, default_flow_style=False, allow_unicode=True, sort_keys=False
-            )
+            serialized = yaml.dump(structured, default_flow_style=False, allow_unicode=True, sort_keys=False)
             serialized = self._redact(serialized)
             size = len(serialized.encode("utf-8"))

@@ -172,9 +168,7 @@
         if size > self._config.max_file_bytes:
             serialized = serialized[: self._config.max_file_bytes]
             max_bytes = self._config.max_file_bytes
-            serialized += (
-                f"\n\n# [TRUNCATED: original {size} bytes exceeded {max_bytes} limit]\n"
-            )
+            serialized += f"\n\n# [TRUNCATED: original {size} bytes exceeded {max_bytes} limit]\n"
             size = len(serialized.encode("utf-8"))

         # Write file
@@ -192,10 +186,9 @@
         """Convert raw content to structured YAML-ready format."""
         if self._is_llm_messages(content):
             return self._structure_llm_messages(content)
-        elif self._is_document_list(content):
+        if self._is_document_list(content):
             return self._structure_documents(content)
-        else:
-            return self._structure_generic(content)
+        return self._structure_generic(content)

     def _is_llm_messages(self, content: Any) -> bool:
         """Check if content looks like LLM messages."""
@@ -203,21 +196,21 @@
             return False
         if not content:
             return False
-        first = content[0]
+        first = cast(Any, content[0])
         if not isinstance(first, dict):
             return False
         return "role" in first and "content" in first

     def _is_document_list(self, content: Any) -> bool:
-        """Check if content looks like a DocumentList."""
+        """Check if content looks like a list of serialized documents."""
         if not isinstance(content, list):
             return False
         if not content:
             return False
-        first = content[0]
+        first = cast(Any, content[0])
         if not isinstance(first, dict):
             return False
-        return "base_type" in first and "content" in first
+        return "class_name" in first and "content" in first

     def _structure_llm_messages(self, messages: list[Any]) -> dict[str, Any]:
         """Structure LLM messages preserving ALL parts losslessly."""
@@ -238,16 +231,18 @@

             if isinstance(content, list):
                 # Multimodal: preserve each part separately
-                msg_entry["parts"] = []
-                for j, part in enumerate(content):
+                content_parts = cast(list[Any], content)
+                msg_parts: list[dict[str, Any]] = []
+                msg_entry["parts"] = msg_parts
+                for j, part in enumerate(content_parts):
                     structured_part, part_bytes = self._structure_message_part(part, j)
-                    msg_entry["parts"].append(structured_part)
+                    msg_parts.append(structured_part)
                     part_type = structured_part.get("type", "")
                     if part_type == "text":
                         total_text_bytes += part_bytes
                     elif part_type == "image":
                         total_image_bytes += part_bytes
-                    elif part_type in ("tool_use", "tool_result"):
+                    elif part_type in {"tool_use", "tool_result"}:
                         total_tool_bytes += part_bytes
             elif isinstance(content, str):
                 # Simple text message
@@ -284,9 +279,7 @@
             "size_bytes": total_text_bytes + total_image_bytes + total_tool_bytes,
         }

-    def _structure_message_part(
-        self, part: dict[str, Any], sequence: int
-    ) -> tuple[dict[str, Any], int]:
+    def _structure_message_part(self, part: dict[str, Any], sequence: int) -> tuple[dict[str, Any], int]:
         """Structure a single message part losslessly.

         Returns:
@@ -297,13 +290,13 @@
         if part_type == "text":
             entry = self._structure_text_element(part.get("text", ""), sequence)
             return entry, entry.get("size_bytes", 0)
-        elif part_type == "image_url":
+        if part_type == "image_url":
             entry = self._structure_image_openai(part, sequence)
             return entry, entry.get("size_bytes", 0)
-        elif part_type == "image":
+        if part_type == "image":
             entry = self._structure_image_anthropic(part, sequence)
             return entry, entry.get("size_bytes", 0)
-        elif part_type == "tool_use":
+        if part_type == "tool_use":
             input_str = json.dumps(part.get("input", {}))
             size = len(input_str.encode("utf-8"))
             return {
@@ -313,7 +306,7 @@
                 "name": part.get("name"),
                 "input": self._convert_types(part.get("input")),
             }, size
-        elif part_type == "tool_result":
+        if part_type == "tool_result":
             result_content = part.get("content")
             entry: dict[str, Any] = {
                 "type": "tool_result",
@@ -327,25 +320,26 @@
                 entry["content"] = text_entry
                 size = text_entry.get("size_bytes", 0)
             elif isinstance(result_content, list):
-                entry["content"] = []
-                for k, p in enumerate(result_content):
+                result_parts = cast(list[Any], result_content)
+                content_list: list[dict[str, Any]] = []
+                entry["content"] = content_list
+                for k, p in enumerate(result_parts):
                     part_entry, part_size = self._structure_message_part(p, k)
-                    entry["content"].append(part_entry)
+                    content_list.append(part_entry)
                     size += part_size
             else:
                 entry["content"] = self._convert_types(result_content)
             return entry, size
-        else:
-            # Unknown type — preserve raw data, never drop
-            raw = self._convert_types(part)
-            raw_str = json.dumps(raw)
-            size = len(raw_str.encode("utf-8"))
-            return {
-                "type": "unknown",
-                "sequence": sequence,
-                "original_type": part_type,
-                "raw_data": raw,
-            }, size
+        # Unknown type — preserve raw data, never drop
+        raw = self._convert_types(part)
+        raw_str = json.dumps(raw)
+        size = len(raw_str.encode("utf-8"))
+        return {
+            "type": "unknown",
+            "sequence": sequence,
+            "original_type": part_type,
+            "raw_data": raw,
+        }, size

     def _structure_text_element(self, text: str, sequence: int) -> dict[str, Any]:
         """Structure a text element, optionally externalizing large content."""
@@ -369,9 +363,7 @@
                 "mime_type": ref.mime_type,
                 "encoding": ref.encoding,
             }
-            entry["excerpt"] = (
-                text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
-            )
+            entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
         else:
             # No artifact store — truncate with marker
             entry["content"] = text[: self._config.max_element_bytes]
@@ -486,20 +478,20 @@

         return entry

-    def _structure_documents(self, docs: list[Any]) -> dict[str, Any]:
-        """Structure document list."""
+    def _structure_documents(self, docs: list[Any]) -> dict[str, Any]:  # noqa: PLR0914
+        """Structure document list with attachment externalization."""
         doc_entries: list[dict[str, Any]] = []

         for i, doc in enumerate(docs):
             doc_name = doc.get("name", f"doc_{i}")
-            base_type = doc.get("base_type", "unknown")
+            class_name = doc.get("class_name", "Document")
             content = doc.get("content", "")
             content_encoding = doc.get("content_encoding", "utf-8")

             doc_entry: dict[str, Any] = {
                 "index": i,
                 "name": doc_name,
-                "base_type": base_type,
+                "class_name": class_name,
             }

             if content_encoding == "base64":
@@ -541,12 +533,75 @@
                         "mime_type": ref.mime_type,
                         "encoding": ref.encoding,
                     }
-                    doc_entry["excerpt"] = (
-                        text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
-                    )
+                    doc_entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
                 else:
                     doc_entry["content"] = text

+            # Structure attachments if present
+            raw_attachments = doc.get("attachments")
+            if isinstance(raw_attachments, list) and raw_attachments:
+                att_entries: list[dict[str, Any]] = []
+                attachments_list = cast(list[Any], raw_attachments)
+                for j, att in enumerate(attachments_list):
+                    if not isinstance(att, dict):
+                        continue
+                    att_dict = cast(dict[str, Any], att)
+                    att_name = att_dict.get("name", f"attachment_{j}")
+                    att_encoding = att_dict.get("content_encoding", "utf-8")
+                    att_content = att_dict.get("content", "")
+
+                    att_entry: dict[str, Any] = {
+                        "index": j,
+                        "name": att_name,
+                    }
+                    if att_dict.get("description"):
+                        att_entry["description"] = att_dict["description"]
+
+                    if att_encoding == "base64":
+                        try:
+                            binary_data = base64.b64decode(att_content)
+                            size = len(binary_data)
+                            att_entry["size_bytes"] = size
+                            att_entry["encoding"] = "base64"
+                            mime_type = att_dict.get("mime_type", "application/octet-stream")
+
+                            if size > self._config.max_element_bytes and self._artifact_store:
+                                ref = self._artifact_store.store_binary(binary_data, mime_type)
+                                att_entry["content_ref"] = {
+                                    "hash": ref.hash,
+                                    "path": ref.path,
+                                    "mime_type": ref.mime_type,
+                                    "encoding": ref.encoding,
+                                }
+                                att_entry["preview"] = f"[Binary attachment, {size} bytes]"
+                            else:
+                                att_entry["content"] = att_content
+                        except Exception:
+                            att_entry["content"] = "[binary content - decode failed]"
+                            att_entry["size_bytes"] = 0
+                    else:
+                        text = self._redact(str(att_content))
+                        text_bytes = len(text.encode("utf-8"))
+                        att_entry["size_bytes"] = text_bytes
+
+                        if text_bytes > self._config.max_element_bytes and self._artifact_store:
+                            ref = self._artifact_store.store_text(text)
+                            excerpt_len = self._config.element_excerpt_bytes
+                            att_entry["content_ref"] = {
+                                "hash": ref.hash,
+                                "path": ref.path,
+                                "mime_type": ref.mime_type,
+                                "encoding": ref.encoding,
+                            }
+                            att_entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
+                        else:
+                            att_entry["content"] = text
+
+                    att_entries.append(att_entry)
+
+                doc_entry["attachment_count"] = len(att_entries)
+                doc_entry["attachments"] = att_entries
+
             doc_entries.append(doc_entry)

         return {
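Document bodies and the newly added attachments follow the same externalization rule: anything larger than max_element_bytes is moved into the artifact store and represented inline by a content_ref plus a short excerpt (or a byte-count preview for binary data), while smaller content stays inline. A hedged standalone sketch of that decision for text content (store_text and the ContentRef fields are taken from the diff; the thresholds are illustrative, not the library defaults):

from typing import Any

MAX_ELEMENT_BYTES = 64_000  # illustrative threshold
EXCERPT_BYTES = 1_000       # illustrative excerpt length


def externalize_text(text: str, artifact_store: Any | None) -> dict[str, Any]:
    entry: dict[str, Any] = {"size_bytes": len(text.encode("utf-8"))}
    if entry["size_bytes"] > MAX_ELEMENT_BYTES and artifact_store is not None:
        # Large content goes to the artifact store; only a reference plus excerpt stays inline.
        ref = artifact_store.store_text(text)
        entry["content_ref"] = {
            "hash": ref.hash,
            "path": ref.path,
            "mime_type": ref.mime_type,
            "encoding": ref.encoding,
        }
        entry["excerpt"] = text[:EXCERPT_BYTES] + "\n[TRUNCATED - see artifact for full content]"
    else:
        entry["content"] = text
    return entry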
@@ -583,11 +638,10 @@
                 "image_bytes": metadata.get("total_image_bytes", 0),
                 "tool_bytes": metadata.get("total_tool_bytes", 0),
             }
-        elif "size_bytes" in structured:
+        if "size_bytes" in structured:
             return {"total_bytes": structured["size_bytes"]}
-        else:
-            serialized = json.dumps(self._convert_types(structured))
-            return {"total_bytes": len(serialized.encode("utf-8"))}
+        serialized = json.dumps(self._convert_types(structured))
+        return {"total_bytes": len(serialized.encode("utf-8"))}

     def _reduce_previews(self, structured: dict[str, Any]) -> dict[str, Any]:
         """Reduce preview/excerpt sizes to fit file under max_file_bytes."""
@@ -606,7 +660,7 @@
             text = pattern.sub("[REDACTED]", text)
         return text

-    def _convert_types(self, value: Any, seen: set[int] | None = None) -> Any:
+    def _convert_types(self, value: Any, seen: set[int] | None = None) -> Any:  # noqa: PLR0911
         """Convert non-serializable types recursively with cycle detection."""
         # Cycle detection
         if seen is None:
@@ -634,7 +688,7 @@
             case Enum():
                 return value.value
             case set() | frozenset():
-                return sorted(str(x) for x in value)
+                return sorted(str(x) for x in cast(set[Any] | frozenset[Any], value))
             case BaseModel():
                 try:
                     return value.model_dump(mode="json")
@@ -642,12 +696,14 @@
                     return str(value)
             case dict():
                 seen.add(obj_id)
-                result = {str(k): self._convert_types(v, seen) for k, v in value.items()}
+                typed_dict = cast(dict[Any, Any], value)
+                result = {str(k): self._convert_types(v, seen) for k, v in typed_dict.items()}
                 seen.discard(obj_id)
                 return result
             case list() | tuple():
                 seen.add(obj_id)
-                result = [self._convert_types(x, seen) for x in value]
+                typed_seq = cast(list[Any] | tuple[Any, ...], value)
+                result = [self._convert_types(x, seen) for x in typed_seq]
                 seen.discard(obj_id)
                 return result
             case _:
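The cast additions above keep _convert_types acceptable to a strict type checker without changing its behavior. The mechanism visible in these hunks is a seen set of object ids that is added to before recursing into a dict, list, or tuple and discarded afterwards, so cyclic structures terminate. A stripped-down sketch of just that cycle-detection pattern (the real method also handles Enum, set, SecretStr, Pydantic models, and more; the cycle marker string is a placeholder, not necessarily the library's):

from typing import Any


def convert(value: Any, seen: set[int] | None = None) -> Any:
    if seen is None:
        seen = set()
    obj_id = id(value)
    if obj_id in seen:
        return "[circular reference]"  # placeholder marker
    if isinstance(value, dict):
        seen.add(obj_id)
        result = {str(k): convert(v, seen) for k, v in value.items()}
        seen.discard(obj_id)
        return result
    if isinstance(value, (list, tuple)):
        seen.add(obj_id)
        result = [convert(x, seen) for x in value]
        seen.discard(obj_id)
        return result
    return value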
@@ -680,26 +736,29 @@ def reconstruct_span_content(trace_root: Path, span_dir: Path, content_type: str

 def _rehydrate(obj: Any, trace_root: Path) -> Any:
     """Recursively replace content_ref entries with actual content."""
     if isinstance(obj, dict):
-        if "content_ref" in obj:
+        obj_dict = cast(dict[str, Any], obj)
+        if "content_ref" in obj_dict:
             # This is an artifact reference - load the full content
-            ref = obj["content_ref"]
-            artifact_path = trace_root / ref["path"]
+            ref: dict[str, Any] = obj_dict["content_ref"]
+            artifact_path: Path = trace_root / ref["path"]

+            full_content: str | bytes
             if ref.get("encoding") == "utf-8":
                 full_content = artifact_path.read_text(encoding="utf-8")
             else:
                 full_content = artifact_path.read_bytes()

             # Replace ref with full content
-            obj = obj.copy()
-            obj["content"] = full_content
-            del obj["content_ref"]
-            if "excerpt" in obj:
-                del obj["excerpt"]
+            obj_dict = obj_dict.copy()
+            obj_dict["content"] = full_content
+            del obj_dict["content_ref"]
+            if "excerpt" in obj_dict:
+                del obj_dict["excerpt"]

-        return {k: _rehydrate(v, trace_root) for k, v in obj.items()}
+        return {k: _rehydrate(v, trace_root) for k, v in obj_dict.items()}

-    elif isinstance(obj, list):
-        return [_rehydrate(v, trace_root) for v in obj]
+    if isinstance(obj, list):
+        obj_list = cast(list[Any], obj)
+        return [_rehydrate(v, trace_root) for v in obj_list]

     return obj
ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py}
@@ -1,10 +1,12 @@
 """OpenTelemetry SpanProcessor for local trace debugging."""

+import contextlib
+
 from opentelemetry.context import Context
 from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
 from opentelemetry.trace import StatusCode

-from .writer import LocalTraceWriter, WriteJob
+from ._writer import LocalTraceWriter, WriteJob


 class LocalDebugSpanProcessor(SpanProcessor):
@@ -29,7 +31,7 @@ class LocalDebugSpanProcessor(SpanProcessor):
         Creates the span directory early so we can see "running" spans.
         Input/output data is not available yet - will be captured in on_end().
         """
-        try:
+        with contextlib.suppress(Exception):
             if span.context is None:
                 return
             trace_id = format(span.context.trace_id, "032x")
@@ -37,9 +39,6 @@
             parent_id = self._get_parent_span_id(span)

             self._writer.on_span_start(trace_id, span_id, parent_id, span.name)
-        except Exception:
-            # Never fail the actual span - debug tracing should be transparent
-            pass

     def on_end(self, span: ReadableSpan) -> None:
         """Handle span end - queue full span data for background write.
@@ -47,7 +46,7 @@ class LocalDebugSpanProcessor(SpanProcessor):
         All data (input, output, attributes, events) is captured here because
         Laminar sets these attributes after span start.
         """
-        try:
+        with contextlib.suppress(Exception):
             if span.context is None or span.start_time is None or span.end_time is None:
                 return
             job = WriteJob(
@@ -63,19 +62,18 @@
                 end_time_ns=span.end_time,
             )
             self._writer.on_span_end(job)
-        except Exception:
-            # Never fail the actual span
-            pass

     def shutdown(self) -> None:
         """Shutdown the processor and writer."""
         self._writer.shutdown()

-    def force_flush(self, timeout_millis: int = 30000) -> bool:
+    def force_flush(self, timeout_millis: int = 30000) -> bool:  # noqa: PLR6301
         """Force flush is not needed for this processor."""
+        _ = timeout_millis
         return True

-    def _get_parent_span_id(self, span: Span) -> str | None:
+    @staticmethod
+    def _get_parent_span_id(span: Span) -> str | None:
         """Extract parent span ID from a writable Span."""
         if hasattr(span, "parent") and span.parent:
             parent_ctx = span.parent
@@ -83,17 +81,18 @@ class LocalDebugSpanProcessor(SpanProcessor):
             return format(parent_ctx.span_id, "016x")
         return None

-    def _get_parent_span_id_from_readable(self, span: ReadableSpan) -> str | None:
+    @staticmethod
+    def _get_parent_span_id_from_readable(span: ReadableSpan) -> str | None:
         """Extract parent span ID from a ReadableSpan."""
-        if span.parent:
-            if hasattr(span.parent, "span_id") and span.parent.span_id:
-                return format(span.parent.span_id, "016x")
+        if span.parent and hasattr(span.parent, "span_id") and span.parent.span_id:
+            return format(span.parent.span_id, "016x")
         return None

-    def _get_status_code(self, span: ReadableSpan) -> str:
+    @staticmethod
+    def _get_status_code(span: ReadableSpan) -> str:
         """Get status code as string."""
         if span.status.status_code == StatusCode.OK:
             return "OK"
-        elif span.status.status_code == StatusCode.ERROR:
+        if span.status.status_code == StatusCode.ERROR:
             return "ERROR"
         return "UNSET"