ai_pipeline_core-0.3.4-py3-none-any.whl → ai_pipeline_core-0.4.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- ai_pipeline_core/__init__.py +64 -158
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +11 -84
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +102 -90
- ai_pipeline_core/llm/client.py +229 -183
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
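Read the rename list above as a migration map: `utils/` and the top-level `tracing.py`/`progress.py` moved under `deployment/` and `observability/`, the `debug` package became private under `observability/_debug/`, and `flow/`, `prompt_builder/`, and `storage/` were removed outright. A hedged sketch of the import moves this implies for downstream code (module paths come from the rename entries above; whether each module re-exports the same names is not shown in this diff):

```python
# 0.3.4-era module paths (left side of the rename entries)
# ai_pipeline_core.tracing
# ai_pipeline_core.progress
# ai_pipeline_core.utils.deploy
# ai_pipeline_core.utils.remote_deployment

# 0.4.0-era equivalents (right side)
from ai_pipeline_core.observability import tracing
from ai_pipeline_core.deployment import deploy, progress, remote  # remote was utils/remote_deployment.py
```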
ai_pipeline_core/{debug/content.py → observability/_debug/_content.py}

@@ -1,6 +1,7 @@
 """Content writing and extraction for trace debugging V3.
 
 Uses hash-based artifact storage with automatic deduplication.
+Handles Document attachments by externalizing large binary/text attachments to the artifact store.
 """
 
 import base64
@@ -10,13 +11,13 @@ import re
 from datetime import datetime
 from enum import Enum
 from pathlib import Path
-from typing import Any
+from typing import Any, cast
 from uuid import UUID
 
 import yaml
 from pydantic import BaseModel, ConfigDict, SecretStr
 
-from .
+from ._config import TraceDebugConfig
 
 
 class ContentRef(BaseModel):
@@ -54,9 +55,7 @@ class ArtifactStore:
             return self._known_hashes[content_hash]
 
         # Create sharded path: ab/cd/abcdef...1234.txt
-        file_path = (
-            self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}.txt"
-        )
+        file_path = self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}.txt"
         file_path.parent.mkdir(parents=True, exist_ok=True)
 
         if not file_path.exists():
@@ -90,9 +89,7 @@ class ArtifactStore:
         }
         ext = ext_map.get(mime_type, ".bin")
 
-        file_path = (
-            self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}{ext}"
-        )
+        file_path = self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}{ext}"
         file_path.parent.mkdir(parents=True, exist_ok=True)
 
         if not file_path.exists():
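The two hunks above only flatten a wrapped assignment, but the layout they preserve is the interesting part: artifacts are content-addressed, sharded two levels deep by hash prefix, and written only when absent. A minimal standalone sketch of that scheme (assuming SHA-256; `store_text` and `_known_hashes` mirror names visible in this diff, the rest is illustrative):

```python
import hashlib
from pathlib import Path


class ShardedStore:
    """Illustrative content-addressed store using the ab/cd/<hash>.txt layout."""

    def __init__(self, root: Path) -> None:
        self._root = root
        self._known_hashes: dict[str, Path] = {}  # in-process dedup cache, as in the diff

    def store_text(self, text: str) -> Path:
        content_hash = hashlib.sha256(text.encode("utf-8")).hexdigest()
        if content_hash in self._known_hashes:  # already stored this run
            return self._known_hashes[content_hash]
        # Shard by the first two hash-prefix pairs to keep directories small
        path = self._root / content_hash[:2] / content_hash[2:4] / f"{content_hash}.txt"
        path.parent.mkdir(parents=True, exist_ok=True)
        if not path.exists():  # dedup across runs: identical content, identical path
            path.write_text(text, encoding="utf-8")
        self._known_hashes[content_hash] = path
        return path
```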
@@ -163,9 +160,7 @@ class ContentWriter:
         if size > self._config.max_file_bytes:
             # Reduce preview sizes to fit under limit
             structured = self._reduce_previews(structured)
-            serialized = yaml.dump(
-                structured, default_flow_style=False, allow_unicode=True, sort_keys=False
-            )
+            serialized = yaml.dump(structured, default_flow_style=False, allow_unicode=True, sort_keys=False)
         serialized = self._redact(serialized)
         size = len(serialized.encode("utf-8"))
 
@@ -173,9 +168,7 @@ class ContentWriter:
         if size > self._config.max_file_bytes:
             serialized = serialized[: self._config.max_file_bytes]
             max_bytes = self._config.max_file_bytes
-            serialized += (
-                f"\n\n# [TRUNCATED: original {size} bytes exceeded {max_bytes} limit]\n"
-            )
+            serialized += f"\n\n# [TRUNCATED: original {size} bytes exceeded {max_bytes} limit]\n"
             size = len(serialized.encode("utf-8"))
 
         # Write file
@@ -193,10 +186,9 @@ class ContentWriter:
         """Convert raw content to structured YAML-ready format."""
         if self._is_llm_messages(content):
             return self._structure_llm_messages(content)
-
+        if self._is_document_list(content):
             return self._structure_documents(content)
-
-        return self._structure_generic(content)
+        return self._structure_generic(content)
 
     def _is_llm_messages(self, content: Any) -> bool:
         """Check if content looks like LLM messages."""
@@ -204,21 +196,21 @@ class ContentWriter:
             return False
         if not content:
             return False
-        first = content[0]
+        first = cast(Any, content[0])
         if not isinstance(first, dict):
             return False
         return "role" in first and "content" in first
 
     def _is_document_list(self, content: Any) -> bool:
-        """Check if content looks like a
+        """Check if content looks like a list of serialized documents."""
         if not isinstance(content, list):
             return False
         if not content:
             return False
-        first = content[0]
+        first = cast(Any, content[0])
         if not isinstance(first, dict):
             return False
-        return "
+        return "class_name" in first and "content" in first
 
     def _structure_llm_messages(self, messages: list[Any]) -> dict[str, Any]:
         """Structure LLM messages preserving ALL parts losslessly."""
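The two detectors above are duck-typing probes over already-serialized payloads: a non-empty list whose first dict carries `role` and `content` keys is treated as chat messages, and one carrying `class_name` and `content` as serialized documents. Illustrative payloads that each probe would accept (shapes inferred from the checks themselves, not from a documented schema):

```python
llm_payload = [{"role": "user", "content": "hello"}]                            # _is_llm_messages -> True
doc_payload = [{"class_name": "Document", "content": "...", "name": "a.txt"}]   # _is_document_list -> True
```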
@@ -239,16 +231,18 @@ class ContentWriter:
 
         if isinstance(content, list):
             # Multimodal: preserve each part separately
-
-
+            content_parts = cast(list[Any], content)
+            msg_parts: list[dict[str, Any]] = []
+            msg_entry["parts"] = msg_parts
+            for j, part in enumerate(content_parts):
                 structured_part, part_bytes = self._structure_message_part(part, j)
-
+                msg_parts.append(structured_part)
                 part_type = structured_part.get("type", "")
                 if part_type == "text":
                     total_text_bytes += part_bytes
                 elif part_type == "image":
                     total_image_bytes += part_bytes
-                elif part_type in
+                elif part_type in {"tool_use", "tool_result"}:
                     total_tool_bytes += part_bytes
         elif isinstance(content, str):
             # Simple text message
@@ -285,9 +279,7 @@ class ContentWriter:
             "size_bytes": total_text_bytes + total_image_bytes + total_tool_bytes,
         }
 
-    def _structure_message_part(
-        self, part: dict[str, Any], sequence: int
-    ) -> tuple[dict[str, Any], int]:
+    def _structure_message_part(self, part: dict[str, Any], sequence: int) -> tuple[dict[str, Any], int]:
         """Structure a single message part losslessly.
 
         Returns:
@@ -298,13 +290,13 @@ class ContentWriter:
         if part_type == "text":
             entry = self._structure_text_element(part.get("text", ""), sequence)
             return entry, entry.get("size_bytes", 0)
-
+        if part_type == "image_url":
             entry = self._structure_image_openai(part, sequence)
             return entry, entry.get("size_bytes", 0)
-
+        if part_type == "image":
             entry = self._structure_image_anthropic(part, sequence)
             return entry, entry.get("size_bytes", 0)
-
+        if part_type == "tool_use":
             input_str = json.dumps(part.get("input", {}))
             size = len(input_str.encode("utf-8"))
             return {
@@ -314,7 +306,7 @@ class ContentWriter:
                 "name": part.get("name"),
                 "input": self._convert_types(part.get("input")),
             }, size
-
+        if part_type == "tool_result":
             result_content = part.get("content")
             entry: dict[str, Any] = {
                 "type": "tool_result",
@@ -328,25 +320,26 @@ class ContentWriter:
                 entry["content"] = text_entry
                 size = text_entry.get("size_bytes", 0)
             elif isinstance(result_content, list):
-
-
+                result_parts = cast(list[Any], result_content)
+                content_list: list[dict[str, Any]] = []
+                entry["content"] = content_list
+                for k, p in enumerate(result_parts):
                     part_entry, part_size = self._structure_message_part(p, k)
-
+                    content_list.append(part_entry)
                     size += part_size
             else:
                 entry["content"] = self._convert_types(result_content)
             return entry, size
-
-
-
-
-
-
-
-
-
-
-            }, size
+        # Unknown type — preserve raw data, never drop
+        raw = self._convert_types(part)
+        raw_str = json.dumps(raw)
+        size = len(raw_str.encode("utf-8"))
+        return {
+            "type": "unknown",
+            "sequence": sequence,
+            "original_type": part_type,
+            "raw_data": raw,
+        }, size
 
     def _structure_text_element(self, text: str, sequence: int) -> dict[str, Any]:
         """Structure a text element, optionally externalizing large content."""
@@ -370,9 +363,7 @@ class ContentWriter:
                 "mime_type": ref.mime_type,
                 "encoding": ref.encoding,
             }
-            entry["excerpt"] = (
-                text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
-            )
+            entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
         else:
             # No artifact store — truncate with marker
             entry["content"] = text[: self._config.max_element_bytes]
@@ -487,20 +478,20 @@ class ContentWriter:
 
         return entry
 
-    def _structure_documents(self, docs: list[Any]) -> dict[str, Any]:
-        """Structure document list."""
+    def _structure_documents(self, docs: list[Any]) -> dict[str, Any]:  # noqa: PLR0914
+        """Structure document list with attachment externalization."""
         doc_entries: list[dict[str, Any]] = []
 
         for i, doc in enumerate(docs):
             doc_name = doc.get("name", f"doc_{i}")
-
+            class_name = doc.get("class_name", "Document")
             content = doc.get("content", "")
             content_encoding = doc.get("content_encoding", "utf-8")
 
             doc_entry: dict[str, Any] = {
                 "index": i,
                 "name": doc_name,
-                "
+                "class_name": class_name,
             }
 
             if content_encoding == "base64":
@@ -542,12 +533,75 @@ class ContentWriter:
                         "mime_type": ref.mime_type,
                         "encoding": ref.encoding,
                     }
-                    doc_entry["excerpt"] = (
-                        text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
-                    )
+                    doc_entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
                 else:
                     doc_entry["content"] = text
 
+            # Structure attachments if present
+            raw_attachments = doc.get("attachments")
+            if isinstance(raw_attachments, list) and raw_attachments:
+                att_entries: list[dict[str, Any]] = []
+                attachments_list = cast(list[Any], raw_attachments)
+                for j, att in enumerate(attachments_list):
+                    if not isinstance(att, dict):
+                        continue
+                    att_dict = cast(dict[str, Any], att)
+                    att_name = att_dict.get("name", f"attachment_{j}")
+                    att_encoding = att_dict.get("content_encoding", "utf-8")
+                    att_content = att_dict.get("content", "")
+
+                    att_entry: dict[str, Any] = {
+                        "index": j,
+                        "name": att_name,
+                    }
+                    if att_dict.get("description"):
+                        att_entry["description"] = att_dict["description"]
+
+                    if att_encoding == "base64":
+                        try:
+                            binary_data = base64.b64decode(att_content)
+                            size = len(binary_data)
+                            att_entry["size_bytes"] = size
+                            att_entry["encoding"] = "base64"
+                            mime_type = att_dict.get("mime_type", "application/octet-stream")
+
+                            if size > self._config.max_element_bytes and self._artifact_store:
+                                ref = self._artifact_store.store_binary(binary_data, mime_type)
+                                att_entry["content_ref"] = {
+                                    "hash": ref.hash,
+                                    "path": ref.path,
+                                    "mime_type": ref.mime_type,
+                                    "encoding": ref.encoding,
+                                }
+                                att_entry["preview"] = f"[Binary attachment, {size} bytes]"
+                            else:
+                                att_entry["content"] = att_content
+                        except Exception:
+                            att_entry["content"] = "[binary content - decode failed]"
+                            att_entry["size_bytes"] = 0
+                    else:
+                        text = self._redact(str(att_content))
+                        text_bytes = len(text.encode("utf-8"))
+                        att_entry["size_bytes"] = text_bytes
+
+                        if text_bytes > self._config.max_element_bytes and self._artifact_store:
+                            ref = self._artifact_store.store_text(text)
+                            excerpt_len = self._config.element_excerpt_bytes
+                            att_entry["content_ref"] = {
+                                "hash": ref.hash,
+                                "path": ref.path,
+                                "mime_type": ref.mime_type,
+                                "encoding": ref.encoding,
+                            }
+                            att_entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
+                        else:
+                            att_entry["content"] = text
+
+                    att_entries.append(att_entry)
+
+                doc_entry["attachment_count"] = len(att_entries)
+                doc_entry["attachments"] = att_entries
+
             doc_entries.append(doc_entry)
 
         return {
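The attachment branch added above repeats the same externalization rule already used for document bodies. Distilled to its decision (a sketch; `max_element_bytes`, `store_text`, and the `content_ref` keys come from the hunks above, while the helper's shape is an assumption):

```python
def externalize(text: str, store, max_element_bytes: int, excerpt_bytes: int) -> dict:
    """Inline small text; push large text to the artifact store and keep an excerpt."""
    entry: dict = {"size_bytes": len(text.encode("utf-8"))}
    if entry["size_bytes"] > max_element_bytes and store is not None:
        ref = store.store_text(text)  # content-addressed write, deduplicated by hash
        entry["content_ref"] = {"hash": ref.hash, "path": ref.path,
                                "mime_type": ref.mime_type, "encoding": ref.encoding}
        entry["excerpt"] = text[:excerpt_bytes] + "\n[TRUNCATED - see artifact for full content]"
    else:
        entry["content"] = text
    return entry
```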
@@ -584,11 +638,10 @@ class ContentWriter:
             "image_bytes": metadata.get("total_image_bytes", 0),
             "tool_bytes": metadata.get("total_tool_bytes", 0),
         }
-
+        if "size_bytes" in structured:
             return {"total_bytes": structured["size_bytes"]}
-
-
-        return {"total_bytes": len(serialized.encode("utf-8"))}
+        serialized = json.dumps(self._convert_types(structured))
+        return {"total_bytes": len(serialized.encode("utf-8"))}
 
     def _reduce_previews(self, structured: dict[str, Any]) -> dict[str, Any]:
         """Reduce preview/excerpt sizes to fit file under max_file_bytes."""
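One detail worth noting in the fallback above, and in every size check in this file: sizes are measured in UTF-8 bytes, not characters, and the two diverge as soon as content is non-ASCII:

```python
import json

payload = {"note": "naïve"}
raw = json.dumps(payload, ensure_ascii=False)
assert len(raw.encode("utf-8")) > len(raw)  # "ï" encodes to two UTF-8 bytes
```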
@@ -607,7 +660,7 @@ class ContentWriter:
             text = pattern.sub("[REDACTED]", text)
         return text
 
-    def _convert_types(self, value: Any, seen: set[int] | None = None) -> Any:
+    def _convert_types(self, value: Any, seen: set[int] | None = None) -> Any:  # noqa: PLR0911
         """Convert non-serializable types recursively with cycle detection."""
         # Cycle detection
         if seen is None:
@@ -635,7 +688,7 @@ class ContentWriter:
             case Enum():
                 return value.value
             case set() | frozenset():
-                return sorted(str(x) for x in value)
+                return sorted(str(x) for x in cast(set[Any] | frozenset[Any], value))
             case BaseModel():
                 try:
                     return value.model_dump(mode="json")
@@ -643,12 +696,14 @@ class ContentWriter:
                     return str(value)
             case dict():
                 seen.add(obj_id)
-
+                typed_dict = cast(dict[Any, Any], value)
+                result = {str(k): self._convert_types(v, seen) for k, v in typed_dict.items()}
                 seen.discard(obj_id)
                 return result
             case list() | tuple():
                 seen.add(obj_id)
-
+                typed_seq = cast(list[Any] | tuple[Any, ...], value)
+                result = [self._convert_types(x, seen) for x in typed_seq]
                 seen.discard(obj_id)
                 return result
             case _:
@@ -681,26 +736,29 @@ def reconstruct_span_content(trace_root: Path, span_dir: Path, content_type: str
 def _rehydrate(obj: Any, trace_root: Path) -> Any:
     """Recursively replace content_ref entries with actual content."""
     if isinstance(obj, dict):
-
+        obj_dict = cast(dict[str, Any], obj)
+        if "content_ref" in obj_dict:
             # This is an artifact reference - load the full content
-            ref =
-            artifact_path = trace_root / ref["path"]
+            ref: dict[str, Any] = obj_dict["content_ref"]
+            artifact_path: Path = trace_root / ref["path"]
 
+            full_content: str | bytes
             if ref.get("encoding") == "utf-8":
                 full_content = artifact_path.read_text(encoding="utf-8")
             else:
                 full_content = artifact_path.read_bytes()
 
             # Replace ref with full content
-
-
-            del
-            if "excerpt" in
-            del
+            obj_dict = obj_dict.copy()
+            obj_dict["content"] = full_content
+            del obj_dict["content_ref"]
+            if "excerpt" in obj_dict:
+                del obj_dict["excerpt"]
 
-        return {k: _rehydrate(v, trace_root) for k, v in
+        return {k: _rehydrate(v, trace_root) for k, v in obj_dict.items()}
 
-
-
+    if isinstance(obj, list):
+        obj_list = cast(list[Any], obj)
+        return [_rehydrate(v, trace_root) for v in obj_list]
 
     return obj
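`_rehydrate` is the inverse of the externalization above: any mapping carrying a `content_ref` gets the referenced artifact read back into `content`, and the `excerpt` placeholder is dropped. A usage sketch of the public entry point (its signature appears in the hunk header above; the directory layout shown is an assumption):

```python
from pathlib import Path

# Module path per the RECORD above; trace-directory layout is hypothetical.
from ai_pipeline_core.observability._debug._content import reconstruct_span_content

trace_root = Path(".traces/0123abcdef")        # hypothetical trace directory
span_dir = trace_root / "spans" / "0011aabb"   # hypothetical span directory
data = reconstruct_span_content(trace_root, span_dir, "input")
# Nodes that held {"content_ref": ..., "excerpt": ...} now hold the full "content" again.
```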
ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py}

@@ -1,10 +1,12 @@
 """OpenTelemetry SpanProcessor for local trace debugging."""
 
+import contextlib
+
 from opentelemetry.context import Context
 from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
 from opentelemetry.trace import StatusCode
 
-from .
+from ._writer import LocalTraceWriter, WriteJob
 
 
 class LocalDebugSpanProcessor(SpanProcessor):
@@ -29,7 +31,7 @@ class LocalDebugSpanProcessor(SpanProcessor):
         Creates the span directory early so we can see "running" spans.
         Input/output data is not available yet - will be captured in on_end().
         """
-
+        with contextlib.suppress(Exception):
             if span.context is None:
                 return
             trace_id = format(span.context.trace_id, "032x")
@@ -37,9 +39,6 @@ class LocalDebugSpanProcessor(SpanProcessor):
             parent_id = self._get_parent_span_id(span)
 
             self._writer.on_span_start(trace_id, span_id, parent_id, span.name)
-        except Exception:
-            # Never fail the actual span - debug tracing should be transparent
-            pass
 
     def on_end(self, span: ReadableSpan) -> None:
         """Handle span end - queue full span data for background write.
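Both hooks swap `try/except Exception: pass` for `contextlib.suppress(Exception)`, which is behaviorally equivalent for this pattern and keeps the "never fail the traced span" guard to one line:

```python
import contextlib

def risky_debug_write() -> None:  # hypothetical stand-in for the writer calls above
    raise OSError("disk full")

# Equivalent to: try: risky_debug_write() / except Exception: pass
with contextlib.suppress(Exception):
    risky_debug_write()
```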
@@ -47,7 +46,7 @@ class LocalDebugSpanProcessor(SpanProcessor):
         All data (input, output, attributes, events) is captured here because
         Laminar sets these attributes after span start.
         """
-
+        with contextlib.suppress(Exception):
             if span.context is None or span.start_time is None or span.end_time is None:
                 return
             job = WriteJob(
@@ -63,19 +62,18 @@ class LocalDebugSpanProcessor(SpanProcessor):
                 end_time_ns=span.end_time,
             )
             self._writer.on_span_end(job)
-        except Exception:
-            # Never fail the actual span
-            pass
 
     def shutdown(self) -> None:
         """Shutdown the processor and writer."""
         self._writer.shutdown()
 
-    def force_flush(self, timeout_millis: int = 30000) -> bool:
+    def force_flush(self, timeout_millis: int = 30000) -> bool:  # noqa: PLR6301
         """Force flush is not needed for this processor."""
+        _ = timeout_millis
         return True
 
-
+    @staticmethod
+    def _get_parent_span_id(span: Span) -> str | None:
         """Extract parent span ID from a writable Span."""
         if hasattr(span, "parent") and span.parent:
             parent_ctx = span.parent
@@ -83,17 +81,18 @@ class LocalDebugSpanProcessor(SpanProcessor):
             return format(parent_ctx.span_id, "016x")
         return None
 
-
+    @staticmethod
+    def _get_parent_span_id_from_readable(span: ReadableSpan) -> str | None:
         """Extract parent span ID from a ReadableSpan."""
-        if span.parent:
-
-            return format(span.parent.span_id, "016x")
+        if span.parent and hasattr(span.parent, "span_id") and span.parent.span_id:
+            return format(span.parent.span_id, "016x")
         return None
 
-
+    @staticmethod
+    def _get_status_code(span: ReadableSpan) -> str:
         """Get status code as string."""
         if span.status.status_code == StatusCode.OK:
             return "OK"
-
+        if span.status.status_code == StatusCode.ERROR:
             return "ERROR"
         return "UNSET"