ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +70 -144
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +106 -81
- ai_pipeline_core/llm/client.py +267 -158
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/{debug/content.py → observability/_debug/_content.py}

@@ -1,6 +1,7 @@
 """Content writing and extraction for trace debugging V3.
 
 Uses hash-based artifact storage with automatic deduplication.
+Handles Document attachments by externalizing large binary/text attachments to the artifact store.
 """
 
 import base64
@@ -10,13 +11,13 @@ import re
 from datetime import datetime
 from enum import Enum
 from pathlib import Path
-from typing import Any
+from typing import Any, cast
 from uuid import UUID
 
 import yaml
 from pydantic import BaseModel, ConfigDict, SecretStr
 
-from .config import TraceDebugConfig
+from ._config import TraceDebugConfig
 
 
 class ContentRef(BaseModel):
@@ -54,9 +55,7 @@ class ArtifactStore:
            return self._known_hashes[content_hash]
 
        # Create sharded path: ab/cd/abcdef...1234.txt
-        file_path = (
-            self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}.txt"
-        )
+        file_path = self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}.txt"
         file_path.parent.mkdir(parents=True, exist_ok=True)
 
         if not file_path.exists():
@@ -85,13 +84,12 @@ class ArtifactStore:
             "image/png": ".png",
             "image/jpeg": ".jpg",
             "image/gif": ".gif",
+            "image/webp": ".webp",
             "application/pdf": ".pdf",
         }
         ext = ext_map.get(mime_type, ".bin")
 
-        file_path = (
-            self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}{ext}"
-        )
+        file_path = self._artifacts_path / content_hash[:2] / content_hash[2:4] / f"{content_hash}{ext}"
         file_path.parent.mkdir(parents=True, exist_ok=True)
 
         if not file_path.exists():
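
For reference, a minimal sketch of the two-level sharding scheme the "ab/cd/abcdef...1234.txt" comment above describes (the sha256 choice and the function name are assumptions; the diff shows only the path layout):

import hashlib
from pathlib import Path

def sharded_artifact_path(root: Path, data: bytes, ext: str = ".txt") -> Path:
    # The first two hex pairs become directory levels, keeping any one directory small.
    content_hash = hashlib.sha256(data).hexdigest()
    return root / content_hash[:2] / content_hash[2:4] / f"{content_hash}{ext}"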
@@ -162,9 +160,7 @@ class ContentWriter:
         if size > self._config.max_file_bytes:
             # Reduce preview sizes to fit under limit
             structured = self._reduce_previews(structured)
-            serialized = yaml.dump(
-                structured, default_flow_style=False, allow_unicode=True, sort_keys=False
-            )
+            serialized = yaml.dump(structured, default_flow_style=False, allow_unicode=True, sort_keys=False)
             serialized = self._redact(serialized)
             size = len(serialized.encode("utf-8"))
 
@@ -172,9 +168,7 @@
         if size > self._config.max_file_bytes:
             serialized = serialized[: self._config.max_file_bytes]
             max_bytes = self._config.max_file_bytes
-            serialized += (
-                f"\n\n# [TRUNCATED: original {size} bytes exceeded {max_bytes} limit]\n"
-            )
+            serialized += f"\n\n# [TRUNCATED: original {size} bytes exceeded {max_bytes} limit]\n"
             size = len(serialized.encode("utf-8"))
 
         # Write file
@@ -192,10 +186,9 @@
         """Convert raw content to structured YAML-ready format."""
         if self._is_llm_messages(content):
             return self._structure_llm_messages(content)
-        elif self._is_document_list(content):
+        if self._is_document_list(content):
             return self._structure_documents(content)
-        else:
-            return self._structure_generic(content)
+        return self._structure_generic(content)
 
     def _is_llm_messages(self, content: Any) -> bool:
         """Check if content looks like LLM messages."""
@@ -203,21 +196,21 @@ class ContentWriter:
             return False
         if not content:
             return False
-        first = content[0]
+        first = cast(Any, content[0])
         if not isinstance(first, dict):
             return False
         return "role" in first and "content" in first
 
     def _is_document_list(self, content: Any) -> bool:
-        """Check if content looks like a
+        """Check if content looks like a list of serialized documents."""
         if not isinstance(content, list):
             return False
         if not content:
             return False
-        first = content[0]
+        first = cast(Any, content[0])
         if not isinstance(first, dict):
             return False
-        return "
+        return "class_name" in first and "content" in first
 
     def _structure_llm_messages(self, messages: list[Any]) -> dict[str, Any]:
         """Structure LLM messages preserving ALL parts losslessly."""
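
As a rough illustration, payloads that each heuristic accepts after this change (shapes inferred directly from the checks; the values are invented):

# Passes _is_llm_messages: a list of dicts carrying "role" and "content" keys.
llm_messages = [{"role": "user", "content": "Summarize the report."}]

# Passes _is_document_list in 0.4.0: a list of dicts carrying "class_name" and "content" keys.
documents = [{"class_name": "Document", "name": "report.md", "content": "..."}]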
@@ -238,16 +231,18 @@ class ContentWriter:
 
         if isinstance(content, list):
             # Multimodal: preserve each part separately
-
-
+            content_parts = cast(list[Any], content)
+            msg_parts: list[dict[str, Any]] = []
+            msg_entry["parts"] = msg_parts
+            for j, part in enumerate(content_parts):
                 structured_part, part_bytes = self._structure_message_part(part, j)
-
+                msg_parts.append(structured_part)
                 part_type = structured_part.get("type", "")
                 if part_type == "text":
                     total_text_bytes += part_bytes
                 elif part_type == "image":
                     total_image_bytes += part_bytes
-                elif part_type in
+                elif part_type in {"tool_use", "tool_result"}:
                     total_tool_bytes += part_bytes
         elif isinstance(content, str):
             # Simple text message
@@ -284,9 +279,7 @@ class ContentWriter:
             "size_bytes": total_text_bytes + total_image_bytes + total_tool_bytes,
         }
 
-    def _structure_message_part(
-        self, part: dict[str, Any], sequence: int
-    ) -> tuple[dict[str, Any], int]:
+    def _structure_message_part(self, part: dict[str, Any], sequence: int) -> tuple[dict[str, Any], int]:
         """Structure a single message part losslessly.
 
         Returns:
@@ -297,13 +290,13 @@ class ContentWriter:
         if part_type == "text":
             entry = self._structure_text_element(part.get("text", ""), sequence)
             return entry, entry.get("size_bytes", 0)
-        elif part_type == "image_url":
+        if part_type == "image_url":
             entry = self._structure_image_openai(part, sequence)
             return entry, entry.get("size_bytes", 0)
-        elif part_type == "image":
+        if part_type == "image":
             entry = self._structure_image_anthropic(part, sequence)
             return entry, entry.get("size_bytes", 0)
-        elif part_type == "tool_use":
+        if part_type == "tool_use":
             input_str = json.dumps(part.get("input", {}))
             size = len(input_str.encode("utf-8"))
             return {
@@ -313,7 +306,7 @@ class ContentWriter:
                 "name": part.get("name"),
                 "input": self._convert_types(part.get("input")),
             }, size
-        elif part_type == "tool_result":
+        if part_type == "tool_result":
             result_content = part.get("content")
             entry: dict[str, Any] = {
                 "type": "tool_result",
@@ -327,25 +320,26 @@ class ContentWriter:
                 entry["content"] = text_entry
                 size = text_entry.get("size_bytes", 0)
             elif isinstance(result_content, list):
-
-
+                result_parts = cast(list[Any], result_content)
+                content_list: list[dict[str, Any]] = []
+                entry["content"] = content_list
+                for k, p in enumerate(result_parts):
                     part_entry, part_size = self._structure_message_part(p, k)
-
+                    content_list.append(part_entry)
                     size += part_size
             else:
                 entry["content"] = self._convert_types(result_content)
             return entry, size
-
-
-
-
-
-
-
-
-
-
-        }, size
+        # Unknown type — preserve raw data, never drop
+        raw = self._convert_types(part)
+        raw_str = json.dumps(raw)
+        size = len(raw_str.encode("utf-8"))
+        return {
+            "type": "unknown",
+            "sequence": sequence,
+            "original_type": part_type,
+            "raw_data": raw,
+        }, size
 
     def _structure_text_element(self, text: str, sequence: int) -> dict[str, Any]:
         """Structure a text element, optionally externalizing large content."""
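
A sketch of what the new fallback emits for a part type the writer does not recognize (entry keys come from the hunk above; the part and values are invented):

part = {"type": "audio", "data": "<opaque payload>"}
# Instead of dropping the part, it is preserved verbatim under "raw_data":
entry = {
    "type": "unknown",
    "sequence": 3,
    "original_type": "audio",
    "raw_data": {"type": "audio", "data": "<opaque payload>"},
}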
@@ -369,9 +363,7 @@ class ContentWriter:
                 "mime_type": ref.mime_type,
                 "encoding": ref.encoding,
             }
-            entry["excerpt"] = (
-                text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
-            )
+            entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
         else:
             # No artifact store — truncate with marker
             entry["content"] = text[: self._config.max_element_bytes]
@@ -486,20 +478,20 @@ class ContentWriter:
 
         return entry
 
-    def _structure_documents(self, docs: list[Any]) -> dict[str, Any]:
-        """Structure document list."""
+    def _structure_documents(self, docs: list[Any]) -> dict[str, Any]:  # noqa: PLR0914
+        """Structure document list with attachment externalization."""
         doc_entries: list[dict[str, Any]] = []
 
         for i, doc in enumerate(docs):
             doc_name = doc.get("name", f"doc_{i}")
-
+            class_name = doc.get("class_name", "Document")
             content = doc.get("content", "")
             content_encoding = doc.get("content_encoding", "utf-8")
 
             doc_entry: dict[str, Any] = {
                 "index": i,
                 "name": doc_name,
-                "
+                "class_name": class_name,
             }
 
             if content_encoding == "base64":
@@ -541,12 +533,75 @@ class ContentWriter:
                     "mime_type": ref.mime_type,
                     "encoding": ref.encoding,
                 }
-                doc_entry["excerpt"] = (
-                    text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
-                )
+                doc_entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
             else:
                 doc_entry["content"] = text
 
+            # Structure attachments if present
+            raw_attachments = doc.get("attachments")
+            if isinstance(raw_attachments, list) and raw_attachments:
+                att_entries: list[dict[str, Any]] = []
+                attachments_list = cast(list[Any], raw_attachments)
+                for j, att in enumerate(attachments_list):
+                    if not isinstance(att, dict):
+                        continue
+                    att_dict = cast(dict[str, Any], att)
+                    att_name = att_dict.get("name", f"attachment_{j}")
+                    att_encoding = att_dict.get("content_encoding", "utf-8")
+                    att_content = att_dict.get("content", "")
+
+                    att_entry: dict[str, Any] = {
+                        "index": j,
+                        "name": att_name,
+                    }
+                    if att_dict.get("description"):
+                        att_entry["description"] = att_dict["description"]
+
+                    if att_encoding == "base64":
+                        try:
+                            binary_data = base64.b64decode(att_content)
+                            size = len(binary_data)
+                            att_entry["size_bytes"] = size
+                            att_entry["encoding"] = "base64"
+                            mime_type = att_dict.get("mime_type", "application/octet-stream")
+
+                            if size > self._config.max_element_bytes and self._artifact_store:
+                                ref = self._artifact_store.store_binary(binary_data, mime_type)
+                                att_entry["content_ref"] = {
+                                    "hash": ref.hash,
+                                    "path": ref.path,
+                                    "mime_type": ref.mime_type,
+                                    "encoding": ref.encoding,
+                                }
+                                att_entry["preview"] = f"[Binary attachment, {size} bytes]"
+                            else:
+                                att_entry["content"] = att_content
+                        except Exception:
+                            att_entry["content"] = "[binary content - decode failed]"
+                            att_entry["size_bytes"] = 0
+                    else:
+                        text = self._redact(str(att_content))
+                        text_bytes = len(text.encode("utf-8"))
+                        att_entry["size_bytes"] = text_bytes
+
+                        if text_bytes > self._config.max_element_bytes and self._artifact_store:
+                            ref = self._artifact_store.store_text(text)
+                            excerpt_len = self._config.element_excerpt_bytes
+                            att_entry["content_ref"] = {
+                                "hash": ref.hash,
+                                "path": ref.path,
+                                "mime_type": ref.mime_type,
+                                "encoding": ref.encoding,
+                            }
+                            att_entry["excerpt"] = text[:excerpt_len] + "\n[TRUNCATED - see artifact for full content]"
+                        else:
+                            att_entry["content"] = text
+
+                    att_entries.append(att_entry)
+
+                doc_entry["attachment_count"] = len(att_entries)
+                doc_entry["attachments"] = att_entries
+
             doc_entries.append(doc_entry)
 
         return {
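
Illustrative shape of the new attachment handling (a sketch; the entry keys come from the hunk above, while the size and the ref values are invented):

attachment = {
    "name": "page.png",
    "content_encoding": "base64",
    "mime_type": "image/png",
    "content": "<base64 payload larger than max_element_bytes>",
}

# With an artifact store configured, the structured entry keeps only a
# reference plus a short preview; the bytes live in the artifact store.
att_entry = {
    "index": 0,
    "name": "page.png",
    "size_bytes": 2457600,
    "encoding": "base64",
    "content_ref": {"hash": "<sha>", "path": "<sharded path>", "mime_type": "image/png", "encoding": "<encoding>"},
    "preview": "[Binary attachment, 2457600 bytes]",
}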
@@ -583,11 +638,10 @@ class ContentWriter:
             "image_bytes": metadata.get("total_image_bytes", 0),
             "tool_bytes": metadata.get("total_tool_bytes", 0),
         }
-        elif "size_bytes" in structured:
+        if "size_bytes" in structured:
             return {"total_bytes": structured["size_bytes"]}
-
-
-        return {"total_bytes": len(serialized.encode("utf-8"))}
+        serialized = json.dumps(self._convert_types(structured))
+        return {"total_bytes": len(serialized.encode("utf-8"))}
 
     def _reduce_previews(self, structured: dict[str, Any]) -> dict[str, Any]:
         """Reduce preview/excerpt sizes to fit file under max_file_bytes."""
@@ -606,7 +660,7 @@ class ContentWriter:
             text = pattern.sub("[REDACTED]", text)
         return text
 
-    def _convert_types(self, value: Any, seen: set[int] | None = None) -> Any:
+    def _convert_types(self, value: Any, seen: set[int] | None = None) -> Any:  # noqa: PLR0911
         """Convert non-serializable types recursively with cycle detection."""
         # Cycle detection
         if seen is None:
@@ -634,7 +688,7 @@ class ContentWriter:
             case Enum():
                 return value.value
             case set() | frozenset():
-                return sorted(str(x) for x in value)
+                return sorted(str(x) for x in cast(set[Any] | frozenset[Any], value))
             case BaseModel():
                 try:
                     return value.model_dump(mode="json")
@@ -642,12 +696,14 @@ class ContentWriter:
                     return str(value)
             case dict():
                 seen.add(obj_id)
-                result = {str(k): self._convert_types(v, seen) for k, v in value.items()}
+                typed_dict = cast(dict[Any, Any], value)
+                result = {str(k): self._convert_types(v, seen) for k, v in typed_dict.items()}
                 seen.discard(obj_id)
                 return result
             case list() | tuple():
                 seen.add(obj_id)
-                result = [self._convert_types(x, seen) for x in value]
+                typed_seq = cast(list[Any] | tuple[Any, ...], value)
+                result = [self._convert_types(x, seen) for x in typed_seq]
                 seen.discard(obj_id)
                 return result
             case _:
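
The seen-set pattern these hunks type-annotate, reduced to its core (a standalone sketch, not the class method itself):

from typing import Any

def convert(value: Any, seen: set[int] | None = None) -> Any:
    seen = set() if seen is None else seen
    if isinstance(value, dict):
        obj_id = id(value)
        if obj_id in seen:
            return "<cycle>"  # bail out instead of recursing forever
        seen.add(obj_id)
        result = {str(k): convert(v, seen) for k, v in value.items()}
        seen.discard(obj_id)
        return result
    return value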
@@ -680,26 +736,29 @@ def reconstruct_span_content(trace_root: Path, span_dir: Path, content_type: str
 def _rehydrate(obj: Any, trace_root: Path) -> Any:
     """Recursively replace content_ref entries with actual content."""
     if isinstance(obj, dict):
-        if "content_ref" in obj:
+        obj_dict = cast(dict[str, Any], obj)
+        if "content_ref" in obj_dict:
             # This is an artifact reference - load the full content
-            ref = obj["content_ref"]
-            artifact_path = trace_root / ref["path"]
+            ref: dict[str, Any] = obj_dict["content_ref"]
+            artifact_path: Path = trace_root / ref["path"]
 
+            full_content: str | bytes
             if ref.get("encoding") == "utf-8":
                 full_content = artifact_path.read_text(encoding="utf-8")
             else:
                 full_content = artifact_path.read_bytes()
 
             # Replace ref with full content
-            obj = obj.copy()
-            obj["content"] = full_content
-            del obj["content_ref"]
-            if "excerpt" in obj:
-                del obj["excerpt"]
+            obj_dict = obj_dict.copy()
+            obj_dict["content"] = full_content
+            del obj_dict["content_ref"]
+            if "excerpt" in obj_dict:
+                del obj_dict["excerpt"]
 
-        return {k: _rehydrate(v, trace_root) for k, v in obj.items()}
+        return {k: _rehydrate(v, trace_root) for k, v in obj_dict.items()}
 
-
-
+    if isinstance(obj, list):
+        obj_list = cast(list[Any], obj)
+        return [_rehydrate(v, trace_root) for v in obj_list]
 
     return obj
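
A hedged usage sketch for the rehydration helper (the function names and the reconstruct_span_content signature appear in the hunk; the on-disk layout here is an assumption):

import yaml
from pathlib import Path

trace_root = Path("debug_traces/<trace_id>")          # assumed layout
span_yaml = trace_root / "<span_dir>" / "input.yaml"  # assumed layout

structured = yaml.safe_load(span_yaml.read_text(encoding="utf-8"))
full = _rehydrate(structured, trace_root)  # content_ref entries replaced with artifact contents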
ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py}

@@ -1,10 +1,12 @@
 """OpenTelemetry SpanProcessor for local trace debugging."""
 
+import contextlib
+
 from opentelemetry.context import Context
 from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
 from opentelemetry.trace import StatusCode
 
-from .writer import LocalTraceWriter, WriteJob
+from ._writer import LocalTraceWriter, WriteJob
 
 
 class LocalDebugSpanProcessor(SpanProcessor):
@@ -29,7 +31,7 @@ class LocalDebugSpanProcessor:
         Creates the span directory early so we can see "running" spans.
         Input/output data is not available yet - will be captured in on_end().
         """
-        try:
+        with contextlib.suppress(Exception):
             if span.context is None:
                 return
             trace_id = format(span.context.trace_id, "032x")
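
The pattern this change adopts, in isolation: contextlib.suppress replaces a try/except whose only job is to swallow errors, so debug tracing can never break the traced code (risky_debug_write is a hypothetical stand-in):

import contextlib

with contextlib.suppress(Exception):
    risky_debug_write()  # any exception here is swallowed silently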
@@ -37,9 +39,6 @@
             parent_id = self._get_parent_span_id(span)
 
             self._writer.on_span_start(trace_id, span_id, parent_id, span.name)
-        except Exception:
-            # Never fail the actual span - debug tracing should be transparent
-            pass
 
     def on_end(self, span: ReadableSpan) -> None:
         """Handle span end - queue full span data for background write.
@@ -47,7 +46,7 @@ class LocalDebugSpanProcessor:
         All data (input, output, attributes, events) is captured here because
         Laminar sets these attributes after span start.
         """
-        try:
+        with contextlib.suppress(Exception):
             if span.context is None or span.start_time is None or span.end_time is None:
                 return
             job = WriteJob(
@@ -63,19 +62,18 @@ class LocalDebugSpanProcessor:
                 end_time_ns=span.end_time,
             )
             self._writer.on_span_end(job)
-        except Exception:
-            # Never fail the actual span
-            pass
 
     def shutdown(self) -> None:
         """Shutdown the processor and writer."""
         self._writer.shutdown()
 
-    def force_flush(self, timeout_millis: int = 30000) -> bool:
+    def force_flush(self, timeout_millis: int = 30000) -> bool:  # noqa: PLR6301
         """Force flush is not needed for this processor."""
+        _ = timeout_millis
         return True
 
-    def _get_parent_span_id(self, span: Span) -> str | None:
+    @staticmethod
+    def _get_parent_span_id(span: Span) -> str | None:
         """Extract parent span ID from a writable Span."""
         if hasattr(span, "parent") and span.parent:
             parent_ctx = span.parent
@@ -83,17 +81,18 @@ class LocalDebugSpanProcessor:
             return format(parent_ctx.span_id, "016x")
         return None
 
-    def _get_parent_span_id_from_readable(self, span: ReadableSpan) -> str | None:
+    @staticmethod
+    def _get_parent_span_id_from_readable(span: ReadableSpan) -> str | None:
         """Extract parent span ID from a ReadableSpan."""
-        if span.parent:
-
-            return format(span.parent.span_id, "016x")
+        if span.parent and hasattr(span.parent, "span_id") and span.parent.span_id:
+            return format(span.parent.span_id, "016x")
         return None
 
-    def _get_status_code(self, span: ReadableSpan) -> str:
+    @staticmethod
+    def _get_status_code(span: ReadableSpan) -> str:
         """Get status code as string."""
         if span.status.status_code == StatusCode.OK:
             return "OK"
-        elif span.status.status_code == StatusCode.ERROR:
+        if span.status.status_code == StatusCode.ERROR:
             return "ERROR"
         return "UNSET"
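
For orientation, wiring the processor into an OpenTelemetry tracer provider (a sketch; the provider API is standard OpenTelemetry, but the LocalTraceWriter and LocalDebugSpanProcessor constructor arguments are assumptions):

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

writer = LocalTraceWriter(...)  # constructor arguments assumed
provider = TracerProvider()
provider.add_span_processor(LocalDebugSpanProcessor(writer))
trace.set_tracer_provider(provider)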