docent-python 0.1.35a0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. docent_python-0.1.35a0/.gitignore +200 -0
  2. docent_python-0.1.35a0/LICENSE.md +13 -0
  3. docent_python-0.1.35a0/PKG-INFO +33 -0
  4. docent_python-0.1.35a0/README.md +21 -0
  5. docent_python-0.1.35a0/docent/__init__.py +4 -0
  6. docent_python-0.1.35a0/docent/_llm_util/__init__.py +0 -0
  7. docent_python-0.1.35a0/docent/_llm_util/data_models/__init__.py +0 -0
  8. docent_python-0.1.35a0/docent/_llm_util/data_models/exceptions.py +48 -0
  9. docent_python-0.1.35a0/docent/_llm_util/data_models/llm_output.py +331 -0
  10. docent_python-0.1.35a0/docent/_llm_util/llm_cache.py +193 -0
  11. docent_python-0.1.35a0/docent/_llm_util/llm_svc.py +472 -0
  12. docent_python-0.1.35a0/docent/_llm_util/model_registry.py +130 -0
  13. docent_python-0.1.35a0/docent/_llm_util/providers/__init__.py +0 -0
  14. docent_python-0.1.35a0/docent/_llm_util/providers/anthropic.py +537 -0
  15. docent_python-0.1.35a0/docent/_llm_util/providers/common.py +41 -0
  16. docent_python-0.1.35a0/docent/_llm_util/providers/google.py +530 -0
  17. docent_python-0.1.35a0/docent/_llm_util/providers/openai.py +745 -0
  18. docent_python-0.1.35a0/docent/_llm_util/providers/openrouter.py +375 -0
  19. docent_python-0.1.35a0/docent/_llm_util/providers/preference_types.py +104 -0
  20. docent_python-0.1.35a0/docent/_llm_util/providers/provider_registry.py +164 -0
  21. docent_python-0.1.35a0/docent/_log_util/__init__.py +3 -0
  22. docent_python-0.1.35a0/docent/_log_util/logger.py +141 -0
  23. docent_python-0.1.35a0/docent/data_models/__init__.py +16 -0
  24. docent_python-0.1.35a0/docent/data_models/_tiktoken_util.py +91 -0
  25. docent_python-0.1.35a0/docent/data_models/agent_run.py +456 -0
  26. docent_python-0.1.35a0/docent/data_models/chat/__init__.py +31 -0
  27. docent_python-0.1.35a0/docent/data_models/chat/content.py +56 -0
  28. docent_python-0.1.35a0/docent/data_models/chat/message.py +132 -0
  29. docent_python-0.1.35a0/docent/data_models/chat/tool.py +109 -0
  30. docent_python-0.1.35a0/docent/data_models/citation.py +233 -0
  31. docent_python-0.1.35a0/docent/data_models/collection.py +23 -0
  32. docent_python-0.1.35a0/docent/data_models/judge.py +19 -0
  33. docent_python-0.1.35a0/docent/data_models/metadata_util.py +16 -0
  34. docent_python-0.1.35a0/docent/data_models/regex.py +56 -0
  35. docent_python-0.1.35a0/docent/data_models/remove_invalid_citation_ranges.py +176 -0
  36. docent_python-0.1.35a0/docent/data_models/shared_types.py +10 -0
  37. docent_python-0.1.35a0/docent/data_models/transcript.py +465 -0
  38. docent_python-0.1.35a0/docent/data_models/util.py +170 -0
  39. docent_python-0.1.35a0/docent/judges/__init__.py +23 -0
  40. docent_python-0.1.35a0/docent/judges/analysis.py +77 -0
  41. docent_python-0.1.35a0/docent/judges/impl.py +587 -0
  42. docent_python-0.1.35a0/docent/judges/runner.py +129 -0
  43. docent_python-0.1.35a0/docent/judges/stats.py +205 -0
  44. docent_python-0.1.35a0/docent/judges/types.py +311 -0
  45. docent_python-0.1.35a0/docent/judges/util/forgiving_json.py +108 -0
  46. docent_python-0.1.35a0/docent/judges/util/meta_schema.json +86 -0
  47. docent_python-0.1.35a0/docent/judges/util/meta_schema.py +29 -0
  48. docent_python-0.1.35a0/docent/judges/util/parse_output.py +87 -0
  49. docent_python-0.1.35a0/docent/judges/util/voting.py +139 -0
  50. docent_python-0.1.35a0/docent/loaders/load_inspect.py +215 -0
  51. docent_python-0.1.35a0/docent/py.typed +0 -0
  52. docent_python-0.1.35a0/docent/samples/__init__.py +3 -0
  53. docent_python-0.1.35a0/docent/samples/load.py +9 -0
  54. docent_python-0.1.35a0/docent/samples/log.eval +0 -0
  55. docent_python-0.1.35a0/docent/samples/tb_airline.json +1 -0
  56. docent_python-0.1.35a0/docent/sdk/__init__.py +0 -0
  57. docent_python-0.1.35a0/docent/sdk/agent_run_writer.py +317 -0
  58. docent_python-0.1.35a0/docent/sdk/client.py +751 -0
  59. docent_python-0.1.35a0/docent/trace.py +2752 -0
  60. docent_python-0.1.35a0/docent/trace_temp.py +1086 -0
  61. docent_python-0.1.35a0/pyproject.toml +54 -0
  62. docent_python-0.1.35a0/uv.lock +2541 -0
docent_python-0.1.35a0/.gitignore
@@ -0,0 +1,200 @@
+ **/*_gitignore.*
+ **/*_gitignore/
+ *.db
+ .stignore
+ *syncthing*
+ .DS_Store
+ # *.sql (neil: disabled for ursid)
+ *.gz
+ *.tgz
+
+ *.tfstate
+ *.tfstate.backup
+ */.terraform/
+ */*.terraform.*
+
+ .idea/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .env.*
+ !.env.template
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # wandb
+ **/wandb/
+
+ # Marimo notebook outputs
+ **/__marimo__/
+
+ # yarn
+ **/.yarn/
+ **/.pnp.*
+
+ # data
+ *.npy
+ *.csv
+ *.pkl
+
+ # personal
+ inspect_evals
+
+ *.swp
+
+ # test data cache
+ data/cache
docent_python-0.1.35a0/LICENSE.md
@@ -0,0 +1,13 @@
+ Copyright 2025 Clarity AI Research Inc., dba Transluce
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
docent_python-0.1.35a0/PKG-INFO
@@ -0,0 +1,33 @@
+ Metadata-Version: 2.4
+ Name: docent-python
+ Version: 0.1.35a0
+ Summary: Docent SDK
+ Project-URL: Homepage, https://github.com/TransluceAI/docent
+ Project-URL: Issues, https://github.com/TransluceAI/docent/issues
+ Project-URL: Docs, https://transluce-docent.readthedocs-hosted.com/en/latest
+ Author-email: Transluce <info@transluce.org>
+ License-Expression: Apache-2.0
+ License-File: LICENSE.md
+ Requires-Python: >=3.11
+ Requires-Dist: anthropic>=0.47.0
+ Requires-Dist: backoff>=2.2.1
+ Requires-Dist: google-genai>=1.16.1
+ Requires-Dist: inspect-ai>=0.3.132
+ Requires-Dist: jsonschema>=4.24.0
+ Requires-Dist: openai>=1.68.0
+ Requires-Dist: opentelemetry-api>=1.34.1
+ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.34.1
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
+ Requires-Dist: opentelemetry-instrumentation-anthropic>=0.40.14
+ Requires-Dist: opentelemetry-instrumentation-bedrock>=0.40.14
+ Requires-Dist: opentelemetry-instrumentation-google-generativeai>=0.40.14
+ Requires-Dist: opentelemetry-instrumentation-langchain>=0.40.14
+ Requires-Dist: opentelemetry-instrumentation-openai>=0.40.14
+ Requires-Dist: opentelemetry-instrumentation-threading>=0.55b1
+ Requires-Dist: opentelemetry-sdk>=1.34.1
+ Requires-Dist: orjson>=3.11.3
+ Requires-Dist: pandas>=2.3.3
+ Requires-Dist: pydantic>=2.11.7
+ Requires-Dist: pyyaml>=6.0.2
+ Requires-Dist: tiktoken>=0.7.0
+ Requires-Dist: tqdm>=4.67.1
docent_python-0.1.35a0/README.md
@@ -0,0 +1,21 @@
+ !!! note
+     Docent remains in alpha. The API is subject to change.
+
+ # Docent Python SDK
+
+ The official Python SDK for [Docent](https://github.com/TransluceAI/docent) - a platform for analyzing and visualizing AI agent execution traces.
+
+ ## Overview
+
+ Docent helps you understand AI agent behavior by providing tools to collect, analyze, and visualize agent execution data. This SDK allows you to programmatically interact with the Docent platform to:
+
+ - Create and manage collections of agent runs
+ - Upload agent execution traces and transcripts
+ - Define custom dimensions and filters
+ - Perform searches and analyses on agent behavior
+
+ ## Installation
+
+ ```bash
+ pip install docent-python
+ ```
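For orientation, here is a minimal usage sketch built on the two names this release exports from `docent/__init__.py` (shown in the next hunk). Only the import is confirmed by this diff; the `api_key` argument and `create_collection` method are illustrative assumptions about the client, not documented API:

```python
# Hypothetical quickstart; only the `Docent` import is confirmed by this release.
from docent import Docent

# Assumption: the client authenticates via an API key parameter.
client = Docent(api_key="...")  # hypothetical constructor argument

# Assumption: collections group agent runs, per the README's overview bullets.
collection_id = client.create_collection(  # hypothetical method name
    name="my-agent-runs",
    description="Traces from a benchmark run",
)
```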
docent_python-0.1.35a0/docent/__init__.py
@@ -0,0 +1,4 @@
+ __all__ = ["Docent", "init"]
+
+ from docent.sdk.agent_run_writer import init
+ from docent.sdk.client import Docent
File without changes
docent_python-0.1.35a0/docent/_llm_util/data_models/exceptions.py
@@ -0,0 +1,48 @@
+ class LLMException(Exception):
+     error_type_id = "other"
+     user_message = "The model failed to respond. Please try again later."
+
+
+ class CompletionTooLongException(LLMException):
+     error_type_id = "completion_too_long"
+     user_message = "Completion too long."
+
+
+ class RateLimitException(LLMException):
+     error_type_id = "rate_limit"
+     user_message = "Rate limited by the model provider. Please wait and try again."
+
+
+ class ContextWindowException(LLMException):
+     error_type_id = "context_window"
+     user_message = "Context window exceeded."
+
+
+ class NoResponseException(LLMException):
+     error_type_id = "no_response"
+     user_message = "The model returned an empty response. Please try again later."
+
+
+ class DocentUsageLimitException(LLMException):
+     error_type_id = "docent_usage_limit"
+     user_message = "Free daily usage limit reached. Add your own API key in settings or contact us for increased limits."
+
+
+ class ValidationFailedException(LLMException):
+     error_type_id = "validation_failed"
+     user_message = "The model returned invalid output that failed validation."
+
+     def __init__(self, message: str = "", failed_output: str | None = None):
+         super().__init__(message)
+         self.failed_output = failed_output
+
+
+ LLM_ERROR_TYPES: list[type[LLMException]] = [
+     LLMException,
+     CompletionTooLongException,
+     RateLimitException,
+     ContextWindowException,
+     NoResponseException,
+     DocentUsageLimitException,
+     ValidationFailedException,
+ ]
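Each exception class above carries a stable `error_type_id`, which is how errors survive serialization: `LLMOutput.to_dict`/`from_dict` (in `llm_output.py`, later in this diff) store only these id strings and map them back to classes through `LLM_ERROR_TYPES`. A minimal sketch of that round trip, using only names defined in this file:

```python
from docent._llm_util.data_models.exceptions import (
    LLM_ERROR_TYPES,
    LLMException,
    RateLimitException,
)

# Build the id -> class map, the same way LLMOutput.from_dict does.
error_type_map = {e.error_type_id: e for e in LLM_ERROR_TYPES}

serialized = [RateLimitException().error_type_id]  # ["rate_limit"]

# Unknown ids fall back to the base LLMException.
restored = [error_type_map.get(eid, LLMException)() for eid in serialized]

assert isinstance(restored[0], RateLimitException)
print(restored[0].user_message)
```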
docent_python-0.1.35a0/docent/_llm_util/data_models/llm_output.py
@@ -0,0 +1,331 @@
+ import json
+ from dataclasses import dataclass, field
+ from typing import Any, Literal, Protocol, cast
+
+ from openai.types.chat.chat_completion_token_logprob import TopLogprob
+ from pydantic import BaseModel
+
+ from docent._llm_util.data_models.exceptions import (
+     LLM_ERROR_TYPES,
+     CompletionTooLongException,
+     ContextWindowException,
+     LLMException,
+ )
+ from docent._log_util import get_logger
+ from docent.data_models.chat import ToolCall
+
+ logger = get_logger(__name__)
+
+ FinishReasonType = Literal[
+     "error",
+     "stop",
+     "length",
+     "tool_calls",
+     "content_filter",
+     "function_call",
+     "streaming",
+     "refusal",
+ ]
+ """Possible reasons for an LLM completion to finish."""
+
+
+ TokenType = Literal["input", "output", "cache_read", "cache_write"]
+
+
+ class UsageMetrics:
+     _usage: dict[TokenType, int]
+
+     def __init__(self, **kwargs: int | None):
+         filtered_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+         self._usage = cast(dict[TokenType, int], filtered_kwargs)
+
+     def __getitem__(self, key: TokenType) -> int:
+         return self._usage.get(key, 0)
+
+     def __setitem__(self, key: TokenType, value: int):
+         self._usage[key] = value
+
+     def to_dict(self) -> dict[TokenType, int]:
+         # Filter out 0 values to avoid cluttering the database
+         return {k: v for k, v in self._usage.items() if v != 0}
+
+     @property
+     def total_tokens(self) -> int:
+         return self["input"] + self["output"]
+
+
+ class LLMCompletion(BaseModel):
+     """A single completion from an LLM.
+
+     Attributes:
+         text: The generated text content.
+         tool_calls: List of tool calls made during the completion.
+         finish_reason: Reason why the completion finished.
+         top_logprobs: Probability distribution for top token choices.
+     """
+
+     text: str | None = None
+     tool_calls: list[ToolCall] | None = None
+     finish_reason: FinishReasonType | None = None
+     top_logprobs: list[list[TopLogprob]] | None = None
+     reasoning_tokens: str | None = None
+
+     @property
+     def no_text(self) -> bool:
+         """Check if the completion has no text.
+
+         Returns:
+             bool: True if text is None or empty, False otherwise.
+         """
+         return self.text is None or len(self.text) == 0
+
+
+ @dataclass
+ class LLMOutput:
+     """Container for LLM output, potentially with multiple completions.
+
+     Aggregates completions from an LLM along with metadata and error information.
+
+     Attributes:
+         model: The name/identifier of the model used.
+         completions: List of individual completions.
+         errors: List of error types encountered during generation.
+     """
+
+     model: str
+     completions: list[LLMCompletion]
+     errors: list[LLMException] = field(default_factory=list)
+     usage: UsageMetrics = field(default_factory=UsageMetrics)
+     from_cache: bool = False
+     duration: float | None = None
+
+     @property
+     def non_empty(self) -> bool:
+         """Check if there are any completions.
+
+         Returns:
+             bool: True if there's at least one completion, False otherwise.
+         """
+         return len(self.completions) > 0
+
+     @property
+     def first(self) -> LLMCompletion | None:
+         """Get the first completion if available.
+
+         Returns:
+             LLMCompletion | None: The first completion or None if no completions exist.
+         """
+         return self.completions[0] if self.non_empty else None
+
+     @property
+     def first_text(self) -> str | None:
+         """Get the text of the first completion if available.
+
+         Returns:
+             str | None: The text of the first completion or None if no completion exists.
+         """
+         return self.first.text if self.first else None
+
+     @property
+     def did_error(self) -> bool:
+         """Check if any errors occurred during generation.
+
+         Returns:
+             bool: True if there were errors, False otherwise.
+         """
+         return bool(self.errors)
+
+     def to_dict(self) -> dict[str, Any]:
+         return {
+             "model": self.model,
+             "completions": [comp.model_dump() for comp in self.completions],
+             "errors": [e.error_type_id for e in self.errors],
+             "usage": self.usage.to_dict(),
+             "from_cache": self.from_cache,
+             "duration": self.duration,
+         }
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> "LLMOutput":
+         error_type_map = {e.error_type_id: e for e in LLM_ERROR_TYPES}
+         errors = data.get("errors", [])
+         error_types_to_not_log: list[str] = [
+             CompletionTooLongException.error_type_id,
+             ContextWindowException.error_type_id,
+         ]
+         errors_to_log = [e for e in errors if e not in error_types_to_not_log]
+         if errors_to_log:
+             logger.error(f"Loading LLM output with errors: {errors}")
+         errors = [error_type_map.get(e, LLMException)() for e in errors]
+
+         completions = data.get("completions", [])
+         completions = [LLMCompletion.model_validate(comp) for comp in completions]
+
+         usage: dict[TokenType, int] = {}
+         if data_usage := data.get("usage"):
+             usage = cast(dict[TokenType, int], data_usage)
+
+         return cls(
+             model=data["model"],
+             completions=completions,
+             errors=errors,
+             usage=UsageMetrics(**usage),
+             from_cache=bool(data.get("from_cache", False)),
+             duration=data.get("duration"),
+         )
+
+
+ @dataclass
+ class ToolCallPartial:
+     """Partial representation of a tool call before full processing.
+
+     Used as an intermediate format before finalizing into a complete ToolCall.
+
+     Args:
+         id: The identifier for the tool call.
+         function: The name of the function to call.
+         arguments_raw: Raw JSON string of arguments for the function.
+         type: The type of the tool call, always "function".
+     """
+
+     id: str | None
+     function: str | None
+     arguments_raw: str | None
+     type: Literal["function"]
+
+
+ class LLMCompletionPartial(LLMCompletion):
+     """Partial representation of an LLM completion before finalization.
+
+     Extends LLMCompletion but with tool_calls being a list of ToolCallPartial.
+     This is used during the processing stage before tool calls are fully parsed.
+
+     Attributes:
+         tool_calls: List of partial tool call representations.
+     """
+
+     tool_calls: list[ToolCallPartial | None] | None = None  # type: ignore
+
+
+ class LLMOutputPartial(LLMOutput):
+     """Partial representation of LLM output before finalization.
+
+     Extends LLMOutput but with completions being a list of LLMCompletionPartial.
+     Used as an intermediate format during processing.
+
+     Attributes:
+         completions: List of partial completions.
+     """
+
+     completions: list[LLMCompletionPartial]  # type: ignore
+
+
+ def finalize_llm_output_partial(partial: LLMOutputPartial) -> LLMOutput:
+     """Convert a partial LLM output into a finalized LLM output.
+
+     Processes tool calls by parsing their arguments from raw JSON strings,
+     handles errors in JSON parsing, and provides warnings for truncated completions.
+
+     Args:
+         partial: The partial LLM output to finalize.
+
+     Returns:
+         LLMOutput: The finalized LLM output with processed tool calls.
+
+     Raises:
+         CompletionTooLongException: If the completion was truncated due to length
+             and resulted in empty text.
+         ValueError: If tool call ID or function is missing in the partial data.
+     """
+
+     def _parse_tool_call(tc_partial: ToolCallPartial):
+         if tc_partial.id is None:
+             raise ValueError("Tool call ID not found in partial; check for parsing errors")
+         if tc_partial.function is None:
+             raise ValueError("Tool call function not found in partial; check for parsing errors")
+
+         arguments: dict[str, Any] = {}
+         # Attempt to load arguments into JSON
+         try:
+             arguments = json.loads(tc_partial.arguments_raw or "{}")
+             parse_error = None
+         # If the tool call arguments are not valid JSON, return an empty dict with the error
+         except Exception as e:
+             arguments = {"__parse_error_raw_args": tc_partial.arguments_raw}
+             parse_error = f"Couldn't parse tool call arguments as JSON: {e}. Original input: {tc_partial.arguments_raw}"
+
+         return ToolCall(
+             id=tc_partial.id,
+             function=tc_partial.function,
+             arguments=arguments,
+             parse_error=parse_error,
+             type=tc_partial.type,
+         )
+
+     output = LLMOutput(
+         model=partial.model,
+         completions=[
+             LLMCompletion(
+                 text=c.text,
+                 tool_calls=[_parse_tool_call(tc) for tc in (c.tool_calls or []) if tc is not None],
+                 finish_reason=c.finish_reason,
+                 reasoning_tokens=c.reasoning_tokens,
+             )
+             for c in partial.completions
+         ],
+         usage=partial.usage,
+         from_cache=False,
+     )
+
+     # If the completion is empty and was truncated (likely due to too much reasoning), raise an exception
+     if output.first and output.first.finish_reason == "length" and output.first.no_text:
+         raise CompletionTooLongException(
+             "Completion empty due to truncation. Consider increasing max_new_tokens."
+         )
+     for c in output.completions:
+         if c.finish_reason == "length":
+             logger.warning(
+                 "Completion truncated due to length; consider increasing max_new_tokens."
+             )
+
+     return output
+
+
+ class AsyncLLMOutputStreamingCallback(Protocol):
+     """Protocol for asynchronous streaming callbacks with batch index.
+
+     Defines the expected signature for callbacks that handle streaming output
+     with a batch index.
+
+     Args:
+         batch_index: The index of the current batch.
+         llm_output: The LLM output for the current batch.
+     """
+
+     async def __call__(
+         self,
+         batch_index: int,
+         llm_output: LLMOutput,
+     ) -> None: ...
+
+
+ class AsyncSingleLLMOutputStreamingCallback(Protocol):
+     """Protocol for asynchronous streaming callbacks without batch indexing.
+
+     Defines the expected signature for callbacks that handle streaming output
+     without batch indexing.
+
+     Args:
+         llm_output: The LLM output to process.
+     """
+
+     async def __call__(
+         self,
+         llm_output: LLMOutput,
+     ) -> None: ...
+
+
+ class AsyncEmbeddingStreamingCallback(Protocol):
+     """Protocol for sending progress updates for embedding generation."""
+
+     async def __call__(self, progress: int) -> None: ...
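To make the serialization path concrete, here is a minimal round trip through `to_dict` and `from_dict`, using only classes defined in this file (the model name is illustrative):

```python
from docent._llm_util.data_models.llm_output import (
    LLMCompletion,
    LLMOutput,
    UsageMetrics,
)

output = LLMOutput(
    model="example-model",  # illustrative identifier
    completions=[LLMCompletion(text="Hello!", finish_reason="stop")],
    usage=UsageMetrics(input=12, output=3),
)

# Errors serialize to their error_type_id strings; zero usage entries are dropped.
data = output.to_dict()

# from_dict maps error ids back to exception classes via LLM_ERROR_TYPES.
restored = LLMOutput.from_dict(data)
assert restored.first_text == "Hello!"
assert restored.usage.total_tokens == 15  # total counts input + output only
```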