chat-cmpl-stream-handler 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chat_cmpl_stream_handler-0.1.0/LICENSE +21 -0
- chat_cmpl_stream_handler-0.1.0/PKG-INFO +198 -0
- chat_cmpl_stream_handler-0.1.0/README.md +176 -0
- chat_cmpl_stream_handler-0.1.0/chat_cmpl_stream_handler/__init__.py +247 -0
- chat_cmpl_stream_handler-0.1.0/chat_cmpl_stream_handler/_patch_stream_tool_call_index.py +61 -0
- chat_cmpl_stream_handler-0.1.0/pyproject.toml +50 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 AllenChou
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chat-cmpl-stream-handler
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Chat Completion Stream Handler
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Author: Allen Chou
|
|
8
|
+
Author-email: f1470891079@gmail.com
|
|
9
|
+
Requires-Python: >=3.11,<4
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Requires-Dist: openai (>=1)
|
|
17
|
+
Project-URL: Homepage, https://github.com/allen2c/chat-cmpl-stream-handler
|
|
18
|
+
Project-URL: PyPI, https://pypi.org/project/chat-cmpl-stream-handler/
|
|
19
|
+
Project-URL: Repository, https://github.com/allen2c/chat-cmpl-stream-handler
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# chat-cmpl-stream-handler
|
|
23
|
+
|
|
24
|
+
[](https://pypi.org/project/chat-cmpl-stream-handler/)
|
|
25
|
+
[](https://pypi.org/project/chat-cmpl-stream-handler/)
|
|
26
|
+
[](https://opensource.org/licenses/MIT)
|
|
27
|
+
[](https://github.com/allen2c/chat-cmpl-stream-handler/actions/workflows/test.yml)
|
|
28
|
+
[](https://allen2c.github.io/chat-cmpl-stream-handler/)
|
|
29
|
+
|
|
30
|
+
You've reimplemented the tool call loop for the fifth time. So have I. Never again.
|
|
31
|
+
|
|
32
|
+
## Why
|
|
33
|
+
|
|
34
|
+
OpenAI Responses API? Deprecated vibes. Agents SDK? Lovely — until the third breaking change in a month. Chat Completions API? Still here. Still boring. Still working.
|
|
35
|
+
|
|
36
|
+
This library does exactly two things that everyone keeps copy-pasting across projects:
|
|
37
|
+
|
|
38
|
+
1. Stream a chat completion and handle events
|
|
39
|
+
2. Keep looping tool calls until the model is done
|
|
40
|
+
|
|
41
|
+
That's it. No magic. No framework. Just the loop.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install chat-cmpl-stream-handler
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
import asyncio
|
|
53
|
+
import json
|
|
54
|
+
from openai import AsyncOpenAI
|
|
55
|
+
from chat_cmpl_stream_handler import ChatCompletionStreamHandler, stream_until_user_input
|
|
56
|
+
|
|
57
|
+
client = AsyncOpenAI(api_key="...")
|
|
58
|
+
|
|
59
|
+
GET_WEATHER_TOOL = {
|
|
60
|
+
"type": "function",
|
|
61
|
+
"function": {
|
|
62
|
+
"name": "get_weather",
|
|
63
|
+
"description": "Get the current weather for a given city.",
|
|
64
|
+
"parameters": {
|
|
65
|
+
"type": "object",
|
|
66
|
+
"properties": {"city": {"type": "string"}},
|
|
67
|
+
"required": ["city"],
|
|
68
|
+
"additionalProperties": False,
|
|
69
|
+
},
|
|
70
|
+
"strict": True,
|
|
71
|
+
},
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
async def get_weather(arguments: str, context) -> str:
|
|
76
|
+
args = json.loads(arguments)
|
|
77
|
+
return f"The weather in {args['city']} is sunny and 25°C."
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
async def main():
|
|
81
|
+
result = await stream_until_user_input(
|
|
82
|
+
messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
|
|
83
|
+
model="gpt-4.1-nano",
|
|
84
|
+
openai_client=client,
|
|
85
|
+
stream_handler=ChatCompletionStreamHandler(),
|
|
86
|
+
tool_invokers={"get_weather": get_weather},
|
|
87
|
+
stream_kwargs={
|
|
88
|
+
"tools": [GET_WEATHER_TOOL],
|
|
89
|
+
"stream_options": {"include_usage": True},
|
|
90
|
+
},
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# user → assistant (tool_calls) → tool → assistant (final answer)
|
|
94
|
+
for msg in result.to_input_list():
|
|
95
|
+
print(msg["role"], "->", msg.get("content", ""))
|
|
96
|
+
|
|
97
|
+
for usage in result.usages:
|
|
98
|
+
print(f"total tokens: {usage.total_tokens}")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
asyncio.run(main())
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Listening to stream events
|
|
105
|
+
|
|
106
|
+
Subclass `ChatCompletionStreamHandler` and override whatever you care about:
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from chat_cmpl_stream_handler import ChatCompletionStreamHandler
|
|
110
|
+
from openai.lib.streaming.chat._events import ContentDeltaEvent, FunctionToolCallArgumentsDoneEvent
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class PrintingHandler(ChatCompletionStreamHandler):
|
|
114
|
+
async def on_content_delta(self, event: ContentDeltaEvent) -> None:
|
|
115
|
+
print(event.delta, end="", flush=True)
|
|
116
|
+
|
|
117
|
+
async def on_tool_calls_function_arguments_done(
|
|
118
|
+
self, event: FunctionToolCallArgumentsDoneEvent
|
|
119
|
+
) -> None:
|
|
120
|
+
print(f"\n[calling] {event.name}({event.arguments})")
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## API Reference
|
|
124
|
+
|
|
125
|
+
### `stream_until_user_input`
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
async def stream_until_user_input(
|
|
129
|
+
messages: Iterable[ChatCompletionMessageParam],
|
|
130
|
+
model: str | ChatModel,
|
|
131
|
+
openai_client: AsyncOpenAI,
|
|
132
|
+
*,
|
|
133
|
+
stream_handler: ChatCompletionStreamHandler[ResponseFormatT],
|
|
134
|
+
tool_invokers: dict[str, ToolInvokerFn] | None = None,
|
|
135
|
+
stream_kwargs: dict[str, Any] | None = None,
|
|
136
|
+
context: Any | None = None,
|
|
137
|
+
max_iterations: int = 10,
|
|
138
|
+
) -> StreamResult
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Streams a completion, executes tool calls, feeds results back, repeats — until the model stops asking for tools. Raises `MaxIterationsReached` if you've somehow ended up in an infinite tool call loop (it happens).
|
|
142
|
+
|
|
143
|
+
| Parameter | Description |
|
|
144
|
+
|------------------|-----------------------------------------------------------------------------------------|
|
|
145
|
+
| `messages` | Initial message list |
|
|
146
|
+
| `model` | Model name |
|
|
147
|
+
| `openai_client` | `AsyncOpenAI` instance |
|
|
148
|
+
| `stream_handler` | Receives stream events |
|
|
149
|
+
| `tool_invokers` | `{"tool_name": async_fn}` — each fn takes `(arguments: str, context)` and returns `str` |
|
|
150
|
+
| `stream_kwargs` | Passed directly to `beta.chat.completions.stream()` (e.g. `tools`, `stream_options`) |
|
|
151
|
+
| `context` | Forwarded to every tool invoker as-is |
|
|
152
|
+
| `max_iterations` | Safety cap. Default: 10 |
|
|
153
|
+
|
|
154
|
+
### `StreamResult`
|
|
155
|
+
|
|
156
|
+
| Attribute / Method | Description |
|
|
157
|
+
|--------------------|-----------------------------------------------------------------------------|
|
|
158
|
+
| `.to_input_list()` | Full message history as a JSON-serializable list, ready for the next round |
|
|
159
|
+
| `.usages` | `list[CompletionUsage]` — one per iteration, so you can watch the bill grow |
|
|
160
|
+
|
|
161
|
+
### `ChatCompletionStreamHandler`
|
|
162
|
+
|
|
163
|
+
All methods are no-ops by default. Override only what you need.
|
|
164
|
+
|
|
165
|
+
| Method | When it fires |
|
|
166
|
+
|-------------------------------------------------|-----------------------------------------|
|
|
167
|
+
| `on_event(event)` | Every event, before more specific hooks |
|
|
168
|
+
| `on_chunk(event)` | Every raw SSE chunk |
|
|
169
|
+
| `on_content_delta(event)` | Each content token |
|
|
170
|
+
| `on_content_done(event)` | Full content string complete |
|
|
171
|
+
| `on_refusal_delta(event)` | Each refusal token |
|
|
172
|
+
| `on_refusal_done(event)` | Full refusal string complete |
|
|
173
|
+
| `on_tool_calls_function_arguments_delta(event)` | Each incremental tool argument fragment |
|
|
174
|
+
| `on_tool_calls_function_arguments_done(event)` | Full tool argument JSON available |
|
|
175
|
+
| `on_logprobs_content_delta(event)` | Each logprobs content token |
|
|
176
|
+
| `on_logprobs_content_done(event)` | All logprobs content tokens done |
|
|
177
|
+
| `on_logprobs_refusal_delta(event)` | Each logprobs refusal token |
|
|
178
|
+
| `on_logprobs_refusal_done(event)` | All logprobs refusal tokens done |
|
|
179
|
+
|
|
180
|
+
## Provider Compatibility
|
|
181
|
+
|
|
182
|
+
Works with any OpenAI-compatible endpoint. Some providers are more compatible than others.
|
|
183
|
+
|
|
184
|
+
### Gemini
|
|
185
|
+
|
|
186
|
+
Gemini's streaming API sends `tool_call_delta.index = None`, which the OpenAI SDK does not appreciate. Apply the included patch once at startup:
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from chat_cmpl_stream_handler._patch_stream_tool_call_index import apply
|
|
190
|
+
apply() # safe to call multiple times
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Put it at the top of `main.py`, or in `conftest.py` if you're testing. This is opt-in — the library won't silently monkey-patch anything on import.
|
|
194
|
+
|
|
195
|
+
## License
|
|
196
|
+
|
|
197
|
+
MIT
|
|
198
|
+
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# chat-cmpl-stream-handler
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/chat-cmpl-stream-handler/)
|
|
4
|
+
[](https://pypi.org/project/chat-cmpl-stream-handler/)
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
[](https://github.com/allen2c/chat-cmpl-stream-handler/actions/workflows/test.yml)
|
|
7
|
+
[](https://allen2c.github.io/chat-cmpl-stream-handler/)
|
|
8
|
+
|
|
9
|
+
You've reimplemented the tool call loop for the fifth time. So have I. Never again.
|
|
10
|
+
|
|
11
|
+
## Why
|
|
12
|
+
|
|
13
|
+
OpenAI Responses API? Deprecated vibes. Agents SDK? Lovely — until the third breaking change in a month. Chat Completions API? Still here. Still boring. Still working.
|
|
14
|
+
|
|
15
|
+
This library does exactly two things that everyone keeps copy-pasting across projects:
|
|
16
|
+
|
|
17
|
+
1. Stream a chat completion and handle events
|
|
18
|
+
2. Keep looping tool calls until the model is done
|
|
19
|
+
|
|
20
|
+
That's it. No magic. No framework. Just the loop.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install chat-cmpl-stream-handler
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import asyncio
|
|
32
|
+
import json
|
|
33
|
+
from openai import AsyncOpenAI
|
|
34
|
+
from chat_cmpl_stream_handler import ChatCompletionStreamHandler, stream_until_user_input
|
|
35
|
+
|
|
36
|
+
client = AsyncOpenAI(api_key="...")
|
|
37
|
+
|
|
38
|
+
GET_WEATHER_TOOL = {
|
|
39
|
+
"type": "function",
|
|
40
|
+
"function": {
|
|
41
|
+
"name": "get_weather",
|
|
42
|
+
"description": "Get the current weather for a given city.",
|
|
43
|
+
"parameters": {
|
|
44
|
+
"type": "object",
|
|
45
|
+
"properties": {"city": {"type": "string"}},
|
|
46
|
+
"required": ["city"],
|
|
47
|
+
"additionalProperties": False,
|
|
48
|
+
},
|
|
49
|
+
"strict": True,
|
|
50
|
+
},
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def get_weather(arguments: str, context) -> str:
|
|
55
|
+
args = json.loads(arguments)
|
|
56
|
+
return f"The weather in {args['city']} is sunny and 25°C."
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
async def main():
|
|
60
|
+
result = await stream_until_user_input(
|
|
61
|
+
messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
|
|
62
|
+
model="gpt-4.1-nano",
|
|
63
|
+
openai_client=client,
|
|
64
|
+
stream_handler=ChatCompletionStreamHandler(),
|
|
65
|
+
tool_invokers={"get_weather": get_weather},
|
|
66
|
+
stream_kwargs={
|
|
67
|
+
"tools": [GET_WEATHER_TOOL],
|
|
68
|
+
"stream_options": {"include_usage": True},
|
|
69
|
+
},
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# user → assistant (tool_calls) → tool → assistant (final answer)
|
|
73
|
+
for msg in result.to_input_list():
|
|
74
|
+
print(msg["role"], "->", msg.get("content", ""))
|
|
75
|
+
|
|
76
|
+
for usage in result.usages:
|
|
77
|
+
print(f"total tokens: {usage.total_tokens}")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
asyncio.run(main())
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Listening to stream events
|
|
84
|
+
|
|
85
|
+
Subclass `ChatCompletionStreamHandler` and override whatever you care about:
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from chat_cmpl_stream_handler import ChatCompletionStreamHandler
|
|
89
|
+
from openai.lib.streaming.chat._events import ContentDeltaEvent, FunctionToolCallArgumentsDoneEvent
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class PrintingHandler(ChatCompletionStreamHandler):
|
|
93
|
+
async def on_content_delta(self, event: ContentDeltaEvent) -> None:
|
|
94
|
+
print(event.delta, end="", flush=True)
|
|
95
|
+
|
|
96
|
+
async def on_tool_calls_function_arguments_done(
|
|
97
|
+
self, event: FunctionToolCallArgumentsDoneEvent
|
|
98
|
+
) -> None:
|
|
99
|
+
print(f"\n[calling] {event.name}({event.arguments})")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## API Reference
|
|
103
|
+
|
|
104
|
+
### `stream_until_user_input`
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
async def stream_until_user_input(
|
|
108
|
+
messages: Iterable[ChatCompletionMessageParam],
|
|
109
|
+
model: str | ChatModel,
|
|
110
|
+
openai_client: AsyncOpenAI,
|
|
111
|
+
*,
|
|
112
|
+
stream_handler: ChatCompletionStreamHandler[ResponseFormatT],
|
|
113
|
+
tool_invokers: dict[str, ToolInvokerFn] | None = None,
|
|
114
|
+
stream_kwargs: dict[str, Any] | None = None,
|
|
115
|
+
context: Any | None = None,
|
|
116
|
+
max_iterations: int = 10,
|
|
117
|
+
) -> StreamResult
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Streams a completion, executes tool calls, feeds results back, repeats — until the model stops asking for tools. Raises `MaxIterationsReached` if you've somehow ended up in an infinite tool call loop (it happens).
|
|
121
|
+
|
|
122
|
+
| Parameter | Description |
|
|
123
|
+
|------------------|-----------------------------------------------------------------------------------------|
|
|
124
|
+
| `messages` | Initial message list |
|
|
125
|
+
| `model` | Model name |
|
|
126
|
+
| `openai_client` | `AsyncOpenAI` instance |
|
|
127
|
+
| `stream_handler` | Receives stream events |
|
|
128
|
+
| `tool_invokers` | `{"tool_name": async_fn}` — each fn takes `(arguments: str, context)` and returns `str` |
|
|
129
|
+
| `stream_kwargs` | Passed directly to `beta.chat.completions.stream()` (e.g. `tools`, `stream_options`) |
|
|
130
|
+
| `context` | Forwarded to every tool invoker as-is |
|
|
131
|
+
| `max_iterations` | Safety cap. Default: 10 |
|
|
132
|
+
|
|
133
|
+
### `StreamResult`
|
|
134
|
+
|
|
135
|
+
| Attribute / Method | Description |
|
|
136
|
+
|--------------------|-----------------------------------------------------------------------------|
|
|
137
|
+
| `.to_input_list()` | Full message history as a JSON-serializable list, ready for the next round |
|
|
138
|
+
| `.usages` | `list[CompletionUsage]` — one per iteration, so you can watch the bill grow |
|
|
139
|
+
|
|
140
|
+
### `ChatCompletionStreamHandler`
|
|
141
|
+
|
|
142
|
+
All methods are no-ops by default. Override only what you need.
|
|
143
|
+
|
|
144
|
+
| Method | When it fires |
|
|
145
|
+
|-------------------------------------------------|-----------------------------------------|
|
|
146
|
+
| `on_event(event)` | Every event, before more specific hooks |
|
|
147
|
+
| `on_chunk(event)` | Every raw SSE chunk |
|
|
148
|
+
| `on_content_delta(event)` | Each content token |
|
|
149
|
+
| `on_content_done(event)` | Full content string complete |
|
|
150
|
+
| `on_refusal_delta(event)` | Each refusal token |
|
|
151
|
+
| `on_refusal_done(event)` | Full refusal string complete |
|
|
152
|
+
| `on_tool_calls_function_arguments_delta(event)` | Each incremental tool argument fragment |
|
|
153
|
+
| `on_tool_calls_function_arguments_done(event)` | Full tool argument JSON available |
|
|
154
|
+
| `on_logprobs_content_delta(event)` | Each logprobs content token |
|
|
155
|
+
| `on_logprobs_content_done(event)` | All logprobs content tokens done |
|
|
156
|
+
| `on_logprobs_refusal_delta(event)` | Each logprobs refusal token |
|
|
157
|
+
| `on_logprobs_refusal_done(event)` | All logprobs refusal tokens done |
|
|
158
|
+
|
|
159
|
+
## Provider Compatibility
|
|
160
|
+
|
|
161
|
+
Works with any OpenAI-compatible endpoint. Some providers are more compatible than others.
|
|
162
|
+
|
|
163
|
+
### Gemini
|
|
164
|
+
|
|
165
|
+
Gemini's streaming API sends `tool_call_delta.index = None`, which the OpenAI SDK does not appreciate. Apply the included patch once at startup:
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from chat_cmpl_stream_handler._patch_stream_tool_call_index import apply
|
|
169
|
+
apply() # safe to call multiple times
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Put it at the top of `main.py`, or in `conftest.py` if you're testing. This is opt-in — the library won't silently monkey-patch anything on import.
|
|
173
|
+
|
|
174
|
+
## License
|
|
175
|
+
|
|
176
|
+
MIT
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from typing import (
|
|
4
|
+
TYPE_CHECKING,
|
|
5
|
+
Any,
|
|
6
|
+
Awaitable,
|
|
7
|
+
Callable,
|
|
8
|
+
Dict,
|
|
9
|
+
Final,
|
|
10
|
+
Generic,
|
|
11
|
+
Iterable,
|
|
12
|
+
List,
|
|
13
|
+
Text,
|
|
14
|
+
Union,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
from openai import AsyncOpenAI
|
|
18
|
+
from openai.lib._parsing._completions import ResponseFormatT
|
|
19
|
+
from openai.lib.streaming.chat._events import (
|
|
20
|
+
ChunkEvent,
|
|
21
|
+
ContentDeltaEvent,
|
|
22
|
+
ContentDoneEvent,
|
|
23
|
+
FunctionToolCallArgumentsDeltaEvent,
|
|
24
|
+
FunctionToolCallArgumentsDoneEvent,
|
|
25
|
+
LogprobsContentDeltaEvent,
|
|
26
|
+
LogprobsContentDoneEvent,
|
|
27
|
+
LogprobsRefusalDeltaEvent,
|
|
28
|
+
LogprobsRefusalDoneEvent,
|
|
29
|
+
RefusalDeltaEvent,
|
|
30
|
+
RefusalDoneEvent,
|
|
31
|
+
)
|
|
32
|
+
from openai.types.chat import ChatCompletionMessageParam
|
|
33
|
+
from openai.types.chat.chat_completion_assistant_message_param import (
|
|
34
|
+
ChatCompletionAssistantMessageParam,
|
|
35
|
+
)
|
|
36
|
+
from openai.types.chat.chat_completion_message_function_tool_call_param import (
|
|
37
|
+
ChatCompletionMessageFunctionToolCallParam,
|
|
38
|
+
)
|
|
39
|
+
from openai.types.chat.chat_completion_tool_message_param import (
|
|
40
|
+
ChatCompletionToolMessageParam,
|
|
41
|
+
)
|
|
42
|
+
from openai.types.completion_usage import CompletionUsage
|
|
43
|
+
from openai.types.shared.chat_model import ChatModel
|
|
44
|
+
|
|
45
|
+
if TYPE_CHECKING:
|
|
46
|
+
from openai.lib.streaming.chat._events import ChatCompletionStreamEvent
|
|
47
|
+
|
|
48
|
+
__version__: Final[Text] = "0.1.0"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
logger = logging.getLogger(__name__)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
ToolInvokerFn = Callable[[str, Any], Awaitable[str]]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
async def stream_until_user_input(
    messages: Iterable[ChatCompletionMessageParam],
    model: Union[str, ChatModel],
    openai_client: AsyncOpenAI,
    *,
    stream_handler: "ChatCompletionStreamHandler[ResponseFormatT]",
    tool_invokers: Dict[str, ToolInvokerFn] | None = None,
    stream_kwargs: Dict[Text, Any] | None = None,
    context: Any | None = None,
    max_iterations: int = 10,
    **kwargs,
) -> "StreamResult":
    """Stream chat completions, executing tool calls until the model stops asking.

    Each iteration streams one completion (dispatching every event to
    *stream_handler*), appends the resulting assistant message to the running
    history, and — if the model requested tool calls — runs the matching
    invoker for each call and appends the tool outputs before looping again.
    The loop ends when the model replies without tool calls.

    Args:
        messages: Initial conversation history.
        model: Model name forwarded to the Chat Completions API.
        openai_client: Client whose ``beta.chat.completions.stream`` is used.
        stream_handler: Receives every stream event via its ``handle`` method.
        tool_invokers: Maps tool name to ``async (arguments_json, context) -> str``.
        stream_kwargs: Extra kwargs forwarded to ``stream()`` (e.g. ``tools``);
            ``messages`` and ``model`` keys are stripped to avoid duplicates.
        context: Opaque value forwarded unchanged to every tool invoker.
        max_iterations: Safety cap on model round-trips.
        **kwargs: Accepted for forward compatibility; currently ignored.

    Returns:
        StreamResult holding the full message history and per-iteration usages.

    Raises:
        ValueError: If the model calls a tool with no registered invoker.
        MaxIterationsReached: If no tool-free assistant message is produced
            within *max_iterations* rounds.
    """
    current_messages: List[ChatCompletionMessageParam] = list(messages)
    usages: List["CompletionUsage"] = []

    # Strip keys we pass explicitly so callers cannot accidentally duplicate them.
    extra_stream_kwargs = {
        k: v
        for k, v in (stream_kwargs or {}).items()
        if k not in ("messages", "model")
    }

    for _ in range(max_iterations):
        # 1. Stream one completion, forwarding every event to the handler.
        async with openai_client.beta.chat.completions.stream(
            messages=current_messages,
            model=model,
            **extra_stream_kwargs,
        ) as stream:
            async for event in stream:
                await stream_handler.handle(event)

            final = await stream.get_final_completion()
            if final.usage:
                # JSON round-trip detaches the usage from any provider subclass.
                usages.append(
                    CompletionUsage.model_validate_json(final.usage.model_dump_json())
                )

            assistant_msg = final.choices[0].message

            # Normalise empty/missing arguments to "{}" ONCE, so the recorded
            # history and the invoker calls below see the same JSON payload.
            # (Previously the invoker received the raw value, which could be
            # None/"" while the history said "{}".)
            tool_call_params: List[ChatCompletionMessageFunctionToolCallParam] = [
                ChatCompletionMessageFunctionToolCallParam(
                    id=tc.id,
                    type="function",
                    function={
                        "name": tc.function.name,
                        "arguments": tc.function.arguments or "{}",
                    },
                )
                for tc in (assistant_msg.tool_calls or [])
            ]

            # Record the assistant turn in the running history.
            current_messages.append(
                ChatCompletionAssistantMessageParam(
                    role="assistant",
                    content=assistant_msg.content,
                    **({"tool_calls": tool_call_params} if tool_call_params else {}),
                )
            )

            # 2. No tool calls means the model is waiting for the user: done.
            if not tool_call_params:
                return StreamResult(current_messages, model, usages=usages)

            # 3. Execute each tool call and feed its result back into history.
            for tool_call in tool_call_params:
                tool_name = tool_call["function"]["name"]
                invoker = (tool_invokers or {}).get(tool_name)

                if invoker is None:
                    raise ValueError(f"No invoker for tool: {tool_name}")

                tool_call_output = await invoker(
                    tool_call["function"]["arguments"], context
                )

                current_messages.append(
                    ChatCompletionToolMessageParam(
                        role="tool",
                        tool_call_id=tool_call["id"],
                        content=tool_call_output,
                    )
                )

    raise MaxIterationsReached(
        f"Reached max_iterations={max_iterations} without waiting for user input."
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class StreamResult:
    """Outcome of ``stream_until_user_input``.

    Carries the complete conversation (user, assistant, tool turns) plus one
    ``CompletionUsage`` entry per completed model iteration.
    """

    def __init__(
        self,
        messages: "List[ChatCompletionMessageParam]",
        model: "Union[str, ChatModel]",
        usages: "List[CompletionUsage]",
    ):
        # Kept private; callers get a serializable copy via to_input_list().
        self._messages = messages
        self._model = model

        # Public: per-iteration token usage, in iteration order.
        self.usages = usages

    def to_input_list(self) -> "List[ChatCompletionMessageParam]":
        """Return the message history as a JSON-serializable list.

        Values json cannot encode are coerced with ``str``, so the result can
        be passed straight back as the ``messages`` of the next request.
        """
        serialized = json.dumps(self._messages, default=str)
        return json.loads(serialized)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class ChatCompletionStreamHandler(Generic[ResponseFormatT]):
    """Async event sink for chat-completion streams.

    Every hook is a no-op by default; subclass and override only the events
    you care about. ``handle`` is the internal dispatcher used by
    ``stream_until_user_input``.
    """

    # event.type -> name of the hook coroutine to invoke after on_event().
    _EVENT_HOOKS: Dict[str, str] = {
        "chunk": "on_chunk",
        "content.delta": "on_content_delta",
        "content.done": "on_content_done",
        "refusal.delta": "on_refusal_delta",
        "refusal.done": "on_refusal_done",
        "tool_calls.function.arguments.delta": (
            "on_tool_calls_function_arguments_delta"
        ),
        "tool_calls.function.arguments.done": (
            "on_tool_calls_function_arguments_done"
        ),
        "logprobs.content.delta": "on_logprobs_content_delta",
        "logprobs.content.done": "on_logprobs_content_done",
        "logprobs.refusal.delta": "on_logprobs_refusal_delta",
        "logprobs.refusal.done": "on_logprobs_refusal_done",
    }

    async def handle(self, event: "ChatCompletionStreamEvent[ResponseFormatT]") -> None:
        """Internal dispatcher — routes each stream event to the right hook."""
        await self.on_event(event)

        hook_name = self._EVENT_HOOKS.get(event.type)
        if hook_name is None:
            logger.warning(f"Unknown event type: {event.type}")
            return
        await getattr(self, hook_name)(event)

    async def on_event(
        self, event: "ChatCompletionStreamEvent[ResponseFormatT]"
    ) -> None:
        """Called for every stream event before more specific hooks."""

    async def on_chunk(self, event: ChunkEvent) -> None:
        """Called for every raw SSE chunk received from the API."""

    async def on_content_delta(self, event: ContentDeltaEvent) -> None:
        """Called each time a new content token arrives."""

    async def on_content_done(self, event: ContentDoneEvent[ResponseFormatT]) -> None:
        """Called once when the full content string is complete."""

    async def on_refusal_delta(self, event: RefusalDeltaEvent) -> None:
        """Called each time a new refusal token arrives."""

    async def on_refusal_done(self, event: RefusalDoneEvent) -> None:
        """Called once when the full refusal string is complete."""

    async def on_tool_calls_function_arguments_delta(
        self, event: FunctionToolCallArgumentsDeltaEvent
    ) -> None:
        """Called for each incremental JSON fragment of a tool-call's arguments."""

    async def on_tool_calls_function_arguments_done(
        self, event: FunctionToolCallArgumentsDoneEvent
    ) -> None:
        """Called once when a tool call's full argument JSON is available."""

    async def on_logprobs_content_delta(self, event: LogprobsContentDeltaEvent) -> None:
        """Called for each incremental list of content log-probability tokens."""

    async def on_logprobs_content_done(self, event: LogprobsContentDoneEvent) -> None:
        """Called once with the complete list of content log-probability tokens."""

    async def on_logprobs_refusal_delta(self, event: LogprobsRefusalDeltaEvent) -> None:
        """Called for each incremental list of refusal log-probability tokens."""

    async def on_logprobs_refusal_done(self, event: LogprobsRefusalDoneEvent) -> None:
        """Called once with the complete list of refusal log-probability tokens."""
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class MaxIterationsReached(Exception):
    """Raised when ``stream_until_user_input`` hits its ``max_iterations`` cap
    before the model produced a final, tool-free assistant message."""
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Monkey-patch for openai SDK streaming: fix providers that return
|
|
2
|
+
``tool_call_delta.index = None`` (e.g. Gemini OpenAI-compat endpoint).
|
|
3
|
+
|
|
4
|
+
The openai SDK assumes ``tool_call_delta.index`` is always an ``int``,
|
|
5
|
+
but some providers (notably Gemini's OpenAI-compatible API) send
|
|
6
|
+
``None``. This patch normalises the index to its positional order
|
|
7
|
+
before the SDK processes the chunk.
|
|
8
|
+
|
|
9
|
+
Usage — call ``apply()`` once before any streaming request::
|
|
10
|
+
|
|
11
|
+
from chat_cmpl_stream_handler._patch_stream_tool_call_index import apply
|
|
12
|
+
apply()
|
|
13
|
+
|
|
14
|
+
It is safe to call ``apply()`` multiple times; only the first call
|
|
15
|
+
takes effect. The patch must be applied before
|
|
16
|
+
``ChatCompletionStreamState`` instances are created (i.e. before
|
|
17
|
+
``openai_client.beta.chat.completions.stream()`` is called).
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
|
|
24
|
+
from openai.lib.streaming.chat._completions import ChatCompletionStreamState
|
|
25
|
+
from openai.types.chat import ChatCompletionChunk
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
_PATCHED = False
|
|
30
|
+
_original_handle_chunk = ChatCompletionStreamState.handle_chunk
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _fix_none_tool_call_indices(chunk: ChatCompletionChunk) -> None:
    """Normalise every ``tool_call.index`` in *chunk* to an int, in place.

    Some providers (e.g. Gemini's OpenAI-compatible endpoint) omit the index
    and send ``None``; fall back to the delta's positional order within the
    choice, which is the only sane default.
    """
    for choice in chunk.choices:
        # `or []` covers both None and an empty tool_calls list.
        for position, delta in enumerate(choice.delta.tool_calls or []):
            if delta.index is None:
                delta.index = position
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _patched_handle_chunk(self, chunk: ChatCompletionChunk):
    """Replacement for ``ChatCompletionStreamState.handle_chunk``.

    Normalises any ``None`` tool-call indices in *chunk*, then delegates to
    the SDK's original implementation captured at import time.
    """
    _fix_none_tool_call_indices(chunk)
    return _original_handle_chunk(self, chunk)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def apply() -> None:
    """Install the tool-call-index patch on the openai SDK (idempotent).

    Swaps ``ChatCompletionStreamState.handle_chunk`` for a wrapper that fixes
    ``None`` tool-call indices. Calling ``apply`` again is a no-op.
    """
    global _PATCHED
    if not _PATCHED:
        ChatCompletionStreamState.handle_chunk = _patched_handle_chunk
        _PATCHED = True
        logger.debug(
            "Patched ChatCompletionStreamState.handle_chunk for None tool_call index"
        )
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
authors = [{ name = "Allen Chou", email = "f1470891079@gmail.com" }]
|
|
3
|
+
dependencies = ["openai (>=1)"]
|
|
4
|
+
description = "Chat Completion Stream Handler"
|
|
5
|
+
license = { text = "MIT" }
|
|
6
|
+
name = "chat-cmpl-stream-handler"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
requires-python = ">=3.11,<4"
|
|
9
|
+
version = "0.1.0"
|
|
10
|
+
|
|
11
|
+
[project.urls]
|
|
12
|
+
Homepage = "https://github.com/allen2c/chat-cmpl-stream-handler"
|
|
13
|
+
"PyPI" = "https://pypi.org/project/chat-cmpl-stream-handler/"
|
|
14
|
+
Repository = "https://github.com/allen2c/chat-cmpl-stream-handler"
|
|
15
|
+
|
|
16
|
+
[tool.poetry]
|
|
17
|
+
packages = [{ include = "chat_cmpl_stream_handler" }]
|
|
18
|
+
|
|
19
|
+
[tool.poetry.extras]
|
|
20
|
+
all = []
|
|
21
|
+
|
|
22
|
+
[tool.poetry.group.dev.dependencies]
|
|
23
|
+
black = { extras = ["jupyter"], version = "*" }
|
|
24
|
+
isort = "*"
|
|
25
|
+
mkdocs-material = "*"
|
|
26
|
+
openai-agents = "*"
|
|
27
|
+
poetry-plugin-export = "*"
|
|
28
|
+
pytest = "*"
|
|
29
|
+
pytest-asyncio = "*"
|
|
30
|
+
pytest-cov = "*"
|
|
31
|
+
pytest-env = "*"
|
|
32
|
+
pytest-xdist = "*"
|
|
33
|
+
rich = "*"
|
|
34
|
+
rich-color-support = "*"
|
|
35
|
+
setuptools = "*"
|
|
36
|
+
twine = "*"
|
|
37
|
+
|
|
38
|
+
[tool.isort]
|
|
39
|
+
profile = "black"
|
|
40
|
+
|
|
41
|
+
[tool.flake8]
|
|
42
|
+
ignore = ["E203", "E704", "W503"]
|
|
43
|
+
max-line-length = 88
|
|
44
|
+
|
|
45
|
+
[tool.pytest.ini_options]
|
|
46
|
+
env = ["ENVIRONMENT=test", "PYTEST_IS_RUNNING=true"]
|
|
47
|
+
|
|
48
|
+
[build-system]
|
|
49
|
+
build-backend = "poetry.core.masonry.api"
|
|
50
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|