chat-cmpl-stream-handler 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 AllenChou
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,198 @@
1
+ Metadata-Version: 2.4
2
+ Name: chat-cmpl-stream-handler
3
+ Version: 0.1.0
4
+ Summary: Chat Completion Stream Handler
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Author: Allen Chou
8
+ Author-email: f1470891079@gmail.com
9
+ Requires-Python: >=3.11,<4
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Requires-Dist: openai (>=1)
17
+ Project-URL: Homepage, https://github.com/allen2c/chat-cmpl-stream-handler
18
+ Project-URL: PyPI, https://pypi.org/project/chat-cmpl-stream-handler/
19
+ Project-URL: Repository, https://github.com/allen2c/chat-cmpl-stream-handler
20
+ Description-Content-Type: text/markdown
21
+
22
+ # chat-cmpl-stream-handler
23
+
24
+ [![PyPI version](https://img.shields.io/pypi/v/chat-cmpl-stream-handler.svg)](https://pypi.org/project/chat-cmpl-stream-handler/)
25
+ [![Python Version](https://img.shields.io/pypi/pyversions/chat-cmpl-stream-handler.svg)](https://pypi.org/project/chat-cmpl-stream-handler/)
26
+ [![License](https://img.shields.io/pypi/l/chat-cmpl-stream-handler.svg)](https://opensource.org/licenses/MIT)
27
+ [![Tests](https://github.com/allen2c/chat-cmpl-stream-handler/actions/workflows/test.yml/badge.svg)](https://github.com/allen2c/chat-cmpl-stream-handler/actions/workflows/test.yml)
28
+ [![Docs](https://img.shields.io/badge/docs-github%20pages-blue)](https://allen2c.github.io/chat-cmpl-stream-handler/)
29
+
30
+ You've reimplemented the tool call loop for the fifth time. So have I. Never again.
31
+
32
+ ## Why
33
+
34
+ OpenAI Responses API? Deprecated vibes. Agents SDK? Lovely — until the third breaking change in a month. Chat Completions API? Still here. Still boring. Still working.
35
+
36
+ This library does exactly two things that everyone keeps copy-pasting across projects:
37
+
38
+ 1. Stream a chat completion and handle events
39
+ 2. Keep looping tool calls until the model is done
40
+
41
+ That's it. No magic. No framework. Just the loop.
42
+
43
+ ## Installation
44
+
45
+ ```bash
46
+ pip install chat-cmpl-stream-handler
47
+ ```
48
+
49
+ ## Quick Start
50
+
51
+ ```python
52
+ import asyncio
53
+ import json
54
+ from openai import AsyncOpenAI
55
+ from chat_cmpl_stream_handler import ChatCompletionStreamHandler, stream_until_user_input
56
+
57
+ client = AsyncOpenAI(api_key="...")
58
+
59
+ GET_WEATHER_TOOL = {
60
+ "type": "function",
61
+ "function": {
62
+ "name": "get_weather",
63
+ "description": "Get the current weather for a given city.",
64
+ "parameters": {
65
+ "type": "object",
66
+ "properties": {"city": {"type": "string"}},
67
+ "required": ["city"],
68
+ "additionalProperties": False,
69
+ },
70
+ "strict": True,
71
+ },
72
+ }
73
+
74
+
75
+ async def get_weather(arguments: str, context) -> str:
76
+ args = json.loads(arguments)
77
+ return f"The weather in {args['city']} is sunny and 25°C."
78
+
79
+
80
+ async def main():
81
+ result = await stream_until_user_input(
82
+ messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
83
+ model="gpt-4.1-nano",
84
+ openai_client=client,
85
+ stream_handler=ChatCompletionStreamHandler(),
86
+ tool_invokers={"get_weather": get_weather},
87
+ stream_kwargs={
88
+ "tools": [GET_WEATHER_TOOL],
89
+ "stream_options": {"include_usage": True},
90
+ },
91
+ )
92
+
93
+ # user → assistant (tool_calls) → tool → assistant (final answer)
94
+ for msg in result.to_input_list():
95
+ print(msg["role"], "->", msg.get("content", ""))
96
+
97
+ for usage in result.usages:
98
+ print(f"total tokens: {usage.total_tokens}")
99
+
100
+
101
+ asyncio.run(main())
102
+ ```
103
+
104
+ ### Listening to stream events
105
+
106
+ Subclass `ChatCompletionStreamHandler` and override whatever you care about:
107
+
108
+ ```python
109
+ from chat_cmpl_stream_handler import ChatCompletionStreamHandler
110
+ from openai.lib.streaming.chat._events import ContentDeltaEvent, FunctionToolCallArgumentsDoneEvent
111
+
112
+
113
+ class PrintingHandler(ChatCompletionStreamHandler):
114
+ async def on_content_delta(self, event: ContentDeltaEvent) -> None:
115
+ print(event.delta, end="", flush=True)
116
+
117
+ async def on_tool_calls_function_arguments_done(
118
+ self, event: FunctionToolCallArgumentsDoneEvent
119
+ ) -> None:
120
+ print(f"\n[calling] {event.name}({event.arguments})")
121
+ ```
122
+
123
+ ## API Reference
124
+
125
+ ### `stream_until_user_input`
126
+
127
+ ```python
128
+ async def stream_until_user_input(
129
+ messages: Iterable[ChatCompletionMessageParam],
130
+ model: str | ChatModel,
131
+ openai_client: AsyncOpenAI,
132
+ *,
133
+ stream_handler: ChatCompletionStreamHandler[ResponseFormatT],
134
+ tool_invokers: dict[str, ToolInvokerFn] | None = None,
135
+ stream_kwargs: dict[str, Any] | None = None,
136
+ context: Any | None = None,
137
+ max_iterations: int = 10,
138
+ ) -> StreamResult
139
+ ```
140
+
141
+ Streams a completion, executes tool calls, feeds results back, repeats — until the model stops asking for tools. Raises `MaxIterationsReached` if you've somehow ended up in an infinite tool call loop (it happens).
142
+
143
+ | Parameter | Description |
144
+ |------------------|-----------------------------------------------------------------------------------------|
145
+ | `messages` | Initial message list |
146
+ | `model` | Model name |
147
+ | `openai_client` | `AsyncOpenAI` instance |
148
+ | `stream_handler` | Receives stream events |
149
+ | `tool_invokers` | `{"tool_name": async_fn}` — each fn takes `(arguments: str, context)` and returns `str` |
150
+ | `stream_kwargs` | Passed directly to `beta.chat.completions.stream()` (e.g. `tools`, `stream_options`) |
151
+ | `context` | Forwarded to every tool invoker as-is |
152
+ | `max_iterations` | Safety cap. Default: 10 |
153
+
154
+ ### `StreamResult`
155
+
156
+ | Attribute / Method | Description |
157
+ |--------------------|-----------------------------------------------------------------------------|
158
+ | `.to_input_list()` | Full message history as a JSON-serializable list, ready for the next round |
159
+ | `.usages` | `list[CompletionUsage]` — one per iteration, so you can watch the bill grow |
160
+
161
+ ### `ChatCompletionStreamHandler`
162
+
163
+ All methods are no-ops by default. Override only what you need.
164
+
165
+ | Method | When it fires |
166
+ |-------------------------------------------------|-----------------------------------------|
167
+ | `on_event(event)` | Every event, before more specific hooks |
168
+ | `on_chunk(event)` | Every raw SSE chunk |
169
+ | `on_content_delta(event)` | Each content token |
170
+ | `on_content_done(event)` | Full content string complete |
171
+ | `on_refusal_delta(event)` | Each refusal token |
172
+ | `on_refusal_done(event)` | Full refusal string complete |
173
+ | `on_tool_calls_function_arguments_delta(event)` | Each incremental tool argument fragment |
174
+ | `on_tool_calls_function_arguments_done(event)` | Full tool argument JSON available |
175
+ | `on_logprobs_content_delta(event)` | Each logprobs content token |
176
+ | `on_logprobs_content_done(event)` | All logprobs content tokens done |
177
+ | `on_logprobs_refusal_delta(event)` | Each logprobs refusal token |
178
+ | `on_logprobs_refusal_done(event)` | All logprobs refusal tokens done |
179
+
180
+ ## Provider Compatibility
181
+
182
+ Works with any OpenAI-compatible endpoint. Some providers are more compatible than others.
183
+
184
+ ### Gemini
185
+
186
+ Gemini's streaming API sends `tool_call_delta.index = None`, which the OpenAI SDK does not appreciate. Apply the included patch once at startup:
187
+
188
+ ```python
189
+ from chat_cmpl_stream_handler._patch_stream_tool_call_index import apply
190
+ apply() # safe to call multiple times
191
+ ```
192
+
193
+ Put it at the top of `main.py`, or in `conftest.py` if you're testing. This is opt-in — the library won't silently monkey-patch anything on import.
194
+
195
+ ## License
196
+
197
+ MIT
198
+
@@ -0,0 +1,176 @@
1
+ # chat-cmpl-stream-handler
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/chat-cmpl-stream-handler.svg)](https://pypi.org/project/chat-cmpl-stream-handler/)
4
+ [![Python Version](https://img.shields.io/pypi/pyversions/chat-cmpl-stream-handler.svg)](https://pypi.org/project/chat-cmpl-stream-handler/)
5
+ [![License](https://img.shields.io/pypi/l/chat-cmpl-stream-handler.svg)](https://opensource.org/licenses/MIT)
6
+ [![Tests](https://github.com/allen2c/chat-cmpl-stream-handler/actions/workflows/test.yml/badge.svg)](https://github.com/allen2c/chat-cmpl-stream-handler/actions/workflows/test.yml)
7
+ [![Docs](https://img.shields.io/badge/docs-github%20pages-blue)](https://allen2c.github.io/chat-cmpl-stream-handler/)
8
+
9
+ You've reimplemented the tool call loop for the fifth time. So have I. Never again.
10
+
11
+ ## Why
12
+
13
+ OpenAI Responses API? Deprecated vibes. Agents SDK? Lovely — until the third breaking change in a month. Chat Completions API? Still here. Still boring. Still working.
14
+
15
+ This library does exactly two things that everyone keeps copy-pasting across projects:
16
+
17
+ 1. Stream a chat completion and handle events
18
+ 2. Keep looping tool calls until the model is done
19
+
20
+ That's it. No magic. No framework. Just the loop.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install chat-cmpl-stream-handler
26
+ ```
27
+
28
+ ## Quick Start
29
+
30
+ ```python
31
+ import asyncio
32
+ import json
33
+ from openai import AsyncOpenAI
34
+ from chat_cmpl_stream_handler import ChatCompletionStreamHandler, stream_until_user_input
35
+
36
+ client = AsyncOpenAI(api_key="...")
37
+
38
+ GET_WEATHER_TOOL = {
39
+ "type": "function",
40
+ "function": {
41
+ "name": "get_weather",
42
+ "description": "Get the current weather for a given city.",
43
+ "parameters": {
44
+ "type": "object",
45
+ "properties": {"city": {"type": "string"}},
46
+ "required": ["city"],
47
+ "additionalProperties": False,
48
+ },
49
+ "strict": True,
50
+ },
51
+ }
52
+
53
+
54
+ async def get_weather(arguments: str, context) -> str:
55
+ args = json.loads(arguments)
56
+ return f"The weather in {args['city']} is sunny and 25°C."
57
+
58
+
59
+ async def main():
60
+ result = await stream_until_user_input(
61
+ messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
62
+ model="gpt-4.1-nano",
63
+ openai_client=client,
64
+ stream_handler=ChatCompletionStreamHandler(),
65
+ tool_invokers={"get_weather": get_weather},
66
+ stream_kwargs={
67
+ "tools": [GET_WEATHER_TOOL],
68
+ "stream_options": {"include_usage": True},
69
+ },
70
+ )
71
+
72
+ # user → assistant (tool_calls) → tool → assistant (final answer)
73
+ for msg in result.to_input_list():
74
+ print(msg["role"], "->", msg.get("content", ""))
75
+
76
+ for usage in result.usages:
77
+ print(f"total tokens: {usage.total_tokens}")
78
+
79
+
80
+ asyncio.run(main())
81
+ ```
82
+
83
+ ### Listening to stream events
84
+
85
+ Subclass `ChatCompletionStreamHandler` and override whatever you care about:
86
+
87
+ ```python
88
+ from chat_cmpl_stream_handler import ChatCompletionStreamHandler
89
+ from openai.lib.streaming.chat._events import ContentDeltaEvent, FunctionToolCallArgumentsDoneEvent
90
+
91
+
92
+ class PrintingHandler(ChatCompletionStreamHandler):
93
+ async def on_content_delta(self, event: ContentDeltaEvent) -> None:
94
+ print(event.delta, end="", flush=True)
95
+
96
+ async def on_tool_calls_function_arguments_done(
97
+ self, event: FunctionToolCallArgumentsDoneEvent
98
+ ) -> None:
99
+ print(f"\n[calling] {event.name}({event.arguments})")
100
+ ```
101
+
102
+ ## API Reference
103
+
104
+ ### `stream_until_user_input`
105
+
106
+ ```python
107
+ async def stream_until_user_input(
108
+ messages: Iterable[ChatCompletionMessageParam],
109
+ model: str | ChatModel,
110
+ openai_client: AsyncOpenAI,
111
+ *,
112
+ stream_handler: ChatCompletionStreamHandler[ResponseFormatT],
113
+ tool_invokers: dict[str, ToolInvokerFn] | None = None,
114
+ stream_kwargs: dict[str, Any] | None = None,
115
+ context: Any | None = None,
116
+ max_iterations: int = 10,
117
+ ) -> StreamResult
118
+ ```
119
+
120
+ Streams a completion, executes tool calls, feeds results back, repeats — until the model stops asking for tools. Raises `MaxIterationsReached` if you've somehow ended up in an infinite tool call loop (it happens).
121
+
122
+ | Parameter | Description |
123
+ |------------------|-----------------------------------------------------------------------------------------|
124
+ | `messages` | Initial message list |
125
+ | `model` | Model name |
126
+ | `openai_client` | `AsyncOpenAI` instance |
127
+ | `stream_handler` | Receives stream events |
128
+ | `tool_invokers` | `{"tool_name": async_fn}` — each fn takes `(arguments: str, context)` and returns `str` |
129
+ | `stream_kwargs` | Passed directly to `beta.chat.completions.stream()` (e.g. `tools`, `stream_options`) |
130
+ | `context` | Forwarded to every tool invoker as-is |
131
+ | `max_iterations` | Safety cap. Default: 10 |
132
+
133
+ ### `StreamResult`
134
+
135
+ | Attribute / Method | Description |
136
+ |--------------------|-----------------------------------------------------------------------------|
137
+ | `.to_input_list()` | Full message history as a JSON-serializable list, ready for the next round |
138
+ | `.usages` | `list[CompletionUsage]` — one per iteration, so you can watch the bill grow |
139
+
140
+ ### `ChatCompletionStreamHandler`
141
+
142
+ All methods are no-ops by default. Override only what you need.
143
+
144
+ | Method | When it fires |
145
+ |-------------------------------------------------|-----------------------------------------|
146
+ | `on_event(event)` | Every event, before more specific hooks |
147
+ | `on_chunk(event)` | Every raw SSE chunk |
148
+ | `on_content_delta(event)` | Each content token |
149
+ | `on_content_done(event)` | Full content string complete |
150
+ | `on_refusal_delta(event)` | Each refusal token |
151
+ | `on_refusal_done(event)` | Full refusal string complete |
152
+ | `on_tool_calls_function_arguments_delta(event)` | Each incremental tool argument fragment |
153
+ | `on_tool_calls_function_arguments_done(event)` | Full tool argument JSON available |
154
+ | `on_logprobs_content_delta(event)` | Each logprobs content token |
155
+ | `on_logprobs_content_done(event)` | All logprobs content tokens done |
156
+ | `on_logprobs_refusal_delta(event)` | Each logprobs refusal token |
157
+ | `on_logprobs_refusal_done(event)` | All logprobs refusal tokens done |
158
+
159
+ ## Provider Compatibility
160
+
161
+ Works with any OpenAI-compatible endpoint. Some providers are more compatible than others.
162
+
163
+ ### Gemini
164
+
165
+ Gemini's streaming API sends `tool_call_delta.index = None`, which the OpenAI SDK does not appreciate. Apply the included patch once at startup:
166
+
167
+ ```python
168
+ from chat_cmpl_stream_handler._patch_stream_tool_call_index import apply
169
+ apply() # safe to call multiple times
170
+ ```
171
+
172
+ Put it at the top of `main.py`, or in `conftest.py` if you're testing. This is opt-in — the library won't silently monkey-patch anything on import.
173
+
174
+ ## License
175
+
176
+ MIT
@@ -0,0 +1,247 @@
1
+ import json
2
+ import logging
3
+ from typing import (
4
+ TYPE_CHECKING,
5
+ Any,
6
+ Awaitable,
7
+ Callable,
8
+ Dict,
9
+ Final,
10
+ Generic,
11
+ Iterable,
12
+ List,
13
+ Text,
14
+ Union,
15
+ )
16
+
17
+ from openai import AsyncOpenAI
18
+ from openai.lib._parsing._completions import ResponseFormatT
19
+ from openai.lib.streaming.chat._events import (
20
+ ChunkEvent,
21
+ ContentDeltaEvent,
22
+ ContentDoneEvent,
23
+ FunctionToolCallArgumentsDeltaEvent,
24
+ FunctionToolCallArgumentsDoneEvent,
25
+ LogprobsContentDeltaEvent,
26
+ LogprobsContentDoneEvent,
27
+ LogprobsRefusalDeltaEvent,
28
+ LogprobsRefusalDoneEvent,
29
+ RefusalDeltaEvent,
30
+ RefusalDoneEvent,
31
+ )
32
+ from openai.types.chat import ChatCompletionMessageParam
33
+ from openai.types.chat.chat_completion_assistant_message_param import (
34
+ ChatCompletionAssistantMessageParam,
35
+ )
36
+ from openai.types.chat.chat_completion_message_function_tool_call_param import (
37
+ ChatCompletionMessageFunctionToolCallParam,
38
+ )
39
+ from openai.types.chat.chat_completion_tool_message_param import (
40
+ ChatCompletionToolMessageParam,
41
+ )
42
+ from openai.types.completion_usage import CompletionUsage
43
+ from openai.types.shared.chat_model import ChatModel
44
+
45
+ if TYPE_CHECKING:
46
+ from openai.lib.streaming.chat._events import ChatCompletionStreamEvent
47
+
48
+ __version__: Final[Text] = "0.1.0"
49
+
50
+
51
+ logger = logging.getLogger(__name__)
52
+
53
+
54
+ ToolInvokerFn = Callable[[str, Any], Awaitable[str]]
55
+
56
+
57
async def stream_until_user_input(
    messages: Iterable[ChatCompletionMessageParam],
    model: Union[str, ChatModel],
    openai_client: AsyncOpenAI,
    *,
    stream_handler: "ChatCompletionStreamHandler[ResponseFormatT]",
    tool_invokers: Dict[str, ToolInvokerFn] | None = None,
    stream_kwargs: Dict[Text, Any] | None = None,
    context: Any | None = None,
    max_iterations: int = 10,
    **kwargs,
) -> "StreamResult":
    """Stream completions, executing tool calls until the model needs no more.

    Each iteration streams one assistant turn, forwards every stream event to
    ``stream_handler``, records the assistant message in the history, and —
    if the model requested tool calls — invokes the matching entry in
    ``tool_invokers`` and feeds the output back as a ``tool`` message.

    Args:
        messages: Initial conversation history.
        model: Model name passed to the Chat Completions API.
        openai_client: Configured ``AsyncOpenAI`` instance.
        stream_handler: Receives every stream event via ``handle``.
        tool_invokers: Maps tool name to an async
            ``(arguments_json, context) -> str`` callable.
        stream_kwargs: Extra kwargs forwarded to
            ``beta.chat.completions.stream()`` (e.g. ``tools``,
            ``stream_options``). ``messages``/``model`` keys are dropped here
            because the loop manages them itself.
        context: Opaque value forwarded unchanged to every tool invoker.
        max_iterations: Safety cap on assistant turns.

    Returns:
        ``StreamResult`` with the full message history and one usage record
        per streamed iteration.

    Raises:
        ValueError: A tool call names a tool with no registered invoker.
        MaxIterationsReached: The cap was hit while the model was still
            requesting tool calls.
    """
    if kwargs:
        # Bug fix: extra keyword arguments used to be silently discarded.
        # Surface them so a mistyped argument (e.g. passing `tools=` directly
        # instead of via `stream_kwargs`) does not go unnoticed.
        logger.warning(
            "stream_until_user_input ignoring unsupported keyword arguments: %s",
            sorted(kwargs),
        )

    current_messages = list(messages)
    usages: List["CompletionUsage"] = []

    for _ in range(max_iterations):
        # 1. Stream one assistant turn, forwarding events to the handler.
        async with openai_client.beta.chat.completions.stream(
            messages=current_messages,
            model=model,
            **{
                k: v
                for k, v in (stream_kwargs or {}).items()
                if k not in ("messages", "model")  # managed by this loop
            },
        ) as stream:
            async for event in stream:
                await stream_handler.handle(event)

            final = await stream.get_final_completion()
            if final.usage:
                # JSON round-trip normalises provider-specific usage models
                # into a plain CompletionUsage.
                usages.append(
                    CompletionUsage.model_validate_json(final.usage.model_dump_json())
                )

        # 2. Record the assistant turn (including any tool_calls) in history.
        assistant_msg = final.choices[0].message
        current_messages.append(
            ChatCompletionAssistantMessageParam(
                role="assistant",
                content=assistant_msg.content,
                **(
                    {
                        "tool_calls": [
                            ChatCompletionMessageFunctionToolCallParam(
                                id=tc.id,
                                type="function",
                                function={
                                    "name": tc.function.name,
                                    # Some providers stream empty arguments;
                                    # "{}" keeps the payload valid JSON.
                                    "arguments": tc.function.arguments or "{}",
                                },
                            )
                            for tc in assistant_msg.tool_calls
                        ]
                    }
                    if assistant_msg.tool_calls
                    else {}
                ),
            )
        )

        # 3. No tool calls -> the model is waiting for user input; we're done.
        if not assistant_msg.tool_calls:
            return StreamResult(current_messages, model, usages=usages)

        # 4. Execute each requested tool, appending its output to the history.
        for tool_call in assistant_msg.tool_calls:
            invoker = (tool_invokers or {}).get(tool_call.function.name)

            if invoker is None:
                raise ValueError(f"No invoker for tool: {tool_call.function.name}")

            tool_call_output = await invoker(tool_call.function.arguments, context)

            current_messages.append(
                ChatCompletionToolMessageParam(
                    role="tool",
                    tool_call_id=tool_call.id,
                    content=tool_call_output,
                )
            )

    raise MaxIterationsReached(
        f"Reached max_iterations={max_iterations} without waiting for user input."
    )
141
+
142
+
143
class StreamResult:
    """Outcome of ``stream_until_user_input``: final history plus usage stats."""

    def __init__(
        self,
        messages: List[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        usages: List["CompletionUsage"],
    ):
        # Full conversation history accumulated by the tool-call loop.
        self._messages = messages
        # Model the loop ran against; kept for potential follow-up rounds.
        self._model = model
        # One usage record per streamed assistant turn.
        self.usages = usages

    def to_input_list(self) -> List[ChatCompletionMessageParam]:
        """Return the history as JSON-serializable plain dicts (a deep copy).

        The json round-trip detaches the result from internal state, so
        callers may mutate it freely. Values json cannot encode are coerced
        with ``str`` (``default=str``).
        """
        payload = json.dumps(self._messages, default=str)
        return json.loads(payload)
157
+
158
+
159
class ChatCompletionStreamHandler(Generic[ResponseFormatT]):
    """Collection of async hooks for Chat Completions stream events.

    Every hook is a no-op by default; subclasses override only the events
    they care about. ``handle`` dispatches each event first to ``on_event``
    and then to the type-specific hook.
    """

    # event.type -> name of the hook coroutine that should receive it.
    _EVENT_HOOKS: Dict[str, str] = {
        "chunk": "on_chunk",
        "content.delta": "on_content_delta",
        "content.done": "on_content_done",
        "refusal.delta": "on_refusal_delta",
        "refusal.done": "on_refusal_done",
        "tool_calls.function.arguments.delta": (
            "on_tool_calls_function_arguments_delta"
        ),
        "tool_calls.function.arguments.done": (
            "on_tool_calls_function_arguments_done"
        ),
        "logprobs.content.delta": "on_logprobs_content_delta",
        "logprobs.content.done": "on_logprobs_content_done",
        "logprobs.refusal.delta": "on_logprobs_refusal_delta",
        "logprobs.refusal.done": "on_logprobs_refusal_done",
    }

    async def handle(self, event: "ChatCompletionStreamEvent[ResponseFormatT]") -> None:
        """Internal dispatcher — routes each stream event to the right hook."""
        # The catch-all hook always fires before the specific one.
        await self.on_event(event)

        hook_name = self._EVENT_HOOKS.get(event.type)
        if hook_name is None:
            logger.warning(f"Unknown event type: {event.type}")
            return
        await getattr(self, hook_name)(event)

    async def on_event(
        self, event: "ChatCompletionStreamEvent[ResponseFormatT]"
    ) -> None:
        """Called for every stream event before more specific hooks."""

    async def on_chunk(self, event: ChunkEvent) -> None:
        """Called for every raw SSE chunk received from the API."""

    async def on_content_delta(self, event: ContentDeltaEvent) -> None:
        """Called each time a new content token arrives."""

    async def on_content_done(self, event: ContentDoneEvent[ResponseFormatT]) -> None:
        """Called once when the full content string is complete."""

    async def on_refusal_delta(self, event: RefusalDeltaEvent) -> None:
        """Called each time a new refusal token arrives."""

    async def on_refusal_done(self, event: RefusalDoneEvent) -> None:
        """Called once when the full refusal string is complete."""

    async def on_tool_calls_function_arguments_delta(
        self, event: FunctionToolCallArgumentsDeltaEvent
    ) -> None:
        """Called for each incremental JSON fragment of a tool-call's arguments."""

    async def on_tool_calls_function_arguments_done(
        self, event: FunctionToolCallArgumentsDoneEvent
    ) -> None:
        """Called once when a tool call's full argument JSON is available."""

    async def on_logprobs_content_delta(self, event: LogprobsContentDeltaEvent) -> None:
        """Called for each incremental list of content log-probability tokens."""

    async def on_logprobs_content_done(self, event: LogprobsContentDoneEvent) -> None:
        """Called once with the complete list of content log-probability tokens."""

    async def on_logprobs_refusal_delta(self, event: LogprobsRefusalDeltaEvent) -> None:
        """Called for each incremental list of refusal log-probability tokens."""

    async def on_logprobs_refusal_done(self, event: LogprobsRefusalDoneEvent) -> None:
        """Called once with the complete list of refusal log-probability tokens."""
242
+
243
+
244
class MaxIterationsReached(Exception):
    """Raised when ``stream_until_user_input`` exhausts its iteration budget
    while the model is still requesting tool calls."""
@@ -0,0 +1,61 @@
1
+ """Monkey-patch for openai SDK streaming: fix providers that return
2
+ ``tool_call_delta.index = None`` (e.g. Gemini OpenAI-compat endpoint).
3
+
4
+ The openai SDK assumes ``tool_call_delta.index`` is always an ``int``,
5
+ but some providers (notably Gemini's OpenAI-compatible API) send
6
+ ``None``. This patch normalises the index to its positional order
7
+ before the SDK processes the chunk.
8
+
9
+ Usage — call ``apply()`` once before any streaming request::
10
+
11
+ from chat_cmpl_stream_handler._patch_stream_tool_call_index import apply
12
+ apply()
13
+
14
+ It is safe to call ``apply()`` multiple times; only the first call
15
+ takes effect. The patch must be applied before
16
+ ``ChatCompletionStreamState`` instances are created (i.e. before
17
+ ``openai_client.beta.chat.completions.stream()`` is called).
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+
24
+ from openai.lib.streaming.chat._completions import ChatCompletionStreamState
25
+ from openai.types.chat import ChatCompletionChunk
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ _PATCHED = False
30
+ _original_handle_chunk = ChatCompletionStreamState.handle_chunk
31
+
32
+
33
def _fix_none_tool_call_indices(chunk: ChatCompletionChunk) -> None:
    """Mutate *chunk* in-place so every ``tool_call.index`` is an ``int``.

    Providers that omit the index (send ``None``) get the positional order
    of the tool-call deltas within their choice, which is the only sane
    default. Indices that are already set are left untouched.
    """
    for choice in chunk.choices:
        deltas = choice.delta.tool_calls or []
        for position, delta in enumerate(deltas):
            if delta.index is None:
                delta.index = position
46
+
47
+
48
def _patched_handle_chunk(self, chunk: ChatCompletionChunk):
    """Replacement for ``ChatCompletionStreamState.handle_chunk``.

    Normalises any ``None`` tool-call indices in *chunk*, then delegates to
    the SDK's original implementation (captured at import time in
    ``_original_handle_chunk``), returning its result unchanged.
    """
    _fix_none_tool_call_indices(chunk)
    return _original_handle_chunk(self, chunk)
51
+
52
+
53
def apply() -> None:
    """Install the stream patch; safe to call repeatedly (first call wins).

    Replaces ``ChatCompletionStreamState.handle_chunk`` with a wrapper that
    normalises ``None`` tool-call indices before the SDK sees the chunk.
    """
    global _PATCHED
    if _PATCHED:
        # Already installed — stay idempotent, never double-wrap.
        return
    ChatCompletionStreamState.handle_chunk = _patched_handle_chunk
    _PATCHED = True
    logger.debug(
        "Patched ChatCompletionStreamState.handle_chunk for None tool_call index"
    )
@@ -0,0 +1,50 @@
1
+ [project]
2
+ authors = [{ name = "Allen Chou", email = "f1470891079@gmail.com" }]
3
+ dependencies = ["openai (>=1)"]
4
+ description = "Chat Completion Stream Handler"
5
+ license = { text = "MIT" }
6
+ name = "chat-cmpl-stream-handler"
7
+ readme = "README.md"
8
+ requires-python = ">=3.11,<4"
9
+ version = "0.1.0"
10
+
11
+ [project.urls]
12
+ Homepage = "https://github.com/allen2c/chat-cmpl-stream-handler"
13
+ "PyPI" = "https://pypi.org/project/chat-cmpl-stream-handler/"
14
+ Repository = "https://github.com/allen2c/chat-cmpl-stream-handler"
15
+
16
+ [tool.poetry]
17
+ packages = [{ include = "chat_cmpl_stream_handler" }]
18
+
19
+ [tool.poetry.extras]
20
+ all = []
21
+
22
+ [tool.poetry.group.dev.dependencies]
23
+ black = { extras = ["jupyter"], version = "*" }
24
+ isort = "*"
25
+ mkdocs-material = "*"
26
+ openai-agents = "*"
27
+ poetry-plugin-export = "*"
28
+ pytest = "*"
29
+ pytest-asyncio = "*"
30
+ pytest-cov = "*"
31
+ pytest-env = "*"
32
+ pytest-xdist = "*"
33
+ rich = "*"
34
+ rich-color-support = "*"
35
+ setuptools = "*"
36
+ twine = "*"
37
+
38
+ [tool.isort]
39
+ profile = "black"
40
+
41
+ [tool.flake8]
42
+ ignore = ["E203", "E704", "W503"]
43
+ max-line-length = 88
44
+
45
+ [tool.pytest.ini_options]
46
+ env = ["ENVIRONMENT=test", "PYTEST_IS_RUNNING=true"]
47
+
48
+ [build-system]
49
+ build-backend = "poetry.core.masonry.api"
50
+ requires = ["poetry-core>=2.0.0,<3.0.0"]