codegnipy 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codegnipy/streaming.py ADDED
@@ -0,0 +1,387 @@
1
+ """
2
+ Codegnipy 流式响应模块
3
+
4
+ 提供 LLM 流式输出支持,实现实时响应。
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import AsyncIterator, Iterator, Optional, Callable, List
9
+ from enum import Enum
10
+ import asyncio
11
+
12
+ from .runtime import LLMConfig, CognitiveContext
13
+
14
+
15
class StreamStatus(Enum):
    """Lifecycle states attached to streamed response chunks."""

    # The request was accepted; no text has been delivered yet.
    STARTED = "started"
    # The chunk carries a new piece of response text.
    STREAMING = "streaming"
    # The stream ended normally.
    COMPLETED = "completed"
    # The stream was cut short by an exception.
    ERROR = "error"
21
+
22
+
23
@dataclass
class StreamChunk:
    """One increment of a streamed LLM response."""

    # Text carried by this chunk alone.
    content: str
    # Lifecycle state this chunk represents.
    status: StreamStatus
    # Response text gathered so far, as supplied by the chunk's producer.
    accumulated: str = ""
    # Free-form extra data; every instance gets its own dict.
    metadata: dict = field(default_factory=dict)

    def __str__(self) -> str:
        """Return only the text carried by this chunk."""
        return self.content
33
+
34
+
35
@dataclass
class StreamResult:
    """Final outcome of a streamed call after the stream has been drained."""

    # Complete response text.
    content: str
    # Lifecycle state the stream finished in.
    status: StreamStatus
    # Every chunk observed, in arrival order; a fresh list per instance.
    chunks: List[StreamChunk] = field(default_factory=list)
    # Free-form extra data; a fresh dict per instance.
    metadata: dict = field(default_factory=dict)
42
+
43
+
44
def _stream_openai(
    config: LLMConfig,
    prompt: str,
    memory: Optional[list] = None,
    **kwargs
) -> Iterator[StreamChunk]:
    """Stream a chat completion from the OpenAI API as ``StreamChunk`` objects.

    Yields one STARTED chunk once the request has been created, one STREAMING
    chunk for every non-empty text delta, and finally one COMPLETED chunk.
    If creating the request or reading the stream raises, a single ERROR
    chunk carrying the exception text is yielded instead of propagating it.

    Args:
        config: Supplies ``api_key``, ``base_url``, ``model``, ``temperature``
            and ``max_tokens`` for the request.
        prompt: Text sent as the final user message.
        memory: Earlier messages placed before the prompt. The list is
            copied, never mutated.
        **kwargs: Forwarded unchanged to ``chat.completions.create``.

    Yields:
        StreamChunk: ``accumulated`` holds all text received up to that chunk.

    Raises:
        ImportError: If the ``openai`` package is not installed. Because this
            is a generator, it surfaces on the first iteration step.
    """
    try:
        import openai
    except ImportError as exc:
        # Chain the cause so the underlying import failure stays visible.
        raise ImportError("需要安装 openai 包。运行: pip install openai") from exc

    client = openai.OpenAI(
        api_key=config.api_key,
        base_url=config.base_url,
    )

    messages: List = list(memory) if memory else []
    messages.append({"role": "user", "content": prompt})

    accumulated = ""
    response = None

    try:
        response = client.chat.completions.create(
            model=config.model,
            messages=messages,
            temperature=config.temperature,
            max_tokens=config.max_tokens,
            stream=True,
            **kwargs
        )

        yield StreamChunk(
            content="",
            status=StreamStatus.STARTED,
            accumulated="",
        )

        for chunk in response:
            # Some events carry no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                piece = chunk.choices[0].delta.content
                accumulated += piece

                yield StreamChunk(
                    content=piece,
                    status=StreamStatus.STREAMING,
                    accumulated=accumulated,
                )

        yield StreamChunk(
            content="",
            status=StreamStatus.COMPLETED,
            accumulated=accumulated,
        )

    except Exception as exc:
        # Deliberate best-effort: report the failure in-band as an ERROR chunk.
        error_text = str(exc)
        yield StreamChunk(
            content=error_text,
            status=StreamStatus.ERROR,
            accumulated=accumulated,
            metadata={"error": error_text},
        )

    finally:
        # Also runs when the consumer abandons the generator early, so the
        # underlying HTTP response is released instead of leaking.
        close = getattr(response, "close", None)
        if close is not None:
            close()
110
+
111
+
112
async def _stream_openai_async(
    config: LLMConfig,
    prompt: str,
    memory: Optional[list] = None,
    **kwargs
) -> AsyncIterator[StreamChunk]:
    """Asynchronously stream a chat completion as ``StreamChunk`` objects.

    Yields one STARTED chunk once the request has been created, one STREAMING
    chunk for every non-empty text delta, and finally one COMPLETED chunk.
    If creating the request or reading the stream raises, a single ERROR
    chunk carrying the exception text is yielded instead of propagating it.

    Args:
        config: Supplies ``api_key``, ``base_url``, ``model``, ``temperature``
            and ``max_tokens`` for the request.
        prompt: Text sent as the final user message.
        memory: Earlier messages placed before the prompt. The list is
            copied, never mutated.
        **kwargs: Forwarded unchanged to ``chat.completions.create``.

    Yields:
        StreamChunk: ``accumulated`` holds all text received up to that chunk.

    Raises:
        ImportError: If the ``openai`` package is not installed. Because this
            is an async generator, it surfaces on the first iteration step.
    """
    try:
        from openai import AsyncOpenAI
    except ImportError as exc:
        # Chain the cause so the underlying import failure stays visible.
        raise ImportError("需要安装 openai 包。运行: pip install openai") from exc

    client = AsyncOpenAI(
        api_key=config.api_key,
        base_url=config.base_url,
    )

    messages: List = list(memory) if memory else []
    messages.append({"role": "user", "content": prompt})

    accumulated = ""
    response = None

    try:
        response = await client.chat.completions.create(
            model=config.model,
            messages=messages,
            temperature=config.temperature,
            max_tokens=config.max_tokens,
            stream=True,
            **kwargs
        )

        yield StreamChunk(
            content="",
            status=StreamStatus.STARTED,
            accumulated="",
        )

        async for chunk in response:
            # Some events carry no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                piece = chunk.choices[0].delta.content
                accumulated += piece

                yield StreamChunk(
                    content=piece,
                    status=StreamStatus.STREAMING,
                    accumulated=accumulated,
                )

        yield StreamChunk(
            content="",
            status=StreamStatus.COMPLETED,
            accumulated=accumulated,
        )

    except Exception as exc:
        # Deliberate best-effort: report the failure in-band as an ERROR chunk.
        error_text = str(exc)
        yield StreamChunk(
            content=error_text,
            status=StreamStatus.ERROR,
            accumulated=accumulated,
            metadata={"error": error_text},
        )

    finally:
        # Also runs when the consumer closes the generator early, so the
        # underlying HTTP response is released instead of leaking.
        close = getattr(response, "close", None)
        if close is not None:
            await close()
176
+
177
+
178
def stream_call(
    prompt: str,
    context: Optional[CognitiveContext] = None,
    *,
    on_chunk: Optional[Callable[[StreamChunk], None]] = None,
    model: Optional[str] = None,
    temperature: Optional[float] = None
) -> StreamResult:
    """Run a streaming LLM call to completion and return the full result.

    Args:
        prompt: Prompt sent to the LLM.
        context: Cognitive context to use; falls back to
            ``CognitiveContext.get_current()`` when omitted.
        on_chunk: Callback invoked with every chunk as it arrives.
        model: Overrides the configured model for this call.
        temperature: Overrides the configured temperature for this call.

    Returns:
        StreamResult holding the accumulated text, the status of the last
        chunk, and every chunk received.

    Raises:
        ValueError: If the resolved configuration has no API key.

    Example:
        result = stream_call("Explain quantum computing",
                             on_chunk=lambda c: print(c.content, end=""))
        print(result.content)
    """
    ctx = context or CognitiveContext.get_current()
    config = LLMConfig() if ctx is None else ctx.get_config()

    # Per-call overrides apply whether or not a context is active.
    # NOTE(review): these are written onto the object returned by
    # get_config(); whether that object is shared between calls is not
    # visible here - confirm against CognitiveContext.
    if model:
        config.model = model
    if temperature is not None:
        config.temperature = temperature

    if not config.api_key:
        raise ValueError(
            "未配置 API 密钥。请设置 OPENAI_API_KEY 环境变量,"
            "或在 CognitiveContext 中提供 api_key 参数。"
        )

    memory = ctx.get_memory() if ctx else []

    chunks: List[StreamChunk] = []
    for chunk in _stream_openai(config, prompt, memory):
        chunks.append(chunk)
        if on_chunk:
            on_chunk(chunk)

    # The last chunk carries both the complete text and the final status.
    accumulated = chunks[-1].accumulated if chunks else ""
    status = chunks[-1].status if chunks else StreamStatus.ERROR

    # Only record the exchange when the stream did not fail, so a failed call
    # does not leave an empty or truncated assistant turn in memory.
    if ctx and status is not StreamStatus.ERROR:
        ctx.add_to_memory("user", prompt)
        ctx.add_to_memory("assistant", accumulated)

    return StreamResult(
        content=accumulated,
        status=status,
        chunks=chunks,
    )
242
+
243
+
244
async def stream_call_async(
    prompt: str,
    context: Optional[CognitiveContext] = None,
    *,
    on_chunk: Optional[Callable[[StreamChunk], None]] = None,
    model: Optional[str] = None,
    temperature: Optional[float] = None
) -> StreamResult:
    """Run an asynchronous streaming LLM call to completion.

    Args:
        prompt: Prompt sent to the LLM.
        context: Cognitive context to use; falls back to
            ``CognitiveContext.get_current()`` when omitted.
        on_chunk: Callback invoked with every chunk as it arrives. It may be
            a plain callable or return an awaitable, which is then awaited.
        model: Overrides the configured model for this call.
        temperature: Overrides the configured temperature for this call.

    Returns:
        StreamResult holding the accumulated text, the status of the last
        chunk, and every chunk received.

    Raises:
        ValueError: If the resolved configuration has no API key.

    Example:
        result = await stream_call_async("Explain quantum computing")
        print(result.content)
    """
    import inspect

    ctx = context or CognitiveContext.get_current()
    config = LLMConfig() if ctx is None else ctx.get_config()

    # Per-call overrides apply whether or not a context is active.
    # NOTE(review): these are written onto the object returned by
    # get_config(); whether that object is shared between calls is not
    # visible here - confirm against CognitiveContext.
    if model:
        config.model = model
    if temperature is not None:
        config.temperature = temperature

    if not config.api_key:
        raise ValueError(
            "未配置 API 密钥。请设置 OPENAI_API_KEY 环境变量,"
            "或在 CognitiveContext 中提供 api_key 参数。"
        )

    memory = ctx.get_memory() if ctx else []

    chunks: List[StreamChunk] = []
    async for chunk in _stream_openai_async(config, prompt, memory):
        chunks.append(chunk)
        if on_chunk:
            # Inspect the return value rather than the callable, so async
            # callable objects and lambdas returning coroutines also work.
            outcome = on_chunk(chunk)
            if inspect.isawaitable(outcome):
                await outcome

    # The last chunk carries both the complete text and the final status.
    accumulated = chunks[-1].accumulated if chunks else ""
    status = chunks[-1].status if chunks else StreamStatus.ERROR

    # Only record the exchange when the stream did not fail, so a failed call
    # does not leave an empty or truncated assistant turn in memory.
    if ctx and status is not StreamStatus.ERROR:
        ctx.add_to_memory("user", prompt)
        ctx.add_to_memory("assistant", accumulated)

    return StreamResult(
        content=accumulated,
        status=status,
        chunks=chunks,
    )
310
+
311
+
312
def stream_iter(
    prompt: str,
    context: Optional[CognitiveContext] = None,
    **kwargs
) -> Iterator[StreamChunk]:
    """Yield the chunks of a streaming LLM call one by one.

    Args:
        prompt: Prompt sent to the LLM.
        context: Cognitive context to use; falls back to
            ``CognitiveContext.get_current()`` when omitted.
        **kwargs: ``model`` and ``temperature`` override the configured
            values for this call; other keys are ignored.

    Yields:
        StreamChunk: Each chunk in arrival order.

    Raises:
        ValueError: If the resolved configuration has no API key. Because
            this is a generator, it surfaces on the first iteration step.

    The context memory is updated only after the stream has been fully
    consumed; a consumer that stops iterating early leaves it untouched.

    Example:
        for chunk in stream_iter("Explain quantum computing"):
            print(chunk.content, end="", flush=True)
    """
    ctx = context or CognitiveContext.get_current()
    config = LLMConfig() if ctx is None else ctx.get_config()

    # Per-call overrides apply whether or not a context is active.
    model = kwargs.get("model")
    temperature = kwargs.get("temperature")
    if model:
        config.model = model
    if temperature is not None:
        config.temperature = temperature

    if not config.api_key:
        raise ValueError("未配置 API 密钥。")

    memory = ctx.get_memory() if ctx else []

    accumulated = ""
    status = StreamStatus.ERROR

    for chunk in _stream_openai(config, prompt, memory):
        yield chunk
        accumulated = chunk.accumulated
        status = chunk.status

    # Only record the exchange when the stream did not fail, so a failed call
    # does not leave an empty or truncated assistant turn in memory.
    if ctx and status is not StreamStatus.ERROR:
        ctx.add_to_memory("user", prompt)
        ctx.add_to_memory("assistant", accumulated)
349
+
350
+
351
async def stream_iter_async(
    prompt: str,
    context: Optional[CognitiveContext] = None,
    **kwargs
) -> AsyncIterator[StreamChunk]:
    """Asynchronously yield the chunks of a streaming LLM call one by one.

    Args:
        prompt: Prompt sent to the LLM.
        context: Cognitive context to use; falls back to
            ``CognitiveContext.get_current()`` when omitted.
        **kwargs: ``model`` and ``temperature`` override the configured
            values for this call; other keys are ignored.

    Yields:
        StreamChunk: Each chunk in arrival order.

    Raises:
        ValueError: If the resolved configuration has no API key. Because
            this is an async generator, it surfaces on the first iteration
            step.

    The context memory is updated only after the stream has been fully
    consumed; a consumer that stops iterating early leaves it untouched.

    Example:
        async for chunk in stream_iter_async("Explain quantum computing"):
            print(chunk.content, end="", flush=True)
    """
    ctx = context or CognitiveContext.get_current()
    config = LLMConfig() if ctx is None else ctx.get_config()

    # Per-call overrides apply whether or not a context is active.
    model = kwargs.get("model")
    temperature = kwargs.get("temperature")
    if model:
        config.model = model
    if temperature is not None:
        config.temperature = temperature

    if not config.api_key:
        raise ValueError("未配置 API 密钥。")

    memory = ctx.get_memory() if ctx else []

    accumulated = ""
    status = StreamStatus.ERROR

    async for chunk in _stream_openai_async(config, prompt, memory):
        yield chunk
        accumulated = chunk.accumulated
        status = chunk.status

    # Only record the exchange when the stream did not fail, so a failed call
    # does not leave an empty or truncated assistant turn in memory.
    if ctx and status is not StreamStatus.ERROR:
        ctx.add_to_memory("user", prompt)
        ctx.add_to_memory("assistant", accumulated)