ommlds 0.0.0.dev466__py3-none-any.whl → 0.0.0.dev468__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ommlds has been flagged as potentially problematic.
- ommlds/.omlish-manifests.json +129 -6
- ommlds/__about__.py +2 -2
- ommlds/backends/ollama/__init__.py +0 -0
- ommlds/backends/ollama/protocol.py +170 -0
- ommlds/backends/transformers/__init__.py +0 -0
- ommlds/backends/transformers/streamers.py +73 -0
- ommlds/cli/sessions/chat/backends/catalog.py +1 -1
- ommlds/minichain/__init__.py +4 -0
- ommlds/minichain/backends/impls/llamacpp/chat.py +9 -0
- ommlds/minichain/backends/impls/llamacpp/stream.py +26 -10
- ommlds/minichain/backends/impls/mlx/chat.py +95 -21
- ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
- ommlds/minichain/backends/impls/ollama/chat.py +196 -0
- ommlds/minichain/backends/impls/openai/chat.py +2 -2
- ommlds/minichain/backends/impls/openai/format.py +106 -107
- ommlds/minichain/backends/impls/openai/stream.py +14 -13
- ommlds/minichain/backends/impls/transformers/transformers.py +93 -14
- ommlds/minichain/chat/stream/types.py +3 -0
- ommlds/minichain/standard.py +7 -0
- {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev468.dist-info}/METADATA +7 -7
- {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev468.dist-info}/RECORD +25 -20
- ommlds/minichain/backends/impls/openai/format2.py +0 -210
- {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev468.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev468.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev468.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev468.dist-info}/top_level.txt +0 -0
ommlds/.omlish-manifests.json
CHANGED

@@ -170,6 +170,21 @@
         "attr": null,
         "file": "ommlds/minichain/backends/impls/llamacpp/chat.py",
         "line": 33,
+        "value": {
+            "!.minichain.backends.strings.manifests.BackendStringsManifest": {
+                "service_cls_names": [
+                    "ChatChoicesService"
+                ],
+                "backend_name": "llamacpp",
+                "model_names": null
+            }
+        }
+    },
+    {
+        "module": ".minichain.backends.impls.llamacpp.chat",
+        "attr": null,
+        "file": "ommlds/minichain/backends/impls/llamacpp/chat.py",
+        "line": 42,
         "value": {
             "!.minichain.registries.manifests.RegistryManifest": {
                 "module": "ommlds.minichain.backends.impls.llamacpp.chat",

@@ -200,6 +215,21 @@
         "attr": null,
         "file": "ommlds/minichain/backends/impls/llamacpp/stream.py",
         "line": 32,
+        "value": {
+            "!.minichain.backends.strings.manifests.BackendStringsManifest": {
+                "service_cls_names": [
+                    "ChatChoicesStreamService"
+                ],
+                "backend_name": "llamacpp",
+                "model_names": null
+            }
+        }
+    },
+    {
+        "module": ".minichain.backends.impls.llamacpp.stream",
+        "attr": null,
+        "file": "ommlds/minichain/backends/impls/llamacpp/stream.py",
+        "line": 41,
         "value": {
             "!.minichain.registries.manifests.RegistryManifest": {
                 "module": "ommlds.minichain.backends.impls.llamacpp.stream",

@@ -229,11 +259,12 @@
         "module": ".minichain.backends.impls.mlx.chat",
         "attr": null,
         "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-        "line":
+        "line": 39,
         "value": {
             "!.minichain.backends.strings.manifests.BackendStringsManifest": {
                 "service_cls_names": [
-                    "ChatChoicesService"
+                    "ChatChoicesService",
+                    "ChatChoicesStreamService"
                 ],
                 "backend_name": "mlx",
                 "model_names": null

@@ -244,7 +275,7 @@
         "module": ".minichain.backends.impls.mlx.chat",
         "attr": null,
         "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-        "line":
+        "line": 133,
         "value": {
             "!.minichain.registries.manifests.RegistryManifest": {
                 "module": "ommlds.minichain.backends.impls.mlx.chat",

@@ -255,6 +286,67 @@
             }
         }
     },
+    {
+        "module": ".minichain.backends.impls.mlx.chat",
+        "attr": null,
+        "file": "ommlds/minichain/backends/impls/mlx/chat.py",
+        "line": 164,
+        "value": {
+            "!.minichain.registries.manifests.RegistryManifest": {
+                "module": "ommlds.minichain.backends.impls.mlx.chat",
+                "attr": "MlxChatChoicesStreamService",
+                "name": "mlx",
+                "aliases": null,
+                "type": "ChatChoicesStreamService"
+            }
+        }
+    },
+    {
+        "module": ".minichain.backends.impls.ollama.chat",
+        "attr": null,
+        "file": "ommlds/minichain/backends/impls/ollama/chat.py",
+        "line": 38,
+        "value": {
+            "!.minichain.backends.strings.manifests.BackendStringsManifest": {
+                "service_cls_names": [
+                    "ChatChoicesService",
+                    "ChatChoicesStreamService"
+                ],
+                "backend_name": "ollama",
+                "model_names": null
+            }
+        }
+    },
+    {
+        "module": ".minichain.backends.impls.ollama.chat",
+        "attr": null,
+        "file": "ommlds/minichain/backends/impls/ollama/chat.py",
+        "line": 93,
+        "value": {
+            "!.minichain.registries.manifests.RegistryManifest": {
+                "module": "ommlds.minichain.backends.impls.ollama.chat",
+                "attr": "OllamaChatChoicesService",
+                "name": "ollama",
+                "aliases": null,
+                "type": "ChatChoicesService"
+            }
+        }
+    },
+    {
+        "module": ".minichain.backends.impls.ollama.chat",
+        "attr": null,
+        "file": "ommlds/minichain/backends/impls/ollama/chat.py",
+        "line": 139,
+        "value": {
+            "!.minichain.registries.manifests.RegistryManifest": {
+                "module": "ommlds.minichain.backends.impls.ollama.chat",
+                "attr": "OllamaChatChoicesStreamService",
+                "name": "ollama",
+                "aliases": null,
+                "type": "ChatChoicesStreamService"
+            }
+        }
+    },
     {
         "module": ".minichain.backends.impls.openai.chat",
         "attr": null,

@@ -356,7 +448,7 @@
         "module": ".minichain.backends.impls.openai.stream",
         "attr": null,
         "file": "ommlds/minichain/backends/impls/openai/stream.py",
-        "line":
+        "line": 38,
         "value": {
             "!.minichain.registries.manifests.RegistryManifest": {
                 "module": "ommlds.minichain.backends.impls.openai.stream",

@@ -434,7 +526,23 @@
         "module": ".minichain.backends.impls.transformers.transformers",
         "attr": null,
         "file": "ommlds/minichain/backends/impls/transformers/transformers.py",
-        "line":
+        "line": 46,
+        "value": {
+            "!.minichain.backends.strings.manifests.BackendStringsManifest": {
+                "service_cls_names": [
+                    "ChatChoicesService",
+                    "ChatChoicesStreamService"
+                ],
+                "backend_name": "transformers",
+                "model_names": null
+            }
+        }
+    },
+    {
+        "module": ".minichain.backends.impls.transformers.transformers",
+        "attr": null,
+        "file": "ommlds/minichain/backends/impls/transformers/transformers.py",
+        "line": 62,
         "value": {
             "!.minichain.registries.manifests.RegistryManifest": {
                 "module": "ommlds.minichain.backends.impls.transformers.transformers",

@@ -451,7 +559,7 @@
         "module": ".minichain.backends.impls.transformers.transformers",
         "attr": null,
         "file": "ommlds/minichain/backends/impls/transformers/transformers.py",
-        "line":
+        "line": 189,
        "value": {
             "!.minichain.registries.manifests.RegistryManifest": {
                 "module": "ommlds.minichain.backends.impls.transformers.transformers",

@@ -464,6 +572,21 @@
             }
         }
     },
+    {
+        "module": ".minichain.backends.impls.transformers.transformers",
+        "attr": null,
+        "file": "ommlds/minichain/backends/impls/transformers/transformers.py",
+        "line": 219,
+        "value": {
+            "!.minichain.registries.manifests.RegistryManifest": {
+                "module": "ommlds.minichain.backends.impls.transformers.transformers",
+                "attr": "TransformersChatChoicesStreamService",
+                "name": "transformers",
+                "aliases": null,
+                "type": "ChatChoicesStreamService"
+            }
+        }
+    },
     {
         "module": ".minichain.chat.choices.services",
         "attr": null,
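Each added entry above pairs a source location ("file"/"line") with a manifest value, and the llamacpp hunks later in this diff show matching "# @omlish-manifest" comments at exactly those lines. A sketch of the apparent correspondence — the inference that a collector compiles such comments into these JSON entries comes from reading this diff, not from omlish documentation:

    # In ommlds/minichain/backends/impls/llamacpp/chat.py, at the line the JSON
    # entry records ("line": 33):
    #
    # @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
    #     ['ChatChoicesService'],
    #     'llamacpp',
    # )
    #
    # ...which shows up in .omlish-manifests.json as the BackendStringsManifest
    # entry with service_cls_names=['ChatChoicesService'] and backend_name='llamacpp',
    # with '$.' seemingly resolved against the package root.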
ommlds/__about__.py
CHANGED

ommlds/backends/ollama/__init__.py
ADDED
File without changes

ommlds/backends/ollama/protocol.py
ADDED

@@ -0,0 +1,170 @@
+"""
+https://docs.ollama.com/api
+"""
+import typing as ta
+
+from omlish import dataclasses as dc
+from omlish import lang
+
+
+##
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+@dc.extra_class_params(default_repr_fn=dc.opt_repr)
+class Options:
+    # loading
+    numa: bool | None = None
+    num_ctx: int | None = None
+    num_batch: int | None = None
+    num_gpu: int | None = None
+    main_gpu: int | None = None
+    low_vram: bool | None = None
+    f16_kv: bool | None = None
+    logits_all: bool | None = None
+    vocab_only: bool | None = None
+    use_mmap: bool | None = None
+    use_mlock: bool | None = None
+    embedding_only: bool | None = None
+    num_thread: int | None = None
+
+    # querying
+    num_keep: int | None = None
+    seed: int | None = None
+    num_predict: int | None = None
+    top_k: int | None = None
+    top_p: float | None = None
+    tfs_z: float | None = None
+    typical_p: float | None = None
+    repeat_last_n: int | None = None
+    temperature: float | None = None
+    repeat_penalty: float | None = None
+    presence_penalty: float | None = None
+    frequency_penalty: float | None = None
+    mirostat: int | None = None
+    mirostat_tau: float | None = None
+    mirostat_eta: float | None = None
+    penalize_newline: bool | None = None
+    stop: ta.Sequence[str] | None = None
+
+
+##
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+class BaseRequest(lang.Abstract):
+    model: str
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+class BaseStreamableRequest(BaseRequest, lang.Abstract):
+    stream: bool | None = None
+
+
+##
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+class BaseGenerateRequest(BaseStreamableRequest, lang.Abstract):
+    options: Options | None = None
+    format: ta.Literal['', 'json'] | None = None  # TODO: jsonschema
+    keep_alive: float | str | None = None
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+@dc.extra_class_params(default_repr_fn=dc.opt_repr)
+class GenerateRequest(BaseGenerateRequest):
+    prompt: str | None = None
+    suffix: str | None = None
+    system: str | None = None
+    template: str | None = None
+    context: ta.Sequence[int] | None = None
+    raw: bool | None = None
+    images: ta.Sequence[bytes] | None = None
+    think: bool | ta.Literal['low', 'medium', 'high'] | None = None
+
+
+#
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+class BaseGenerateResponse(lang.Abstract):
+    model: str | None = None
+    created_at: str | None = None
+    done: bool | None = None
+    done_reason: str | None = None
+    total_duration: int | None = None
+    load_duration: int | None = None
+    prompt_eval_count: int | None = None
+    prompt_eval_duration: int | None = None
+    eval_count: int | None = None
+    eval_duration: int | None = None
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+@dc.extra_class_params(default_repr_fn=dc.opt_repr)
+class GenerateResponse(BaseGenerateResponse):
+    response: str
+    thinking: str | None = None
+    context: ta.Sequence[int] | None = None
+
+
+##
+
+
+Role: ta.TypeAlias = ta.Literal[
+    'system',
+    'user',
+    'assistant',
+    'tool',
+]
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+@dc.extra_class_params(default_repr_fn=dc.opt_repr)
+class Message:
+    role: Role
+    content: str | None = None
+    thinking: str | None = None
+    images: ta.Sequence[bytes] | None = None
+    tool_name: str | None = None
+
+    @dc.dataclass(frozen=True, kw_only=True)
+    class ToolCall:
+        @dc.dataclass(frozen=True, kw_only=True)
+        class Function:
+            name: str
+            arguments: ta.Mapping[str, ta.Any]
+
+        function: Function
+
+    tool_calls: ta.Sequence[ToolCall] | None = None
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+@dc.extra_class_params(default_repr_fn=dc.opt_repr)
+class Tool:
+    type: str | None = 'function'
+
+    @dc.dataclass(frozen=True, kw_only=True)
+    @dc.extra_class_params(default_repr_fn=dc.opt_repr)
+    class Function:
+        name: str | None = None
+        description: str | None = None
+        parameters: ta.Any | None = None
+
+    function: Function | None = None
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+@dc.extra_class_params(default_repr_fn=dc.opt_repr)
+class ChatRequest(BaseGenerateRequest):
+    messages: ta.Sequence[Message] | None = None
+    tools: ta.Sequence[Tool] | None = None
+    think: bool | ta.Literal['low', 'medium', 'high'] | None = None
+
+
+@dc.dataclass(frozen=True, kw_only=True)
+@dc.extra_class_params(default_repr_fn=dc.opt_repr)
+class ChatResponse(BaseGenerateResponse):
+    message: Message
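To illustrate the new protocol types: a minimal, hypothetical construction of a chat request (the model name and message text are placeholders; serialization and HTTP transport are outside this module):

    from ommlds.backends.ollama.protocol import ChatRequest, Message, Options

    # All of these dataclasses are frozen and kw_only, so fields are passed by name.
    req = ChatRequest(
        model='llama3.2',  # placeholder model name
        messages=[
            Message(role='system', content='You are terse.'),
            Message(role='user', content='Why is the sky blue?'),
        ],
        options=Options(temperature=0.2, num_ctx=4096),
        stream=False,
    )

    # dc.opt_repr as the default repr fn keeps reprs of these sparse records
    # short by omitting None-valued fields.
    print(req)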
ommlds/backends/transformers/__init__.py
ADDED
File without changes

ommlds/backends/transformers/streamers.py
ADDED

@@ -0,0 +1,73 @@
+import functools
+import typing as ta
+
+import transformers as tfm
+
+
+T = ta.TypeVar('T')
+P = ta.ParamSpec('P')
+
+
+##
+
+
+class CancellableTextStreamer(tfm.TextStreamer):
+    class Callback(ta.Protocol):
+        def __call__(self, text: str, *, stream_end: bool) -> None: ...
+
+    def __init__(
+        self,
+        tokenizer: tfm.AutoTokenizer,
+        callback: Callback,
+        *,
+        skip_prompt: bool = False,
+        **decode_kwargs: ta.Any,
+    ) -> None:
+        super().__init__(
+            tokenizer,
+            skip_prompt=skip_prompt,
+            **decode_kwargs,
+        )
+
+        self.callback = callback
+
+    _cancelled: bool = False
+
+    #
+
+    @property
+    def cancelled(self) -> bool:
+        return self._cancelled
+
+    def cancel(self) -> None:
+        self._cancelled = True
+
+    class Cancelled(BaseException):  # noqa
+        pass
+
+    @staticmethod
+    def ignoring_cancelled(fn: ta.Callable[P, T]) -> ta.Callable[P, T | None]:
+        @functools.wraps(fn)
+        def inner(*args, **kwargs):
+            try:
+                return fn(*args, **kwargs)
+            except CancellableTextStreamer.Cancelled:
+                pass
+
+        return inner
+
+    def _maybe_raise_cancelled(self) -> None:
+        if self._cancelled:
+            raise CancellableTextStreamer.Cancelled
+
+    #
+
+    def put(self, value: ta.Any) -> None:
+        self._maybe_raise_cancelled()
+        super().put(value)
+        self._maybe_raise_cancelled()
+
+    def on_finalized_text(self, text: str, stream_end: bool = False) -> None:
+        self._maybe_raise_cancelled()
+        self.callback(text, stream_end=stream_end)
+        self._maybe_raise_cancelled()
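A hedged sketch of wiring CancellableTextStreamer into transformers generation — the model, prompt, token budget, and cancellation condition below are illustrative, not taken from this diff:

    import transformers as tfm

    from ommlds.backends.transformers.streamers import CancellableTextStreamer

    tok = tfm.AutoTokenizer.from_pretrained('gpt2')
    model = tfm.AutoModelForCausalLM.from_pretrained('gpt2')

    pieces: list[str] = []

    def on_text(text: str, *, stream_end: bool) -> None:
        pieces.append(text)
        if sum(map(len, pieces)) > 200:  # arbitrary cutoff for the example
            streamer.cancel()  # the next put()/on_finalized_text() raises Cancelled

    streamer = CancellableTextStreamer(tok, on_text, skip_prompt=True)

    # Cancelled derives from BaseException, so it unwinds straight out of
    # generate(); ignoring_cancelled swallows it at the boundary.
    @CancellableTextStreamer.ignoring_cancelled
    def run() -> None:
        inputs = tok('The sky is', return_tensors='pt')
        model.generate(**inputs, streamer=streamer, max_new_tokens=128)

    run()
    print(''.join(pieces))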
ommlds/cli/sessions/chat/backends/catalog.py
CHANGED

@@ -32,7 +32,7 @@ class _CatalogBackendProvider(BackendProvider[ServiceT], lang.Abstract):
     @contextlib.asynccontextmanager
     async def _provide_backend(self, cls: type[ServiceT]) -> ta.AsyncIterator[ServiceT]:
         service: ServiceT
-        async with lang.
+        async with lang.async_or_sync_maybe_managing(self._catalog.get_backend(
             cls,
             self._name,
             *(self._configs or []),
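lang.async_or_sync_maybe_managing itself is not shown in this diff; judging by its name and call site, it presumably manages whatever get_backend returns, whether that is an async context manager, a sync one, or a plain object. A speculative stand-in with that behavior (an assumption about omlish, not its actual implementation):

    import contextlib
    import typing as ta

    @contextlib.asynccontextmanager
    async def async_or_sync_maybe_managing(obj: ta.Any) -> ta.AsyncIterator[ta.Any]:
        if isinstance(obj, contextlib.AbstractAsyncContextManager):
            async with obj as managed:  # async CM: manage asynchronously
                yield managed
        elif isinstance(obj, contextlib.AbstractContextManager):
            with obj as managed:  # sync CM: manage synchronously
                yield managed
        else:
            yield obj  # not a context manager: pass through unmanaged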
ommlds/minichain/__init__.py
CHANGED

ommlds/minichain/backends/impls/llamacpp/chat.py
CHANGED

@@ -30,6 +30,15 @@ from .format import get_msg_content
 ##
 
 
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesService'],
+#     'llamacpp',
+# )
+
+
+##
+
+
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
 #     name='llamacpp',
 #     type='ChatChoicesService',

ommlds/minichain/backends/impls/llamacpp/stream.py
CHANGED

@@ -29,6 +29,15 @@ from .format import get_msg_content
 ##
 
 
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesStreamService'],
+#     'llamacpp',
+# )
+
+
+##
+
+
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
 #     name='llamacpp',
 #     type='ChatChoicesStreamService',
@@ -76,18 +85,25 @@ class LlamacppChatChoicesStreamService(lang.ExitStacked):
         rs.enter_context(lang.defer(close_output))
 
         async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
+            last_role: ta.Any = None
+
             for chunk in output:
                 check.state(chunk['object'] == 'chat.completion.chunk')
-
-
-
-
-
-
-
-
-
-
+
+                choice = check.single(chunk['choices'])
+
+                if not (delta := choice.get('delta', {})):
+                    continue
+
+                # FIXME: check role is assistant
+                if (role := delta.get('role')) != last_role:
+                    last_role = role
+
+                # FIXME: stop reason
+
+                if (content := delta.get('content', '')):
+                    await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiChoiceDelta(content)])]))
+
             return None
 
         return await new_stream_response(rs, inner)
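For reference, the rewritten loop consumes OpenAI-style streaming chunks as produced by llama-cpp-python's chat-completion API; a plausible shape of one such chunk (field values illustrative):

    chunk = {
        'object': 'chat.completion.chunk',  # asserted by check.state above
        'choices': [{                       # check.single expects exactly one
            'index': 0,
            'delta': {
                'role': 'assistant',        # typically present on the first chunk
                'content': 'Hel',           # forwarded as a ContentAiChoiceDelta
            },
            'finish_reason': None,          # stop-reason handling is still a FIXME
        }],
    }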