ommlds 0.0.0.dev436__py3-none-any.whl → 0.0.0.dev480__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271)
  1. ommlds/.omlish-manifests.json +332 -35
  2. ommlds/__about__.py +15 -9
  3. ommlds/_hacks/__init__.py +4 -0
  4. ommlds/_hacks/funcs.py +110 -0
  5. ommlds/_hacks/names.py +158 -0
  6. ommlds/_hacks/params.py +73 -0
  7. ommlds/_hacks/patches.py +0 -3
  8. ommlds/backends/anthropic/protocol/_marshal.py +2 -2
  9. ommlds/backends/anthropic/protocol/sse/_marshal.py +1 -1
  10. ommlds/backends/anthropic/protocol/sse/assemble.py +23 -7
  11. ommlds/backends/anthropic/protocol/sse/events.py +13 -0
  12. ommlds/backends/anthropic/protocol/types.py +30 -9
  13. ommlds/backends/google/protocol/__init__.py +3 -0
  14. ommlds/backends/google/protocol/_marshal.py +16 -0
  15. ommlds/backends/google/protocol/types.py +626 -0
  16. ommlds/backends/groq/_marshal.py +23 -0
  17. ommlds/backends/groq/protocol.py +249 -0
  18. ommlds/backends/mlx/generation.py +1 -1
  19. ommlds/backends/mlx/loading.py +58 -1
  20. ommlds/backends/ollama/__init__.py +0 -0
  21. ommlds/backends/ollama/protocol.py +170 -0
  22. ommlds/backends/openai/protocol/__init__.py +9 -28
  23. ommlds/backends/openai/protocol/_common.py +18 -0
  24. ommlds/backends/openai/protocol/_marshal.py +27 -0
  25. ommlds/backends/openai/protocol/chatcompletion/chunk.py +58 -31
  26. ommlds/backends/openai/protocol/chatcompletion/contentpart.py +49 -44
  27. ommlds/backends/openai/protocol/chatcompletion/message.py +55 -43
  28. ommlds/backends/openai/protocol/chatcompletion/request.py +114 -66
  29. ommlds/backends/openai/protocol/chatcompletion/response.py +71 -45
  30. ommlds/backends/openai/protocol/chatcompletion/responseformat.py +27 -20
  31. ommlds/backends/openai/protocol/chatcompletion/tokenlogprob.py +16 -7
  32. ommlds/backends/openai/protocol/completionusage.py +24 -15
  33. ommlds/backends/tavily/__init__.py +0 -0
  34. ommlds/backends/tavily/protocol.py +301 -0
  35. ommlds/backends/tinygrad/models/llama3/__init__.py +22 -14
  36. ommlds/backends/transformers/__init__.py +0 -0
  37. ommlds/backends/transformers/filecache.py +109 -0
  38. ommlds/backends/transformers/streamers.py +73 -0
  39. ommlds/cli/asyncs.py +30 -0
  40. ommlds/cli/backends/catalog.py +93 -0
  41. ommlds/cli/backends/configs.py +9 -0
  42. ommlds/cli/backends/inject.py +31 -36
  43. ommlds/cli/backends/injection.py +16 -0
  44. ommlds/cli/backends/types.py +46 -0
  45. ommlds/cli/content/__init__.py +0 -0
  46. ommlds/cli/content/messages.py +34 -0
  47. ommlds/cli/content/strings.py +42 -0
  48. ommlds/cli/inject.py +15 -32
  49. ommlds/cli/inputs/__init__.py +0 -0
  50. ommlds/cli/inputs/asyncs.py +32 -0
  51. ommlds/cli/inputs/sync.py +75 -0
  52. ommlds/cli/main.py +270 -110
  53. ommlds/cli/rendering/__init__.py +0 -0
  54. ommlds/cli/rendering/configs.py +9 -0
  55. ommlds/cli/rendering/inject.py +31 -0
  56. ommlds/cli/rendering/markdown.py +52 -0
  57. ommlds/cli/rendering/raw.py +73 -0
  58. ommlds/cli/rendering/types.py +21 -0
  59. ommlds/cli/secrets.py +21 -0
  60. ommlds/cli/sessions/base.py +1 -1
  61. ommlds/cli/sessions/chat/chat/__init__.py +0 -0
  62. ommlds/cli/sessions/chat/chat/ai/__init__.py +0 -0
  63. ommlds/cli/sessions/chat/chat/ai/configs.py +11 -0
  64. ommlds/cli/sessions/chat/chat/ai/inject.py +74 -0
  65. ommlds/cli/sessions/chat/chat/ai/injection.py +14 -0
  66. ommlds/cli/sessions/chat/chat/ai/rendering.py +70 -0
  67. ommlds/cli/sessions/chat/chat/ai/services.py +79 -0
  68. ommlds/cli/sessions/chat/chat/ai/tools.py +44 -0
  69. ommlds/cli/sessions/chat/chat/ai/types.py +28 -0
  70. ommlds/cli/sessions/chat/chat/state/__init__.py +0 -0
  71. ommlds/cli/sessions/chat/chat/state/configs.py +11 -0
  72. ommlds/cli/sessions/chat/chat/state/inject.py +36 -0
  73. ommlds/cli/sessions/chat/chat/state/inmemory.py +33 -0
  74. ommlds/cli/sessions/chat/chat/state/storage.py +52 -0
  75. ommlds/cli/sessions/chat/chat/state/types.py +38 -0
  76. ommlds/cli/sessions/chat/chat/user/__init__.py +0 -0
  77. ommlds/cli/sessions/chat/chat/user/configs.py +17 -0
  78. ommlds/cli/sessions/chat/chat/user/inject.py +62 -0
  79. ommlds/cli/sessions/chat/chat/user/interactive.py +31 -0
  80. ommlds/cli/sessions/chat/chat/user/oneshot.py +25 -0
  81. ommlds/cli/sessions/chat/chat/user/types.py +15 -0
  82. ommlds/cli/sessions/chat/configs.py +27 -0
  83. ommlds/cli/sessions/chat/driver.py +43 -0
  84. ommlds/cli/sessions/chat/inject.py +33 -65
  85. ommlds/cli/sessions/chat/phases/__init__.py +0 -0
  86. ommlds/cli/sessions/chat/phases/inject.py +27 -0
  87. ommlds/cli/sessions/chat/phases/injection.py +14 -0
  88. ommlds/cli/sessions/chat/phases/manager.py +29 -0
  89. ommlds/cli/sessions/chat/phases/types.py +29 -0
  90. ommlds/cli/sessions/chat/session.py +27 -0
  91. ommlds/cli/sessions/chat/tools/__init__.py +0 -0
  92. ommlds/cli/sessions/chat/tools/configs.py +22 -0
  93. ommlds/cli/sessions/chat/tools/confirmation.py +46 -0
  94. ommlds/cli/sessions/chat/tools/execution.py +66 -0
  95. ommlds/cli/sessions/chat/tools/fs/__init__.py +0 -0
  96. ommlds/cli/sessions/chat/tools/fs/configs.py +12 -0
  97. ommlds/cli/sessions/chat/tools/fs/inject.py +35 -0
  98. ommlds/cli/sessions/chat/tools/inject.py +88 -0
  99. ommlds/cli/sessions/chat/tools/injection.py +44 -0
  100. ommlds/cli/sessions/chat/tools/rendering.py +58 -0
  101. ommlds/cli/sessions/chat/tools/todo/__init__.py +0 -0
  102. ommlds/cli/sessions/chat/tools/todo/configs.py +12 -0
  103. ommlds/cli/sessions/chat/tools/todo/inject.py +31 -0
  104. ommlds/cli/sessions/chat/tools/weather/__init__.py +0 -0
  105. ommlds/cli/sessions/chat/tools/weather/configs.py +12 -0
  106. ommlds/cli/sessions/chat/tools/weather/inject.py +22 -0
  107. ommlds/cli/{tools/weather.py → sessions/chat/tools/weather/tools.py} +1 -1
  108. ommlds/cli/sessions/completion/configs.py +21 -0
  109. ommlds/cli/sessions/completion/inject.py +42 -0
  110. ommlds/cli/sessions/completion/session.py +35 -0
  111. ommlds/cli/sessions/embedding/configs.py +21 -0
  112. ommlds/cli/sessions/embedding/inject.py +42 -0
  113. ommlds/cli/sessions/embedding/session.py +33 -0
  114. ommlds/cli/sessions/inject.py +28 -11
  115. ommlds/cli/state/__init__.py +0 -0
  116. ommlds/cli/state/inject.py +28 -0
  117. ommlds/cli/{state.py → state/storage.py} +41 -24
  118. ommlds/minichain/__init__.py +84 -24
  119. ommlds/minichain/_marshal.py +49 -9
  120. ommlds/minichain/_typedvalues.py +2 -4
  121. ommlds/minichain/backends/catalogs/base.py +20 -1
  122. ommlds/minichain/backends/catalogs/simple.py +2 -2
  123. ommlds/minichain/backends/catalogs/strings.py +10 -8
  124. ommlds/minichain/backends/impls/anthropic/chat.py +65 -27
  125. ommlds/minichain/backends/impls/anthropic/names.py +10 -8
  126. ommlds/minichain/backends/impls/anthropic/protocol.py +109 -0
  127. ommlds/minichain/backends/impls/anthropic/stream.py +111 -43
  128. ommlds/minichain/backends/impls/duckduckgo/search.py +1 -1
  129. ommlds/minichain/backends/impls/dummy/__init__.py +0 -0
  130. ommlds/minichain/backends/impls/dummy/chat.py +69 -0
  131. ommlds/minichain/backends/impls/google/chat.py +114 -22
  132. ommlds/minichain/backends/impls/google/search.py +7 -2
  133. ommlds/minichain/backends/impls/google/stream.py +219 -0
  134. ommlds/minichain/backends/impls/google/tools.py +149 -0
  135. ommlds/minichain/backends/impls/groq/__init__.py +0 -0
  136. ommlds/minichain/backends/impls/groq/chat.py +75 -0
  137. ommlds/minichain/backends/impls/groq/names.py +48 -0
  138. ommlds/minichain/backends/impls/groq/protocol.py +143 -0
  139. ommlds/minichain/backends/impls/groq/stream.py +125 -0
  140. ommlds/minichain/backends/impls/llamacpp/chat.py +33 -18
  141. ommlds/minichain/backends/impls/llamacpp/completion.py +1 -1
  142. ommlds/minichain/backends/impls/llamacpp/format.py +4 -2
  143. ommlds/minichain/backends/impls/llamacpp/stream.py +37 -20
  144. ommlds/minichain/backends/impls/mistral.py +20 -5
  145. ommlds/minichain/backends/impls/mlx/chat.py +96 -22
  146. ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
  147. ommlds/minichain/backends/impls/ollama/chat.py +199 -0
  148. ommlds/minichain/backends/impls/openai/chat.py +18 -8
  149. ommlds/minichain/backends/impls/openai/completion.py +10 -3
  150. ommlds/minichain/backends/impls/openai/embedding.py +10 -3
  151. ommlds/minichain/backends/impls/openai/format.py +131 -106
  152. ommlds/minichain/backends/impls/openai/names.py +31 -5
  153. ommlds/minichain/backends/impls/openai/stream.py +43 -25
  154. ommlds/minichain/backends/impls/tavily.py +66 -0
  155. ommlds/minichain/backends/impls/tinygrad/chat.py +23 -16
  156. ommlds/minichain/backends/impls/transformers/sentence.py +1 -1
  157. ommlds/minichain/backends/impls/transformers/tokens.py +1 -1
  158. ommlds/minichain/backends/impls/transformers/transformers.py +155 -34
  159. ommlds/minichain/backends/strings/parsing.py +1 -1
  160. ommlds/minichain/backends/strings/resolving.py +4 -1
  161. ommlds/minichain/chat/_marshal.py +16 -9
  162. ommlds/minichain/chat/choices/adapters.py +4 -4
  163. ommlds/minichain/chat/choices/services.py +1 -1
  164. ommlds/minichain/chat/choices/stream/__init__.py +0 -0
  165. ommlds/minichain/chat/choices/stream/adapters.py +35 -0
  166. ommlds/minichain/chat/choices/stream/joining.py +31 -0
  167. ommlds/minichain/chat/choices/stream/services.py +45 -0
  168. ommlds/minichain/chat/choices/stream/types.py +43 -0
  169. ommlds/minichain/chat/choices/types.py +2 -2
  170. ommlds/minichain/chat/history.py +3 -3
  171. ommlds/minichain/chat/messages.py +55 -19
  172. ommlds/minichain/chat/services.py +3 -3
  173. ommlds/minichain/chat/stream/_marshal.py +16 -0
  174. ommlds/minichain/chat/stream/joining.py +85 -0
  175. ommlds/minichain/chat/stream/services.py +15 -21
  176. ommlds/minichain/chat/stream/types.py +32 -19
  177. ommlds/minichain/chat/tools/execution.py +8 -7
  178. ommlds/minichain/chat/tools/ids.py +9 -15
  179. ommlds/minichain/chat/tools/parsing.py +17 -26
  180. ommlds/minichain/chat/transforms/base.py +29 -38
  181. ommlds/minichain/chat/transforms/metadata.py +30 -4
  182. ommlds/minichain/chat/transforms/services.py +9 -11
  183. ommlds/minichain/content/_marshal.py +44 -20
  184. ommlds/minichain/content/json.py +13 -0
  185. ommlds/minichain/content/materialize.py +14 -21
  186. ommlds/minichain/content/prepare.py +4 -0
  187. ommlds/minichain/content/transforms/interleave.py +1 -1
  188. ommlds/minichain/content/transforms/squeeze.py +1 -1
  189. ommlds/minichain/content/transforms/stringify.py +1 -1
  190. ommlds/minichain/json.py +20 -0
  191. ommlds/minichain/lib/code/__init__.py +0 -0
  192. ommlds/minichain/lib/code/prompts.py +6 -0
  193. ommlds/minichain/lib/fs/binfiles.py +108 -0
  194. ommlds/minichain/lib/fs/context.py +126 -0
  195. ommlds/minichain/lib/fs/errors.py +101 -0
  196. ommlds/minichain/lib/fs/suggestions.py +36 -0
  197. ommlds/minichain/lib/fs/tools/__init__.py +0 -0
  198. ommlds/minichain/lib/fs/tools/edit.py +104 -0
  199. ommlds/minichain/lib/fs/tools/ls.py +38 -0
  200. ommlds/minichain/lib/fs/tools/read.py +115 -0
  201. ommlds/minichain/lib/fs/tools/recursivels/__init__.py +0 -0
  202. ommlds/minichain/lib/fs/tools/recursivels/execution.py +40 -0
  203. ommlds/minichain/lib/todo/__init__.py +0 -0
  204. ommlds/minichain/lib/todo/context.py +54 -0
  205. ommlds/minichain/lib/todo/tools/__init__.py +0 -0
  206. ommlds/minichain/lib/todo/tools/read.py +44 -0
  207. ommlds/minichain/lib/todo/tools/write.py +335 -0
  208. ommlds/minichain/lib/todo/types.py +60 -0
  209. ommlds/minichain/llms/_marshal.py +25 -17
  210. ommlds/minichain/llms/types.py +4 -0
  211. ommlds/minichain/registries/globals.py +18 -4
  212. ommlds/minichain/resources.py +66 -43
  213. ommlds/minichain/search.py +1 -1
  214. ommlds/minichain/services/_marshal.py +46 -39
  215. ommlds/minichain/services/facades.py +3 -3
  216. ommlds/minichain/services/services.py +1 -1
  217. ommlds/minichain/standard.py +8 -0
  218. ommlds/minichain/stream/services.py +152 -38
  219. ommlds/minichain/stream/wrap.py +22 -24
  220. ommlds/minichain/tools/_marshal.py +1 -1
  221. ommlds/minichain/tools/execution/catalog.py +2 -1
  222. ommlds/minichain/tools/execution/context.py +34 -14
  223. ommlds/minichain/tools/execution/errors.py +15 -0
  224. ommlds/minichain/tools/execution/executors.py +8 -3
  225. ommlds/minichain/tools/execution/reflect.py +40 -5
  226. ommlds/minichain/tools/fns.py +46 -9
  227. ommlds/minichain/tools/jsonschema.py +14 -5
  228. ommlds/minichain/tools/reflect.py +54 -18
  229. ommlds/minichain/tools/types.py +33 -1
  230. ommlds/minichain/utils.py +27 -0
  231. ommlds/minichain/vectors/_marshal.py +11 -10
  232. ommlds/nanochat/LICENSE +21 -0
  233. ommlds/nanochat/__init__.py +0 -0
  234. ommlds/nanochat/rustbpe/LICENSE +21 -0
  235. ommlds/nanochat/tokenizers.py +406 -0
  236. ommlds/server/server.py +3 -3
  237. ommlds/specs/__init__.py +0 -0
  238. ommlds/specs/mcp/__init__.py +0 -0
  239. ommlds/specs/mcp/_marshal.py +23 -0
  240. ommlds/specs/mcp/protocol.py +266 -0
  241. ommlds/tools/git.py +27 -10
  242. ommlds/tools/ocr.py +8 -9
  243. ommlds/wiki/analyze.py +2 -2
  244. ommlds/wiki/text/mfh.py +1 -5
  245. ommlds/wiki/text/wtp.py +1 -3
  246. ommlds/wiki/utils/xml.py +5 -5
  247. {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/METADATA +24 -21
  248. ommlds-0.0.0.dev480.dist-info/RECORD +427 -0
  249. ommlds/cli/backends/standard.py +0 -20
  250. ommlds/cli/sessions/chat/base.py +0 -42
  251. ommlds/cli/sessions/chat/interactive.py +0 -73
  252. ommlds/cli/sessions/chat/printing.py +0 -96
  253. ommlds/cli/sessions/chat/prompt.py +0 -143
  254. ommlds/cli/sessions/chat/state.py +0 -109
  255. ommlds/cli/sessions/chat/tools.py +0 -91
  256. ommlds/cli/sessions/completion/completion.py +0 -44
  257. ommlds/cli/sessions/embedding/embedding.py +0 -42
  258. ommlds/cli/tools/config.py +0 -13
  259. ommlds/cli/tools/inject.py +0 -64
  260. ommlds/minichain/chat/stream/adapters.py +0 -69
  261. ommlds/minichain/lib/fs/ls/execution.py +0 -32
  262. ommlds-0.0.0.dev436.dist-info/RECORD +0 -303
  263. /ommlds/{cli/tools → backends/google}/__init__.py +0 -0
  264. /ommlds/{minichain/lib/fs/ls → backends/groq}/__init__.py +0 -0
  265. /ommlds/{huggingface.py → backends/huggingface.py} +0 -0
  266. /ommlds/minichain/lib/fs/{ls → tools/recursivels}/rendering.py +0 -0
  267. /ommlds/minichain/lib/fs/{ls → tools/recursivels}/running.py +0 -0
  268. {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/WHEEL +0 -0
  269. {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/entry_points.txt +0 -0
  270. {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/licenses/LICENSE +0 -0
  271. {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/top_level.txt +0 -0

ommlds/minichain/backends/impls/groq/stream.py

@@ -0,0 +1,125 @@
+ import typing as ta
+
+ from omlish import check
+ from omlish import marshal as msh
+ from omlish import typedvalues as tv
+ from omlish.formats import json
+ from omlish.http import all as http
+ from omlish.http import sse
+ from omlish.io.buffers import DelimitingBuffer
+
+ from .....backends.groq import protocol as pt
+ from ....chat.choices.services import ChatChoicesOutputs
+ from ....chat.choices.stream.services import ChatChoicesStreamRequest
+ from ....chat.choices.stream.services import ChatChoicesStreamResponse
+ from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+ from ....chat.choices.stream.types import AiChoicesDeltas
+ from ....chat.tools.types import Tool
+ from ....configs import Config
+ from ....resources import UseResources
+ from ....standard import ApiKey
+ from ....stream.services import StreamResponseSink
+ from ....stream.services import new_stream_response
+ from .chat import GroqChatChoicesService
+ from .names import MODEL_NAMES
+ from .protocol import build_gq_request_messages
+ from .protocol import build_gq_request_tool
+ from .protocol import build_mc_ai_choice_deltas
+
+
+ ##
+
+
+ # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+ #     name='groq',
+ #     type='ChatChoicesStreamService',
+ # )
+ @static_check_is_chat_choices_stream_service
+ class GroqChatChoicesStreamService:
+     def __init__(
+         self,
+         *configs: Config,
+         http_client: http.AsyncHttpClient | None = None,
+     ) -> None:
+         super().__init__()
+
+         self._http_client = http_client
+
+         with tv.consume(*configs) as cc:
+             self._model_name = cc.pop(GroqChatChoicesService.DEFAULT_MODEL_NAME)
+             self._api_key = ApiKey.pop_secret(cc, env='GROQ_API_KEY')
+
+     READ_CHUNK_SIZE: ta.ClassVar[int] = -1
+
+     async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+         tools: list[pt.ChatCompletionRequest.Tool] = []
+         with tv.TypedValues(*request.options).consume() as oc:
+             t: Tool
+             for t in oc.pop(Tool, []):
+                 tools.append(build_gq_request_tool(t))
+
+         gq_request = pt.ChatCompletionRequest(
+             messages=build_gq_request_messages(request.v),
+             model=MODEL_NAMES.resolve(self._model_name.v),
+             tools=tools or None,
+             stream=True,
+         )
+
+         raw_request = msh.marshal(gq_request)
+
+         http_request = http.HttpRequest(
+             'https://api.groq.com/openai/v1/chat/completions',
+             headers={
+                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
+                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
+             },
+             data=json.dumps(raw_request).encode('utf-8'),
+         )
+
+         async with UseResources.or_new(request.options) as rs:
+             http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+             http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
+
+             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
+                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
+                 sd = sse.SseDecoder()
+                 while True:
+                     b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
+                     for l in db.feed(b):
+                         if isinstance(l, DelimitingBuffer.Incomplete):
+                             # FIXME: handle
+                             return []
+
+                         # FIXME: https://platform.openai.com/docs/guides/function-calling?api-mode=responses#streaming
+                         for so in sd.process_line(l):
+                             if isinstance(so, sse.SseEvent) and so.type == b'message':
+                                 ss = so.data.decode('utf-8')
+                                 if ss == '[DONE]':
+                                     return []
+
+                                 sj = json.loads(ss)  # ChatCompletionChunk
+
+                                 check.state(sj['object'] == 'chat.completion.chunk')
+
+                                 ccc = msh.unmarshal(sj, pt.ChatCompletionChunk)
+
+                                 # FIXME: stop reason
+                                 if not ccc.choices:
+                                     continue
+
+                                 if any(choice.finish_reason for choice in ccc.choices):
+                                     check.state(all(choice.finish_reason for choice in ccc.choices))
+                                     break
+
+                                 await sink.emit(AiChoicesDeltas([
+                                     build_mc_ai_choice_deltas(choice.delta)
+                                     for choice in ccc.choices
+                                 ]))
+
+                     if not b:
+                         return []
+
+             # raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
+             # return rh.build_response(raw_response)
+
+             return await new_stream_response(rs, inner)
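Note: the new Groq stream service parses the response body as server-sent events — raw bytes are split on CR/LF boundaries, each line goes through an SSE decoder, and every data payload is a chat.completion.chunk until the [DONE] sentinel. A minimal stdlib-only sketch of that parse loop (names here are illustrative, not the omlish API):

    import json

    def iter_sse_data(lines):
        """Yield the JSON payload of each 'data:' line until '[DONE]'."""
        for line in lines:
            line = line.strip()
            if not line.startswith(b'data:'):
                continue  # skip comments, event/id fields, keepalives
            data = line[len(b'data:'):].strip()
            if data == b'[DONE]':
                return  # Groq (like OpenAI) terminates the stream with [DONE]
            yield json.loads(data)

    # Each decoded object is a chat.completion.chunk; its choices[].delta
    # fragments are what the service converts into AiChoicesDeltas.
    chunks = [b'data: {"object": "chat.completion.chunk", "choices": []}', b'data: [DONE]']
    for obj in iter_sse_data(chunks):
        assert obj['object'] == 'chat.completion.chunk'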

ommlds/minichain/backends/impls/llamacpp/chat.py

@@ -15,7 +15,8 @@ from ....chat.choices.services import static_check_is_chat_choices_service
  from ....chat.choices.types import AiChoice
  from ....chat.choices.types import ChatChoicesOptions
  from ....chat.messages import AiMessage
- from ....chat.messages import ToolExecResultMessage
+ from ....chat.messages import ToolUseMessage
+ from ....chat.messages import ToolUseResultMessage
  from ....chat.tools.types import Tool
  from ....configs import Config
  from ....llms.types import MaxTokens
@@ -29,6 +30,15 @@ from .format import get_msg_content
  ##


+ # @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+ #     ['ChatChoicesService'],
+ #     'llamacpp',
+ # )
+
+
+ ##
+
+
  # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
  #     name='llamacpp',
  #     type='ChatChoicesService',
@@ -54,7 +64,7 @@ class LlamacppChatChoicesService:
          temperatur=Temperature,
      )

-     def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+     async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
          kwargs: dict = dict(
              # temperature=0,
              max_tokens=1024,
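Note: as across the other backends touched in this release, invoke becomes a coroutine here, so callers must now await it. A stub-level sketch of the caller-side change:

    import asyncio

    class StubChatService:
        # Stands in for any of the converted services (llamacpp, mistral, mlx, ...).
        async def invoke(self, request: str) -> str:
            return request.upper()

    async def main() -> None:
        svc = StubChatService()
        print(await svc.invoke('hello'))  # previously: svc.invoke('hello')

    asyncio.run(main())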
@@ -100,29 +110,34 @@ class LlamacppChatChoicesService:

          ims: list = []
          for rm in request.v:
-             if isinstance(rm, ToolExecResultMessage):
+             if isinstance(rm, ToolUseResultMessage):
                  ims.append(dict(
                      role='tool',
-                     **(dict(id=rm.id) if rm.id is not None else {}),
-                     name=rm.name,
-                     content=check.isinstance(rm.c, str),
+                     **(dict(id=rm.tur.id) if rm.tur.id is not None else {}),
+                     name=rm.tur.name,
+                     content=check.isinstance(rm.tur.c, str),
                  ))
+
              elif isinstance(rm, AiMessage):
-                 tcs: list[dict] = []
-                 for ter in rm.tool_exec_requests or []:
-                     tcs.append(dict(
-                         id=check.not_none(ter.id),
-                         type='function',
-                         function=dict(
-                             name=ter.name,
-                             arguments=check.isinstance(ter.raw_args, str),
-                         ),
-                     ))
                  ims.append(dict(
                      role=ROLES_MAP[type(rm)],
                      **(dict(content=mc) if (mc := get_msg_content(rm)) is not None else {}),
-                     **(dict(tool_calls=tcs) if tcs else {}),
                  ))
+
+             elif isinstance(rm, ToolUseMessage):
+                 ims.append(dict(
+                     role=ROLES_MAP[type(rm)],
+                     content='',
+                     tool_calls=[dict(
+                         id=check.not_none(rm.tu.id),
+                         type='function',
+                         function=dict(
+                             name=rm.tu.name,
+                             arguments=check.isinstance(rm.tu.raw_args, str),
+                         ),
+                     )],
+                 ))
+
              else:
                  ims.append(dict(
                      role=ROLES_MAP[type(rm)],
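Note: this hunk tracks the message-model split — tool calls no longer ride on AiMessage.tool_exec_requests but arrive as a dedicated ToolUseMessage (serialized as an assistant turn carrying tool_calls), and results come back as ToolUseResultMessage (role 'tool'). A sketch of the resulting wire shapes, with plain dicts and made-up values ('call_1', 'get_weather') standing in for the real message classes:

    # Plain-dict sketch of the llama.cpp / OpenAI-style turns produced above.
    tool_use_turn = dict(
        role='assistant',
        content='',
        tool_calls=[dict(
            id='call_1',
            type='function',
            function=dict(
                name='get_weather',
                arguments='{"city": "Tokyo"}',  # arguments stay a raw JSON string
            ),
        )],
    )

    tool_result_turn = dict(
        role='tool',
        id='call_1',
        name='get_weather',
        content='{"temp_c": 21}',
    )

    conversation = [tool_use_turn, tool_result_turn]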
@@ -137,6 +152,6 @@ class LlamacppChatChoicesService:
          out: list[AiChoice] = []
          for c in ta.cast(ta.Any, output)['choices']:
              m = c['message']
-             out.append(AiChoice(AiMessage(m['content'])))
+             out.append(AiChoice([AiMessage(m['content'])]))

          return ChatChoicesResponse(out)

ommlds/minichain/backends/impls/llamacpp/completion.py

@@ -51,7 +51,7 @@ class LlamacppCompletionService:
          temperatur=Temperature,
      )

-     def invoke(self, request: CompletionRequest) -> CompletionResponse:
+     async def invoke(self, request: CompletionRequest) -> CompletionResponse:
          kwargs: dict = dict(
              # temperature=0,
              max_tokens=1024,

ommlds/minichain/backends/impls/llamacpp/format.py

@@ -5,7 +5,8 @@ from omlish import check
  from ....chat.messages import AiMessage
  from ....chat.messages import Message
  from ....chat.messages import SystemMessage
- from ....chat.messages import ToolExecResultMessage
+ from ....chat.messages import ToolUseMessage
+ from ....chat.messages import ToolUseResultMessage
  from ....chat.messages import UserMessage


@@ -16,7 +17,8 @@ ROLES_MAP: ta.Mapping[type[Message], str] = {
      SystemMessage: 'system',
      UserMessage: 'user',
      AiMessage: 'assistant',
-     ToolExecResultMessage: 'tool',
+     ToolUseMessage: 'assistant',
+     ToolUseResultMessage: 'tool',
  }



ommlds/minichain/backends/impls/llamacpp/stream.py

@@ -10,15 +10,16 @@ from omlish import typedvalues as tv

  from .....backends import llamacpp as lcu
  from ....chat.choices.services import ChatChoicesOutputs
- from ....chat.stream.services import ChatChoicesStreamRequest
- from ....chat.stream.services import ChatChoicesStreamResponse
- from ....chat.stream.services import static_check_is_chat_choices_stream_service
- from ....chat.stream.types import AiChoiceDelta
- from ....chat.stream.types import AiChoiceDeltas
- from ....chat.stream.types import AiMessageDelta
+ from ....chat.choices.stream.services import ChatChoicesStreamRequest
+ from ....chat.choices.stream.services import ChatChoicesStreamResponse
+ from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+ from ....chat.choices.stream.types import AiChoiceDeltas
+ from ....chat.choices.stream.types import AiChoicesDeltas
+ from ....chat.stream.types import ContentAiDelta
  from ....configs import Config
  from ....models.configs import ModelPath
  from ....resources import UseResources
+ from ....stream.services import StreamResponseSink
  from ....stream.services import new_stream_response
  from .chat import LlamacppChatChoicesService
  from .format import ROLES_MAP
@@ -28,6 +29,15 @@ from .format import get_msg_content
  ##


+ # @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+ #     ['ChatChoicesStreamService'],
+ #     'llamacpp',
+ # )
+
+
+ ##
+
+
  # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
  #     name='llamacpp',
  #     type='ChatChoicesStreamService',
@@ -49,10 +59,10 @@ class LlamacppChatChoicesStreamService(lang.ExitStacked):
              verbose=False,
          )))

-     def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+     async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
          lcu.install_logging_hook()

-         with UseResources.or_new(request.options) as rs:
+         async with UseResources.or_new(request.options) as rs:
              rs.enter_context(self._lock)

              model: ta.Any = self._load_model()  # FIXME: the types are awful lol
@@ -74,19 +84,26 @@ class LlamacppChatChoicesStreamService(lang.ExitStacked):

              rs.enter_context(lang.defer(close_output))

-             def yield_choices() -> ta.Generator[AiChoiceDeltas, None, ta.Sequence[ChatChoicesOutputs] | None]:
+             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
+                 last_role: ta.Any = None
+
                  for chunk in output:
                      check.state(chunk['object'] == 'chat.completion.chunk')
-                     l: list[AiChoiceDelta] = []
-                     for choice in chunk['choices']:
-                         # FIXME: check role is assistant
-                         # FIXME: stop reason
-                         if not (delta := choice.get('delta', {})):
-                             continue
-                         if not (content := delta.get('content', '')):
-                             continue
-                         l.append(AiChoiceDelta(AiMessageDelta(content)))
-                     yield l
+
+                     choice = check.single(chunk['choices'])
+
+                     if not (delta := choice.get('delta', {})):
+                         continue
+
+                     # FIXME: check role is assistant
+                     if (role := delta.get('role')) != last_role:
+                         last_role = role
+
+                     # FIXME: stop reason
+
+                     if (content := delta.get('content', '')):
+                         await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(content)])]))
+
                  return None

-             return new_stream_response(rs, yield_choices())
+             return await new_stream_response(rs, inner)
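Note: this hunk swaps the pull-style generator (yield_choices yielding delta lists) for a push-style callback — inner receives a sink and awaits sink.emit(...) per delta, which is what new_stream_response now expects. The inversion, reduced to stand-ins:

    # Reduced sketch of the generator -> sink inversion; PrintSink stands in
    # for the real StreamResponseSink from ommlds.minichain.stream.services.
    import asyncio
    import typing as ta

    class PrintSink:
        async def emit(self, delta: str) -> None:
            print('delta:', delta)

    async def inner(sink: PrintSink) -> ta.Sequence[str] | None:
        chunks = [{'choices': [{'delta': {'content': 'hi'}}]}]  # fake llama.cpp output
        for chunk in chunks:
            choice = chunk['choices'][0]  # the service asserts exactly one choice
            if content := choice.get('delta', {}).get('content', ''):
                await sink.emit(content)
        return None  # trailing outputs, reported after the stream drains

    asyncio.run(inner(PrintSink()))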

ommlds/minichain/backends/impls/mistral.py

@@ -21,13 +21,18 @@ from ...chat.messages import UserMessage
  ##


+ # TODO: generalize lol
+ class TooManyRequestsMistralError(Exception):
+     pass
+
+
  # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
  #     name='mistral',
  #     type='ChatChoicesService',
  # )
  @static_check_is_chat_choices_service
  class MistralChatChoicesService:
-     model: ta.ClassVar[str] = 'mistral-large-latest'
+     model: ta.ClassVar[str] = 'mistral-medium-2508'

      ROLES_MAP: ta.ClassVar[ta.Mapping[type[Message], str]] = {
          SystemMessage: 'system',
@@ -35,10 +40,16 @@ class MistralChatChoicesService:
          AiMessage: 'assistant',
      }

-     def __init__(self, *, api_key: str | None = None) -> None:
+     def __init__(
+         self,
+         *,
+         api_key: str | None = None,
+         http_client: http.AsyncHttpClient | None = None,
+     ) -> None:
          super().__init__()

          self._api_key = api_key
+         self._http_client = http_client

      def _get_msg_content(self, m: Message) -> str | None:
          if isinstance(m, AiMessage):
@@ -50,7 +61,7 @@ class MistralChatChoicesService:
          else:
              raise TypeError(m)

-     def invoke(
+     async def invoke(
          self,
          request: ChatChoicesRequest,
      ) -> ChatChoicesResponse:
@@ -68,7 +79,7 @@ class MistralChatChoicesService:
              ],
          }

-         resp = http.request(
+         resp = await http.async_request(
              'https://api.mistral.ai/v1/chat/completions',
              method='POST',
              data=json.dumps_compact(req_dct).encode('utf-8'),
@@ -77,11 +88,15 @@ class MistralChatChoicesService:
                  'Accept': 'application/json',
                  'Authorization': f'Bearer {key}',
              },
+             client=self._http_client,
          )

+         if resp.status == 429:
+             raise TooManyRequestsMistralError
+
          resp_dct = json.loads(check.not_none(resp.data).decode('utf-8'))

          return ChatChoicesResponse([
-             AiChoice(AiMessage(c['message']['content']))
+             AiChoice([AiMessage(c['message']['content'])])
              for c in resp_dct['choices']
          ])
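Note: the Mistral backend now maps HTTP 429 to the new TooManyRequestsMistralError instead of failing later on a missing body. A hypothetical caller-side retry wrapper (the attempt count and backoff schedule are illustrative, not part of ommlds):

    import asyncio

    from ommlds.minichain.backends.impls.mistral import TooManyRequestsMistralError

    async def invoke_with_retry(service, request, attempts: int = 3):
        for i in range(attempts):
            try:
                return await service.invoke(request)
            except TooManyRequestsMistralError:  # raised on HTTP 429
                if i + 1 == attempts:
                    raise
                await asyncio.sleep(2 ** i)  # 1s, 2s, ... between attempts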

ommlds/minichain/backends/impls/mlx/chat.py

@@ -1,3 +1,4 @@
+ import contextlib
  import typing as ta

  from omlish import check
@@ -5,28 +6,38 @@ from omlish import lang
  from omlish import typedvalues as tv

  from .....backends import mlx as mlxu
+ from ....chat.choices.services import ChatChoicesOutputs
  from ....chat.choices.services import ChatChoicesRequest
  from ....chat.choices.services import ChatChoicesResponse
  from ....chat.choices.services import static_check_is_chat_choices_service
+ from ....chat.choices.stream.services import ChatChoicesStreamRequest
+ from ....chat.choices.stream.services import ChatChoicesStreamResponse
+ from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+ from ....chat.choices.stream.types import AiChoiceDeltas
+ from ....chat.choices.stream.types import AiChoicesDeltas
  from ....chat.choices.types import AiChoice
  from ....chat.choices.types import ChatChoicesOptions
  from ....chat.messages import AiMessage
  from ....chat.messages import Message
  from ....chat.messages import SystemMessage
  from ....chat.messages import UserMessage
+ from ....chat.stream.types import ContentAiDelta
  from ....configs import Config
  from ....llms.types import MaxTokens
  from ....models.configs import ModelPath
  from ....models.configs import ModelRepo
  from ....models.configs import ModelSpecifier
+ from ....resources import UseResources
  from ....standard import DefaultOptions
+ from ....stream.services import StreamResponseSink
+ from ....stream.services import new_stream_response


  ##


  # @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
- #     ['ChatChoicesService'],
+ #     ['ChatChoicesService', 'ChatChoicesStreamService'],
  #     'mlx',
  # )

@@ -34,12 +45,7 @@ from ....standard import DefaultOptions
  ##


- # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
- #     name='mlx',
- #     type='ChatChoicesService',
- # )
- @static_check_is_chat_choices_service
- class MlxChatChoicesService(lang.ExitStacked):
+ class BaseMlxChatChoicesService(lang.ExitStacked):
      DEFAULT_MODEL: ta.ClassVar[ModelSpecifier] = (
          # 'mlx-community/DeepSeek-Coder-V2-Lite-Instruct-8bit'
          # 'mlx-community/Llama-3.3-70B-Instruct-4bit'
@@ -52,8 +58,8 @@ class MlxChatChoicesService(lang.ExitStacked):
          # 'mlx-community/Qwen2.5-0.5B-4bit'
          # 'mlx-community/Qwen2.5-32B-Instruct-8bit'
          # 'mlx-community/Qwen2.5-Coder-32B-Instruct-8bit'
-         # 'mlx-community/mamba-2.8b-hf-f16'
          # 'mlx-community/Qwen3-30B-A3B-6bit'
+         # 'mlx-community/mamba-2.8b-hf-f16'
      )

      def __init__(self, *configs: Config) -> None:
@@ -70,10 +76,7 @@ class MlxChatChoicesService(lang.ExitStacked):
      }

      def _get_msg_content(self, m: Message) -> str | None:
-         if isinstance(m, AiMessage):
-             return check.isinstance(m.c, str)
-
-         elif isinstance(m, (SystemMessage, UserMessage)):
+         if isinstance(m, (AiMessage, SystemMessage, UserMessage)):
              return check.isinstance(m.c, str)

          else:
@@ -96,10 +99,9 @@ class MlxChatChoicesService(lang.ExitStacked):
          max_tokens=MaxTokens,
      )

-     def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
-         loaded_model = self._load_model()
-
-         tokenizer = loaded_model.tokenization.tokenizer
+     @lang.cached_function(transient=True)
+     def _get_tokenizer(self) -> mlxu.tokenization.Tokenizer:
+         tokenizer = self._load_model().tokenization.tokenizer

          if not (
              hasattr(tokenizer, 'apply_chat_template') and
@@ -107,26 +109,44 @@ class MlxChatChoicesService(lang.ExitStacked):
          ):
              raise RuntimeError(tokenizer)

-         prompt = tokenizer.apply_chat_template(
+         return tokenizer
+
+     def _build_prompt(self, messages: ta.Sequence[Message]) -> str:
+         return check.isinstance(self._get_tokenizer().apply_chat_template(
              [  # type: ignore[arg-type]
                  dict(
                      role=self.ROLES_MAP[type(m)],
                      content=self._get_msg_content(m),
                  )
-                 for m in request.v
+                 for m in messages
              ],
              tokenize=False,
              add_generation_prompt=True,
-         )
+         ), str)

-         kwargs = dict()
+     def _build_kwargs(self, oc: tv.TypedValuesConsumer) -> dict[str, ta.Any]:
+         kwargs: dict[str, ta.Any] = {}
+         kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+         return kwargs
+
+
+ # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+ #     name='mlx',
+ #     type='ChatChoicesService',
+ # )
+ @static_check_is_chat_choices_service
+ class MlxChatChoicesService(BaseMlxChatChoicesService):
+     async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+         loaded_model = self._load_model()
+
+         prompt = self._build_prompt(request.v)

          with tv.consume(
              *self._default_options,
              *request.options,
              override=True,
          ) as oc:
-             kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+             kwargs = self._build_kwargs(oc)

          response = mlxu.generate(
              loaded_model.model,
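Note: the refactor above extracts BaseMlxChatChoicesService so the sync and stream services share _build_prompt / _build_kwargs, and moves tokenizer access behind lang.cached_function(transient=True), i.e. compute once per instance and reuse. Assuming semantics comparable to a per-instance cache (the exact omlish.lang behavior, including what 'transient' controls, is not verified here), the effect resembles:

    # functools stand-in for the caching effect only.
    import functools

    class Service:
        def _load_model(self):
            print('loading model...')  # expensive; should happen once
            return {'tokenizer': object()}

        @functools.cached_property
        def _tokenizer(self):
            return self._load_model()['tokenizer']

    svc = Service()
    t1 = svc._tokenizer  # prints 'loading model...'
    t2 = svc._tokenizer  # cached; no second load
    assert t1 is t2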
@@ -137,5 +157,59 @@ class MlxChatChoicesService(lang.ExitStacked):
          )

          return ChatChoicesResponse([
-             AiChoice(AiMessage(response))  # noqa
+             AiChoice([AiMessage(response)])  # noqa
          ])
+
+
+ # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+ #     name='mlx',
+ #     type='ChatChoicesStreamService',
+ # )
+ @static_check_is_chat_choices_stream_service
+ class MlxChatChoicesStreamService(BaseMlxChatChoicesService):
+     def __init__(self, *configs: Config) -> None:
+         super().__init__()
+
+         with tv.consume(*configs) as cc:
+             self._model = cc.pop(MlxChatChoicesService.DEFAULT_MODEL)
+             self._default_options: tv.TypedValues = DefaultOptions.pop(cc)
+
+     READ_CHUNK_SIZE = 64 * 1024
+
+     async def invoke(
+         self,
+         request: ChatChoicesStreamRequest,
+         *,
+         max_tokens: int = 4096,  # FIXME: ChatOption
+     ) -> ChatChoicesStreamResponse:
+         loaded_model = self._load_model()
+
+         prompt = self._build_prompt(request.v)
+
+         with tv.consume(
+             *self._default_options,
+             *request.options,
+             override=True,
+         ) as oc:
+             oc.pop(UseResources, None)
+             kwargs = self._build_kwargs(oc)
+
+         async with UseResources.or_new(request.options) as rs:
+             gen: ta.Iterator[mlxu.GenerationOutput] = rs.enter_context(contextlib.closing(mlxu.stream_generate(
+                 loaded_model.model,
+                 loaded_model.tokenization,
+                 check.isinstance(prompt, str),
+                 mlxu.GenerationParams(**kwargs),
+                 # verbose=True,
+             )))
+
+             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
+                 for go in gen:
+                     if go.text:
+                         await sink.emit(AiChoicesDeltas([AiChoiceDeltas([
+                             ContentAiDelta(go.text),
+                         ])]))
+
+                 return []
+
+             return await new_stream_response(rs, inner)
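Note: the new MLX stream service follows the same recipe as the Groq and llama.cpp ones — open resources, wrap a synchronous token iterator, push each text fragment through the sink, then return any trailing outputs. Reduced to stand-ins (the real UseResources / new_stream_response plumbing lives in ommlds.minichain):

    # Stand-in sketch of the shared streaming recipe; stream_generate() here
    # fakes mlxu.stream_generate, and emit() fakes StreamResponseSink.emit.
    import asyncio
    import typing as ta

    def stream_generate() -> ta.Iterator[str]:
        yield from ('hello ', 'mlx')  # token fragments from the model

    async def inner(emit: ta.Callable[[str], ta.Awaitable[None]]) -> list:
        for text in stream_generate():
            if text:
                await emit(text)  # one ContentAiDelta-like fragment at a time
        return []  # final outputs once generation completes

    async def main() -> None:
        async def emit(s: str) -> None:
            print(s, end='')
        outputs = await inner(emit)
        print('\noutputs:', outputs)

    asyncio.run(main())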