ommlds-0.0.0.dev456-py3-none-any.whl → ommlds-0.0.0.dev485-py3-none-any.whl

Files changed (212)
  1. ommlds/.omlish-manifests.json +314 -33
  2. ommlds/__about__.py +15 -9
  3. ommlds/_hacks/__init__.py +4 -0
  4. ommlds/_hacks/funcs.py +110 -0
  5. ommlds/_hacks/names.py +158 -0
  6. ommlds/_hacks/params.py +73 -0
  7. ommlds/_hacks/patches.py +0 -3
  8. ommlds/backends/anthropic/protocol/__init__.py +13 -1
  9. ommlds/backends/anthropic/protocol/_dataclasses.py +1625 -0
  10. ommlds/backends/anthropic/protocol/sse/assemble.py +22 -6
  11. ommlds/backends/anthropic/protocol/sse/events.py +13 -0
  12. ommlds/backends/google/protocol/__init__.py +13 -0
  13. ommlds/backends/google/protocol/_dataclasses.py +5997 -0
  14. ommlds/backends/google/protocol/types.py +5 -1
  15. ommlds/backends/groq/__init__.py +7 -0
  16. ommlds/backends/groq/_dataclasses.py +3901 -0
  17. ommlds/backends/groq/_marshal.py +23 -0
  18. ommlds/backends/groq/protocol.py +249 -0
  19. ommlds/backends/llamacpp/logging.py +4 -1
  20. ommlds/backends/mlx/caching.py +7 -3
  21. ommlds/backends/mlx/cli.py +10 -7
  22. ommlds/backends/mlx/generation.py +18 -16
  23. ommlds/backends/mlx/limits.py +10 -6
  24. ommlds/backends/mlx/loading.py +65 -5
  25. ommlds/backends/ollama/__init__.py +7 -0
  26. ommlds/backends/ollama/_dataclasses.py +3458 -0
  27. ommlds/backends/ollama/protocol.py +170 -0
  28. ommlds/backends/openai/protocol/__init__.py +15 -1
  29. ommlds/backends/openai/protocol/_dataclasses.py +7708 -0
  30. ommlds/backends/tavily/__init__.py +7 -0
  31. ommlds/backends/tavily/_dataclasses.py +1734 -0
  32. ommlds/backends/tavily/protocol.py +301 -0
  33. ommlds/backends/tinygrad/models/llama3/__init__.py +22 -14
  34. ommlds/backends/transformers/__init__.py +14 -0
  35. ommlds/backends/transformers/filecache.py +109 -0
  36. ommlds/backends/transformers/streamers.py +73 -0
  37. ommlds/cli/__init__.py +7 -0
  38. ommlds/cli/_dataclasses.py +2562 -0
  39. ommlds/cli/asyncs.py +30 -0
  40. ommlds/cli/backends/catalog.py +93 -0
  41. ommlds/cli/backends/configs.py +9 -0
  42. ommlds/cli/backends/inject.py +31 -36
  43. ommlds/cli/backends/injection.py +16 -0
  44. ommlds/cli/backends/types.py +46 -0
  45. ommlds/cli/content/messages.py +34 -0
  46. ommlds/cli/content/strings.py +42 -0
  47. ommlds/cli/inject.py +15 -32
  48. ommlds/cli/inputs/__init__.py +0 -0
  49. ommlds/cli/inputs/asyncs.py +32 -0
  50. ommlds/cli/inputs/sync.py +75 -0
  51. ommlds/cli/main.py +267 -128
  52. ommlds/cli/rendering/__init__.py +0 -0
  53. ommlds/cli/rendering/configs.py +9 -0
  54. ommlds/cli/rendering/inject.py +31 -0
  55. ommlds/cli/rendering/markdown.py +52 -0
  56. ommlds/cli/rendering/raw.py +73 -0
  57. ommlds/cli/rendering/types.py +21 -0
  58. ommlds/cli/secrets.py +21 -0
  59. ommlds/cli/sessions/base.py +1 -1
  60. ommlds/cli/sessions/chat/chat/__init__.py +0 -0
  61. ommlds/cli/sessions/chat/chat/ai/__init__.py +0 -0
  62. ommlds/cli/sessions/chat/chat/ai/configs.py +11 -0
  63. ommlds/cli/sessions/chat/chat/ai/inject.py +74 -0
  64. ommlds/cli/sessions/chat/chat/ai/injection.py +14 -0
  65. ommlds/cli/sessions/chat/chat/ai/rendering.py +70 -0
  66. ommlds/cli/sessions/chat/chat/ai/services.py +79 -0
  67. ommlds/cli/sessions/chat/chat/ai/tools.py +44 -0
  68. ommlds/cli/sessions/chat/chat/ai/types.py +28 -0
  69. ommlds/cli/sessions/chat/chat/state/__init__.py +0 -0
  70. ommlds/cli/sessions/chat/chat/state/configs.py +11 -0
  71. ommlds/cli/sessions/chat/chat/state/inject.py +36 -0
  72. ommlds/cli/sessions/chat/chat/state/inmemory.py +33 -0
  73. ommlds/cli/sessions/chat/chat/state/storage.py +52 -0
  74. ommlds/cli/sessions/chat/chat/state/types.py +38 -0
  75. ommlds/cli/sessions/chat/chat/user/__init__.py +0 -0
  76. ommlds/cli/sessions/chat/chat/user/configs.py +17 -0
  77. ommlds/cli/sessions/chat/chat/user/inject.py +62 -0
  78. ommlds/cli/sessions/chat/chat/user/interactive.py +31 -0
  79. ommlds/cli/sessions/chat/chat/user/oneshot.py +25 -0
  80. ommlds/cli/sessions/chat/chat/user/types.py +15 -0
  81. ommlds/cli/sessions/chat/configs.py +27 -0
  82. ommlds/cli/sessions/chat/driver.py +43 -0
  83. ommlds/cli/sessions/chat/inject.py +33 -65
  84. ommlds/cli/sessions/chat/phases/__init__.py +0 -0
  85. ommlds/cli/sessions/chat/phases/inject.py +27 -0
  86. ommlds/cli/sessions/chat/phases/injection.py +14 -0
  87. ommlds/cli/sessions/chat/phases/manager.py +29 -0
  88. ommlds/cli/sessions/chat/phases/types.py +29 -0
  89. ommlds/cli/sessions/chat/session.py +27 -0
  90. ommlds/cli/sessions/chat/tools/__init__.py +0 -0
  91. ommlds/cli/sessions/chat/tools/configs.py +22 -0
  92. ommlds/cli/sessions/chat/tools/confirmation.py +46 -0
  93. ommlds/cli/sessions/chat/tools/execution.py +66 -0
  94. ommlds/cli/sessions/chat/tools/fs/__init__.py +0 -0
  95. ommlds/cli/sessions/chat/tools/fs/configs.py +12 -0
  96. ommlds/cli/sessions/chat/tools/fs/inject.py +35 -0
  97. ommlds/cli/sessions/chat/tools/inject.py +88 -0
  98. ommlds/cli/sessions/chat/tools/injection.py +44 -0
  99. ommlds/cli/sessions/chat/tools/rendering.py +58 -0
  100. ommlds/cli/sessions/chat/tools/todo/__init__.py +0 -0
  101. ommlds/cli/sessions/chat/tools/todo/configs.py +12 -0
  102. ommlds/cli/sessions/chat/tools/todo/inject.py +31 -0
  103. ommlds/cli/sessions/chat/tools/weather/__init__.py +0 -0
  104. ommlds/cli/sessions/chat/tools/weather/configs.py +12 -0
  105. ommlds/cli/sessions/chat/tools/weather/inject.py +22 -0
  106. ommlds/cli/{tools/weather.py → sessions/chat/tools/weather/tools.py} +1 -1
  107. ommlds/cli/sessions/completion/configs.py +21 -0
  108. ommlds/cli/sessions/completion/inject.py +42 -0
  109. ommlds/cli/sessions/completion/session.py +35 -0
  110. ommlds/cli/sessions/embedding/configs.py +21 -0
  111. ommlds/cli/sessions/embedding/inject.py +42 -0
  112. ommlds/cli/sessions/embedding/session.py +33 -0
  113. ommlds/cli/sessions/inject.py +28 -11
  114. ommlds/cli/state/__init__.py +0 -0
  115. ommlds/cli/state/inject.py +28 -0
  116. ommlds/cli/{state.py → state/storage.py} +41 -24
  117. ommlds/minichain/__init__.py +46 -17
  118. ommlds/minichain/_dataclasses.py +15401 -0
  119. ommlds/minichain/backends/catalogs/base.py +20 -1
  120. ommlds/minichain/backends/catalogs/simple.py +2 -2
  121. ommlds/minichain/backends/catalogs/strings.py +10 -8
  122. ommlds/minichain/backends/impls/anthropic/chat.py +31 -65
  123. ommlds/minichain/backends/impls/anthropic/names.py +3 -4
  124. ommlds/minichain/backends/impls/anthropic/protocol.py +109 -0
  125. ommlds/minichain/backends/impls/anthropic/stream.py +53 -31
  126. ommlds/minichain/backends/impls/duckduckgo/search.py +5 -1
  127. ommlds/minichain/backends/impls/dummy/__init__.py +0 -0
  128. ommlds/minichain/backends/impls/dummy/chat.py +69 -0
  129. ommlds/minichain/backends/impls/google/chat.py +9 -2
  130. ommlds/minichain/backends/impls/google/search.py +6 -1
  131. ommlds/minichain/backends/impls/google/stream.py +122 -32
  132. ommlds/minichain/backends/impls/groq/__init__.py +0 -0
  133. ommlds/minichain/backends/impls/groq/chat.py +75 -0
  134. ommlds/minichain/backends/impls/groq/names.py +48 -0
  135. ommlds/minichain/backends/impls/groq/protocol.py +143 -0
  136. ommlds/minichain/backends/impls/groq/stream.py +125 -0
  137. ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
  138. ommlds/minichain/backends/impls/llamacpp/chat.py +15 -3
  139. ommlds/minichain/backends/impls/llamacpp/completion.py +7 -3
  140. ommlds/minichain/backends/impls/llamacpp/stream.py +38 -19
  141. ommlds/minichain/backends/impls/mistral.py +9 -2
  142. ommlds/minichain/backends/impls/mlx/chat.py +100 -23
  143. ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
  144. ommlds/minichain/backends/impls/ollama/chat.py +199 -0
  145. ommlds/minichain/backends/impls/openai/chat.py +14 -7
  146. ommlds/minichain/backends/impls/openai/completion.py +9 -2
  147. ommlds/minichain/backends/impls/openai/embedding.py +9 -2
  148. ommlds/minichain/backends/impls/openai/format.py +115 -109
  149. ommlds/minichain/backends/impls/openai/names.py +31 -5
  150. ommlds/minichain/backends/impls/openai/stream.py +33 -27
  151. ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
  152. ommlds/minichain/backends/impls/tavily.py +66 -0
  153. ommlds/minichain/backends/impls/tinygrad/chat.py +17 -14
  154. ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
  155. ommlds/minichain/backends/impls/transformers/sentence.py +5 -2
  156. ommlds/minichain/backends/impls/transformers/tokens.py +10 -7
  157. ommlds/minichain/backends/impls/transformers/transformers.py +139 -20
  158. ommlds/minichain/backends/strings/parsing.py +1 -1
  159. ommlds/minichain/backends/strings/resolving.py +4 -1
  160. ommlds/minichain/chat/choices/stream/__init__.py +0 -0
  161. ommlds/minichain/chat/choices/stream/adapters.py +35 -0
  162. ommlds/minichain/chat/choices/stream/joining.py +31 -0
  163. ommlds/minichain/chat/choices/stream/services.py +45 -0
  164. ommlds/minichain/chat/choices/stream/types.py +43 -0
  165. ommlds/minichain/chat/stream/_marshal.py +4 -4
  166. ommlds/minichain/chat/stream/joining.py +85 -0
  167. ommlds/minichain/chat/stream/services.py +15 -15
  168. ommlds/minichain/chat/stream/types.py +24 -18
  169. ommlds/minichain/llms/types.py +4 -0
  170. ommlds/minichain/registries/globals.py +18 -4
  171. ommlds/minichain/resources.py +28 -3
  172. ommlds/minichain/search.py +1 -1
  173. ommlds/minichain/standard.py +8 -0
  174. ommlds/minichain/stream/services.py +19 -16
  175. ommlds/minichain/tools/reflect.py +5 -1
  176. ommlds/nanochat/LICENSE +21 -0
  177. ommlds/nanochat/__init__.py +0 -0
  178. ommlds/nanochat/rustbpe/LICENSE +21 -0
  179. ommlds/nanochat/tokenizers.py +406 -0
  180. ommlds/specs/__init__.py +0 -0
  181. ommlds/specs/mcp/__init__.py +0 -0
  182. ommlds/specs/mcp/_marshal.py +23 -0
  183. ommlds/specs/mcp/clients.py +146 -0
  184. ommlds/specs/mcp/protocol.py +371 -0
  185. ommlds/tools/git.py +13 -6
  186. ommlds/tools/ocr.py +1 -8
  187. ommlds/wiki/analyze.py +2 -2
  188. ommlds/wiki/text/mfh.py +1 -5
  189. ommlds/wiki/text/wtp.py +1 -3
  190. ommlds/wiki/utils/xml.py +5 -5
  191. {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/METADATA +22 -19
  192. {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/RECORD +198 -95
  193. ommlds/cli/backends/standard.py +0 -20
  194. ommlds/cli/sessions/chat/base.py +0 -42
  195. ommlds/cli/sessions/chat/code.py +0 -129
  196. ommlds/cli/sessions/chat/interactive.py +0 -71
  197. ommlds/cli/sessions/chat/printing.py +0 -97
  198. ommlds/cli/sessions/chat/prompt.py +0 -151
  199. ommlds/cli/sessions/chat/state.py +0 -110
  200. ommlds/cli/sessions/chat/tools.py +0 -100
  201. ommlds/cli/sessions/completion/completion.py +0 -44
  202. ommlds/cli/sessions/embedding/embedding.py +0 -42
  203. ommlds/cli/tools/config.py +0 -14
  204. ommlds/cli/tools/inject.py +0 -75
  205. ommlds/minichain/backends/impls/openai/format2.py +0 -210
  206. ommlds/minichain/chat/stream/adapters.py +0 -80
  207. /ommlds/{huggingface.py → backends/huggingface.py} +0 -0
  208. /ommlds/cli/{tools → content}/__init__.py +0 -0
  209. {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/WHEEL +0 -0
  210. {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/entry_points.txt +0 -0
  211. {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/licenses/LICENSE +0 -0
  212. {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/top_level.txt +0 -0
ommlds/minichain/backends/impls/llamacpp/chat.py

@@ -2,13 +2,10 @@ import contextlib
 import os.path
 import typing as ta

-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -27,6 +24,21 @@ from .format import ROLES_MAP
 from .format import get_msg_content


+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
+##
+
+
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesService'],
+#     'llamacpp',
+# )
+
+
 ##

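Note: `lang.auto_proxy_import` (used above) defers the heavy `llama_cpp` import until a deferred name is first touched, so importing the backend module stays cheap even when llama-cpp is absent. A rough stand-in for the idea using only the standard library (an illustration of lazy module attributes via PEP 562, not omlish's implementation; the alias mapping is hypothetical):

    import importlib
    import typing as ta

    # hypothetical alias -> module mapping; resolved on first attribute access
    _LAZY_MODULES: ta.Mapping[str, str] = {
        'lcc': 'llama_cpp',
    }


    def __getattr__(name: str) -> ta.Any:
        # called only when `name` is not already a module global
        try:
            return importlib.import_module(_LAZY_MODULES[name])
        except KeyError:
            raise AttributeError(name) from None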
ommlds/minichain/backends/impls/llamacpp/completion.py

@@ -2,11 +2,9 @@ import contextlib
 import os.path
 import typing as ta

-import llama_cpp as lcc
-
+from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import llamacpp as lcu
 from ....completion import CompletionOption
 from ....completion import CompletionRequest
 from ....completion import CompletionResponse
@@ -18,6 +16,12 @@ from ....llms.types import Temperature
 from ....models.configs import ModelPath


+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##

ommlds/minichain/backends/impls/llamacpp/stream.py

@@ -2,20 +2,17 @@ import contextlib
 import threading
 import typing as ta  # noqa

-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesOutputs
-from ....chat.stream.services import ChatChoicesStreamRequest
-from ....chat.stream.services import ChatChoicesStreamResponse
-from ....chat.stream.services import static_check_is_chat_choices_stream_service
-from ....chat.stream.types import AiChoiceDeltas
-from ....chat.stream.types import AiChoicesDeltas
-from ....chat.stream.types import ContentAiChoiceDelta
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
+from ....chat.stream.types import ContentAiDelta
 from ....configs import Config
 from ....models.configs import ModelPath
 from ....resources import UseResources
@@ -26,6 +23,21 @@ from .format import ROLES_MAP
 from .format import get_msg_content


+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
+##
+
+
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesStreamService'],
+#     'llamacpp',
+# )
+
+
 ##


@@ -76,18 +88,25 @@ class LlamacppChatChoicesStreamService(lang.ExitStacked):
         rs.enter_context(lang.defer(close_output))

         async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
+            last_role: ta.Any = None
+
             for chunk in output:
                 check.state(chunk['object'] == 'chat.completion.chunk')
-                l: list[AiChoiceDeltas] = []
-                for choice in chunk['choices']:
-                    # FIXME: check role is assistant
-                    # FIXME: stop reason
-                    if not (delta := choice.get('delta', {})):
-                        continue
-                    if not (content := delta.get('content', '')):
-                        continue
-                    l.append(AiChoiceDeltas([ContentAiChoiceDelta(content)]))
-                await sink.emit(AiChoicesDeltas(l))
+
+                choice = check.single(chunk['choices'])
+
+                if not (delta := choice.get('delta', {})):
+                    continue
+
+                # FIXME: check role is assistant
+                if (role := delta.get('role')) != last_role:
+                    last_role = role
+
+                # FIXME: stop reason
+
+                if (content := delta.get('content', '')):
+                    await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(content)])]))
+
             return None

         return await new_stream_response(rs, inner)
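The rewritten loop above requires exactly one choice per chunk and tracks role announcements instead of silently dropping them. A standalone sketch of the same delta handling, assuming OpenAI-style `chat.completion.chunk` dicts as llama-cpp emits them (function name is illustrative, not the package's API):

    import typing as ta


    def iter_content_deltas(chunks: ta.Iterable[dict]) -> ta.Iterator[str]:
        last_role = None
        for chunk in chunks:
            assert chunk['object'] == 'chat.completion.chunk'
            [choice] = chunk['choices']  # the service checks for exactly one choice
            if not (delta := choice.get('delta', {})):
                continue
            if (role := delta.get('role')) != last_role:
                last_role = role  # role typically arrives on the first delta of a turn
            if (content := delta.get('content', '')):
                yield content


    # Example:
    chunks = [{'object': 'chat.completion.chunk',
               'choices': [{'delta': {'role': 'assistant', 'content': 'Hi'}}]}]
    assert list(iter_content_deltas(chunks)) == ['Hi']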
ommlds/minichain/backends/impls/mistral.py

@@ -40,10 +40,16 @@ class MistralChatChoicesService:
         AiMessage: 'assistant',
     }

-    def __init__(self, *, api_key: str | None = None) -> None:
+    def __init__(
+            self,
+            *,
+            api_key: str | None = None,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()

         self._api_key = api_key
+        self._http_client = http_client

     def _get_msg_content(self, m: Message) -> str | None:
         if isinstance(m, AiMessage):
@@ -73,7 +79,7 @@ class MistralChatChoicesService:
             ],
         }

-        resp = http.request(
+        resp = await http.async_request(
             'https://api.mistral.ai/v1/chat/completions',
             method='POST',
             data=json.dumps_compact(req_dct).encode('utf-8'),
@@ -82,6 +88,7 @@ class MistralChatChoicesService:
                 'Accept': 'application/json',
                 'Authorization': f'Bearer {key}',
             },
+            client=self._http_client,
         )

         if resp.status == 429:
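The same optional `http_client` keyword is threaded through most backends in this release, alongside the move from synchronous `http.request` to `await http.async_request(..., client=...)`. A minimal sketch of the constructor pattern with a hypothetical stand-in protocol (injection enables test doubles and shared connection pooling; `None` falls back to the library default at call time):

    import typing as ta


    class AsyncHttpClient(ta.Protocol):  # hypothetical stand-in, not omlish's type
        async def request(self, url: str, **kwargs: ta.Any) -> ta.Any: ...


    class SomeApiService:
        def __init__(
                self,
                *,
                api_key: str | None = None,
                http_client: AsyncHttpClient | None = None,  # None -> default client
        ) -> None:
            super().__init__()

            self._api_key = api_key
            self._http_client = http_client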
ommlds/minichain/backends/impls/mlx/chat.py

@@ -1,32 +1,46 @@
+import contextlib
 import typing as ta

 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import mlx as mlxu
+from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
 from ....chat.choices.types import AiChoice
 from ....chat.choices.types import ChatChoicesOptions
 from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
 from ....chat.messages import UserMessage
+from ....chat.stream.types import ContentAiDelta
 from ....configs import Config
 from ....llms.types import MaxTokens
 from ....models.configs import ModelPath
 from ....models.configs import ModelRepo
 from ....models.configs import ModelSpecifier
+from ....resources import UseResources
 from ....standard import DefaultOptions
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response
+
+
+with lang.auto_proxy_import(globals()):
+    from .....backends import mlx as mlxu


 ##


 # @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
-#     ['ChatChoicesService'],
+#     ['ChatChoicesService', 'ChatChoicesStreamService'],
 #     'mlx',
 # )

@@ -34,12 +48,7 @@ from ....standard import DefaultOptions
 ##


-# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='mlx',
-#     type='ChatChoicesService',
-# )
-@static_check_is_chat_choices_service
-class MlxChatChoicesService(lang.ExitStacked):
+class BaseMlxChatChoicesService(lang.ExitStacked):
     DEFAULT_MODEL: ta.ClassVar[ModelSpecifier] = (
         # 'mlx-community/DeepSeek-Coder-V2-Lite-Instruct-8bit'
         # 'mlx-community/Llama-3.3-70B-Instruct-4bit'
@@ -52,8 +61,8 @@ class MlxChatChoicesService(lang.ExitStacked):
         # 'mlx-community/Qwen2.5-0.5B-4bit'
         # 'mlx-community/Qwen2.5-32B-Instruct-8bit'
         # 'mlx-community/Qwen2.5-Coder-32B-Instruct-8bit'
-        # 'mlx-community/mamba-2.8b-hf-f16'
         # 'mlx-community/Qwen3-30B-A3B-6bit'
+        # 'mlx-community/mamba-2.8b-hf-f16'
     )

     def __init__(self, *configs: Config) -> None:
@@ -70,17 +79,14 @@ class MlxChatChoicesService(lang.ExitStacked):
     }

     def _get_msg_content(self, m: Message) -> str | None:
-        if isinstance(m, AiMessage):
-            return check.isinstance(m.c, str)
-
-        elif isinstance(m, (SystemMessage, UserMessage)):
+        if isinstance(m, (AiMessage, SystemMessage, UserMessage)):
             return check.isinstance(m.c, str)

         else:
             raise TypeError(m)

     @lang.cached_function(transient=True)
-    def _load_model(self) -> mlxu.LoadedModel:
+    def _load_model(self) -> 'mlxu.LoadedModel':
         # FIXME: walk state, find all mx.arrays, dealloc/set to empty
         check.not_none(self._exit_stack)

@@ -96,10 +102,9 @@ class MlxChatChoicesService(lang.ExitStacked):
         max_tokens=MaxTokens,
     )

-    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
-        loaded_model = self._load_model()
-
-        tokenizer = loaded_model.tokenization.tokenizer
+    @lang.cached_function(transient=True)
+    def _get_tokenizer(self) -> 'mlxu.tokenization.Tokenizer':
+        tokenizer = self._load_model().tokenization.tokenizer

         if not (
             hasattr(tokenizer, 'apply_chat_template') and
@@ -107,26 +112,44 @@ class MlxChatChoicesService(lang.ExitStacked):
         ):
             raise RuntimeError(tokenizer)

-        prompt = tokenizer.apply_chat_template(
+        return tokenizer
+
+    def _build_prompt(self, messages: ta.Sequence[Message]) -> str:
+        return check.isinstance(self._get_tokenizer().apply_chat_template(
             [  # type: ignore[arg-type]
                 dict(
                     role=self.ROLES_MAP[type(m)],
                     content=self._get_msg_content(m),
                 )
-                for m in request.v
+                for m in messages
             ],
             tokenize=False,
             add_generation_prompt=True,
-        )
+        ), str)
+
+    def _build_kwargs(self, oc: tv.TypedValuesConsumer) -> dict[str, ta.Any]:
+        kwargs: dict[str, ta.Any] = {}
+        kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+        return kwargs

-        kwargs = dict()
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='mlx',
+#     type='ChatChoicesService',
+# )
+@static_check_is_chat_choices_service
+class MlxChatChoicesService(BaseMlxChatChoicesService):
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+        loaded_model = self._load_model()
+
+        prompt = self._build_prompt(request.v)

         with tv.consume(
             *self._default_options,
             *request.options,
             override=True,
         ) as oc:
-            kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+            kwargs = self._build_kwargs(oc)

         response = mlxu.generate(
             loaded_model.model,
@@ -139,3 +162,57 @@ class MlxChatChoicesService(lang.ExitStacked):
         return ChatChoicesResponse([
             AiChoice([AiMessage(response)])  # noqa
         ])
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='mlx',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class MlxChatChoicesStreamService(BaseMlxChatChoicesService):
+    def __init__(self, *configs: Config) -> None:
+        super().__init__()
+
+        with tv.consume(*configs) as cc:
+            self._model = cc.pop(MlxChatChoicesService.DEFAULT_MODEL)
+            self._default_options: tv.TypedValues = DefaultOptions.pop(cc)
+
+    READ_CHUNK_SIZE = 64 * 1024
+
+    async def invoke(
+            self,
+            request: ChatChoicesStreamRequest,
+            *,
+            max_tokens: int = 4096,  # FIXME: ChatOption
+    ) -> ChatChoicesStreamResponse:
+        loaded_model = self._load_model()
+
+        prompt = self._build_prompt(request.v)
+
+        with tv.consume(
+            *self._default_options,
+            *request.options,
+            override=True,
+        ) as oc:
+            oc.pop(UseResources, None)
+            kwargs = self._build_kwargs(oc)
+
+        async with UseResources.or_new(request.options) as rs:
+            gen: ta.Iterator[mlxu.GenerationOutput] = rs.enter_context(contextlib.closing(mlxu.stream_generate(
+                loaded_model.model,
+                loaded_model.tokenization,
+                check.isinstance(prompt, str),
+                mlxu.GenerationParams(**kwargs),
+                # verbose=True,
+            )))
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
+                for go in gen:
+                    if go.text:
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([
+                            ContentAiDelta(go.text),
+                        ])]))
+
+                return []
+
+            return await new_stream_response(rs, inner)
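`new_stream_response` and `StreamResponseSink` are minichain internals; conceptually, `inner` is a callback that pushes deltas into a sink owned by the stream response. A simplified asyncio model of that shape, under the assumption that the sink is queue-backed (a sketch, not the real API):

    import asyncio
    import typing as ta


    class Sink:
        def __init__(self) -> None:
            self.q: asyncio.Queue[str | None] = asyncio.Queue()

        async def emit(self, delta: str) -> None:
            await self.q.put(delta)


    async def new_stream_response(inner: ta.Callable[[Sink], ta.Awaitable[None]]) -> ta.AsyncIterator[str]:
        # runs `inner`, draining its emitted deltas to the consumer
        sink = Sink()

        async def run() -> None:
            await inner(sink)
            await sink.q.put(None)  # sentinel: stream finished

        task = asyncio.create_task(run())
        while (d := await sink.q.get()) is not None:
            yield d
        await task


    async def demo() -> None:
        async def inner(sink: Sink) -> None:
            for text in ('he', 'llo'):  # stands in for a token generator
                await sink.emit(text)

        print(''.join([d async for d in new_stream_response(inner)]))  # prints 'hello'


    asyncio.run(demo())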
File without changes
ommlds/minichain/backends/impls/ollama/chat.py (new file)

@@ -0,0 +1,199 @@
+import typing as ta
+
+from omlish import check
+from omlish import lang
+from omlish import marshal as msh
+from omlish import typedvalues as tv
+from omlish.formats import json
+from omlish.http import all as http
+from omlish.io.buffers import DelimitingBuffer
+
+from .....backends.ollama import protocol as pt
+from ....chat.choices.services import ChatChoicesOutputs
+from ....chat.choices.services import ChatChoicesRequest
+from ....chat.choices.services import ChatChoicesResponse
+from ....chat.choices.services import static_check_is_chat_choices_service
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
+from ....chat.choices.types import AiChoice
+from ....chat.messages import AiMessage
+from ....chat.messages import AnyAiMessage
+from ....chat.messages import Message
+from ....chat.messages import SystemMessage
+from ....chat.messages import UserMessage
+from ....chat.stream.types import ContentAiDelta
+from ....models.configs import ModelName
+from ....resources import UseResources
+from ....standard import ApiUrl
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response
+
+
+##
+
+
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     [
+#         'ChatChoicesService',
+#         'ChatChoicesStreamService',
+#     ],
+#     'ollama',
+# )
+
+
+##
+
+
+class BaseOllamaChatChoicesService(lang.Abstract):
+    DEFAULT_API_URL: ta.ClassVar[ApiUrl] = ApiUrl('http://localhost:11434/api')
+    DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName('llama3.2')
+
+    def __init__(
+            self,
+            *configs: ApiUrl | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
+        super().__init__()
+
+        self._http_client = http_client
+
+        with tv.consume(*configs) as cc:
+            self._api_url = cc.pop(self.DEFAULT_API_URL)
+            self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
+
+    #
+
+    ROLE_MAP: ta.ClassVar[ta.Mapping[type[Message], pt.Role]] = {  # noqa
+        SystemMessage: 'system',
+        UserMessage: 'user',
+        AiMessage: 'assistant',
+    }
+
+    @classmethod
+    def _get_message_content(cls, m: Message) -> str | None:
+        if isinstance(m, (AiMessage, UserMessage, SystemMessage)):
+            return check.isinstance(m.c, str)
+        else:
+            raise TypeError(m)
+
+    @classmethod
+    def _build_request_messages(cls, mc_msgs: ta.Iterable[Message]) -> ta.Sequence[pt.Message]:
+        messages: list[pt.Message] = []
+        for m in mc_msgs:
+            messages.append(pt.Message(
+                role=cls.ROLE_MAP[type(m)],
+                content=cls._get_message_content(m),
+            ))
+        return messages
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='ollama',
+#     type='ChatChoicesService',
+# )
+@static_check_is_chat_choices_service
+class OllamaChatChoicesService(BaseOllamaChatChoicesService):
+    async def invoke(
+            self,
+            request: ChatChoicesRequest,
+    ) -> ChatChoicesResponse:
+        messages = self._build_request_messages(request.v)
+
+        a_req = pt.ChatRequest(
+            model=self._model_name.v,
+            messages=messages,
+            # tools=tools or None,
+            stream=False,
+        )
+
+        raw_request = msh.marshal(a_req)
+
+        async with http.manage_async_client(self._http_client) as http_client:
+            raw_response = await http_client.request(http.HttpRequest(
+                self._api_url.v.removesuffix('/') + '/chat',
+                data=json.dumps(raw_request).encode('utf-8'),
+            ))
+
+        json_response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
+
+        resp = msh.unmarshal(json_response, pt.ChatResponse)
+
+        out: list[AnyAiMessage] = []
+        if resp.message.role == 'assistant':
+            out.append(AiMessage(
+                check.not_none(resp.message.content),
+            ))
+        else:
+            raise TypeError(resp.message.role)
+
+        return ChatChoicesResponse([
+            AiChoice(out),
+        ])
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='ollama',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class OllamaChatChoicesStreamService(BaseOllamaChatChoicesService):
+    READ_CHUNK_SIZE: ta.ClassVar[int] = -1
+
+    async def invoke(
+            self,
+            request: ChatChoicesStreamRequest,
+    ) -> ChatChoicesStreamResponse:
+        messages = self._build_request_messages(request.v)
+
+        a_req = pt.ChatRequest(
+            model=self._model_name.v,
+            messages=messages,
+            # tools=tools or None,
+            stream=True,
+        )
+
+        raw_request = msh.marshal(a_req)
+
+        http_request = http.HttpRequest(
+            self._api_url.v.removesuffix('/') + '/chat',
+            data=json.dumps(raw_request).encode('utf-8'),
+        )
+
+        async with UseResources.or_new(request.options) as rs:
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
+                db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
+                while True:
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
+                    for l in db.feed(b):
+                        if isinstance(l, DelimitingBuffer.Incomplete):
+                            # FIXME: handle
+                            return []
+
+                        lj = json.loads(l.decode('utf-8'))
+                        lp: pt.ChatResponse = msh.unmarshal(lj, pt.ChatResponse)
+
+                        check.state(lp.message.role == 'assistant')
+                        check.none(lp.message.tool_name)
+                        check.state(not lp.message.tool_calls)
+
+                        if (c := lp.message.content):
+                            await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(
+                                c,
+                            )])]))
+
+                    if not b:
+                        return []
+
+            return await new_stream_response(rs, inner)
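Ollama's streaming /api/chat endpoint responds with newline-delimited JSON objects, which the `DelimitingBuffer` loop above reassembles across arbitrary read boundaries. A dependency-free sketch of the same splitting (a hypothetical helper, not the package's code):

    import json


    def parse_ndjson_chunks(chunks):
        """Yield parsed JSON objects from byte chunks, splitting on CR/LF."""
        buf = b''
        for chunk in chunks:
            buf += chunk
            *lines, buf = buf.replace(b'\r\n', b'\n').replace(b'\r', b'\n').split(b'\n')
            for line in lines:
                if line:
                    yield json.loads(line.decode('utf-8'))
        if buf:  # trailing bytes with no delimiter ("Incomplete" in the diff)
            yield json.loads(buf.decode('utf-8'))


    # Example: a message split across two reads still parses cleanly.
    chunks = [b'{"message": {"role": "assistant", "content": "Hi"}}\n{"done"',
              b': true}\n']
    m1, m2 = parse_ndjson_chunks(chunks)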
ommlds/minichain/backends/impls/openai/chat.py

@@ -26,9 +26,9 @@ from ....chat.choices.services import static_check_is_chat_choices_service
 from ....models.configs import ModelName
 from ....standard import ApiKey
 from ....standard import DefaultOptions
-from .format2 import OpenaiChatRequestHandler
-from .format2 import build_mc_choices_response
-from .names import MODEL_NAMES
+from .format import OpenaiChatRequestHandler
+from .format import build_mc_choices_response
+from .names import CHAT_MODEL_NAMES


 ##
@@ -40,11 +40,17 @@ from .names import MODEL_NAMES
 # )
 @static_check_is_chat_choices_service
 class OpenaiChatChoicesService:
-    DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName(check.not_none(MODEL_NAMES.default))
+    DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName(check.not_none(CHAT_MODEL_NAMES.default))

-    def __init__(self, *configs: ApiKey | ModelName | DefaultOptions) -> None:
+    def __init__(
+            self,
+            *configs: ApiKey | ModelName | DefaultOptions,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()

+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
@@ -60,7 +66,7 @@ class OpenaiChatChoicesService:
                 *request.options,
                 override=True,
             ),
-            model=MODEL_NAMES.resolve(self._model_name.v),
+            model=CHAT_MODEL_NAMES.resolve(self._model_name.v),
             mandatory_kwargs=dict(
                 stream=False,
             ),
@@ -68,13 +74,14 @@

         raw_request = msh.marshal(rh.oai_request())

-        http_response = http.request(
+        http_response = await http.async_request(
             'https://api.openai.com/v1/chat/completions',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )

         raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
ommlds/minichain/backends/impls/openai/completion.py

@@ -23,9 +23,15 @@ from ....standard import ApiKey
 class OpenaiCompletionService:
     DEFAULT_MODEL_NAME: ta.ClassVar[str] = 'gpt-3.5-turbo-instruct'

-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()

+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
@@ -41,13 +47,14 @@
             stream=False,
         )

-        raw_response = http.request(
+        raw_response = await http.async_request(
             'https://api.openai.com/v1/completions',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )

         response = json.loads(check.not_none(raw_response.data).decode('utf-8'))