ommlds 0.0.0.dev466__py3-none-any.whl → 0.0.0.dev512__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. ommlds/.omlish-manifests.json +404 -31
  2. ommlds/README.md +11 -0
  3. ommlds/__about__.py +21 -12
  4. ommlds/_hacks/__init__.py +4 -0
  5. ommlds/_hacks/funcs.py +110 -0
  6. ommlds/_hacks/names.py +158 -0
  7. ommlds/_hacks/params.py +73 -0
  8. ommlds/_hacks/patches.py +0 -3
  9. ommlds/backends/anthropic/protocol/__init__.py +13 -1
  10. ommlds/backends/anthropic/protocol/_dataclasses.py +1625 -0
  11. ommlds/backends/anthropic/protocol/sse/events.py +2 -0
  12. ommlds/backends/anthropic/protocol/types.py +5 -7
  13. ommlds/backends/cerebras/__init__.py +7 -0
  14. ommlds/backends/cerebras/_dataclasses.py +4254 -0
  15. ommlds/backends/cerebras/_marshal.py +24 -0
  16. ommlds/backends/cerebras/clients.py +9 -0
  17. ommlds/backends/cerebras/protocol.py +310 -0
  18. ommlds/backends/google/protocol/__init__.py +13 -0
  19. ommlds/backends/google/protocol/_dataclasses.py +5997 -0
  20. ommlds/backends/google/protocol/types.py +6 -8
  21. ommlds/backends/groq/__init__.py +7 -0
  22. ommlds/backends/groq/_dataclasses.py +3901 -0
  23. ommlds/backends/groq/_marshal.py +23 -0
  24. ommlds/backends/groq/clients.py +9 -0
  25. ommlds/backends/groq/protocol.py +247 -0
  26. ommlds/{huggingface.py → backends/huggingface/cache.py} +1 -6
  27. ommlds/backends/huggingface/cli.py +208 -0
  28. ommlds/backends/llamacpp/logging.py +4 -1
  29. ommlds/backends/mlx/caching.py +7 -3
  30. ommlds/backends/mlx/cli.py +10 -7
  31. ommlds/backends/mlx/generation.py +18 -16
  32. ommlds/backends/mlx/limits.py +10 -6
  33. ommlds/backends/mlx/loading.py +7 -4
  34. ommlds/backends/ollama/__init__.py +7 -0
  35. ommlds/backends/ollama/_dataclasses.py +3940 -0
  36. ommlds/backends/ollama/cli.py +36 -0
  37. ommlds/backends/ollama/protocol.py +201 -0
  38. ommlds/backends/openai/protocol/__init__.py +15 -1
  39. ommlds/backends/openai/protocol/_common.py +3 -5
  40. ommlds/backends/openai/protocol/_dataclasses.py +7708 -0
  41. ommlds/backends/tavily/__init__.py +7 -0
  42. ommlds/backends/tavily/_dataclasses.py +1734 -0
  43. ommlds/backends/tavily/protocol.py +299 -0
  44. ommlds/backends/tinygrad/models/llama3/__init__.py +22 -14
  45. ommlds/backends/torch/backends.py +1 -1
  46. ommlds/backends/transformers/__init__.py +14 -0
  47. ommlds/backends/transformers/filecache.py +109 -0
  48. ommlds/backends/transformers/streamers.py +73 -0
  49. ommlds/cli/__init__.py +7 -0
  50. ommlds/cli/_dataclasses.py +3835 -0
  51. ommlds/cli/asyncs.py +30 -0
  52. ommlds/cli/backends/catalog.py +88 -0
  53. ommlds/cli/backends/configs.py +9 -0
  54. ommlds/cli/backends/inject.py +100 -42
  55. ommlds/cli/{sessions/chat/backends → backends}/injection.py +1 -1
  56. ommlds/cli/backends/meta.py +82 -0
  57. ommlds/cli/{sessions/chat/backends → backends}/types.py +11 -1
  58. ommlds/cli/{sessions/chat/content → content}/messages.py +2 -2
  59. ommlds/cli/{sessions/chat/content → content}/strings.py +1 -1
  60. ommlds/cli/inject.py +17 -8
  61. ommlds/cli/inputs/asyncs.py +32 -0
  62. ommlds/cli/inputs/sync.py +75 -0
  63. ommlds/cli/main.py +346 -114
  64. ommlds/cli/rendering/configs.py +9 -0
  65. ommlds/cli/{sessions/chat/rendering → rendering}/inject.py +4 -5
  66. ommlds/cli/{sessions/chat/rendering → rendering}/markdown.py +1 -1
  67. ommlds/cli/{sessions/chat/rendering → rendering}/raw.py +1 -1
  68. ommlds/cli/{sessions/chat/rendering → rendering}/types.py +7 -1
  69. ommlds/cli/secrets.py +22 -0
  70. ommlds/cli/sessions/base.py +1 -10
  71. ommlds/cli/sessions/chat/configs.py +13 -30
  72. ommlds/cli/sessions/chat/drivers/ai/configs.py +13 -0
  73. ommlds/cli/sessions/chat/drivers/ai/events.py +57 -0
  74. ommlds/cli/sessions/chat/{chat → drivers}/ai/inject.py +15 -12
  75. ommlds/cli/sessions/chat/{chat → drivers}/ai/rendering.py +8 -8
  76. ommlds/cli/sessions/chat/{chat → drivers}/ai/services.py +5 -5
  77. ommlds/cli/sessions/chat/{chat → drivers}/ai/tools.py +4 -8
  78. ommlds/cli/sessions/chat/{chat → drivers}/ai/types.py +10 -1
  79. ommlds/cli/sessions/chat/drivers/configs.py +25 -0
  80. ommlds/cli/sessions/chat/drivers/events/inject.py +27 -0
  81. ommlds/cli/sessions/chat/drivers/events/injection.py +14 -0
  82. ommlds/cli/sessions/chat/drivers/events/manager.py +16 -0
  83. ommlds/cli/sessions/chat/drivers/events/types.py +44 -0
  84. ommlds/cli/sessions/chat/drivers/impl.py +50 -0
  85. ommlds/cli/sessions/chat/drivers/inject.py +70 -0
  86. ommlds/cli/sessions/chat/drivers/state/configs.py +13 -0
  87. ommlds/cli/sessions/chat/drivers/state/ids.py +25 -0
  88. ommlds/cli/sessions/chat/drivers/state/inject.py +83 -0
  89. ommlds/cli/sessions/chat/{chat → drivers}/state/inmemory.py +1 -6
  90. ommlds/cli/sessions/chat/{chat → drivers}/state/storage.py +18 -12
  91. ommlds/cli/sessions/chat/{chat → drivers}/state/types.py +11 -6
  92. ommlds/cli/sessions/chat/drivers/tools/configs.py +22 -0
  93. ommlds/cli/sessions/chat/drivers/tools/confirmation.py +44 -0
  94. ommlds/cli/sessions/chat/drivers/tools/errorhandling.py +39 -0
  95. ommlds/cli/sessions/chat/{tools → drivers/tools}/execution.py +3 -4
  96. ommlds/cli/sessions/chat/drivers/tools/fs/__init__.py +0 -0
  97. ommlds/cli/sessions/chat/drivers/tools/fs/configs.py +12 -0
  98. ommlds/cli/sessions/chat/drivers/tools/fs/inject.py +35 -0
  99. ommlds/cli/sessions/chat/drivers/tools/inject.py +83 -0
  100. ommlds/cli/sessions/chat/{tools → drivers/tools}/injection.py +20 -5
  101. ommlds/cli/sessions/chat/{tools → drivers/tools}/rendering.py +3 -3
  102. ommlds/cli/sessions/chat/drivers/tools/todo/__init__.py +0 -0
  103. ommlds/cli/sessions/chat/drivers/tools/todo/configs.py +12 -0
  104. ommlds/cli/sessions/chat/drivers/tools/todo/inject.py +31 -0
  105. ommlds/cli/sessions/chat/drivers/tools/weather/__init__.py +0 -0
  106. ommlds/cli/sessions/chat/drivers/tools/weather/configs.py +12 -0
  107. ommlds/cli/sessions/chat/drivers/tools/weather/inject.py +22 -0
  108. ommlds/cli/sessions/chat/{tools/weather.py → drivers/tools/weather/tools.py} +1 -1
  109. ommlds/cli/sessions/chat/drivers/types.py +31 -0
  110. ommlds/cli/sessions/chat/drivers/user/__init__.py +0 -0
  111. ommlds/cli/sessions/chat/drivers/user/configs.py +14 -0
  112. ommlds/cli/sessions/chat/drivers/user/inject.py +41 -0
  113. ommlds/cli/sessions/chat/facades/__init__.py +0 -0
  114. ommlds/cli/sessions/chat/facades/commands/__init__.py +0 -0
  115. ommlds/cli/sessions/chat/facades/commands/base.py +83 -0
  116. ommlds/cli/sessions/chat/facades/commands/configs.py +9 -0
  117. ommlds/cli/sessions/chat/facades/commands/inject.py +41 -0
  118. ommlds/cli/sessions/chat/facades/commands/injection.py +15 -0
  119. ommlds/cli/sessions/chat/facades/commands/manager.py +59 -0
  120. ommlds/cli/sessions/chat/facades/commands/simple.py +34 -0
  121. ommlds/cli/sessions/chat/facades/commands/types.py +13 -0
  122. ommlds/cli/sessions/chat/facades/configs.py +11 -0
  123. ommlds/cli/sessions/chat/facades/facade.py +26 -0
  124. ommlds/cli/sessions/chat/facades/inject.py +35 -0
  125. ommlds/cli/sessions/chat/facades/ui.py +34 -0
  126. ommlds/cli/sessions/chat/inject.py +10 -49
  127. ommlds/cli/sessions/chat/interfaces/__init__.py +0 -0
  128. ommlds/cli/sessions/chat/interfaces/bare/__init__.py +0 -0
  129. ommlds/cli/sessions/chat/interfaces/bare/configs.py +15 -0
  130. ommlds/cli/sessions/chat/interfaces/bare/inject.py +69 -0
  131. ommlds/cli/sessions/chat/interfaces/bare/interactive.py +49 -0
  132. ommlds/cli/sessions/chat/interfaces/bare/oneshot.py +21 -0
  133. ommlds/cli/sessions/chat/{tools/confirmation.py → interfaces/bare/tools.py} +3 -22
  134. ommlds/cli/sessions/chat/interfaces/base.py +13 -0
  135. ommlds/cli/sessions/chat/interfaces/configs.py +11 -0
  136. ommlds/cli/sessions/chat/interfaces/inject.py +29 -0
  137. ommlds/cli/sessions/chat/interfaces/textual/__init__.py +0 -0
  138. ommlds/cli/sessions/chat/interfaces/textual/app.py +429 -0
  139. ommlds/cli/sessions/chat/interfaces/textual/configs.py +11 -0
  140. ommlds/cli/sessions/chat/interfaces/textual/facades.py +19 -0
  141. ommlds/cli/sessions/chat/interfaces/textual/inject.py +111 -0
  142. ommlds/cli/sessions/chat/interfaces/textual/inputhistory.py +174 -0
  143. ommlds/cli/sessions/chat/interfaces/textual/interface.py +24 -0
  144. ommlds/cli/sessions/chat/interfaces/textual/styles/__init__.py +29 -0
  145. ommlds/cli/sessions/chat/interfaces/textual/styles/input.tcss +53 -0
  146. ommlds/cli/sessions/chat/interfaces/textual/styles/markdown.tcss +7 -0
  147. ommlds/cli/sessions/chat/interfaces/textual/styles/messages.tcss +167 -0
  148. ommlds/cli/sessions/chat/interfaces/textual/tools.py +38 -0
  149. ommlds/cli/sessions/chat/interfaces/textual/widgets/__init__.py +0 -0
  150. ommlds/cli/sessions/chat/interfaces/textual/widgets/input.py +70 -0
  151. ommlds/cli/sessions/chat/interfaces/textual/widgets/messages.py +207 -0
  152. ommlds/cli/sessions/chat/session.py +8 -13
  153. ommlds/cli/sessions/completion/configs.py +5 -6
  154. ommlds/cli/sessions/completion/inject.py +15 -2
  155. ommlds/cli/sessions/completion/session.py +10 -18
  156. ommlds/cli/sessions/configs.py +10 -0
  157. ommlds/cli/sessions/embedding/configs.py +5 -6
  158. ommlds/cli/sessions/embedding/inject.py +15 -2
  159. ommlds/cli/sessions/embedding/session.py +10 -18
  160. ommlds/cli/sessions/inject.py +15 -15
  161. ommlds/cli/state/storage.py +8 -2
  162. ommlds/minichain/__init__.py +217 -60
  163. ommlds/minichain/_dataclasses.py +20640 -0
  164. ommlds/minichain/_typedvalues.py +15 -8
  165. ommlds/minichain/backends/catalogs/base.py +20 -1
  166. ommlds/minichain/backends/catalogs/simple.py +2 -2
  167. ommlds/minichain/backends/catalogs/strings.py +13 -10
  168. ommlds/minichain/backends/impls/anthropic/chat.py +28 -5
  169. ommlds/minichain/backends/impls/anthropic/names.py +3 -3
  170. ommlds/minichain/backends/impls/anthropic/protocol.py +2 -2
  171. ommlds/minichain/backends/impls/anthropic/stream.py +23 -18
  172. ommlds/minichain/backends/impls/cerebras/__init__.py +0 -0
  173. ommlds/minichain/backends/impls/cerebras/chat.py +82 -0
  174. ommlds/minichain/backends/impls/cerebras/names.py +45 -0
  175. ommlds/minichain/backends/impls/cerebras/protocol.py +143 -0
  176. ommlds/minichain/backends/impls/cerebras/stream.py +114 -0
  177. ommlds/minichain/backends/impls/duckduckgo/search.py +5 -1
  178. ommlds/minichain/backends/impls/dummy/__init__.py +0 -0
  179. ommlds/minichain/backends/impls/dummy/chat.py +69 -0
  180. ommlds/minichain/backends/impls/google/chat.py +20 -84
  181. ommlds/minichain/backends/impls/google/names.py +6 -0
  182. ommlds/minichain/backends/impls/google/protocol.py +105 -0
  183. ommlds/minichain/backends/impls/google/search.py +10 -5
  184. ommlds/minichain/backends/impls/google/stream.py +64 -142
  185. ommlds/minichain/backends/impls/google/tools.py +2 -2
  186. ommlds/minichain/backends/impls/groq/__init__.py +0 -0
  187. ommlds/minichain/backends/impls/groq/chat.py +77 -0
  188. ommlds/minichain/backends/impls/groq/names.py +48 -0
  189. ommlds/minichain/backends/impls/groq/protocol.py +143 -0
  190. ommlds/minichain/backends/impls/groq/stream.py +114 -0
  191. ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
  192. ommlds/minichain/backends/impls/llamacpp/chat.py +15 -3
  193. ommlds/minichain/backends/impls/llamacpp/completion.py +7 -3
  194. ommlds/minichain/backends/impls/llamacpp/stream.py +38 -19
  195. ommlds/minichain/backends/impls/mistral.py +9 -2
  196. ommlds/minichain/backends/impls/mlx/chat.py +100 -23
  197. ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
  198. ommlds/minichain/backends/impls/ollama/chat.py +193 -0
  199. ommlds/minichain/backends/impls/ollama/protocol.py +144 -0
  200. ommlds/minichain/backends/impls/openai/chat.py +14 -7
  201. ommlds/minichain/backends/impls/openai/completion.py +9 -2
  202. ommlds/minichain/backends/impls/openai/embedding.py +9 -2
  203. ommlds/minichain/backends/impls/openai/format.py +117 -115
  204. ommlds/minichain/backends/impls/openai/names.py +33 -5
  205. ommlds/minichain/backends/impls/openai/stream.py +61 -70
  206. ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
  207. ommlds/minichain/backends/impls/tavily.py +66 -0
  208. ommlds/minichain/backends/impls/tinygrad/chat.py +17 -14
  209. ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
  210. ommlds/minichain/backends/impls/transformers/sentence.py +5 -2
  211. ommlds/minichain/backends/impls/transformers/tokens.py +9 -6
  212. ommlds/minichain/backends/impls/transformers/transformers.py +139 -20
  213. ommlds/minichain/backends/strings/parsing.py +2 -2
  214. ommlds/minichain/backends/strings/resolving.py +7 -2
  215. ommlds/minichain/chat/choices/stream/__init__.py +0 -0
  216. ommlds/minichain/chat/{stream → choices/stream}/adapters.py +7 -7
  217. ommlds/minichain/chat/choices/stream/joining.py +31 -0
  218. ommlds/minichain/chat/choices/stream/services.py +45 -0
  219. ommlds/minichain/chat/choices/stream/types.py +43 -0
  220. ommlds/minichain/chat/content.py +42 -0
  221. ommlds/minichain/chat/messages.py +46 -42
  222. ommlds/minichain/chat/stream/_marshal.py +4 -4
  223. ommlds/minichain/chat/stream/joining.py +56 -43
  224. ommlds/minichain/chat/stream/services.py +15 -15
  225. ommlds/minichain/chat/stream/types.py +17 -24
  226. ommlds/minichain/chat/templating.py +3 -3
  227. ommlds/minichain/content/__init__.py +20 -3
  228. ommlds/minichain/content/_marshal.py +181 -55
  229. ommlds/minichain/content/code.py +26 -0
  230. ommlds/minichain/content/composite.py +28 -0
  231. ommlds/minichain/content/content.py +27 -0
  232. ommlds/minichain/content/dynamic.py +12 -0
  233. ommlds/minichain/content/emphasis.py +27 -0
  234. ommlds/minichain/content/images.py +2 -2
  235. ommlds/minichain/content/json.py +2 -2
  236. ommlds/minichain/content/link.py +13 -0
  237. ommlds/minichain/content/markdown.py +12 -0
  238. ommlds/minichain/content/metadata.py +10 -0
  239. ommlds/minichain/content/namespaces.py +8 -0
  240. ommlds/minichain/content/placeholders.py +10 -9
  241. ommlds/minichain/content/quote.py +26 -0
  242. ommlds/minichain/content/raw.py +49 -0
  243. ommlds/minichain/content/recursive.py +12 -0
  244. ommlds/minichain/content/resources.py +22 -0
  245. ommlds/minichain/content/section.py +26 -0
  246. ommlds/minichain/content/sequence.py +17 -3
  247. ommlds/minichain/content/standard.py +32 -0
  248. ommlds/minichain/content/tag.py +28 -0
  249. ommlds/minichain/content/templates.py +13 -0
  250. ommlds/minichain/content/text.py +2 -2
  251. ommlds/minichain/content/transform/__init__.py +0 -0
  252. ommlds/minichain/content/transform/json.py +55 -0
  253. ommlds/minichain/content/transform/markdown.py +8 -0
  254. ommlds/minichain/content/transform/materialize.py +59 -0
  255. ommlds/minichain/content/transform/metadata.py +16 -0
  256. ommlds/minichain/content/transform/namespaces.py +20 -0
  257. ommlds/minichain/content/transform/placeholders.py +60 -0
  258. ommlds/minichain/content/{prepare.py → transform/prepare.py} +10 -15
  259. ommlds/minichain/content/transform/recursive.py +54 -0
  260. ommlds/minichain/content/transform/resources.py +58 -0
  261. ommlds/minichain/content/transform/standard.py +43 -0
  262. ommlds/minichain/content/{transforms → transform}/stringify.py +1 -7
  263. ommlds/minichain/content/transform/strings.py +33 -0
  264. ommlds/minichain/content/transform/templates.py +25 -0
  265. ommlds/minichain/content/transform/types.py +18 -0
  266. ommlds/minichain/content/transform/visitors.py +38 -0
  267. ommlds/minichain/content/visitors.py +218 -0
  268. ommlds/minichain/http/__init__.py +0 -0
  269. ommlds/minichain/http/stream.py +195 -0
  270. ommlds/minichain/lib/fs/tools/read.py +1 -1
  271. ommlds/minichain/lib/fs/tools/recursivels/rendering.py +1 -1
  272. ommlds/minichain/lib/fs/tools/recursivels/running.py +1 -1
  273. ommlds/minichain/lib/todo/tools/write.py +2 -1
  274. ommlds/minichain/lib/todo/types.py +1 -1
  275. ommlds/minichain/llms/types.py +4 -0
  276. ommlds/minichain/metadata.py +56 -2
  277. ommlds/minichain/models/configs.py +2 -2
  278. ommlds/minichain/models/names.py +2 -0
  279. ommlds/minichain/registries/globals.py +18 -4
  280. ommlds/minichain/resources.py +49 -3
  281. ommlds/minichain/search.py +1 -1
  282. ommlds/minichain/services/README.md +154 -0
  283. ommlds/minichain/services/__init__.py +6 -2
  284. ommlds/minichain/services/_marshal.py +46 -10
  285. ommlds/minichain/services/_origclasses.py +11 -0
  286. ommlds/minichain/services/_typedvalues.py +8 -3
  287. ommlds/minichain/services/requests.py +73 -3
  288. ommlds/minichain/services/responses.py +73 -3
  289. ommlds/minichain/services/services.py +9 -0
  290. ommlds/minichain/standard.py +8 -0
  291. ommlds/minichain/stream/services.py +43 -17
  292. ommlds/minichain/text/applypatch.py +2 -1
  293. ommlds/minichain/text/toolparsing/llamacpp/types.py +1 -1
  294. ommlds/minichain/tokens/specials.py +1 -1
  295. ommlds/minichain/tools/execution/catalog.py +2 -2
  296. ommlds/minichain/tools/execution/errorhandling.py +36 -0
  297. ommlds/minichain/tools/execution/errors.py +2 -2
  298. ommlds/minichain/tools/execution/executors.py +1 -1
  299. ommlds/minichain/tools/fns.py +1 -1
  300. ommlds/minichain/tools/jsonschema.py +2 -2
  301. ommlds/minichain/tools/reflect.py +11 -7
  302. ommlds/minichain/tools/types.py +16 -19
  303. ommlds/minichain/vectors/_marshal.py +1 -1
  304. ommlds/minichain/vectors/embeddings.py +1 -1
  305. ommlds/minichain/wrappers/__init__.py +7 -0
  306. ommlds/minichain/wrappers/firstinwins.py +144 -0
  307. ommlds/minichain/wrappers/instrument.py +146 -0
  308. ommlds/minichain/wrappers/retry.py +168 -0
  309. ommlds/minichain/wrappers/services.py +98 -0
  310. ommlds/minichain/wrappers/stream.py +57 -0
  311. ommlds/nanochat/LICENSE +21 -0
  312. ommlds/nanochat/__init__.py +0 -0
  313. ommlds/nanochat/rustbpe/LICENSE +21 -0
  314. ommlds/nanochat/rustbpe/README.md +10 -0
  315. ommlds/nanochat/tokenizers.py +440 -0
  316. ommlds/specs/__init__.py +0 -0
  317. ommlds/specs/mcp/__init__.py +0 -0
  318. ommlds/specs/mcp/_marshal.py +23 -0
  319. ommlds/specs/mcp/clients.py +146 -0
  320. ommlds/specs/mcp/protocol.py +369 -0
  321. ommlds/tools/git.py +84 -64
  322. ommlds/tools/ocr.py +1 -1
  323. ommlds/wiki/analyze.py +2 -2
  324. ommlds/wiki/models.py +4 -4
  325. ommlds/wiki/text/mfh.py +9 -9
  326. ommlds/wiki/utils/xml.py +5 -5
  327. {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev512.dist-info}/METADATA +28 -21
  328. ommlds-0.0.0.dev512.dist-info/RECORD +534 -0
  329. {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev512.dist-info}/WHEEL +1 -1
  330. ommlds/cli/backends/standard.py +0 -20
  331. ommlds/cli/sessions/chat/backends/catalog.py +0 -56
  332. ommlds/cli/sessions/chat/backends/inject.py +0 -37
  333. ommlds/cli/sessions/chat/chat/state/inject.py +0 -40
  334. ommlds/cli/sessions/chat/chat/user/inject.py +0 -61
  335. ommlds/cli/sessions/chat/chat/user/interactive.py +0 -29
  336. ommlds/cli/sessions/chat/chat/user/oneshot.py +0 -25
  337. ommlds/cli/sessions/chat/chat/user/types.py +0 -15
  338. ommlds/cli/sessions/chat/driver.py +0 -43
  339. ommlds/cli/sessions/chat/tools/inject.py +0 -145
  340. ommlds/minichain/backends/impls/openai/format2.py +0 -210
  341. ommlds/minichain/content/materialize.py +0 -196
  342. ommlds/minichain/content/simple.py +0 -47
  343. ommlds/minichain/content/transforms/base.py +0 -46
  344. ommlds/minichain/content/transforms/interleave.py +0 -70
  345. ommlds/minichain/content/transforms/squeeze.py +0 -72
  346. ommlds/minichain/content/transforms/strings.py +0 -24
  347. ommlds/minichain/content/types.py +0 -43
  348. ommlds/minichain/stream/wrap.py +0 -62
  349. ommlds-0.0.0.dev466.dist-info/RECORD +0 -376
  350. /ommlds/{cli/sessions/chat/backends → backends/huggingface}/__init__.py +0 -0
  351. /ommlds/cli/{sessions/chat/chat → content}/__init__.py +0 -0
  352. /ommlds/cli/{sessions/chat/chat/ai → inputs}/__init__.py +0 -0
  353. /ommlds/cli/{sessions/chat/chat/state → rendering}/__init__.py +0 -0
  354. /ommlds/cli/sessions/chat/{chat/user → drivers}/__init__.py +0 -0
  355. /ommlds/cli/sessions/chat/{content → drivers/ai}/__init__.py +0 -0
  356. /ommlds/cli/sessions/chat/{chat → drivers}/ai/injection.py +0 -0
  357. /ommlds/cli/sessions/chat/{phases → drivers/events}/__init__.py +0 -0
  358. /ommlds/cli/sessions/chat/{rendering → drivers/phases}/__init__.py +0 -0
  359. /ommlds/cli/sessions/chat/{phases → drivers/phases}/inject.py +0 -0
  360. /ommlds/cli/sessions/chat/{phases → drivers/phases}/injection.py +0 -0
  361. /ommlds/cli/sessions/chat/{phases → drivers/phases}/manager.py +0 -0
  362. /ommlds/cli/sessions/chat/{phases → drivers/phases}/types.py +0 -0
  363. /ommlds/cli/sessions/chat/{tools → drivers/state}/__init__.py +0 -0
  364. /ommlds/{minichain/content/transforms → cli/sessions/chat/drivers/tools}/__init__.py +0 -0
  365. {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev512.dist-info}/entry_points.txt +0 -0
  366. {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev512.dist-info}/licenses/LICENSE +0 -0
  367. {ommlds-0.0.0.dev466.dist-info → ommlds-0.0.0.dev512.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,23 @@
1
+ from omlish import lang
2
+ from omlish import marshal as msh
3
+
4
+ from .protocol import ChatCompletionRequest
5
+
6
+
7
+ ##
8
+
9
+
10
+ @lang.static_init
11
+ def _install_standard_marshaling() -> None:
12
+ for root_cls, tag_field in [
13
+ (ChatCompletionRequest.Message, 'role'),
14
+ ]:
15
+ msh.install_standard_factories(*msh.standard_polymorphism_factories(
16
+ msh.polymorphism_from_subclasses(
17
+ root_cls,
18
+ naming=msh.Naming.SNAKE,
19
+ strip_suffix=msh.AutoStripSuffix,
20
+ ),
21
+ msh.FieldTypeTagging(tag_field),
22
+ unions='partial',
23
+ ))
@@ -0,0 +1,9 @@
1
+ import typing as ta
2
+
3
+
4
+ ##
5
+
6
+
7
+ REQUIRED_HTTP_HEADERS: ta.Mapping[bytes, bytes] = {
8
+ b'User-Agent': b'python-httpx/0.28.1', # required or it 403's lol
9
+ }
@@ -0,0 +1,247 @@
1
+ """
2
+ https://console.groq.com/docs/api-reference#chat-create
3
+ """
4
+ import typing as ta
5
+
6
+ from omlish import dataclasses as dc
7
+ from omlish import lang
8
+ from omlish import marshal as msh
9
+
10
+
11
+ ##
12
+
13
+
14
+ def _set_class_marshal_options(cls):
15
+ msh.update_object_options(
16
+ cls,
17
+ field_defaults=msh.FieldOptions(
18
+ omit_if=lang.is_none,
19
+ ),
20
+ )
21
+
22
+ return cls
23
+
24
+
25
+ ##
26
+
27
+
28
+ @dc.dataclass(frozen=True, kw_only=True)
29
+ @_set_class_marshal_options
30
+ class ChatCompletionRequest(lang.Final):
31
+ @dc.dataclass(frozen=True, kw_only=True)
32
+ class Message(lang.Sealed, lang.Abstract):
33
+ pass
34
+
35
+ @dc.dataclass(frozen=True, kw_only=True)
36
+ @_set_class_marshal_options
37
+ class SystemMessage(Message, lang.Final):
38
+ content: str | ta.Sequence[str]
39
+ name: str | None = None
40
+ role: ta.Literal['system'] = 'system'
41
+
42
+ @dc.dataclass(frozen=True, kw_only=True)
43
+ @_set_class_marshal_options
44
+ class UserMessage(Message, lang.Final):
45
+ content: str | ta.Sequence[str]
46
+ name: str | None = None
47
+ role: ta.Literal['user'] = 'user'
48
+
49
+ @dc.dataclass(frozen=True, kw_only=True)
50
+ @_set_class_marshal_options
51
+ class AssistantMessage(Message, lang.Final):
52
+ content: str | ta.Sequence[str] | None = None
53
+ name: str | None = None
54
+ reasoning: str | None = None
55
+ role: ta.Literal['assistant'] = 'assistant'
56
+
57
+ @dc.dataclass(frozen=True, kw_only=True)
58
+ @_set_class_marshal_options
59
+ class ToolCall(lang.Final):
60
+ @dc.dataclass(frozen=True, kw_only=True)
61
+ @_set_class_marshal_options
62
+ class Function(lang.Final):
63
+ arguments: str
64
+ name: str
65
+
66
+ function: Function
67
+ id: str
68
+ type: ta.Literal['function'] = 'function'
69
+
70
+ tool_calls: ta.Sequence[ToolCall] | None = None
71
+
72
+ @dc.dataclass(frozen=True, kw_only=True)
73
+ @_set_class_marshal_options
74
+ class ToolMessage(Message, lang.Final):
75
+ content: str | ta.Sequence[str]
76
+ role: ta.Literal['tool'] = 'tool'
77
+ tool_call_id: str
78
+
79
+ messages: ta.Sequence[Message]
80
+ model: str
81
+ citation_options: ta.Literal['enabled', 'disabled'] | None = None
82
+ compound_custom: ta.Mapping[str, ta.Any] | None = None
83
+ disable_tool_validation: bool | None = None
84
+ documents: ta.Sequence[ta.Mapping[str, ta.Any]] | None = None
85
+ frequency_penalty: float | None = None
86
+ include_reasoning: bool | None = None
87
+ logit_bias: ta.Mapping[str, ta.Any] | None = None
88
+ logprobs: bool | None = None
89
+ max_completion_tokens: int | None = None
90
+ n: int | None = None
91
+ parallel_tool_calls: bool | None = None
92
+ presence_penalty: float | None = None
93
+ reasoning_effort: ta.Literal['none', 'default', 'low', 'medium', 'high'] | None = None
94
+ reasoning_format: ta.Literal['hidden', 'raw', 'parsed'] | None = None
95
+ response_format: ta.Any | None = None
96
+ search_settings: ta.Mapping[str, ta.Any] | None = None
97
+ seed: int | None = None
98
+ service_tier: ta.Literal['auto', 'on_demand', 'flex', 'performance', 'null'] | None = None
99
+ stop: str | ta.Sequence[str] | None = None
100
+ store: bool | None = None
101
+ stream: bool | None = None
102
+ stream_options: ta.Mapping[str, ta.Any] | None = None
103
+ temperature: float | None = None
104
+ tool_choice: str | None = None
105
+
106
+ @dc.dataclass(frozen=True, kw_only=True)
107
+ @_set_class_marshal_options
108
+ class Tool(lang.Final):
109
+ @dc.dataclass(frozen=True, kw_only=True)
110
+ @_set_class_marshal_options
111
+ class Function(lang.Final):
112
+ description: str | None = None
113
+ name: str
114
+ parameters: ta.Mapping[str, ta.Any] | None = None # json schema
115
+ strict: bool | None = None
116
+
117
+ function: Function
118
+ type: ta.Literal['function', 'browser_search', 'code_interpreter'] = 'function'
119
+
120
+ tools: ta.Sequence[Tool] | None = None
121
+
122
+ top_logprobs: int | None = None
123
+ top_p: float | None = None
124
+ user: str | None = None
125
+
126
+
127
+ @dc.dataclass(frozen=True, kw_only=True)
128
+ @_set_class_marshal_options
129
+ class ExecutedTool(lang.Final):
130
+ arguments: str
131
+ index: int
132
+ type: str
133
+ browser_results: ta.Sequence[ta.Any] | None = None
134
+ code_results: ta.Sequence[ta.Any] | None = None
135
+ output: str | None = None
136
+ search_results: ta.Any | None = None
137
+
138
+
139
+ @dc.dataclass(frozen=True, kw_only=True)
140
+ @_set_class_marshal_options
141
+ class ChatCompletionResponse(lang.Final):
142
+ @dc.dataclass(frozen=True, kw_only=True)
143
+ @_set_class_marshal_options
144
+ class Choice(lang.Final):
145
+ finish_reason: ta.Literal['stop', 'length', 'tool_calls', 'function_call']
146
+ index: int
147
+ logprobs: ta.Mapping[str, ta.Any] | None = None
148
+
149
+ @dc.dataclass(frozen=True, kw_only=True)
150
+ @_set_class_marshal_options
151
+ class Message(lang.Final):
152
+ annotations: ta.Sequence[ta.Mapping[str, ta.Any]] | None = None
153
+ content: str | None = None
154
+
155
+ executed_tools: ta.Sequence[ExecutedTool] | None = None
156
+
157
+ reasoning: str | None = None
158
+ role: ta.Literal['assistant'] = 'assistant'
159
+
160
+ @dc.dataclass(frozen=True, kw_only=True)
161
+ @_set_class_marshal_options
162
+ class ToolCall(lang.Final):
163
+ id: str
164
+
165
+ @dc.dataclass(frozen=True, kw_only=True)
166
+ @_set_class_marshal_options
167
+ class Function(lang.Final):
168
+ arguments: str
169
+ name: str
170
+
171
+ function: Function
172
+ type: ta.Literal['function'] = 'function'
173
+
174
+ tool_calls: ta.Sequence[ToolCall] | None = None
175
+
176
+ message: Message
177
+
178
+ choices: ta.Sequence[Choice]
179
+ created: int
180
+ id: str
181
+ model: str
182
+ object: ta.Literal['chat.completion'] = 'chat.completion'
183
+ system_fingerprint: str
184
+ usage: ta.Mapping[str, ta.Any] | None = None
185
+ usage_breakdown: ta.Mapping[str, ta.Any] | None = None
186
+ x_groq: ta.Mapping[str, ta.Any] | None = None
187
+ service_tier: str | None = None
188
+
189
+
190
+ @dc.dataclass(frozen=True, kw_only=True)
191
+ @_set_class_marshal_options
192
+ class ChatCompletionChunk(lang.Final):
193
+ id: str
194
+ object: ta.Literal['chat.completion.chunk'] = 'chat.completion.chunk'
195
+ created: int
196
+ model: str
197
+ system_fingerprint: str
198
+
199
+ @dc.dataclass(frozen=True, kw_only=True)
200
+ @_set_class_marshal_options
201
+ class Choice(lang.Final):
202
+ index: int
203
+
204
+ @dc.dataclass(frozen=True, kw_only=True)
205
+ @_set_class_marshal_options
206
+ class Delta(lang.Final):
207
+ role: str | None = None
208
+ content: str | None = None
209
+
210
+ channel: str | None = None
211
+ reasoning: str | None = None
212
+
213
+ @dc.dataclass(frozen=True, kw_only=True)
214
+ @_set_class_marshal_options
215
+ class ToolCall(lang.Final):
216
+ index: int
217
+ id: str | None = None
218
+
219
+ @dc.dataclass(frozen=True, kw_only=True)
220
+ @_set_class_marshal_options
221
+ class Function(lang.Final):
222
+ arguments: str | None = None
223
+ name: str | None = None
224
+
225
+ function: Function | None = None
226
+
227
+ type: ta.Literal['function'] = 'function'
228
+
229
+ tool_calls: ta.Sequence[ToolCall] | None = None
230
+
231
+ executed_tools: ta.Sequence[ExecutedTool] | None = None
232
+
233
+ delta: Delta
234
+ logprobs: ta.Mapping[str, ta.Any] | None = None
235
+ finish_reason: ta.Literal['stop', 'length', 'tool_calls', 'function_call'] | None = None
236
+
237
+ choices: ta.Sequence[Choice]
238
+
239
+ x_groq: ta.Mapping[str, ta.Any] | None = None
240
+ service_tier: str | None = None
241
+ usage: ta.Mapping[str, ta.Any] | None = None
242
+
243
+
244
+ ##
245
+
246
+
247
+ msh.register_global_module_import('._marshal', __package__)
@@ -11,15 +11,10 @@ from omlish import check
11
11
  from omlish import lang
12
12
 
13
13
 
14
- if ta.TYPE_CHECKING:
14
+ with lang.auto_proxy_import(globals()):
15
15
  import huggingface_hub as hf
16
16
  import huggingface_hub.errors # noqa
17
17
  import huggingface_hub.utils # noqa
18
- else:
19
- hf = lang.proxy_import('huggingface_hub', extras=[
20
- 'errors',
21
- 'utils',
22
- ])
23
18
 
24
19
 
25
20
  ##
@@ -0,0 +1,208 @@
1
+ import datetime
2
+ import os
3
+ import sys
4
+ import typing as ta
5
+
6
+ from omdev.cli import CliModule
7
+ from omlish import lang
8
+ from omlish.argparse import all as ap
9
+ from omlish.formats import json
10
+ from omlish.logs import all as logs
11
+ from omlish.term.confirm import confirm_action
12
+
13
+
14
+ with lang.auto_proxy_import(globals()):
15
+ import huggingface_hub as hf
16
+ import huggingface_hub.errors # noqa
17
+ import huggingface_hub.utils # noqa
18
+
19
+
20
+ log = logs.get_module_logger(globals())
21
+
22
+
23
+ ##
24
+
25
+
26
def fmt_ts(f: float) -> str:
    """
    Format a POSIX timestamp as an ISO 8601 string.

    The timestamp is converted with `datetime.datetime.fromtimestamp` without a `tz` argument, so the result is a naive
    datetime in the local timezone and the returned string carries no UTC offset.

    Args:
        f: Seconds since the epoch.

    Returns:
        The `isoformat()` rendering of the corresponding local datetime.
    """

    dt = datetime.datetime.fromtimestamp(f)  # noqa
    return dt.isoformat()
29
+
30
+
31
class Cli(ap.Cli):
    """
    Command-line helpers around the local Hugging Face cache.

    Commands:
     - `cli`: exec `huggingface_hub.cli.hf` under the current interpreter, forwarding all arguments.
     - `scan`: print a JSON dump of every cached repo, including revisions and files.
     - `list`: print a JSON summary of cached model repos that contain files.
     - `rm`: delete cached revisions selected by repo id or commit hash.
    """

    def _passthrough_args_cmd(
            self,
            exe: str,
            pre_args: ta.Sequence[str] = (),
            post_args: ta.Sequence[str] = (),
    ) -> ta.NoReturn:
        """
        Replace the current process with `exe`, forwarding this command's arguments.

        The new argv is `exe` itself (argv[0]), followed by `pre_args`, the unrecognized arguments, the explicit `args`
        remainder, and finally `post_args`.

        Does not return: the process image is replaced on success, and `os.execvp` raises on failure.
        """

        os.execvp(
            exe,
            [
                # argv[0] names the program being executed, so it must track `exe` rather than always being the current
                # interpreter.
                exe,
                *pre_args,
                *self.unknown_args,
                *self.args.args,
                *post_args,
            ],
        )

    @ap.cmd(
        ap.arg('args', nargs=ap.REMAINDER),
        name='cli',
        accepts_unknown=True,
    )
    def cli_cmd(self) -> None:
        """Run the `huggingface_hub.cli.hf` module with the current interpreter, passing all arguments through."""

        self._passthrough_args_cmd(sys.executable, ['-m', 'huggingface_hub.cli.hf'])

    #

    @ap.cmd(
        ap.arg('--dir'),
    )
    def scan(self) -> None:
        """
        Print a JSON dump of every repo found by scanning the cache directory.

        Repos are ordered by ascending `last_accessed`, revisions by ascending `last_modified`, and files by ascending
        `blob_last_accessed`.
        """

        hf_cache_info = hf.utils.scan_cache_dir(self.args.dir)

        repo_dcts = [
            {
                'repo_id': repo.repo_id,
                'repo_type': repo.repo_type,

                'repo_path': str(repo.repo_path),

                'size_on_disk': repo.size_on_disk,
                'size_on_disk_str': repo.size_on_disk_str,

                'nb_files': repo.nb_files,

                'revisions': [
                    {
                        'commit_hash': rev.commit_hash,

                        'snapshot_path': str(rev.snapshot_path),

                        'size_on_disk': rev.size_on_disk,

                        'files': [
                            {
                                'file_name': file.file_name,
                                'file_path': str(file.file_path),
                                'blob_path': str(file.blob_path),

                                'size_on_disk': file.size_on_disk,
                                'size_on_disk_str': file.size_on_disk_str,

                                'blob_last_modified': fmt_ts(file.blob_last_modified),
                                'blob_last_modified_str': file.blob_last_modified_str,
                                'blob_last_accessed': fmt_ts(file.blob_last_accessed),
                                'blob_last_accessed_str': file.blob_last_accessed_str,
                            }
                            for file in sorted(rev.files, key=lambda f: f.blob_last_accessed)
                        ],

                        'refs': sorted(rev.refs),

                        'last_modified': fmt_ts(rev.last_modified),
                        'last_modified_str': rev.last_modified_str,
                    }
                    for rev in sorted(repo.revisions, key=lambda r: r.last_modified)
                ],

                'last_modified': fmt_ts(repo.last_modified),
                'last_modified_str': repo.last_modified_str,
                'last_accessed': fmt_ts(repo.last_accessed),
                'last_accessed_str': repo.last_accessed_str,

                'refs': sorted(repo.refs),
            }
            for repo in sorted(hf_cache_info.repos, key=lambda r: r.last_accessed)
        ]

        print(json.dumps_pretty(repo_dcts))

    @ap.cmd(
        ap.arg('--dir'),
    )
    def list(self) -> None:
        """
        Print a JSON summary of cached repos whose type is `'model'` and which contain at least one file.

        Repos are ordered by ascending `last_accessed`. Revisions and files are not included.
        """

        hf_cache_info = hf.utils.scan_cache_dir(self.args.dir)

        repos = [
            repo
            for repo in hf_cache_info.repos
            if repo.repo_type == 'model'
            and repo.nb_files
        ]

        repo_dcts = [
            {
                'repo_id': repo.repo_id,
                'repo_type': repo.repo_type,

                'repo_path': str(repo.repo_path),

                'size_on_disk': repo.size_on_disk,
                'size_on_disk_str': repo.size_on_disk_str,

                'nb_files': repo.nb_files,

                'last_modified': fmt_ts(repo.last_modified),
                'last_modified_str': repo.last_modified_str,
                'last_accessed': fmt_ts(repo.last_accessed),
                'last_accessed_str': repo.last_accessed_str,
            }
            for repo in sorted(repos, key=lambda r: r.last_accessed)
        ]

        print(json.dumps_pretty(repo_dcts))

    @ap.cmd(
        # `nargs='+'` so several keys may be given at once; a positional declared with `action='append'` only ever
        # consumes a single value, which made the loop over keys below pointless.
        ap.arg('key', nargs='+'),
        ap.arg('--dir'),
        ap.arg('--dry-run', action='store_true'),
        ap.arg('--no-confirm', action='store_true'),
    )
    def rm(self) -> None:
        """
        Delete cached revisions selected by one or more keys.

        Each key is matched first against repo ids, selecting every revision of that repo, and otherwise against
        revision commit hashes, selecting that single revision. All keys are resolved before anything is deleted.

        Unless `--no-confirm` is given each revision is confirmed individually, and declining any confirmation stops
        the command without processing the remaining revisions. With `--dry-run` nothing is deleted.

        Raises:
            ValueError: If no key is given, or a key matches neither a repo id nor a commit hash.
        """

        if not self.args.key:
            raise ValueError('key is required')

        hf_cache_info = hf.utils.scan_cache_dir(self.args.dir)

        repos_by_id = {repo.repo_id: repo for repo in hf_cache_info.repos}
        repos_by_rev = {rev.commit_hash: repo for repo in hf_cache_info.repos for rev in repo.revisions}

        # A dict with `None` values serves as an insertion-ordered, de-duplicating set of commit hashes.
        rm_revs: dict[str, None] = {}

        for key in self.args.key:
            if key in repos_by_id:
                rm_revs.update(dict.fromkeys(rev.commit_hash for rev in repos_by_id[key].revisions))
            elif key in repos_by_rev:
                rm_revs[key] = None
            else:
                raise ValueError(f'key {key} not found')

        for rm_rev in rm_revs:
            rm_repo = repos_by_rev[rm_rev]

            if not self.args.no_confirm and not confirm_action(f'Delete {rm_repo.repo_id}@{rm_rev}?'):
                # Declining aborts the whole command, not just this revision.
                return

            if not self.args.dry_run:
                strategy = hf_cache_info.delete_revisions(rm_rev)
                strategy.execute()
193
+
194
+
195
+ ##
196
+
197
+
198
def _main() -> None:
    """Entry point: set up standard logging at INFO level, then build the cli and invoke it."""

    logs.configure_standard_logging('INFO')

    cli = Cli()
    cli()
201
+
202
+
203
+ # @omlish-manifest
204
+ _CLI_MODULE = CliModule('hf', __name__)
205
+
206
+
207
+ if __name__ == '__main__':
208
+ _main()
@@ -1,4 +1,7 @@
1
1
  """
2
+ NOTE: This can't be cleaned up too much - the callback can't be a closure to hide its guts because it needs to be
3
+ picklable for multiprocessing.
4
+
2
5
  FIXME:
3
6
  - it outputs newline-terminated so buffer and chop on newlines - DelimitingBuffer again
4
7
  """
@@ -27,4 +30,4 @@ def llama_log_callback(
27
30
 
28
31
  @lang.cached_function
29
32
  def install_logging_hook() -> None:
30
- llama_cpp.llama_log_set(llama_log_callback, ct.c_void_p(0))
33
+ llama_cpp.llama_log_set(llama_log_callback, ct.c_void_p(0)) # noqa
@@ -17,7 +17,11 @@
17
17
  # https://github.com/ml-explore/mlx-lm/blob/ce2358d297af245b002e690623f00195b6507da0/mlx_lm/generate.py
18
18
  import typing as ta
19
19
 
20
- import mlx_lm.models.cache
20
+ from omlish import lang
21
+
22
+
23
+ with lang.auto_proxy_import(globals()):
24
+ import mlx_lm.models.cache as mlx_lm_models_cache
21
25
 
22
26
 
23
27
  ##
@@ -32,13 +36,13 @@ def maybe_quantize_kv_cache(
32
36
  ) -> None:
33
37
  if not (
34
38
  kv_bits is not None and
35
- not isinstance(prompt_cache[0], mlx_lm.models.cache.QuantizedKVCache) and
39
+ not isinstance(prompt_cache[0], mlx_lm_models_cache.QuantizedKVCache) and
36
40
  prompt_cache[0].offset > quantized_kv_start
37
41
  ):
38
42
  return
39
43
 
40
44
  for i in range(len(prompt_cache)):
41
- if isinstance(prompt_cache[i], mlx_lm.models.cache.KVCache):
45
+ if isinstance(prompt_cache[i], mlx_lm_models_cache.KVCache):
42
46
  prompt_cache[i] = prompt_cache[i].to_quantized(
43
47
  bits=kv_bits,
44
48
  group_size=kv_group_size,
@@ -20,16 +20,19 @@ import json
20
20
  import sys
21
21
  import typing as ta
22
22
 
23
- import mlx.core as mx
24
- import mlx_lm.models.cache
25
- import mlx_lm.sample_utils
26
- import mlx_lm.utils
23
+ from omlish import lang
27
24
 
28
25
  from .generation import GenerationParams
29
26
  from .generation import generate
30
27
  from .loading import load_model
31
28
 
32
29
 
30
+ with lang.auto_proxy_import(globals()):
31
+ import mlx.core as mx
32
+ import mlx_lm.models.cache as mlx_lm_models_cache
33
+ import mlx_lm.sample_utils as mlx_lm_sample_utils
34
+
35
+
33
36
  ##
34
37
 
35
38
 
@@ -214,11 +217,11 @@ def _main() -> None:
214
217
  # Load the prompt cache and metadata if a cache file is provided
215
218
  using_cache = args.prompt_cache_file is not None
216
219
  if using_cache:
217
- prompt_cache, metadata = mlx_lm.models.cache.load_prompt_cache(
220
+ prompt_cache, metadata = mlx_lm_models_cache.load_prompt_cache(
218
221
  args.prompt_cache_file,
219
222
  return_metadata=True,
220
223
  )
221
- if isinstance(prompt_cache[0], mlx_lm.models.cache.QuantizedKVCache):
224
+ if isinstance(prompt_cache[0], mlx_lm_models_cache.QuantizedKVCache):
222
225
  if args.kv_bits is not None and args.kv_bits != prompt_cache[0].bits:
223
226
  raise ValueError('--kv-bits does not match the kv cache loaded from --prompt-cache-file.')
224
227
  if args.kv_group_size != prompt_cache[0].group_size:
@@ -293,7 +296,7 @@ def _main() -> None:
293
296
  else:
294
297
  prompt = tokenizer.encode(prompt)
295
298
 
296
- sampler = mlx_lm.sample_utils.make_sampler(
299
+ sampler = mlx_lm_sample_utils.make_sampler(
297
300
  args.temp,
298
301
  args.top_p,
299
302
  args.min_p,
@@ -21,10 +21,6 @@ import io
21
21
  import sys
22
22
  import typing as ta
23
23
 
24
- import mlx.core as mx
25
- import mlx_lm.models.cache
26
- from mlx import nn
27
-
28
24
  from omlish import check
29
25
  from omlish import lang
30
26
 
@@ -33,6 +29,12 @@ from .limits import wired_limit_context
33
29
  from .tokenization import Tokenization
34
30
 
35
31
 
32
+ with lang.auto_proxy_import(globals()):
33
+ import mlx.core as mx
34
+ import mlx.nn as mlx_nn
35
+ import mlx_lm.models.cache as mlx_lm_models_cache
36
+
37
+
36
38
  ##
37
39
 
38
40
 
@@ -47,9 +49,9 @@ def _generation_stream():
47
49
  class LogitProcessor(ta.Protocol):
48
50
  def __call__(
49
51
  self,
50
- tokens: mx.array,
51
- logits: mx.array,
52
- ) -> mx.array:
52
+ tokens: 'mx.array',
53
+ logits: 'mx.array',
54
+ ) -> 'mx.array':
53
55
  ...
54
56
 
55
57
 
@@ -99,12 +101,12 @@ class GenerationParams:
99
101
 
100
102
  class _GenerationStep(ta.NamedTuple):
101
103
  token: int
102
- logprobs: mx.array
104
+ logprobs: 'mx.array'
103
105
 
104
106
 
105
107
  def _generate_step(
106
- prompt: mx.array,
107
- model: nn.Module,
108
+ prompt: 'mx.array',
109
+ model: 'mlx_nn.Module',
108
110
  params: GenerationParams = GenerationParams(),
109
111
  ) -> ta.Generator[_GenerationStep]:
110
112
  y = prompt
@@ -113,7 +115,7 @@ def _generate_step(
113
115
  # Create the Kv cache for generation
114
116
  prompt_cache = params.prompt_cache
115
117
  if prompt_cache is None:
116
- prompt_cache = mlx_lm.models.cache.make_prompt_cache(
118
+ prompt_cache = mlx_lm_models_cache.make_prompt_cache(
117
119
  model,
118
120
  max_kv_size=params.max_kv_size,
119
121
  )
@@ -221,7 +223,7 @@ class GenerationOutput:
221
223
  token: int
222
224
 
223
225
  # A vector of log probabilities.
224
- logprobs: mx.array
226
+ logprobs: 'mx.array'
225
227
 
226
228
  # The number of tokens in the prompt.
227
229
  prompt_tokens: int
@@ -234,9 +236,9 @@ class GenerationOutput:
234
236
 
235
237
 
236
238
  def stream_generate(
237
- model: nn.Module,
239
+ model: 'mlx_nn.Module',
238
240
  tokenization: Tokenization,
239
- prompt: str | mx.array,
241
+ prompt: ta.Union[str, 'mx.array'],
240
242
  params: GenerationParams = GenerationParams(),
241
243
  ) -> ta.Generator[GenerationOutput]:
242
244
  if not isinstance(prompt, mx.array):
@@ -308,9 +310,9 @@ def stream_generate(
308
310
 
309
311
 
310
312
  def generate(
311
- model: nn.Module,
313
+ model: 'mlx_nn.Module',
312
314
  tokenization: Tokenization,
313
- prompt: str | mx.array,
315
+ prompt: ta.Union[str, 'mx.array'],
314
316
  params: GenerationParams = GenerationParams(),
315
317
  *,
316
318
  verbose: bool = False,