inferml 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. inferml-1.0.1/LICENSE +21 -0
  2. inferml-1.0.1/MANIFEST.in +9 -0
  3. inferml-1.0.1/PKG-INFO +137 -0
  4. inferml-1.0.1/README.md +83 -0
  5. inferml-1.0.1/pyproject.toml +67 -0
  6. inferml-1.0.1/python/_win_compat.py +52 -0
  7. inferml-1.0.1/python/adapters/__init__.py +65 -0
  8. inferml-1.0.1/python/adapters/base.py +37 -0
  9. inferml-1.0.1/python/adapters/diffusers_pipeline.py +57 -0
  10. inferml-1.0.1/python/adapters/standard_pipeline.py +31 -0
  11. inferml-1.0.1/python/engine.py +294 -0
  12. inferml-1.0.1/python/inferml.egg-info/PKG-INFO +137 -0
  13. inferml-1.0.1/python/inferml.egg-info/SOURCES.txt +228 -0
  14. inferml-1.0.1/python/inferml.egg-info/dependency_links.txt +1 -0
  15. inferml-1.0.1/python/inferml.egg-info/entry_points.txt +2 -0
  16. inferml-1.0.1/python/inferml.egg-info/requires.txt +21 -0
  17. inferml-1.0.1/python/inferml.egg-info/top_level.txt +10 -0
  18. inferml-1.0.1/python/io_utils.py +68 -0
  19. inferml-1.0.1/python/model_overrides.json +32 -0
  20. inferml-1.0.1/python/models/__init__.py +167 -0
  21. inferml-1.0.1/python/models/_diffusion_helper.py +45 -0
  22. inferml-1.0.1/python/models/_pipeline_helper.py +50 -0
  23. inferml-1.0.1/python/models/aria/__init__.py +6 -0
  24. inferml-1.0.1/python/models/bamba/__init__.py +6 -0
  25. inferml-1.0.1/python/models/bark/__init__.py +6 -0
  26. inferml-1.0.1/python/models/bit/__init__.py +6 -0
  27. inferml-1.0.1/python/models/bitnet/__init__.py +6 -0
  28. inferml-1.0.1/python/models/blip/__init__.py +12 -0
  29. inferml-1.0.1/python/models/bloom/__init__.py +6 -0
  30. inferml-1.0.1/python/models/chameleon/__init__.py +6 -0
  31. inferml-1.0.1/python/models/clip/__init__.py +6 -0
  32. inferml-1.0.1/python/models/codegen/__init__.py +6 -0
  33. inferml-1.0.1/python/models/cohere/__init__.py +6 -0
  34. inferml-1.0.1/python/models/cohere2_vision/__init__.py +6 -0
  35. inferml-1.0.1/python/models/conditional_detr/__init__.py +6 -0
  36. inferml-1.0.1/python/models/convnext/__init__.py +6 -0
  37. inferml-1.0.1/python/models/csm/__init__.py +11 -0
  38. inferml-1.0.1/python/models/cvt/__init__.py +6 -0
  39. inferml-1.0.1/python/models/d_fine/__init__.py +6 -0
  40. inferml-1.0.1/python/models/data2vec_vision/__init__.py +6 -0
  41. inferml-1.0.1/python/models/dbrx/__init__.py +6 -0
  42. inferml-1.0.1/python/models/deepseek/__init__.py +6 -0
  43. inferml-1.0.1/python/models/deepseek_vl/__init__.py +5 -0
  44. inferml-1.0.1/python/models/deepseek_vl/adapter.py +93 -0
  45. inferml-1.0.1/python/models/deformable_detr/__init__.py +6 -0
  46. inferml-1.0.1/python/models/depth_anything/__init__.py +6 -0
  47. inferml-1.0.1/python/models/depth_pro/__init__.py +6 -0
  48. inferml-1.0.1/python/models/detr/__init__.py +7 -0
  49. inferml-1.0.1/python/models/dia/__init__.py +13 -0
  50. inferml-1.0.1/python/models/donut/__init__.py +11 -0
  51. inferml-1.0.1/python/models/dpt/__init__.py +7 -0
  52. inferml-1.0.1/python/models/edgetam/__init__.py +14 -0
  53. inferml-1.0.1/python/models/efficientnet/__init__.py +6 -0
  54. inferml-1.0.1/python/models/emu3/__init__.py +6 -0
  55. inferml-1.0.1/python/models/eomt/__init__.py +6 -0
  56. inferml-1.0.1/python/models/eomt_dinov3/__init__.py +11 -0
  57. inferml-1.0.1/python/models/exaone/__init__.py +6 -0
  58. inferml-1.0.1/python/models/falcon/__init__.py +6 -0
  59. inferml-1.0.1/python/models/fastspeech2/__init__.py +6 -0
  60. inferml-1.0.1/python/models/fastvlm/__init__.py +5 -0
  61. inferml-1.0.1/python/models/fastvlm/adapter.py +99 -0
  62. inferml-1.0.1/python/models/florence2/__init__.py +5 -0
  63. inferml-1.0.1/python/models/florence2/adapter.py +102 -0
  64. inferml-1.0.1/python/models/flux/__init__.py +22 -0
  65. inferml-1.0.1/python/models/focalnet/__init__.py +6 -0
  66. inferml-1.0.1/python/models/fuyu/__init__.py +6 -0
  67. inferml-1.0.1/python/models/gemma/__init__.py +10 -0
  68. inferml-1.0.1/python/models/gemma3_vlm/__init__.py +6 -0
  69. inferml-1.0.1/python/models/git/__init__.py +6 -0
  70. inferml-1.0.1/python/models/glm/__init__.py +6 -0
  71. inferml-1.0.1/python/models/glm4v/__init__.py +6 -0
  72. inferml-1.0.1/python/models/got_ocr2/__init__.py +20 -0
  73. inferml-1.0.1/python/models/gpt2/__init__.py +6 -0
  74. inferml-1.0.1/python/models/gpt_oss/__init__.py +6 -0
  75. inferml-1.0.1/python/models/granite/__init__.py +6 -0
  76. inferml-1.0.1/python/models/granite_speech/__init__.py +15 -0
  77. inferml-1.0.1/python/models/grounding_dino/__init__.py +6 -0
  78. inferml-1.0.1/python/models/hunyuan_vl/__init__.py +6 -0
  79. inferml-1.0.1/python/models/idefics/__init__.py +6 -0
  80. inferml-1.0.1/python/models/instructpix2pix/__init__.py +19 -0
  81. inferml-1.0.1/python/models/internvl/__init__.py +6 -0
  82. inferml-1.0.1/python/models/jamba/__init__.py +6 -0
  83. inferml-1.0.1/python/models/janus/__init__.py +5 -0
  84. inferml-1.0.1/python/models/janus/adapter.py +125 -0
  85. inferml-1.0.1/python/models/kandinsky/__init__.py +14 -0
  86. inferml-1.0.1/python/models/kimi_vl/__init__.py +6 -0
  87. inferml-1.0.1/python/models/kolors/__init__.py +15 -0
  88. inferml-1.0.1/python/models/kosmos/__init__.py +6 -0
  89. inferml-1.0.1/python/models/kyutai_stt/__init__.py +11 -0
  90. inferml-1.0.1/python/models/layoutlmv3/__init__.py +9 -0
  91. inferml-1.0.1/python/models/levit/__init__.py +6 -0
  92. inferml-1.0.1/python/models/lfm2_vl/__init__.py +6 -0
  93. inferml-1.0.1/python/models/llama/__init__.py +6 -0
  94. inferml-1.0.1/python/models/llava/__init__.py +5 -0
  95. inferml-1.0.1/python/models/llava/adapter.py +79 -0
  96. inferml-1.0.1/python/models/m2m_100/__init__.py +6 -0
  97. inferml-1.0.1/python/models/mamba/__init__.py +6 -0
  98. inferml-1.0.1/python/models/marian/__init__.py +6 -0
  99. inferml-1.0.1/python/models/mask2former/__init__.py +6 -0
  100. inferml-1.0.1/python/models/maskformer/__init__.py +6 -0
  101. inferml-1.0.1/python/models/mgp_str/__init__.py +12 -0
  102. inferml-1.0.1/python/models/minicpm_v/__init__.py +6 -0
  103. inferml-1.0.1/python/models/minimax/__init__.py +6 -0
  104. inferml-1.0.1/python/models/mistral/__init__.py +6 -0
  105. inferml-1.0.1/python/models/mllama/__init__.py +6 -0
  106. inferml-1.0.1/python/models/mm_grounding_dino/__init__.py +12 -0
  107. inferml-1.0.1/python/models/mobilenet/__init__.py +7 -0
  108. inferml-1.0.1/python/models/moondream/__init__.py +5 -0
  109. inferml-1.0.1/python/models/moondream/adapter.py +37 -0
  110. inferml-1.0.1/python/models/moonshine/__init__.py +6 -0
  111. inferml-1.0.1/python/models/mpt/__init__.py +6 -0
  112. inferml-1.0.1/python/models/musicgen/__init__.py +6 -0
  113. inferml-1.0.1/python/models/nemotron/__init__.py +6 -0
  114. inferml-1.0.1/python/models/olmo/__init__.py +6 -0
  115. inferml-1.0.1/python/models/omdet_turbo/__init__.py +11 -0
  116. inferml-1.0.1/python/models/oneformer/__init__.py +11 -0
  117. inferml-1.0.1/python/models/opt/__init__.py +6 -0
  118. inferml-1.0.1/python/models/ovis/__init__.py +6 -0
  119. inferml-1.0.1/python/models/owlvit/__init__.py +6 -0
  120. inferml-1.0.1/python/models/paligemma/__init__.py +6 -0
  121. inferml-1.0.1/python/models/parakeet/__init__.py +6 -0
  122. inferml-1.0.1/python/models/persimmon/__init__.py +6 -0
  123. inferml-1.0.1/python/models/phi/__init__.py +6 -0
  124. inferml-1.0.1/python/models/pix2struct/__init__.py +6 -0
  125. inferml-1.0.1/python/models/pixart/__init__.py +14 -0
  126. inferml-1.0.1/python/models/playground/__init__.py +14 -0
  127. inferml-1.0.1/python/models/poolformer/__init__.py +6 -0
  128. inferml-1.0.1/python/models/pop2piano/__init__.py +13 -0
  129. inferml-1.0.1/python/models/prophetnet/__init__.py +6 -0
  130. inferml-1.0.1/python/models/pvt/__init__.py +6 -0
  131. inferml-1.0.1/python/models/qwen/__init__.py +9 -0
  132. inferml-1.0.1/python/models/qwen_vl/__init__.py +5 -0
  133. inferml-1.0.1/python/models/qwen_vl/adapter.py +83 -0
  134. inferml-1.0.1/python/models/regnet/__init__.py +6 -0
  135. inferml-1.0.1/python/models/resnet/__init__.py +6 -0
  136. inferml-1.0.1/python/models/rt_detr/__init__.py +6 -0
  137. inferml-1.0.1/python/models/rwkv/__init__.py +6 -0
  138. inferml-1.0.1/python/models/sam/__init__.py +6 -0
  139. inferml-1.0.1/python/models/sam2/__init__.py +6 -0
  140. inferml-1.0.1/python/models/sam3/__init__.py +6 -0
  141. inferml-1.0.1/python/models/sam_hq/__init__.py +10 -0
  142. inferml-1.0.1/python/models/sana/__init__.py +16 -0
  143. inferml-1.0.1/python/models/sd_inpainting/__init__.py +23 -0
  144. inferml-1.0.1/python/models/sdxl/__init__.py +25 -0
  145. inferml-1.0.1/python/models/sdxl_refiner/__init__.py +18 -0
  146. inferml-1.0.1/python/models/sdxl_turbo/__init__.py +16 -0
  147. inferml-1.0.1/python/models/seamless_m4t/__init__.py +6 -0
  148. inferml-1.0.1/python/models/segformer/__init__.py +12 -0
  149. inferml-1.0.1/python/models/siglip/__init__.py +6 -0
  150. inferml-1.0.1/python/models/smollm/__init__.py +6 -0
  151. inferml-1.0.1/python/models/smolvlm/__init__.py +6 -0
  152. inferml-1.0.1/python/models/speecht5/__init__.py +6 -0
  153. inferml-1.0.1/python/models/stable_diffusion/__init__.py +22 -0
  154. inferml-1.0.1/python/models/stablelm/__init__.py +6 -0
  155. inferml-1.0.1/python/models/starcoder2/__init__.py +6 -0
  156. inferml-1.0.1/python/models/swiftformer/__init__.py +6 -0
  157. inferml-1.0.1/python/models/swin/__init__.py +6 -0
  158. inferml-1.0.1/python/models/table_transformer/__init__.py +6 -0
  159. inferml-1.0.1/python/models/timm/__init__.py +12 -0
  160. inferml-1.0.1/python/models/trocr/__init__.py +6 -0
  161. inferml-1.0.1/python/models/upernet/__init__.py +6 -0
  162. inferml-1.0.1/python/models/vision_encoder_decoder/__init__.py +7 -0
  163. inferml-1.0.1/python/models/vit/__init__.py +12 -0
  164. inferml-1.0.1/python/models/vits/__init__.py +6 -0
  165. inferml-1.0.1/python/models/voxtral/__init__.py +18 -0
  166. inferml-1.0.1/python/models/wav2vec2/__init__.py +21 -0
  167. inferml-1.0.1/python/models/whisper/__init__.py +11 -0
  168. inferml-1.0.1/python/models/xglm/__init__.py +6 -0
  169. inferml-1.0.1/python/models/xlnet/__init__.py +6 -0
  170. inferml-1.0.1/python/models/yolos/__init__.py +6 -0
  171. inferml-1.0.1/python/models/zamba/__init__.py +6 -0
  172. inferml-1.0.1/python/models/zoedepth/__init__.py +6 -0
  173. inferml-1.0.1/python/output_kinds.py +56 -0
  174. inferml-1.0.1/python/routing.py +202 -0
  175. inferml-1.0.1/python/server/__init__.py +13 -0
  176. inferml-1.0.1/python/server/_data/model_overrides.json +32 -0
  177. inferml-1.0.1/python/server/_data/supported_architectures.json +307 -0
  178. inferml-1.0.1/python/server/app.py +71 -0
  179. inferml-1.0.1/python/server/appdata.py +67 -0
  180. inferml-1.0.1/python/server/cli.py +68 -0
  181. inferml-1.0.1/python/server/deps.py +55 -0
  182. inferml-1.0.1/python/server/events.py +42 -0
  183. inferml-1.0.1/python/server/hf_service.py +319 -0
  184. inferml-1.0.1/python/server/hw_service.py +131 -0
  185. inferml-1.0.1/python/server/openai_api/__init__.py +9 -0
  186. inferml-1.0.1/python/server/openai_api/llm.py +236 -0
  187. inferml-1.0.1/python/server/openai_api/routes.py +236 -0
  188. inferml-1.0.1/python/server/openai_api/tools/__init__.py +55 -0
  189. inferml-1.0.1/python/server/openai_api/tools/base.py +80 -0
  190. inferml-1.0.1/python/server/openai_api/tools/hermes_qwen.py +28 -0
  191. inferml-1.0.1/python/server/openai_api/tools/llama.py +29 -0
  192. inferml-1.0.1/python/server/openai_api/tools/mistral.py +31 -0
  193. inferml-1.0.1/python/server/paths.py +24 -0
  194. inferml-1.0.1/python/server/routes/__init__.py +1 -0
  195. inferml-1.0.1/python/server/routes/hf.py +65 -0
  196. inferml-1.0.1/python/server/routes/inference.py +286 -0
  197. inferml-1.0.1/python/server/routes/store.py +42 -0
  198. inferml-1.0.1/python/server/routes/system.py +147 -0
  199. inferml-1.0.1/python/server/store_service.py +134 -0
  200. inferml-1.0.1/python/server/webui/components/app.js +718 -0
  201. inferml-1.0.1/python/server/webui/components/chat.js +288 -0
  202. inferml-1.0.1/python/server/webui/components/home.js +173 -0
  203. inferml-1.0.1/python/server/webui/components/icons.js +50 -0
  204. inferml-1.0.1/python/server/webui/components/model-browser.js +559 -0
  205. inferml-1.0.1/python/server/webui/components/onboarding.js +193 -0
  206. inferml-1.0.1/python/server/webui/components/settings.js +512 -0
  207. inferml-1.0.1/python/server/webui/components/task-workspace.js +1286 -0
  208. inferml-1.0.1/python/server/webui/components/welcome.js +4 -0
  209. inferml-1.0.1/python/server/webui/index.html +26 -0
  210. inferml-1.0.1/python/server/webui/styles.css +2109 -0
  211. inferml-1.0.1/python/server/webui/vendor/marked.umd.js +79 -0
  212. inferml-1.0.1/python/server/webui/vendor/purify.min.js +3 -0
  213. inferml-1.0.1/python/server/webui/vendor/react-dom.production.min.js +267 -0
  214. inferml-1.0.1/python/server/webui/vendor/react.production.min.js +31 -0
  215. inferml-1.0.1/python/server/webui/web-bridge.js +247 -0
  216. inferml-1.0.1/python/supported_architectures.json +307 -0
  217. inferml-1.0.1/python/tasks/__init__.py +61 -0
  218. inferml-1.0.1/python/tasks/_render.py +120 -0
  219. inferml-1.0.1/python/tasks/asr.py +66 -0
  220. inferml-1.0.1/python/tasks/base.py +93 -0
  221. inferml-1.0.1/python/tasks/depth_estimation.py +88 -0
  222. inferml-1.0.1/python/tasks/document_qa.py +58 -0
  223. inferml-1.0.1/python/tasks/image_classification.py +48 -0
  224. inferml-1.0.1/python/tasks/image_segmentation.py +205 -0
  225. inferml-1.0.1/python/tasks/image_to_text.py +94 -0
  226. inferml-1.0.1/python/tasks/mask_generation.py +300 -0
  227. inferml-1.0.1/python/tasks/misc_tasks.py +122 -0
  228. inferml-1.0.1/python/tasks/object_detection.py +112 -0
  229. inferml-1.0.1/python/tasks/text_generation.py +162 -0
  230. inferml-1.0.1/setup.cfg +4 -0
inferml-1.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 LocalML, Gitesh Chawda
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,9 @@
1
+ include LICENSE
2
+ include README.md
3
+ include python/supported_architectures.json
4
+ include python/model_overrides.json
5
+ recursive-include python *.json
6
+ recursive-include python/server/webui *
7
+ recursive-include python/server/_data *
8
+ global-exclude __pycache__/*
9
+ global-exclude *.py[cod]
inferml-1.0.1/PKG-INFO ADDED
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.4
2
+ Name: inferml
3
+ Version: 1.0.1
4
+ Summary: Any HuggingFace model. Local. Multi-modal. Served over an OpenAI-compatible API.
5
+ Author: LocalML
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 LocalML, Gitesh Chawda
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26
+ THE SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/IMvision12/InferML
29
+ Keywords: huggingface,transformers,inference,openai,local,llm,diffusion
30
+ Requires-Python: >=3.10
31
+ Description-Content-Type: text/markdown
32
+ License-File: LICENSE
33
+ Requires-Dist: fastapi>=0.110
34
+ Requires-Dist: uvicorn[standard]>=0.29
35
+ Requires-Dist: huggingface_hub
36
+ Requires-Dist: platformdirs>=4
37
+ Requires-Dist: psutil>=5.9
38
+ Provides-Extra: inference
39
+ Requires-Dist: transformers>=5.7.0; extra == "inference"
40
+ Requires-Dist: torch>=2.6; extra == "inference"
41
+ Requires-Dist: torchvision; extra == "inference"
42
+ Requires-Dist: torchaudio>=2.6; extra == "inference"
43
+ Requires-Dist: diffusers; extra == "inference"
44
+ Requires-Dist: accelerate; extra == "inference"
45
+ Requires-Dist: timm; extra == "inference"
46
+ Requires-Dist: pillow; extra == "inference"
47
+ Requires-Dist: soundfile; extra == "inference"
48
+ Requires-Dist: librosa; extra == "inference"
49
+ Requires-Dist: numpy; extra == "inference"
50
+ Requires-Dist: scipy; extra == "inference"
51
+ Requires-Dist: sentencepiece; extra == "inference"
52
+ Requires-Dist: protobuf; extra == "inference"
53
+ Dynamic: license-file
54
+
55
+ <p align="center">
56
+ <img src="assets/logo.png" alt="LocalML logo" width="140" />
57
+ </p>
58
+
59
+ # LocalML
60
+
61
+ Any Hugging Face model. Local. Multi-modal. Now a **local web server** with an
62
+ **OpenAI-compatible API** - no Electron, no native binary.
63
+
64
+ Run 143+ model families fully on-device (LLMs, VLMs, diffusion, ASR, TTS,
65
+ segmentation, detection) behind a browser UI, and point agent frameworks
66
+ (LangChain, LangGraph, the OpenAI SDK) at it the way you point them at Ollama.
67
+
68
+ ## Install
69
+
70
+ Requires **Python 3.10+** - the installer checks for it but won't install Python
71
+ for you. One line in your terminal:
72
+
73
+ ```bash
74
+ # macOS / Linux
75
+ curl -fsSL https://www.localml.tech/install.sh | sh
76
+ # Windows (PowerShell)
77
+ irm https://www.localml.tech/install.ps1 | iex
78
+ ```
79
+
80
+ The script bootstraps pipx and installs the LocalML server. On first launch the
81
+ app walks you through installing the inference stack (PyTorch + transformers) for
82
+ your hardware - pick **CPU** or **GPU** and it fetches the matching build.
83
+
84
+ Prefer to do it by hand?
85
+
86
+ ```bash
87
+ pipx install inferml # server only; the app installs torch on first run
88
+ pipx install "inferml[inference]" # or grab the whole stack up front (generic torch wheel)
89
+ ```
90
+
91
+ ## Run
92
+
93
+ ```bash
94
+ localml # starts the server and opens http://localhost:11500
95
+ localml --port 8080 # custom port
96
+ localml --host 0.0.0.0 --no-browser # expose on the LAN, headless
97
+ ```
98
+
99
+ Open the printed URL, download a model from the Hub tab, and run it.
100
+
101
+ ## OpenAI-compatible API
102
+
103
+ Point any OpenAI client at `http://localhost:11500/v1` (any API key is accepted). It routes
104
+ to whichever LLM is currently loaded in LocalML.
105
+
106
+ ```python
107
+ from openai import OpenAI
108
+ client = OpenAI(base_url="http://localhost:11500/v1", api_key="not-needed")
109
+ client.chat.completions.create(
110
+ model="Qwen/Qwen2.5-0.5B-Instruct",
111
+ messages=[{"role": "user", "content": "Hello!"}],
112
+ )
113
+ ```
114
+
115
+ Supports streaming (`stream=True`), `GET /v1/models`, and tool/function calling
116
+ for the Qwen/Hermes, Llama, and Mistral families.
117
+
118
+ ## Docker
119
+
120
+ ```bash
121
+ docker build -t localml .
122
+ docker run --rm -p 11500:11500 localml # CPU
123
+ docker run --rm --gpus all -p 11500:11500 localml # GPU
124
+ ```
125
+
126
+ ## Development
127
+
128
+ The React UI lives in `src/renderer/` (built with esbuild) and talks to the
129
+ server via `window.localml` (see `src/renderer/web-bridge.js`). The Python
130
+ server + inference engine live in `python/`.
131
+
132
+ ```bash
133
+ npm install # build deps (esbuild + the vendored UMD libs)
134
+ npm run build # compile the renderer and bundle it into the package
135
+ pip install -e ".[inference]"
136
+ localml
137
+ ```
@@ -0,0 +1,83 @@
1
+ <p align="center">
2
+ <img src="assets/logo.png" alt="LocalML logo" width="140" />
3
+ </p>
4
+
5
+ # LocalML
6
+
7
+ Any Hugging Face model. Local. Multi-modal. Now a **local web server** with an
8
+ **OpenAI-compatible API** - no Electron, no native binary.
9
+
10
+ Run 143+ model families fully on-device (LLMs, VLMs, diffusion, ASR, TTS,
11
+ segmentation, detection) behind a browser UI, and point agent frameworks
12
+ (LangChain, LangGraph, the OpenAI SDK) at it the way you point them at Ollama.
13
+
14
+ ## Install
15
+
16
+ Requires **Python 3.10+** - the installer checks for it but won't install Python
17
+ for you. One line in your terminal:
18
+
19
+ ```bash
20
+ # macOS / Linux
21
+ curl -fsSL https://www.localml.tech/install.sh | sh
22
+ # Windows (PowerShell)
23
+ irm https://www.localml.tech/install.ps1 | iex
24
+ ```
25
+
26
+ The script bootstraps pipx and installs the LocalML server. On first launch the
27
+ app walks you through installing the inference stack (PyTorch + transformers) for
28
+ your hardware - pick **CPU** or **GPU** and it fetches the matching build.
29
+
30
+ Prefer to do it by hand?
31
+
32
+ ```bash
33
+ pipx install inferml # server only; the app installs torch on first run
34
+ pipx install "inferml[inference]" # or grab the whole stack up front (generic torch wheel)
35
+ ```
36
+
37
+ ## Run
38
+
39
+ ```bash
40
+ localml # starts the server and opens http://localhost:11500
41
+ localml --port 8080 # custom port
42
+ localml --host 0.0.0.0 --no-browser # expose on the LAN, headless
43
+ ```
44
+
45
+ Open the printed URL, download a model from the Hub tab, and run it.
46
+
47
+ ## OpenAI-compatible API
48
+
49
+ Point any OpenAI client at `http://localhost:11500/v1` (any API key is accepted). It routes
50
+ to whichever LLM is currently loaded in LocalML.
51
+
52
+ ```python
53
+ from openai import OpenAI
54
+ client = OpenAI(base_url="http://localhost:11500/v1", api_key="not-needed")
55
+ client.chat.completions.create(
56
+ model="Qwen/Qwen2.5-0.5B-Instruct",
57
+ messages=[{"role": "user", "content": "Hello!"}],
58
+ )
59
+ ```
60
+
61
+ Supports streaming (`stream=True`), `GET /v1/models`, and tool/function calling
62
+ for the Qwen/Hermes, Llama, and Mistral families.
63
+
64
+ ## Docker
65
+
66
+ ```bash
67
+ docker build -t localml .
68
+ docker run --rm -p 11500:11500 localml # CPU
69
+ docker run --rm --gpus all -p 11500:11500 localml # GPU
70
+ ```
71
+
72
+ ## Development
73
+
74
+ The React UI lives in `src/renderer/` (built with esbuild) and talks to the
75
+ server via `window.localml` (see `src/renderer/web-bridge.js`). The Python
76
+ server + inference engine live in `python/`.
77
+
78
+ ```bash
79
+ npm install # build deps (esbuild + the vendored UMD libs)
80
+ npm run build # compile the renderer and bundle it into the package
81
+ pip install -e ".[inference]"
82
+ localml
83
+ ```
@@ -0,0 +1,67 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "inferml"
7
+ version = "1.0.1"
8
+ description = "Any HuggingFace model. Local. Multi-modal. Served over an OpenAI-compatible API."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { file = "LICENSE" }
12
+ authors = [{ name = "LocalML" }]
13
+ keywords = ["huggingface", "transformers", "inference", "openai", "local", "llm", "diffusion"]
14
+
15
+ # The web server layer. The inference stack (torch, transformers, diffusers, …)
16
+ # is declared in the `inference` extra so `pipx install inferml` stays fast and
17
+ # the heavy, hardware-specific torch wheels can be pinned to the right index by
18
+ # the user. Phase 5 finalizes the CPU/GPU install story and bundles the built
19
+ # frontend as package data.
20
+ dependencies = [
21
+ "fastapi>=0.110",
22
+ "uvicorn[standard]>=0.29",
23
+ "huggingface_hub",
24
+ "platformdirs>=4",
25
+ "psutil>=5.9",
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ inference = [
30
+ "transformers>=5.7.0",
31
+ "torch>=2.6",
32
+ "torchvision",
33
+ "torchaudio>=2.6",
34
+ "diffusers",
35
+ "accelerate",
36
+ "timm",
37
+ "pillow",
38
+ "soundfile",
39
+ "librosa",
40
+ "numpy",
41
+ "scipy",
42
+ "sentencepiece",
43
+ "protobuf",
44
+ ]
45
+
46
+ [project.scripts]
47
+ localml = "server.cli:main"
48
+
49
+ [project.urls]
50
+ Homepage = "https://github.com/IMvision12/InferML"
51
+
52
+ # The Python sources live under python/ as flat modules + subpackages (the
53
+ # inference engine imports `routing`, `adapters`, `tasks`, `models` at top
54
+ # level). Map the package root to python/ and ship both shapes.
55
+ [tool.setuptools]
56
+ package-dir = { "" = "python" }
57
+ py-modules = ["engine", "routing", "io_utils", "output_kinds", "_win_compat"]
58
+
59
+ [tool.setuptools.packages.find]
60
+ where = ["python"]
61
+ include = ["server*", "adapters*", "tasks*", "models*", "plugins*"]
62
+
63
+ [tool.setuptools.package-data]
64
+ # Per-family JSON + the compiled frontend and routing tables bundled inside the
65
+ # `server` package by `npm run build` (scripts/bundle-webui.js).
66
+ "*" = ["*.json"]
67
+ "server" = ["webui/**/*", "_data/**/*"]
@@ -0,0 +1,52 @@
1
+ """Windows compatibility patches applied process-wide at sidecar boot.
2
+
3
+ Currently:
4
+ - os.symlink → transparent copy fallback when the caller lacks
5
+ SeCreateSymbolicLinkPrivilege (the WinError 1314 case). HuggingFace's
6
+ cache layout uses symlinks to dedup blobs across snapshots, and a
7
+ standard non-admin user without Developer Mode hits this on every
8
+ download. POSIX users keep the real os.symlink (symlinks always work
9
+ there).
10
+
11
+ Import this module before any other library that may call os.symlink.
12
+ On non-Windows platforms the import is a no-op.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import os
17
+ import shutil
18
+ import sys
19
+
20
+
21
def _install_symlink_copy_fallback() -> None:
    """Swap ``os.symlink`` for a wrapper that copies when the link is denied.

    Does nothing unless ``sys.platform`` is ``"win32"``. On Windows the
    wrapper first calls the real ``os.symlink``. If that raises an ``OSError``
    whose ``winerror`` is 1314, or whose message mentions "privilege", the
    link target is copied to ``dst`` instead (``copytree`` for directories,
    ``copyfile`` otherwise). Any other ``OSError`` propagates unchanged.
    """
    if sys.platform != "win32":
        return

    real_symlink = os.symlink

    def _absolute_target(src, dst):
        # A relative target is joined onto the directory that holds ``dst``,
        # not onto the current working directory.
        if not os.path.isabs(src):
            return os.path.normpath(os.path.join(os.path.dirname(dst), src))
        return src

    def _symlink(src, dst, target_is_directory=False, *, dir_fd=None):
        try:
            return real_symlink(
                src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd
            )
        except OSError as exc:
            missing_privilege = getattr(exc, "winerror", None) == 1314
            if not missing_privilege and "privilege" not in str(exc).lower():
                raise
            source_path = _absolute_target(src, dst)
            copy_as_tree = target_is_directory or os.path.isdir(source_path)
            if copy_as_tree:
                shutil.copytree(source_path, dst, dirs_exist_ok=True)
            else:
                shutil.copyfile(source_path, dst)
            return None

    os.symlink = _symlink
50
+
51
+
52
+ _install_symlink_copy_fallback()
@@ -0,0 +1,65 @@
1
+ """Adapter base classes + shared catch-all adapters.
2
+
3
+ Per-family inference code lives in `python/models/<family>/`. This package
4
+ holds only the cross-cutting pieces:
5
+
6
+ - `Adapter` the base class every family inherits from
7
+ - `StandardPipelineAdapter` fallback for repos with no dedicated family
8
+ - `DiffusersAdapter` library passthrough for diffusers checkpoints
9
+
10
+ Routing strategy lives in `routing.py` and is:
11
+
12
+ 1. Named override (via model_overrides.json `"adapter"` field)
13
+ 2. Plugin adapters (python/plugins/*.py)
14
+ 3. models/<family>/ registry (per-family folders)
15
+ 4. DiffusersAdapter (library == "diffusers")
16
+ 5. StandardPipelineAdapter (pipeline_tag in its task list)
17
+ """
18
+ from __future__ import annotations
19
+
20
+ from .base import Adapter # noqa: F401
21
+ from .standard_pipeline import StandardPipelineAdapter
22
+ from .diffusers_pipeline import DiffusersAdapter
23
+
24
def _named_adapters() -> dict[str, type]:
    """Build the name→class map used by `model_overrides.json "adapter"` pins.

    The map always contains the two cross-cutting fallbacks (``"standard"``
    and ``"diffusers"``). Every entry of ``models.FAMILIES`` that carries an
    ``"adapter"`` class is then registered under its family name and, unless
    that key is already taken, under the lower-cased class name with
    ``"Adapter"`` removed.

    Built LAZILY (see the module-level `__getattr__`) so we don't capture a
    partially-loaded `models.FAMILIES` if some caller imports `models` before
    `adapters` and the family-folder-discovery chain re-enters this module
    mid-load.

    Family discovery is best-effort: if importing or walking
    ``models.FAMILIES`` fails, the failure is logged with its traceback and
    whatever was collected so far is returned, so the fallbacks stay usable.

    Returns:
        A new dict mapping adapter names to adapter classes.
    """
    # Function-scope import so this block does not depend on the file header.
    import logging

    out: dict[str, type] = {
        "standard": StandardPipelineAdapter,
        "diffusers": DiffusersAdapter,
    }
    try:
        from models import FAMILIES

        for fam_name, entry in FAMILIES.items():
            cls = entry.get("adapter")
            if cls is None:
                continue
            out[fam_name] = cls
            short = cls.__name__.replace("Adapter", "").lower()
            # The family name wins; the short alias never overwrites a key.
            out.setdefault(short, cls)
    except Exception:
        # Deliberately non-fatal, but no longer silent: a broken family
        # registry would otherwise look exactly like an empty one.
        logging.getLogger(__name__).warning(
            "Could not register family adapters from models.FAMILIES; "
            "returning only the adapters collected so far",
            exc_info=True,
        )
    return out
49
+
50
+ _NAMED_ADAPTERS_CACHE: "dict[str, type] | None" = None
51
+
52
def __getattr__(name: str):
    """Resolve the lazily built ``NAMED_ADAPTERS`` module attribute.

    The first request for ``NAMED_ADAPTERS`` calls `_named_adapters()` and
    stores the result in ``_NAMED_ADAPTERS_CACHE``; later requests return that
    same dict. Any other name raises ``AttributeError``.
    """
    global _NAMED_ADAPTERS_CACHE
    if name != "NAMED_ADAPTERS":
        raise AttributeError(f"module 'adapters' has no attribute {name!r}")
    if _NAMED_ADAPTERS_CACHE is None:
        _NAMED_ADAPTERS_CACHE = _named_adapters()
    return _NAMED_ADAPTERS_CACHE
59
+
60
+ __all__ = [
61
+ "Adapter",
62
+ "StandardPipelineAdapter",
63
+ "DiffusersAdapter",
64
+ "NAMED_ADAPTERS",
65
+ ]
@@ -0,0 +1,37 @@
1
+ """Adapter base class.
2
+
3
+ One instance of an adapter = one loaded model. The router picks the adapter,
4
+ calls `load(info, device)` once, then `run(inputs, params)` per request.
5
+ Instances are cached by (adapter_class, model_id) in the engine.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from abc import ABC, abstractmethod
10
+
11
class Adapter(ABC):
    """Abstract base for model adapters.

    Subclasses implement `load` and `run`; `can_handle` and `unload` have
    default implementations here.
    """

    # Class-level default; `unload` never clears an instance-level `override`.
    # NOTE(review): presumably filled from model_overrides.json — confirm
    # against the router.
    override: dict = {}

    @classmethod
    def can_handle(cls, info: dict) -> bool:
        """Report whether this adapter can run the model described by `info`.

        `info` is the dict from routing.inspect_model. Implementations should
        look at `model_id`, `model_type`, `architectures`, `tags`, etc. and
        must *not* download any weights. The base implementation always
        answers False.
        """
        return False

    @abstractmethod
    def load(self, info: dict, device) -> None:
        """Create the underlying model and any helpers (processor, tokenizer)."""

    @abstractmethod
    def run(self, inputs: dict, params: dict) -> dict:
        """Run inference and return a dict matching one of the kinds in
        `output_kinds.py` (`boxes`, `masks`, `labels`, `text`, `image`,
        `audio`, `vector`)."""

    def unload(self) -> None:
        """Drop references so memory can be freed.

        Every instance attribute except `override` is set to None.
        """
        preserved = ("override",)
        # Snapshot the names first so the loop does not iterate a live view.
        doomed = [name for name in vars(self) if name not in preserved]
        for name in doomed:
            setattr(self, name, None)
@@ -0,0 +1,57 @@
1
+ """Diffusers fallback. text-to-image, img2img, inpainting."""
2
+ from __future__ import annotations
3
+
4
+ from .base import Adapter
5
+ import output_kinds as ok
6
+ from io_utils import decode_image, resolve_device, torch_dtype_for_device
7
+
8
class DiffusersAdapter(Adapter):
    """Fallback adapter that runs a checkpoint through a diffusers AutoPipeline."""

    @classmethod
    def can_handle(cls, info):
        """Accept diffusers-library repos (except text-to-video) and image tasks.

        When `library` is "diffusers" everything but the "text-to-video"
        pipeline tag is accepted. Otherwise the pipeline tag must be one of
        "text-to-image", "image-to-image" or "inpainting".
        """
        tag = info.get("pipeline_tag")
        if info.get("library") == "diffusers":
            return tag != "text-to-video"
        return tag in ("text-to-image", "image-to-image", "inpainting")

    def load(self, info, device):
        """Instantiate the AutoPipeline matching the task and move it to a device.

        The task defaults to "text-to-image" when `info` has no pipeline tag.
        """
        self.info = info
        self.device = device
        self.task = info.get("pipeline_tag") or "text-to-image"
        dtype = torch_dtype_for_device()

        # Imported here so the module can be imported without diffusers.
        from diffusers import (
            AutoPipelineForText2Image,
            AutoPipelineForImage2Image,
            AutoPipelineForInpainting,
        )

        if self.task == "image-to-image":
            pipeline_cls = AutoPipelineForImage2Image
        elif self.task == "inpainting":
            pipeline_cls = AutoPipelineForInpainting
        else:
            pipeline_cls = AutoPipelineForText2Image

        load_kwargs = {"torch_dtype": dtype}
        if self.override.get("trust_remote_code"):
            load_kwargs["trust_remote_code"] = True
        self.pipe = pipeline_cls.from_pretrained(info["model_id"], **load_kwargs)

        target = resolve_device()
        if target is False:
            # A literal False from resolve_device() leaves the pipeline where
            # from_pretrained put it.
            return
        self.pipe = self.pipe.to(target)

    def run(self, inputs, params):
        """Generate one image from `inputs["text"]` and return it as an image result.

        Raises:
            ValueError: if the prompt is missing or blank.
        """
        prompt = (inputs.get("text") or "").strip()
        if not prompt:
            raise ValueError("Prompt required")

        # Forward only the generation options the caller actually supplied.
        call_kwargs = {}
        for key in ("num_inference_steps", "guidance_scale", "negative_prompt", "strength"):
            if key in params:
                call_kwargs[key] = params[key]
        if "num_inference_steps" not in call_kwargs:
            call_kwargs["num_inference_steps"] = 20
        if "guidance_scale" not in call_kwargs:
            call_kwargs["guidance_scale"] = 7.5

        # NOTE(review): no mask is forwarded for the "inpainting" task —
        # confirm whether the pipeline call needs one.
        takes_source_image = self.task in ("image-to-image", "inpainting")
        if takes_source_image and inputs.get("dataUrl"):
            call_kwargs["image"] = decode_image(inputs["dataUrl"])

        output = self.pipe(prompt, **call_kwargs)
        return ok.image(output.images[0])
@@ -0,0 +1,31 @@
1
+ """Standard HF pipeline adapter. Fallback when no `models/<family>/` matches.
2
+
3
+ Thin dispatcher: the actual per-task logic lives in `python/tasks/`. When a
4
+ model breaks, add a folder under `python/models/` (preferred) or a Variant
5
+ in the relevant task file.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from .base import Adapter
10
+ from tasks import TASK_REGISTRY, get_task
11
+
12
+
13
class StandardPipelineAdapter(Adapter):
    """Thin dispatcher that hands a model to the task handler for its pipeline tag."""

    # Pipeline tags this adapter accepts: the keys of the task registry.
    SUPPORTED_TASKS = set(TASK_REGISTRY.keys())

    @classmethod
    def can_handle(cls, info):
        """Return True when the model's pipeline tag has a registered task."""
        tag = info.get("pipeline_tag")
        return tag in cls.SUPPORTED_TASKS

    def load(self, info, device):
        """Look up the task handler and let it build its pipeline state.

        Raises:
            ValueError: if no handler is registered for the pipeline tag.
        """
        self.info = info
        self.device = device
        self.task_name = info["pipeline_tag"]
        self.handler = get_task(self.task_name)
        if self.handler is None:
            raise ValueError(f"No task handler registered for {self.task_name!r}")

        # Only pass trust_remote_code through when the override asks for it.
        extra = {}
        if self.override.get("trust_remote_code"):
            extra["trust_remote_code"] = True
        self.state = self.handler.load_pipeline(info, device, extra_kwargs=extra)

    def run(self, inputs, params):
        """Delegate the request to the task handler with the loaded state."""
        handler = self.handler
        return handler.handle(self.state, inputs, params)