PyPI - inferml - Versions diffs - 1.0.1__tar.gz - Mend

inferml 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (230) hide show

inferml-1.0.1/LICENSE +21 -0
inferml-1.0.1/MANIFEST.in +9 -0
inferml-1.0.1/PKG-INFO +137 -0
inferml-1.0.1/README.md +83 -0
inferml-1.0.1/pyproject.toml +67 -0
inferml-1.0.1/python/_win_compat.py +52 -0
inferml-1.0.1/python/adapters/__init__.py +65 -0
inferml-1.0.1/python/adapters/base.py +37 -0
inferml-1.0.1/python/adapters/diffusers_pipeline.py +57 -0
inferml-1.0.1/python/adapters/standard_pipeline.py +31 -0
inferml-1.0.1/python/engine.py +294 -0
inferml-1.0.1/python/inferml.egg-info/PKG-INFO +137 -0
inferml-1.0.1/python/inferml.egg-info/SOURCES.txt +228 -0
inferml-1.0.1/python/inferml.egg-info/dependency_links.txt +1 -0
inferml-1.0.1/python/inferml.egg-info/entry_points.txt +2 -0
inferml-1.0.1/python/inferml.egg-info/requires.txt +21 -0
inferml-1.0.1/python/inferml.egg-info/top_level.txt +10 -0
inferml-1.0.1/python/io_utils.py +68 -0
inferml-1.0.1/python/model_overrides.json +32 -0
inferml-1.0.1/python/models/__init__.py +167 -0
inferml-1.0.1/python/models/_diffusion_helper.py +45 -0
inferml-1.0.1/python/models/_pipeline_helper.py +50 -0
inferml-1.0.1/python/models/aria/__init__.py +6 -0
inferml-1.0.1/python/models/bamba/__init__.py +6 -0
inferml-1.0.1/python/models/bark/__init__.py +6 -0
inferml-1.0.1/python/models/bit/__init__.py +6 -0
inferml-1.0.1/python/models/bitnet/__init__.py +6 -0
inferml-1.0.1/python/models/blip/__init__.py +12 -0
inferml-1.0.1/python/models/bloom/__init__.py +6 -0
inferml-1.0.1/python/models/chameleon/__init__.py +6 -0
inferml-1.0.1/python/models/clip/__init__.py +6 -0
inferml-1.0.1/python/models/codegen/__init__.py +6 -0
inferml-1.0.1/python/models/cohere/__init__.py +6 -0
inferml-1.0.1/python/models/cohere2_vision/__init__.py +6 -0
inferml-1.0.1/python/models/conditional_detr/__init__.py +6 -0
inferml-1.0.1/python/models/convnext/__init__.py +6 -0
inferml-1.0.1/python/models/csm/__init__.py +11 -0
inferml-1.0.1/python/models/cvt/__init__.py +6 -0
inferml-1.0.1/python/models/d_fine/__init__.py +6 -0
inferml-1.0.1/python/models/data2vec_vision/__init__.py +6 -0
inferml-1.0.1/python/models/dbrx/__init__.py +6 -0
inferml-1.0.1/python/models/deepseek/__init__.py +6 -0
inferml-1.0.1/python/models/deepseek_vl/__init__.py +5 -0
inferml-1.0.1/python/models/deepseek_vl/adapter.py +93 -0
inferml-1.0.1/python/models/deformable_detr/__init__.py +6 -0
inferml-1.0.1/python/models/depth_anything/__init__.py +6 -0
inferml-1.0.1/python/models/depth_pro/__init__.py +6 -0
inferml-1.0.1/python/models/detr/__init__.py +7 -0
inferml-1.0.1/python/models/dia/__init__.py +13 -0
inferml-1.0.1/python/models/donut/__init__.py +11 -0
inferml-1.0.1/python/models/dpt/__init__.py +7 -0
inferml-1.0.1/python/models/edgetam/__init__.py +14 -0
inferml-1.0.1/python/models/efficientnet/__init__.py +6 -0
inferml-1.0.1/python/models/emu3/__init__.py +6 -0
inferml-1.0.1/python/models/eomt/__init__.py +6 -0
inferml-1.0.1/python/models/eomt_dinov3/__init__.py +11 -0
inferml-1.0.1/python/models/exaone/__init__.py +6 -0
inferml-1.0.1/python/models/falcon/__init__.py +6 -0
inferml-1.0.1/python/models/fastspeech2/__init__.py +6 -0
inferml-1.0.1/python/models/fastvlm/__init__.py +5 -0
inferml-1.0.1/python/models/fastvlm/adapter.py +99 -0
inferml-1.0.1/python/models/florence2/__init__.py +5 -0
inferml-1.0.1/python/models/florence2/adapter.py +102 -0
inferml-1.0.1/python/models/flux/__init__.py +22 -0
inferml-1.0.1/python/models/focalnet/__init__.py +6 -0
inferml-1.0.1/python/models/fuyu/__init__.py +6 -0
inferml-1.0.1/python/models/gemma/__init__.py +10 -0
inferml-1.0.1/python/models/gemma3_vlm/__init__.py +6 -0
inferml-1.0.1/python/models/git/__init__.py +6 -0
inferml-1.0.1/python/models/glm/__init__.py +6 -0
inferml-1.0.1/python/models/glm4v/__init__.py +6 -0
inferml-1.0.1/python/models/got_ocr2/__init__.py +20 -0
inferml-1.0.1/python/models/gpt2/__init__.py +6 -0
inferml-1.0.1/python/models/gpt_oss/__init__.py +6 -0
inferml-1.0.1/python/models/granite/__init__.py +6 -0
inferml-1.0.1/python/models/granite_speech/__init__.py +15 -0
inferml-1.0.1/python/models/grounding_dino/__init__.py +6 -0
inferml-1.0.1/python/models/hunyuan_vl/__init__.py +6 -0
inferml-1.0.1/python/models/idefics/__init__.py +6 -0
inferml-1.0.1/python/models/instructpix2pix/__init__.py +19 -0
inferml-1.0.1/python/models/internvl/__init__.py +6 -0
inferml-1.0.1/python/models/jamba/__init__.py +6 -0
inferml-1.0.1/python/models/janus/__init__.py +5 -0
inferml-1.0.1/python/models/janus/adapter.py +125 -0
inferml-1.0.1/python/models/kandinsky/__init__.py +14 -0
inferml-1.0.1/python/models/kimi_vl/__init__.py +6 -0
inferml-1.0.1/python/models/kolors/__init__.py +15 -0
inferml-1.0.1/python/models/kosmos/__init__.py +6 -0
inferml-1.0.1/python/models/kyutai_stt/__init__.py +11 -0
inferml-1.0.1/python/models/layoutlmv3/__init__.py +9 -0
inferml-1.0.1/python/models/levit/__init__.py +6 -0
inferml-1.0.1/python/models/lfm2_vl/__init__.py +6 -0
inferml-1.0.1/python/models/llama/__init__.py +6 -0
inferml-1.0.1/python/models/llava/__init__.py +5 -0
inferml-1.0.1/python/models/llava/adapter.py +79 -0
inferml-1.0.1/python/models/m2m_100/__init__.py +6 -0
inferml-1.0.1/python/models/mamba/__init__.py +6 -0
inferml-1.0.1/python/models/marian/__init__.py +6 -0
inferml-1.0.1/python/models/mask2former/__init__.py +6 -0
inferml-1.0.1/python/models/maskformer/__init__.py +6 -0
inferml-1.0.1/python/models/mgp_str/__init__.py +12 -0
inferml-1.0.1/python/models/minicpm_v/__init__.py +6 -0
inferml-1.0.1/python/models/minimax/__init__.py +6 -0
inferml-1.0.1/python/models/mistral/__init__.py +6 -0
inferml-1.0.1/python/models/mllama/__init__.py +6 -0
inferml-1.0.1/python/models/mm_grounding_dino/__init__.py +12 -0
inferml-1.0.1/python/models/mobilenet/__init__.py +7 -0
inferml-1.0.1/python/models/moondream/__init__.py +5 -0
inferml-1.0.1/python/models/moondream/adapter.py +37 -0
inferml-1.0.1/python/models/moonshine/__init__.py +6 -0
inferml-1.0.1/python/models/mpt/__init__.py +6 -0
inferml-1.0.1/python/models/musicgen/__init__.py +6 -0
inferml-1.0.1/python/models/nemotron/__init__.py +6 -0
inferml-1.0.1/python/models/olmo/__init__.py +6 -0
inferml-1.0.1/python/models/omdet_turbo/__init__.py +11 -0
inferml-1.0.1/python/models/oneformer/__init__.py +11 -0
inferml-1.0.1/python/models/opt/__init__.py +6 -0
inferml-1.0.1/python/models/ovis/__init__.py +6 -0
inferml-1.0.1/python/models/owlvit/__init__.py +6 -0
inferml-1.0.1/python/models/paligemma/__init__.py +6 -0
inferml-1.0.1/python/models/parakeet/__init__.py +6 -0
inferml-1.0.1/python/models/persimmon/__init__.py +6 -0
inferml-1.0.1/python/models/phi/__init__.py +6 -0
inferml-1.0.1/python/models/pix2struct/__init__.py +6 -0
inferml-1.0.1/python/models/pixart/__init__.py +14 -0
inferml-1.0.1/python/models/playground/__init__.py +14 -0
inferml-1.0.1/python/models/poolformer/__init__.py +6 -0
inferml-1.0.1/python/models/pop2piano/__init__.py +13 -0
inferml-1.0.1/python/models/prophetnet/__init__.py +6 -0
inferml-1.0.1/python/models/pvt/__init__.py +6 -0
inferml-1.0.1/python/models/qwen/__init__.py +9 -0
inferml-1.0.1/python/models/qwen_vl/__init__.py +5 -0
inferml-1.0.1/python/models/qwen_vl/adapter.py +83 -0
inferml-1.0.1/python/models/regnet/__init__.py +6 -0
inferml-1.0.1/python/models/resnet/__init__.py +6 -0
inferml-1.0.1/python/models/rt_detr/__init__.py +6 -0
inferml-1.0.1/python/models/rwkv/__init__.py +6 -0
inferml-1.0.1/python/models/sam/__init__.py +6 -0
inferml-1.0.1/python/models/sam2/__init__.py +6 -0
inferml-1.0.1/python/models/sam3/__init__.py +6 -0
inferml-1.0.1/python/models/sam_hq/__init__.py +10 -0
inferml-1.0.1/python/models/sana/__init__.py +16 -0
inferml-1.0.1/python/models/sd_inpainting/__init__.py +23 -0
inferml-1.0.1/python/models/sdxl/__init__.py +25 -0
inferml-1.0.1/python/models/sdxl_refiner/__init__.py +18 -0
inferml-1.0.1/python/models/sdxl_turbo/__init__.py +16 -0
inferml-1.0.1/python/models/seamless_m4t/__init__.py +6 -0
inferml-1.0.1/python/models/segformer/__init__.py +12 -0
inferml-1.0.1/python/models/siglip/__init__.py +6 -0
inferml-1.0.1/python/models/smollm/__init__.py +6 -0
inferml-1.0.1/python/models/smolvlm/__init__.py +6 -0
inferml-1.0.1/python/models/speecht5/__init__.py +6 -0
inferml-1.0.1/python/models/stable_diffusion/__init__.py +22 -0
inferml-1.0.1/python/models/stablelm/__init__.py +6 -0
inferml-1.0.1/python/models/starcoder2/__init__.py +6 -0
inferml-1.0.1/python/models/swiftformer/__init__.py +6 -0
inferml-1.0.1/python/models/swin/__init__.py +6 -0
inferml-1.0.1/python/models/table_transformer/__init__.py +6 -0
inferml-1.0.1/python/models/timm/__init__.py +12 -0
inferml-1.0.1/python/models/trocr/__init__.py +6 -0
inferml-1.0.1/python/models/upernet/__init__.py +6 -0
inferml-1.0.1/python/models/vision_encoder_decoder/__init__.py +7 -0
inferml-1.0.1/python/models/vit/__init__.py +12 -0
inferml-1.0.1/python/models/vits/__init__.py +6 -0
inferml-1.0.1/python/models/voxtral/__init__.py +18 -0
inferml-1.0.1/python/models/wav2vec2/__init__.py +21 -0
inferml-1.0.1/python/models/whisper/__init__.py +11 -0
inferml-1.0.1/python/models/xglm/__init__.py +6 -0
inferml-1.0.1/python/models/xlnet/__init__.py +6 -0
inferml-1.0.1/python/models/yolos/__init__.py +6 -0
inferml-1.0.1/python/models/zamba/__init__.py +6 -0
inferml-1.0.1/python/models/zoedepth/__init__.py +6 -0
inferml-1.0.1/python/output_kinds.py +56 -0
inferml-1.0.1/python/routing.py +202 -0
inferml-1.0.1/python/server/__init__.py +13 -0
inferml-1.0.1/python/server/_data/model_overrides.json +32 -0
inferml-1.0.1/python/server/_data/supported_architectures.json +307 -0
inferml-1.0.1/python/server/app.py +71 -0
inferml-1.0.1/python/server/appdata.py +67 -0
inferml-1.0.1/python/server/cli.py +68 -0
inferml-1.0.1/python/server/deps.py +55 -0
inferml-1.0.1/python/server/events.py +42 -0
inferml-1.0.1/python/server/hf_service.py +319 -0
inferml-1.0.1/python/server/hw_service.py +131 -0
inferml-1.0.1/python/server/openai_api/__init__.py +9 -0
inferml-1.0.1/python/server/openai_api/llm.py +236 -0
inferml-1.0.1/python/server/openai_api/routes.py +236 -0
inferml-1.0.1/python/server/openai_api/tools/__init__.py +55 -0
inferml-1.0.1/python/server/openai_api/tools/base.py +80 -0
inferml-1.0.1/python/server/openai_api/tools/hermes_qwen.py +28 -0
inferml-1.0.1/python/server/openai_api/tools/llama.py +29 -0
inferml-1.0.1/python/server/openai_api/tools/mistral.py +31 -0
inferml-1.0.1/python/server/paths.py +24 -0
inferml-1.0.1/python/server/routes/__init__.py +1 -0
inferml-1.0.1/python/server/routes/hf.py +65 -0
inferml-1.0.1/python/server/routes/inference.py +286 -0
inferml-1.0.1/python/server/routes/store.py +42 -0
inferml-1.0.1/python/server/routes/system.py +147 -0
inferml-1.0.1/python/server/store_service.py +134 -0
inferml-1.0.1/python/server/webui/components/app.js +718 -0
inferml-1.0.1/python/server/webui/components/chat.js +288 -0
inferml-1.0.1/python/server/webui/components/home.js +173 -0
inferml-1.0.1/python/server/webui/components/icons.js +50 -0
inferml-1.0.1/python/server/webui/components/model-browser.js +559 -0
inferml-1.0.1/python/server/webui/components/onboarding.js +193 -0
inferml-1.0.1/python/server/webui/components/settings.js +512 -0
inferml-1.0.1/python/server/webui/components/task-workspace.js +1286 -0
inferml-1.0.1/python/server/webui/components/welcome.js +4 -0
inferml-1.0.1/python/server/webui/index.html +26 -0
inferml-1.0.1/python/server/webui/styles.css +2109 -0
inferml-1.0.1/python/server/webui/vendor/marked.umd.js +79 -0
inferml-1.0.1/python/server/webui/vendor/purify.min.js +3 -0
inferml-1.0.1/python/server/webui/vendor/react-dom.production.min.js +267 -0
inferml-1.0.1/python/server/webui/vendor/react.production.min.js +31 -0
inferml-1.0.1/python/server/webui/web-bridge.js +247 -0
inferml-1.0.1/python/supported_architectures.json +307 -0
inferml-1.0.1/python/tasks/__init__.py +61 -0
inferml-1.0.1/python/tasks/_render.py +120 -0
inferml-1.0.1/python/tasks/asr.py +66 -0
inferml-1.0.1/python/tasks/base.py +93 -0
inferml-1.0.1/python/tasks/depth_estimation.py +88 -0
inferml-1.0.1/python/tasks/document_qa.py +58 -0
inferml-1.0.1/python/tasks/image_classification.py +48 -0
inferml-1.0.1/python/tasks/image_segmentation.py +205 -0
inferml-1.0.1/python/tasks/image_to_text.py +94 -0
inferml-1.0.1/python/tasks/mask_generation.py +300 -0
inferml-1.0.1/python/tasks/misc_tasks.py +122 -0
inferml-1.0.1/python/tasks/object_detection.py +112 -0
inferml-1.0.1/python/tasks/text_generation.py +162 -0
inferml-1.0.1/setup.cfg +4 -0

inferml-1.0.1/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 LocalML, Gitesh Chawda
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

inferml-1.0.1/MANIFEST.in ADDED Viewed

@@ -0,0 +1,9 @@
+include LICENSE
+include README.md
+include python/supported_architectures.json
+include python/model_overrides.json
+recursive-include python *.json
+recursive-include python/server/webui *
+recursive-include python/server/_data *
+global-exclude __pycache__/*
+global-exclude *.py[cod]

inferml-1.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,137 @@
+Metadata-Version: 2.4
+Name: inferml
+Version: 1.0.1
+Summary: Any HuggingFace model. Local. Multi-modal. Served over an OpenAI-compatible API.
+Author: LocalML
+License: MIT License
+        Copyright (c) 2026 LocalML, Gitesh Chawda
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+        THE SOFTWARE.
+Project-URL: Homepage, https://github.com/IMvision12/InferML
+Keywords: huggingface,transformers,inference,openai,local,llm,diffusion
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: fastapi>=0.110
+Requires-Dist: uvicorn[standard]>=0.29
+Requires-Dist: huggingface_hub
+Requires-Dist: platformdirs>=4
+Requires-Dist: psutil>=5.9
+Provides-Extra: inference
+Requires-Dist: transformers>=5.7.0; extra == "inference"
+Requires-Dist: torch>=2.6; extra == "inference"
+Requires-Dist: torchvision; extra == "inference"
+Requires-Dist: torchaudio>=2.6; extra == "inference"
+Requires-Dist: diffusers; extra == "inference"
+Requires-Dist: accelerate; extra == "inference"
+Requires-Dist: timm; extra == "inference"
+Requires-Dist: pillow; extra == "inference"
+Requires-Dist: soundfile; extra == "inference"
+Requires-Dist: librosa; extra == "inference"
+Requires-Dist: numpy; extra == "inference"
+Requires-Dist: scipy; extra == "inference"
+Requires-Dist: sentencepiece; extra == "inference"
+Requires-Dist: protobuf; extra == "inference"
+Dynamic: license-file
+<p align="center">
+  <img src="assets/logo.png" alt="LocalML logo" width="140" />
+</p>
+# LocalML
+Any Hugging Face model. Local. Multi-modal. Now a **local web server** with an
+**OpenAI-compatible API** - no Electron, no native binary.
+Run 143+ model families fully on-device (LLMs, VLMs, diffusion, ASR, TTS,
+segmentation, detection) behind a browser UI, and point agent frameworks
+(LangChain, LangGraph, the OpenAI SDK) at it the way you point them at Ollama.
+## Install
+Requires **Python 3.10+** - the installer checks for it but won't install Python
+for you. One line in your terminal:
+```bash
+# macOS / Linux
+curl -fsSL https://www.localml.tech/install.sh | sh
+# Windows (PowerShell)
+irm https://www.localml.tech/install.ps1 | iex
+```
+The script bootstraps pipx and installs the LocalML server. On first launch the
+app walks you through installing the inference stack (PyTorch + transformers) for
+your hardware - pick **CPU** or **GPU** and it fetches the matching build.
+Prefer to do it by hand?
+```bash
+pipx install inferml                 # server only; the app installs torch on first run
+pipx install "inferml[inference]"    # or grab the whole stack up front (generic torch wheel)
+```
+## Run
+```bash
+localml                 # starts the server and opens http://localhost:11500
+localml --port 8080     # custom port
+localml --host 0.0.0.0 --no-browser   # expose on the LAN, headless
+```
+Open the printed URL, download a model from the Hub tab, and run it.
+## OpenAI-compatible API
+Point any OpenAI client at `http://localhost:11500/v1` (any API key). It routes
+to whichever LLM is currently loaded in LocalML.
+```python
+from openai import OpenAI
+client = OpenAI(base_url="http://localhost:11500/v1", api_key="not-needed")
+client.chat.completions.create(
+    model="Qwen/Qwen2.5-0.5B-Instruct",
+    messages=[{"role": "user", "content": "Hello!"}],
+)
+```
+Supports streaming (`stream=True`), `GET /v1/models`, and tool/function calling
+for the Qwen/Hermes, Llama, and Mistral families.
+## Docker
+```bash
+docker build -t localml .
+docker run --rm -p 11500:11500 localml            # CPU
+docker run --rm --gpus all -p 11500:11500 localml # GPU
+```
+## Development
+The React UI lives in `src/renderer/` (built with esbuild) and talks to the
+server via `window.localml` (see `src/renderer/web-bridge.js`). The Python
+server + inference engine live in `python/`.
+```bash
+npm install          # build deps (esbuild + the vendored UMD libs)
+npm run build        # compile the renderer and bundle it into the package
+pip install -e ".[inference]"
+localml
+```

inferml-1.0.1/README.md ADDED Viewed

@@ -0,0 +1,83 @@
+<p align="center">
+  <img src="assets/logo.png" alt="LocalML logo" width="140" />
+</p>
+# LocalML
+Any Hugging Face model. Local. Multi-modal. Now a **local web server** with an
+**OpenAI-compatible API** - no Electron, no native binary.
+Run 143+ model families fully on-device (LLMs, VLMs, diffusion, ASR, TTS,
+segmentation, detection) behind a browser UI, and point agent frameworks
+(LangChain, LangGraph, the OpenAI SDK) at it the way you point them at Ollama.
+## Install
+Requires **Python 3.10+** - the installer checks for it but won't install Python
+for you. One line in your terminal:
+```bash
+# macOS / Linux
+curl -fsSL https://www.localml.tech/install.sh | sh
+# Windows (PowerShell)
+irm https://www.localml.tech/install.ps1 | iex
+```
+The script bootstraps pipx and installs the LocalML server. On first launch the
+app walks you through installing the inference stack (PyTorch + transformers) for
+your hardware - pick **CPU** or **GPU** and it fetches the matching build.
+Prefer to do it by hand?
+```bash
+pipx install inferml                 # server only; the app installs torch on first run
+pipx install "inferml[inference]"    # or grab the whole stack up front (generic torch wheel)
+```
+## Run
+```bash
+localml                 # starts the server and opens http://localhost:11500
+localml --port 8080     # custom port
+localml --host 0.0.0.0 --no-browser   # expose on the LAN, headless
+```
+Open the printed URL, download a model from the Hub tab, and run it.
+## OpenAI-compatible API
+Point any OpenAI client at `http://localhost:11500/v1` (any API key). It routes
+to whichever LLM is currently loaded in LocalML.
+```python
+from openai import OpenAI
+client = OpenAI(base_url="http://localhost:11500/v1", api_key="not-needed")
+client.chat.completions.create(
+    model="Qwen/Qwen2.5-0.5B-Instruct",
+    messages=[{"role": "user", "content": "Hello!"}],
+)
+```
+Supports streaming (`stream=True`), `GET /v1/models`, and tool/function calling
+for the Qwen/Hermes, Llama, and Mistral families.
+## Docker
+```bash
+docker build -t localml .
+docker run --rm -p 11500:11500 localml            # CPU
+docker run --rm --gpus all -p 11500:11500 localml # GPU
+```
+## Development
+The React UI lives in `src/renderer/` (built with esbuild) and talks to the
+server via `window.localml` (see `src/renderer/web-bridge.js`). The Python
+server + inference engine live in `python/`.
+```bash
+npm install          # build deps (esbuild + the vendored UMD libs)
+npm run build        # compile the renderer and bundle it into the package
+pip install -e ".[inference]"
+localml
+```

inferml-1.0.1/pyproject.toml ADDED Viewed

@@ -0,0 +1,67 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "inferml"
+version = "1.0.1"
+description = "Any HuggingFace model. Local. Multi-modal. Served over an OpenAI-compatible API."
+readme = "README.md"
+requires-python = ">=3.10"
+license = { file = "LICENSE" }
+authors = [{ name = "LocalML" }]
+keywords = ["huggingface", "transformers", "inference", "openai", "local", "llm", "diffusion"]
+# The web server layer. The inference stack (torch, transformers, diffusers, …)
+# is declared in the `inference` extra so `pipx install inferml` stays fast and
+# the heavy, hardware-specific torch wheels can be pinned to the right index by
+# the user. Phase 5 finalizes the CPU/GPU install story and bundles the built
+# frontend as package data.
+dependencies = [
+  "fastapi>=0.110",
+  "uvicorn[standard]>=0.29",
+  "huggingface_hub",
+  "platformdirs>=4",
+  "psutil>=5.9",
+]
+[project.optional-dependencies]
+inference = [
+  "transformers>=5.7.0",
+  "torch>=2.6",
+  "torchvision",
+  "torchaudio>=2.6",
+  "diffusers",
+  "accelerate",
+  "timm",
+  "pillow",
+  "soundfile",
+  "librosa",
+  "numpy",
+  "scipy",
+  "sentencepiece",
+  "protobuf",
+]
+[project.scripts]
+localml = "server.cli:main"
+[project.urls]
+Homepage = "https://github.com/IMvision12/InferML"
+# The Python sources live under python/ as flat modules + subpackages (the
+# inference engine imports `routing`, `adapters`, `tasks`, `models` at top
+# level). Map the package root to python/ and ship both shapes.
+[tool.setuptools]
+package-dir = { "" = "python" }
+py-modules = ["engine", "routing", "io_utils", "output_kinds", "_win_compat"]
+[tool.setuptools.packages.find]
+where = ["python"]
+include = ["server*", "adapters*", "tasks*", "models*", "plugins*"]
+[tool.setuptools.package-data]
+# Per-family JSON + the compiled frontend and routing tables bundled inside the
+# `server` package by `npm run build` (scripts/bundle-webui.js).
+"*" = ["*.json"]
+"server" = ["webui/**/*", "_data/**/*"]

inferml-1.0.1/python/_win_compat.py ADDED Viewed

@@ -0,0 +1,52 @@
+"""Windows compatibility patches applied process-wide at sidecar boot.
+Currently:
+- os.symlink → transparent copy fallback when the caller lacks
+  SeCreateSymbolicLinkPrivilege (the WinError 1314 case). HuggingFace's
+  cache layout uses symlinks to dedup blobs across snapshots, and a
+  standard non-admin user without Developer Mode hits this on every
+  download. POSIX users keep the real os.symlink (symlinks always work
+  there).
+Import this module before any other library that may call os.symlink.
+On non-Windows platforms the import is a no-op.
+"""
+from __future__ import annotations
+import os
+import shutil
+import sys
+def _install_symlink_copy_fallback() -> None:
+    """Wrap `os.symlink` so privilege failures on Windows degrade to a copy.
+    Does nothing unless `sys.platform == "win32"`. On Windows, `os.symlink`
+    is rebound to a wrapper that first tries the real call and, only when
+    that raises a privilege-related `OSError`, copies the target to `dst`.
+    """
+    if sys.platform != "win32":
+        return
+    # Keep the real implementation; the wrapper below always tries it first.
+    _orig = os.symlink
+    def _resolve(src, dst):
+        # Absolute targets are used as-is; a relative target is resolved
+        # against the directory that would contain the link (`dst`).
+        if os.path.isabs(src):
+            return src
+        return os.path.normpath(os.path.join(os.path.dirname(dst), src))
+    def _symlink(src, dst, target_is_directory=False, *, dir_fd=None):
+        # Accepts the same arguments that are forwarded to the real os.symlink.
+        try:
+            return _orig(src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd)
+        except OSError as e:
+            # WinError 1314 is the missing-privilege case named in the module
+            # docstring; the substring test is a looser match on the error text.
+            is_privilege_error = (
+                getattr(e, "winerror", None) == 1314
+                or "privilege" in str(e).lower()
+            )
+            if not is_privilege_error:
+                # Any other OSError is propagated unchanged.
+                raise
+            # Fallback: place a copy of the target where the link would be.
+            # NOTE(review): `dir_fd` is not consulted on this path.
+            real_src = _resolve(src, dst)
+            if target_is_directory or os.path.isdir(real_src):
+                shutil.copytree(real_src, dst, dirs_exist_ok=True)
+            else:
+                shutil.copyfile(real_src, dst)
+            return None
+    os.symlink = _symlink
+# Applied at import time; the module docstring asks callers to import this
+# module before anything else that may call os.symlink.
+_install_symlink_copy_fallback()

inferml-1.0.1/python/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,65 @@
+"""Adapter base classes + shared catch-all adapters.
+Per-family inference code lives in `python/models/<family>/`. This package
+holds only the cross-cutting pieces:
+  - `Adapter`                  the base class every family inherits from
+  - `StandardPipelineAdapter`  fallback for repos with no dedicated family
+  - `DiffusersAdapter`         library passthrough for diffusers checkpoints
+Routing strategy lives in `routing.py` and is:
+    1. Named override (via model_overrides.json `"adapter"` field)
+    2. Plugin adapters (python/plugins/*.py)
+    3. models/<family>/ registry (per-family folders)
+    4. DiffusersAdapter (library == "diffusers")
+    5. StandardPipelineAdapter (pipeline_tag in its task list)
+"""
+from __future__ import annotations
+from .base import Adapter  # noqa: F401
+from .standard_pipeline import StandardPipelineAdapter
+from .diffusers_pipeline import DiffusersAdapter
+def _named_adapters() -> dict[str, type]:
+    """Build the name→class map used by `model_overrides.json "adapter"` pins.
+    Includes the cross-cutting fallbacks plus every family folder in
+    `python/models/`. Built LAZILY (see `__getattr__` below) so we don't
+    capture a partially-loaded `models.FAMILIES` if some caller imports
+    `models` before `adapters` and the family-folder-discovery chain
+    re-enters this module mid-load.
+    """
+    out: dict[str, type] = {
+        "standard":  StandardPipelineAdapter,
+        "diffusers": DiffusersAdapter,
+    }
+    try:
+        from models import FAMILIES
+        for fam_name, entry in FAMILIES.items():
+            cls = entry.get("adapter")
+            if cls is None:
+                continue
+            out[fam_name] = cls
+            short = cls.__name__.replace("Adapter", "").lower()
+            out.setdefault(short, cls)
+    except Exception:
+        pass
+    return out
+_NAMED_ADAPTERS_CACHE: "dict[str, type] | None" = None
+def __getattr__(name: str):
+    global _NAMED_ADAPTERS_CACHE
+    if name == "NAMED_ADAPTERS":
+        if _NAMED_ADAPTERS_CACHE is None:
+            _NAMED_ADAPTERS_CACHE = _named_adapters()
+        return _NAMED_ADAPTERS_CACHE
+    raise AttributeError(f"module 'adapters' has no attribute {name!r}")
+__all__ = [
+    "Adapter",
+    "StandardPipelineAdapter",
+    "DiffusersAdapter",
+    "NAMED_ADAPTERS",
+]

inferml-1.0.1/python/adapters/base.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""Adapter base class.
+One instance of an adapter = one loaded model. The router picks the adapter,
+calls `load(info, device)` once, then `run(inputs, params)` per request.
+Instances are cached by (adapter_class, model_id) in the engine.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+class Adapter(ABC):
+    # Abstract interface for model adapters: subclasses must implement `load`
+    # and `run`; `can_handle` and `unload` come with default implementations.
+    #
+    # `override` is a class-level default dict.
+    # NOTE(review): being a mutable class attribute, it is shared by every
+    # class/instance that does not rebind it - treat it as read-only or assign
+    # a fresh dict instead of mutating it in place.
+    override: dict = {}
+    @classmethod
+    def can_handle(cls, info: dict) -> bool:
+        """Return True if this adapter can run the described model.
+        `info` is the dict from routing.inspect_model. Implementations should
+        inspect `model_id`, `model_type`, `architectures`, `tags`, etc. -
+        *not* download any weights."""
+        # Base implementation declines every model; subclasses opt in.
+        return False
+    @abstractmethod
+    def load(self, info: dict, device) -> None:
+        """Instantiate the underlying model + any helpers (processor, tokenizer)."""
+    @abstractmethod
+    def run(self, inputs: dict, params: dict) -> dict:
+        """Execute inference. Must return a dict matching one of the kinds in
+        `output_kinds.py` (`boxes`, `masks`, `labels`, `text`, `image`,
+        `audio`, `vector`)."""
+    def unload(self) -> None:
+        """Hook for freeing GPU memory - default: drop references."""
+        # Only attributes stored on the instance are touched; each one is set
+        # to None (not deleted). The key list is copied before iterating.
+        for attr in list(self.__dict__.keys()):
+            # An instance-level `override`, if present, is left untouched.
+            if attr not in ("override",):
+                setattr(self, attr, None)

inferml-1.0.1/python/adapters/diffusers_pipeline.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""Diffusers fallback. text-to-image, img2img, inpainting."""
+from __future__ import annotations
+from .base import Adapter
+import output_kinds as ok
+from io_utils import decode_image, resolve_device, torch_dtype_for_device
+class DiffusersAdapter(Adapter):
+    @classmethod
+    def can_handle(cls, info):
+        if info.get("library") == "diffusers":
+            return info.get("pipeline_tag") != "text-to-video"
+        tag = info.get("pipeline_tag")
+        return tag in ("text-to-image", "image-to-image", "inpainting")
+    def load(self, info, device):
+        self.info = info
+        self.device = device
+        self.task = info.get("pipeline_tag") or "text-to-image"
+        dtype = torch_dtype_for_device()
+        from diffusers import (
+            AutoPipelineForText2Image,
+            AutoPipelineForImage2Image,
+            AutoPipelineForInpainting,
+        )
+        cls = {
+            "image-to-image": AutoPipelineForImage2Image,
+            "inpainting":     AutoPipelineForInpainting,
+        }.get(self.task, AutoPipelineForText2Image)
+        kwargs = {"torch_dtype": dtype}
+        if self.override.get("trust_remote_code"):
+            kwargs["trust_remote_code"] = True
+        self.pipe = cls.from_pretrained(info["model_id"], **kwargs)
+        resolved = resolve_device()
+        if resolved is not False:
+            self.pipe = self.pipe.to(resolved)
+    def run(self, inputs, params):
+        prompt = (inputs.get("text") or "").strip()
+        if not prompt:
+            raise ValueError("Prompt required")
+        kwargs = {k: params[k] for k in
+                  ("num_inference_steps", "guidance_scale", "negative_prompt", "strength")
+                  if k in params}
+        kwargs.setdefault("num_inference_steps", 20)
+        kwargs.setdefault("guidance_scale", 7.5)
+        if self.task == "image-to-image" and inputs.get("dataUrl"):
+            kwargs["image"] = decode_image(inputs["dataUrl"])
+        elif self.task == "inpainting" and inputs.get("dataUrl"):
+            kwargs["image"] = decode_image(inputs["dataUrl"])
+        result = self.pipe(prompt, **kwargs)
+        image = result.images[0]
+        return ok.image(image)

inferml-1.0.1/python/adapters/standard_pipeline.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""Standard HF pipeline adapter. Fallback when no `models/<family>/` matches.
+Thin dispatcher: the actual per-task logic lives in `python/tasks/`. When a
+model breaks, add a folder under `python/models/` (preferred) or a Variant
+in the relevant task file.
+"""
+from __future__ import annotations
+from .base import Adapter
+from tasks import TASK_REGISTRY, get_task
+class StandardPipelineAdapter(Adapter):
+    SUPPORTED_TASKS = set(TASK_REGISTRY.keys())
+    @classmethod
+    def can_handle(cls, info):
+        return info.get("pipeline_tag") in cls.SUPPORTED_TASKS
+    def load(self, info, device):
+        self.info = info
+        self.device = device
+        self.task_name = info["pipeline_tag"]
+        self.handler = get_task(self.task_name)
+        if self.handler is None:
+            raise ValueError(f"No task handler registered for {self.task_name!r}")
+        extra = {"trust_remote_code": True} if self.override.get("trust_remote_code") else {}
+        self.state = self.handler.load_pipeline(info, device, extra_kwargs=extra)
+    def run(self, inputs, params):
+        return self.handler.handle(self.state, inputs, params)