floudsonnx 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- floudsonnx/__init__.py +90 -0
- floudsonnx/api/__init__.py +7 -0
- floudsonnx/api/client.py +115 -0
- floudsonnx/api/routers/__init__.py +4 -0
- floudsonnx/api/routers/health.py +19 -0
- floudsonnx/api/routers/models.py +93 -0
- floudsonnx/api/server.py +30 -0
- floudsonnx/cli/__init__.py +4 -0
- floudsonnx/cli/main.py +190 -0
- floudsonnx/config/__init__.py +8 -0
- floudsonnx/config/model_config.py +108 -0
- floudsonnx/config/settings.py +46 -0
- floudsonnx/exceptions.py +84 -0
- floudsonnx/runtime/__init__.py +8 -0
- floudsonnx/runtime/loader.py +212 -0
- floudsonnx/runtime/ort_seq2seq.py +57 -0
- floudsonnx/runtime/ort_session.py +79 -0
- floudsonnx/runtime/session_pool.py +116 -0
- floudsonnx/runtime/strategy.py +54 -0
- floudsonnx/runtime/tokenizer_cache.py +108 -0
- floudsonnx/store/__init__.py +8 -0
- floudsonnx/store/exporter_bridge.py +123 -0
- floudsonnx/store/manifest.py +80 -0
- floudsonnx/store/registry.py +202 -0
- floudsonnx/utils/__init__.py +8 -0
- floudsonnx/utils/concurrent_dict.py +89 -0
- floudsonnx/utils/path_guard.py +41 -0
- floudsonnx-1.0.0.dist-info/METADATA +531 -0
- floudsonnx-1.0.0.dist-info/RECORD +33 -0
- floudsonnx-1.0.0.dist-info/WHEEL +5 -0
- floudsonnx-1.0.0.dist-info/entry_points.txt +2 -0
- floudsonnx-1.0.0.dist-info/licenses/LICENSE +17 -0
- floudsonnx-1.0.0.dist-info/top_level.txt +1 -0
floudsonnx/__init__.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
"""
|
|
6
|
+
floudsonnx
|
|
7
|
+
~~~~~~~~~~
|
|
8
|
+
Ollama-style ONNX model store and runtime.
|
|
9
|
+
|
|
10
|
+
Quick start:
|
|
11
|
+
from floudsonnx import create_model
|
|
12
|
+
|
|
13
|
+
model = create_model("sentence-transformers/all-MiniLM-L6-v2")
|
|
14
|
+
outputs = model.run(None, {"input_ids": ids, "attention_mask": mask})
|
|
15
|
+
"""
|
|
16
|
+
from typing import Any, List
|
|
17
|
+
|
|
18
|
+
from floudsonnx.api.client import FloudsOnnxClient, get_default_client
|
|
19
|
+
from floudsonnx.config.model_config import ModelConfig
|
|
20
|
+
from floudsonnx.config.settings import FloudsOnnxSettings
|
|
21
|
+
from floudsonnx.exceptions import (
|
|
22
|
+
ExporterNotInstalledError,
|
|
23
|
+
ExportError,
|
|
24
|
+
FloudsOnnxError,
|
|
25
|
+
ManifestError,
|
|
26
|
+
ModelLoadError,
|
|
27
|
+
ModelNotFoundError,
|
|
28
|
+
OptimumNotInstalledError,
|
|
29
|
+
StrategyError,
|
|
30
|
+
TokenizerError,
|
|
31
|
+
)
|
|
32
|
+
from floudsonnx.runtime.loader import LoadedModel
|
|
33
|
+
from floudsonnx.runtime.strategy import SessionStrategy
|
|
34
|
+
from floudsonnx.store.manifest import ModelManifest
|
|
35
|
+
|
|
36
|
+
__version__ = "1.0.0"
|
|
37
|
+
__all__ = [
|
|
38
|
+
# Primary API
|
|
39
|
+
"create_model",
|
|
40
|
+
"load_model",
|
|
41
|
+
"pull",
|
|
42
|
+
"list_models",
|
|
43
|
+
"remove_model",
|
|
44
|
+
# Client / settings
|
|
45
|
+
"FloudsOnnxClient",
|
|
46
|
+
"FloudsOnnxSettings",
|
|
47
|
+
"get_default_client",
|
|
48
|
+
# Types
|
|
49
|
+
"ModelConfig",
|
|
50
|
+
"LoadedModel",
|
|
51
|
+
"SessionStrategy",
|
|
52
|
+
# Exceptions
|
|
53
|
+
"FloudsOnnxError",
|
|
54
|
+
"ModelNotFoundError",
|
|
55
|
+
"ModelLoadError",
|
|
56
|
+
"TokenizerError",
|
|
57
|
+
"ExportError",
|
|
58
|
+
"ExporterNotInstalledError",
|
|
59
|
+
"OptimumNotInstalledError",
|
|
60
|
+
"ManifestError",
|
|
61
|
+
"StrategyError",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ── Top-level convenience functions (delegate to default client singleton) ────
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def create_model(model_name: str, model_for: str = "fe", **kwargs: Any) -> LoadedModel:
    """Pull the model (auto-exporting it if missing) and load its session.

    Module-level shortcut: all arguments are forwarded unchanged to
    ``create_model`` on the client returned by ``get_default_client()``.
    """
    client = get_default_client()
    return client.create_model(model_name, model_for, **kwargs)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def load_model(model_name: str, model_for: str = "fe") -> LoadedModel:
    """Load a model that is already on disk, without pulling it first.

    Delegates to ``load_model`` on the client returned by
    ``get_default_client()``.
    """
    client = get_default_client()
    return client.load_model(model_name, model_for)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def pull(model_name: str, model_for: str = "fe", **kwargs: Any) -> ModelManifest:
    """Export a model to disk only; no session is loaded.

    All arguments are forwarded unchanged to ``pull`` on the client returned
    by ``get_default_client()``.
    """
    client = get_default_client()
    return client.pull(model_name, model_for, **kwargs)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def list_models() -> List[ModelManifest]:
    """Return the manifests of every model held in the local store.

    Delegates to ``list`` on the client returned by ``get_default_client()``.
    """
    client = get_default_client()
    return client.list()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def remove_model(model_name: str, model_for: str = "fe") -> bool:
    """Delete a model from disk and evict it from the session pool.

    Delegates to ``remove`` on the client returned by
    ``get_default_client()`` and returns that call's result.
    """
    client = get_default_client()
    return client.remove(model_name, model_for)
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
from floudsonnx.api.client import FloudsOnnxClient, get_default_client
|
|
6
|
+
|
|
7
|
+
__all__ = ["FloudsOnnxClient", "get_default_client"]
|
floudsonnx/api/client.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import threading
|
|
8
|
+
from typing import Any, List, Optional
|
|
9
|
+
|
|
10
|
+
from floudsonnx.config.model_config import ModelConfig
|
|
11
|
+
from floudsonnx.config.settings import FloudsOnnxSettings
|
|
12
|
+
from floudsonnx.runtime.loader import LoadedModel, ModelLoader
|
|
13
|
+
from floudsonnx.store.manifest import ModelManifest
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FloudsOnnxClient:
    """Thin convenience wrapper that forwards every call to a ``ModelLoader``."""

    def __init__(self, settings: Optional[FloudsOnnxSettings] = None) -> None:
        """Create the underlying loader, handing it ``settings`` unchanged."""
        loader = ModelLoader(settings)
        self._loader = loader

    def pull(
        self,
        model_name: str,
        model_for: str = "fe",
        task: Optional[str] = None,
        force: bool = False,
        config: Optional[ModelConfig] = None,
        optimize: Optional[bool] = None,
        optimization_level: Optional[int] = None,
        opset_version: Optional[int] = None,
        device: str = "cpu",
        framework: Optional[str] = None,
        library: Optional[str] = None,
        normalize_embeddings: Optional[bool] = None,
        trust_remote_code: bool = False,
        use_external_data_format: bool = False,
        use_subprocess: Optional[bool] = None,
        use_fallback_if_failed: bool = False,
        merge: bool = False,
        skip_validator: bool = False,
        hf_token: Optional[str] = None,
        **kwargs: Any,
    ) -> ModelManifest:
        """Forward a pull request to the loader and return its manifest.

        ``model_name`` and ``model_for`` are passed positionally; every other
        named option is passed by keyword, followed by any extra ``kwargs``.
        """
        # Named options in the same order the loader call lists them.
        export_options: dict[str, Any] = {
            "task": task,
            "force": force,
            "config": config,
            "optimize": optimize,
            "optimization_level": optimization_level,
            "opset_version": opset_version,
            "device": device,
            "framework": framework,
            "library": library,
            "normalize_embeddings": normalize_embeddings,
            "trust_remote_code": trust_remote_code,
            "use_external_data_format": use_external_data_format,
            "use_subprocess": use_subprocess,
            "use_fallback_if_failed": use_fallback_if_failed,
            "merge": merge,
            "skip_validator": skip_validator,
            "hf_token": hf_token,
        }
        return self._loader.pull(model_name, model_for, **export_options, **kwargs)

    def list(self) -> List[ModelManifest]:
        """Return whatever the loader's ``list()`` reports."""
        manifests = self._loader.list()
        return manifests

    def remove(self, model_name: str, model_for: str = "fe") -> bool:
        """Forward a removal request to the loader and return its result."""
        outcome = self._loader.remove(model_name, model_for)
        return outcome

    def create_model(
        self,
        model_name: str,
        model_for: str = "fe",
        task: Optional[str] = None,
        force_pull: bool = False,
        config: Optional[ModelConfig] = None,
        **kwargs: Any,
    ) -> LoadedModel:
        """Forward to the loader's ``create_model`` with all options by keyword."""
        return self._loader.create_model(
            model_name,
            model_for,
            task=task,
            force_pull=force_pull,
            config=config,
            **kwargs,
        )

    def load_model(self, model_name: str, model_for: str = "fe") -> LoadedModel:
        """Forward to the loader's ``load_model``."""
        loaded = self._loader.load_model(model_name, model_for)
        return loaded

    def reload(self, model_name: str, model_for: str = "fe") -> LoadedModel:
        """Forward to the loader's ``reload``."""
        reloaded = self._loader.reload(model_name, model_for)
        return reloaded

    def unload(self, model_name: str, model_for: str = "fe") -> bool:
        """Forward to the loader's ``unload`` and return its result."""
        evicted = self._loader.unload(model_name, model_for)
        return evicted

    def is_loaded(self, model_name: str, model_for: str = "fe") -> bool:
        """Forward to the loader's ``is_loaded`` and return its result."""
        present = self._loader.is_loaded(model_name, model_for)
        return present

    def cache_stats(self) -> dict[str, Any]:
        """Return the loader's ``cache_stats()`` result."""
        stats = self._loader.cache_stats()
        return stats

    @property
    def settings(self) -> FloudsOnnxSettings:
        """Settings object held by the underlying loader."""
        return self._loader._settings
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Process-wide client instance, created on first use by get_default_client().
_default_client: Optional[FloudsOnnxClient] = None
# Guards creation of the instance above.
_default_client_lock = threading.Lock()


def get_default_client() -> FloudsOnnxClient:
    """Return the shared ``FloudsOnnxClient``, creating it on first call.

    The instance is stored in the module-level ``_default_client``; creation
    happens while holding ``_default_client_lock`` and is re-checked inside
    the lock so only one instance is ever stored.
    """
    global _default_client
    # Already built: skip the lock entirely.
    if _default_client is not None:
        return _default_client
    with _default_client_lock:
        # Re-check: another caller may have created it while we waited.
        if _default_client is None:
            _default_client = FloudsOnnxClient()
    return _default_client
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
# Build the health router only when FastAPI can be imported; otherwise
# ``router`` is left as ``None``.
try:
    from fastapi import APIRouter

    router: Any = APIRouter(tags=["health"])

    # GET /health — returns a fixed status payload naming this service.
    @router.get("/health")
    async def health():
        return {"status": "ok", "service": "floudsonnx"}

except ImportError:
    # FastAPI is not importable: expose a ``None`` placeholder instead.
    router = None
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Annotated, Any, Optional
|
|
8
|
+
|
|
9
|
+
# Build the model-management router only when FastAPI, pydantic and the
# package's own client are importable; otherwise ``router`` is ``None``.
# Handlers are documented with ``#`` comments rather than docstrings so the
# generated OpenAPI descriptions stay unchanged.
try:
    from fastapi import APIRouter, HTTPException, Query
    from fastapi.concurrency import run_in_threadpool
    from pydantic import BaseModel as _BM
    from pydantic import ConfigDict

    from floudsonnx.api.client import get_default_client
    from floudsonnx.exceptions import FloudsOnnxError

    router = APIRouter(tags=["models"])

    class PullRequest(_BM):
        # Request body for POST /models/pull.
        # The ``model_*`` field names collide with pydantic's protected
        # "model_" namespace; clearing it avoids the import-time warning.
        model_config = ConfigDict(protected_namespaces=())

        model_name: str
        model_for: str = "fe"
        task: Optional[str] = None
        force: bool = False
        optimize: Optional[bool] = None
        trust_remote_code: bool = False
        use_external_data_format: bool = False
        use_fallback_if_failed: bool = False
        hf_token: Optional[str] = None

    class LoadRequest(_BM):
        # Request body shared by the load, reload and unload endpoints.
        # See PullRequest for why the protected namespace is cleared.
        model_config = ConfigDict(protected_namespaces=())

        model_name: str
        model_for: str = "fe"

    # GET /models — JSON-serialised manifests of all stored models.
    @router.get("/models")
    async def list_models() -> list[dict[str, Any]]:
        return [m.model_dump(mode="json") for m in get_default_client().list()]

    # GET /models/{name} — manifest of one model, or 404 when none is found.
    # ``name`` uses the ``path`` converter so it may contain slashes.
    @router.get("/models/{name:path}")
    async def get_model(name: str, model_for: Annotated[str, Query()] = "fe") -> dict[str, Any]:
        manifest = get_default_client()._loader._registry.get_manifest(name, model_for)
        if manifest is None:
            raise HTTPException(status_code=404, detail=f"Model '{name}' not found")
        return manifest.model_dump(mode="json")

    # POST /models/pull — pull a model and return its manifest.
    # The client call is synchronous, so it runs in the worker threadpool to
    # keep the event loop free for other requests.
    # NOTE(review): assumes the client's pull/load/reload may be called from
    # worker threads — confirm against the loader implementation.
    @router.post("/models/pull")
    async def pull_model(req: PullRequest) -> dict[str, Any]:
        try:
            manifest = await run_in_threadpool(
                get_default_client().pull,
                req.model_name,
                req.model_for,
                task=req.task,
                force=req.force,
                optimize=req.optimize,
                trust_remote_code=req.trust_remote_code,
                use_external_data_format=req.use_external_data_format,
                use_fallback_if_failed=req.use_fallback_if_failed,
                hf_token=req.hf_token,
            )
        except FloudsOnnxError as exc:
            # Chain the cause so server logs keep the original traceback.
            raise HTTPException(status_code=500, detail=str(exc)) from exc
        return manifest.model_dump(mode="json")

    # POST /models/load — load a stored model and report its strategy.
    @router.post("/models/load")
    async def load_model(req: LoadRequest) -> dict[str, str]:
        try:
            loaded = await run_in_threadpool(
                get_default_client().load_model, req.model_name, req.model_for
            )
        except FloudsOnnxError as exc:
            raise HTTPException(status_code=500, detail=str(exc)) from exc
        return {
            "model_name": loaded.model_name,
            "model_for": loaded.model_for,
            "strategy": loaded.session_strategy.value,
            "status": "loaded",
        }

    # POST /models/reload — reload a model and report its strategy.
    @router.post("/models/reload")
    async def reload_model(req: LoadRequest) -> dict[str, str]:
        try:
            loaded = await run_in_threadpool(
                get_default_client().reload, req.model_name, req.model_for
            )
        except FloudsOnnxError as exc:
            raise HTTPException(status_code=500, detail=str(exc)) from exc
        return {
            "model_name": loaded.model_name,
            "strategy": loaded.session_strategy.value,
            "status": "reloaded",
        }

    # POST /models/unload — unload a model; ``evicted`` is the client's result.
    @router.post("/models/unload")
    async def unload_model(req: LoadRequest) -> dict[str, Any]:
        evicted = get_default_client().unload(req.model_name, req.model_for)
        return {"model_name": req.model_name, "evicted": evicted}

    # DELETE /models/{name} — remove a model; ``removed`` is the client's result.
    @router.delete("/models/{name:path}")
    async def remove_model(name: str, model_for: Annotated[str, Query()] = "fe") -> dict[str, Any]:
        removed = get_default_client().remove(name, model_for)
        return {"model_name": name, "removed": removed}

    # GET /stats — the client's cache statistics.
    @router.get("/stats")
    async def stats() -> dict[str, Any]:
        return get_default_client().cache_stats()

except ImportError:
    # An optional dependency is missing: expose a ``None`` placeholder.
    router = None
|
floudsonnx/api/server.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from floudsonnx.exceptions import ServerNotInstalledError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def create_app() -> Any:
    """Build the FastAPI application with the health and model routers.

    Returns:
        The configured ``FastAPI`` instance. The health router is mounted
        without a prefix and the models router under ``/api/v1``.

    Raises:
        ServerNotInstalledError: if ``fastapi`` cannot be imported.
    """
    try:
        from fastapi import FastAPI
    except ImportError as exc:
        raise ServerNotInstalledError() from exc
    from floudsonnx.api.routers import health, models

    application = FastAPI(
        title="floudsonnx",
        description="ONNX model store and runtime REST API",
        version="1.0.0",
    )
    # (router, include options) pairs, mounted in this order.
    mounts = (
        (health.router, {}),
        (models.router, {"prefix": "/api/v1"}),
    )
    for api_router, options in mounts:
        application.include_router(api_router, **options)
    return application
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def run_server(host: str = "127.0.0.1", port: int = 19720, reload: bool = False) -> None:
    """Serve the application with uvicorn.

    The app is given to uvicorn as an import string in factory mode, so
    uvicorn itself calls ``create_app``.

    Raises:
        ServerNotInstalledError: if ``uvicorn`` cannot be imported.
    """
    try:
        import uvicorn
    except ImportError as exc:
        raise ServerNotInstalledError() from exc

    app_target = "floudsonnx.api.server:create_app"
    uvicorn.run(app_target, factory=True, host=host, port=port, reload=reload)
|
floudsonnx/cli/main.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
"""
|
|
6
|
+
floudsonnx CLI — entry-point: floudsonnx <command> [options]
|
|
7
|
+
|
|
8
|
+
Commands:
|
|
9
|
+
pull <model_name> [--for fe] [--task T] [--optimize] [--force]
|
|
10
|
+
[--trust-remote-code] [--use-external-data-format]
|
|
11
|
+
[--use-fallback-if-failed] [--hf-token T]
|
|
12
|
+
list
|
|
13
|
+
info <model_name> [--for fe]
|
|
14
|
+
remove <model_name> [--for fe]
|
|
15
|
+
reload <model_name> [--for fe]
|
|
16
|
+
stats
|
|
17
|
+
serve [--host H] [--port P]
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import sys
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _client() -> Any:
    """Return the package's default client.

    The import happens inside the function, so the client module is only
    loaded once a command actually needs it.
    """
    from floudsonnx.api.client import get_default_client as default_client

    return default_client()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def cmd_pull(args: argparse.Namespace) -> int:
    """Handle ``pull``: export a model and print a short summary.

    Returns:
        0 on success; 1 if any exception was raised (its message is printed
        to stderr).
    """
    print(f"Pulling '{args.model_name}' (model_for={args.model_for}, optimize={args.optimize}) ...")
    try:
        do_pull = _client().pull
        # Empty-string CLI defaults are turned into None by the ``or None``.
        options = {
            "model_for": args.model_for,
            "task": args.task or None,
            "force": args.force,
            "optimize": args.optimize,
            "optimization_level": args.optimization_level,
            "opset_version": args.opset_version,
            "device": args.device,
            "framework": args.framework or None,
            "library": args.library or None,
            "normalize_embeddings": args.normalize_embeddings,
            "trust_remote_code": args.trust_remote_code,
            "use_external_data_format": args.use_external_data_format,
            "use_subprocess": args.use_subprocess,
            "use_fallback_if_failed": args.use_fallback_if_failed,
            "merge": args.merge,
            "skip_validator": args.skip_validator,
            "hf_token": args.hf_token or None,
        }
        manifest = do_pull(args.model_name, **options)
        print(f"OK {manifest.model_name} [{manifest.model_for}] pulled_at={manifest.pulled_at}")
        print(f" strategy : {manifest.session_strategy}")
        print(f" onnx : {', '.join(manifest.onnx_files) or 'none'}")
    except Exception as failure:
        print(f"ERROR: {failure}", file=sys.stderr)
        return 1
    return 0
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def cmd_list(args: argparse.Namespace) -> int:
    """Handle ``list``: print one table row per stored model.

    ``args`` is accepted for the common handler signature but is not read.
    Always returns 0.
    """
    stored = _client().list()
    if not stored:
        print("No models in local store.")
        return 0

    row_template = "{:<50} {:<10} {:<25} {}"

    def emit(*cells: Any) -> None:
        # Print a single formatted table row.
        print(row_template.format(*cells))

    emit("MODEL", "FOR", "PULLED AT", "STRATEGY")
    print("-" * 105)
    for entry in stored:
        # Name and timestamp are truncated so they fit their columns.
        emit(entry.model_name[:48], entry.model_for, entry.pulled_at[:24], entry.session_strategy)
    return 0
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def cmd_info(args: argparse.Namespace) -> int:
    """Handle ``info``: print a model's manifest as indented JSON.

    Returns:
        0 when a manifest was found and printed; 1 (with a message on
        stderr) when the registry returned ``None``.
    """
    registry = _client()._loader._registry
    manifest = registry.get_manifest(args.model_name, args.model_for)
    if manifest is not None:
        print(json.dumps(manifest.model_dump(mode="json"), indent=2))
        return 0
    print(f"Model '{args.model_name}' not found.", file=sys.stderr)
    return 1
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def cmd_remove(args: argparse.Namespace) -> int:
    """Handle ``remove``: delete a model and report what happened.

    Always returns 0, whether or not anything was removed.
    """
    was_removed = _client().remove(args.model_name, args.model_for)
    if was_removed:
        message = f"Removed '{args.model_name}'"
    else:
        message = f"'{args.model_name}' was not in the local store."
    print(message)
    return 0
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def cmd_reload(args: argparse.Namespace) -> int:
    """Handle ``reload``: reload a model and print its session strategy.

    Returns:
        0 on success; 1 if any exception was raised (its message is printed
        to stderr).
    """
    try:
        reloaded = _client().reload(args.model_name, args.model_for)
        print(f"Reloaded '{reloaded.model_name}' — strategy={reloaded.session_strategy.value}")
    except Exception as failure:
        print(f"ERROR: {failure}", file=sys.stderr)
        return 1
    return 0
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def cmd_stats(args: argparse.Namespace) -> int:
    """Handle ``stats``: print the client's cache statistics as indented JSON.

    ``args`` is accepted for the common handler signature but is not read.
    Always returns 0.
    """
    statistics = _client().cache_stats()
    rendered = json.dumps(statistics, indent=2)
    print(rendered)
    return 0
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def cmd_serve(args: argparse.Namespace) -> int:
    """Handle ``serve``: start the HTTP server on ``args.host``/``args.port``.

    Returns:
        0 once ``run_server`` returns; 1 if importing or running the server
        raised an exception (its message is printed to stderr).
    """
    try:
        # Imported here so the server stack is only loaded for this command.
        from floudsonnx.api.server import run_server

        print(f"Starting floudsonnx server on {args.host}:{args.port} ...")
        run_server(host=args.host, port=args.port)
    except Exception as failure:
        print(f"ERROR: {failure}", file=sys.stderr)
        return 1
    return 0
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``floudsonnx`` argument parser with all sub-commands.

    Each sub-parser stores its handler under ``func`` via ``set_defaults``,
    so the caller can dispatch with ``args.func(args)``. A sub-command is
    required.
    """
    root = argparse.ArgumentParser(prog="floudsonnx", description="Ollama-style ONNX model store and runtime")
    commands = root.add_subparsers(dest="command", metavar="<command>")
    commands.required = True

    # pull
    pull_cmd = commands.add_parser("pull", help="Pull (export) a model to the local store")
    pull_cmd.add_argument("model_name")
    pull_cmd.add_argument("--for", dest="model_for", default="fe", help="fe|s2s|sc|ranker|llm (default: fe)")
    pull_cmd.add_argument("--task", default="", help="Optimum export task")
    # --optimize is on by default; --no-optimize clears the same destination.
    pull_cmd.add_argument("--optimize", action="store_true", default=True)
    pull_cmd.add_argument("--no-optimize", dest="optimize", action="store_false")
    pull_cmd.add_argument("--optimization-level", type=int, default=99)
    pull_cmd.add_argument("--opset-version", type=int, default=None)
    pull_cmd.add_argument("--device", default="cpu")
    for text_option in ("--framework", "--library"):
        pull_cmd.add_argument(text_option, default="")
    # Plain on/off switches, registered in their original order.
    switches = (
        "--normalize-embeddings",
        "--force",
        "--trust-remote-code",
        "--use-external-data-format",
        "--use-subprocess",
        "--use-fallback-if-failed",
        "--merge",
        "--skip-validator",
    )
    for switch in switches:
        pull_cmd.add_argument(switch, action="store_true")
    pull_cmd.add_argument("--hf-token", default="", help="HuggingFace API token")
    pull_cmd.set_defaults(func=cmd_pull)

    # list
    list_cmd = commands.add_parser("list", help="List all locally stored models")
    list_cmd.set_defaults(func=cmd_list)

    # info / remove / reload share the same "<model_name> [--for X]" shape.
    per_model_commands = (
        ("info", "Show manifest for a model", cmd_info),
        ("remove", "Delete a model from the local store", cmd_remove),
        ("reload", "Evict and re-load a model session from disk", cmd_reload),
    )
    for command_name, help_text, handler in per_model_commands:
        model_cmd = commands.add_parser(command_name, help=help_text)
        model_cmd.add_argument("model_name")
        model_cmd.add_argument("--for", dest="model_for", default="fe")
        model_cmd.set_defaults(func=handler)

    # stats
    stats_cmd = commands.add_parser("stats", help="Show session cache statistics")
    stats_cmd.set_defaults(func=cmd_stats)

    # serve
    serve_cmd = commands.add_parser("serve", help="Start the HTTP server")
    serve_cmd.add_argument("--host", default="127.0.0.1")
    serve_cmd.add_argument("--port", type=int, default=19720)
    serve_cmd.set_defaults(func=cmd_serve)

    return root
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def main() -> None:
    """CLI entry point: parse ``sys.argv``, run the command, exit with its code."""
    namespace = build_parser().parse_args()
    exit_code = namespace.func(namespace)
    sys.exit(exit_code)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
if __name__ == "__main__":
|
|
190
|
+
main()
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
from floudsonnx.config.model_config import DecoderInputNames, InputNames, ModelConfig, OutputNames
|
|
6
|
+
from floudsonnx.config.settings import FloudsOnnxSettings
|
|
7
|
+
|
|
8
|
+
__all__ = ["FloudsOnnxSettings", "ModelConfig", "InputNames", "OutputNames", "DecoderInputNames"]
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# Copyright (c) 2026 Goutam Malakar. All rights reserved.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0.
|
|
4
|
+
# =============================================================================
|
|
5
|
+
"""
|
|
6
|
+
floudsonnx.config.model_config
|
|
7
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
8
|
+
Self-contained ModelConfig — clean replacement for OnnxConfig from model_service.
|
|
9
|
+
All fields mirror OnnxConfig for manifest compatibility.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class InputNames(BaseModel):
    # String names with defaults; used as the type of ``ModelConfig.inputnames``.
    # NOTE(review): presumably the ONNX graph input tensor names — confirm
    # against the runtime code that reads them.
    input: str = "input_ids"
    mask: str = "attention_mask"
    # Optional names; None by default.
    position: Optional[str] = None
    token_type: Optional[str] = None
    decoder_input: str = "decoder_input_ids"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class OutputNames(BaseModel):
    # Single string name with a default; used as the type of
    # ``ModelConfig.outputnames``.
    # NOTE(review): presumably the ONNX graph output tensor name — confirm.
    output: str = "last_hidden_state"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DecoderInputNames(BaseModel):
    # String names with defaults; used as the type of
    # ``ModelConfig.decoder_inputnames``.
    # NOTE(review): presumably the decoder graph's input tensor names — confirm.
    input: str = "input_ids"
    mask: str = "encoder_attention_mask"
    encoder_output: str = "encoder_hidden_states"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ModelConfig(BaseModel):
    # Per-model configuration. Per this module's docstring, the fields mirror
    # ``OnnxConfig`` from model_service for manifest compatibility, so field
    # names, order and defaults should be kept as they are.
    #
    # ``extra="allow"`` keeps unknown keys instead of rejecting them.
    # ``protected_namespaces=()`` is needed because several fields start with
    # pydantic's protected "model_" prefix (model_name, model_for,
    # model_folder_name, model_family); without it affected pydantic 2.x
    # releases emit a UserWarning for each of them at class creation.
    model_config = ConfigDict(extra="allow", protected_namespaces=())

    # ── Identity ──────────────────────────────────────────────────────────────
    model_name: str
    model_for: str = "fe"  # fe | s2s | sc | ranker | llm
    tasks: List[str] = Field(default_factory=list)
    model_folder_name: Optional[str] = None

    # ── Encoder / embedding ───────────────────────────────────────────────────
    dimension: Optional[int] = None
    native_dimension: Optional[int] = None
    max_length: int = 256
    min_length: int = 0
    pooling_strategy: str = "mean"
    normalize: bool = False
    force_pooling: bool = True
    lowercase: bool = True
    remove_emojis: bool = False
    legacy_tokenizer: bool = False
    chunk_logic: str = "sentence"
    chunk_overlap: int = 1
    chunk_size: Optional[int] = None

    # ── ONNX artifact names ───────────────────────────────────────────────────
    encoder_onnx_model: str = "model.onnx"
    optimized_onnx_model: str = "model_optimized.onnx"
    decoder_onnx_model: str = "decoder_model.onnx"
    decoder_onnx_model_with_past: Optional[str] = None

    # ── Architecture flags ────────────────────────────────────────────────────
    encoder_only: bool = False
    decoder_only: bool = False
    use_seq2seqlm: bool = False
    merged_with_past: bool = False
    use_cache: Optional[bool] = None

    # ── I/O name mappings ─────────────────────────────────────────────────────
    # default_factory gives every instance its own nested model object.
    inputnames: InputNames = Field(default_factory=InputNames)
    outputnames: OutputNames = Field(default_factory=OutputNames)
    decoder_inputnames: DecoderInputNames = Field(default_factory=DecoderInputNames)

    # ── Generation parameters ─────────────────────────────────────────────────
    num_beams: int = 4
    temperature: float = 0.0
    top_k: Optional[int] = None
    top_p: Optional[float] = None
    repetition_penalty: Optional[float] = None
    early_stopping: bool = True
    max_new_tokens: Optional[int] = 512
    forced_bos_token_id: Optional[int] = None
    eos_token_id: Optional[int] = None
    bos_token_id: Optional[int] = None
    pad_token_id: int = 0
    vocab_size: Optional[int] = None
    prepend_text: str = "summarize: "

    # ── Chat / LLM ────────────────────────────────────────────────────────────
    model_family: Optional[str] = None
    chat_template: Optional[str] = None
    extract_assistant_only: bool = False
    assistant_prefix: str = "assistant:"

    # ── Quantization ─────────────────────────────────────────────────────────
    quantize: bool = False
    quantize_type: str = "int8"

    # ── Special token file names ──────────────────────────────────────────────
    special_tokens_map_path: str = "special_tokens_map.json"
    generation_config_path: str = "generation_config.json"

    # ── Extra / pass-through ──────────────────────────────────────────────────
    extra: Dict[str, Any] = Field(default_factory=dict)
|