museq 0.45.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- muse/__init__.py +87 -0
- muse/admin/__init__.py +17 -0
- muse/admin/auth.py +63 -0
- muse/admin/client.py +195 -0
- muse/admin/jobs.py +165 -0
- muse/admin/operations.py +773 -0
- muse/admin/routes/__init__.py +32 -0
- muse/admin/routes/jobs.py +50 -0
- muse/admin/routes/memory.py +187 -0
- muse/admin/routes/models.py +233 -0
- muse/admin/routes/workers.py +95 -0
- muse/cli.py +775 -0
- muse/cli_impl/__init__.py +0 -0
- muse/cli_impl/console.py +98 -0
- muse/cli_impl/gateway.py +633 -0
- muse/cli_impl/idle_sweeper.py +294 -0
- muse/cli_impl/load_director.py +874 -0
- muse/cli_impl/mcp_server.py +86 -0
- muse/cli_impl/models_info_display.py +350 -0
- muse/cli_impl/models_list.py +458 -0
- muse/cli_impl/probe.py +297 -0
- muse/cli_impl/probe_worker.py +268 -0
- muse/cli_impl/refresh.py +323 -0
- muse/cli_impl/search.py +143 -0
- muse/cli_impl/serve.py +23 -0
- muse/cli_impl/supervisor.py +774 -0
- muse/cli_impl/worker.py +118 -0
- muse/core/__init__.py +0 -0
- muse/core/catalog.py +832 -0
- muse/core/chat_formats.py +92 -0
- muse/core/curated.py +194 -0
- muse/core/discovery.py +392 -0
- muse/core/errors.py +28 -0
- muse/core/image_preprocessing.py +231 -0
- muse/core/install.py +58 -0
- muse/core/memory_probe.py +108 -0
- muse/core/registry.py +127 -0
- muse/core/resolvers.py +185 -0
- muse/core/resolvers_hf.py +114 -0
- muse/core/runtime_helpers.py +132 -0
- muse/core/server.py +237 -0
- muse/core/venv.py +169 -0
- muse/mcp/__init__.py +12 -0
- muse/mcp/binary_io.py +146 -0
- muse/mcp/client.py +237 -0
- muse/mcp/server.py +242 -0
- muse/mcp/tools/__init__.py +63 -0
- muse/mcp/tools/admin.py +438 -0
- muse/mcp/tools/inference_audio.py +259 -0
- muse/mcp/tools/inference_image.py +384 -0
- muse/mcp/tools/inference_text.py +234 -0
- muse/mcp/tools/inference_video.py +85 -0
- muse/modalities/__init__.py +0 -0
- muse/modalities/audio_classification/__init__.py +72 -0
- muse/modalities/audio_classification/client.py +69 -0
- muse/modalities/audio_classification/codec.py +37 -0
- muse/modalities/audio_classification/hf.py +121 -0
- muse/modalities/audio_classification/protocol.py +40 -0
- muse/modalities/audio_classification/routes.py +120 -0
- muse/modalities/audio_classification/runtimes/__init__.py +12 -0
- muse/modalities/audio_classification/runtimes/hf_audio_classifier.py +177 -0
- muse/modalities/audio_embedding/__init__.py +74 -0
- muse/modalities/audio_embedding/client.py +143 -0
- muse/modalities/audio_embedding/codec.py +18 -0
- muse/modalities/audio_embedding/hf.py +197 -0
- muse/modalities/audio_embedding/protocol.py +55 -0
- muse/modalities/audio_embedding/routes.py +166 -0
- muse/modalities/audio_embedding/runtimes/__init__.py +0 -0
- muse/modalities/audio_embedding/runtimes/transformers_audio.py +348 -0
- muse/modalities/audio_generation/__init__.py +48 -0
- muse/modalities/audio_generation/client.py +125 -0
- muse/modalities/audio_generation/codec.py +174 -0
- muse/modalities/audio_generation/hf.py +187 -0
- muse/modalities/audio_generation/protocol.py +56 -0
- muse/modalities/audio_generation/routes.py +130 -0
- muse/modalities/audio_generation/runtimes/__init__.py +0 -0
- muse/modalities/audio_generation/runtimes/stable_audio.py +224 -0
- muse/modalities/audio_speech/__init__.py +36 -0
- muse/modalities/audio_speech/alignment.py +168 -0
- muse/modalities/audio_speech/backends/__init__.py +0 -0
- muse/modalities/audio_speech/backends/base.py +294 -0
- muse/modalities/audio_speech/backends/transformers.py +46 -0
- muse/modalities/audio_speech/client.py +110 -0
- muse/modalities/audio_speech/codec.py +57 -0
- muse/modalities/audio_speech/decode_only.py +164 -0
- muse/modalities/audio_speech/encoded.py +229 -0
- muse/modalities/audio_speech/protocol.py +85 -0
- muse/modalities/audio_speech/routes.py +142 -0
- muse/modalities/audio_speech/tts.py +353 -0
- muse/modalities/audio_speech/utils/__init__.py +0 -0
- muse/modalities/audio_speech/utils/text_normalizer.py +380 -0
- muse/modalities/audio_speech/utils/text_splitter.py +76 -0
- muse/modalities/audio_speech/vocos/decoder.py +42 -0
- muse/modalities/audio_speech/vocos/heads.py +41 -0
- muse/modalities/audio_speech/vocos/migrate_weights.py +127 -0
- muse/modalities/audio_speech/vocos/models.py +60 -0
- muse/modalities/audio_speech/vocos/modules.py +68 -0
- muse/modalities/audio_speech/vocos/spectral_ops.py +94 -0
- muse/modalities/audio_transcription/__init__.py +74 -0
- muse/modalities/audio_transcription/client.py +141 -0
- muse/modalities/audio_transcription/codec.py +117 -0
- muse/modalities/audio_transcription/hf.py +104 -0
- muse/modalities/audio_transcription/protocol.py +63 -0
- muse/modalities/audio_transcription/routes.py +196 -0
- muse/modalities/audio_transcription/runtimes/__init__.py +1 -0
- muse/modalities/audio_transcription/runtimes/faster_whisper.py +127 -0
- muse/modalities/chat_completion/__init__.py +46 -0
- muse/modalities/chat_completion/client.py +70 -0
- muse/modalities/chat_completion/codec.py +51 -0
- muse/modalities/chat_completion/hf.py +249 -0
- muse/modalities/chat_completion/protocol.py +88 -0
- muse/modalities/chat_completion/routes.py +314 -0
- muse/modalities/chat_completion/runtimes/__init__.py +0 -0
- muse/modalities/chat_completion/runtimes/llama_cpp.py +168 -0
- muse/modalities/chat_completion/runtimes/transformers_vlm.py +284 -0
- muse/modalities/embedding_text/__init__.py +32 -0
- muse/modalities/embedding_text/client.py +69 -0
- muse/modalities/embedding_text/codec.py +41 -0
- muse/modalities/embedding_text/hf.py +106 -0
- muse/modalities/embedding_text/protocol.py +44 -0
- muse/modalities/embedding_text/routes.py +133 -0
- muse/modalities/embedding_text/runtimes/__init__.py +0 -0
- muse/modalities/embedding_text/runtimes/sentence_transformers.py +157 -0
- muse/modalities/image_animation/__init__.py +34 -0
- muse/modalities/image_animation/client.py +62 -0
- muse/modalities/image_animation/codec.py +98 -0
- muse/modalities/image_animation/hf.py +138 -0
- muse/modalities/image_animation/protocol.py +50 -0
- muse/modalities/image_animation/routes.py +142 -0
- muse/modalities/image_animation/runtimes/__init__.py +0 -0
- muse/modalities/image_animation/runtimes/animatediff.py +194 -0
- muse/modalities/image_cv/__init__.py +98 -0
- muse/modalities/image_cv/client.py +127 -0
- muse/modalities/image_cv/codec.py +163 -0
- muse/modalities/image_cv/hf.py +228 -0
- muse/modalities/image_cv/protocol.py +158 -0
- muse/modalities/image_cv/routes.py +221 -0
- muse/modalities/image_cv/runtimes/__init__.py +34 -0
- muse/modalities/image_cv/runtimes/hf_depth.py +150 -0
- muse/modalities/image_cv/runtimes/hf_keypoint.py +310 -0
- muse/modalities/image_cv/runtimes/hf_object_detection.py +181 -0
- muse/modalities/image_embedding/__init__.py +60 -0
- muse/modalities/image_embedding/client.py +163 -0
- muse/modalities/image_embedding/codec.py +18 -0
- muse/modalities/image_embedding/hf.py +229 -0
- muse/modalities/image_embedding/protocol.py +59 -0
- muse/modalities/image_embedding/routes.py +142 -0
- muse/modalities/image_embedding/runtimes/__init__.py +7 -0
- muse/modalities/image_embedding/runtimes/transformers_image.py +281 -0
- muse/modalities/image_generation/__init__.py +37 -0
- muse/modalities/image_generation/client.py +179 -0
- muse/modalities/image_generation/codec.py +65 -0
- muse/modalities/image_generation/hf.py +187 -0
- muse/modalities/image_generation/image_input.py +225 -0
- muse/modalities/image_generation/protocol.py +51 -0
- muse/modalities/image_generation/routes.py +295 -0
- muse/modalities/image_generation/runtimes/__init__.py +0 -0
- muse/modalities/image_generation/runtimes/diffusers.py +416 -0
- muse/modalities/image_ocr/__init__.py +53 -0
- muse/modalities/image_ocr/client.py +81 -0
- muse/modalities/image_ocr/codec.py +24 -0
- muse/modalities/image_ocr/hf.py +165 -0
- muse/modalities/image_ocr/protocol.py +52 -0
- muse/modalities/image_ocr/routes.py +94 -0
- muse/modalities/image_ocr/runtimes/__init__.py +19 -0
- muse/modalities/image_ocr/runtimes/hf_vision2seq.py +310 -0
- muse/modalities/image_segmentation/__init__.py +54 -0
- muse/modalities/image_segmentation/client.py +70 -0
- muse/modalities/image_segmentation/codec.py +231 -0
- muse/modalities/image_segmentation/hf.py +189 -0
- muse/modalities/image_segmentation/protocol.py +79 -0
- muse/modalities/image_segmentation/routes.py +246 -0
- muse/modalities/image_segmentation/runtimes/__init__.py +1 -0
- muse/modalities/image_segmentation/runtimes/sam2_runtime.py +388 -0
- muse/modalities/image_upscale/__init__.py +48 -0
- muse/modalities/image_upscale/client.py +75 -0
- muse/modalities/image_upscale/codec.py +11 -0
- muse/modalities/image_upscale/hf.py +179 -0
- muse/modalities/image_upscale/protocol.py +56 -0
- muse/modalities/image_upscale/routes.py +154 -0
- muse/modalities/image_upscale/runtimes/__init__.py +1 -0
- muse/modalities/image_upscale/runtimes/diffusers_upscaler.py +180 -0
- muse/modalities/model_3d_generation/__init__.py +79 -0
- muse/modalities/model_3d_generation/client.py +126 -0
- muse/modalities/model_3d_generation/codec.py +78 -0
- muse/modalities/model_3d_generation/hf.py +339 -0
- muse/modalities/model_3d_generation/protocol.py +74 -0
- muse/modalities/model_3d_generation/routes.py +250 -0
- muse/modalities/model_3d_generation/runtimes/__init__.py +6 -0
- muse/modalities/model_3d_generation/runtimes/hunyuan3d.py +326 -0
- muse/modalities/model_3d_generation/runtimes/shape_e.py +186 -0
- muse/modalities/model_3d_generation/runtimes/trellis.py +238 -0
- muse/modalities/model_3d_generation/runtimes/triposr.py +259 -0
- muse/modalities/text_classification/__init__.py +64 -0
- muse/modalities/text_classification/client.py +123 -0
- muse/modalities/text_classification/codec.py +152 -0
- muse/modalities/text_classification/hf.py +153 -0
- muse/modalities/text_classification/protocol.py +76 -0
- muse/modalities/text_classification/routes.py +315 -0
- muse/modalities/text_classification/runtimes/__init__.py +28 -0
- muse/modalities/text_classification/runtimes/hf_text_classifier.py +134 -0
- muse/modalities/text_classification/runtimes/hf_zero_shot.py +159 -0
- muse/modalities/text_rerank/__init__.py +53 -0
- muse/modalities/text_rerank/client.py +62 -0
- muse/modalities/text_rerank/codec.py +44 -0
- muse/modalities/text_rerank/hf.py +143 -0
- muse/modalities/text_rerank/protocol.py +46 -0
- muse/modalities/text_rerank/routes.py +118 -0
- muse/modalities/text_rerank/runtimes/__init__.py +1 -0
- muse/modalities/text_rerank/runtimes/cross_encoder.py +113 -0
- muse/modalities/text_summarization/__init__.py +53 -0
- muse/modalities/text_summarization/client.py +61 -0
- muse/modalities/text_summarization/codec.py +44 -0
- muse/modalities/text_summarization/hf.py +141 -0
- muse/modalities/text_summarization/protocol.py +54 -0
- muse/modalities/text_summarization/routes.py +97 -0
- muse/modalities/text_summarization/runtimes/__init__.py +1 -0
- muse/modalities/text_summarization/runtimes/bart_seq2seq.py +198 -0
- muse/modalities/video_generation/__init__.py +40 -0
- muse/modalities/video_generation/client.py +86 -0
- muse/modalities/video_generation/codec.py +120 -0
- muse/modalities/video_generation/hf.py +211 -0
- muse/modalities/video_generation/protocol.py +58 -0
- muse/modalities/video_generation/routes.py +130 -0
- muse/modalities/video_generation/runtimes/__init__.py +0 -0
- muse/modalities/video_generation/runtimes/cogvideox_runtime.py +176 -0
- muse/modalities/video_generation/runtimes/wan_runtime.py +189 -0
- muse/models/__init__.py +0 -0
- muse/models/animatediff_motion_v3.py +213 -0
- muse/models/ast_audioset.py +42 -0
- muse/models/bark_small.py +202 -0
- muse/models/bart_large_cnn.py +199 -0
- muse/models/bge_reranker_v2_m3.py +134 -0
- muse/models/deberta_v3_base_zeroshot_v2_0.py +45 -0
- muse/models/depth_anything_v2_small.py +48 -0
- muse/models/detr_resnet_50.py +40 -0
- muse/models/dinov2_small.py +195 -0
- muse/models/kokoro_82m.py +163 -0
- muse/models/mert_v1_95m.py +256 -0
- muse/models/nv_embed_v2.py +217 -0
- muse/models/sam2_hiera_tiny.py +195 -0
- muse/models/sd_turbo.py +455 -0
- muse/models/smolvlm_256m_instruct.py +38 -0
- muse/models/soprano_80m.py +150 -0
- muse/models/stable_audio_open_1_0.py +256 -0
- muse/models/stable_diffusion_x4_upscaler.py +210 -0
- muse/models/triposr.py +59 -0
- muse/models/trocr_base_printed.py +43 -0
- muse/models/twitter_roberta_base_sentiment_latest.py +46 -0
- muse/models/vitpose_base_simple.py +41 -0
- muse/models/wan2_1_t2v_1_3b.py +220 -0
- museq-0.45.2.dist-info/METADATA +818 -0
- museq-0.45.2.dist-info/RECORD +257 -0
- museq-0.45.2.dist-info/WHEEL +5 -0
- museq-0.45.2.dist-info/entry_points.txt +2 -0
- museq-0.45.2.dist-info/licenses/LICENSE +201 -0
- museq-0.45.2.dist-info/top_level.txt +1 -0
muse/__init__.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Muse: model-agnostic multi-modality generation server.
|
|
2
|
+
|
|
3
|
+
The authoritative list of supported modalities lives in
|
|
4
|
+
`muse.core.discovery.discover_modalities()`, which scans
|
|
5
|
+
`src/muse/modalities/` plus any user-configured dirs. As of v0.32.0
|
|
6
|
+
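the bundled modalities are (list not exhaustive for this release: the package also ships `audio_classification`, `image_cv`, `image_ocr`, and `model_3d_generation` under `muse/modalities/`):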
|
|
7
|
+
|
|
8
|
+
- audio/embedding: /v1/audio/embeddings (transformers AutoModel + librosa; MERT, CLAP, wav2vec; multipart upload, OpenAI-shape envelope)
|
|
9
|
+
- audio/generation: /v1/audio/music, /v1/audio/sfx (Stable Audio Open 1.0; capability-gated)
|
|
10
|
+
- audio/speech: /v1/audio/speech (TTS: Soprano, Kokoro, Bark)
|
|
11
|
+
- audio/transcription: /v1/audio/transcriptions, /v1/audio/translations (faster-whisper)
|
|
12
|
+
- chat/completion: /v1/chat/completions (llama-cpp-python over GGUF)
|
|
13
|
+
- embedding/text: /v1/embeddings (sentence-transformers)
|
|
14
|
+
- image/animation: /v1/images/animations (AnimateDiff; short looping clips, animated WebP/GIF/MP4)
|
|
15
|
+
- image/embedding: /v1/images/embeddings (transformers AutoModel; CLIP, SigLIP, DINOv2)
|
|
16
|
+
- image/generation: /v1/images/generations, /v1/images/edits (inpaint), /v1/images/variations (diffusers)
|
|
17
|
+
- image/segmentation: /v1/images/segment (SAM-2; multipart upload, mode-aware: auto/points/boxes/text; PNG or COCO RLE masks)
|
|
18
|
+
- image/upscale: /v1/images/upscale (StableDiffusionUpscalePipeline; SD x4; multipart upload)
|
|
19
|
+
- text/classification: /v1/moderations (HF text-classification)
|
|
20
|
+
- text/rerank: /v1/rerank (sentence-transformers CrossEncoder; Cohere-compat)
|
|
21
|
+
- text/summarization: /v1/summarize (transformers AutoModelForSeq2SeqLM; Cohere-compat)
|
|
22
|
+
- video/generation: /v1/video/generations (Wan, CogVideoX; narrative clips, mp4/webm/frames_b64; GPU-required)
|
|
23
|
+
|
|
24
|
+
v0.32.0 adds CI smoke-tests of fresh per-model venvs (#124). The
|
|
25
|
+
workflow `.github/workflows/fresh-venv-smoke.yml` matrix-tests five
|
|
26
|
+
lightweight bundled models (kokoro-82m, dinov2-small, bart-large-cnn,
|
|
27
|
+
bge-reranker-v2-m3, mert-v1-95m) on every push to main and every PR;
|
|
28
|
+
each job creates a fresh venv, installs only what `muse pull` would
|
|
29
|
+
install, and verifies the model loads via the in-venv probe worker
|
|
30
|
+
(no inference; that's GPU-bound and out of scope). Catches the
|
|
31
|
+
production failure mode where a bundled script's `pip_extras` misses
|
|
32
|
+
a transitive dep that `from_pretrained` (or sentence-transformers, or
|
|
33
|
+
diffusers) pulls in at load time, complementing the v0.30.0 static
|
|
34
|
+
audit (#110) which can only flag direct-import gaps via AST scan.
|
|
35
|
+
Heavy / GPU-only models deferred until paid runner budget allows.
|
|
36
|
+
Local repro: `python scripts/smoke_fresh_venv.py --model_id <id>`.
|
|
37
|
+
|
|
38
|
+
v0.31.0 consolidates cross-runtime utilities into
|
|
39
|
+
`muse.core.runtime_helpers`: `select_device` (cuda/mps/cpu auto-detect),
|
|
40
|
+
`dtype_for_name` (string-to-torch.dtype map with `fp16`/`bf16`/`fp32`
|
|
41
|
+
aliases), `set_inference_mode` (no-grad switch with the literal
|
|
42
|
+
method-name token kept out of caller bodies), and `LoadTimer` (opt-in
|
|
43
|
+
load-time logging context). Removes ~30 per-runtime copies; an AST-based
|
|
44
|
+
meta-test (`tests/core/test_runtime_helpers_meta.py`) walks every
|
|
45
|
+
runtime and bundled script to flag re-implementations. Behavior-
|
|
46
|
+
preserving; the existing 2150 fast-lane tests pass without modification.
|
|
47
|
+
|
|
48
|
+
v0.30.0 bundles three operational improvements:
|
|
49
|
+
- the supervisor starts the gateway after the FIRST worker is healthy
|
|
50
|
+
(was: ALL workers), so clients can hit the fast workers while slow
|
|
51
|
+
ones still load. Remaining workers promote on a daemon thread.
|
|
52
|
+
- bundled scripts in `muse/models/` got a `pip_extras` audit; missing
|
|
53
|
+
transitive deps (torch, numpy) added to seven manifests; a static
|
|
54
|
+
regression-guard test parametrized over every bundled script
|
|
55
|
+
catches future gaps.
|
|
56
|
+
- new `muse models refresh <id> | --all | --enabled` re-installs
|
|
57
|
+
`muse[server,<extras>]` plus the model's `pip_extras` into per-model
|
|
58
|
+
venvs; use after `pip install -U muse` to propagate new server-side
|
|
59
|
+
deps.
|
|
60
|
+
|
|
61
|
+
v0.29.0 adds `muse mcp`: an MCP (Model Context Protocol) server that
|
|
62
|
+
exposes muse to LLM clients (Claude Desktop, Cursor, etc.) as 29
|
|
63
|
+
structured tools. 11 admin tools wrap `/v1/admin/*` (gated by
|
|
64
|
+
`MUSE_ADMIN_TOKEN`); 18 inference tools wrap the generation routes.
|
|
65
|
+
Stdio mode is the default; HTTP+SSE mode is available for remote /
|
|
66
|
+
web embedders. Filter mode lets ops pin to admin-only or
|
|
67
|
+
inference-only. See CLAUDE.md "Using muse from Claude Desktop".
|
|
68
|
+
|
|
69
|
+
v0.28.0 added an admin REST API under `/v1/admin/*` for runtime model
|
|
70
|
+
control (enable/disable/probe/pull/remove without restarting `muse
|
|
71
|
+
serve`). Closed-by-default behind `MUSE_ADMIN_TOKEN`. See README.md
|
|
72
|
+
"Admin endpoints" and CLAUDE.md "Admin REST API" for the full surface.
|
|
73
|
+
|
|
74
|
+
Heavy backends (transformers, diffusers, faster-whisper, llama-cpp,
|
|
75
|
+
sentence-transformers) are imported lazily inside per-modality runtime
|
|
76
|
+
modules to keep `muse --help` and `muse pull` instant. Each pulled
|
|
77
|
+
model lives in its own venv at `~/.muse/venvs/<model-id>/`.
|
|
78
|
+
|
|
79
|
+
`__version__` is read from pyproject.toml at install time; this
|
|
80
|
+
fallback covers in-tree imports without an installed muse.
|
|
81
|
+
"""
|
|
82
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
83
|
+
|
|
84
|
+
# Resolve the installed distribution's version.
#
# The wheel is published under the distribution name "museq" (see the
# museq-*.dist-info directory) while the import package is "muse", so the
# published name is tried first; "muse" is kept as a fallback for installs
# made under that name. If neither distribution is installed (in-tree
# import), the placeholder below is kept.
__version__ = "0.0.0+unknown"
for _dist_name in ("museq", "muse"):
    try:
        __version__ = version(_dist_name)
    except PackageNotFoundError:
        continue
    break
del _dist_name
|
muse/admin/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Admin REST API for runtime model control.
|
|
2
|
+
|
|
3
|
+
Mounted on the gateway under /v1/admin/*; gated by MUSE_ADMIN_TOKEN.
|
|
4
|
+
See docs/superpowers/specs/2026-04-28-admin-api-design.md for the full
|
|
5
|
+
wire contract; this package provides:
|
|
6
|
+
|
|
7
|
+
- auth: bearer-token verification dependency for FastAPI
|
|
8
|
+
- jobs: in-memory async-job tracker with 10-minute retention
|
|
9
|
+
- operations: orchestrates enable / disable / probe / pull / remove
|
|
10
|
+
via the supervisor singleton
|
|
11
|
+
- routes/: per-resource APIRouter modules
|
|
12
|
+
- client: thin Python wrapper for programmatic admin access
|
|
13
|
+
|
|
14
|
+
The admin surface is closed-by-default. Without MUSE_ADMIN_TOKEN set,
|
|
15
|
+
all admin requests return 503 admin_disabled. With the env var set,
|
|
16
|
+
the request must carry Authorization: Bearer <token>.
|
|
17
|
+
"""
|
muse/admin/auth.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Bearer-token verification for admin endpoints.
|
|
2
|
+
|
|
3
|
+
The token is read from the MUSE_ADMIN_TOKEN environment variable. With
|
|
4
|
+
no token configured, every admin request is rejected with 503; this is
|
|
5
|
+
the closed-by-default policy. With a token configured, the request must
|
|
6
|
+
carry an Authorization: Bearer <token> header matching the env var.
|
|
7
|
+
|
|
8
|
+
The token is never echoed in error messages or logs.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import secrets
|
|
14
|
+
|
|
15
|
+
from fastapi import Header, HTTPException
|
|
16
|
+
|
|
17
|
+
# Name of the environment variable that holds the admin bearer token.
ADMIN_TOKEN_ENV = "MUSE_ADMIN_TOKEN"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _err(status: int, code: str, message: str) -> HTTPException:
    """Wrap an OpenAI-shape error envelope in an HTTPException.

    Args:
        status: HTTP status code for the response.
        code: Machine-readable error code stored in the envelope.
        message: Human-readable description. Only static descriptive
            strings are passed here, so the secret token never appears
            in the message text.

    Returns:
        The exception object, ready for the caller to ``raise``.
    """
    envelope = {
        "code": code,
        "message": message,
        "type": "invalid_request_error",
    }
    return HTTPException(status_code=status, detail={"error": envelope})
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def verify_admin_token(authorization: str | None = Header(default=None)) -> None:
    """FastAPI dependency: raise unless caller carries the admin bearer.

    Five paths:
      - env var unset (or empty)        -> 503 admin_disabled
      - header missing                  -> 401 missing_token
      - header malformed (no "Bearer ") -> 401 missing_token
      - header bearer wrong             -> 403 invalid_token
      - header bearer matches           -> return None (route runs)

    Args:
        authorization: Raw ``Authorization`` request header as injected by
            FastAPI; ``None`` when the header is absent.

    Raises:
        HTTPException: with status 503, 401 or 403 as listed above.
    """
    expected = os.environ.get(ADMIN_TOKEN_ENV)
    if not expected:
        raise _err(
            503,
            "admin_disabled",
            f"Admin endpoints require the {ADMIN_TOKEN_ENV} env var to be set",
        )
    scheme = "Bearer "
    if not authorization or not authorization.startswith(scheme):
        raise _err(
            401,
            "missing_token",
            "Authorization: Bearer <token> required",
        )
    presented = authorization[len(scheme):]
    # Constant-time compare prevents recovering the token byte-by-byte
    # via response-time variance. The comparison is done on bytes because
    # compare_digest raises TypeError for str arguments containing
    # non-ASCII characters, which would turn a bad token into a 500
    # instead of a 403. "surrogateescape" keeps undecodable bytes from
    # the environment encodable.
    if not secrets.compare_digest(
        presented.encode("utf-8", "surrogateescape"),
        expected.encode("utf-8", "surrogateescape"),
    ):
        raise _err(403, "invalid_token", "Bad admin token")
|
muse/admin/client.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""AdminClient: thin Python wrapper over the /v1/admin/* HTTP surface.
|
|
2
|
+
|
|
3
|
+
Use this for programmatic admin against a running `muse serve`. For
|
|
4
|
+
in-process usage (no HTTP), import the operations module directly.
|
|
5
|
+
|
|
6
|
+
Token resolution:
|
|
7
|
+
1. constructor `token=` arg
|
|
8
|
+
2. MUSE_ADMIN_TOKEN env var
|
|
9
|
+
3. None (every call will 503 since the server requires the env var)
|
|
10
|
+
|
|
11
|
+
Server resolution:
|
|
12
|
+
1. constructor `base_url=` arg
|
|
13
|
+
2. MUSE_SERVER env var
|
|
14
|
+
3. http://localhost:8000
|
|
15
|
+
|
|
16
|
+
The `wait` helper polls `/jobs/{id}` until the job lands in done/failed.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import time
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
import httpx
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class AdminClientError(Exception):
    """Error raised for an admin call that came back with an error status.

    Attributes:
        status: The HTTP status code of the response.
        code: The ``code`` field of the OpenAI error envelope.
        message: The human-readable error message.
        body: The raw decoded JSON body of the response.
    """

    def __init__(self, status: int, code: str, message: str, body: Any):
        summary = f"{status} {code}: {message}"
        super().__init__(summary)
        self.status, self.code = status, code
        self.message, self.body = message, body
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AdminClient:
    """HTTP client for /v1/admin/* admin endpoints.

    Every public method issues one request through ``_request`` and
    returns the decoded JSON body. A response with status >= 400 raises
    ``AdminClientError``.
    """

    def __init__(
        self,
        base_url: str | None = None,
        token: str | None = None,
        timeout: float = 30.0,
    ):
        """Resolve the server URL and the admin token.

        Args:
            base_url: Server root. Falls back to the ``MUSE_SERVER`` env
                var, then ``http://localhost:8000``. Trailing slashes are
                stripped so request paths can be appended verbatim.
            token: Admin bearer token. Falls back to the
                ``MUSE_ADMIN_TOKEN`` env var.
            timeout: Default per-request timeout passed to httpx.
        """
        self.base_url = (
            base_url
            or os.environ.get("MUSE_SERVER")
            or "http://localhost:8000"
        ).rstrip("/")
        self.token = token or os.environ.get("MUSE_ADMIN_TOKEN")
        self._timeout = timeout

    def _headers(self) -> dict:
        """Return the Authorization header dict, or ``{}`` without a token."""
        if self.token is None:
            return {}
        return {"Authorization": f"Bearer {self.token}"}

    @staticmethod
    def _extract_error(body: Any, fallback_message: str) -> tuple[str, str]:
        """Pull ``(code, message)`` out of a decoded error body.

        Looks for the envelope at ``body["error"]`` first, then at
        ``body["detail"]["error"]``. Any other shape (a string or list
        under ``detail``, a non-dict body, a non-dict ``error``) yields
        the defaults instead of raising, so the caller always gets an
        ``AdminClientError`` rather than an ``AttributeError``.

        Args:
            body: The decoded response body (any JSON value).
            fallback_message: Message used when the envelope has none.

        Returns:
            ``(code, message)``; ``code`` defaults to ``"http_error"``.
        """
        err: Any = None
        if isinstance(body, dict):
            err = body.get("error")
            if not err:
                detail = body.get("detail")
                if isinstance(detail, dict):
                    err = detail.get("error")
        if not isinstance(err, dict):
            err = {}
        return err.get("code", "http_error"), err.get("message", fallback_message)

    def _request(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send one request and return the decoded JSON body.

        Args:
            method: HTTP method name.
            path: Path appended to ``base_url`` (must start with "/").
            **kwargs: Forwarded to ``httpx.Client.request``. ``timeout``
                and ``headers`` are intercepted: ``timeout`` overrides the
                constructor default for this call, and ``headers`` are
                merged over the Authorization header.

        Returns:
            The decoded JSON body, or ``{"raw": <text>}`` when the body is
            not valid JSON.

        Raises:
            AdminClientError: when the response status is >= 400.
        """
        url = f"{self.base_url}{path}"
        # Per-call `timeout` overrides the constructor's default. None
        # means "use the constructor's value." This is the escape hatch
        # for slow operations like warmup, whose cold-load duration
        # (10-60s) routinely exceeds the constructor default of 30s.
        timeout = kwargs.pop("timeout", None)
        if timeout is None:
            timeout = self._timeout
        extra_headers = kwargs.pop("headers", None) or {}
        with httpx.Client(timeout=timeout) as client:
            r = client.request(
                method,
                url,
                headers={**self._headers(), **extra_headers},
                **kwargs,
            )
        if r.status_code >= 400:
            try:
                body = r.json()
            except Exception:  # noqa: BLE001
                body = {"raw": r.text}
            code, message = self._extract_error(body, r.text)
            raise AdminClientError(r.status_code, code, message, body)
        try:
            return r.json()
        except Exception:  # noqa: BLE001
            return {"raw": r.text}

    # Per-model operations

    def enable(self, model_id: str) -> dict:
        """POST to the model's ``/enable`` endpoint with an empty JSON body."""
        return self._request("POST", f"/v1/admin/models/{model_id}/enable", json={})

    def disable(self, model_id: str) -> dict:
        """POST to the model's ``/disable`` endpoint with an empty JSON body."""
        return self._request("POST", f"/v1/admin/models/{model_id}/disable", json={})

    def warmup(self, model_id: str, *, timeout: float | None = None) -> dict:
        """Pre-load a model via the supervisor's LoadDirector.

        Synchronous on the wire: returns once the director's warmup
        completes (cold load duration: 10-60s for real models, longer
        for video / large diffusion models). Returns {"model_id",
        "worker_port"} on success; raises AdminClientError on 4xx/5xx.

        `timeout` overrides the constructor's default for this call
        only. The default constructor timeout (30s) is too short for
        most cold loads; callers driving warmup should pass a more
        generous value (e.g. 300s) or set timeout via the constructor.
        None means "use the constructor's value."
        """
        return self._request(
            "POST",
            f"/v1/admin/models/{model_id}/warmup",
            json={},
            timeout=timeout,
        )

    def probe(
        self,
        model_id: str,
        *,
        no_inference: bool = False,
        device: str | None = None,
    ) -> dict:
        """POST to the model's ``/probe`` endpoint.

        Args:
            model_id: Model to probe.
            no_inference: Sent as the ``no_inference`` body field.
            device: Sent as the ``device`` body field only when not None.
        """
        body: dict[str, Any] = {"no_inference": no_inference}
        if device is not None:
            body["device"] = device
        return self._request("POST", f"/v1/admin/models/{model_id}/probe", json=body)

    def pull(self, identifier: str) -> dict:
        """Request a pull of ``identifier`` via the placeholder-path endpoint."""
        # Use the documented `_` placeholder path; identifier in body
        # avoids URL-encoding hf://... slashes.
        return self._request(
            "POST",
            "/v1/admin/models/_/pull",
            json={"identifier": identifier},
        )

    def remove(self, model_id: str, *, purge: bool = False) -> dict:
        """DELETE the model; ``purge`` is sent as a "true"/"false" query param."""
        return self._request(
            "DELETE",
            f"/v1/admin/models/{model_id}",
            params={"purge": "true" if purge else "false"},
        )

    def status(self, model_id: str) -> dict:
        """GET the model's ``/status`` endpoint."""
        return self._request("GET", f"/v1/admin/models/{model_id}/status")

    # Cluster-wide views

    def memory(self) -> dict:
        """GET ``/v1/admin/memory``."""
        return self._request("GET", "/v1/admin/memory")

    def workers(self) -> dict:
        """GET ``/v1/admin/workers``."""
        return self._request("GET", "/v1/admin/workers")

    def restart_worker(self, port: int) -> dict:
        """POST to ``/v1/admin/workers/{port}/restart``."""
        return self._request("POST", f"/v1/admin/workers/{port}/restart")

    # Job tracking

    def job(self, job_id: str) -> dict:
        """GET a single job record by id."""
        return self._request("GET", f"/v1/admin/jobs/{job_id}")

    def jobs(self) -> dict:
        """GET ``/v1/admin/jobs``."""
        return self._request("GET", "/v1/admin/jobs")

    def wait(
        self,
        job_id: str,
        *,
        timeout: float = 300.0,
        poll: float = 1.0,
    ) -> dict:
        """Block until job is done or failed; return the final job record.

        The job is always polled at least once. Between polls the client
        sleeps for ``poll`` seconds, shortened to the time remaining so
        the call does not overshoot ``timeout`` by a full poll interval.

        Raises TimeoutError if the job never reaches a terminal state
        within `timeout` seconds.
        """
        deadline = time.monotonic() + timeout
        while True:
            job = self.job(job_id)
            if job.get("state") in ("done", "failed"):
                return job
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(
                    f"job {job_id} did not finish within {timeout}s "
                    f"(last state: {job.get('state')})"
                )
            time.sleep(min(poll, remaining))
|
muse/admin/jobs.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""In-memory async-job tracker for admin operations.
|
|
2
|
+
|
|
3
|
+
Each enable / pull / probe call returns a Job; the caller polls
|
|
4
|
+
GET /v1/admin/jobs/{id} to observe progression. Jobs persist for ten
|
|
5
|
+
minutes after `finished_at`; older jobs are reaped on every list call
|
|
6
|
+
(lazy reap) to keep memory bounded without a dedicated reaper thread.
|
|
7
|
+
|
|
8
|
+
The job_id is a uuid4 hex string. Jobs go through:
|
|
9
|
+
pending -> running -> (done | failed)
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import threading
|
|
15
|
+
import time
|
|
16
|
+
import uuid
|
|
17
|
+
from collections import deque
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)

# Default for JobStore's `retention_seconds`: how long a finished job
# stays addressable before lazy expiry.
_RETENTION_SECONDS = 600.0  # ten minutes
# Default for JobStore's `max_jobs`: the `maxlen` of its ordering deque.
_MAX_JOBS = 100
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class Job:
    """Record of a single asynchronous admin operation.

    `thread` holds the daemon worker running the operation, kept so the
    gateway can join it on shutdown. `finished_at_monotonic` supports
    lazy expiry. Neither of the two is included in `to_dict`, and both
    are hidden from the generated repr.
    """
    job_id: str
    op: str
    model_id: str
    state: str = "pending"
    started_at: str = ""
    finished_at: str | None = None
    result: dict | None = None
    error: str | None = None
    log_lines: list[str] = field(default_factory=list)
    thread: Any = field(default=None, repr=False)
    finished_at_monotonic: float | None = field(default=None, repr=False)

    def to_dict(self) -> dict:
        """Return the serializable view of the job.

        `log_lines` is copied so the caller cannot mutate the job's own
        list through the returned dict.
        """
        scalar_fields = (
            "job_id",
            "op",
            "model_id",
            "state",
            "started_at",
            "finished_at",
            "result",
            "error",
        )
        payload = {name: getattr(self, name) for name in scalar_fields}
        payload["log_lines"] = list(self.log_lines)
        return payload
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class JobStore:
    """Thread-safe in-memory job map with lazy expiry.

    `retention_seconds` controls how long a finished job stays
    addressable via `get`/`list_recent`. The default is 10 minutes,
    matching the spec. A value <= 0 disables expiry entirely.

    `max_jobs` caps the ordering deque that backs `list_recent`. The
    deque drops the oldest job_id when full; the dict entry stays
    addressable via `get` until expiry, but `list_recent` only returns
    entries that are also in the deque. The dict itself is pruned only
    by expiry, so jobs that never reach `done`/`failed` stay in it for
    the lifetime of the store.
    """

    def __init__(self, retention_seconds: float = _RETENTION_SECONDS, max_jobs: int = _MAX_JOBS):
        self._jobs: dict[str, Job] = {}
        self._order: deque[str] = deque(maxlen=max_jobs)
        self._lock = threading.Lock()
        self._retention = retention_seconds

    def create(self, op: str, model_id: str) -> Job:
        """Register a new `pending` job and return it.

        The job gets a uuid4 hex id and `started_at` set to the current
        UTC time. Expired jobs are reaped before the new one is stored.
        """
        job = Job(
            job_id=uuid.uuid4().hex,
            op=op,
            model_id=model_id,
            state="pending",
            started_at=_now_iso(),
        )
        with self._lock:
            self._reap_expired()
            self._jobs[job.job_id] = job
            self._order.append(job.job_id)
        logger.info("job %s created (op=%s, model=%s)", job.job_id, op, model_id)
        return job

    def update(self, job_id: str, **fields: Any) -> Job | None:
        """Set attributes on a job; return it, or None if the id is unknown.

        The first update that leaves the job in `done` or `failed`
        stamps `finished_at` and `finished_at_monotonic`; later updates
        leave those stamps alone.
        """
        with self._lock:
            job = self._jobs.get(job_id)
            if job is None:
                return None
            for k, v in fields.items():
                setattr(job, k, v)
            if job.state in ("done", "failed") and job.finished_at_monotonic is None:
                job.finished_at = _now_iso()
                job.finished_at_monotonic = time.monotonic()
            return job

    def get(self, job_id: str) -> Job | None:
        """Return the job with this id, or None if unknown or already expired."""
        with self._lock:
            self._reap_expired()
            return self._jobs.get(job_id)

    def list_recent(self) -> list[Job]:
        """Return jobs newest-first, capped at the deque's maxlen."""
        with self._lock:
            self._reap_expired()
            # `_order` may still hold ids of reaped jobs; filter them out.
            return [self._jobs[jid] for jid in reversed(self._order) if jid in self._jobs]

    def shutdown(self, timeout: float = 5.0) -> None:
        """Join live worker threads; called on gateway shutdown.

        `timeout` is the total budget in seconds shared by all joins,
        so the call returns after roughly `timeout` no matter how many
        jobs are tracked. Passing None waits for every thread without
        a limit. Errors raised by `join` are logged and ignored.
        """
        with self._lock:
            threads = [j.thread for j in self._jobs.values() if j.thread is not None]
        # Join outside the lock so threads that still need it (e.g. to
        # call `update`) can make progress and exit.
        deadline = None if timeout is None else time.monotonic() + timeout
        for t in threads:
            if deadline is None:
                remaining = None
            else:
                # Once the budget is spent this becomes a non-blocking join.
                remaining = max(0.0, deadline - time.monotonic())
            try:
                t.join(timeout=remaining)
            except Exception as e:  # noqa: BLE001
                logger.warning("error joining job thread: %s", e)

    def _reap_expired(self) -> None:
        """Drop jobs whose finished_at_monotonic is older than retention.

        Does nothing when retention is <= 0. Only the dict is pruned;
        stale ids left in `_order` are filtered by `list_recent`.

        Caller must hold `self._lock`.
        """
        if self._retention <= 0:
            return
        cutoff = time.monotonic() - self._retention
        expired = [
            jid for jid, j in self._jobs.items()
            if j.finished_at_monotonic is not None and j.finished_at_monotonic < cutoff
        ]
        for jid in expired:
            self._jobs.pop(jid, None)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# Module-level default. Tests can build their own JobStore without
|
|
151
|
+
# touching this; production code reaches it through get_default_store.
|
|
152
|
+
_default_store: JobStore | None = None
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def get_default_store() -> JobStore:
    """Return the module-level JobStore, building it on first use."""
    global _default_store
    store = _default_store
    if store is None:
        store = _default_store = JobStore()
    return store
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def reset_default_store() -> None:
    """Test hook: forget the singleton.

    The next `get_default_store` call builds a fresh store.
    """
    global _default_store
    _default_store = None
|