museq 0.45.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257) hide show
  1. muse/__init__.py +87 -0
  2. muse/admin/__init__.py +17 -0
  3. muse/admin/auth.py +63 -0
  4. muse/admin/client.py +195 -0
  5. muse/admin/jobs.py +165 -0
  6. muse/admin/operations.py +773 -0
  7. muse/admin/routes/__init__.py +32 -0
  8. muse/admin/routes/jobs.py +50 -0
  9. muse/admin/routes/memory.py +187 -0
  10. muse/admin/routes/models.py +233 -0
  11. muse/admin/routes/workers.py +95 -0
  12. muse/cli.py +775 -0
  13. muse/cli_impl/__init__.py +0 -0
  14. muse/cli_impl/console.py +98 -0
  15. muse/cli_impl/gateway.py +633 -0
  16. muse/cli_impl/idle_sweeper.py +294 -0
  17. muse/cli_impl/load_director.py +874 -0
  18. muse/cli_impl/mcp_server.py +86 -0
  19. muse/cli_impl/models_info_display.py +350 -0
  20. muse/cli_impl/models_list.py +458 -0
  21. muse/cli_impl/probe.py +297 -0
  22. muse/cli_impl/probe_worker.py +268 -0
  23. muse/cli_impl/refresh.py +323 -0
  24. muse/cli_impl/search.py +143 -0
  25. muse/cli_impl/serve.py +23 -0
  26. muse/cli_impl/supervisor.py +774 -0
  27. muse/cli_impl/worker.py +118 -0
  28. muse/core/__init__.py +0 -0
  29. muse/core/catalog.py +832 -0
  30. muse/core/chat_formats.py +92 -0
  31. muse/core/curated.py +194 -0
  32. muse/core/discovery.py +392 -0
  33. muse/core/errors.py +28 -0
  34. muse/core/image_preprocessing.py +231 -0
  35. muse/core/install.py +58 -0
  36. muse/core/memory_probe.py +108 -0
  37. muse/core/registry.py +127 -0
  38. muse/core/resolvers.py +185 -0
  39. muse/core/resolvers_hf.py +114 -0
  40. muse/core/runtime_helpers.py +132 -0
  41. muse/core/server.py +237 -0
  42. muse/core/venv.py +169 -0
  43. muse/mcp/__init__.py +12 -0
  44. muse/mcp/binary_io.py +146 -0
  45. muse/mcp/client.py +237 -0
  46. muse/mcp/server.py +242 -0
  47. muse/mcp/tools/__init__.py +63 -0
  48. muse/mcp/tools/admin.py +438 -0
  49. muse/mcp/tools/inference_audio.py +259 -0
  50. muse/mcp/tools/inference_image.py +384 -0
  51. muse/mcp/tools/inference_text.py +234 -0
  52. muse/mcp/tools/inference_video.py +85 -0
  53. muse/modalities/__init__.py +0 -0
  54. muse/modalities/audio_classification/__init__.py +72 -0
  55. muse/modalities/audio_classification/client.py +69 -0
  56. muse/modalities/audio_classification/codec.py +37 -0
  57. muse/modalities/audio_classification/hf.py +121 -0
  58. muse/modalities/audio_classification/protocol.py +40 -0
  59. muse/modalities/audio_classification/routes.py +120 -0
  60. muse/modalities/audio_classification/runtimes/__init__.py +12 -0
  61. muse/modalities/audio_classification/runtimes/hf_audio_classifier.py +177 -0
  62. muse/modalities/audio_embedding/__init__.py +74 -0
  63. muse/modalities/audio_embedding/client.py +143 -0
  64. muse/modalities/audio_embedding/codec.py +18 -0
  65. muse/modalities/audio_embedding/hf.py +197 -0
  66. muse/modalities/audio_embedding/protocol.py +55 -0
  67. muse/modalities/audio_embedding/routes.py +166 -0
  68. muse/modalities/audio_embedding/runtimes/__init__.py +0 -0
  69. muse/modalities/audio_embedding/runtimes/transformers_audio.py +348 -0
  70. muse/modalities/audio_generation/__init__.py +48 -0
  71. muse/modalities/audio_generation/client.py +125 -0
  72. muse/modalities/audio_generation/codec.py +174 -0
  73. muse/modalities/audio_generation/hf.py +187 -0
  74. muse/modalities/audio_generation/protocol.py +56 -0
  75. muse/modalities/audio_generation/routes.py +130 -0
  76. muse/modalities/audio_generation/runtimes/__init__.py +0 -0
  77. muse/modalities/audio_generation/runtimes/stable_audio.py +224 -0
  78. muse/modalities/audio_speech/__init__.py +36 -0
  79. muse/modalities/audio_speech/alignment.py +168 -0
  80. muse/modalities/audio_speech/backends/__init__.py +0 -0
  81. muse/modalities/audio_speech/backends/base.py +294 -0
  82. muse/modalities/audio_speech/backends/transformers.py +46 -0
  83. muse/modalities/audio_speech/client.py +110 -0
  84. muse/modalities/audio_speech/codec.py +57 -0
  85. muse/modalities/audio_speech/decode_only.py +164 -0
  86. muse/modalities/audio_speech/encoded.py +229 -0
  87. muse/modalities/audio_speech/protocol.py +85 -0
  88. muse/modalities/audio_speech/routes.py +142 -0
  89. muse/modalities/audio_speech/tts.py +353 -0
  90. muse/modalities/audio_speech/utils/__init__.py +0 -0
  91. muse/modalities/audio_speech/utils/text_normalizer.py +380 -0
  92. muse/modalities/audio_speech/utils/text_splitter.py +76 -0
  93. muse/modalities/audio_speech/vocos/decoder.py +42 -0
  94. muse/modalities/audio_speech/vocos/heads.py +41 -0
  95. muse/modalities/audio_speech/vocos/migrate_weights.py +127 -0
  96. muse/modalities/audio_speech/vocos/models.py +60 -0
  97. muse/modalities/audio_speech/vocos/modules.py +68 -0
  98. muse/modalities/audio_speech/vocos/spectral_ops.py +94 -0
  99. muse/modalities/audio_transcription/__init__.py +74 -0
  100. muse/modalities/audio_transcription/client.py +141 -0
  101. muse/modalities/audio_transcription/codec.py +117 -0
  102. muse/modalities/audio_transcription/hf.py +104 -0
  103. muse/modalities/audio_transcription/protocol.py +63 -0
  104. muse/modalities/audio_transcription/routes.py +196 -0
  105. muse/modalities/audio_transcription/runtimes/__init__.py +1 -0
  106. muse/modalities/audio_transcription/runtimes/faster_whisper.py +127 -0
  107. muse/modalities/chat_completion/__init__.py +46 -0
  108. muse/modalities/chat_completion/client.py +70 -0
  109. muse/modalities/chat_completion/codec.py +51 -0
  110. muse/modalities/chat_completion/hf.py +249 -0
  111. muse/modalities/chat_completion/protocol.py +88 -0
  112. muse/modalities/chat_completion/routes.py +314 -0
  113. muse/modalities/chat_completion/runtimes/__init__.py +0 -0
  114. muse/modalities/chat_completion/runtimes/llama_cpp.py +168 -0
  115. muse/modalities/chat_completion/runtimes/transformers_vlm.py +284 -0
  116. muse/modalities/embedding_text/__init__.py +32 -0
  117. muse/modalities/embedding_text/client.py +69 -0
  118. muse/modalities/embedding_text/codec.py +41 -0
  119. muse/modalities/embedding_text/hf.py +106 -0
  120. muse/modalities/embedding_text/protocol.py +44 -0
  121. muse/modalities/embedding_text/routes.py +133 -0
  122. muse/modalities/embedding_text/runtimes/__init__.py +0 -0
  123. muse/modalities/embedding_text/runtimes/sentence_transformers.py +157 -0
  124. muse/modalities/image_animation/__init__.py +34 -0
  125. muse/modalities/image_animation/client.py +62 -0
  126. muse/modalities/image_animation/codec.py +98 -0
  127. muse/modalities/image_animation/hf.py +138 -0
  128. muse/modalities/image_animation/protocol.py +50 -0
  129. muse/modalities/image_animation/routes.py +142 -0
  130. muse/modalities/image_animation/runtimes/__init__.py +0 -0
  131. muse/modalities/image_animation/runtimes/animatediff.py +194 -0
  132. muse/modalities/image_cv/__init__.py +98 -0
  133. muse/modalities/image_cv/client.py +127 -0
  134. muse/modalities/image_cv/codec.py +163 -0
  135. muse/modalities/image_cv/hf.py +228 -0
  136. muse/modalities/image_cv/protocol.py +158 -0
  137. muse/modalities/image_cv/routes.py +221 -0
  138. muse/modalities/image_cv/runtimes/__init__.py +34 -0
  139. muse/modalities/image_cv/runtimes/hf_depth.py +150 -0
  140. muse/modalities/image_cv/runtimes/hf_keypoint.py +310 -0
  141. muse/modalities/image_cv/runtimes/hf_object_detection.py +181 -0
  142. muse/modalities/image_embedding/__init__.py +60 -0
  143. muse/modalities/image_embedding/client.py +163 -0
  144. muse/modalities/image_embedding/codec.py +18 -0
  145. muse/modalities/image_embedding/hf.py +229 -0
  146. muse/modalities/image_embedding/protocol.py +59 -0
  147. muse/modalities/image_embedding/routes.py +142 -0
  148. muse/modalities/image_embedding/runtimes/__init__.py +7 -0
  149. muse/modalities/image_embedding/runtimes/transformers_image.py +281 -0
  150. muse/modalities/image_generation/__init__.py +37 -0
  151. muse/modalities/image_generation/client.py +179 -0
  152. muse/modalities/image_generation/codec.py +65 -0
  153. muse/modalities/image_generation/hf.py +187 -0
  154. muse/modalities/image_generation/image_input.py +225 -0
  155. muse/modalities/image_generation/protocol.py +51 -0
  156. muse/modalities/image_generation/routes.py +295 -0
  157. muse/modalities/image_generation/runtimes/__init__.py +0 -0
  158. muse/modalities/image_generation/runtimes/diffusers.py +416 -0
  159. muse/modalities/image_ocr/__init__.py +53 -0
  160. muse/modalities/image_ocr/client.py +81 -0
  161. muse/modalities/image_ocr/codec.py +24 -0
  162. muse/modalities/image_ocr/hf.py +165 -0
  163. muse/modalities/image_ocr/protocol.py +52 -0
  164. muse/modalities/image_ocr/routes.py +94 -0
  165. muse/modalities/image_ocr/runtimes/__init__.py +19 -0
  166. muse/modalities/image_ocr/runtimes/hf_vision2seq.py +310 -0
  167. muse/modalities/image_segmentation/__init__.py +54 -0
  168. muse/modalities/image_segmentation/client.py +70 -0
  169. muse/modalities/image_segmentation/codec.py +231 -0
  170. muse/modalities/image_segmentation/hf.py +189 -0
  171. muse/modalities/image_segmentation/protocol.py +79 -0
  172. muse/modalities/image_segmentation/routes.py +246 -0
  173. muse/modalities/image_segmentation/runtimes/__init__.py +1 -0
  174. muse/modalities/image_segmentation/runtimes/sam2_runtime.py +388 -0
  175. muse/modalities/image_upscale/__init__.py +48 -0
  176. muse/modalities/image_upscale/client.py +75 -0
  177. muse/modalities/image_upscale/codec.py +11 -0
  178. muse/modalities/image_upscale/hf.py +179 -0
  179. muse/modalities/image_upscale/protocol.py +56 -0
  180. muse/modalities/image_upscale/routes.py +154 -0
  181. muse/modalities/image_upscale/runtimes/__init__.py +1 -0
  182. muse/modalities/image_upscale/runtimes/diffusers_upscaler.py +180 -0
  183. muse/modalities/model_3d_generation/__init__.py +79 -0
  184. muse/modalities/model_3d_generation/client.py +126 -0
  185. muse/modalities/model_3d_generation/codec.py +78 -0
  186. muse/modalities/model_3d_generation/hf.py +339 -0
  187. muse/modalities/model_3d_generation/protocol.py +74 -0
  188. muse/modalities/model_3d_generation/routes.py +250 -0
  189. muse/modalities/model_3d_generation/runtimes/__init__.py +6 -0
  190. muse/modalities/model_3d_generation/runtimes/hunyuan3d.py +326 -0
  191. muse/modalities/model_3d_generation/runtimes/shape_e.py +186 -0
  192. muse/modalities/model_3d_generation/runtimes/trellis.py +238 -0
  193. muse/modalities/model_3d_generation/runtimes/triposr.py +259 -0
  194. muse/modalities/text_classification/__init__.py +64 -0
  195. muse/modalities/text_classification/client.py +123 -0
  196. muse/modalities/text_classification/codec.py +152 -0
  197. muse/modalities/text_classification/hf.py +153 -0
  198. muse/modalities/text_classification/protocol.py +76 -0
  199. muse/modalities/text_classification/routes.py +315 -0
  200. muse/modalities/text_classification/runtimes/__init__.py +28 -0
  201. muse/modalities/text_classification/runtimes/hf_text_classifier.py +134 -0
  202. muse/modalities/text_classification/runtimes/hf_zero_shot.py +159 -0
  203. muse/modalities/text_rerank/__init__.py +53 -0
  204. muse/modalities/text_rerank/client.py +62 -0
  205. muse/modalities/text_rerank/codec.py +44 -0
  206. muse/modalities/text_rerank/hf.py +143 -0
  207. muse/modalities/text_rerank/protocol.py +46 -0
  208. muse/modalities/text_rerank/routes.py +118 -0
  209. muse/modalities/text_rerank/runtimes/__init__.py +1 -0
  210. muse/modalities/text_rerank/runtimes/cross_encoder.py +113 -0
  211. muse/modalities/text_summarization/__init__.py +53 -0
  212. muse/modalities/text_summarization/client.py +61 -0
  213. muse/modalities/text_summarization/codec.py +44 -0
  214. muse/modalities/text_summarization/hf.py +141 -0
  215. muse/modalities/text_summarization/protocol.py +54 -0
  216. muse/modalities/text_summarization/routes.py +97 -0
  217. muse/modalities/text_summarization/runtimes/__init__.py +1 -0
  218. muse/modalities/text_summarization/runtimes/bart_seq2seq.py +198 -0
  219. muse/modalities/video_generation/__init__.py +40 -0
  220. muse/modalities/video_generation/client.py +86 -0
  221. muse/modalities/video_generation/codec.py +120 -0
  222. muse/modalities/video_generation/hf.py +211 -0
  223. muse/modalities/video_generation/protocol.py +58 -0
  224. muse/modalities/video_generation/routes.py +130 -0
  225. muse/modalities/video_generation/runtimes/__init__.py +0 -0
  226. muse/modalities/video_generation/runtimes/cogvideox_runtime.py +176 -0
  227. muse/modalities/video_generation/runtimes/wan_runtime.py +189 -0
  228. muse/models/__init__.py +0 -0
  229. muse/models/animatediff_motion_v3.py +213 -0
  230. muse/models/ast_audioset.py +42 -0
  231. muse/models/bark_small.py +202 -0
  232. muse/models/bart_large_cnn.py +199 -0
  233. muse/models/bge_reranker_v2_m3.py +134 -0
  234. muse/models/deberta_v3_base_zeroshot_v2_0.py +45 -0
  235. muse/models/depth_anything_v2_small.py +48 -0
  236. muse/models/detr_resnet_50.py +40 -0
  237. muse/models/dinov2_small.py +195 -0
  238. muse/models/kokoro_82m.py +163 -0
  239. muse/models/mert_v1_95m.py +256 -0
  240. muse/models/nv_embed_v2.py +217 -0
  241. muse/models/sam2_hiera_tiny.py +195 -0
  242. muse/models/sd_turbo.py +455 -0
  243. muse/models/smolvlm_256m_instruct.py +38 -0
  244. muse/models/soprano_80m.py +150 -0
  245. muse/models/stable_audio_open_1_0.py +256 -0
  246. muse/models/stable_diffusion_x4_upscaler.py +210 -0
  247. muse/models/triposr.py +59 -0
  248. muse/models/trocr_base_printed.py +43 -0
  249. muse/models/twitter_roberta_base_sentiment_latest.py +46 -0
  250. muse/models/vitpose_base_simple.py +41 -0
  251. muse/models/wan2_1_t2v_1_3b.py +220 -0
  252. museq-0.45.2.dist-info/METADATA +818 -0
  253. museq-0.45.2.dist-info/RECORD +257 -0
  254. museq-0.45.2.dist-info/WHEEL +5 -0
  255. museq-0.45.2.dist-info/entry_points.txt +2 -0
  256. museq-0.45.2.dist-info/licenses/LICENSE +201 -0
  257. museq-0.45.2.dist-info/top_level.txt +1 -0
muse/__init__.py ADDED
@@ -0,0 +1,87 @@
1
+ """Muse: model-agnostic multi-modality generation server.
2
+
3
+ The authoritative list of supported modalities lives in
4
+ `muse.core.discovery.discover_modalities()`, which scans
5
+ `src/muse/modalities/` plus any user-configured dirs. As of v0.32.0
6
+ the bundled modalities are:
7
+
8
+ - audio/embedding: /v1/audio/embeddings (transformers AutoModel + librosa; MERT, CLAP, wav2vec; multipart upload, OpenAI-shape envelope)
9
+ - audio/generation: /v1/audio/music, /v1/audio/sfx (Stable Audio Open 1.0; capability-gated)
10
+ - audio/speech: /v1/audio/speech (TTS: Soprano, Kokoro, Bark)
11
+ - audio/transcription: /v1/audio/transcriptions, /v1/audio/translations (faster-whisper)
12
+ - chat/completion: /v1/chat/completions (llama-cpp-python over GGUF)
13
+ - embedding/text: /v1/embeddings (sentence-transformers)
14
+ - image/animation: /v1/images/animations (AnimateDiff; short looping clips, animated WebP/GIF/MP4)
15
+ - image/embedding: /v1/images/embeddings (transformers AutoModel; CLIP, SigLIP, DINOv2)
16
+ - image/generation: /v1/images/generations, /v1/images/edits (inpaint), /v1/images/variations (diffusers)
17
+ - image/segmentation: /v1/images/segment (SAM-2; multipart upload, mode-aware: auto/points/boxes/text; PNG or COCO RLE masks)
18
+ - image/upscale: /v1/images/upscale (StableDiffusionUpscalePipeline; SD x4; multipart upload)
19
+ - text/classification: /v1/moderations (HF text-classification)
20
+ - text/rerank: /v1/rerank (sentence-transformers CrossEncoder; Cohere-compat)
21
+ - text/summarization: /v1/summarize (transformers AutoModelForSeq2SeqLM; Cohere-compat)
22
+ - video/generation: /v1/video/generations (Wan, CogVideoX; narrative clips, mp4/webm/frames_b64; GPU-required)
23
+
24
+ v0.32.0 adds CI smoke-tests of fresh per-model venvs (#124). The
25
+ workflow `.github/workflows/fresh-venv-smoke.yml` matrix-tests five
26
+ lightweight bundled models (kokoro-82m, dinov2-small, bart-large-cnn,
27
+ bge-reranker-v2-m3, mert-v1-95m) on every push to main and every PR;
28
+ each job creates a fresh venv, installs only what `muse pull` would
29
+ install, and verifies the model loads via the in-venv probe worker
30
+ (no inference; that's GPU-bound and out of scope). Catches the
31
+ production failure mode where a bundled script's `pip_extras` misses
32
+ a transitive dep that `from_pretrained` (or sentence-transformers, or
33
+ diffusers) pulls in at load time, complementing the v0.30.0 static
34
+ audit (#110) which can only flag direct-import gaps via AST scan.
35
+ Heavy / GPU-only models deferred until paid runner budget allows.
36
+ Local repro: `python scripts/smoke_fresh_venv.py --model_id <id>`.
37
+
38
+ v0.31.0 consolidates cross-runtime utilities into
39
+ `muse.core.runtime_helpers`: `select_device` (cuda/mps/cpu auto-detect),
40
+ `dtype_for_name` (string-to-torch.dtype map with `fp16`/`bf16`/`fp32`
41
+ aliases), `set_inference_mode` (no-grad switch with the literal
42
+ method-name token kept out of caller bodies), and `LoadTimer` (opt-in
43
+ load-time logging context). Removes ~30 per-runtime copies; an AST-based
44
+ meta-test (`tests/core/test_runtime_helpers_meta.py`) walks every
45
+ runtime and bundled script to flag re-implementations. Behavior-
46
+ preserving; the existing 2150 fast-lane tests pass without modification.
47
+
48
+ v0.30.0 bundles three operational improvements:
49
+ - the supervisor starts the gateway after the FIRST worker is healthy
50
+ (was: ALL workers), so clients can hit the fast workers while slow
51
+ ones still load. Remaining workers promote on a daemon thread.
52
+ - bundled scripts in `muse/models/` got a `pip_extras` audit; missing
53
+ transitive deps (torch, numpy) added to seven manifests; a static
54
+ regression-guard test parametrized over every bundled script
55
+ catches future gaps.
56
+ - new `muse models refresh <id> | --all | --enabled` re-installs
57
+ `muse[server,<extras>]` plus the model's `pip_extras` into per-model
58
+ venvs; use after `pip install -U muse` to propagate new server-side
59
+ deps.
60
+
61
+ v0.29.0 adds `muse mcp`: an MCP (Model Context Protocol) server that
62
+ exposes muse to LLM clients (Claude Desktop, Cursor, etc.) as 29
63
+ structured tools. 11 admin tools wrap `/v1/admin/*` (gated by
64
+ `MUSE_ADMIN_TOKEN`); 18 inference tools wrap the generation routes.
65
+ Stdio mode is the default; HTTP+SSE mode is available for remote /
66
+ web embedders. Filter mode lets ops pin to admin-only or
67
+ inference-only. See CLAUDE.md "Using muse from Claude Desktop".
68
+
69
+ v0.28.0 added an admin REST API under `/v1/admin/*` for runtime model
70
+ control (enable/disable/probe/pull/remove without restarting `muse
71
+ serve`). Closed-by-default behind `MUSE_ADMIN_TOKEN`. See README.md
72
+ "Admin endpoints" and CLAUDE.md "Admin REST API" for the full surface.
73
+
74
+ Heavy backends (transformers, diffusers, faster-whisper, llama-cpp,
75
+ sentence-transformers) are imported lazily inside per-modality runtime
76
+ modules to keep `muse --help` and `muse pull` instant. Each pulled
77
+ model lives in its own venv at `~/.muse/venvs/<model-id>/`.
78
+
79
+ `__version__` is read from pyproject.toml at install time; this
80
+ fallback covers in-tree imports without an installed muse.
81
+ """
82
from importlib.metadata import PackageNotFoundError, version

# The import package is `muse`, but the distribution is published as
# `museq` (wheel / dist-info name). `importlib.metadata.version` looks up
# the *distribution* name, so query `museq` first; `muse` is kept as a
# fallback for environments where the project was installed under that name.
__version__ = "0.0.0+unknown"  # in-tree import without an installed package
for _dist_name in ("museq", "muse"):
    try:
        __version__ = version(_dist_name)
    except PackageNotFoundError:
        continue
    break
muse/admin/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """Admin REST API for runtime model control.
2
+
3
+ Mounted on the gateway under /v1/admin/*; gated by MUSE_ADMIN_TOKEN.
4
+ See docs/superpowers/specs/2026-04-28-admin-api-design.md for the full
5
+ wire contract; this package provides:
6
+
7
+ - auth: bearer-token verification dependency for FastAPI
8
+ - jobs: in-memory async-job tracker with 10-minute retention
9
+ - operations: orchestrates enable / disable / probe / pull / remove
10
+ via the supervisor singleton
11
+ - routes/: per-resource APIRouter modules
12
+ - client: thin Python wrapper for programmatic admin access
13
+
14
+ The admin surface is closed-by-default. Without MUSE_ADMIN_TOKEN set,
15
+ all admin requests return 503 admin_disabled. With the env var set,
16
+ the request must carry Authorization: Bearer <token>.
17
+ """
muse/admin/auth.py ADDED
@@ -0,0 +1,63 @@
1
+ """Bearer-token verification for admin endpoints.
2
+
3
+ The token is read from the MUSE_ADMIN_TOKEN environment variable. With
4
+ no token configured, every admin request is rejected with 503; this is
5
+ the closed-by-default policy. With a token configured, the request must
6
+ carry an Authorization: Bearer <token> header matching the env var.
7
+
8
+ The token is never echoed in error messages or logs.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ import secrets
14
+
15
+ from fastapi import Header, HTTPException
16
+
17
+ ADMIN_TOKEN_ENV = "MUSE_ADMIN_TOKEN"
18
+
19
+
20
def _err(status: int, code: str, message: str) -> HTTPException:
    """Wrap an OpenAI-style error envelope in an HTTPException.

    Callers pass only fixed, descriptive text as `message`; the secret
    admin token is never routed through this helper.
    """
    envelope = {
        "code": code,
        "message": message,
        "type": "invalid_request_error",
    }
    return HTTPException(status_code=status, detail={"error": envelope})
34
+
35
+
36
def verify_admin_token(authorization: str | None = Header(default=None)) -> None:
    """FastAPI dependency: raise unless caller carries the admin bearer.

    Five paths:
    - env var unset -> 503 admin_disabled
    - header missing -> 401 missing_token
    - header malformed (no "Bearer ") -> 401 missing_token
    - header bearer wrong -> 403 invalid_token
    - header bearer matches -> return None (route runs)

    Raises:
        HTTPException: with an OpenAI-shape error envelope for every
            rejecting path above.
    """
    expected = os.environ.get(ADMIN_TOKEN_ENV)
    if not expected:
        raise _err(
            503,
            "admin_disabled",
            f"Admin endpoints require the {ADMIN_TOKEN_ENV} env var to be set",
        )
    if not authorization or not authorization.startswith("Bearer "):
        raise _err(
            401,
            "missing_token",
            "Authorization: Bearer <token> required",
        )
    presented = authorization[len("Bearer "):]
    # Constant-time compare prevents recovering the token byte-by-byte
    # via response-time variance. compare_digest only accepts `str`
    # arguments that are pure ASCII and raises TypeError otherwise, which
    # would turn a non-ASCII header into an unhandled 500. Comparing the
    # encoded bytes keeps the check constant-time and makes such input a
    # plain mismatch (403). `surrogateescape` round-trips undecodable
    # bytes that os.environ may have smuggled into `expected`.
    presented_bytes = presented.encode("utf-8", "surrogateescape")
    expected_bytes = expected.encode("utf-8", "surrogateescape")
    if not secrets.compare_digest(presented_bytes, expected_bytes):
        raise _err(403, "invalid_token", "Bad admin token")
muse/admin/client.py ADDED
@@ -0,0 +1,195 @@
1
+ """AdminClient: thin Python wrapper over the /v1/admin/* HTTP surface.
2
+
3
+ Use this for programmatic admin against a running `muse serve`. For
4
+ in-process usage (no HTTP), import the operations module directly.
5
+
6
+ Token resolution:
7
+ 1. constructor `token=` arg
8
+ 2. MUSE_ADMIN_TOKEN env var
9
+ 3. None (no Authorization header is sent; the server answers 401, or 503 when its own MUSE_ADMIN_TOKEN is unset)
10
+
11
+ Server resolution:
12
+ 1. constructor `base_url=` arg
13
+ 2. MUSE_SERVER env var
14
+ 3. http://localhost:8000
15
+
16
+ The `wait` helper polls `/jobs/{id}` until the job lands in done/failed.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import time
22
+ from typing import Any
23
+
24
+ import httpx
25
+
26
+
27
class AdminClientError(Exception):
    """Error raised for any admin call that comes back non-2xx.

    Attributes mirror the constructor arguments: `status` is the HTTP
    status, `code` and `message` come from the OpenAI-style error
    envelope, and `body` is the decoded response payload.
    """

    def __init__(self, status: int, code: str, message: str, body: Any):
        self.status, self.code = status, code
        self.message, self.body = message, body
        summary = f"{status} {code}: {message}"
        super().__init__(summary)
40
+
41
+
42
class AdminClient:
    """HTTP client for /v1/admin/* admin endpoints.

    Every public method issues a single request through `_request` and
    returns the decoded JSON body. Any response with status >= 400 is
    raised as `AdminClientError`.

    Args:
        base_url: Server root. Falls back to the MUSE_SERVER env var and
            then to http://localhost:8000; a trailing slash is stripped.
        token: Admin bearer token. Falls back to the MUSE_ADMIN_TOKEN
            env var. When neither is set no Authorization header is sent.
        timeout: Default per-request timeout in seconds.
    """

    def __init__(
        self,
        base_url: str | None = None,
        token: str | None = None,
        timeout: float = 30.0,
    ):
        self.base_url = (
            base_url
            or os.environ.get("MUSE_SERVER")
            or "http://localhost:8000"
        ).rstrip("/")
        self.token = token or os.environ.get("MUSE_ADMIN_TOKEN")
        self._timeout = timeout

    def _headers(self) -> dict:
        """Return the auth headers for a request (empty without a token)."""
        if self.token is None:
            return {}
        return {"Authorization": f"Bearer {self.token}"}

    @staticmethod
    def _error_fields(body: Any, fallback_message: str) -> tuple[str, str]:
        """Extract `(code, message)` from a decoded error response body.

        Two envelope locations are recognised: a top-level `error`
        object, and an `error` object nested under `detail`. Anything
        else (a string or list `detail`, a non-dict body, a non-dict
        `error`) yields the defaults `("http_error", fallback_message)`
        rather than raising.
        """
        err: Any = None
        if isinstance(body, dict):
            err = body.get("error")
            if not err:
                detail = body.get("detail")
                if isinstance(detail, dict):
                    err = detail.get("error")
        if not isinstance(err, dict):
            err = {}
        return err.get("code", "http_error"), err.get("message", fallback_message)

    def _request(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send one request and return the decoded JSON response.

        Args:
            method: HTTP verb.
            path: Path appended verbatim to `base_url`.
            **kwargs: Forwarded to `httpx.Client.request`. `timeout` and
                `headers` are consumed here: `timeout` overrides the
                constructor default (None means "use the default") and
                `headers` are merged over the auth headers.

        Returns:
            The parsed JSON body, or `{"raw": <text>}` when the body is
            not valid JSON.

        Raises:
            AdminClientError: when the response status is >= 400.
        """
        url = f"{self.base_url}{path}"
        # Per-call `timeout` overrides the constructor's default. None
        # means "use the constructor's value." This is the escape hatch
        # for slow operations like warmup, whose cold-load duration
        # (10-60s) routinely exceeds the constructor default of 30s.
        timeout = kwargs.pop("timeout", None)
        if timeout is None:
            timeout = self._timeout
        # `or {}` tolerates an explicit headers=None from the caller.
        headers = {**self._headers(), **(kwargs.pop("headers", None) or {})}
        with httpx.Client(timeout=timeout) as client:
            r = client.request(method, url, headers=headers, **kwargs)
            if r.status_code >= 400:
                try:
                    body = r.json()
                except Exception:  # noqa: BLE001
                    body = {"raw": r.text}
                # The body is not guaranteed to carry muse's envelope:
                # framework-generated errors use a string or list
                # `detail`, and proxies may return arbitrary JSON. The
                # helper never raises, so the caller always gets an
                # AdminClientError instead of an AttributeError.
                code, message = self._error_fields(body, r.text)
                raise AdminClientError(r.status_code, code, message, body)
            try:
                return r.json()
            except Exception:  # noqa: BLE001
                return {"raw": r.text}

    # Per-model operations

    def enable(self, model_id: str) -> dict:
        """POST /v1/admin/models/{model_id}/enable."""
        return self._request("POST", f"/v1/admin/models/{model_id}/enable", json={})

    def disable(self, model_id: str) -> dict:
        """POST /v1/admin/models/{model_id}/disable."""
        return self._request("POST", f"/v1/admin/models/{model_id}/disable", json={})

    def warmup(self, model_id: str, *, timeout: float | None = None) -> dict:
        """Pre-load a model via the supervisor's LoadDirector.

        Synchronous on the wire: returns once the director's warmup
        completes (cold load duration: 10-60s for real models, longer
        for video / large diffusion models). Returns {"model_id",
        "worker_port"} on success; raises AdminClientError on 4xx/5xx.

        `timeout` overrides the constructor's default for this call
        only. The default constructor timeout (30s) is too short for
        most cold loads; callers driving warmup should pass a more
        generous value (e.g. 300s) or set timeout via the constructor.
        None means "use the constructor's value."
        """
        return self._request(
            "POST",
            f"/v1/admin/models/{model_id}/warmup",
            json={},
            timeout=timeout,
        )

    def probe(
        self,
        model_id: str,
        *,
        no_inference: bool = False,
        device: str | None = None,
    ) -> dict:
        """POST /v1/admin/models/{model_id}/probe.

        `device` is only included in the request body when given.
        """
        body: dict[str, Any] = {"no_inference": no_inference}
        if device is not None:
            body["device"] = device
        return self._request("POST", f"/v1/admin/models/{model_id}/probe", json=body)

    def pull(self, identifier: str) -> dict:
        """POST /v1/admin/models/_/pull with the identifier in the body."""
        # Use the documented `_` placeholder path; identifier in body
        # avoids URL-encoding hf://... slashes.
        return self._request(
            "POST",
            "/v1/admin/models/_/pull",
            json={"identifier": identifier},
        )

    def remove(self, model_id: str, *, purge: bool = False) -> dict:
        """DELETE /v1/admin/models/{model_id}; `purge` goes in the query."""
        return self._request(
            "DELETE",
            f"/v1/admin/models/{model_id}",
            params={"purge": "true" if purge else "false"},
        )

    def status(self, model_id: str) -> dict:
        """GET /v1/admin/models/{model_id}/status."""
        return self._request("GET", f"/v1/admin/models/{model_id}/status")

    # Cluster-wide views

    def memory(self) -> dict:
        """GET /v1/admin/memory."""
        return self._request("GET", "/v1/admin/memory")

    def workers(self) -> dict:
        """GET /v1/admin/workers."""
        return self._request("GET", "/v1/admin/workers")

    def restart_worker(self, port: int) -> dict:
        """POST /v1/admin/workers/{port}/restart."""
        return self._request("POST", f"/v1/admin/workers/{port}/restart")

    # Job tracking

    def job(self, job_id: str) -> dict:
        """GET /v1/admin/jobs/{job_id}."""
        return self._request("GET", f"/v1/admin/jobs/{job_id}")

    def jobs(self) -> dict:
        """GET /v1/admin/jobs."""
        return self._request("GET", "/v1/admin/jobs")

    def wait(
        self,
        job_id: str,
        *,
        timeout: float = 300.0,
        poll: float = 1.0,
    ) -> dict:
        """Block until job is done or failed; return the final job record.

        The job is always polled at least once, even with `timeout=0`.

        Raises:
            TimeoutError: if the job never reaches a terminal state
                within `timeout` seconds.
            AdminClientError: if a poll request itself fails.
        """
        deadline = time.monotonic() + timeout
        while True:
            job = self.job(job_id)
            if job.get("state") in ("done", "failed"):
                return job
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(
                    f"job {job_id} did not finish within {timeout}s "
                    f"(last state: {job.get('state')})"
                )
            # Never sleep past the deadline: the final poll then happens
            # at the deadline instead of up to `poll` seconds after it.
            time.sleep(min(poll, remaining))
muse/admin/jobs.py ADDED
@@ -0,0 +1,165 @@
1
+ """In-memory async-job tracker for admin operations.
2
+
3
+ Each enable / pull / probe call returns a Job; the caller polls
4
+ GET /v1/admin/jobs/{id} to observe progression. Jobs persist for ten
5
+ minutes after `finished_at`; older jobs are reaped lazily on every
6
+ create / get / list call, keeping memory bounded without a reaper thread.
7
+
8
+ The job_id is a uuid4 hex string. Jobs go through:
9
+ pending -> running -> (done | failed)
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ import threading
15
+ import time
16
+ import uuid
17
+ from collections import deque
18
+ from dataclasses import dataclass, field
19
+ from datetime import datetime, timezone
20
+ from typing import Any
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ _RETENTION_SECONDS = 600.0 # ten minutes
25
+ _MAX_JOBS = 100
26
+
27
+
28
@dataclass
class Job:
    """Record of a single asynchronous admin operation.

    `thread` holds the worker running the operation so it can be joined
    at shutdown, and `finished_at_monotonic` is the clock reading used
    for lazy expiry. Both are internal bookkeeping: they are hidden from
    `repr` and left out of `to_dict`.
    """
    job_id: str
    op: str
    model_id: str
    state: str = "pending"
    started_at: str = ""
    finished_at: str | None = None
    result: dict | None = None
    error: str | None = None
    log_lines: list[str] = field(default_factory=list)
    thread: Any = field(default=None, repr=False)
    finished_at_monotonic: float | None = field(default=None, repr=False)

    def to_dict(self) -> dict:
        """Return the serializable view of the job.

        `log_lines` is copied so the caller's dict does not alias the
        live list.
        """
        plain_keys = (
            "job_id",
            "op",
            "model_id",
            "state",
            "started_at",
            "finished_at",
            "result",
            "error",
        )
        payload = {key: getattr(self, key) for key in plain_keys}
        payload["log_lines"] = list(self.log_lines)
        return payload
60
+
61
+
62
class JobStore:
    """Lock-guarded, in-memory registry of admin jobs with lazy expiry.

    Jobs live in a dict keyed by job_id; a bounded deque records creation
    order. `retention_seconds` is how long a finished job remains
    retrievable through `get` / `list_recent` (default: ten minutes).

    `max_jobs` is the deque's maxlen. Once it is full, appending a new id
    pushes the oldest id out of the deque. The dict itself is not trimmed
    at that point, so the evicted job can still be fetched with `get`
    until it expires, but `list_recent` no longer reports it because it
    only walks ids still present in the deque.
    """

    def __init__(self, retention_seconds: float = _RETENTION_SECONDS, max_jobs: int = _MAX_JOBS):
        self._retention = retention_seconds
        self._lock = threading.Lock()
        self._order: deque[str] = deque(maxlen=max_jobs)
        self._jobs: dict[str, Job] = {}

    def create(self, op: str, model_id: str) -> Job:
        """Register and return a new pending job with a fresh uuid4 hex id."""
        new_job = Job(
            job_id=uuid.uuid4().hex,
            op=op,
            model_id=model_id,
            state="pending",
            started_at=_now_iso(),
        )
        new_id = new_job.job_id
        with self._lock:
            self._reap_expired()
            self._jobs[new_id] = new_job
            self._order.append(new_id)
        logger.info("job %s created (op=%s, model=%s)", new_id, op, model_id)
        return new_job

    def update(self, job_id: str, **fields: Any) -> Job | None:
        """Assign `fields` onto the job; return it, or None if unknown.

        The first update that leaves the job in "done" or "failed"
        stamps both `finished_at` and `finished_at_monotonic`.
        """
        with self._lock:
            target = self._jobs.get(job_id)
            if target is None:
                return None
            for attr_name, attr_value in fields.items():
                setattr(target, attr_name, attr_value)
            is_terminal = target.state in ("done", "failed")
            if is_terminal and target.finished_at_monotonic is None:
                target.finished_at = _now_iso()
                target.finished_at_monotonic = time.monotonic()
            return target

    def get(self, job_id: str) -> Job | None:
        """Return the job for `job_id`, or None when unknown or expired."""
        with self._lock:
            self._reap_expired()
            return self._jobs.get(job_id)

    def list_recent(self) -> list[Job]:
        """List surviving jobs, most recently created first.

        Only ids still held by the order deque are considered, so the
        result never exceeds the deque's maxlen.
        """
        with self._lock:
            self._reap_expired()
            live = self._jobs
            return [live[jid] for jid in reversed(self._order) if jid in live]

    def shutdown(self, timeout: float = 5.0) -> None:
        """Join every tracked job thread, waiting up to `timeout` on each.

        The thread list is snapshotted under the lock; the joins run
        outside it. A failing join is logged and skipped.
        """
        with self._lock:
            workers = [
                job.thread for job in self._jobs.values() if job.thread is not None
            ]
        for worker in workers:
            try:
                worker.join(timeout=timeout)
            except Exception as e:  # noqa: BLE001
                logger.warning("error joining job thread: %s", e)

    def _reap_expired(self) -> None:
        """Forget finished jobs older than the retention window.

        A non-positive retention disables reaping. Jobs without a
        `finished_at_monotonic` stamp are never reaped.

        Caller must hold `self._lock`.
        """
        if self._retention <= 0:
            return
        oldest_allowed = time.monotonic() - self._retention
        stale_ids = []
        for jid, job in self._jobs.items():
            finished = job.finished_at_monotonic
            if finished is not None and finished < oldest_allowed:
                stale_ids.append(jid)
        for jid in stale_ids:
            self._jobs.pop(jid, None)
144
+
145
+
146
+ def _now_iso() -> str:
147
+ return datetime.now(timezone.utc).isoformat()
148
+
149
+
150
+ # Module-level default. Tests can build their own JobStore without
151
+ # touching this; production code reaches it through get_default_store.
152
+ _default_store: JobStore | None = None
153
+
154
+
155
def get_default_store() -> JobStore:
    """Return the module-wide JobStore, building it on first use."""
    global _default_store
    store = _default_store
    if store is None:
        store = _default_store = JobStore()
    return store
160
+
161
+
162
def reset_default_store() -> None:
    """Test hook: discard the module-wide store.

    The next `get_default_store` call builds a fresh one.
    """
    global _default_store
    _default_store = None