hugpy 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. hugpy/__init__.py +3 -0
  2. hugpy/cli.py +93 -0
  3. hugpy/flask_app/__init__.py +2 -0
  4. hugpy/flask_app/app/__init__.py +7 -0
  5. hugpy/flask_app/app/functions/__init__.py +3 -0
  6. hugpy/flask_app/app/functions/chat/__init__.py +1 -0
  7. hugpy/flask_app/app/functions/chat/imports.py +1 -0
  8. hugpy/flask_app/app/functions/chat/streaming.py +168 -0
  9. hugpy/flask_app/app/functions/downloads/__init__.py +4 -0
  10. hugpy/flask_app/app/functions/downloads/cancelable_downloads.py +269 -0
  11. hugpy/flask_app/app/functions/downloads/downloader.py +29 -0
  12. hugpy/flask_app/app/functions/downloads/downloads.py +34 -0
  13. hugpy/flask_app/app/functions/downloads/imports.py +1 -0
  14. hugpy/flask_app/app/functions/imports/__init__.py +4 -0
  15. hugpy/flask_app/app/functions/imports/init_imports.py +6 -0
  16. hugpy/flask_app/app/functions/imports/options/__init__.py +2 -0
  17. hugpy/flask_app/app/functions/imports/options/imports.py +2 -0
  18. hugpy/flask_app/app/functions/imports/options/install.py +67 -0
  19. hugpy/flask_app/app/functions/imports/options/search.py +35 -0
  20. hugpy/flask_app/app/functions/imports/utils/__init__.py +6 -0
  21. hugpy/flask_app/app/functions/imports/utils/api_keys.py +134 -0
  22. hugpy/flask_app/app/functions/imports/utils/constants.py +4 -0
  23. hugpy/flask_app/app/functions/imports/utils/imports.py +1 -0
  24. hugpy/flask_app/app/functions/imports/utils/manifest.py +53 -0
  25. hugpy/flask_app/app/functions/imports/utils/peers.py +124 -0
  26. hugpy/flask_app/app/functions/imports/utils/schemas/__init__.py +8 -0
  27. hugpy/flask_app/app/functions/imports/utils/schemas/chat_schemas.py +29 -0
  28. hugpy/flask_app/app/functions/imports/utils/schemas/config_schemas.py +23 -0
  29. hugpy/flask_app/app/functions/imports/utils/schemas/download_schemas.py +28 -0
  30. hugpy/flask_app/app/functions/imports/utils/schemas/imports.py +3 -0
  31. hugpy/flask_app/app/functions/imports/utils/schemas/install_schemas.py +17 -0
  32. hugpy/flask_app/app/functions/imports/utils/schemas/job_schemas.py +75 -0
  33. hugpy/flask_app/app/functions/imports/utils/schemas/model_schemas.py +37 -0
  34. hugpy/flask_app/app/functions/imports/utils/schemas/request_schemas.py +13 -0
  35. hugpy/flask_app/app/functions/imports/utils/schemas/specs_schemas.py +9 -0
  36. hugpy/flask_app/app/functions/imports/utils/workers.py +619 -0
  37. hugpy/flask_app/app/routes/__init__.py +6 -0
  38. hugpy/flask_app/app/routes/chat_routes.py +10 -0
  39. hugpy/flask_app/app/routes/imports.py +1 -0
  40. hugpy/flask_app/app/routes/llm_storage_routes.py +125 -0
  41. hugpy/flask_app/app/routes/search_routes.py +130 -0
  42. hugpy/flask_app/app/routes/upload_routes.py +15 -0
  43. hugpy/flask_app/app/routes/v1_routes.py +256 -0
  44. hugpy/flask_app/app/routes/worker_routes.py +820 -0
  45. hugpy/flask_app/wsgi_app.py +94 -0
  46. hugpy/imports/__init__.py +3 -0
  47. hugpy/imports/apis/__init__.py +4 -0
  48. hugpy/imports/apis/call_api.py +124 -0
  49. hugpy/imports/apis/download_models.py +252 -0
  50. hugpy/imports/apis/get_module.py +315 -0
  51. hugpy/imports/apis/huggingface_api.py +222 -0
  52. hugpy/imports/apis/imports.py +2 -0
  53. hugpy/imports/apis/serve/__init__.py +2 -0
  54. hugpy/imports/apis/serve/serve.py +510 -0
  55. hugpy/imports/apis/serve/serve_cli.py +85 -0
  56. hugpy/imports/apis/systemd_units.py +325 -0
  57. hugpy/imports/config/__init__.py +3 -0
  58. hugpy/imports/config/imports.py +1 -0
  59. hugpy/imports/config/main.py +183 -0
  60. hugpy/imports/config/models/__init__.py +2 -0
  61. hugpy/imports/config/models/imports.py +1 -0
  62. hugpy/imports/config/models/models_config.py +366 -0
  63. hugpy/imports/config/models/models_default.py +84 -0
  64. hugpy/imports/config/models/models_dict.py +73 -0
  65. hugpy/imports/src/__init__.py +7 -0
  66. hugpy/imports/src/_compat.py +184 -0
  67. hugpy/imports/src/chunking.py +92 -0
  68. hugpy/imports/src/constants/__init__.py +4 -0
  69. hugpy/imports/src/constants/categories.py +36 -0
  70. hugpy/imports/src/constants/constants.py +150 -0
  71. hugpy/imports/src/constants/hugpy_marker.py +112 -0
  72. hugpy/imports/src/constants/imports.py +1 -0
  73. hugpy/imports/src/constants/paths.py +131 -0
  74. hugpy/imports/src/except_utils.py +62 -0
  75. hugpy/imports/src/init_imports.py +45 -0
  76. hugpy/imports/src/module_imports.py +273 -0
  77. hugpy/imports/src/schemas/__init__.py +10 -0
  78. hugpy/imports/src/schemas/chat_schemas.py +61 -0
  79. hugpy/imports/src/schemas/embeded_schemas.py +56 -0
  80. hugpy/imports/src/schemas/event_schemas.py +31 -0
  81. hugpy/imports/src/schemas/imports.py +65 -0
  82. hugpy/imports/src/schemas/metadata_schemas.py +47 -0
  83. hugpy/imports/src/schemas/model_schemas.py +97 -0
  84. hugpy/imports/src/schemas/runner_schemas.py +33 -0
  85. hugpy/imports/src/schemas/summarizer_schemas.py +172 -0
  86. hugpy/imports/src/schemas/task_schemas.py +41 -0
  87. hugpy/imports/src/schemas/video_schemas.py +38 -0
  88. hugpy/imports/src/schemas/whisper_schemas.py +162 -0
  89. hugpy/imports/src/utils.py +324 -0
  90. hugpy/managers/__init__.py +11 -0
  91. hugpy/managers/chat_context/__init__.py +3 -0
  92. hugpy/managers/chat_context/chat_context.py +51 -0
  93. hugpy/managers/chat_context/context_budget.py +223 -0
  94. hugpy/managers/chat_context/imports.py +1 -0
  95. hugpy/managers/chat_context/unbounded.py +55 -0
  96. hugpy/managers/dispatch/__init__.py +2 -0
  97. hugpy/managers/dispatch/acquire.py +46 -0
  98. hugpy/managers/dispatch/dispatch.py +396 -0
  99. hugpy/managers/dispatch/imports.py +1 -0
  100. hugpy/managers/embed/__init__.py +1 -0
  101. hugpy/managers/embed/embed_runner.py +153 -0
  102. hugpy/managers/embed/imports.py +1 -0
  103. hugpy/managers/falconsai/__init__.py +1 -0
  104. hugpy/managers/falconsai/falconsai_module.py +307 -0
  105. hugpy/managers/falconsai/imports.py +1 -0
  106. hugpy/managers/generate/__init__.py +3 -0
  107. hugpy/managers/generate/coder.py +544 -0
  108. hugpy/managers/generate/coder_guff.py +81 -0
  109. hugpy/managers/generate/config.py +121 -0
  110. hugpy/managers/generate/generate_runner.py +186 -0
  111. hugpy/managers/generate/generate_runner2.py +131 -0
  112. hugpy/managers/generate/imports.py +1 -0
  113. hugpy/managers/imports.py +1 -0
  114. hugpy/managers/keywords/__init__.py +3 -0
  115. hugpy/managers/keywords/imports.py +1 -0
  116. hugpy/managers/keywords/keybert_model.py +591 -0
  117. hugpy/managers/keywords/keywords_runner.py +18 -0
  118. hugpy/managers/llama/__init__.py +2 -0
  119. hugpy/managers/llama/imports.py +1 -0
  120. hugpy/managers/llama/runners/__init__.py +2 -0
  121. hugpy/managers/llama/runners/chat_runner.py +111 -0
  122. hugpy/managers/llama/runners/get.py +59 -0
  123. hugpy/managers/llama/runners/imports.py +1 -0
  124. hugpy/managers/llama/runners/src/__init__.py +4 -0
  125. hugpy/managers/llama/runners/src/base_runner.py +214 -0
  126. hugpy/managers/llama/runners/src/ccp_runner.py +157 -0
  127. hugpy/managers/llama/runners/src/imports/__init__.py +4 -0
  128. hugpy/managers/llama/runners/src/imports/config.py +14 -0
  129. hugpy/managers/llama/runners/src/imports/constants.py +36 -0
  130. hugpy/managers/llama/runners/src/imports/init_imports.py +37 -0
  131. hugpy/managers/llama/runners/src/imports/utils.py +60 -0
  132. hugpy/managers/llama/runners/src/python_runner.py +238 -0
  133. hugpy/managers/llama/runners/src/shard_server.py +154 -0
  134. hugpy/managers/llama/serve.py +10 -0
  135. hugpy/managers/llama/testsisiis.py +2 -0
  136. hugpy/managers/resolvers/__init__.py +3 -0
  137. hugpy/managers/resolvers/allocator.py +122 -0
  138. hugpy/managers/resolvers/assure_model_key.py +71 -0
  139. hugpy/managers/resolvers/categories/__init__.py +2 -0
  140. hugpy/managers/resolvers/categories/builders.py +191 -0
  141. hugpy/managers/resolvers/categories/frameworks.py +15 -0
  142. hugpy/managers/resolvers/categories/imports.py +11 -0
  143. hugpy/managers/resolvers/imports.py +1 -0
  144. hugpy/managers/resolvers/model_dict_resolver.py +107 -0
  145. hugpy/managers/resolvers/model_resolver.py +333 -0
  146. hugpy/managers/resolvers/remote.py +355 -0
  147. hugpy/managers/serve/__init__.py +2 -0
  148. hugpy/managers/serve/imports.py +1 -0
  149. hugpy/managers/serve/overrides.py +102 -0
  150. hugpy/managers/serve/serve.py +586 -0
  151. hugpy/managers/serve/serve_cli.py +85 -0
  152. hugpy/managers/serve/slot_agent.py +257 -0
  153. hugpy/managers/serve/slots.py +172 -0
  154. hugpy/managers/spill.py +332 -0
  155. hugpy/managers/summarizers/__init__.py +4 -0
  156. hugpy/managers/summarizers/generation.py +23 -0
  157. hugpy/managers/summarizers/imports.py +1 -0
  158. hugpy/managers/summarizers/media.py +47 -0
  159. hugpy/managers/summarizers/summarize_runner.py +96 -0
  160. hugpy/managers/summarizers/summarizers.py +436 -0
  161. hugpy/managers/video/__init__.py +1 -0
  162. hugpy/managers/video/imports.py +1 -0
  163. hugpy/managers/video/video_analyzer.py +139 -0
  164. hugpy/managers/vision/__init__.py +3 -0
  165. hugpy/managers/vision/imports.py +1 -0
  166. hugpy/managers/vision/schemas.py +99 -0
  167. hugpy/managers/vision/utils.py +26 -0
  168. hugpy/managers/vision/vision_backends.py +100 -0
  169. hugpy/managers/vision/vision_coder.py +345 -0
  170. hugpy/managers/vision/vision_runner.py +27 -0
  171. hugpy/managers/whisper_model/__init__.py +5 -0
  172. hugpy/managers/whisper_model/constants.py +3 -0
  173. hugpy/managers/whisper_model/imports.py +1 -0
  174. hugpy/managers/whisper_model/src/__init__.py +7 -0
  175. hugpy/managers/whisper_model/src/imports.py +3 -0
  176. hugpy/managers/whisper_model/src/model/__init__.py +11 -0
  177. hugpy/managers/whisper_model/src/model/execute.py +190 -0
  178. hugpy/managers/whisper_model/src/model/imports.py +1 -0
  179. hugpy/managers/whisper_model/src/model/model.py +36 -0
  180. hugpy/managers/whisper_model/src/model/utils/__init__.py +2 -0
  181. hugpy/managers/whisper_model/src/model/utils/audio.py +74 -0
  182. hugpy/managers/whisper_model/src/model/utils/files/__init__.py +3 -0
  183. hugpy/managers/whisper_model/src/model/utils/files/artifacts/__init__.py +1 -0
  184. hugpy/managers/whisper_model/src/model/utils/files/artifacts/imports.py +1 -0
  185. hugpy/managers/whisper_model/src/model/utils/files/artifacts/workspace.py +49 -0
  186. hugpy/managers/whisper_model/src/model/utils/files/frames/__init__.py +3 -0
  187. hugpy/managers/whisper_model/src/model/utils/files/frames/extract.py +85 -0
  188. hugpy/managers/whisper_model/src/model/utils/files/frames/imports.py +1 -0
  189. hugpy/managers/whisper_model/src/model/utils/files/frames/utils.py +42 -0
  190. hugpy/managers/whisper_model/src/model/utils/files/imports.py +1 -0
  191. hugpy/managers/whisper_model/src/model/utils/files/save.py +15 -0
  192. hugpy/managers/whisper_model/src/model/utils/imports.py +1 -0
  193. hugpy/managers/whisper_model/src/runner.py +102 -0
  194. hugpy/managers/whisper_model/src/stream.py +159 -0
  195. hugpy/model_sync.py +162 -0
  196. hugpy/phone_brick/__init__.py +41 -0
  197. hugpy/phone_brick/__main__.py +84 -0
  198. hugpy/phone_brick/client.py +69 -0
  199. hugpy/phone_brick/consensus.py +35 -0
  200. hugpy/phone_brick/detector.py +227 -0
  201. hugpy/phone_brick/orchestrator.py +114 -0
  202. hugpy/phone_brick/protocol.py +96 -0
  203. hugpy/phone_brick/rendering.py +43 -0
  204. hugpy/phone_brick/schemas.py +130 -0
  205. hugpy/phone_brick/worker.py +233 -0
  206. hugpy/utils/__init__.py +2 -0
  207. hugpy/utils/imports.py +2 -0
  208. hugpy/utils/pdfs/__init__.py +1 -0
  209. hugpy/utils/pdfs/utils.py +166 -0
  210. hugpy/utils/seo/__init__.py +1 -0
  211. hugpy/utils/seo/imports.py +1 -0
  212. hugpy/utils/seo/pdf_utils.py +231 -0
  213. hugpy/utils/text/__init__.py +1 -0
  214. hugpy/utils/text/combined.py +255 -0
  215. hugpy/utils/text/imports.py +2 -0
  216. hugpy/worker_agent/__init__.py +7 -0
  217. hugpy/worker_agent/__main__.py +4 -0
  218. hugpy/worker_agent/agent.py +1147 -0
  219. hugpy/worker_agent/imports.py +2 -0
  220. hugpy/worker_agent/provision.py +741 -0
  221. hugpy-0.1.0.dist-info/METADATA +80 -0
  222. hugpy-0.1.0.dist-info/RECORD +226 -0
  223. hugpy-0.1.0.dist-info/WHEEL +5 -0
  224. hugpy-0.1.0.dist-info/entry_points.txt +2 -0
  225. hugpy-0.1.0.dist-info/licenses/LICENSE +30 -0
  226. hugpy-0.1.0.dist-info/top_level.txt +1 -0
hugpy/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .imports import *
2
+ from .managers import *
3
+ from .utils import *
hugpy/cli.py ADDED
@@ -0,0 +1,93 @@
1
+ """hugpy command line.
2
+
3
+ hugpy serve [--host 0.0.0.0] [--port 7002] [--auth open|external] ...
4
+ hugpy worker --central https://your-hugpy/ [worker_agent args...]
5
+
6
+ `serve` runs the whole product from one process: the API, the built web
7
+ console (when a ui/dist exists — see flask_app._ui_dist_dir), model downloads,
8
+ chat, and the OpenAI-compatible /v1 surface. No nginx, no node.
9
+
10
+ `worker` joins this machine to a hugpy central as a GPU worker (or, with
11
+ --role rpc, lends its GPU to the cross-machine shard pool). All flags after
12
+ the subcommand go straight to the worker agent's own parser.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import os
18
+ import sys
19
+
20
+
21
def _serve(args: argparse.Namespace) -> int:
    """Run the hugpy console and API from this process.

    Publishes the auth mode through ``HUGPY_AUTH_MODE``, builds the Flask app
    and serves it with gunicorn. If gunicorn cannot be imported, the Flask
    development server is used instead.

    Args:
        args: Parsed ``serve`` options (host, port, threads, auth, origins,
            debug).

    Returns:
        ``0`` once the server has stopped.
    """
    # Distribution default: single-operator instance, no login wall. The
    # /v1 API-key system still gates programmatic access. An explicit --auth
    # wins; otherwise an already-exported HUGPY_AUTH_MODE is left untouched.
    if not args.auth:
        os.environ.setdefault("HUGPY_AUTH_MODE", "open")
    else:
        os.environ["HUGPY_AUTH_MODE"] = args.auth

    from hugpy.flask_app import get_hugpy_flask

    # An empty/absent --origins collapses to None.
    stripped = (piece.strip() for piece in (args.origins or "").split(","))
    origins = [piece for piece in stripped if piece] or None
    flask_app = get_hugpy_flask(name="hugpy", allowed_origins=origins, debug=args.debug)

    bind = f"{args.host}:{args.port}"
    try:
        from gunicorn.app.base import BaseApplication
    except ImportError:
        print(f"hugpy: gunicorn not installed; using the Flask dev server on {bind}",
              file=sys.stderr)
        flask_app.run(host=args.host, port=args.port, debug=args.debug)
        return 0

    class _App(BaseApplication):
        """Embedded gunicorn application serving the already-built Flask app."""

        def load_config(self):
            settings = (
                ("bind", bind),
                ("workers", 1),  # singleton registries/job store
                ("threads", args.threads),
                ("timeout", 300),
            )
            for key, value in settings:
                self.cfg.set(key, value)

        def load(self):
            return flask_app

    print(f"hugpy serving on http://{bind} (console at /, API at /api/v1)")
    _App().run()
    return 0
57
+
58
+
59
def _worker(_args: argparse.Namespace, passthrough: list[str]) -> int:
    """Hand the remaining command line to the worker agent.

    Args:
        _args: Unused; kept for signature parity with the other subcommands.
        passthrough: Arguments that followed ``worker`` on the command line.

    Returns:
        Whatever the worker agent's ``main`` returns.
    """
    # Imported lazily so `hugpy serve` does not load the agent.
    from hugpy.worker_agent.agent import main as run_agent

    exit_code = run_agent(passthrough)
    return exit_code
62
+
63
+
64
def main(argv: list[str] | None = None) -> int:
    """Entry point for the ``hugpy`` command.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        The exit status of the selected subcommand.

    Raises:
        SystemExit: Raised by argparse on usage errors or ``--help``.
    """
    cli_args = list(argv if argv is not None else sys.argv[1:])

    parser = argparse.ArgumentParser(
        prog="hugpy",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    sub = parser.add_subparsers(dest="cmd", required=True)

    serve_parser = sub.add_parser("serve", help="run the hugpy console + API in one process")
    serve_parser.add_argument("--host", default="0.0.0.0")
    serve_parser.add_argument("--port", type=int, default=7002)
    serve_parser.add_argument("--threads", type=int, default=8)
    serve_parser.add_argument("--auth", choices=("open", "external"),
                              help="auth mode (default: open, or HUGPY_AUTH_MODE)")
    serve_parser.add_argument("--origins",
                              help="comma-separated CORS origins (default: same-origin only)")
    serve_parser.add_argument("--debug", action="store_true")

    # The agent owns its own --help, so the stub parser suppresses argparse's.
    worker_parser = sub.add_parser("worker", help="join a hugpy central as a worker",
                                   add_help=False)

    # Split: everything after `worker` belongs to the agent's parser, so it is
    # forwarded untouched instead of being parsed here.
    if cli_args and cli_args[0] == "worker":
        return _worker(worker_parser, cli_args[1:])

    args = parser.parse_args(cli_args)
    if args.cmd != "serve":
        parser.error("unknown command")
        return 2
    return _serve(args)
90
+
91
+
92
# Script entry: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,2 @@
1
+ from .app import *
2
+ from .wsgi_app import *
@@ -0,0 +1,7 @@
1
+ from .functions import *
2
+ from .routes.llm_storage_routes import llm_bp
3
+ from .routes.chat_routes import chat_bp
4
+ from .routes.search_routes import search_bp
5
+ from .routes.upload_routes import upload_bp
6
+ from .routes.worker_routes import worker_bp
7
+
@@ -0,0 +1,3 @@
1
+ from .imports import *
2
+ from .downloads import *
3
+ from .chat import *
@@ -0,0 +1 @@
1
+ from .streaming import *
@@ -0,0 +1 @@
1
+ from ..imports import *
@@ -0,0 +1,168 @@
1
+ from .imports import *
2
+
3
+ from flask import Response, stream_with_context
4
+ from pydantic import BaseModel
5
+ from typing import Optional, List
6
+
7
+
8
def sse_event(payload: dict) -> bytes:
    """Encode ``payload`` as one Server-Sent-Events ``data:`` frame.

    Args:
        payload: JSON-serializable mapping to send.

    Returns:
        UTF-8 bytes of ``data: <json>`` followed by a blank line. Non-ASCII
        characters are emitted as-is rather than escaped.
    """
    serialized = json.dumps(payload, ensure_ascii=False)
    frame = "data: " + serialized + "\n\n"
    return frame.encode("utf-8")
10
+
11
+
12
def event_to_sse(ev) -> bytes:
    """Convert a dispatch stream event into the browser's SSE wire format.

    ``token``, ``done`` and ``error`` events are reduced to their minimal
    payloads. Any other event (status, provisioning progress, continuation
    markers, events relayed from a GPU worker) is forwarded verbatim through
    ``ev.model_dump()``.

    Args:
        ev: Event object; its ``type`` attribute selects the payload shape.

    Returns:
        The encoded SSE frame.
    """
    kind = getattr(ev, "type", None)
    if kind == "token":
        payload = {"type": "token", "text": ev.text}
    elif kind == "done":
        payload = {"type": "done", "finish_reason": ev.finish_reason}
    elif kind == "error":
        payload = {"type": "error", "message": ev.message}
    else:
        payload = ev.model_dump()
    return sse_event(payload)
27
+
28
+
29
def chat_iter_sync(agen):
    """Drive an async generator from Flask's synchronous WSGI context.

    A private event loop is created for the lifetime of the iteration. Each
    item is pulled with ``run_until_complete`` and ``str`` items are encoded
    to UTF-8 bytes.

    Args:
        agen: Async generator producing ``str`` or ``bytes`` chunks.

    Yields:
        The chunks of ``agen`` in order, with ``str`` converted to ``bytes``.
    """
    loop = asyncio.new_event_loop()

    try:
        asyncio.set_event_loop(loop)

        while True:
            try:
                chunk = loop.run_until_complete(agen.__anext__())
            except StopAsyncIteration:
                return

            yield chunk.encode("utf-8") if isinstance(chunk, str) else chunk

    finally:
        # Best-effort: finalize any async generators still open on this loop
        # before it is torn down; failures here must not mask the real error.
        try:
            loop.run_until_complete(loop.shutdown_asyncgens())
        except Exception:
            pass

        asyncio.set_event_loop(None)
        loop.close()
56
+
57
+
58
+ def _resolve_max_new_tokens(body: ChatBody) -> int:
59
+ """Default to the model's full context when the client didn't cap it.
60
+
61
+ A tool, not a service — so when max_new_tokens is omitted we give the model
62
+ as much room as it has. The engine auto-continues past this per-call cap, so
63
+ this is the per-pass budget, not a hard ceiling on total output.
64
+ """
65
+ if body.max_new_tokens:
66
+ return body.max_new_tokens
67
+ try:
68
+ from .imports import get_model_config
69
+ cfg = get_model_config(body.model_key) if body.model_key else None
70
+ ctx = getattr(cfg, "model_max_length", None)
71
+ if ctx and int(ctx) > 0:
72
+ return int(ctx)
73
+ except Exception:
74
+ pass
75
+ # Fall back to the global default cap.
76
+ try:
77
+ from .imports import DEFAULT_MAX_TOKENS
78
+ return int(DEFAULT_MAX_TOKENS)
79
+ except Exception:
80
+ return 4096
81
+
82
+
83
async def stream_events(body: ChatBody):
    """Translate a chat request into engine kwargs and stream the reply as SSE.

    The route does not choose between local and worker execution: it only
    assembles ``prompt_kwargs`` and hands them to ``execute_chat_stream``.
    Per the original notes, placement (in-process, placement peer, live GPU
    worker, local fallback) is decided downstream by ``resolve()``.

    Args:
        body: Validated chat request.

    Yields:
        SSE-encoded frames, one per engine event. If the engine raises, a
        final ``error`` frame carrying the exception text is yielded.
    """
    from .imports import execute_chat_stream

    prompt_kwargs = {}
    if body.max_new_tokens:
        # Explicit cap from the client -> honor it (bounded, per-call).
        prompt_kwargs["max_new_tokens"] = body.max_new_tokens
    else:
        # No cap requested -> ask for unbounded generation; the per-chunk
        # size comes from _resolve_max_new_tokens.
        prompt_kwargs["unbounded"] = True
        prompt_kwargs["max_new_tokens"] = _resolve_max_new_tokens(body)

    if body.model_key:
        prompt_kwargs["model_key"] = body.model_key

    # Sampling knobs are forwarded whenever they were set, even if falsy
    # (temperature 0.0, do_sample False).
    for field in ("temperature", "do_sample"):
        value = getattr(body, field)
        if value is not None:
            prompt_kwargs[field] = value

    if body.messages:
        prompt_kwargs["messages"] = messages_to_dicts(body.messages)
    else:
        prompt_kwargs["prompt"] = body.prompt

    # Optional attachments and the correlation id are forwarded only when
    # present. request_id is the stable id threaded through every
    # continuation pass; it also lets the browser correlate the stream.
    for field in ("file", "images", "request_id"):
        value = getattr(body, field)
        if value:
            prompt_kwargs[field] = value

    # Text-only chat to a multi-task (e.g. vision) model: route to its
    # text-generation task rather than its primary task, but only when the
    # model actually lists text-generation. Per the original notes, the
    # vision runner requires an image and would otherwise fail validation.
    text_only = not (body.images or body.file)
    if text_only and body.model_key:
        try:
            from .imports import get_model_config
            model_cfg = get_model_config(body.model_key)
            supported = getattr(model_cfg, "tasks", None) or []
            primary_task = getattr(model_cfg, "primary_task", None)
            if primary_task != "text-generation" and "text-generation" in supported:
                prompt_kwargs["task"] = "text-generation"
        except Exception:
            pass

    logger.info("prompt_kwargs == %s", prompt_kwargs)

    try:
        async for engine_event in execute_chat_stream(**prompt_kwargs):
            yield event_to_sse(engine_event)
    except Exception as exc:
        logger.exception("stream_events failed")
        yield sse_event({"type": "error", "message": str(exc)})
153
+
154
+
155
def chat_stream(mimetype=None, headers=None, **kwargs):
    """Build the streaming Flask response for a chat request.

    Args:
        mimetype: Response mimetype; defaults to ``text/event-stream``.
        headers: Response headers; defaults to the no-cache / keep-alive set
            below.
        **kwargs: Fields used to construct the ``ChatBody``.

    Returns:
        A ``Response`` that streams SSE frames produced by ``stream_events``.
    """
    logger.info(kwargs)
    body = ChatBody(**kwargs)

    default_headers = {
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Connection": "keep-alive",
    }
    frames = chat_iter_sync(stream_events(body))

    return Response(
        stream_with_context(frames),
        mimetype=mimetype or "text/event-stream",
        headers=headers or default_headers,
        direct_passthrough=True,
    )
@@ -0,0 +1,4 @@
1
+ from .downloads import *
2
+ from .downloader import *
3
+ from .cancelable_downloads import *
4
+
@@ -0,0 +1,269 @@
1
+ import multiprocessing as mp
2
+ import tempfile
3
+ from datetime import datetime, timezone
4
+ from flask import jsonify, abort
5
+ from .imports import *
6
+ from .downloader import *
7
+ # ──────────────────────────────────────────────────────────────────────────
8
+ # Tunables (env-overridable). A download that writes no new bytes for
9
+ # STALL_SECONDS is considered stalled and gets killed + resumed. Each download
10
+ # is attempted up to MAX_ATTEMPTS times; HF keeps partial files on disk so a
11
+ # resume picks up where the previous attempt stopped.
12
+ # ──────────────────────────────────────────────────────────────────────────
13
# Seconds without new bytes before a transfer counts as stalled.
STALL_SECONDS = int(os.getenv("HUGPY_DOWNLOAD_STALL_SECONDS", "180"))
# Maximum number of attempts per download before it is marked failed.
MAX_ATTEMPTS = int(os.getenv("HUGPY_DOWNLOAD_MAX_ATTEMPTS", "4"))
15
+
16
+
17
+ # ──────────────────────────────────────────────────────────────────────────
18
+ # Error hand-off across the process boundary — the download runs in a child
19
+ # process, so it writes its failure reason to a temp file the monitor reads.
20
+ # ──────────────────────────────────────────────────────────────────────────
21
+ def _error_path(job_id: str) -> str:
22
+ return os.path.join(tempfile.gettempdir(), f"hugpy-download-{job_id}.err")
23
+
24
+
25
def _write_error(job_id: str, msg: str) -> None:
    """Persist a failure reason for ``job_id``, truncated to 2000 characters.

    Best-effort: an ``OSError`` while writing is ignored.

    Args:
        job_id: Identifier of the download job.
        msg: Human-readable failure description.
    """
    try:
        target = _error_path(job_id)
        with open(target, "w", encoding="utf-8") as handle:
            handle.write(msg[:2000])
    except OSError:
        pass
31
+
32
+
33
def _read_error(job_id: str) -> str | None:
    """Read back the failure reason recorded for ``job_id``.

    Args:
        job_id: Identifier of the download job.

    Returns:
        The stripped file contents, or ``None`` when the file cannot be read
        or holds only whitespace.
    """
    try:
        with open(_error_path(job_id), "r", encoding="utf-8") as handle:
            reason = handle.read().strip()
    except OSError:
        return None
    return reason if reason else None
39
+
40
+
41
def _clear_error(job_id: str) -> None:
    """Delete the failure-reason file for ``job_id`` if it exists.

    Best-effort: any ``OSError`` (including a missing file) is ignored.

    Args:
        job_id: Identifier of the download job.
    """
    try:
        stale = _error_path(job_id)
        os.unlink(stale)
    except OSError:
        pass
46
+
47
+
48
def update_model_status(model: dict) -> dict:
    """Merge the current install status into ``model`` in place.

    Args:
        model: Model description; mutated with the keys returned by
            ``model_status``.

    Returns:
        The same ``model`` dict, for chaining.
    """
    status_fields = model_status(model)
    model.update(status_fields)
    return model
51
+
52
+
53
+ def _estimate_total_bytes(model: dict) -> int | None:
54
+ """Sum the sizes of exactly the files this download will fetch, so the
55
+ progress bar can show a real percentage. Respects filename (single GGUF),
56
+ include patterns, or full repo. Returns None on any failure -> the bar
57
+ falls back to indeterminate, which still works."""
58
+ hub_id = model.get("hub_id")
59
+ if not hub_id:
60
+ return None
61
+ repo_id, _ = split_hub_id(hub_id)
62
+ try:
63
+ info = hfApi.model_info(repo_id, files_metadata=True)
64
+ except Exception as exc:
65
+ logger.info("size estimate failed for %s: %s", hub_id, exc)
66
+ return None
67
+
68
+ filename = model.get("filename")
69
+ include = model.get("include")
70
+
71
+ def will_download(path: str) -> bool:
72
+ if filename:
73
+ return path == filename or path.endswith("/" + filename)
74
+ if include:
75
+ pats = include if isinstance(include, list) else [include]
76
+ return any(fnmatch.fnmatch(path, p) for p in pats)
77
+ return True
78
+
79
+ total = sum((s.size or 0) for s in (info.siblings or []) if will_download(s.rfilename))
80
+ return total or None
81
+
82
+
83
+ # ──────────────────────────────────────────────────────────────────────────
84
+ # Subprocess worker — module-level so it's spawn-safe. Captures the real
85
+ # failure reason (HF errors propagate out of download_one) into the error file,
86
+ # then re-raises so the process exits non-zero and the monitor sees the failure.
87
+ # ──────────────────────────────────────────────────────────────────────────
88
def _download_worker(job_id: str, model_key: str, model: dict) -> None:
    """Child-process entry point that performs one download attempt.

    The process first moves itself into its own process group. On failure the
    exception's type and message are written to the job's error file and the
    exception is re-raised.

    Args:
        job_id: Identifier of the download job.
        model_key: Registry key of the model being downloaded.
        model: Model description passed to ``download_one``.

    Raises:
        Exception: Whatever ``download_one`` raised, after it is recorded.
    """
    os.setpgrp()
    try:
        download_one(model=model, model_key=model_key)  # writes hugpy.json via _stamp
        _clear_error(job_id)
    except Exception as failure:
        reason = f"{type(failure).__name__}: {failure}"
        _write_error(job_id, reason)
        raise
96
+
97
+
98
+ def _dir_bytes(path: str) -> int:
99
+ total = 0
100
+ for root, _, files in os.walk(path):
101
+ for f in files:
102
+ try:
103
+ total += os.path.getsize(os.path.join(root, f))
104
+ except OSError:
105
+ pass
106
+ return total
107
+
108
+
109
def _is_cancelled(job_id: str) -> bool:
    """Report whether the job's stored status is ``"cancelled"``.

    Args:
        job_id: Identifier of the download job.

    Returns:
        ``True`` only when the job exists and its status is ``"cancelled"``.
    """
    current = job_store.get(job_id)
    if not current:
        return False
    return bool(current.status == "cancelled")
112
+
113
+
114
def _watch(proc, job_id: str, dest: str, total_bytes: int | None) -> bool:
    """Sample download progress once per second while ``proc`` runs.

    Each tick publishes the downloaded byte count, throughput and progress
    fraction (capped at 0.999) to the job store. If the size of ``dest`` has
    not grown for ``STALL_SECONDS``, the job is flagged as stalled and the
    worker is sent SIGTERM so the caller can resume it.

    Args:
        proc: The running download process.
        job_id: Identifier of the download job.
        dest: Directory whose size is used as the progress measure.
        total_bytes: Expected total size, or ``None``/0 when unknown, in
            which case progress is reported as 0.0.

    Returns:
        ``True`` if the transfer stalled and the worker was signalled;
        ``False`` if the process exited on its own or the job was cancelled.
    """
    # Monotonic clock: a wall-clock adjustment must not look like a stall or
    # produce a bogus throughput figure.
    last_bytes = _dir_bytes(dest)
    last_change = time.monotonic()
    prev_bytes, prev_t = last_bytes, last_change

    while proc.is_alive():
        time.sleep(1.0)
        if _is_cancelled(job_id):
            return False
        now = time.monotonic()
        got = _dir_bytes(dest)
        bps = max(got - prev_bytes, 0) / max(now - prev_t, 1e-6)
        prev_bytes, prev_t = got, now
        if got > last_bytes:
            last_bytes, last_change = got, now
        pct = (got / total_bytes) if total_bytes else 0.0
        job_store.update(job_id, progress=min(pct, 0.999),
                         downloaded_bytes=got, bytes_per_second=bps, stalled=False)

        if (now - last_change) >= STALL_SECONDS:
            job_store.update(job_id, stalled=True)
            try:
                pgid = os.getpgid(proc.pid)
                if pgid == os.getpgrp():
                    # The worker has not detached into its own process group
                    # yet, so killpg would signal this server as well.
                    # Signal only the child.
                    proc.terminate()
                else:
                    os.killpg(pgid, signal.SIGTERM)
            except (ProcessLookupError, PermissionError):
                pass
            return True
    return False
147
+
148
+
149
+ # ──────────────────────────────────────────────────────────────────────────
150
+ # Launch: spawn the worker under a monitor that auto-resumes a stalled/failed
151
+ # transfer with backoff, surfaces the real error, and resolves the terminal
152
+ # state. A user cancel at any point (status -> cancelled) stops the loop.
153
+ # ──────────────────────────────────────────────────────────────────────────
154
def start_cancellable_download(job: Job, model: dict, total_bytes: int | None = None) -> None:
    """Start a download in a child process, supervised by a monitor thread.

    The monitor re-launches a stalled or failed attempt with exponential
    backoff (capped at 30 s) up to ``MAX_ATTEMPTS`` times, records the failure
    reason, and writes the terminal state to the job store. It stops as soon
    as the job's status becomes ``cancelled``. This function returns
    immediately after starting the monitor thread.

    Args:
        job: Job record; its ``id`` and ``model_key`` are used.
        model: Model description to download; also stored on the job as
            ``_model`` so a later retry can reuse it.
        total_bytes: Expected size if already known; otherwise estimated in
            the monitor thread.
    """
    dest = route_destination(model=model)
    logger.info("download -> %s", dest)

    job_store.update(
        job.id,
        status="running",
        message="Downloading…",
        total_bytes=total_bytes,
        attempt=1,
        max_attempts=MAX_ATTEMPTS,
        stalled=False,
        error=None,
        _model=model,
    )

    def _spawn():
        # Each attempt starts with a clean error file and a fresh process,
        # which is published on the job so cancel can find it.
        _clear_error(job.id)
        child = mp.Process(
            target=_download_worker,
            args=(job.id, job.model_key, model),
            daemon=True,
        )
        child.start()
        job_store.update(job.id, _proc=child)
        return child

    def monitor() -> None:
        nonlocal total_bytes
        if total_bytes is None:
            total_bytes = _estimate_total_bytes(model)
            if total_bytes:
                job_store.update(job.id, total_bytes=total_bytes)

        attempt = 1
        while True:
            if attempt > 1:
                job_store.update(
                    job.id,
                    attempt=attempt,
                    status="running",
                    stalled=False,
                    message=f"Resuming (attempt {attempt}/{MAX_ATTEMPTS})…",
                )
            proc = _spawn()
            stalled = _watch(proc, job.id, dest, total_bytes)
            proc.join()

            if _is_cancelled(job.id):
                return

            finished_cleanly = not stalled and proc.exitcode == 0
            if finished_cleanly:
                job_store.update(
                    job.id,
                    status="completed",
                    progress=1.0,
                    stalled=False,
                    downloaded_bytes=_dir_bytes(dest),
                    error=None,
                    bytes_per_second=None,
                    message=f"Installed at {dest}",
                )
                # Registry bookkeeping is best-effort; the download itself
                # already succeeded.
                try:
                    record_downloaded_model(model, dest)
                    refresh_registry(run_discovery=False)
                except Exception as exc:
                    logger.warning("post-download registry refresh failed: %s", exc)
                return

            # Failed or stalled — prefer the worker's own recorded reason,
            # otherwise describe what the monitor observed.
            if stalled:
                observed = f"stalled: no new data for {STALL_SECONDS}s"
            else:
                observed = f"worker exited with code {proc.exitcode}"
            detail = _read_error(job.id) or observed

            if attempt >= MAX_ATTEMPTS:
                job_store.update(
                    job.id,
                    status="failed",
                    stalled=stalled,
                    bytes_per_second=None,
                    message="Download stalled." if stalled else "Download failed.",
                    error=detail,
                )
                return

            backoff = min(2 ** attempt, 30)
            job_store.update(
                job.id,
                status="running",
                stalled=stalled,
                error=detail,
                message=(f"{'Stalled' if stalled else 'Error'}; retrying in {backoff}s "
                         f"(attempt {attempt + 1}/{MAX_ATTEMPTS})…"),
            )
            # Sleep in one-second slices so a cancel is noticed promptly.
            for _ in range(backoff):
                if _is_cancelled(job.id):
                    return
                time.sleep(1.0)
            attempt += 1

    threading.Thread(target=monitor, daemon=True).start()
231
+
232
+
233
def cancel_download(job_id: str) -> dict:
    """Cancel a queued or running download.

    Args:
        job_id: Identifier of the download job.

    Returns:
        ``{"cancelled": True}`` when the job was cancelled, otherwise
        ``{"cancelled": False, "reason": ...}`` for a job that is neither
        queued nor running.

    Raises:
        werkzeug.exceptions.NotFound: Via ``abort(404)`` when the job id is
            unknown.
    """
    job = job_store.get(job_id)
    if not job:
        abort(404, description="Unknown job ID.")
    if job.status not in ("queued", "running"):
        return {"cancelled": False, "reason": f"job is {job.status}"}

    # Set status FIRST so the monitor's auto-resume loop sees the cancel and
    # won't relaunch after we kill the current attempt.
    job_store.update(job_id, status="cancelled", message="Cancelled by user.",
                     stalled=False, bytes_per_second=None)

    proc = getattr(job, "_proc", None)
    if proc is not None and proc.is_alive():
        try:
            pgid = os.getpgid(proc.pid)
            if pgid == os.getpgrp():
                # A cancel can arrive before the freshly spawned worker has
                # called os.setpgrp(); it then still shares this server's
                # process group, and killpg would terminate the server too.
                # Signal only the child in that window.
                proc.terminate()
            else:
                os.killpg(pgid, signal.SIGTERM)
        except (ProcessLookupError, PermissionError):
            pass
    return {"cancelled": True}
252
+
253
+
254
def retry_download(job_id: str) -> dict:
    """Restart a download that is no longer queued or running.

    Reuses the same job id and the model context stored on the job at first
    launch. Per the original notes, partial files already on disk are resumed
    rather than fetched again.

    Args:
        job_id: Identifier of the download job.

    Returns:
        ``{"retried": True, "id": job_id}`` on success, otherwise
        ``{"retried": False, "reason": ...}``.

    Raises:
        werkzeug.exceptions.NotFound: Via ``abort(404)`` when the job id is
            unknown.
    """
    job = job_store.get(job_id)
    if not job:
        abort(404, description="Unknown job ID.")

    still_active = job.status in ("queued", "running")
    if still_active:
        return {"retried": False, "reason": f"job is already {job.status}"}

    saved_model = getattr(job, "_model", None)
    if not saved_model:
        return {"retried": False, "reason": "no model context to resume from"}

    start_cancellable_download(job, saved_model, total_bytes=job.total_bytes)
    return {"retried": True, "id": job_id}
@@ -0,0 +1,29 @@
1
+ from .imports import *
2
def model_status(model: dict) -> dict:
    """Classify a model's on-disk install state.

    Args:
        model: Model description passed to ``route_destination``.

    Returns:
        A dict with ``status`` (``"installed"`` when the marker file exists,
        ``"partial"`` when the destination exists and is non-empty, else
        ``"not_installed"``), ``destination`` and ``installed_marker``.
    """
    destination = route_destination(model)
    marker = os.path.join(destination, HUGPY_MARKER)

    status = "not_installed"
    if os.path.exists(marker):
        status = "installed"
    elif os.path.exists(destination) and os.listdir(destination):
        status = "partial"

    return {
        "status": status,
        "destination": destination,
        "installed_marker": marker,
    }
12
+
13
def write_install_marker(destination: str, model_key: str, model: dict[str, Any]) -> None:
    """Write the JSON install marker for a downloaded model.

    Args:
        destination: Directory the model was installed into.
        model_key: Registry key of the model.
        model: Model description; ``hub_id``, ``framework``, ``task``,
            ``filename`` and ``include`` are copied into the marker.

    Raises:
        OSError: If the marker file cannot be written.
    """
    # Imported locally: this module only has a wildcard import, so the names
    # are not guaranteed to be in scope otherwise.
    from datetime import datetime, timezone

    # NOTE(review): model_status() locates the marker with
    # os.path.join(destination, HUGPY_MARKER); confirm install_marker()
    # resolves to the same file.
    marker = install_marker(destination)
    payload = {
        "model_key": model_key,
        "hub_id": model.get("hub_id"),
        "framework": model.get("framework"),
        "task": model.get("task"),
        "filename": model.get("filename"),
        "include": model.get("include"),
        "installed_at": datetime.now(timezone.utc).isoformat(),
    }

    with open(marker, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)
27
+
28
+
29
+
@@ -0,0 +1,34 @@
1
+ from .imports import *
2
+ # ---------------------------------------------------------------------------
3
+ # In-process job store
4
+ # ---------------------------------------------------------------------------
5
+
6
# job_id -> job record (job_id, model_key, status, message).
jobs: Dict[str, Dict] = dict()
# Held around every read/write of ``jobs``.
jobs_lock = threading.Lock()
8
+
9
+
10
def make_job(model_key: str) -> str:
    """Register a new queued download job.

    Args:
        model_key: Registry key of the model to download.

    Returns:
        The new job's id: the first 10 hex characters of a random UUID.
    """
    job_id = uuid.uuid4().hex[:10]
    record = {
        "job_id": job_id,
        "model_key": model_key,
        "status": "queued",
        "message": "",
    }
    with jobs_lock:
        jobs[job_id] = record
    return job_id
20
+
21
+
22
def run_download(job_id: str, model_key: str) -> None:
    """Run a download synchronously and record its outcome on the job.

    The job moves to ``running``, then to ``done`` (message = destination) or
    ``error`` (message = exception text).

    Args:
        job_id: Id of a job previously created in ``jobs``.
        model_key: Key into ``MODELS`` identifying what to download.

    Raises:
        KeyError: If ``job_id`` is not present in ``jobs``.
    """
    def _record(status: str, message=None) -> None:
        # All mutations of the shared job record happen under the lock.
        with jobs_lock:
            jobs[job_id]["status"] = status
            if message is not None:
                jobs[job_id]["message"] = message

    _record("running")
    try:
        dest = download_model(model_key, MODELS[model_key])
        _record("done", str(dest))
    except Exception as exc:
        _record("error", str(exc))
34
+
@@ -0,0 +1 @@
1
+ from ..imports import *
@@ -0,0 +1,4 @@
1
+ from .init_imports import *
2
+ from .options import *
3
+ from .utils import *
4
+
@@ -0,0 +1,6 @@
1
+ from abstract_flask import *
2
+ from .....imports import *
3
+ from .....managers import *
4
+ from .....utils import *
5
+ import signal
6
+ import multiprocessing as mp
@@ -0,0 +1,2 @@
1
+ from .install import *
2
+ from .search import *
@@ -0,0 +1,2 @@
1
+ from ..init_imports import *
2
+ from ..utils import *