abstract-hugpy-dev 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_hugpy_dev/__init__.py +3 -0
- abstract_hugpy_dev/flask_app/__init__.py +2 -0
- abstract_hugpy_dev/flask_app/app/__init__.py +7 -0
- abstract_hugpy_dev/flask_app/app/functions/__init__.py +3 -0
- abstract_hugpy_dev/flask_app/app/functions/chat/__init__.py +1 -0
- abstract_hugpy_dev/flask_app/app/functions/chat/imports.py +1 -0
- abstract_hugpy_dev/flask_app/app/functions/chat/streaming.py +168 -0
- abstract_hugpy_dev/flask_app/app/functions/downloads/__init__.py +4 -0
- abstract_hugpy_dev/flask_app/app/functions/downloads/cancelable_downloads.py +133 -0
- abstract_hugpy_dev/flask_app/app/functions/downloads/downloader.py +29 -0
- abstract_hugpy_dev/flask_app/app/functions/downloads/downloads.py +34 -0
- abstract_hugpy_dev/flask_app/app/functions/downloads/imports.py +1 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/__init__.py +4 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/init_imports.py +6 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/options/__init__.py +2 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/options/imports.py +2 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/options/install.py +67 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/options/search.py +35 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/__init__.py +5 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/constants.py +4 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/imports.py +1 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/manifest.py +53 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/peers.py +123 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/__init__.py +8 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/chat_schemas.py +29 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/config_schemas.py +23 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/download_schemas.py +28 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/imports.py +3 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/install_schemas.py +17 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/job_schemas.py +63 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/model_schemas.py +37 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/request_schemas.py +13 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/schemas/specs_schemas.py +9 -0
- abstract_hugpy_dev/flask_app/app/functions/imports/utils/workers.py +434 -0
- abstract_hugpy_dev/flask_app/app/routes/__init__.py +5 -0
- abstract_hugpy_dev/flask_app/app/routes/chat_routes.py +10 -0
- abstract_hugpy_dev/flask_app/app/routes/llm_storage_routes.py +119 -0
- abstract_hugpy_dev/flask_app/app/routes/search_routes.py +130 -0
- abstract_hugpy_dev/flask_app/app/routes/upload_routes.py +15 -0
- abstract_hugpy_dev/flask_app/app/routes/worker_routes.py +685 -0
- abstract_hugpy_dev/flask_app/wsgi_app.py +10 -0
- abstract_hugpy_dev/imports/__init__.py +3 -0
- abstract_hugpy_dev/imports/apis/__init__.py +4 -0
- abstract_hugpy_dev/imports/apis/call_api.py +124 -0
- abstract_hugpy_dev/imports/apis/download_models.py +252 -0
- abstract_hugpy_dev/imports/apis/get_module.py +315 -0
- abstract_hugpy_dev/imports/apis/huggingface_api.py +222 -0
- abstract_hugpy_dev/imports/apis/imports.py +2 -0
- abstract_hugpy_dev/imports/apis/serve/__init__.py +2 -0
- abstract_hugpy_dev/imports/apis/serve/serve.py +509 -0
- abstract_hugpy_dev/imports/apis/serve/serve_cli.py +85 -0
- abstract_hugpy_dev/imports/apis/systemd_units.py +325 -0
- abstract_hugpy_dev/imports/config/__init__.py +3 -0
- abstract_hugpy_dev/imports/config/imports.py +1 -0
- abstract_hugpy_dev/imports/config/main.py +183 -0
- abstract_hugpy_dev/imports/config/models/__init__.py +2 -0
- abstract_hugpy_dev/imports/config/models/imports.py +1 -0
- abstract_hugpy_dev/imports/config/models/models_config.py +350 -0
- abstract_hugpy_dev/imports/config/models/models_default.py +52 -0
- abstract_hugpy_dev/imports/config/models/models_dict.py +73 -0
- abstract_hugpy_dev/imports/src/__init__.py +7 -0
- abstract_hugpy_dev/imports/src/_compat.py +184 -0
- abstract_hugpy_dev/imports/src/chunking.py +92 -0
- abstract_hugpy_dev/imports/src/constants/__init__.py +4 -0
- abstract_hugpy_dev/imports/src/constants/categories.py +36 -0
- abstract_hugpy_dev/imports/src/constants/constants.py +125 -0
- abstract_hugpy_dev/imports/src/constants/hugpy_marker.py +112 -0
- abstract_hugpy_dev/imports/src/constants/imports.py +1 -0
- abstract_hugpy_dev/imports/src/constants/paths.py +131 -0
- abstract_hugpy_dev/imports/src/except_utils.py +62 -0
- abstract_hugpy_dev/imports/src/init_imports.py +45 -0
- abstract_hugpy_dev/imports/src/module_imports.py +273 -0
- abstract_hugpy_dev/imports/src/schemas/__init__.py +10 -0
- abstract_hugpy_dev/imports/src/schemas/chat_schemas.py +61 -0
- abstract_hugpy_dev/imports/src/schemas/embeded_schemas.py +56 -0
- abstract_hugpy_dev/imports/src/schemas/event_schemas.py +31 -0
- abstract_hugpy_dev/imports/src/schemas/imports.py +65 -0
- abstract_hugpy_dev/imports/src/schemas/metadata_schemas.py +47 -0
- abstract_hugpy_dev/imports/src/schemas/model_schemas.py +97 -0
- abstract_hugpy_dev/imports/src/schemas/runner_schemas.py +33 -0
- abstract_hugpy_dev/imports/src/schemas/summarizer_schemas.py +172 -0
- abstract_hugpy_dev/imports/src/schemas/task_schemas.py +41 -0
- abstract_hugpy_dev/imports/src/schemas/video_schemas.py +38 -0
- abstract_hugpy_dev/imports/src/schemas/whisper_schemas.py +162 -0
- abstract_hugpy_dev/imports/src/utils.py +324 -0
- abstract_hugpy_dev/managers/__init__.py +11 -0
- abstract_hugpy_dev/managers/chat_context/__init__.py +3 -0
- abstract_hugpy_dev/managers/chat_context/chat_context.py +51 -0
- abstract_hugpy_dev/managers/chat_context/context_budget.py +223 -0
- abstract_hugpy_dev/managers/chat_context/imports.py +1 -0
- abstract_hugpy_dev/managers/chat_context/unbounded.py +55 -0
- abstract_hugpy_dev/managers/dispatch/__init__.py +2 -0
- abstract_hugpy_dev/managers/dispatch/acquire.py +46 -0
- abstract_hugpy_dev/managers/dispatch/dispatch.py +396 -0
- abstract_hugpy_dev/managers/dispatch/imports.py +1 -0
- abstract_hugpy_dev/managers/embed/__init__.py +1 -0
- abstract_hugpy_dev/managers/embed/embed_runner.py +153 -0
- abstract_hugpy_dev/managers/embed/imports.py +1 -0
- abstract_hugpy_dev/managers/falconsai/__init__.py +1 -0
- abstract_hugpy_dev/managers/falconsai/falconsai_module.py +307 -0
- abstract_hugpy_dev/managers/falconsai/imports.py +1 -0
- abstract_hugpy_dev/managers/generate/__init__.py +3 -0
- abstract_hugpy_dev/managers/generate/coder.py +535 -0
- abstract_hugpy_dev/managers/generate/coder_guff.py +84 -0
- abstract_hugpy_dev/managers/generate/config.py +121 -0
- abstract_hugpy_dev/managers/generate/generate_runner.py +121 -0
- abstract_hugpy_dev/managers/generate/generate_runner2.py +131 -0
- abstract_hugpy_dev/managers/generate/imports.py +1 -0
- abstract_hugpy_dev/managers/imports.py +1 -0
- abstract_hugpy_dev/managers/keywords/__init__.py +3 -0
- abstract_hugpy_dev/managers/keywords/imports.py +1 -0
- abstract_hugpy_dev/managers/keywords/keybert_model.py +591 -0
- abstract_hugpy_dev/managers/keywords/keywords_runner.py +18 -0
- abstract_hugpy_dev/managers/llama/__init__.py +2 -0
- abstract_hugpy_dev/managers/llama/imports.py +1 -0
- abstract_hugpy_dev/managers/llama/runners/__init__.py +2 -0
- abstract_hugpy_dev/managers/llama/runners/chat_runner.py +111 -0
- abstract_hugpy_dev/managers/llama/runners/get.py +43 -0
- abstract_hugpy_dev/managers/llama/runners/imports.py +1 -0
- abstract_hugpy_dev/managers/llama/runners/src/__init__.py +3 -0
- abstract_hugpy_dev/managers/llama/runners/src/base_runner.py +214 -0
- abstract_hugpy_dev/managers/llama/runners/src/ccp_runner.py +149 -0
- abstract_hugpy_dev/managers/llama/runners/src/imports/__init__.py +4 -0
- abstract_hugpy_dev/managers/llama/runners/src/imports/config.py +14 -0
- abstract_hugpy_dev/managers/llama/runners/src/imports/constants.py +36 -0
- abstract_hugpy_dev/managers/llama/runners/src/imports/init_imports.py +37 -0
- abstract_hugpy_dev/managers/llama/runners/src/imports/utils.py +60 -0
- abstract_hugpy_dev/managers/llama/runners/src/python_runner.py +238 -0
- abstract_hugpy_dev/managers/llama/serve.py +10 -0
- abstract_hugpy_dev/managers/llama/testsisiis.py +2 -0
- abstract_hugpy_dev/managers/resolvers/__init__.py +3 -0
- abstract_hugpy_dev/managers/resolvers/assure_model_key.py +54 -0
- abstract_hugpy_dev/managers/resolvers/categories/__init__.py +2 -0
- abstract_hugpy_dev/managers/resolvers/categories/builders.py +191 -0
- abstract_hugpy_dev/managers/resolvers/categories/frameworks.py +15 -0
- abstract_hugpy_dev/managers/resolvers/categories/imports.py +11 -0
- abstract_hugpy_dev/managers/resolvers/imports.py +1 -0
- abstract_hugpy_dev/managers/resolvers/model_dict_resolver.py +107 -0
- abstract_hugpy_dev/managers/resolvers/model_resolver.py +322 -0
- abstract_hugpy_dev/managers/resolvers/remote.py +316 -0
- abstract_hugpy_dev/managers/serve/__init__.py +2 -0
- abstract_hugpy_dev/managers/serve/imports.py +1 -0
- abstract_hugpy_dev/managers/serve/overrides.py +102 -0
- abstract_hugpy_dev/managers/serve/serve.py +585 -0
- abstract_hugpy_dev/managers/serve/serve_cli.py +85 -0
- abstract_hugpy_dev/managers/serve/slot_agent.py +257 -0
- abstract_hugpy_dev/managers/serve/slots.py +172 -0
- abstract_hugpy_dev/managers/spill.py +288 -0
- abstract_hugpy_dev/managers/summarizers/__init__.py +4 -0
- abstract_hugpy_dev/managers/summarizers/generation.py +23 -0
- abstract_hugpy_dev/managers/summarizers/imports.py +1 -0
- abstract_hugpy_dev/managers/summarizers/media.py +47 -0
- abstract_hugpy_dev/managers/summarizers/summarize_runner.py +96 -0
- abstract_hugpy_dev/managers/summarizers/summarizers.py +436 -0
- abstract_hugpy_dev/managers/video/__init__.py +1 -0
- abstract_hugpy_dev/managers/video/imports.py +1 -0
- abstract_hugpy_dev/managers/video/video_analyzer.py +139 -0
- abstract_hugpy_dev/managers/vision/__init__.py +3 -0
- abstract_hugpy_dev/managers/vision/imports.py +1 -0
- abstract_hugpy_dev/managers/vision/schemas.py +99 -0
- abstract_hugpy_dev/managers/vision/utils.py +26 -0
- abstract_hugpy_dev/managers/vision/vision_backends.py +100 -0
- abstract_hugpy_dev/managers/vision/vision_coder.py +345 -0
- abstract_hugpy_dev/managers/vision/vision_runner.py +26 -0
- abstract_hugpy_dev/managers/whisper_model/__init__.py +5 -0
- abstract_hugpy_dev/managers/whisper_model/constants.py +3 -0
- abstract_hugpy_dev/managers/whisper_model/imports.py +1 -0
- abstract_hugpy_dev/managers/whisper_model/src/__init__.py +7 -0
- abstract_hugpy_dev/managers/whisper_model/src/imports.py +3 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/__init__.py +11 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/execute.py +190 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/imports.py +1 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/model.py +36 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/__init__.py +2 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/audio.py +74 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/__init__.py +3 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/artifacts/__init__.py +1 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/artifacts/imports.py +1 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/artifacts/workspace.py +49 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/frames/__init__.py +3 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/frames/extract.py +85 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/frames/imports.py +1 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/frames/utils.py +42 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/imports.py +1 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/files/save.py +15 -0
- abstract_hugpy_dev/managers/whisper_model/src/model/utils/imports.py +1 -0
- abstract_hugpy_dev/managers/whisper_model/src/runner.py +102 -0
- abstract_hugpy_dev/managers/whisper_model/src/stream.py +159 -0
- abstract_hugpy_dev/model_sync.py +162 -0
- abstract_hugpy_dev/phone_brick/__init__.py +41 -0
- abstract_hugpy_dev/phone_brick/__main__.py +84 -0
- abstract_hugpy_dev/phone_brick/client.py +69 -0
- abstract_hugpy_dev/phone_brick/consensus.py +35 -0
- abstract_hugpy_dev/phone_brick/detector.py +227 -0
- abstract_hugpy_dev/phone_brick/orchestrator.py +114 -0
- abstract_hugpy_dev/phone_brick/protocol.py +96 -0
- abstract_hugpy_dev/phone_brick/rendering.py +43 -0
- abstract_hugpy_dev/phone_brick/schemas.py +130 -0
- abstract_hugpy_dev/phone_brick/worker.py +233 -0
- abstract_hugpy_dev/utils/__init__.py +2 -0
- abstract_hugpy_dev/utils/imports.py +2 -0
- abstract_hugpy_dev/utils/pdfs/__init__.py +1 -0
- abstract_hugpy_dev/utils/pdfs/utils.py +166 -0
- abstract_hugpy_dev/utils/seo/__init__.py +1 -0
- abstract_hugpy_dev/utils/seo/imports.py +1 -0
- abstract_hugpy_dev/utils/seo/pdf_utils.py +231 -0
- abstract_hugpy_dev/utils/text/__init__.py +1 -0
- abstract_hugpy_dev/utils/text/combined.py +255 -0
- abstract_hugpy_dev/utils/text/imports.py +2 -0
- abstract_hugpy_dev/worker_agent/__init__.py +7 -0
- abstract_hugpy_dev/worker_agent/__main__.py +4 -0
- abstract_hugpy_dev/worker_agent/agent.py +931 -0
- abstract_hugpy_dev/worker_agent/provision.py +741 -0
- abstract_hugpy_dev-0.0.1.dist-info/METADATA +569 -0
- abstract_hugpy_dev-0.0.1.dist-info/RECORD +217 -0
- abstract_hugpy_dev-0.0.1.dist-info/WHEEL +5 -0
- abstract_hugpy_dev-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .streaming import *
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from ..imports import *
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
|
|
3
|
+
from flask import Response, stream_with_context
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from typing import Optional, List
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def sse_event(payload: dict) -> bytes:
    """Encode ``payload`` as a single Server-Sent-Events ``data:`` frame.

    The payload is JSON-serialized with non-ASCII characters left unescaped,
    prefixed with ``data: ``, terminated by a blank line, and UTF-8 encoded.
    """
    serialized = json.dumps(payload, ensure_ascii=False)
    frame = "data: " + serialized + "\n\n"
    return frame.encode("utf-8")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def event_to_sse(ev) -> bytes:
    """Serialize a stream event to the browser's SSE wire shape.

    ``token``, ``done`` and ``error`` events are reduced to a minimal payload
    (``text``, ``finish_reason`` and ``message`` respectively). Any other
    event — including one with no ``type`` attribute — is serialized from
    ``ev.model_dump()`` unchanged.
    """
    kind = getattr(ev, "type", None)
    if kind == "token":
        payload = {"type": "token", "text": ev.text}
    elif kind == "done":
        payload = {"type": "done", "finish_reason": ev.finish_reason}
    elif kind == "error":
        payload = {"type": "error", "message": ev.message}
    else:
        payload = ev.model_dump()
    return sse_event(payload)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def chat_iter_sync(agen):
    """Drive an async generator from Flask's synchronous WSGI context.

    A private event loop is created and installed as the current loop, then
    ``agen`` is advanced one item at a time. ``str`` items are UTF-8 encoded;
    everything else is yielded unchanged. However iteration ends, the loop's
    async generators are shut down (best effort), the current loop is reset
    to ``None`` and the private loop is closed.
    """
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        while True:
            try:
                chunk = loop.run_until_complete(agen.__anext__())
                yield chunk.encode("utf-8") if isinstance(chunk, str) else chunk
            except StopAsyncIteration:
                return
    finally:
        # Cleanup must never mask the error (or GeneratorExit) that got us here.
        try:
            loop.run_until_complete(loop.shutdown_asyncgens())
        except Exception:
            pass
        asyncio.set_event_loop(None)
        loop.close()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _resolve_max_new_tokens(body: ChatBody) -> int:
    """Pick the per-pass token budget for a chat request.

    Resolution order:
      1. ``body.max_new_tokens`` when the client supplied a truthy value.
      2. The model config's ``model_max_length`` when ``body.model_key`` is
         set and the value is a positive integer.
      3. The package-wide ``DEFAULT_MAX_TOKENS`` constant.
      4. A hard-coded ``4096`` if that constant cannot be imported.

    Lookup failures stay non-fatal (this is a best-effort default), but they
    are logged at debug level so a broken import or config is diagnosable
    instead of silently degrading to the fallback.
    """
    if body.max_new_tokens:
        return body.max_new_tokens

    try:
        from abstract_hugpy.imports.config.main import get_model_config
        cfg = get_model_config(body.model_key) if body.model_key else None
        ctx = getattr(cfg, "model_max_length", None)
        if ctx and int(ctx) > 0:
            return int(ctx)
    except Exception:
        logger.debug(
            "model_max_length lookup failed for model_key=%r; using default cap",
            getattr(body, "model_key", None),
            exc_info=True,
        )

    # Fall back to the global default cap.
    try:
        from abstract_hugpy.imports.src.constants.constants import DEFAULT_MAX_TOKENS
        return int(DEFAULT_MAX_TOKENS)
    except Exception:
        logger.debug("DEFAULT_MAX_TOKENS unavailable; using 4096", exc_info=True)
        return 4096
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
async def stream_events(body: ChatBody):
    """Translate a chat request into engine kwargs and stream the result as SSE.

    This coroutine only assembles ``prompt_kwargs`` and forwards them to
    ``execute_chat_stream``; it makes no local-vs-worker decision itself.
    Each event the engine yields is converted with ``event_to_sse``. If the
    engine raises, the exception is logged and a single ``error`` frame is
    yielded instead of propagating.
    """
    from abstract_hugpy.managers.dispatch import execute_chat_stream

    if body.max_new_tokens:
        # Explicit cap from the client -> honor it (bounded, per-call).
        prompt_kwargs = {"max_new_tokens": body.max_new_tokens}
    else:
        # No cap requested -> run unbounded; the per-chunk size still comes
        # from _resolve_max_new_tokens.
        prompt_kwargs = {"unbounded": True}
        prompt_kwargs["max_new_tokens"] = _resolve_max_new_tokens(body)

    if body.model_key:
        prompt_kwargs["model_key"] = body.model_key

    # Sampling knobs are forwarded whenever they were set, even to 0 / False.
    for knob in ("temperature", "do_sample"):
        setting = getattr(body, knob)
        if setting is not None:
            prompt_kwargs[knob] = setting

    if body.messages:
        prompt_kwargs["messages"] = messages_to_dicts(body.messages)
    else:
        prompt_kwargs["prompt"] = body.prompt

    # Optional extras are forwarded only when truthy. request_id is the
    # stable id threaded through continuation passes and lets the browser
    # correlate the stream.
    for extra in ("file", "images", "request_id"):
        attachment = getattr(body, extra)
        if attachment:
            prompt_kwargs[extra] = attachment

    # Text-only chat to a multi-task (e.g. vision) model: route to its
    # text-generation task so a plain prompt uses the text runner. Only done
    # when no image/file is given and the model actually lists that task.
    text_only = not (body.images or body.file)
    if text_only and body.model_key:
        try:
            from abstract_hugpy.imports.config.main import get_model_config
            model_cfg = get_model_config(body.model_key)
            supported = getattr(model_cfg, "tasks", None) or []
            default_task = getattr(model_cfg, "primary_task", None)
            if default_task != "text-generation" and "text-generation" in supported:
                prompt_kwargs["task"] = "text-generation"
        except Exception:
            pass

    logger.info("prompt_kwargs == %s", prompt_kwargs)

    try:
        async for engine_event in execute_chat_stream(**prompt_kwargs):
            yield event_to_sse(engine_event)
    except Exception as exc:
        logger.exception("stream_events failed")
        yield sse_event({"type": "error", "message": str(exc)})
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def chat_stream(mimetype=None, headers=None, **kwargs):
    """Build the streaming Flask ``Response`` for a chat request.

    ``kwargs`` are validated into a ``ChatBody``. ``mimetype`` defaults to
    ``text/event-stream`` and ``headers`` to a no-cache / no-buffering /
    keep-alive set when the caller passes nothing (or a falsy value).
    """
    logger.info(kwargs)
    body = ChatBody(**kwargs)

    if not mimetype:
        mimetype = "text/event-stream"
    if not headers:
        headers = {
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive",
        }

    event_source = chat_iter_sync(stream_events(body))
    return Response(
        stream_with_context(event_source),
        mimetype=mimetype,
        headers=headers,
        direct_passthrough=True,
    )
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import multiprocessing as mp
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from flask import jsonify, abort
|
|
4
|
+
from .imports import *
|
|
5
|
+
from .downloader import *
|
|
6
|
+
# ──────────────────────────────────────────────────────────────────────────
|
|
7
|
+
# Subprocess worker — module-level so it's spawn-safe; underscore-private so
|
|
8
|
+
# nothing imports it (multiprocessing references the function object directly).
|
|
9
|
+
# ──────────────────────────────────────────────────────────────────────────
|
|
10
|
+
def update_model_status(model: dict) -> dict:
    """Merge the fields returned by ``model_status(model)`` into ``model``.

    The dict is mutated in place and the same object is returned.
    """
    status_fields = model_status(model)
    model.update(status_fields)
    return model
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _estimate_total_bytes(model: dict) -> int | None:
|
|
16
|
+
"""Sum the sizes of exactly the files this download will fetch, so the
|
|
17
|
+
progress bar can show a real percentage. Respects filename (single GGUF),
|
|
18
|
+
include patterns, or full repo. Returns None on any failure -> the bar
|
|
19
|
+
falls back to indeterminate, which still works."""
|
|
20
|
+
hub_id = model.get("hub_id")
|
|
21
|
+
if not hub_id:
|
|
22
|
+
return None
|
|
23
|
+
repo_id, _ = split_hub_id(hub_id)
|
|
24
|
+
try:
|
|
25
|
+
info = hfApi.model_info(repo_id, files_metadata=True)
|
|
26
|
+
except Exception as exc:
|
|
27
|
+
logger.info("size estimate failed for %s: %s", hub_id, exc)
|
|
28
|
+
return None
|
|
29
|
+
|
|
30
|
+
filename = model.get("filename")
|
|
31
|
+
include = model.get("include")
|
|
32
|
+
|
|
33
|
+
def will_download(path: str) -> bool:
|
|
34
|
+
if filename:
|
|
35
|
+
return path == filename or path.endswith("/" + filename)
|
|
36
|
+
if include:
|
|
37
|
+
pats = include if isinstance(include, list) else [include]
|
|
38
|
+
return any(fnmatch.fnmatch(path, p) for p in pats)
|
|
39
|
+
return True
|
|
40
|
+
|
|
41
|
+
total = sum((s.size or 0) for s in (info.siblings or []) if will_download(s.rfilename))
|
|
42
|
+
return total or None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _download_worker(model_key: str, model: dict) -> None:
    """Entry point of the download subprocess.

    Moves the process into its own process group via ``os.setpgrp()`` and
    then runs ``download_one`` for the given model.
    """
    os.setpgrp()
    # writes hugpy.json via _stamp
    download_one(model_key=model_key, model=model)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _dir_bytes(path: str) -> int:
|
|
51
|
+
total = 0
|
|
52
|
+
for root, _, files in os.walk(path):
|
|
53
|
+
for f in files:
|
|
54
|
+
try:
|
|
55
|
+
total += os.path.getsize(os.path.join(root, f))
|
|
56
|
+
except OSError:
|
|
57
|
+
pass
|
|
58
|
+
return total
|
|
59
|
+
# ──────────────────────────────────────────────────────────────────────────
|
|
60
|
+
# Launch: spawn the worker, then sample dir-size for progress in a monitor
|
|
61
|
+
# thread that also resolves the terminal state.
|
|
62
|
+
# ──────────────────────────────────────────────────────────────────────────
|
|
63
|
+
def start_cancellable_download(job: Job, model: dict, total_bytes: int | None = None) -> None:
    """Start a download in a child process and track it from a monitor thread.

    The child runs ``_download_worker``. The job is marked ``running`` with
    the process handle stored as ``_proc``. A daemon thread then samples the
    destination directory size about once a second to publish progress, and
    records the terminal state (``completed`` / ``failed``) once the child
    exits, unless the job has already been marked ``cancelled``.
    """
    logger.info(model)
    dest = route_destination(model=model)
    logger.info(dest)

    proc = mp.Process(
        target=_download_worker,
        args=(job.model_key, model),
        daemon=True,
    )
    proc.start()

    job_store.update(
        job.id,
        status="running",
        message="Downloading…",
        total_bytes=total_bytes,
        _proc=proc,
    )

    def monitor() -> None:
        # If the caller didn't know the size, estimate it here: one metadata
        # call on this background thread, so the request thread never waits.
        expected = total_bytes
        if expected is None:
            expected = _estimate_total_bytes(model)
            if expected:
                job_store.update(job.id, total_bytes=expected)

        while proc.is_alive():
            on_disk = _dir_bytes(dest)
            fraction = (on_disk / expected) if expected else 0.0
            # Capped below 1.0 while running; 1.0 is set only on completion.
            job_store.update(
                job.id,
                progress=min(fraction, 0.999),
                downloaded_bytes=on_disk,
            )
            time.sleep(1.0)

        proc.join()

        latest = job_store.get(job.id)
        if latest and latest.status == "cancelled":
            return

        if proc.exitcode != 0:
            job_store.update(
                job.id,
                status="failed",
                message="Download failed.",
                error=f"worker exited with code {proc.exitcode}",
            )
            return

        job_store.update(
            job.id,
            status="completed",
            progress=1.0,
            downloaded_bytes=_dir_bytes(dest),
            message=f"Installed at {dest}",
        )
        try:
            record_downloaded_model(model, dest)
            refresh_registry(run_discovery=False)
        except Exception as exc:
            logger.warning("post-download registry refresh failed: %s", exc)

    threading.Thread(target=monitor, daemon=True).start()
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def cancel_download(job_id: str) -> dict:
    """Cancel a queued or running download job.

    Aborts with HTTP 404 when the job id is unknown. Returns
    ``{"cancelled": False, "reason": ...}`` when the job is not in a
    cancellable state, otherwise marks it ``cancelled``, signals the worker
    with SIGTERM and returns ``{"cancelled": True}``.
    """
    job = job_store.get(job_id)
    if not job:
        abort(404, description="Unknown job ID.")
    if job.status not in ("queued", "running"):
        return {"cancelled": False, "reason": f"job is {job.status}"}

    proc = getattr(job, "_proc", None)

    # Record the cancellation BEFORE signalling the worker. The monitor
    # thread checks the job status right after the process exits; if the
    # status were written only after the kill, the monitor could see
    # "running", then overwrite our "cancelled" with "failed".
    job_store.update(job_id, status="cancelled", message="Cancelled by user.")

    if proc is not None and proc.is_alive():
        try:
            pgid = os.getpgid(proc.pid)
            if pgid == os.getpgrp():
                # The child has not reached os.setpgrp() yet, so it still
                # shares OUR process group; killpg here would SIGTERM this
                # server. Signal only the child instead.
                proc.terminate()
            else:
                os.killpg(pgid, signal.SIGTERM)
        except (ProcessLookupError, PermissionError):
            # Worker already gone (or not ours to signal): nothing to kill.
            pass

    return {"cancelled": True}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
def model_status(model: dict) -> dict:
    """Report the install state of ``model`` on disk.

    Returns a dict with ``status`` (``installed`` when the marker file
    exists, ``partial`` when the destination exists and is non-empty,
    otherwise ``not_installed``), plus the ``destination`` path and the
    ``installed_marker`` path that were checked.
    """
    destination = route_destination(model)
    marker = os.path.join(destination, HUGPY_MARKER)

    def _classify() -> str:
        if os.path.exists(marker):
            return "installed"
        if os.path.exists(destination) and os.listdir(destination):
            return "partial"
        return "not_installed"

    return {
        "status": _classify(),
        "destination": destination,
        "installed_marker": marker,
    }
|
|
12
|
+
|
|
13
|
+
def write_install_marker(destination: str, model_key: str, model: dict[str, Any]) -> None:
    """Write a JSON install marker describing ``model`` for ``destination``.

    The marker records the model key, selected fields of ``model`` and a
    UTC ISO-8601 ``installed_at`` timestamp.
    """
    # NOTE(review): model_status builds its marker path from HUGPY_MARKER
    # (its comment says it replaced install_marker); confirm that
    # install_marker still exists and resolves to the same file.
    marker_path = install_marker(destination)

    copied_fields = ("hub_id", "framework", "task", "filename", "include")
    payload = {"model_key": model_key}
    for field_name in copied_fields:
        payload[field_name] = model.get(field_name)
    payload["installed_at"] = datetime.now(timezone.utc).isoformat()

    with open(marker_path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
# ---------------------------------------------------------------------------
|
|
3
|
+
# In-process job store
|
|
4
|
+
# ---------------------------------------------------------------------------
|
|
5
|
+
|
|
6
|
+
jobs: Dict[str, Dict] = {}
|
|
7
|
+
jobs_lock = threading.Lock()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_job(model_key: str) -> str:
    """Register a new ``queued`` job for ``model_key`` and return its id.

    The id is the first 10 hex characters of a random UUID4.
    """
    job_id = uuid.uuid4().hex[:10]
    record = {
        "job_id": job_id,
        "model_key": model_key,
        "status": "queued",
        "message": "",
    }
    with jobs_lock:
        jobs[job_id] = record
    return job_id
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def run_download(job_id: str, model_key: str) -> None:
    """Run the download for ``model_key`` and record the outcome on the job.

    The job moves to ``running``, then to ``done`` (message = destination)
    or ``error`` (message = exception text). An unknown ``model_key`` is
    reported as an ``error`` like any other failure.
    """
    def _record(**fields) -> None:
        # Every write to the shared job table happens under the lock.
        with jobs_lock:
            jobs[job_id].update(fields)

    _record(status="running")
    try:
        dest = download_model(model_key, MODELS[model_key])
        _record(status="done", message=str(dest))
    except Exception as exc:
        _record(status="error", message=str(exc))
|
|
34
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from ..imports import *
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
|
|
3
|
+
def _human(n):
|
|
4
|
+
if not n: return ""
|
|
5
|
+
for u in ("B","KB","MB","GB","TB"):
|
|
6
|
+
if n < 1024: return f"{n:.1f} {u}"
|
|
7
|
+
n /= 1024
|
|
8
|
+
return f"{n:.1f} PB"
|
|
9
|
+
|
|
10
|
+
def _gguf_options(files, free_bytes):
|
|
11
|
+
groups: dict[str, list] = {}
|
|
12
|
+
for f in files:
|
|
13
|
+
if not f.path.lower().endswith(".gguf"):
|
|
14
|
+
continue
|
|
15
|
+
m = GGUF_QUANT.search(f.path)
|
|
16
|
+
groups.setdefault(m.group(0).upper() if m else f.path, []).append(f)
|
|
17
|
+
|
|
18
|
+
opts = []
|
|
19
|
+
for label, group in sorted(groups.items()):
|
|
20
|
+
total = sum(g.size for g in group if g.size) or None
|
|
21
|
+
common = dict(
|
|
22
|
+
id=f"gguf:{label}",
|
|
23
|
+
framework="llama_cpp",
|
|
24
|
+
total_bytes=total,
|
|
25
|
+
fits_disk=(None if total is None or free_bytes is None
|
|
26
|
+
else total < free_bytes),
|
|
27
|
+
)
|
|
28
|
+
if len(group) == 1:
|
|
29
|
+
opts.append(InstallOption(
|
|
30
|
+
label=f"GGUF · {label} · {_human(total)}",
|
|
31
|
+
filename=group[0].path, **common))
|
|
32
|
+
else:
|
|
33
|
+
opts.append(InstallOption( # sharded → glob, not filename
|
|
34
|
+
label=f"GGUF · {label} · {len(group)} shards · {_human(total)}",
|
|
35
|
+
include=[f"*{label}*.gguf"], **common))
|
|
36
|
+
return opts
|
|
37
|
+
|
|
38
|
+
def _transformers_option(files, free_bytes):
|
|
39
|
+
has_st = any(f.path.endswith(".safetensors") for f in files)
|
|
40
|
+
has_bin = any(f.path.endswith(".bin") for f in files)
|
|
41
|
+
if not (has_st or has_bin):
|
|
42
|
+
return None
|
|
43
|
+
if has_st and has_bin: # skip duplicate .bin weights
|
|
44
|
+
include = ["*.safetensors", "*.json", "*.model", "tokenizer*", "*.txt"]
|
|
45
|
+
total = sum(f.size for f in files if f.size and (
|
|
46
|
+
f.path.endswith((".safetensors", ".json", ".model", ".txt"))
|
|
47
|
+
or "tokenizer" in f.path)) or None
|
|
48
|
+
label = f"Transformers · safetensors only · {_human(total)}"
|
|
49
|
+
else:
|
|
50
|
+
include, total = None, (sum(f.size for f in files if f.size) or None)
|
|
51
|
+
label = f"Transformers · full snapshot · {_human(total)}"
|
|
52
|
+
return InstallOption(
|
|
53
|
+
id="transformers", framework="transformers", label=label,
|
|
54
|
+
include=include, total_bytes=total,
|
|
55
|
+
fits_disk=(None if total is None or free_bytes is None
|
|
56
|
+
else total < free_bytes),
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
def resolve_options(hub_id, task, files, free_bytes) -> InstallOptions:
    """Collect every install option for a repo and pick a recommendation.

    GGUF options come first (as returned by ``_gguf_options``); the
    transformers option, when there is one, is appended last.

    The recommended id is ``"gguf:Q4_K_M"`` when that option exists,
    otherwise the id of the first option in the list, otherwise ``None``
    when there are no options at all.
    """
    options = _gguf_options(files, free_bytes)
    transformers_opt = _transformers_option(files, free_bytes)
    if transformers_opt:
        options.append(transformers_opt)

    # Default to the first listed option, then upgrade to the preferred quant.
    recommended = options[0].id if options else None
    for candidate in options:
        if candidate.id == "gguf:Q4_K_M":
            recommended = candidate.id
            break

    return InstallOptions(
        hub_id=hub_id,
        task=task,
        options=options,
        recommended=recommended,
    )
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from .imports import * # ← the missing name
|
|
2
|
+
|
|
3
|
+
_SIZE_CACHE: dict[str, tuple[float, int | None]] = {}
|
|
4
|
+
_SIZE_TTL = 600
|
|
5
|
+
_size_lock = threading.Lock()
|
|
6
|
+
|
|
7
|
+
def model_size(hub_id: str) -> int | None:
|
|
8
|
+
now = time.time()
|
|
9
|
+
with _size_lock:
|
|
10
|
+
hit = _SIZE_CACHE.get(hub_id)
|
|
11
|
+
if hit and now - hit[0] < _SIZE_TTL:
|
|
12
|
+
return hit[1]
|
|
13
|
+
try:
|
|
14
|
+
info = hfApi.model_info(hub_id, files_metadata=True)
|
|
15
|
+
total = sum(s.size for s in info.siblings if s.size) or None
|
|
16
|
+
except Exception as exc:
|
|
17
|
+
logger.warning("model_size(%s) failed: %s", hub_id, exc) # don't hide it
|
|
18
|
+
total = None
|
|
19
|
+
with _size_lock:
|
|
20
|
+
_SIZE_CACHE[hub_id] = (now, total)
|
|
21
|
+
return total
|
|
22
|
+
|
|
23
|
+
def free_bytes() -> int | None:
|
|
24
|
+
"""Headroom on the filesystem where this repo will actually be written.
|
|
25
|
+
|
|
26
|
+
Uses MODELS_DIR (search package's storage root) — the same root
|
|
27
|
+
destination_for_model builds paths from — so fits_disk can't disagree
|
|
28
|
+
with where the file goes. Deliberately NOT list_peers(): that reports a
|
|
29
|
+
pydantic settings.storage_root that may point at a different mount.
|
|
30
|
+
"""
|
|
31
|
+
try:
|
|
32
|
+
probe = MODELS_DIR if os.path.exists(MODELS_DIR) else "/"
|
|
33
|
+
return shutil.disk_usage(probe).free
|
|
34
|
+
except OSError:
|
|
35
|
+
return None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from ..init_imports import *
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_manifest(path: str) -> dict[str, dict[str, Any]]:
    """Read the manifest JSON file at *path* and return its root object.

    Raises:
        FileNotFoundError: *path* does not exist.
        ValueError: the JSON root is not an object (dict).
    """
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
        if isinstance(payload, dict):
            return payload
        raise ValueError("Manifest root must be a JSON object.")
    raise FileNotFoundError(f"Manifest not found: {path}")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load_manifest_or_empty(path: str) -> dict[str, dict[str, Any]]:
    """Load the manifest at *path*, or return ``{}`` if the file is absent."""
    return load_manifest(path) if os.path.exists(path) else {}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def save_manifest(path: str, manifest: dict[str, dict[str, Any]]) -> None:
    """Write *manifest* to *path* as indented, key-sorted JSON.

    The parent directory is created when missing.

    Raises:
        TypeError / ValueError: *manifest* is not JSON-serializable. In that
            case nothing on disk is touched.
        OSError: the directory or file cannot be written.
    """
    # Serialize before opening the file: open(..., "w") truncates, so a
    # serialization error after it would wipe out the existing manifest.
    payload = json.dumps(manifest, indent=2, sort_keys=True)

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def key_for_hub_id(hub_id: str) -> str:
    """Derive a manifest key from a hub id.

    Surrounding whitespace is trimmed, each run of characters outside
    ``A-Za-z0-9._-`` becomes a single underscore, and leading/trailing
    underscores are dropped. If nothing is left, the original *hub_id* is
    returned unchanged.
    """
    trimmed = hub_id.strip()
    slug = re.sub(r"[^A-Za-z0-9._-]+", "_", trimmed).strip("_")
    if slug:
        return slug
    return hub_id
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def upsert_model(
    path: str,
    model: dict[str, Any],
    *,
    key: str | None = None,
) -> tuple[str, dict[str, dict[str, Any]]]:
    """Insert or update a manifest entry. Returns (key, full_manifest).

    The manifest at *path* is loaded (an absent file counts as empty), the
    entry is merged with *model* — fields in *model* win over existing
    ones — and the whole manifest is written back.

    When *key* is not given, it is derived from ``model["hub_id"]`` or,
    failing that, ``model["name"]``.
    """
    manifest = load_manifest_or_empty(path)

    if key:
        chosen_key = key
    else:
        chosen_key = key_for_hub_id(model.get("hub_id") or model.get("name", ""))

    previous = manifest.get(chosen_key, {})
    manifest[chosen_key] = {**previous, **model}

    save_manifest(path, manifest)
    return chosen_key, manifest
|