lollms-client 1.1.2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/assets/models_ctx_sizes.json +382 -0
- lollms_client/llm_bindings/ollama/__init__.py +56 -0
- lollms_client/tti_bindings/diffusers/__init__.py +172 -66
- {lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/METADATA +1 -1
- {lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/RECORD +9 -8
- {lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/WHEEL +0 -0
- {lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/top_level.txt +0 -0
lollms_client/__init__.py
CHANGED
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 
-__version__ = "1.1.2"
+__version__ = "1.1.3" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
lollms_client/assets/models_ctx_sizes.json
ADDED
@@ -0,0 +1,382 @@
+{
+    "agentica-org/deepcoder-14b-preview": 8192,
+    "agentica-org/deepcoder-14b-preview:free": 8192,
+    "ai21/jamba-large-1.7": 256000,
+    "ai21/jamba-mini-1.7": 256000,
+    "aion-labs/aion-1.0": 8192,
+    "aion-labs/aion-1.0-mini": 8192,
+    "aion-labs/aion-rp-llama-3.1-8b": 131072,
+    "alfredpros/codellama-7b-instruct-solidity": 16384,
+    "alpindale/goliath-120b": 4096,
+    "amazon/nova-lite-v1": 32768,
+    "amazon/nova-micro-v1": 32768,
+    "amazon/nova-pro-v1": 32768,
+    "anthracite-org/magnum-v2-72b": 131072,
+    "anthracite-org/magnum-v4-72b": 131072,
+    "claude-3-haiku": 200000,
+    "claude-3-haiku:beta": 200000,
+    "claude-3-opus": 200000,
+    "claude-3-opus:beta": 200000,
+    "claude-3.5-haiku": 200000,
+    "claude-3.5-haiku-20241022": 200000,
+    "claude-3.5-haiku:beta": 200000,
+    "claude-3.5-sonnet": 200000,
+    "claude-3.5-sonnet-20240620": 200000,
+    "claude-3.5-sonnet-20240620:beta": 200000,
+    "claude-3.5-sonnet:beta": 200000,
+    "claude-3.7-sonnet": 200000,
+    "claude-3.7-sonnet:beta": 200000,
+    "claude-3.7-sonnet:thinking": 200000,
+    "claude-3-5-haiku": 200000,
+    "claude-3-5-haiku-20241022": 200000,
+    "claude-3-5-haiku:beta": 200000,
+    "claude-3-5-sonnet": 200000,
+    "claude-3-5-sonnet-20240620": 200000,
+    "claude-3-5-sonnet-20240620:beta": 200000,
+    "claude-3-5-sonnet:beta": 200000,
+    "claude-3-7-sonnet": 200000,
+    "claude-3-7-sonnet:beta": 200000,
+    "claude-3-7-sonnet:thinking": 200000,
+    "claude-opus-4": 200000,
+    "claude-opus-4.1": 200000,
+    "claude-sonnet-4": 200000,
+    "arcee-ai/coder-large": 32768,
+    "arcee-ai/maestro-reasoning": 32768,
+    "arcee-ai/spotlight": 32768,
+    "arcee-ai/virtuoso-large": 32768,
+    "arliai/qwq-32b-arliai-rpr-v1": 8192,
+    "arliai/qwq-32b-arliai-rpr-v1:free": 8192,
+    "baidu/ernie-4.5-300b-a47b": 128000,
+    "bytedance/ui-tars-1.5-7b": 8192,
+    "cognitivecomputations/dolphin-mistral-24b-venice-edition:free": 32768,
+    "cognitivecomputations/dolphin-mixtral-8x22b": 65536,
+    "cognitivecomputations/dolphin3.0-mistral-24b": 32768,
+    "cognitivecomputations/dolphin3.0-mistral-24b:free": 32768,
+    "cognitivecomputations/dolphin3.0-r1-mistral-24b": 32768,
+    "cognitivecomputations/dolphin3.0-r1-mistral-24b:free": 32768,
+    "cohere/command": 8192,
+    "cohere/command-a": 8192,
+    "cohere/command-r": 128000,
+    "cohere/command-r-03-2024": 128000,
+    "cohere/command-r-08-2024": 128000,
+    "cohere/command-r-plus": 128000,
+    "cohere/command-r-plus-04-2024": 128000,
+    "cohere/command-r-plus-08-2024": 128000,
+    "cohere/command-r7b-12-2024": 128000,
+    "deepseek/deepseek-chat": 32768,
+    "deepseek/deepseek-chat-v3-0324": 32768,
+    "deepseek/deepseek-chat-v3-0324:free": 32768,
+    "deepseek/deepseek-prover-v2": 131072,
+    "deepseek/deepseek-r1": 32768,
+    "deepseek/deepseek-r1-0528": 32768,
+    "deepseek/deepseek-r1-0528-qwen3-8b": 32768,
+    "deepseek/deepseek-r1-0528-qwen3-8b:free": 32768,
+    "deepseek/deepseek-r1-0528:free": 32768,
+    "deepseek/deepseek-r1-distill-llama-70b": 131072,
+    "deepseek/deepseek-r1-distill-llama-70b:free": 131072,
+    "deepseek/deepseek-r1-distill-llama-8b": 131072,
+    "deepseek/deepseek-r1-distill-qwen-1.5b": 32768,
+    "deepseek/deepseek-r1-distill-qwen-14b": 32768,
+    "deepseek/deepseek-r1-distill-qwen-14b:free": 32768,
+    "deepseek/deepseek-r1-distill-qwen-32b": 32768,
+    "deepseek/deepseek-r1-distill-qwen-7b": 32768,
+    "deepseek/deepseek-r1:free": 32768,
+    "deepseek/deepseek-v3-base": 32768,
+    "eleutherai/llemma_7b": 8192,
+    "featherless/qwerky-72b:free": 8192,
+    "google/gemini-2.0-flash-001": 1000000,
+    "google/gemini-2.0-flash-exp:free": 1000000,
+    "google/gemini-2.0-flash-lite-001": 1000000,
+    "google/gemini-2.5-flash": 1000000,
+    "google/gemini-2.5-flash-lite": 1000000,
+    "google/gemini-2.5-flash-lite-preview-06-17": 1000000,
+    "google/gemini-2.5-pro": 2000000,
+    "google/gemini-2.5-pro-exp-03-25": 2000000,
+    "google/gemini-2.5-pro-preview": 2000000,
+    "google/gemini-2.5-pro-preview-05-06": 2000000,
+    "google/gemini-flash-1.5": 1000000,
+    "google/gemini-flash-1.5-8b": 1000000,
+    "google/gemini-pro-1.5": 2000000,
+    "google/gemma-2-27b-it": 8192,
+    "google/gemma-2-9b-it": 8192,
+    "google/gemma-2-9b-it:free": 8192,
+    "google/gemma-3-12b-it": 131072,
+    "google/gemma-3-12b-it:free": 131072,
+    "google/gemma-3-27b-it": 131072,
+    "google/gemma-3-27b-it:free": 131072,
+    "google/gemma-3-4b-it": 131072,
+    "google/gemma-3-4b-it:free": 131072,
+    "google/gemma-3n-e2b-it:free": 131072,
+    "google/gemma-3n-e4b-it": 131072,
+    "google/gemma-3n-e4b-it:free": 131072,
+    "gryphe/mythomax-l2-13b": 4096,
+    "inception/mercury": 32768,
+    "inception/mercury-coder": 32768,
+    "infermatic/mn-inferor-12b": 8192,
+    "inflection/inflection-3-pi": 128000,
+    "inflection/inflection-3-productivity": 128000,
+    "liquid/lfm-3b": 8192,
+    "liquid/lfm-40b": 8192,
+    "liquid/lfm-7b": 8192,
+    "mancer/weaver": 8192,
+    "meta-llama/llama-3-70b-instruct": 8192,
+    "meta-llama/llama-3-8b-instruct": 8192,
+    "meta-llama/llama-3.1-405b": 131072,
+    "meta-llama/llama-3.1-405b-instruct": 131072,
+    "meta-llama/llama-3.1-405b-instruct:free": 131072,
+    "meta-llama/llama-3.1-70b-instruct": 131072,
+    "meta-llama/llama-3.1-8b-instruct": 131072,
+    "meta-llama/llama-3.2-11b-vision-instruct": 131072,
+    "meta-llama/llama-3.2-11b-vision-instruct:free": 131072,
+    "meta-llama/llama-3.2-1b-instruct": 131072,
+    "meta-llama/llama-3.2-3b-instruct": 131072,
+    "meta-llama/llama-3.2-3b-instruct:free": 131072,
+    "meta-llama/llama-3.2-90b-vision-instruct": 131072,
+    "meta-llama/llama-3.3-70b-instruct": 131072,
+    "meta-llama/llama-3.3-70b-instruct:free": 131072,
+    "meta-llama/llama-4-maverick": 131072,
+    "meta-llama/llama-4-scout": 131072,
+    "meta-llama/llama-guard-2-8b": 8192,
+    "meta-llama/llama-guard-3-8b": 131072,
+    "meta-llama/llama-guard-4-12b": 131072,
+    "microsoft/mai-ds-r1": 32768,
+    "microsoft/mai-ds-r1:free": 32768,
+    "microsoft/phi-3-medium-128k-instruct": 131072,
+    "microsoft/phi-3-mini-128k-instruct": 131072,
+    "microsoft/phi-3.5-mini-128k-instruct": 131072,
+    "microsoft/phi-4": 131072,
+    "microsoft/phi-4-multimodal-instruct": 131072,
+    "microsoft/phi-4-reasoning-plus": 131072,
+    "microsoft/wizardlm-2-8x22b": 65536,
+    "minimax/minimax-01": 200000,
+    "minimax/minimax-m1": 200000,
+    "mistralai/codestral-2501": 32768,
+    "mistralai/codestral-2508": 32768,
+    "mistralai/devstral-medium": 32768,
+    "mistralai/devstral-small": 32768,
+    "mistralai/devstral-small-2505": 32768,
+    "mistralai/devstral-small-2505:free": 32768,
+    "mistralai/magistral-medium-2506": 32768,
+    "mistralai/magistral-medium-2506:thinking": 32768,
+    "mistralai/magistral-small-2506": 32768,
+    "mistralai/ministral-3b": 32768,
+    "mistralai/ministral-8b": 32768,
+    "mistralai/mistral-7b-instruct": 32768,
+    "mistralai/mistral-7b-instruct-v0.1": 8192,
+    "mistralai/mistral-7b-instruct-v0.2": 32768,
+    "mistralai/mistral-7b-instruct-v0.3": 32768,
+    "mistralai/mistral-7b-instruct:free": 32768,
+    "mistralai/mistral-large": 32768,
+    "mistralai/mistral-large-2407": 128000,
+    "mistralai/mistral-large-2411": 128000,
+    "mistralai/mistral-medium-3": 32768,
+    "mistralai/mistral-nemo": 128000,
+    "mistralai/mistral-nemo:free": 128000,
+    "mistralai/mistral-saba": 32768,
+    "mistralai/mistral-small": 32768,
+    "mistralai/mistral-small-24b-instruct-2501": 32768,
+    "mistralai/mistral-small-24b-instruct-2501:free": 32768,
+    "mistralai/mistral-small-3.1-24b-instruct": 32768,
+    "mistralai/mistral-small-3.1-24b-instruct:free": 32768,
+    "mistralai/mistral-small-3.2-24b-instruct": 32768,
+    "mistralai/mistral-small-3.2-24b-instruct:free": 32768,
+    "mistralai/mistral-tiny": 32768,
+    "mistralai/mixtral-8x22b-instruct": 65536,
+    "mistralai/mixtral-8x7b-instruct": 32768,
+    "mistralai/pixtral-12b": 128000,
+    "mistralai/pixtral-large-2411": 128000,
+    "moonshotai/kimi-dev-72b:free": 200000,
+    "moonshotai/kimi-k2": 200000,
+    "moonshotai/kimi-k2:free": 200000,
+    "moonshotai/kimi-vl-a3b-thinking": 200000,
+    "moonshotai/kimi-vl-a3b-thinking:free": 200000,
+    "morph/morph-v3-fast": 8192,
+    "morph/morph-v3-large": 8192,
+    "neversleep/llama-3-lumimaid-70b": 8192,
+    "neversleep/llama-3.1-lumimaid-8b": 131072,
+    "neversleep/noromaid-20b": 32768,
+    "nousresearch/deephermes-3-llama-3-8b-preview:free": 8192,
+    "nousresearch/deephermes-3-mistral-24b-preview": 32768,
+    "nousresearch/hermes-2-pro-llama-3-8b": 8192,
+    "nousresearch/hermes-3-llama-3.1-405b": 131072,
+    "nousresearch/hermes-3-llama-3.1-70b": 131072,
+    "nousresearch/nous-hermes-2-mixtral-8x7b-dpo": 32768,
+    "nvidia/llama-3.1-nemotron-70b-instruct": 131072,
+    "nvidia/llama-3.1-nemotron-ultra-253b-v1": 131072,
+    "nvidia/llama-3.1-nemotron-ultra-253b-v1:free": 131072,
+    "nvidia/llama-3.3-nemotron-super-49b-v1": 131072,
+    "openai/chatgpt-4o-latest": 128000,
+    "openai/codex-mini": 2048,
+    "openai/gpt-3.5-turbo": 4096,
+    "openai/gpt-3.5-turbo-0613": 4096,
+    "openai/gpt-3.5-turbo-16k": 16384,
+    "openai/gpt-3.5-turbo-instruct": 4096,
+    "openai/gpt-4": 8192,
+    "openai/gpt-4-0314": 8192,
+    "openai/gpt-4-1106-preview": 128000,
+    "openai/gpt-4-turbo": 128000,
+    "openai/gpt-4-turbo-preview": 128000,
+    "openai/gpt-4.1": 128000,
+    "openai/gpt-4.1-mini": 128000,
+    "openai/gpt-4.1-nano": 128000,
+    "openai/gpt-4o": 128000,
+    "openai/gpt-4o-2024-05-13": 128000,
+    "openai/gpt-4o-2024-08-06": 128000,
+    "openai/gpt-4o-2024-11-20": 128000,
+    "openai/gpt-4o-mini": 128000,
+    "openai/gpt-4o-mini-2024-07-18": 128000,
+    "openai/gpt-4o-mini-search-preview": 128000,
+    "openai/gpt-4o-search-preview": 128000,
+    "openai/gpt-4o:extended": 128000,
+    "openai/gpt-5": 200000,
+    "openai/gpt-5-chat": 200000,
+    "openai/gpt-5-mini": 200000,
+    "openai/gpt-5-nano": 200000,
+    "openai/gpt-oss-120b": 128000,
+    "openai/gpt-oss-20b": 128000,
+    "openai/gpt-oss-20b:free": 128000,
+    "openai/o1": 128000,
+    "openai/o1-mini": 128000,
+    "openai/o1-mini-2024-09-12": 128000,
+    "openai/o1-pro": 128000,
+    "openai/o3": 200000,
+    "openai/o3-mini": 200000,
+    "openai/o3-mini-high": 200000,
+    "openai/o3-pro": 200000,
+    "openai/o4-mini": 128000,
+    "openai/o4-mini-high": 128000,
+    "opengvlab/internvl3-14b": 8192,
+    "openrouter/auto": 8192,
+    "perplexity/r1-1776": 32768,
+    "perplexity/sonar": 32768,
+    "perplexity/sonar-deep-research": 32768,
+    "perplexity/sonar-pro": 32768,
+    "perplexity/sonar-reasoning": 32768,
+    "perplexity/sonar-reasoning-pro": 32768,
+    "pygmalionai/mythalion-13b": 4096,
+    "qwen/qwen-2-72b-instruct": 32768,
+    "qwen/qwen-2.5-72b-instruct": 131072,
+    "qwen/qwen-2.5-72b-instruct:free": 131072,
+    "qwen/qwen-2.5-7b-instruct": 131072,
+    "qwen/qwen-2.5-coder-32b-instruct": 131072,
+    "qwen/qwen-2.5-coder-32b-instruct:free": 131072,
+    "qwen/qwen-2.5-vl-7b-instruct": 131072,
+    "qwen/qwen-max": 32768,
+    "qwen/qwen-plus": 32768,
+    "qwen/qwen-turbo": 8192,
+    "qwen/qwen-vl-max": 32768,
+    "qwen/qwen-vl-plus": 32768,
+    "qwen/qwen2.5-vl-32b-instruct": 131072,
+    "qwen/qwen2.5-vl-32b-instruct:free": 131072,
+    "qwen/qwen2.5-vl-72b-instruct": 131072,
+    "qwen/qwen2.5-vl-72b-instruct:free": 131072,
+    "qwen/qwen3-14b": 32768,
+    "qwen/qwen3-14b:free": 32768,
+    "qwen/qwen3-235b-a22b": 32768,
+    "qwen/qwen3-235b-a22b-2507": 32768,
+    "qwen/qwen3-235b-a22b-thinking-2507": 32768,
+    "qwen/qwen3-235b-a22b:free": 32768,
+    "qwen/qwen3-30b-a3b": 32768,
+    "qwen/qwen3-30b-a3b-instruct-2507": 32768,
+    "qwen/qwen3-30b-a3b:free": 32768,
+    "qwen/qwen3-32b": 32768,
+    "qwen/qwen3-4b:free": 32768,
+    "qwen/qwen3-8b": 32768,
+    "qwen/qwen3-8b:free": 32768,
+    "qwen/qwen3-coder": 32768,
+    "qwen/qwen3-coder:free": 32768,
+    "qwen/qwq-32b": 32768,
+    "qwen/qwq-32b-preview": 32768,
+    "qwen/qwq-32b:free": 32768,
+    "raifle/sorcererlm-8x22b": 65536,
+    "rekaai/reka-flash-3:free": 128000,
+    "sao10k/l3-euryale-70b": 8192,
+    "sao10k/l3-lunaris-8b": 8192,
+    "sao10k/l3.1-euryale-70b": 131072,
+    "sao10k/l3.3-euryale-70b": 131072,
+    "sarvamai/sarvam-m:free": 8192,
+    "scb10x/llama3.1-typhoon2-70b-instruct": 131072,
+    "shisa-ai/shisa-v2-llama3.3-70b": 131072,
+    "shisa-ai/shisa-v2-llama3.3-70b:free": 131072,
+    "sophosympatheia/midnight-rose-70b": 4096,
+    "switchpoint/router": 8192,
+    "tencent/hunyuan-a13b-instruct": 8192,
+    "tencent/hunyuan-a13b-instruct:free": 8192,
+    "thedrummer/anubis-70b-v1.1": 8192,
+    "thedrummer/anubis-pro-105b-v1": 8192,
+    "thedrummer/rocinante-12b": 8192,
+    "thedrummer/skyfall-36b-v2": 8192,
+    "thedrummer/unslopnemo-12b": 128000,
+    "thedrummer/valkyrie-49b-v1": 8192,
+    "thudm/glm-4-32b": 2000000,
+    "thudm/glm-4.1v-9b-thinking": 2000000,
+    "thudm/glm-z1-32b:free": 2000000,
+    "tngtech/deepseek-r1t-chimera": 32768,
+    "tngtech/deepseek-r1t-chimera:free": 32768,
+    "tngtech/deepseek-r1t2-chimera:free": 32768,
+    "undi95/remm-slerp-l2-13b": 4096,
+    "x-ai/grok-2-1212": 128000,
+    "x-ai/grok-2-vision-1212": 128000,
+    "x-ai/grok-3": 128000,
+    "x-ai/grok-3-beta": 128000,
+    "x-ai/grok-3-mini": 128000,
+    "x-ai/grok-3-mini-beta": 128000,
+    "x-ai/grok-4": 128000,
+    "x-ai/grok-vision-beta": 128000,
+    "z-ai/glm-4-32b": 2000000,
+    "z-ai/glm-4.5": 2000000,
+    "z-ai/glm-4.5-air": 2000000,
+    "z-ai/glm-4.5-air:free": 2000000,
+    "llama3.1": 131072,
+    "llama3.2": 131072,
+    "llama3.3": 131072,
+    "llama3": 8192,
+    "llama2": 4096,
+    "mixtral8x22b": 65536,
+    "mixtral": 32768,
+    "mistral": 32768,
+    "gemma3": 131072,
+    "gemma2": 8192,
+    "gemma": 8192,
+    "phi3": 131072,
+    "phi2": 2048,
+    "phi": 2048,
+    "qwen2.5": 131072,
+    "qwen2": 32768,
+    "qwen": 8192,
+    "codellama": 16384,
+    "codegemma": 8192,
+    "deepseek-coder-v2": 131072,
+    "deepseek-coder": 16384,
+    "deepseek-v2": 131072,
+    "deepseek-llm": 4096,
+    "yi1.5": 32768,
+    "yi": 4096,
+    "command-r": 131072,
+    "wizardlm2": 32768,
+    "wizardlm": 16384,
+    "zephyr": 65536,
+    "vicuna": 2048,
+    "falcon": 2048,
+    "starcoder": 8192,
+    "stablelm": 4096,
+    "orca2": 4096,
+    "orca": 4096,
+    "dolphin": 32768,
+    "openhermes": 8192,
+    "gpt-oss": 128000,
+    "gpt-3.5-turbo": 4096,
+    "gpt-4": 8192,
+    "grok-2": 128000,
+    "grok-2-1212": 128000,
+    "grok-2-vision-1212": 128000,
+    "grok-3": 128000,
+    "grok-3-fast": 128000,
+    "grok-3-beta": 128000,
+    "grok-3-mini": 128000,
+    "grok-3-mini-beta": 128000,
+    "grok-3-mini-fast": 128000,
+    "grok-4-0709": 128000,
+    "grok-4": 128000,
+    "grok-vision-beta": 128000
+}
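The table above maps model identifiers (OpenRouter-style IDs plus bare family names such as "llama3.1") to context window sizes in tokens. As an illustration of how such a table can be queried: a minimal sketch, assuming the JSON is read from the package's assets folder shown in this diff; the helper resolve_ctx_size is hypothetical and not part of the library API:

import json
from pathlib import Path

# Path assumes the package layout from this diff; adjust to your install location.
CTX_SIZES = json.loads(Path("lollms_client/assets/models_ctx_sizes.json").read_text())

def resolve_ctx_size(model_name: str, default=None):
    """Hypothetical helper: exact match first, then the longest key the name
    starts with (handles tags such as 'llama3.1:8b-instruct-q4_K_M')."""
    if model_name in CTX_SIZES:
        return CTX_SIZES[model_name]
    prefixes = [k for k in CTX_SIZES if model_name.startswith(k)]
    return CTX_SIZES[max(prefixes, key=len)] if prefixes else default

print(resolve_ctx_size("llama3.1:8b"))  # 131072, via the "llama3.1" entry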
lollms_client/llm_bindings/ollama/__init__.py
CHANGED
@@ -680,7 +680,63 @@ class OllamaBinding(LollmsLLMBinding):
 
         ASCIIColors.warning(f"Context size not found for model '{model_name}'")
         return None
+
+    def ps(self):
+        """
+        Lists running models in a standardized, flat format.
+
+        This method corresponds to the /api/ps endpoint in the Ollama API. It retrieves
+        the models currently loaded into memory and transforms the data into a simplified,
+        flat list of dictionaries.
+
+        Returns:
+            list[dict]: A list of dictionaries, each representing a running model with a standardized set of keys.
+                Returns an empty list if the client is not initialized or if an error occurs.
 
+        Example of a returned model dictionary:
+        {
+            "model_name": "gemma3:12b",
+            "size": 13861175232,
+            "vram_size": 10961479680,
+            "parameters_size": "12.2B",
+            "quantization_level": "Q4_K_M",
+            "context_size": 32000,
+            "parent_model": "",
+            "expires_at": "2025-08-20T22:28:18.6708784+02:00"
+        }
+        """
+        if not self.ollama_client:
+            ASCIIColors.warning("Ollama client not initialized. Cannot list running models.")
+            return []
+
+        try:
+            running_models_response = self.ollama_client.ps()
+
+            models_list = running_models_response.get('models', [])
+            standardized_models = []
+
+            for model_data in models_list:
+                details = model_data.get('details', {})
+
+                flat_model_info = {
+                    "model_name": model_data.get("name"),
+                    "size": model_data.get("size"),
+                    "vram_size": model_data.get("size_vram"),
+                    "expires_at": model_data.get("expires_at"),
+                    "parameters_size": details.get("parameter_size"),
+                    "quantization_level": details.get("quantization_level"),
+                    "parent_model": details.get("parent_model"),
+                    # Add context_size if it exists in the details
+                    "context_size": details.get("context_length")
+                }
+                standardized_models.append(flat_model_info)
+
+            return standardized_models
+
+        except Exception as e:
+            ASCIIColors.error(f"Failed to list running models from Ollama at {self.host_address}: {e}")
+            return []
+
 if __name__ == '__main__':
     global full_streamed_text
     # Example Usage (requires an Ollama server running)
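A hedged usage sketch for the new ps() method; the constructor arguments shown are assumptions about a typical local setup, not a documented signature:

from lollms_client.llm_bindings.ollama import OllamaBinding

# host_address mirrors the attribute used in ps()'s error message; adapt as needed.
binding = OllamaBinding(host_address="http://localhost:11434")
for model in binding.ps():
    vram_gib = (model["vram_size"] or 0) / 1024**3
    print(f'{model["model_name"]}: {vram_gib:.1f} GiB VRAM, ctx={model["context_size"]}')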
lollms_client/tti_bindings/diffusers/__init__.py
CHANGED
@@ -152,28 +152,53 @@ if torch:
 
 # Common Schedulers mapping
 SCHEDULER_MAPPING = {
-    "default": None,
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
+    "default": None,
+    "ddim": "DDIMScheduler",
+    "ddpm": "DDPMScheduler",
+    "deis_multistep": "DEISMultistepScheduler",
+    "dpm_multistep": "DPMSolverMultistepScheduler",
+    "dpm_multistep_karras": "DPMSolverMultistepScheduler",
+    "dpm_single": "DPMSolverSinglestepScheduler",
+    "dpm_adaptive": "DPMSolverPlusPlusScheduler", # Retained; no direct Diffusers equivalent confirmed, may require custom config
+    "dpm++_2m": "DPMSolverMultistepScheduler",
+    "dpm++_2m_karras": "DPMSolverMultistepScheduler",
+    "dpm++_2s_ancestral": "DPMSolverAncestralDiscreteScheduler", # Retained; consider "KDPM2AncestralDiscreteScheduler" as alternative if class unavailable
+    "dpm++_2s_ancestral_karras": "DPMSolverAncestralDiscreteScheduler",
+    "dpm++_sde": "DPMSolverSDEScheduler",
+    "dpm++_sde_karras": "DPMSolverSDEScheduler",
+    "euler_ancestral_discrete": "EulerAncestralDiscreteScheduler",
+    "euler_discrete": "EulerDiscreteScheduler",
+    "heun_discrete": "HeunDiscreteScheduler",
+    "heun_karras": "HeunDiscreteScheduler",
+    "lms_discrete": "LMSDiscreteScheduler",
+    "lms_karras": "LMSDiscreteScheduler",
+    "pndm": "PNDMScheduler",
+    "unipc_multistep": "UniPCMultistepScheduler",
+    # Additions
+    "dpm++_2m_sde": "DPMSolverMultistepScheduler",
+    "dpm++_2m_sde_karras": "DPMSolverMultistepScheduler",
+    "dpm2": "KDPM2DiscreteScheduler",
+    "dpm2_karras": "KDPM2DiscreteScheduler",
+    "dpm2_a": "KDPM2AncestralDiscreteScheduler",
+    "dpm2_a_karras": "KDPM2AncestralDiscreteScheduler",
+    "euler": "EulerDiscreteScheduler",
+    "euler_a": "EulerAncestralDiscreteScheduler",
+    "heun": "HeunDiscreteScheduler",
+    "lms": "LMSDiscreteScheduler",
 }
 SCHEDULER_USES_KARRAS_SIGMAS = [
     "dpm_multistep_karras", "dpm++_2m_karras", "dpm++_2s_ancestral_karras",
-    "dpm++_sde_karras", "heun_karras", "lms_karras"
+    "dpm++_sde_karras", "heun_karras", "lms_karras",
+    # Additions
+    "dpm++_2m_sde_karras", "dpm2_karras", "dpm2_a_karras",
 ]
 
 # --- START: Concurrency and Singleton Management ---
 
 class ModelManager:
     """
-    Manages a single pipeline instance, its generation queue,
-
+    Manages a single pipeline instance, its generation queue, a worker thread,
+    and an optional auto-unload timer.
     """
     def __init__(self, config: Dict[str, Any], models_path: Path):
         self.config = config
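The mapping resolves a user-facing scheduler key to a Diffusers class name, and the *_karras keys additionally flip use_karras_sigmas (the actual switch logic appears in a later hunk). A minimal sketch of that resolution step, assuming diffusers is installed and the two module-level tables above are in scope; resolve_scheduler is a hypothetical helper, not the binding's method:

import diffusers

def resolve_scheduler(pipeline, scheduler_key: str):
    class_name = SCHEDULER_MAPPING.get(scheduler_key)
    if class_name is None:
        return pipeline.scheduler  # "default": keep the model's shipped scheduler
    # getattr may raise AttributeError for names with no Diffusers class
    # (e.g. the retained "dpm_adaptive" entry); the binding catches such errors.
    SchedulerClass = getattr(diffusers, class_name)
    config = dict(pipeline.scheduler.config)
    config["use_karras_sigmas"] = scheduler_key in SCHEDULER_USES_KARRAS_SIGMAS
    return SchedulerClass.from_config(config)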
@@ -182,11 +207,18 @@ class ModelManager:
         self.ref_count = 0
         self.lock = threading.Lock()
         self.queue = queue.Queue()
-        self.worker_thread = threading.Thread(target=self._generation_worker, daemon=True)
-        self._stop_event = threading.Event()
         self.is_loaded = False
+        self.last_used_time = time.time()
 
+        # --- Worker and Monitor Threads ---
+        self._stop_event = threading.Event()
+        self.worker_thread = threading.Thread(target=self._generation_worker, daemon=True)
         self.worker_thread.start()
+
+        self._stop_monitor_event = threading.Event()
+        self._unload_monitor_thread = None
+        self._start_unload_monitor()
+
 
     def acquire(self):
         with self.lock:
@@ -200,10 +232,35 @@
 
     def stop(self):
         self._stop_event.set()
-        self.
+        if self._unload_monitor_thread:
+            self._stop_monitor_event.set()
+            self._unload_monitor_thread.join(timeout=2)
+        self.queue.put(None) # Sentinel to unblock queue.get()
         self.worker_thread.join(timeout=5)
 
+    def _start_unload_monitor(self):
+        unload_after = self.config.get("unload_inactive_model_after", 0)
+        if unload_after > 0 and self._unload_monitor_thread is None:
+            self._stop_monitor_event.clear()
+            self._unload_monitor_thread = threading.Thread(target=self._unload_monitor, daemon=True)
+            self._unload_monitor_thread.start()
+
+    def _unload_monitor(self):
+        unload_after = self.config.get("unload_inactive_model_after", 0)
+        if unload_after <= 0: return
+
+        ASCIIColors.info(f"Starting inactivity monitor for '{self.config['model_name']}' (timeout: {unload_after}s).")
+        while not self._stop_monitor_event.wait(timeout=5.0): # Check every 5 seconds
+            with self.lock:
+                if not self.is_loaded:
+                    continue
+
+                if time.time() - self.last_used_time > unload_after:
+                    ASCIIColors.info(f"Model '{self.config['model_name']}' has been inactive. Unloading.")
+                    self._unload_pipeline()
+
     def _load_pipeline(self):
+        # This method assumes a lock is already held
        if self.pipeline:
            return
 
@@ -271,16 +328,19 @@
             self.pipeline.enable_sequential_cpu_offload()
 
         self.is_loaded = True
+        self.last_used_time = time.time()
         ASCIIColors.green(f"Model '{model_name}' loaded successfully on '{self.config['device']}'.")
 
     def _unload_pipeline(self):
+        # This method assumes a lock is already held
         if self.pipeline:
+            model_name = self.config.get('model_name', 'Unknown')
             del self.pipeline
             self.pipeline = None
             if torch and torch.cuda.is_available():
                 torch.cuda.empty_cache()
             self.is_loaded = False
-            ASCIIColors.info(f"Model '{
+            ASCIIColors.info(f"Model '{model_name}' unloaded and VRAM cleared.")
 
     def _generation_worker(self):
         while not self._stop_event.is_set():
@@ -291,8 +351,10 @@
             future, pipeline_args = job
             try:
                 with self.lock:
-
+                    self.last_used_time = time.time()
+                    if not self.is_loaded:
                        self._load_pipeline()
+
                 with torch.no_grad():
                     pipeline_output = self.pipeline(**pipeline_args)
                 pil_image: Image.Image = pipeline_output.images[0]
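The worker above drains a queue of (Future, pipeline_args) jobs; callers block on the Future for their result, and the None sentinel put by stop() unblocks queue.get() on shutdown. A self-contained sketch of that hand-off pattern, with a string standing in for the pipeline call:

import queue
import threading
from concurrent.futures import Future

jobs = queue.Queue()

def worker():
    while True:
        job = jobs.get()
        if job is None:  # sentinel, as in stop() above
            break
        future, args = job
        try:
            future.set_result(f"rendered {args['prompt']!r}")  # stand-in for pipeline(**args)
        except Exception as e:
            future.set_exception(e)

threading.Thread(target=worker, daemon=True).start()
f = Future()
jobs.put((f, {"prompt": "a lighthouse at dusk"}))
print(f.result(timeout=10))  # blocks until the worker fulfils the Future
jobs.put(None)               # shut the worker down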
@@ -363,6 +425,7 @@
             scheduler_config = self.pipeline.scheduler.config
             scheduler_config["use_karras_sigmas"] = scheduler_name_key in SCHEDULER_USES_KARRAS_SIGMAS
             self.pipeline.scheduler = SchedulerClass.from_config(scheduler_config)
+            ASCIIColors.info(f"Switched scheduler to {scheduler_class_name}")
         except Exception as e:
             ASCIIColors.warning(f"Could not switch scheduler to {scheduler_name_key}: {e}. Using current default.")
 
@@ -378,14 +441,17 @@ class PipelineRegistry:
             cls._instance._registry_lock = threading.Lock()
         return cls._instance
 
-
-
+    @staticmethod
+    def _get_critical_keys():
+        return [
             "model_name", "device", "torch_dtype_str", "use_safetensors",
             "safety_checker_on", "hf_variant", "enable_cpu_offload",
             "enable_sequential_cpu_offload", "enable_xformers",
-            "local_files_only", "hf_cache_path"
+            "local_files_only", "hf_cache_path", "unload_inactive_model_after"
         ]
-
+
+    def _get_config_key(self, config: Dict[str, Any]) -> str:
+        key_data = tuple(sorted((k, config.get(k)) for k in self._get_critical_keys()))
         return hashlib.sha256(str(key_data).encode('utf-8')).hexdigest()
 
     def get_manager(self, config: Dict[str, Any], models_path: Path) -> ModelManager:
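The registry keys managers by a SHA-256 over only the critical settings, so configs that differ in non-critical fields (say, guidance_scale) share one loaded pipeline. The same arithmetic in isolation, reusing the expression from _get_config_key with an abbreviated key list for the demo:

import hashlib

def config_key(config: dict, critical_keys) -> str:
    key_data = tuple(sorted((k, config.get(k)) for k in critical_keys))
    return hashlib.sha256(str(key_data).encode('utf-8')).hexdigest()

keys = ["model_name", "device"]
a = config_key({"model_name": "sdxl", "device": "cuda", "guidance_scale": 7.0}, keys)
b = config_key({"model_name": "sdxl", "device": "cuda", "guidance_scale": 3.5}, keys)
assert a == b  # non-critical differences do not force a second pipeline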
@@ -402,11 +468,16 @@
             manager = self._managers[key]
             ref_count = manager.release()
             if ref_count == 0:
-                ASCIIColors.info(f"Reference count for model '{config.get('model_name')}' is zero. Cleaning up.")
+                ASCIIColors.info(f"Reference count for model '{config.get('model_name')}' is zero. Cleaning up manager.")
                 manager.stop()
-                manager.
+                with manager.lock:
+                    manager._unload_pipeline()
                 del self._managers[key]
 
+    def get_active_managers(self) -> List[ModelManager]:
+        with self._registry_lock:
+            return [m for m in self._managers.values() if m.is_loaded]
+
 class DiffusersTTIBinding_Impl(LollmsTTIBinding):
     DEFAULT_CONFIG = {
         "model_name": "", "device": "auto", "torch_dtype_str": "auto", "use_safetensors": True,
@@ -414,6 +485,7 @@ class DiffusersTTIBinding_Impl(LollmsTTIBinding):
         "guidance_scale": 7.0, "default_width": 512, "default_height": 512, "seed": -1,
         "enable_cpu_offload": False, "enable_sequential_cpu_offload": False, "enable_xformers": False,
         "hf_variant": None, "hf_token": None, "hf_cache_path": None, "local_files_only": False,
+        "unload_inactive_model_after": 0,
     }
 
     def __init__(self, **kwargs):
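Since __init__ (next hunk) copies DEFAULT_CONFIG and overlays the caller's kwargs, the new timeout can be requested at construction time. A hedged sketch: the class name and config keys come from this diff, the argument values are illustrative:

from lollms_client.tti_bindings.diffusers import DiffusersTTIBinding_Impl

binding = DiffusersTTIBinding_Impl(
    model_name="stabilityai/stable-diffusion-xl-base-1.0",
    device="cuda",
    unload_inactive_model_after=300,  # seconds of inactivity; 0 (the default) disables the monitor
)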
@@ -425,9 +497,13 @@
             "Please run: pip install torch torchvision diffusers Pillow transformers safetensors requests tqdm"
         )
 
-
+        # Initialize config with defaults, then override with user kwargs
+        self.config = self.DEFAULT_CONFIG.copy()
+        self.config.update(kwargs)
+
         self.model_name = self.config.get("model_name", "")
-
+        models_path_str = kwargs.get("models_path", str(Path(__file__).parent / "models"))
+        self.models_path = Path(models_path_str)
         self.models_path.mkdir(parents=True, exist_ok=True)
 
         self.registry = PipelineRegistry()
@@ -437,6 +513,49 @@
         if self.model_name:
             self._acquire_manager()
 
+    def ps(self) -> List[dict]:
+        """
+        Lists running models in a standardized, flat format.
+        """
+        if not self.registry:
+            ASCIIColors.warning("Diffusers PipelineRegistry not available.")
+            return []
+
+        try:
+            active_managers = self.registry.get_active_managers()
+            standardized_models = []
+
+            for manager in active_managers:
+                with manager.lock:
+                    config = manager.config
+                    pipeline = manager.pipeline
+
+                    vram_usage_bytes = 0
+                    if torch.cuda.is_available() and config.get("device") == "cuda" and pipeline:
+                        for component in pipeline.components.values():
+                            if hasattr(component, 'parameters'):
+                                mem_params = sum(p.nelement() * p.element_size() for p in component.parameters())
+                                mem_bufs = sum(b.nelement() * b.element_size() for b in component.buffers())
+                                vram_usage_bytes += (mem_params + mem_bufs)
+
+                    flat_model_info = {
+                        "model_name": config.get("model_name"),
+                        "vram_size": vram_usage_bytes,
+                        "device": config.get("device"),
+                        "torch_dtype": str(pipeline.dtype) if pipeline else config.get("torch_dtype_str"),
+                        "pipeline_type": pipeline.__class__.__name__ if pipeline else "N/A",
+                        "scheduler_class": pipeline.scheduler.__class__.__name__ if pipeline and hasattr(pipeline, 'scheduler') else "N/A",
+                        "status": "Active" if manager.is_loaded else "Idle",
+                        "queue_size": manager.queue.qsize(),
+                    }
+                    standardized_models.append(flat_model_info)
+
+            return standardized_models
+
+        except Exception as e:
+            ASCIIColors.error(f"Failed to list running models from Diffusers registry: {e}")
+            return []
+
     def _acquire_manager(self):
         if self.manager:
             self.registry.release_manager(self.manager.config)
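Note that the vram_size reported by this ps() is an estimate: it sums parameter and buffer sizes across pipeline components rather than querying the CUDA allocator. The same accounting on a single module, as a sanity check:

import torch

def module_bytes(module: torch.nn.Module) -> int:
    # Parameters plus registered buffers; the same sum ps() performs per component.
    params = sum(p.nelement() * p.element_size() for p in module.parameters())
    bufs = sum(b.nelement() * b.element_size() for b in module.buffers())
    return params + bufs

layer = torch.nn.Linear(1024, 1024)  # float32 weights and bias
print(module_bytes(layer))           # (1024*1024 + 1024) * 4 = 4198400 bytes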
@@ -455,40 +574,29 @@
         return sorted([f.name for f in self.models_path.iterdir() if f.is_file() and f.suffix == ".safetensors"])
 
     def listModels(self) -> list:
-        #
+        # Implementation is unchanged...
         civitai_list = [
             {'model_name': key, 'display_name': info['display_name'], 'description': info['description'], 'owned_by': info['owned_by']}
             for key, info in CIVITAI_MODELS.items()
         ]
         hf_default_list = [
-            # SDXL Models (1024x1024 native)
             {'model_name': "stabilityai/stable-diffusion-xl-base-1.0", 'display_name': "Stable Diffusion XL 1.0", 'description': "Official SDXL base model from Stability AI. Native resolution is 1024x1024.", 'owned_by': 'HuggingFace'},
             {'model_name': "playgroundai/playground-v2.5-1024px-aesthetic", 'display_name': "Playground v2.5", 'description': "Known for high aesthetic quality. Native resolution is 1024x1024.", 'owned_by': 'HuggingFace'},
-            # SD 1.5 Models (512x512 native)
             {'model_name': "runwayml/stable-diffusion-v1-5", 'display_name': "Stable Diffusion 1.5", 'description': "A popular and versatile open-access text-to-image model.", 'owned_by': 'HuggingFace'},
-            {'model_name': "dataautogpt3/OpenDalleV1.1", 'display_name': "OpenDalle v1.1", 'description': "An open-source reproduction of DALL-E 3, good for prompt adherence.", 'owned_by': 'HuggingFace'},
-            {'model_name': "stabilityai/stable-diffusion-2-1-base", 'display_name': "Stable Diffusion 2.1 (512px)", 'description': "A 512x512 resolution model from Stability AI.", 'owned_by': 'HuggingFace'},
-            {'model_name': "CompVis/stable-diffusion-v1-4", 'display_name': "Stable Diffusion 1.4 (Gated)", 'description': "Original SD v1.4. Requires accepting license on Hugging Face and an HF token.", 'owned_by': 'HuggingFace'}
         ]
-
-        # Discover local .safetensors files
         custom_local_models = []
         civitai_filenames = {info['filename'] for info in CIVITAI_MODELS.values()}
         local_safetensors = self.list_safetensor_models()
-
         for filename in local_safetensors:
             if filename not in civitai_filenames:
                 custom_local_models.append({
-                    'model_name': filename,
-                    '
-                    'description': 'Local safetensors file from your models folder.',
-                    'owned_by': 'local_user'
+                    'model_name': filename, 'display_name': filename,
+                    'description': 'Local safetensors file from your models folder.', 'owned_by': 'local_user'
                 })
-
         return civitai_list + hf_default_list + custom_local_models
 
     def load_model(self):
-        ASCIIColors.info("load_model() called. Loading is now automatic.")
+        ASCIIColors.info("load_model() called. Loading is now automatic on first use.")
         if self.model_name and not self.manager:
             self._acquire_manager()
 
@@ -498,26 +606,28 @@
         self.registry.release_manager(self.manager.config)
         self.manager = None
 
-    def generate_image(self, prompt: str, negative_prompt: str = "", width: int = None, height: int = None, **kwargs) -> bytes:
+    def generate_image(self, prompt: str, negative_prompt: str = "", width: int|None = None, height: int|None = None, **kwargs) -> bytes:
         if not self.model_name:
             raise RuntimeError("No model_name configured. Please select a model in settings.")
 
         if not self.manager:
             self._acquire_manager()
-
-        _width = width or self.config["default_width"]
-        _height = height or self.config["default_height"]
-        _num_inference_steps = kwargs.get("num_inference_steps", self.config["num_inference_steps"])
-        _guidance_scale = kwargs.get("guidance_scale", self.config["guidance_scale"])
-        _seed = kwargs.get("seed", self.config["seed"])
 
-
-
+        # Build pipeline arguments, prioritizing kwargs over config defaults
+        seed = kwargs.pop("seed", self.config["seed"])
+        generator = torch.Generator(device=self.config["device"]).manual_seed(seed) if seed != -1 else None
+
         pipeline_args = {
-            "prompt": prompt,
-            "
-            "
+            "prompt": prompt,
+            "negative_prompt": negative_prompt or None,
+            "width": width if width is not None else self.config["default_width"],
+            "height": height if height is not None else self.config["default_height"],
+            "num_inference_steps": self.config["num_inference_steps"],
+            "guidance_scale": self.config["guidance_scale"],
+            "generator": generator,
         }
+        # Allow any other valid pipeline kwargs to be passed through
+        pipeline_args.update(kwargs)
 
         future = Future()
         self.manager.queue.put((future, pipeline_args))
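A usage sketch for the reworked generate_image: width and height fall back to the config defaults, seed is popped and turned into a torch.Generator (-1 means non-deterministic), and any remaining kwargs override the defaults via pipeline_args.update(kwargs). The binding variable continues the construction sketch above; values are illustrative:

png_bytes = binding.generate_image(
    "a watercolor fox in the snow",
    negative_prompt="blurry, low quality",
    width=768, height=768,
    seed=42,                 # popped and turned into a torch.Generator
    num_inference_steps=30,  # overrides the config default via pipeline_args.update(kwargs)
)
with open("fox.png", "wb") as f:
    f.write(png_bytes)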
@@ -531,8 +641,8 @@
             raise Exception(f"Image generation failed: {e}") from e
 
     def list_local_models(self) -> List[str]:
+        # Implementation is unchanged...
         if not self.models_path.exists(): return []
-
         folders = [
             d.name for d in self.models_path.iterdir()
             if d.is_dir() and ((d / "model_index.json").exists() or (d / "unet" / "config.json").exists())
@@ -541,28 +651,22 @@
         return sorted(folders + safetensors)
 
     def list_available_models(self) -> List[str]:
+        # Implementation is unchanged...
         discoverable_models = [m['model_name'] for m in self.listModels()]
         local_models = self.list_local_models()
-
-        combined_list = sorted(list(set(local_models + discoverable_models)))
-        return combined_list
+        return sorted(list(set(local_models + discoverable_models)))
 
     def list_services(self, **kwargs) -> List[Dict[str, str]]:
+        # Implementation is unchanged...
         models = self.list_available_models()
         local_models = self.list_local_models()
-
         if not models:
             return [{"name": "diffusers_no_models", "caption": "No models found", "help": f"Place models in '{self.models_path.resolve()}'."}]
-
         services = []
         for m in models:
             help_text = "Hugging Face model ID"
-            if m in local_models:
-
-            elif m in CIVITAI_MODELS:
-                filename = CIVITAI_MODELS[m]['filename']
-                help_text = f"Civitai model (downloads as {filename})"
-
+            if m in local_models: help_text = f"Local model from: {self.models_path.resolve()}"
+            elif m in CIVITAI_MODELS: help_text = f"Civitai model (downloads as {CIVITAI_MODELS[m]['filename']})"
             services.append({"name": m, "caption": f"Diffusers: {m}", "help": help_text})
         return services
 
@@ -570,6 +674,7 @@
         available_models = self.list_available_models()
         return [
             {"name": "model_name", "type": "str", "value": self.model_name, "description": "Local, Civitai, or Hugging Face model.", "options": available_models},
+            {"name": "unload_inactive_model_after", "type": "int", "value": self.config["unload_inactive_model_after"], "description": "Unload model after X seconds of inactivity (0 to disable)."},
             {"name": "device", "type": "str", "value": self.config["device"], "description": f"Inference device. Resolved: {self.config['device']}", "options": ["auto", "cuda", "mps", "cpu"]},
             {"name": "torch_dtype_str", "type": "str", "value": self.config["torch_dtype_str"], "description": f"Torch dtype. Resolved: {self.config['torch_dtype_str']}", "options": ["auto", "float16", "bfloat16", "float32"]},
             {"name": "hf_variant", "type": "str", "value": self.config["hf_variant"], "description": "HF model variant (e.g., 'fp16')."},
@@ -593,7 +698,7 @@
         parsed_settings = settings if isinstance(settings, dict) else \
                           {item["name"]: item["value"] for item in settings if "name" in item and "value" in item}
 
-        critical_keys = self.registry.
+        critical_keys = self.registry._get_critical_keys()
         needs_manager_swap = False
 
         for key, value in parsed_settings.items():
@@ -609,6 +714,7 @@
             self._acquire_manager()
 
         if not needs_manager_swap and self.manager:
+            # Update non-critical settings on the existing manager
             self.manager.config.update(parsed_settings)
             if 'scheduler_name' in parsed_settings and self.manager.pipeline:
                 with self.manager.lock:
{lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-lollms_client/__init__.py,sha256=
+lollms_client/__init__.py,sha256=rXPMm8tuBofHB9T-0sMMe1e0XF_-OdYFuS1OfUbuKtM,1146
 lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
 lollms_client/lollms_core.py,sha256=zqaxEJJDXiwpDd3E-PFDLJOnmG1d9cOiL_CrYRUa7Z0,167361
 lollms_client/lollms_discussion.py,sha256=wkadV6qiegxOzukMVn5vukdeJivnlyygSzZBkzOi9Gc,106714
@@ -15,6 +15,7 @@ lollms_client/lollms_tts_binding.py,sha256=5cJYECj8PYLJAyB6SEH7_fhHYK3Om-Y3arkyg
 lollms_client/lollms_ttv_binding.py,sha256=KkTaHLBhEEdt4sSVBlbwr5i_g_TlhcrwrT-7DjOsjWQ,4131
 lollms_client/lollms_types.py,sha256=0iSH1QHRRD-ddBqoL9EEKJ8wWCuwDUlN_FrfbCdg7Lw,3522
 lollms_client/lollms_utilities.py,sha256=3DAsII2X9uhRzRL-D0QlALcEdRg82y7OIL4yHVF32gY,19446
+lollms_client/assets/models_ctx_sizes.json,sha256=MzXZFmJv2SC_8GdS33MXLA0cT3YZ0ujliYDFhNXJmLA,15300
 lollms_client/llm_bindings/__init__.py,sha256=9sWGpmWSSj6KQ8H4lKGCjpLYwhnVdL_2N7gXCphPqh4,14
 lollms_client/llm_bindings/azure_openai/__init__.py,sha256=XBDwct0nkvWfpo1J9J9lTOszH_c_4IiCYxEsG6aJLo0,16501
 lollms_client/llm_bindings/claude/__init__.py,sha256=tzt9sR-9WlkgTgDBOtV708ZmuBjMm55fEYhurMnfXO4,24669
@@ -27,7 +28,7 @@ lollms_client/llm_bindings/llamacpp/__init__.py,sha256=4CbNYpfquVEgfsxuLsxQta_dZ
 lollms_client/llm_bindings/lollms/__init__.py,sha256=a4gNH4axiDgsri8NGAcq0OitgYdnzBDLNkzUMhkFArA,24781
 lollms_client/llm_bindings/lollms_webui/__init__.py,sha256=iuDfhZZoLC-PDEPLHrcjk5-962S5c7OeCI7PMdJxI_A,17753
 lollms_client/llm_bindings/mistral/__init__.py,sha256=cddz9xIj8NRFLKHe2JMxzstpUrNIu5s9juci3mhiHfo,14133
-lollms_client/llm_bindings/ollama/__init__.py,sha256=
+lollms_client/llm_bindings/ollama/__init__.py,sha256=W-4Z_lDzNA77e3xniWcPhkHGPlxwdBELVnGe-2y29uw,43587
 lollms_client/llm_bindings/open_router/__init__.py,sha256=cAFWtCWJx0WjIe1w2JReCf6WlAZjrXYA4jZ8l3zqxMs,14915
 lollms_client/llm_bindings/openai/__init__.py,sha256=J8v7XU9TrvXJd1ffwhYkya5YeXxWnNiFuNBAwRfoHDk,26066
 lollms_client/llm_bindings/openllm/__init__.py,sha256=RC9dVeopslS-zXTsSJ7VC4iVsKgZCBwfmccmr_LCHA0,29971
@@ -48,7 +49,7 @@ lollms_client/stt_bindings/whisper/__init__.py,sha256=1Ej67GdRKBy1bba14jMaYDYHiZ
 lollms_client/stt_bindings/whispercpp/__init__.py,sha256=xSAQRjAhljak3vWCpkP0Vmdb6WmwTzPjXyaIB85KLGU,21439
 lollms_client/tti_bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lollms_client/tti_bindings/dalle/__init__.py,sha256=1nE36XamKEJOMpm6QUow8OyM1KdpejCLM0KUSXlcePo,24135
-lollms_client/tti_bindings/diffusers/__init__.py,sha256=
+lollms_client/tti_bindings/diffusers/__init__.py,sha256=YI9-VoqdQafoQgkSS0e5GhPNd30CxfI9m3AzdhNWhbs,37021
 lollms_client/tti_bindings/gemini/__init__.py,sha256=f9fPuqnrBZ1Z-obcoP6EVvbEXNbNCSg21cd5efLCk8U,16707
 lollms_client/tti_bindings/lollms/__init__.py,sha256=5Tnsn4b17djvieQkcjtIDBm3qf0pg5ZWWov-4_2wmo0,8762
 lollms_client/ttm_bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -62,8 +63,8 @@ lollms_client/tts_bindings/piper_tts/__init__.py,sha256=0IEWG4zH3_sOkSb9WbZzkeV5
 lollms_client/tts_bindings/xtts/__init__.py,sha256=FgcdUH06X6ZR806WQe5ixaYx0QoxtAcOgYo87a2qxYc,18266
 lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
 lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lollms_client-1.1.
-lollms_client-1.1.
-lollms_client-1.1.
-lollms_client-1.1.
-lollms_client-1.1.
+lollms_client-1.1.3.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+lollms_client-1.1.3.dist-info/METADATA,sha256=okzCbWUYkhl1WhMt59fGpoPsURWYdluHDxyj2qCiThw,58549
+lollms_client-1.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lollms_client-1.1.3.dist-info/top_level.txt,sha256=Bk_kz-ri6Arwsk7YG-T5VsRorV66uVhcHGvb_g2WqgE,14
+lollms_client-1.1.3.dist-info/RECORD,,
{lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/WHEEL
File without changes
{lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/licenses/LICENSE
File without changes
{lollms_client-1.1.2.dist-info → lollms_client-1.1.3.dist-info}/top_level.txt
File without changes