lollms-client 1.5.6__py3-none-any.whl → 1.7.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -35
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -15
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llama_cpp_server/__init__.py +605 -0
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +76 -21
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
- lollms_client/llm_bindings/ollama/__init__.py +345 -89
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +81 -20
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +333 -171
- lollms_client/llm_bindings/perplexity/__init__.py +2 -2
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +512 -1890
- lollms_client/lollms_discussion.py +65 -39
- lollms_client/lollms_llm_binding.py +126 -261
- lollms_client/lollms_mcp_binding.py +49 -77
- lollms_client/lollms_stt_binding.py +99 -52
- lollms_client/lollms_tti_binding.py +38 -38
- lollms_client/lollms_ttm_binding.py +38 -42
- lollms_client/lollms_tts_binding.py +43 -18
- lollms_client/lollms_ttv_binding.py +38 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +464 -803
- lollms_client/tti_bindings/diffusers/server/main.py +1062 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +7 -4
- lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
- lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/METADATA +113 -5
- lollms_client-1.7.13.dist-info/RECORD +90 -0
- lollms_client-1.5.6.dist-info/RECORD +0 -87
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/WHEEL +0 -0
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/lollms/__init__.py

```diff
@@ -1,4 +1,4 @@
-# bindings/
+# bindings/lollms/__init__.py
 import requests
 import json
 from lollms_client.lollms_llm_binding import LollmsLLMBinding
```
```diff
@@ -94,6 +94,7 @@ class LollmsBinding(LollmsLLMBinding):
             service_key (str): Authentication key for the service. Defaults to None. This is a key generated
             on the lollms interface (it is advised to use LOLLMS_API_KEY environment variable instead)
             verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
+            certificate_file_path (str): Path to a specific certificate file for SSL verification.
             personality (Optional[int]): Ignored parameter for compatibility with LollmsLLMBinding.
         """
         super().__init__(BindingName, **kwargs)
```
```diff
@@ -103,11 +104,16 @@ class LollmsBinding(LollmsLLMBinding):
         self.model_name=kwargs.get("model_name")
         self.service_key=kwargs.get("service_key")
         self.verify_ssl_certificate=kwargs.get("verify_ssl_certificate", True)
+        self.certificate_file_path=kwargs.get("certificate_file_path")
         self.default_completion_format=kwargs.get("default_completion_format", ELF_COMPLETION_FORMAT.Chat)
 
         if not self.service_key:
             self.service_key = os.getenv("LOLLMS_API_KEY", self.service_key)
-
+
+        # Determine verification strategy: specific file takes precedence, otherwise boolean flag
+        verify = self.certificate_file_path if self.certificate_file_path else self.verify_ssl_certificate
+
+        self.client = openai.OpenAI(api_key=self.service_key, base_url=None if self.host_address is None else self.host_address if len(self.host_address)>0 else None, http_client=httpx.Client(verify=verify))
         self.completion_format = ELF_COMPLETION_FORMAT.Chat
 
     def lollms_listMountedPersonalities(self, host_address:str|None=None):
```
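The new `certificate_file_path` option works because `httpx.Client(verify=...)` accepts either a boolean or a path to a CA bundle, so the same `verify` variable can carry both cases. A minimal sketch of the same idea outside the binding (the host, key, and bundle path below are placeholders, not values taken from the package):

```python
import httpx
import openai

# verify may be True/False or a path to a CA bundle file; httpx accepts both forms.
certificate_file_path = "/path/to/corporate_ca.pem"   # hypothetical bundle path
verify = certificate_file_path if certificate_file_path else True

client = openai.OpenAI(
    api_key="my-lollms-key",                # placeholder key
    base_url="http://localhost:9642/v1",    # placeholder lollms host
    http_client=httpx.Client(verify=verify),
)
```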
```diff
@@ -149,6 +155,12 @@ class LollmsBinding(LollmsLLMBinding):
             "stop", "max_tokens", "presence_penalty", "frequency_penalty",
             "logit_bias", "stream", "user", "max_completion_tokens"
         }
+        if kwargs.get("think", False):
+            allowed_params.append("reasoning")
+            kwargs["reasoning"]={
+                "effort": allowed_params.append("reasoning_effort", "low"),
+                "summary": allowed_params.append("reasoning_summary", "auto")
+            }
 
         params = {
             "model": model,
```
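As it appears in the diff, the new `think` branch calls `.append` on `allowed_params`, which is built with `{...}` and is therefore a set (sets have no `append`), and it uses the return values of those calls as the `reasoning` values. A hedged sketch of what the block presumably intends, assuming the effort and summary settings arrive through `kwargs`:

```python
# Hypothetical reconstruction of the intended logic; not the code shipped in the wheel.
if kwargs.get("think", False):
    allowed_params.add("reasoning")  # allowed_params is a set, so add(), not append()
    kwargs["reasoning"] = {
        "effort": kwargs.get("reasoning_effort", "low"),     # low, medium, high
        "summary": kwargs.get("reasoning_summary", "auto"),  # auto
    }
```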
```diff
@@ -190,7 +202,11 @@ class LollmsBinding(LollmsLLMBinding):
                       streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
                       split: Optional[bool] = False,
                       user_keyword: Optional[str] = "!@>user:",
-                      ai_keyword: Optional[str] = "!@>assistant:"
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
+                      **kwargs
                       ) -> Union[str, dict]:
 
         count = 0
```
```diff
@@ -227,12 +243,16 @@ class LollmsBinding(LollmsLLMBinding):
             temperature=temperature,
             top_p=top_p,
             repeat_penalty=repeat_penalty,
-            seed=seed
+            seed=seed,
+            think = think,
+            reasoning_effort=reasoning_effort,
+            reasoning_summary=reasoning_summary
+            )
         try:
             chat_completion = self.client.chat.completions.create(**params)
         except Exception as ex:
             # exception for new openai models
-            params["max_completion_tokens"]=params
+            params["max_completion_tokens"]=params.get("max_tokens") or params.get("max_completion_tokens") or self.default_ctx_size
             params["temperature"]=1
             try: del params["max_tokens"]
             except Exception: pass
```
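This retry branch exists because some newer OpenAI models reject `max_tokens` and non-default `temperature`; the fix now derives `max_completion_tokens` from whichever limit is actually present instead of assigning the whole `params` dict to it. Roughly, the fallback behaves like this (names taken from the diff; `client`, `params`, and `default_ctx_size` are assumed from the surrounding method, and the flow is simplified for illustration):

```python
# Illustrative retry flow only; the binding's real error handling is broader.
try:
    chat_completion = client.chat.completions.create(**params)
except Exception:
    limit = params.get("max_tokens") or params.get("max_completion_tokens") or default_ctx_size
    params["max_completion_tokens"] = limit  # newer models expect this field
    params["temperature"] = 1                # and only the default temperature
    params.pop("max_tokens", None)
    chat_completion = client.chat.completions.create(**params)
```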
```diff
@@ -263,7 +283,10 @@ class LollmsBinding(LollmsLLMBinding):
             temperature=temperature,
             top_p=top_p,
             repeat_penalty=repeat_penalty,
-            seed=seed
+            seed=seed,
+            think = think,
+            reasoning_effort=reasoning_effort,
+            reasoning_summary=reasoning_summary)
         try:
             completion = self.client.completions.create(**params)
         except Exception as ex:
```
```diff
@@ -316,6 +339,9 @@ class LollmsBinding(LollmsLLMBinding):
                       n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
                       **kwargs
                       ) -> Union[str, dict]:
         # Build the request parameters
```
```diff
@@ -364,20 +390,22 @@ class LollmsBinding(LollmsLLMBinding):
         return output
 
     def chat(self,
-             # (previous chat() signature: 14 lines not rendered in the source diff view)
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             **kwargs
+             ) -> Union[str, dict]:
         """
         Conduct a chat session with the OpenAI model using a LollmsDiscussion object.
```
```diff
@@ -564,7 +592,7 @@ class LollmsBinding(LollmsLLMBinding):
             "model_name": self.model_name
         }
 
-    def
+    def list_models(self) -> List[Dict]:
         # Known context lengths
         known_context_lengths = {
             "gpt-4o": 128000,
```
```diff
@@ -643,3 +671,30 @@ class LollmsBinding(LollmsLLMBinding):
         self.model = model_name
         self.model_name = model_name
         return True
+
+    def ps(self):
+        """
+        List models (simulating a process status command).
+        Since Lollms/OpenAI API doesn't have a specific 'ps' endpoint for running models with memory stats,
+        we list available models and populate structure with available info, leaving hardware stats empty.
+        """
+        # Since there is no dedicated ps endpoint to see *running* models in the standard OpenAI API,
+        # we list available models and try to map relevant info.
+        models = self.list_models()
+        standardized_models = []
+        for m in models:
+            standardized_models.append({
+                "model_name": m.get("model_name"),
+                "size": None,
+                "vram_size": None,
+                "gpu_usage_percent": None,
+                "cpu_usage_percent": None,
+                "expires_at": None,
+                "parameters_size": None,
+                "quantization_level": None,
+                "parent_model": None,
+                "context_size": m.get("context_length"),
+                "owned_by": m.get("owned_by"),
+                "created": m.get("created")
+            })
+        return standardized_models
```
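A quick way to exercise the new method once a binding exists; the constructor arguments below are placeholders, and `ps()` only fills in model metadata while leaving the hardware fields as `None`:

```python
# Hypothetical usage; host address, key, and model name are placeholders.
binding = LollmsBinding(host_address="http://localhost:9642",
                        service_key="my-lollms-key",
                        model_name="gpt-4o")
for entry in binding.ps():
    print(entry["model_name"], entry["context_size"], entry["owned_by"])
```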
lollms_client/llm_bindings/mistral/__init__.py

```diff
@@ -224,7 +224,7 @@ class MistralBinding(LollmsLLMBinding):
             "supports_vision": False, # Mistral API does not currently support vision
         }
 
-    def
+    def list_models(self) -> List[Dict[str, str]]:
         """Lists available models from the Mistral service."""
         if not self.client:
             ASCIIColors.error("Mistral client not initialized. Cannot list models.")
```
```diff
@@ -273,7 +273,7 @@ if __name__ == '__main__':
 
     # --- List Models ---
     ASCIIColors.cyan("\n--- Listing Models ---")
-    models = binding.
+    models = binding.list_models()
     if models:
         ASCIIColors.green(f"Found {len(models)} models on Mistral. Available models:")
         for m in models:
```
lollms_client/llm_bindings/novita_ai/__init__.py

```diff
@@ -19,8 +19,6 @@ BindingName = "NovitaAIBinding"
 API_BASE_URL = "https://api.novita.ai"
 
 # A hardcoded list of models based on Novita AI's documentation.
-# The API is OpenAI-compatible but does not provide a models listing endpoint.
-# Sourced from: https://docs.novita.ai/language-model/models
 _FALLBACK_MODELS = [
     {'model_name': 'meta-llama/Llama-3-8B-Instruct', 'display_name': 'Llama 3 8B Instruct', 'description': 'Meta\'s Llama 3 8B instruction-tuned model.', 'owned_by': 'Meta'},
     {'model_name': 'meta-llama/Llama-3-70B-Instruct', 'display_name': 'Llama 3 70B Instruct', 'description': 'Meta\'s Llama 3 70B instruction-tuned model.', 'owned_by': 'Meta'},
```
```diff
@@ -28,6 +26,7 @@ _FALLBACK_MODELS = [
     {'model_name': 'mistralai/Mistral-7B-Instruct-v0.2', 'display_name': 'Mistral 7B Instruct v0.2', 'description': 'Mistral AI\'s 7B instruction-tuned model.', 'owned_by': 'Mistral AI'},
     {'model_name': 'google/gemma-7b-it', 'display_name': 'Gemma 7B IT', 'description': 'Google\'s Gemma 7B instruction-tuned model.', 'owned_by': 'Google'},
     {'model_name': 'google/gemma-2-9b-it', 'display_name': 'Gemma 2 9B IT', 'description': 'Google\'s next-generation Gemma 2 9B instruction-tuned model.', 'owned_by': 'Google'},
+    {'model_name': 'deepseek/deepseek-r1', 'display_name': 'Deepseek R1', 'description': 'Deepseek R1 reasoning model.', 'owned_by': 'Deepseek AI'},
     {'model_name': 'deepseek-ai/deepseek-coder-33b-instruct', 'display_name': 'Deepseek Coder 33B Instruct', 'description': 'A powerful coding model from Deepseek AI.', 'owned_by': 'Deepseek AI'},
 ]
 
```
```diff
@@ -73,6 +72,119 @@ class NovitaAIBinding(LollmsLLMBinding):
         if frequency_penalty is not None: params['frequency_penalty'] = frequency_penalty
         return params
 
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = 2048,
+                      stream: Optional[bool] = False,
+                      temperature: float = 0.7,
+                      top_k: int = 50, # Not supported by Novita API
+                      top_p: float = 0.9,
+                      repeat_penalty: float = 1.1, # maps to frequency_penalty
+                      repeat_last_n: int = 64, # Not supported
+                      seed: Optional[int] = None, # Not supported
+                      n_threads: Optional[int] = None, # Not applicable
+                      ctx_size: int | None = None, # Determined by model
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False,
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[str] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = False, # auto
+                      ) -> Union[str, dict]:
+        """
+        Generate text using Novita AI.
+        """
+        # Build messages
+        messages = []
+        if system_prompt and system_prompt.strip():
+            messages.append({"role": "system", "content": system_prompt})
+
+        if split:
+            # Simple split logic to support history if provided in prompt string
+            # This is a basic fallback; usually chat() is preferred for history
+            msgs = self.split_discussion(prompt, user_keyword, ai_keyword)
+            messages.extend(msgs)
+        else:
+            messages.append({"role": "user", "content": prompt})
+
+        if images:
+            ASCIIColors.warning("Novita AI API does not support images in this binding yet. They will be ignored.")
+
+        # Construct parameters
+        # Map repeat_penalty to frequency_penalty loosely if needed, or just pass as is if supported
+        # Novita supports standard OpenAI params
+        api_params = self._construct_parameters(
+            temperature, top_p, n_predict, 0.0, repeat_penalty
+        )
+
+        payload = {
+            "model": self.model_name,
+            "messages": messages,
+            "stream": stream,
+            **api_params
+        }
+
+        url = f"{API_BASE_URL}/v1/chat/completions"
+        full_response_text = ""
+
+        try:
+            if stream:
+                with requests.post(url, headers=self.headers, json=payload, stream=True) as response:
+                    response.raise_for_status()
+                    for line in response.iter_lines():
+                        if line:
+                            decoded_line = line.decode('utf-8')
+                            if decoded_line.startswith("data:"):
+                                content = decoded_line[len("data: "):].strip()
+                                if content == "[DONE]":
+                                    break
+                                try:
+                                    chunk = json.loads(content)
+                                    delta = chunk.get("choices", [{}])[0].get("delta", {})
+                                    text_chunk = delta.get("content", "")
+                                    # Deepseek R1 might output thinking in content or reasoning_content field
+                                    # Standard OpenAI compatible R1 usually puts thought in <think> tags or reasoning_content
+                                    reasoning_chunk = delta.get("reasoning_content", "")
+
+                                    if reasoning_chunk:
+                                        # If we get reasoning content field, wrap it in <think> for lollms UI if think is enabled
+                                        if think:
+                                            formatted_reasoning = f"<think>{reasoning_chunk}</think>" # Naive streaming wrap, might be broken tags
+                                            # Better to just stream it if UI handles it, or just text
+                                            if streaming_callback:
+                                                streaming_callback(reasoning_chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+                                        else:
+                                            # If think disabled, we might skip reasoning or just show it?
+                                            # Typically we want to show it.
+                                            pass
+
+                                    if text_chunk:
+                                        full_response_text += text_chunk
+                                        if streaming_callback:
+                                            if not streaming_callback(text_chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                                break
+                                except json.JSONDecodeError:
+                                    continue
+                return full_response_text
+            else:
+                response = requests.post(url, headers=self.headers, json=payload)
+                response.raise_for_status()
+                data = response.json()
+                choice = data["choices"][0]["message"]
+                content = choice.get("content", "")
+                reasoning = choice.get("reasoning_content", "")
+
+                if think and reasoning:
+                    return f"<think>\n{reasoning}\n</think>\n{content}"
+                return content
+
+        except Exception as e:
+            trace_exception(e)
+            return {"status": False, "error": str(e)}
+
     def chat(self,
              discussion: LollmsDiscussion,
              branch_tip_id: Optional[str] = None,
```
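A minimal way to drive the new streaming path, assuming a `NovitaAIBinding` instance is already configured (the prompt and callback below are illustrative; the callback returns `True` to keep receiving chunks, since the loop stops when it returns a falsy value):

```python
from lollms_client.lollms_types import MSG_TYPE

def on_chunk(chunk: str, msg_type: MSG_TYPE) -> bool:
    # Print chunks as they arrive; return True to keep streaming, False to stop early.
    print(chunk, end="", flush=True)
    return True

# Hypothetical call; `binding` is an already-constructed NovitaAIBinding.
result = binding.generate_text(
    "Summarize the difference between top_p and temperature.",
    stream=True,
    think=True,
    streaming_callback=on_chunk,
)
```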
```diff
@@ -86,7 +198,10 @@ class NovitaAIBinding(LollmsLLMBinding):
              seed: Optional[int] = None, # Not supported
              n_threads: Optional[int] = None, # Not applicable
              ctx_size: Optional[int] = None, # Determined by model
-             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             reasoning_effort: Optional[str] = "low", # low, medium, high
+             reasoning_summary: Optional[bool] = False, # auto
              ) -> Union[str, dict]:
         """
         Conduct a chat session with a Novita AI model using a LollmsDiscussion object.
```
```diff
@@ -139,6 +254,19 @@ class NovitaAIBinding(LollmsLLMBinding):
                                 chunk = json.loads(content)
                                 delta = chunk.get("choices", [{}])[0].get("delta", {})
                                 text_chunk = delta.get("content", "")
+
+                                # Support for reasoning content if provided (e.g. Deepseek R1)
+                                reasoning_chunk = delta.get("reasoning_content", "")
+                                if reasoning_chunk and think:
+                                    # Simple handling: stream it as regular chunk or specific type if supported
+                                    # Lollms typically expects <think> tags in the text if it's mixed
+                                    # Since we can't easily inject tags in a stream without state,
+                                    # we assume the model output might contain them or we just output reasoning.
+                                    # For now, append to text.
+                                    if streaming_callback:
+                                        # We could prefix with <think> if it's the start, but that's complex in stateless loop
+                                        streaming_callback(reasoning_chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+
                                 if text_chunk:
                                     full_response_text += text_chunk
                                     if streaming_callback:
```
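The inline comments above note that wrapping streamed `reasoning_content` in `<think>` tags is awkward inside a stateless loop. One possible approach, sketched here as a hypothetical helper rather than anything shipped in the package, is a small stateful wrapper around the callback that opens the tag on the first reasoning chunk and closes it when normal content resumes:

```python
from lollms_client.lollms_types import MSG_TYPE

class ThinkTagStreamer:
    """Hypothetical helper: emits <think>/</think> around streamed reasoning chunks."""
    def __init__(self, callback):
        self.callback = callback
        self.in_think = False

    def reasoning(self, chunk: str) -> bool:
        # Open the think block the first time reasoning content appears.
        if not self.in_think:
            self.in_think = True
            self.callback("<think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
        return self.callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)

    def content(self, chunk: str) -> bool:
        # Close the think block when normal content resumes.
        if self.in_think:
            self.in_think = False
            self.callback("\n</think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
        return self.callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
```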
```diff
@@ -152,7 +280,15 @@ class NovitaAIBinding(LollmsLLMBinding):
             response = requests.post(url, headers=self.headers, json=payload)
             response.raise_for_status()
             data = response.json()
-
+            choice = data["choices"][0]["message"]
+            content = choice.get("content", "")
+            reasoning = choice.get("reasoning_content", "")
+
+            if think and reasoning:
+                return f"<think>\n{reasoning}\n</think>\n{content}"
+
+            return content
+
         except requests.exceptions.HTTPError as e:
             try:
                 error_details = e.response.json()
```
```diff
@@ -211,7 +347,7 @@ class NovitaAIBinding(LollmsLLMBinding):
             "supports_vision": False
         }
 
-    def
+    def list_models(self) -> List[Dict[str, str]]:
         """
         Lists available models. Novita AI API does not have a models endpoint,
         so a hardcoded list from their documentation is returned.
```
```diff
@@ -242,7 +378,7 @@ if __name__ == '__main__':
 
     # --- List Models ---
     ASCIIColors.cyan("\n--- Listing Models (static list) ---")
-    models = binding.
+    models = binding.list_models()
     if models:
         ASCIIColors.green(f"Found {len(models)} models.")
         for m in models:
```