lollms-client 1.4.1__py3-none-any.whl → 1.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -34
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -14
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +151 -32
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +439 -0
- lollms_client/llm_bindings/ollama/__init__.py +309 -93
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +148 -29
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +465 -0
- lollms_client/llm_bindings/perplexity/__init__.py +326 -0
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +516 -1890
- lollms_client/lollms_discussion.py +55 -18
- lollms_client/lollms_llm_binding.py +112 -261
- lollms_client/lollms_mcp_binding.py +34 -75
- lollms_client/lollms_personality.py +5 -2
- lollms_client/lollms_stt_binding.py +85 -52
- lollms_client/lollms_tti_binding.py +23 -37
- lollms_client/lollms_ttm_binding.py +24 -42
- lollms_client/lollms_tts_binding.py +28 -17
- lollms_client/lollms_ttv_binding.py +24 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +418 -810
- lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +127 -0
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +105 -0
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +178 -0
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +129 -0
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +115 -0
- lollms_client/ttm_bindings/stability_ai/__init__.py +117 -0
- lollms_client/ttm_bindings/topmediai/__init__.py +96 -0
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/METADATA +316 -6
- lollms_client-1.7.10.dist-info/RECORD +89 -0
- lollms_client/ttm_bindings/bark/__init__.py +0 -339
- lollms_client-1.4.1.dist-info/RECORD +0 -78
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/novita_ai/__init__.py
@@ -0,0 +1,439 @@
+import os
+import json
+import requests
+from typing import Optional, Callable, List, Union, Dict
+
+from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
+from lollms_client.lollms_llm_binding import LollmsLLMBinding
+from lollms_client.lollms_types import MSG_TYPE
+from ascii_colors import ASCIIColors, trace_exception
+
+import pipmaster as pm
+
+# Ensure the required packages are installed
+pm.ensure_packages(["requests", "tiktoken"])
+
+import tiktoken
+
+BindingName = "NovitaAIBinding"
+API_BASE_URL = "https://api.novita.ai"
+
+# A hardcoded list of models based on Novita AI's documentation.
+_FALLBACK_MODELS = [
+    {'model_name': 'meta-llama/Llama-3-8B-Instruct', 'display_name': 'Llama 3 8B Instruct', 'description': 'Meta\'s Llama 3 8B instruction-tuned model.', 'owned_by': 'Meta'},
+    {'model_name': 'meta-llama/Llama-3-70B-Instruct', 'display_name': 'Llama 3 70B Instruct', 'description': 'Meta\'s Llama 3 70B instruction-tuned model.', 'owned_by': 'Meta'},
+    {'model_name': 'mistralai/Mixtral-8x7B-Instruct-v0.1', 'display_name': 'Mixtral 8x7B Instruct', 'description': 'Mistral AI\'s Mixtral 8x7B instruction-tuned model.', 'owned_by': 'Mistral AI'},
+    {'model_name': 'mistralai/Mistral-7B-Instruct-v0.2', 'display_name': 'Mistral 7B Instruct v0.2', 'description': 'Mistral AI\'s 7B instruction-tuned model.', 'owned_by': 'Mistral AI'},
+    {'model_name': 'google/gemma-7b-it', 'display_name': 'Gemma 7B IT', 'description': 'Google\'s Gemma 7B instruction-tuned model.', 'owned_by': 'Google'},
+    {'model_name': 'google/gemma-2-9b-it', 'display_name': 'Gemma 2 9B IT', 'description': 'Google\'s next-generation Gemma 2 9B instruction-tuned model.', 'owned_by': 'Google'},
+    {'model_name': 'deepseek/deepseek-r1', 'display_name': 'Deepseek R1', 'description': 'Deepseek R1 reasoning model.', 'owned_by': 'Deepseek AI'},
+    {'model_name': 'deepseek-ai/deepseek-coder-33b-instruct', 'display_name': 'Deepseek Coder 33B Instruct', 'description': 'A powerful coding model from Deepseek AI.', 'owned_by': 'Deepseek AI'},
+]
+
+class NovitaAIBinding(LollmsLLMBinding):
+    """Novita AI-specific binding implementation using their OpenAI-compatible API."""
+
+    def __init__(self, **kwargs):
+        """
+        Initialize the Novita AI binding.
+
+        Args:
+            model_name (str): Name of the Novita AI model to use.
+            service_key (str): Novita AI API key.
+        """
+        super().__init__(BindingName, **kwargs)
+        self.model_name = kwargs.get("model_name")
+        self.service_key = kwargs.get("service_key")
+
+        if not self.service_key:
+            self.service_key = os.getenv("NOVITA_API_KEY")
+
+        if not self.service_key:
+            raise ValueError("Novita AI API key is required. Please set it via the 'service_key' parameter or the NOVITA_API_KEY environment variable.")
+
+        self.headers = {
+            "Authorization": f"Bearer {self.service_key}",
+            "Content-Type": "application/json",
+            "Accept": "application/json"
+        }
+
+    def _construct_parameters(self,
+                              temperature: float,
+                              top_p: float,
+                              n_predict: int,
+                              presence_penalty: float,
+                              frequency_penalty: float) -> Dict[str, any]:
+        """Builds a parameters dictionary for the Novita AI API."""
+        params = {}
+        if temperature is not None: params['temperature'] = float(temperature)
+        if top_p is not None: params['top_p'] = top_p
+        if n_predict is not None: params['max_tokens'] = n_predict
+        if presence_penalty is not None: params['presence_penalty'] = presence_penalty
+        if frequency_penalty is not None: params['frequency_penalty'] = frequency_penalty
+        return params
+
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = 2048,
+                      stream: Optional[bool] = False,
+                      temperature: float = 0.7,
+                      top_k: int = 50, # Not supported by Novita API
+                      top_p: float = 0.9,
+                      repeat_penalty: float = 1.1, # maps to frequency_penalty
+                      repeat_last_n: int = 64, # Not supported
+                      seed: Optional[int] = None, # Not supported
+                      n_threads: Optional[int] = None, # Not applicable
+                      ctx_size: int | None = None, # Determined by model
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False,
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[str] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = False, # auto
+                      ) -> Union[str, dict]:
+        """
+        Generate text using Novita AI.
+        """
+        # Build messages
+        messages = []
+        if system_prompt and system_prompt.strip():
+            messages.append({"role": "system", "content": system_prompt})
+
+        if split:
+            # Simple split logic to support history if provided in prompt string
+            # This is a basic fallback; usually chat() is preferred for history
+            msgs = self.split_discussion(prompt, user_keyword, ai_keyword)
+            messages.extend(msgs)
+        else:
+            messages.append({"role": "user", "content": prompt})
+
+        if images:
+            ASCIIColors.warning("Novita AI API does not support images in this binding yet. They will be ignored.")
+
+        # Construct parameters
+        # Map repeat_penalty to frequency_penalty loosely if needed, or just pass as is if supported
+        # Novita supports standard OpenAI params
+        api_params = self._construct_parameters(
+            temperature, top_p, n_predict, 0.0, repeat_penalty
+        )
+
+        payload = {
+            "model": self.model_name,
+            "messages": messages,
+            "stream": stream,
+            **api_params
+        }
+
+        url = f"{API_BASE_URL}/v1/chat/completions"
+        full_response_text = ""
+
+        try:
+            if stream:
+                with requests.post(url, headers=self.headers, json=payload, stream=True) as response:
+                    response.raise_for_status()
+                    for line in response.iter_lines():
+                        if line:
+                            decoded_line = line.decode('utf-8')
+                            if decoded_line.startswith("data:"):
+                                content = decoded_line[len("data: "):].strip()
+                                if content == "[DONE]":
+                                    break
+                                try:
+                                    chunk = json.loads(content)
+                                    delta = chunk.get("choices", [{}])[0].get("delta", {})
+                                    text_chunk = delta.get("content", "")
+                                    # Deepseek R1 might output thinking in content or reasoning_content field
+                                    # Standard OpenAI compatible R1 usually puts thought in <think> tags or reasoning_content
+                                    reasoning_chunk = delta.get("reasoning_content", "")
+
+                                    if reasoning_chunk:
+                                        # If we get reasoning content field, wrap it in <think> for lollms UI if think is enabled
+                                        if think:
+                                            formatted_reasoning = f"<think>{reasoning_chunk}</think>" # Naive streaming wrap, might be broken tags
+                                            # Better to just stream it if UI handles it, or just text
+                                            if streaming_callback:
+                                                streaming_callback(reasoning_chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+                                        else:
+                                            # If think disabled, we might skip reasoning or just show it?
+                                            # Typically we want to show it.
+                                            pass
+
+                                    if text_chunk:
+                                        full_response_text += text_chunk
+                                        if streaming_callback:
+                                            if not streaming_callback(text_chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                                break
+                                except json.JSONDecodeError:
+                                    continue
+                return full_response_text
+            else:
+                response = requests.post(url, headers=self.headers, json=payload)
+                response.raise_for_status()
+                data = response.json()
+                choice = data["choices"][0]["message"]
+                content = choice.get("content", "")
+                reasoning = choice.get("reasoning_content", "")
+
+                if think and reasoning:
+                    return f"<think>\n{reasoning}\n</think>\n{content}"
+                return content
+
+        except Exception as e:
+            trace_exception(e)
+            return {"status": False, "error": str(e)}
+
+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = 2048,
+             stream: Optional[bool] = False,
+             temperature: float = 0.7,
+             top_k: int = 50, # Not supported by Novita API
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1, # maps to frequency_penalty
+             presence_penalty: Optional[float] = 0.0,
+             seed: Optional[int] = None, # Not supported
+             n_threads: Optional[int] = None, # Not applicable
+             ctx_size: Optional[int] = None, # Determined by model
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             reasoning_effort: Optional[str] = "low", # low, medium, high
+             reasoning_summary: Optional[bool] = False, # auto
+             ) -> Union[str, dict]:
+        """
+        Conduct a chat session with a Novita AI model using a LollmsDiscussion object.
+        """
+        system_prompt = discussion.system_prompt
+        messages = discussion.get_messages(branch_tip_id)
+
+        history = []
+        if system_prompt and system_prompt.strip():
+            history.append({"role": "system", "content": system_prompt})
+
+        for msg in messages:
+            role = 'user' if msg.sender_type == "user" else 'assistant'
+
+            if msg.images:
+                ASCIIColors.warning("Novita AI API does not support images. They will be ignored.")
+
+            if msg.content and msg.content.strip():
+                history.append({"role": role, "content": msg.content})
+
+        if not history:
+            return {"status": "error", "message": "Cannot start chat with an empty discussion."}
+
+        api_params = self._construct_parameters(
+            temperature, top_p, n_predict, presence_penalty, repeat_penalty
+        )
+
+        payload = {
+            "model": self.model_name,
+            "messages": history,
+            "stream": stream,
+            **api_params
+        }
+
+        url = f"{API_BASE_URL}/v1/chat/completions"
+        full_response_text = ""
+
+        try:
+            if stream:
+                with requests.post(url, headers=self.headers, json=payload, stream=True) as response:
+                    response.raise_for_status()
+                    for line in response.iter_lines():
+                        if line:
+                            decoded_line = line.decode('utf-8')
+                            if decoded_line.startswith("data:"):
+                                content = decoded_line[len("data: "):].strip()
+                                if content == "[DONE]":
+                                    break
+                                try:
+                                    chunk = json.loads(content)
+                                    delta = chunk.get("choices", [{}])[0].get("delta", {})
+                                    text_chunk = delta.get("content", "")
+
+                                    # Support for reasoning content if provided (e.g. Deepseek R1)
+                                    reasoning_chunk = delta.get("reasoning_content", "")
+                                    if reasoning_chunk and think:
+                                        # Simple handling: stream it as regular chunk or specific type if supported
+                                        # Lollms typically expects <think> tags in the text if it's mixed
+                                        # Since we can't easily inject tags in a stream without state,
+                                        # we assume the model output might contain them or we just output reasoning.
+                                        # For now, append to text.
+                                        if streaming_callback:
+                                            # We could prefix with <think> if it's the start, but that's complex in stateless loop
+                                            streaming_callback(reasoning_chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+
+                                    if text_chunk:
+                                        full_response_text += text_chunk
+                                        if streaming_callback:
+                                            if not streaming_callback(text_chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                                break
+                                except json.JSONDecodeError:
+                                    ASCIIColors.error(f"Failed to decode JSON chunk: {content}")
+                                    continue
+                return full_response_text
+            else:
+                response = requests.post(url, headers=self.headers, json=payload)
+                response.raise_for_status()
+                data = response.json()
+                choice = data["choices"][0]["message"]
+                content = choice.get("content", "")
+                reasoning = choice.get("reasoning_content", "")
+
+                if think and reasoning:
+                    return f"<think>\n{reasoning}\n</think>\n{content}"
+
+                return content
+
+        except requests.exceptions.HTTPError as e:
+            try:
+                error_details = e.response.json()
+                error_message = error_details.get("error", {}).get("message", e.response.text)
+            except json.JSONDecodeError:
+                error_message = e.response.text
+            ASCIIColors.error(f"HTTP Error received from Novita AI API: {e.response.status_code} - {error_message}")
+            return {"status": "error", "message": f"HTTP Error: {e.response.status_code} - {error_message}"}
+        except requests.exceptions.RequestException as e:
+            error_message = f"An error occurred with the Novita AI API: {e}"
+            trace_exception(e)
+            return {"status": "error", "message": str(e)}
+
+    def tokenize(self, text: str) -> list:
+        """
+        Tokenize the input text. Novita uses an OpenAI-compatible API,
+        so we use the same tokenizer as GPT-4.
+        """
+        try:
+            encoding = tiktoken.get_encoding("cl100k_base")
+            return encoding.encode(text)
+        except Exception as e:
+            ASCIIColors.error(f"Could not use tiktoken, falling back to simple encoding: {e}")
+            return list(text.encode('utf-8'))
+
+    def detokenize(self, tokens: list) -> str:
+        """
+        Detokenize a list of tokens.
+        """
+        try:
+            encoding = tiktoken.get_encoding("cl100k_base")
+            return encoding.decode(tokens)
+        except Exception as e:
+            ASCIIColors.error(f"Could not use tiktoken, falling back to simple decoding: {e}")
+            return bytes(tokens).decode('utf-8', errors='ignore')
+
+    def count_tokens(self, text: str) -> int:
+        """
+        Count tokens from a text.
+        """
+        return len(self.tokenize(text))
+
+    def embed(self, text: str, **kwargs) -> List[float]:
+        """
+        Get embeddings for the input text.
+        """
+        ASCIIColors.warning("Novita AI does not offer a public embedding API via this binding. This method is not implemented.")
+        raise NotImplementedError("Novita AI binding does not support embeddings.")
+
+    def get_model_info(self) -> dict:
+        """Return information about the current model setup."""
+        return {
+            "name": self.binding_name,
+            "host_address": API_BASE_URL,
+            "model_name": self.model_name,
+            "supports_vision": False
+        }
+
+    def list_models(self) -> List[Dict[str, str]]:
+        """
+        Lists available models. Novita AI API does not have a models endpoint,
+        so a hardcoded list from their documentation is returned.
+        """
+        return sorted(_FALLBACK_MODELS, key=lambda x: x['display_name'])
+
+    def load_model(self, model_name: str) -> bool:
+        """Set the model name for subsequent operations."""
+        self.model_name = model_name
+        ASCIIColors.info(f"Novita AI model set to: {model_name}.")
+        return True
+
+if __name__ == '__main__':
+    if 'NOVITA_API_KEY' not in os.environ:
+        ASCIIColors.red("Error: NOVITA_API_KEY environment variable not set.")
+        print("Please get your key from novita.ai and set it.")
+        exit(1)
+
+    ASCIIColors.yellow("--- Testing NovitaAIBinding ---")
+
+    test_model_name = "meta-llama/Llama-3-8B-Instruct"
+
+    try:
+        # --- Initialization ---
+        ASCIIColors.cyan("\n--- Initializing Binding ---")
+        binding = NovitaAIBinding(model_name=test_model_name)
+        ASCIIColors.green("Binding initialized successfully.")
+
+        # --- List Models ---
+        ASCIIColors.cyan("\n--- Listing Models (static list) ---")
+        models = binding.list_models()
+        if models:
+            ASCIIColors.green(f"Found {len(models)} models.")
+            for m in models:
+                print(f"- {m['model_name']} ({m['display_name']})")
+        else:
+            ASCIIColors.error("Failed to list models.")
+
+        # --- Count Tokens ---
+        ASCIIColors.cyan("\n--- Counting Tokens ---")
+        sample_text = "Hello, world! This is a test."
+        token_count = binding.count_tokens(sample_text)
+        ASCIIColors.green(f"Token count for '{sample_text}': {token_count}")
+
+        # --- Chat (Non-Streaming) ---
+        ASCIIColors.cyan("\n--- Chat (Non-Streaming) ---")
+        discussion_non_stream = LollmsDiscussion.from_messages(
+            messages=[
+                {"sender":"user", "content": "What is the largest planet in our solar system?"}
+            ],
+            system_prompt="You are a helpful and concise astronomical assistant."
+        )
+        ASCIIColors.info(f"Prompt: What is the largest planet in our solar system?")
+        generated_text = binding.chat(discussion_non_stream, n_predict=50, stream=False)
+        if isinstance(generated_text, str):
+            ASCIIColors.green(f"Generated text:\n{generated_text}")
+        else:
+            ASCIIColors.error(f"Generation failed: {generated_text}")
+
+        # --- Chat (Streaming) ---
+        ASCIIColors.cyan("\n--- Chat (Streaming) ---")
+
+        captured_chunks = []
+        def stream_callback(chunk: str, msg_type: int):
+            ASCIIColors.green(chunk, end="", flush=True)
+            captured_chunks.append(chunk)
+            return True
+
+        discussion_stream = LollmsDiscussion.from_messages(
+            messages=[
+                {"sender":"user", "content": "Explain the concept of photosynthesis in one short paragraph."}
+            ]
+        )
+        ASCIIColors.info(f"Prompt: Explain the concept of photosynthesis in one short paragraph.")
+        result = binding.chat(
+            discussion_stream,
+            n_predict=150,
+            stream=True,
+            streaming_callback=stream_callback
+        )
+        print("\n--- End of Stream ---")
+        full_streamed_text = "".join(captured_chunks)
+        assert result == full_streamed_text
+
+    except Exception as e:
+        ASCIIColors.error(f"An error occurred during testing: {e}")
+        trace_exception(e)
+
+    ASCIIColors.yellow("\nNovitaAIBinding test finished.")