lollms-client 0.13.0__py3-none-any.whl → 0.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic.

examples/simple_text_gen_test.py CHANGED
@@ -1,4 +1,4 @@
- from lollms_client import LollmsClient, ELF_COMPLETION_FORMAT
+ from lollms_client import LollmsClient
  from lollms_client.lollms_types import MSG_TYPE # For callback signature
  from ascii_colors import ASCIIColors, trace_exception

@@ -97,6 +97,8 @@ def test_text_generation():
      print(emb)

      # else: if callback returns False early, response_stream might be partial.
+     nb_tokens = lc.count_tokens("")
+     ASCIIColors.yellow("Number of tokens of : Testing count of tokens\n"+f"{nb_tokens}")

      # 3. Test generation with a specific model (if applicable and different from default)
      # This tests the switch_model or model loading mechanism of the binding.
@@ -168,4 +170,4 @@ def test_text_generation():
          trace_exception(e)

  if __name__ == "__main__":
-     test_text_generation()
+     test_text_generation()
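
As context for the example change above, the new lines exercise the client-side token counting path end to end. Below is a minimal, hedged sketch of that flow in the example's own style; the binding name, model name, and constructor arguments are illustrative assumptions, not values taken from the package.

    # Hedged sketch: counting tokens through LollmsClient, mirroring the new example lines.
    from lollms_client import LollmsClient
    from ascii_colors import ASCIIColors

    lc = LollmsClient(binding_name="ollama", model_name="mistral")  # assumed constructor arguments
    text = "Testing count of tokens"
    nb_tokens = lc.count_tokens(text)  # delegates to the active binding's count_tokens
    ASCIIColors.yellow(f"Number of tokens of: {text}\n{nb_tokens}")
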
lollms_client/__init__.py CHANGED
@@ -6,7 +6,7 @@ from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
  from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
  from lollms_client.lollms_functions import FunctionCalling_Library

- __version__ = "0.13.0"
+ __version__ = "0.13.2"

  # Optionally, you could define __all__ if you want to be explicit about exports
  __all__ = [
@@ -18,4 +18,4 @@ __all__ = [
      "LollmsMessage",
      "PromptReshaper",
      "FunctionCalling_Library"
- ]
+ ]
lollms_client/llm_bindings/ollama/__init__.py CHANGED
@@ -10,72 +10,55 @@ from typing import Optional, Callable, List, Union, Dict

  from ascii_colors import ASCIIColors, trace_exception
  import pipmaster as pm
- pm.ensure_packages(["ollama","pillow"])
+ pm.ensure_packages(["ollama","pillow","tiktoken"])


  import ollama
+ import tiktoken
  BindingName = "OllamaBinding"


  def count_tokens_ollama(
      text_to_tokenize: str,
      model_name: str,
-     ollama_host: str = "http://localhost:11434",
-     timeout: int = 30,
-     verify_ssl_certificate: bool = True,
-     headers: Optional[Dict[str, str]] = None
+     ollama_client: ollama.Client,
  ) -> int:
      """
-     Counts the number of tokens in a given text using a specified Ollama model
-     by calling the Ollama server's /api/tokenize endpoint.
+     Counts the number of tokens in a given text for a specified Ollama model
+     by making a minimal request to the chat endpoint and extracting
+     the 'prompt_eval_count' from the response.
+
+     This method is generally more accurate for the specific Ollama model instance
+     than using an external tokenizer, but it incurs the overhead of an API call
+     and model processing for the prompt.

      Args:
-         text_to_tokenize (str): The text to be tokenized.
-         model_name (str): The name of the Ollama model to use (e.g., "llama3", "mistral").
-         ollama_host (str): The base URL of the Ollama server (default: "http://localhost:11434").
-         timeout (int): Timeout for the request in seconds (default: 30).
-         verify_ssl_certificate (bool): Whether to verify SSL.
-         headers (Optional[Dict[str, str]]): Optional headers for the request.
+         text_to_tokenize: The string to tokenize.
+         model_name: The name of the Ollama model (e.g., "llama3:8b", "mistral").
+         ollama_client: The ollama.Client instance used to reach the Ollama server.

      Returns:
-         int: The number of tokens. Returns -1 if an error occurs.
-     """
-     api_url = f"{ollama_host.rstrip('/')}/api/tokenize"
-     payload = {
-         "model": model_name,
-         "prompt": text_to_tokenize
-     }
-     request_headers = headers if headers else {}
-
-     try:
-         response = requests.post(api_url, json=payload, timeout=timeout, verify=verify_ssl_certificate, headers=request_headers)
-         response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
-
-         response_data = response.json()
-
-         if "tokens" in response_data and isinstance(response_data["tokens"], list):
-             return len(response_data["tokens"])
-         else:
-             ASCIIColors.warning(
-                 f"Ollama response for token count did not contain a 'tokens' list. Response: {response_data}"
-             )
-             return -1 # Or raise ValueError
-
-     except requests.exceptions.HTTPError as http_err:
-         ASCIIColors.error(f"HTTP error occurred during token count: {http_err} - {http_err.response.text if http_err.response else 'No response text'}")
-         return -1
-     except requests.exceptions.RequestException as req_err:
-         ASCIIColors.error(f"Request error occurred during token count: {req_err}")
-         return -1
-     except json.JSONDecodeError as json_err:
-         ASCIIColors.error(
-             f"Failed to decode JSON response from Ollama during token count: {json_err}. Response text: {response.text if hasattr(response, 'text') else 'No response object'}"
-         )
-         return -1
-     except Exception as e:
-         ASCIIColors.error(f"An unexpected error occurred during token count: {e}")
-         return -1
+         The number of tokens as reported by 'prompt_eval_count'.

+     Raises:
+         ollama.ResponseError: If the request to the Ollama server fails.
+         AttributeError: If 'prompt_eval_count' is not present in the response.
+     """
+     res = ollama_client.chat(
+         model=model_name,
+         messages=[{"role":"system","content":""},{"role":"user", "content":text_to_tokenize}],
+         stream=False,options={"num_predict":1}
+     )
+
+     return res.prompt_eval_count-5
  class OllamaBinding(LollmsLLMBinding):
      """Ollama-specific binding implementation using the ollama-python library."""

@@ -132,6 +115,7 @@ class OllamaBinding(LollmsLLMBinding):
                        images: Optional[List[str]] = None, # List of image file paths
                        n_predict: Optional[int] = None,
                        stream: bool = False,
+                       system_prompt = '',
                        temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
                        top_k: int = 40, # Ollama default is 40
                        top_p: float = 0.9, # Ollama default is 0.9
@@ -191,7 +175,7 @@ class OllamaBinding(LollmsLLMBinding):
              # If images were base64 strings, they would need decoding to bytes first.
              processed_images.append(img_path)

-         messages = [{'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None}]
+         messages = [{'role': 'system', 'content':system_prompt},{'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None}]

          if stream:
              response_stream = self.ollama_client.chat(
@@ -253,53 +237,31 @@ class OllamaBinding(LollmsLLMBinding):
              trace_exception(ex)
              return {"status": False, "error": error_message}

-     def tokenize(self, text: str) -> List[Union[int, str]]:
+     def tokenize(self, text: str) -> list:
          """
-         Tokenize the input text. For Ollama, this is complex as tokenization is model-specific
-         and best done by the server. This method provides a basic character-level tokenization
-         as a fallback or placeholder, or one could attempt to call /api/tokenize if desired.
-         The `count_tokens` method is more accurate for Ollama.
+         Tokenize the input text into a list of token ids, approximated with tiktoken.

          Args:
              text (str): The text to tokenize.

          Returns:
-             list: List of tokens (characters or token IDs if /api/tokenize is used).
+             list: List of token ids.
          """
-         # Basic character-level tokenization
-         # return list(text)
-
-         # For actual token IDs (slower, makes a network request):
-         api_url = f"{self.host_address.rstrip('/')}/api/tokenize"
-         payload = {"model": self.model_name, "prompt": text}
-         try:
-             response = requests.post(api_url, json=payload, timeout=10, verify=self.verify_ssl_certificate, headers=self.ollama_client_headers)
-             response.raise_for_status()
-             return response.json().get("tokens", [])
-         except Exception as e:
-             ASCIIColors.warning(f"Failed to tokenize text with Ollama server, falling back to char tokens: {e}")
-             return list(text)
-
-     def detokenize(self, tokens: List[Union[int,str]]) -> str:
+         ## Since Ollama exposes no tokenization endpoint, we use tiktoken for a rough estimate
+         return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
+
+     def detokenize(self, tokens: list) -> str:
          """
-         Convert a list of tokens back to text. If tokens are characters, joins them.
-         If tokens are IDs, this is non-trivial without the model's tokenizer.
+         Convert a list of tokens back to text.

          Args:
-             tokens (list): List of tokens to detokenize.
+             tokens (list): List of token ids to detokenize.

          Returns:
              str: Detokenized text.
          """
-         if not tokens:
-             return ""
-         if isinstance(tokens[0], str): # Assuming character tokens
-             return "".join(tokens)
-         else:
-             # Detokenizing IDs from Ollama is not straightforward client-side without specific tokenizer.
-             # This is a placeholder. For Ollama, detokenization usually happens server-side.
-             ASCIIColors.warning("Detokenizing integer tokens is not accurately supported by this Ollama client binding. Returning joined string of token IDs.")
-             return "".join(map(str, tokens))
+         ## Since Ollama exposes no detokenization endpoint, we use tiktoken for a rough estimate
+         return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)

      def count_tokens(self, text: str) -> int:
          """
@@ -314,8 +276,8 @@ class OllamaBinding(LollmsLLMBinding):
          if not self.model_name:
              ASCIIColors.warning("Cannot count tokens, model_name is not set.")
              return -1
-         return count_tokens_ollama(text, self.model_name, self.host_address, verify_ssl_certificate=self.verify_ssl_certificate, headers=self.ollama_client_headers)
-
+         #return count_tokens_ollama(text, self.model_name, self.ollama_client)
+         return len(self.tokenize(text))
      def embed(self, text: str, **kwargs) -> List[float]:
          """
          Get embeddings for the input text using Ollama API.
@@ -334,7 +296,7 @@ class OllamaBinding(LollmsLLMBinding):
          if not self.ollama_client:
              raise Exception("Ollama client not initialized.")

-         model_to_use = kwargs.get("model", self.model_name)
+         model_to_use = kwargs.get("model", "bge-m3")
          if not model_to_use:
              raise ValueError("Model name for embedding must be specified either in init or via kwargs.")

@@ -574,4 +536,4 @@ if __name__ == '__main__':
          ASCIIColors.error(f"An error occurred during testing: {e}")
          trace_exception(e)

-     ASCIIColors.yellow("\nOllamaBinding test finished.")
+     ASCIIColors.yellow("\nOllamaBinding test finished.")
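
The binding's token handling changed in two ways: tokenize and detokenize now approximate tokens client-side with tiktoken, while the server-side count (reading prompt_eval_count from a minimal chat call) is kept in count_tokens_ollama but commented out in count_tokens. The binding's helper also subtracts a small constant from prompt_eval_count, presumably to discount chat-template tokens; the sketch below omits that adjustment. Both approaches are shown as a hedged, standalone sketch; the model name is illustrative, and response field access may be dict-style or attribute-style depending on the ollama-python version.

    # Hedged sketch of the two token-counting strategies referenced in this diff.
    import tiktoken
    import ollama

    def approximate_token_count(text: str) -> int:
        # Client-side estimate: the gpt-3.5-turbo encoding is not the tokenizer of the
        # Ollama model actually being served, so counts are only rough approximations.
        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
        return len(enc.encode(text))

    def server_side_token_count(client: ollama.Client, model: str, text: str) -> int:
        # Server-side count: ask the model to generate almost nothing and read back how
        # many prompt tokens it evaluated. More faithful, but costs a network round-trip.
        res = client.chat(
            model=model,
            messages=[{"role": "user", "content": text}],
            stream=False,
            options={"num_predict": 1},
        )
        # Depending on the ollama-python version, res is a dict or a typed response object.
        return res["prompt_eval_count"] if isinstance(res, dict) else res.prompt_eval_count

    if __name__ == "__main__":
        print(approximate_token_count("Testing count of tokens"))
        # print(server_side_token_count(ollama.Client(), "mistral", "Testing count of tokens"))
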
lollms_client/lollms_core.py CHANGED
@@ -278,7 +278,20 @@ class LollmsClient():
          if self.binding:
              return self.binding.detokenize(tokens)
          raise RuntimeError("LLM binding not initialized.")
+     def count_tokens(self, text: str) -> int:
+         """
+         Counts how many tokens are in the text using the active LLM binding.
+
+         Args:
+             text (str): The text to count tokens in.

+         Returns:
+             int: Number of tokens.
+         """
+         if self.binding:
+             return self.binding.count_tokens(text)
+         raise RuntimeError("LLM binding not initialized.")
+
      def get_model_details(self) -> dict:
          """
          Get model information from the active LLM binding.
@@ -1611,4 +1624,4 @@ def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators
          if current_pos >= len(text):
              break

-     return chunks
+     return chunks
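
As a usage note for the new client-level method, count_tokens simply forwards to the active binding and raises RuntimeError when no binding is initialized, so callers may want to guard the call. A minimal hedged sketch follows; the constructor arguments are again illustrative assumptions.

    from lollms_client import LollmsClient

    lc = LollmsClient(binding_name="ollama", model_name="mistral")  # assumed constructor arguments
    try:
        n = lc.count_tokens("How many tokens am I?")
        print(f"{n} tokens")
    except RuntimeError as err:
        # Raised by LollmsClient.count_tokens when no LLM binding is initialized
        print(f"Cannot count tokens yet: {err}")
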
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lollms_client
- Version: 0.13.0
+ Version: 0.13.2
  Summary: A client library for LoLLMs generate endpoint
  Author-email: ParisNeo <parisneoai@gmail.com>
  License: Apache Software License
@@ -19,6 +19,13 @@ Requires-Python: >=3.7
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: requests
+ Requires-Dist: ascii-colors
+ Requires-Dist: pipmaster
+ Requires-Dist: pyyaml
+ Requires-Dist: tiktoken
+ Requires-Dist: pydantic
+ Requires-Dist: numpy
+ Requires-Dist: pillow
  Dynamic: license-file

  # lollms_client
@@ -1,4 +1,4 @@
- examples/simple_text_gen_test.py,sha256=CqBvkG7Zm2ya13AxgRKlhV1M4PiqeSOi9--yKL8MZ-E,8596
+ examples/simple_text_gen_test.py,sha256=RoX9ZKJjGMujeep60wh5WT_GoBn0O9YKJY6WOy-ZmOc,8710
  examples/simple_text_gen_with_image_test.py,sha256=Euv53jbKTVJDvs854lgJvA5F-iRnAATLxAklig24ots,8534
  examples/text_2_audio.py,sha256=MfL4AH_NNwl6m0I0ywl4BXRZJ0b9Y_9fRqDIe6O-Sbw,3523
  examples/text_2_image.py,sha256=Ri7lQ-GW54YWQh2eofcaN6LpwFoorbpJsJffrcXl3cg,6415
@@ -12,9 +12,9 @@ examples/personality_test/chat_test.py,sha256=o2jlpoddFc-T592iqAiA29xk3x27KsdK5D
  examples/personality_test/chat_with_aristotle.py,sha256=4X_fwubMpd0Eq2rCReS2bgVlUoAqJprjkLXk2Jz6pXU,1774
  examples/personality_test/tesks_test.py,sha256=7LIiwrEbva9WWZOLi34fsmCBN__RZbPpxoUOKA_AtYk,1924
  examples/test_local_models/local_chat.py,sha256=slakja2zaHOEAUsn2tn_VmI4kLx6luLBrPqAeaNsix8,456
- lollms_client/__init__.py,sha256=5Rs-Uis0zGVg94icdyXKveRFsva8Vh1otWtGDKEfSsw,821
+ lollms_client/__init__.py,sha256=y-N8Dw10pI9pHtP_zlVzsj7bVjsu873EdOlbqxaxZRU,823
  lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
- lollms_client/lollms_core.py,sha256=_iTQY64ePop-6YxA2Xb90CxuiF9gzz9c5SHcLUsJxVc,77559
+ lollms_client/lollms_core.py,sha256=ZTbEVn1M_gHAL3mL5mf3wGYAXidAtnSI3qEjwz2HlwY,77980
  lollms_client/lollms_discussion.py,sha256=9b83m0D894jwpgssWYTQHbVxp1gJoI-J947Ui_dRXII,2073
  lollms_client/lollms_functions.py,sha256=p8SFtmEPqvVCsIz2fZ5HxyOHaxjrAo5c12uTzJnb6m8,3594
  lollms_client/lollms_js_analyzer.py,sha256=01zUvuO2F_lnUe_0NLxe1MF5aHE1hO8RZi48mNPv-aw,8361
@@ -30,7 +30,7 @@ lollms_client/lollms_types.py,sha256=cfc1sremM8KR4avkYX99fIVkkdRvXErrCWKGjLrgv50
  lollms_client/lollms_utilities.py,sha256=YAgamfp0pBVApR68AHKjhp1lh6isMNF8iadwWLl63c0,7045
  lollms_client/llm_bindings/__init__.py,sha256=9sWGpmWSSj6KQ8H4lKGCjpLYwhnVdL_2N7gXCphPqh4,14
  lollms_client/llm_bindings/lollms/__init__.py,sha256=l1q2KnMQALz9QpLa3OUQ8e29KU4RCwkrmrdBvd7Z_kc,12236
- lollms_client/llm_bindings/ollama/__init__.py,sha256=mKGLeoRVpKW1YW4fnLQ4KlgbgHsN4i5TTfG3B4CxwVA,28428
+ lollms_client/llm_bindings/ollama/__init__.py,sha256=DyueED1cJmmJFg5evYmu-lrkwsN9pAxaVcwgUkcAZHU,26467
  lollms_client/llm_bindings/openai/__init__.py,sha256=SWBgnOcOWmFRSKTN1S9ATownHNBJ9f6FEtI3L4xNJNM,11861
  lollms_client/llm_bindings/tensor_rt/__init__.py,sha256=ZpeSKAbN8rh6zkysYl95sXG9Ci702NuPAhXC6zb1zT4,31840
  lollms_client/llm_bindings/transformers/__init__.py,sha256=8JbX3B-obLt5NNtcNOGD_E0f8OQTma2pNYtVt2urTOM,12572
@@ -45,8 +45,8 @@ lollms_client/tts_bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
  lollms_client/tts_bindings/lollms/__init__.py,sha256=8x2_T9XscvISw2TiaLoFxvrS7TIsVLdqbwSc04cX-wc,7164
  lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
  lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- lollms_client-0.13.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- lollms_client-0.13.0.dist-info/METADATA,sha256=I5sQiDvtijyYe72vvQWBjecdglA_-oyfY4I9W23eUZw,7103
- lollms_client-0.13.0.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
- lollms_client-0.13.0.dist-info/top_level.txt,sha256=NI_W8S4OYZvJjb0QWMZMSIpOrYzpqwPGYaklhyWKH2w,23
- lollms_client-0.13.0.dist-info/RECORD,,
+ lollms_client-0.13.2.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ lollms_client-0.13.2.dist-info/METADATA,sha256=IsZiVKLRi7NaMhITn6pFz3CX5zpIJAvb8vsddYIBJQA,7276
+ lollms_client-0.13.2.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+ lollms_client-0.13.2.dist-info/top_level.txt,sha256=NI_W8S4OYZvJjb0QWMZMSIpOrYzpqwPGYaklhyWKH2w,23
+ lollms_client-0.13.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.4.0)
+ Generator: setuptools (80.7.1)
  Root-Is-Purelib: true
  Tag: py3-none-any
