lollms-client 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of lollms-client has been flagged as potentially problematic.

@@ -689,7 +689,7 @@ class LollmsClient():
         template=None,
         language="json",
         code_tag_format="markdown", # or "html"
-        max_size = None,
+        n_predict = None,
         temperature = None,
         top_k = None,
         top_p=None,
@@ -732,7 +732,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
             prompt,
             images=images,
             system_prompt=system_prompt,
-            n_predict=max_size,
+            n_predict=n_predict,
             temperature=temperature,
             top_k=top_k,
             top_p=top_p,
@@ -1395,80 +1395,6 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
         )
         new_scratchpad_text = self.generate_text(prompt=synthesis_prompt, n_predict=1024, temperature=0.0)
         return self.remove_thinking_blocks(new_scratchpad_text).strip()
-    def generate_structured_content(
-        self,
-        prompt: str,
-        template: Union[dict, list],
-        system_prompt: Optional[str] = None,
-        images: Optional[List[str]] = None,
-        max_retries: int = 3,
-        **kwargs
-    ) -> Union[dict, list, None]:
-        """
-        Generates structured content (JSON) from a prompt, ensuring it matches a given template.
-
-        This method repeatedly calls the LLM until a valid JSON object that can be parsed
-        and somewhat matches the template is returned, or until max_retries is reached.
-
-        Args:
-            prompt (str): The main prompt to guide the LLM.
-            template (Union[dict, list]): A Python dict or list representing the desired JSON structure.
-            system_prompt (Optional[str], optional): An optional system prompt. Defaults to None.
-            images (Optional[List[str]], optional): A list of image paths for multimodal prompts. Defaults to None.
-            max_retries (int, optional): The maximum number of times to retry generation if parsing fails. Defaults to 3.
-            **kwargs: Additional keyword arguments to pass to the underlying generate_text method.
-
-        Returns:
-            Union[dict, list, None]: The parsed JSON object (as a Python dict or list), or None if it fails after all retries.
-        """
-        template_str = json.dumps(template, indent=4)
-
-        if not system_prompt:
-            system_prompt = "You are a highly intelligent AI assistant that excels at generating structured data in JSON format."
-
-        final_system_prompt = (
-            f"{system_prompt}\n\n"
-            "You MUST generate a response that is a single, valid JSON object matching the structure of the template provided by the user. "
-            "Your entire response should be enclosed in a single ```json markdown code block. "
-            "Do not include any other text, explanations, or apologies outside of the JSON code block.\n"
-            f"Here is the JSON template you must follow:\n{template_str}"
-        )
-
-        current_prompt = prompt
-        for attempt in range(max_retries):
-            raw_llm_output = self.generate_text(
-                prompt=current_prompt,
-                system_prompt=final_system_prompt,
-                images=images,
-                **kwargs
-            )
-
-            if not raw_llm_output:
-                ASCIIColors.warning(f"Structured content generation failed (Attempt {attempt + 1}/{max_retries}): LLM returned an empty response.")
-                current_prompt = f"You previously returned an empty response. Please try again and adhere strictly to the JSON format. \nOriginal prompt was: {prompt}"
-                continue
-
-            try:
-                # Use robust_json_parser which handles cleanup of markdown tags, comments, etc.
-                parsed_json = robust_json_parser(raw_llm_output)
-                # Optional: Add validation against the template's structure here if needed
-                return parsed_json
-            except (ValueError, json.JSONDecodeError) as e:
-                ASCIIColors.warning(f"Structured content parsing failed (Attempt {attempt + 1}/{max_retries}). Error: {e}")
-                trace_exception(e)
-                # Prepare for retry with more explicit instructions
-                current_prompt = (
-                    "Your previous response could not be parsed as valid JSON. Please review the error and the required template and try again. "
-                    "Ensure your entire output is a single, clean JSON object inside a ```json code block.\n\n"
-                    f"--- PARSING ERROR ---\n{str(e)}\n\n"
-                    f"--- YOUR PREVIOUS INVALID RESPONSE ---\n{raw_llm_output}\n\n"
-                    f"--- REQUIRED JSON TEMPLATE ---\n{template_str}\n\n"
-                    f"--- ORIGINAL PROMPT ---\n{prompt}"
-                )
-
-        ASCIIColors.error("Failed to generate valid structured content after multiple retries.")
-        return None
-
     def _synthesize_knowledge(
         self,
         previous_scratchpad: str,
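This hunk deletes the first of two `generate_structured_content` definitions. Since Python keeps only the last binding of a duplicated method name in a class body, this earlier variant (with `template` and `max_retries` parameters) appears to have been dead code shadowed by the second definition at old line 2007 below, making the removal a cleanup rather than a behavior change. Code written against this signature should move to the surviving one, roughly as follows (a sketch reusing the illustrative `lc` client from above; note that no `max_retries` equivalent is visible anywhere in this diff):

```python
# Old (removed, and previously shadowed anyway):
# data = lc.generate_structured_content(
#     prompt="Extract the person's details",
#     template={"name": "string", "age": "integer"},
#     max_retries=3,
# )

# Surviving signature in 0.29.1:
data = lc.generate_structured_content(
    prompt="Extract the person's details",
    schema={"name": "string", "age": "integer"},
)
if data is not None:
    print(data)
```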
@@ -1892,20 +1818,20 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
         }
     def generate_code(
         self,
-        prompt,
+        prompt:str,
         images=[],
-        system_prompt=None,
-        template=None,
+        system_prompt:str|None=None,
+        template:str|None=None,
         language="json",
         code_tag_format="markdown", # or "html"
-        max_size = None,
-        temperature = None,
-        top_k = None,
-        top_p=None,
-        repeat_penalty=None,
-        repeat_last_n=None,
+        n_predict:int|None = None,
+        temperature:float|None = None,
+        top_k:int|None= None,
+        top_p:float|None=None,
+        repeat_penalty:float|None=None,
+        repeat_last_n:int|None=None,
         callback=None,
-        debug=False ):
+        debug:bool=False ):
         """
         Generates a single code block based on a prompt.
         Uses the underlying LLM binding via `generate_text`.
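The new annotations use PEP 604 union syntax (`str|None`, `int|None`), which only evaluates on Python 3.10+ unless `from __future__ import annotations` is in effect; the diff shows no such import, so compatibility with older interpreters is worth checking (a reviewer assumption, not verified against the package metadata). The pre-3.10 spelling would be:

```python
from typing import Optional

# Pre-3.10-compatible spelling of the same annotations (illustrative fragment only,
# not the library's actual code):
def generate_code_signature_sketch(
    prompt: str,
    system_prompt: Optional[str] = None,  # equivalent to str | None
    n_predict: Optional[int] = None,      # renamed from max_size
    debug: bool = False,
) -> str:
    ...
```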
@@ -1915,24 +1841,20 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             system_prompt = f"""Act as a code generation assistant that generates code from user prompt."""
 
         if template:
-            system_prompt += "Here is a template of the answer:\n"
+            if language in ["json","yaml","xml"]:
+                system_prompt += f"\nMake sure the generated context follows the following schema:\n```{language}\n{template}\n```\n"
+            else:
+                system_prompt += f"\nHere is a template of the answer:\n```{language}\n{template}\n```\n"
+
         if code_tag_format=="markdown":
-            system_prompt += f"""You must answer with the code placed inside the markdown code tag like this:
+            system_prompt += f"""You must answer with the code placed inside the markdown code tag:
 ```{language}
-{template}
 ```
-{"Make sure you fill all fields and to use the exact same keys as the template." if language in ["json","yaml","xml"] else ""}
-The code tag is mandatory.
-Don't forget encapsulate the code inside a markdown code tag. This is mandatory.
 """
         elif code_tag_format=="html":
-            system_prompt +=f"""You must answer with the code placed inside the html code tag like this:
+            system_prompt +=f"""You must answer with the code placed inside the html code tag:
 <code language="{language}">
-{template}
 </code>
-{"Make sure you fill all fields and to use the exact same keys as the template." if language in ["json","yaml","xml"] else ""}
-The code tag is mandatory.
-Don't forget encapsulate the code inside a html code tag. This is mandatory.
 """
         system_prompt += f"""You must return a single code tag.
 Do not split the code in multiple tags.
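The template handling changes shape here: instead of echoing `{template}` inside the example code tag, 0.29.1 embeds the template in its own fenced block right after the instruction, with schema-flavored wording for json/yaml/xml. A standalone reproduction of the new branch, showing the string it assembles (reconstructed from the added lines above; the fence is built programmatically here only to keep this example self-contained):

```python
language = "json"
template = '{"name": "string", "age": "integer"}'
fence = "`" * 3  # stands in for the literal triple backticks used in the library code

system_prompt = "Act as a code generation assistant that generates code from user prompt."
if language in ["json", "yaml", "xml"]:
    # 0.29.1: the template is presented as a schema in its own fenced block
    system_prompt += (
        f"\nMake sure the generated context follows the following schema:"
        f"\n{fence}{language}\n{template}\n{fence}\n"
    )
else:
    system_prompt += f"\nHere is a template of the answer:\n{fence}{language}\n{template}\n{fence}\n"

print(system_prompt)
```

Note that the dropped reinforcement line ("Make sure you fill all fields and to use the exact same keys as the template.") is only partially compensated by the new schema wording, so strictness with weaker models may differ.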
@@ -1942,7 +1864,7 @@ Do not split the code in multiple tags.
             prompt,
             images=images,
             system_prompt=system_prompt,
-            n_predict=max_size,
+            n_predict=n_predict,
             temperature=temperature,
             top_k=top_k,
             top_p=top_p,
@@ -1973,7 +1895,7 @@ Do not split the code in multiple tags.
             continuation_response = self.generate_text(
                 continuation_prompt,
                 images=images, # Resend images if needed for context
-                n_predict=max_size, # Allow space for continuation
+                n_predict=n_predict, # Allow space for continuation
                 temperature=temperature, # Use same parameters
                 top_k=top_k,
                 top_p=top_p,
@@ -2007,12 +1929,13 @@ Do not split the code in multiple tags.
     def generate_structured_content(
         self,
         prompt,
-        output_format,
+        images=[],
+        schema={},
         system_prompt=None,
         **kwargs
     ):
         """
-        Generates structured data (a dict) from a prompt using a JSON template.
+        Generates structured data (a dict) from a prompt using a JSON schema.
 
         This method is a high-level wrapper around `generate_code`, specializing it
         for JSON output. It ensures the LLM sticks to a predefined structure,
@@ -2021,43 +1944,39 @@ Do not split the code in multiple tags.
         Args:
             prompt (str):
                 The user's request (e.g., "Extract the name, age, and city of the person described").
-            output_format (dict or str):
+            schema (dict or str):
                 A Python dictionary or a JSON string representing the desired output
-                structure. This will be used as a template for the LLM.
+                structure. This will be used as a schema for the LLM.
                 Example: {"name": "string", "age": "integer", "city": "string"}
             system_prompt (str, optional):
                 Additional instructions for the system prompt, to be appended to the
                 main instructions. Defaults to None.
             **kwargs:
                 Additional keyword arguments to be passed directly to the
-                `generate_code` method (e.g., temperature, max_size, top_k, debug).
+                `generate_code` method (e.g., temperature, n_predict, top_k, debug).
 
         Returns:
             dict: The parsed JSON data as a Python dictionary, or None if
             generation or parsing fails.
         """
-        # 1. Validate and prepare the template string from the output_format
-        if isinstance(output_format, dict):
-            # Convert the dictionary to a nicely formatted JSON string for the template
-            template_str = json.dumps(output_format, indent=2)
-        elif isinstance(output_format, str):
-            # Assume it's already a valid JSON string template
-            template_str = output_format
+        # 1. Validate and prepare the schema string from the schema
+        if isinstance(schema, dict):
+            # Convert the dictionary to a nicely formatted JSON string for the schema
+            schema_str = json.dumps(schema, indent=2)
+        elif isinstance(schema, str):
+            # Assume it's already a valid JSON string schema
+            schema_str = schema
         else:
             # It's good practice to fail early for invalid input types
-            raise TypeError("output_format must be a dict or a JSON string.")
-
+            raise TypeError("schema must be a dict or a JSON string.")
         # 2. Construct a specialized system prompt for structured data generation
         full_system_prompt = (
-            "You are a highly skilled AI assistant that processes user requests "
-            "and returns structured data in JSON format. You must strictly adhere "
-            "to the provided JSON template, filling in the values accurately based "
-            "on the user's prompt. Do not add any commentary, explanations, or text "
-            "outside of the final JSON code block. Your entire response must be a single "
-            "valid JSON object within a markdown code block."
+            "Your objective is to build a json structured output based on the user's request and the provided schema."
+            "Your entire response must be a single valid JSON object within a markdown code block."
+            "do not use tabs in your response."
        )
         if system_prompt:
-            system_prompt += f"\n\nAdditional instructions:\n{system_prompt}"
+            full_system_prompt = f"{system_prompt}\n\n{full_system_prompt}"
 
         # 3. Call the underlying generate_code method with JSON-specific settings
         if kwargs.get('debug'):
@@ -2065,8 +1984,9 @@ Do not split the code in multiple tags.
 
         json_string = self.generate_code(
             prompt=prompt,
+            images=images,
             system_prompt=full_system_prompt,
-            template=template_str,
+            template=schema_str,
             language="json",
             code_tag_format="markdown", # Sticking to markdown is generally more reliable
             **kwargs # Pass other params like temperature, top_k, etc.
@@ -2092,6 +2012,7 @@ Do not split the code in multiple tags.
             return parsed_json
 
         except Exception as e:
+            trace_exception(e)
             ASCIIColors.error(f"An unexpected error occurred during JSON parsing: {e}")
             return None
 
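Two migration notes on the reworked `generate_structured_content`: first, the second positional parameter is now `images`, not the old `output_format`, so positional callers will have their schema silently bound to the image list; keyword arguments are the safe path. Second, the new `images=[]` and `schema={}` mutable defaults are a classic Python pitfall if the method ever mutates them (a review observation; this diff shows no mutation). A hedged usage sketch, reusing the illustrative `lc` client from above:

```python
person = lc.generate_structured_content(
    prompt="Taylor is a 34-year-old engineer living in Lyon.",
    schema={"name": "string", "age": "integer", "city": "string"},  # was output_format
    images=[],        # new parameter; image paths for multimodal bindings
    n_predict=256,    # forwarded to generate_code (formerly max_size)
)
if person is None:
    print("generation or parsing failed")  # the method returns None on failure
else:
    print(person["name"], person["age"], person["city"])
```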
@@ -2312,7 +2233,7 @@ Do not split the code in multiple tags.
             language="json",
             template=template,
             code_tag_format="markdown",
-            max_size=max_answer_length,
+            n_predict=max_answer_length,
             callback=callback
         )
 
@@ -2407,7 +2328,7 @@ Do not split the code in multiple tags.
             template=template,
             language="json",
             code_tag_format="markdown",
-            max_size=max_answer_length,
+            n_predict=max_answer_length,
             callback=callback
         )
 
@@ -2478,7 +2399,7 @@ Do not split the code in multiple tags.
             template=template,
             language="json",
             code_tag_format="markdown",
-            max_size=max_answer_length,
+            n_predict=max_answer_length,
             callback=callback
         )
 
@@ -3041,6 +2962,112 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         return final_output
 
 
+    def summarize(
+        self,
+        text_to_summarize: str,
+        contextual_prompt: Optional[str] = None,
+        chunk_size_tokens: int = 1500,
+        overlap_tokens: int = 250,
+        streaming_callback: Optional[Callable] = None,
+        **kwargs
+    ) -> str:
+        """
+        Summarizes a long text that may not fit into the model's context window.
+
+        This method works in two stages:
+        1. **Chunk & Summarize:** It breaks the text into overlapping chunks and summarizes each one individually.
+        2. **Synthesize:** It then takes all the chunk summaries and performs a final summarization pass to create a single, coherent, and comprehensive summary.
+
+        Args:
+            text_to_summarize (str): The long text content to be summarized.
+            contextual_prompt (Optional[str], optional): A specific instruction to guide the summary's focus.
+                For example, "Summarize the text focusing on the financial implications."
+                Defaults to None.
+            chunk_size_tokens (int, optional): The number of tokens in each text chunk. This should be well
+                within the model's context limit to allow space for prompts.
+                Defaults to 1500.
+            overlap_tokens (int, optional): The number of tokens to overlap between chunks to ensure context
+                is not lost at the boundaries. Defaults to 250.
+            streaming_callback (Optional[Callable], optional): A callback function to receive real-time updates
+                on the process (e.g., which chunk is being processed).
+                Defaults to None.
+            **kwargs: Additional keyword arguments to be passed to the generation method (e.g., temperature, top_p).
+
+        Returns:
+            str: The final, comprehensive summary of the text.
+        """
+        if not text_to_summarize.strip():
+            return ""
+
+        # Use the binding's tokenizer for accurate chunking
+        tokens = self.binding.tokenize(text_to_summarize)
+
+        if len(tokens) <= chunk_size_tokens:
+            if streaming_callback:
+                streaming_callback("Text is short enough for a single summary.", MSG_TYPE.MSG_TYPE_STEP)
+
+            prompt_objective = contextual_prompt or "Provide a comprehensive summary of the following text."
+            final_prompt = f"{prompt_objective}\n\n--- Text to Summarize ---\n{text_to_summarize}"
+
+            return self.generate_text(final_prompt, **kwargs)
+
+        # --- Stage 1: Chunking and Independent Summarization ---
+        chunks = []
+        step = chunk_size_tokens - overlap_tokens
+        for i in range(0, len(tokens), step):
+            chunk_tokens = tokens[i:i + chunk_size_tokens]
+            chunk_text = self.binding.detokenize(chunk_tokens)
+            chunks.append(chunk_text)
+
+        chunk_summaries = []
+
+        # Define the prompt for summarizing each chunk
+        summarization_objective = contextual_prompt or "Summarize the key points of the following text excerpt."
+        chunk_summary_prompt_template = f"{summarization_objective}\n\n--- Text Excerpt ---\n{{chunk_text}}"
+
+        for i, chunk in enumerate(chunks):
+            if streaming_callback:
+                streaming_callback(f"Summarizing chunk {i + 1} of {len(chunks)}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"chunk_{i+1}"})
+
+            prompt = chunk_summary_prompt_template.format(chunk_text=chunk)
+
+            try:
+                # Generate summary for the current chunk
+                chunk_summary = self.generate_text(prompt, **kwargs)
+                chunk_summaries.append(chunk_summary)
+                if streaming_callback:
+                    streaming_callback(f"Chunk {i + 1} summarized.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"chunk_{i+1}", "summary_snippet": chunk_summary[:100]})
+            except Exception as e:
+                trace_exception(e)
+                if streaming_callback:
+                    streaming_callback(f"Failed to summarize chunk {i+1}: {e}", MSG_TYPE.MSG_TYPE_EXCEPTION)
+                # Still add a placeholder to not break the chain
+                chunk_summaries.append(f"[Error summarizing chunk {i+1}]")
+
+        # --- Stage 2: Final Synthesis of All Chunk Summaries ---
+        if streaming_callback:
+            streaming_callback("Synthesizing all chunk summaries into a final version...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_synthesis"})
+
+        combined_summaries = "\n\n---\n\n".join(chunk_summaries)
+
+        # Define the prompt for the final synthesis
+        synthesis_objective = contextual_prompt or "Create a single, final, coherent, and comprehensive summary."
+        final_synthesis_prompt = (
+            "You are a master synthesizer. You will be given a series of partial summaries from a long document. "
+            f"Your task is to synthesize them into one high-quality summary. {synthesis_objective}\n\n"
+            "Please remove any redundancy and ensure a smooth, logical flow.\n\n"
+            "--- Collection of Summaries ---\n"
+            f"{combined_summaries}\n\n"
+            "--- Final Comprehensive Summary ---"
+        )
+
+        final_summary = self.generate_text(final_synthesis_prompt, **kwargs)
+
+        if streaming_callback:
+            streaming_callback("Final summary synthesized.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_synthesis"})
+
+        return final_summary.strip()
+
 def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
     """
     Chunks text based on token count.
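The new `summarize` method is a two-stage map-reduce summarizer: the text is tokenized with the active binding's tokenizer and walked in windows of `chunk_size_tokens` with a stride of `chunk_size_tokens - overlap_tokens` (with the defaults, 1500-token chunks every 1250 tokens, each sharing 250 tokens with its neighbor), each window is summarized independently, and a final synthesis pass fuses the partial summaries. A hedged usage sketch; the `MSG_TYPE` import path and the client construction are assumptions, since the diff only shows the identifiers in use:

```python
from lollms_client import LollmsClient
from lollms_client.lollms_types import MSG_TYPE  # assumed import path

lc = LollmsClient()  # hypothetical default setup

def on_step(message, msg_type, params=None):
    # Receives MSG_TYPE_STEP / MSG_TYPE_STEP_START / MSG_TYPE_STEP_END notifications
    print(f"[{msg_type}] {message}")

with open("report.txt", encoding="utf-8") as f:
    long_text = f.read()

summary = lc.summarize(
    long_text,
    contextual_prompt="Summarize the text focusing on the financial implications.",
    chunk_size_tokens=1500,  # window size (default shown explicitly)
    overlap_tokens=250,      # stride is therefore 1250 tokens
    streaming_callback=on_step,
    temperature=0.0,         # forwarded to generate_text via **kwargs
)
print(summary)
```

One caveat visible in the code itself: `**kwargs` is forwarded unchanged to every `generate_text` call, so passing `n_predict` here caps the per-chunk summaries and the final synthesis at the same length.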