MindsDB 25.7.4.0-py3-none-any.whl → 25.8.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +11 -1
- mindsdb/api/executor/command_executor.py +9 -15
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +2 -1
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -2
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/salesforce_handler/constants.py +9 -2
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +83 -45
- mindsdb/interfaces/agents/constants.py +0 -1
- mindsdb/interfaces/agents/langchain_agent.py +1 -3
- mindsdb/interfaces/database/projects.py +111 -7
- mindsdb/interfaces/knowledge_base/controller.py +7 -1
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +14 -15
- mindsdb/utilities/config.py +2 -0
- mindsdb/utilities/fs.py +54 -17
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +278 -263
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +49 -48
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/huggingface_handler/requirements.txt
@@ -1,7 +1,7 @@
-#
-
-
-
-
-
-
+# NOTE: Any changes made here need to be made to requirements_cpu.txt as well
+datasets==2.16.1
+evaluate==0.4.3
+nltk==3.9.1
+huggingface-hub==0.29.3
+torch==2.8.0
+transformers >= 4.42.4
mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt
@@ -1,7 +1,7 @@
-#
-
-
-
-
-
-
+# Needs to be installed with `pip install --extra-index-url https://download.pytorch.org/whl/ .[huggingface_cpu]`
+datasets==2.16.1
+evaluate==0.4.3
+nltk==3.9.1
+huggingface-hub==0.29.3
+torch==2.8.0+cpu
+transformers >= 4.42.4
mindsdb/integrations/handlers/huggingface_handler/settings.py
@@ -1,27 +1,27 @@
-
-
-
-
-
-
-
-
+from mindsdb.integrations.handlers.huggingface_handler.finetune import (
+    _finetune_cls,
+    _finetune_fill_mask,
+    _finetune_question_answering,
+    _finetune_summarization,
+    _finetune_text_generation,
+    _finetune_translate,
+)
 
-#
-#
-#
-#
-#
-#
-#
-#
+# todo once we have moved predict tasks functions into a separate function
+# PREDICT_MAP = {
+#     'text-classification': self.predict_text_classification,
+#     'zero-shot-classification': self.predict_zero_shot,
+#     'translation': self.predict_translation,
+#     'summarization': self.predict_summarization,
+#     'fill-mask': self.predict_fill_mask
+# }
 
-
-
-
-
-
-
-
-
-
+FINETUNE_MAP = {
+    "text-classification": _finetune_cls,
+    "zero-shot-classification": _finetune_cls,
+    "translation": _finetune_translate,
+    "summarization": _finetune_summarization,
+    "fill-mask": _finetune_fill_mask,
+    "text-generation": _finetune_text_generation,
+    "question-answering": _finetune_question_answering,
+}
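For orientation, a minimal sketch of how a task-to-function map like FINETUNE_MAP is typically consumed. The `run_finetune` wrapper and its argument shape are illustrative assumptions, not code from the wheel:

    # Hypothetical dispatcher over FINETUNE_MAP (illustrative only)
    def run_finetune(task: str, df, args: dict):
        try:
            finetune_fn = FINETUNE_MAP[task]  # e.g. task = "summarization"
        except KeyError:
            raise ValueError(f"Fine-tuning is not supported for task '{task}'")
        return finetune_fn(df, args)  # assumed signature of the _finetune_* helpers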
mindsdb/integrations/handlers/langchain_handler/langchain_handler.py
@@ -36,7 +36,7 @@ from mindsdb.interfaces.storage.model_fs import HandlerStorage, ModelStorage
 from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
     construct_model_from_args,
 )
-from mindsdb.integrations.handlers.openai_handler.constants import
+from mindsdb.integrations.handlers.openai_handler.constants import CHAT_MODELS_PREFIXES  # noqa: F401 - for dependency checker
 
 from mindsdb.utilities import log
 from mindsdb.utilities.context_executor import ContextThreadPoolExecutor
@@ -54,7 +54,6 @@ class LangChainHandler(BaseMLEngine):
     Supported LLM providers:
     - OpenAI
     - Anthropic
-    - Anyscale
     - Google
     - LiteLLM
     - Ollama
mindsdb/integrations/handlers/openai_handler/constants.py
@@ -1,38 +1,19 @@
-OPENAI_API_BASE =
+OPENAI_API_BASE = "https://api.openai.com/v1"
 
-
-
-
-
-
-    'gpt-4-32k',
-    'gpt-4-1106-preview',
-    'gpt-4-0125-preview',
-    'gpt-4o',
-    'o3-mini',
-    'o1-mini'
-)
-COMPLETION_MODELS = ('babbage-002', 'davinci-002')
-FINETUNING_MODELS = ('gpt-3.5-turbo', 'babbage-002', 'davinci-002', 'gpt-4')
-COMPLETION_LEGACY_BASE_MODELS = ('davinci', 'curie', 'babbage', 'ada')
-DEFAULT_CHAT_MODEL = 'gpt-3.5-turbo'
+CHAT_MODELS_PREFIXES = ("gpt-3.5", "gpt-4", "o3-mini", "o1-mini")
+COMPLETION_MODELS = ("babbage-002", "davinci-002")
+FINETUNING_MODELS = ("gpt-3.5-turbo", "babbage-002", "davinci-002", "gpt-4")
+COMPLETION_LEGACY_BASE_MODELS = ("davinci", "curie", "babbage", "ada")
+DEFAULT_CHAT_MODEL = "gpt-4o-mini"
 
 FINETUNING_LEGACY_MODELS = FINETUNING_MODELS
 COMPLETION_LEGACY_MODELS = (
     COMPLETION_LEGACY_BASE_MODELS
-    + tuple(f
-    + (
+    + tuple(f"text-{model}-001" for model in COMPLETION_LEGACY_BASE_MODELS)
+    + ("text-davinci-002", "text-davinci-003")
 )
 
-
-    ('text-embedding-ada-002',)
-    + tuple(f'text-similarity-{model}-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'text-search-{model}-query-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'text-search-{model}-doc-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'code-search-{model}-text-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'code-search-{model}-code-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-)
-DEFAULT_EMBEDDING_MODEL = 'text-embedding-ada-002'
+DEFAULT_EMBEDDING_MODEL = "text-embedding-ada-002"
 
-IMAGE_MODELS = (
-DEFAULT_IMAGE_MODEL =
+IMAGE_MODELS = ("dall-e-2", "dall-e-3")
+DEFAULT_IMAGE_MODEL = "dall-e-2"
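A quick check of the tuple arithmetic in COMPLETION_LEGACY_MODELS above (plain Python, not part of the package):

    base = ("davinci", "curie", "babbage", "ada")
    legacy = (
        base
        + tuple(f"text-{m}-001" for m in base)
        + ("text-davinci-002", "text-davinci-003")
    )
    # ('davinci', 'curie', 'babbage', 'ada',
    #  'text-davinci-001', 'text-curie-001', 'text-babbage-001', 'text-ada-001',
    #  'text-davinci-002', 'text-davinci-003')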
mindsdb/integrations/handlers/openai_handler/helpers.py
@@ -14,6 +14,7 @@ class PendingFT(openai.OpenAIError):
     """
     Custom exception to handle pending fine-tuning status.
     """
+
     message: str
 
     def __init__(self, message) -> None:
@@ -65,10 +66,7 @@ def retry_with_exponential_backoff(
 
     if isinstance(hour_budget, float) or isinstance(hour_budget, int):
         try:
-            max_retries = round(
-                (math.log((hour_budget * 3600) / initial_delay))
-                / math.log(exponential_base)
-            )
+            max_retries = round((math.log((hour_budget * 3600) / initial_delay)) / math.log(exponential_base))
         except ValueError:
             max_retries = 10
     else:
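The collapsed expression derives the retry count from the time budget: it solves initial_delay * exponential_base ** n = hour_budget * 3600 for n. A worked example with assumed values:

    import math

    hour_budget = 1.0       # hours of total retry budget (assumed)
    initial_delay = 1.0     # seconds (assumed)
    exponential_base = 2.0  # delay roughly doubles per retry (assumed)

    max_retries = round(math.log((hour_budget * 3600) / initial_delay) / math.log(exponential_base))
    # log2(3600) ≈ 11.8, so max_retries == 12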
@@ -81,22 +79,20 @@ def retry_with_exponential_backoff(
 
             except status_errors as e:
                 raise Exception(
-                    f
+                    f"Error status {e.status_code} raised by OpenAI API: {e.body.get('message', 'Please refer to `https://platform.openai.com/docs/guides/error-codes` for more information.')}"  # noqa
                 )  # noqa
 
             except wait_errors:
                 num_retries += 1
                 if num_retries > max_retries:
-                    raise Exception(
-                        f"Maximum number of retries ({max_retries}) exceeded."
-                    )
+                    raise Exception(f"Maximum number of retries ({max_retries}) exceeded.")
                 # Increment the delay and wait
                 delay *= exponential_base * (1 + jitter * random.random())
                 time.sleep(delay)
 
             except openai.OpenAIError as e:
                 raise Exception(
-                    f
+                    f"General {str(e)} error raised by OpenAI. Please refer to `https://platform.openai.com/docs/guides/error-codes` for more information."  # noqa
                 )
 
             except Exception as e:
@@ -107,7 +103,7 @@ def retry_with_exponential_backoff(
     return _retry_with_exponential_backoff
 
 
-def truncate_msgs_for_token_limit(messages: List[Dict], model_name: Text, max_tokens: int, truncate: Text =
+def truncate_msgs_for_token_limit(messages: List[Dict], model_name: Text, max_tokens: int, truncate: Text = "first"):
     """
     Truncates message list to fit within the token limit.
     The first message for chat completion models are general directives with the system role, which will ideally be kept at all times.
@@ -129,20 +125,18 @@ def truncate_msgs_for_token_limit(messages: List[Dict], model_name: Text, max_tokens: int, truncate: Text = "first"):
     except KeyError:
         # If the encoding is not found, default to cl100k_base.
         # This is applicable for handlers that extend the OpenAI handler such as Anyscale.
-        model_name =
-        encoder = tiktoken.get_encoding(
+        model_name = "gpt-3.5-turbo-0301"
+        encoder = tiktoken.get_encoding("cl100k_base")
 
     sys_priming = messages[0:1]
     n_tokens = count_tokens(messages, encoder, model_name)
     while n_tokens > max_tokens:
         if len(messages) == 2:
-            return messages[
-                :-1
-            ]  # edge case: if limit is surpassed by just one input, we remove initial instruction
+            return messages[:-1]  # edge case: if limit is surpassed by just one input, we remove initial instruction
         elif len(messages) == 1:
             return messages
 
-        if truncate ==
+        if truncate == "first":
             messages = sys_priming + messages[2:]
         else:
             messages = sys_priming + messages[1:-1]
@@ -151,7 +145,7 @@ def truncate_msgs_for_token_limit(messages: List[Dict], model_name: Text, max_tokens: int, truncate: Text = "first"):
     return messages
 
 
-def count_tokens(messages: List[Dict], encoder: tiktoken.core.Encoding, model_name: Text =
+def count_tokens(messages: List[Dict], encoder: tiktoken.core.Encoding, model_name: Text = "gpt-3.5-turbo-0301"):
     """
     Counts the number of tokens in a list of messages.
 
@@ -160,24 +154,23 @@ def count_tokens(messages: List[Dict], encoder: tiktoken.core.Encoding, model_name: Text = "gpt-3.5-turbo-0301"):
         encoder: Tokenizer
         model_name: Model name
     """
-    if (
-
-
-    num_tokens = 0
-    for message in messages:
-        num_tokens += (
-            4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
-        )
-        for key, value in message.items():
-            num_tokens += len(encoder.encode(value))
-            if key == "name":  # if there's a name, the role is omitted
-                num_tokens += -1  # role is always required and always 1 token
-    num_tokens += 2  # every reply is primed with <im_start>assistant
-    return num_tokens
+    if "gpt-3.5-turbo" in model_name:  # note: future models may deviate from this (only 0301 really complies)
+        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
+        tokens_per_name = -1
     else:
-
-
-
+        tokens_per_message = 3
+        tokens_per_name = 1
+
+    num_tokens = 0
+    for message in messages:
+        num_tokens += tokens_per_message
+
+        for key, value in message.items():
+            num_tokens += len(encoder.encode(value))
+            if key == "name":  # if there's a name, the role is omitted
+                num_tokens += tokens_per_name
+    num_tokens += 2  # every reply is primed with <im_start>assistant
+    return num_tokens
 
 
 def get_available_models(client) -> List[Text]:
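A sketch of calling the rewritten count_tokens, assuming tiktoken is installed and the helpers module is imported; the per-message overheads follow the in-code comments above:

    import tiktoken

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    encoder = tiktoken.get_encoding("cl100k_base")
    n = count_tokens(messages, encoder, model_name="gpt-4")
    # gpt-4 branch: 3 tokens per message, plus encoded keys/values, plus 2 for the reply primer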
mindsdb/integrations/handlers/openai_handler/openai_handler.py
@@ -24,7 +24,7 @@ from mindsdb.integrations.handlers.openai_handler.helpers import (
     PendingFT,
 )
 from mindsdb.integrations.handlers.openai_handler.constants import (
-
+    CHAT_MODELS_PREFIXES,
     IMAGE_MODELS,
     FINETUNING_MODELS,
     OPENAI_API_BASE,
@@ -62,7 +62,6 @@ class OpenAIHandler(BaseMLEngine):
         self.rate_limit = 60  # requests per minute
         self.max_batch_size = 20
         self.default_max_tokens = 100
-        self.chat_completion_models = CHAT_MODELS
         self.supported_ft_models = FINETUNING_MODELS  # base models compatible with finetuning
         # For now this are only used for handlers that inherits OpenAIHandler and don't need to override base methods
         self.api_key_name = getattr(self, "api_key_name", self.name)
@@ -89,6 +88,13 @@ class OpenAIHandler(BaseMLEngine):
         client = self._get_client(api_key=api_key, base_url=api_base, org=org, args=connection_args)
         OpenAIHandler._check_client_connection(client)
 
+    @staticmethod
+    def is_chat_model(model_name):
+        for prefix in CHAT_MODELS_PREFIXES:
+            if model_name.startswith(prefix):
+                return True
+        return False
+
     @staticmethod
     def _check_client_connection(client: OpenAI) -> None:
         """
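Given the CHAT_MODELS_PREFIXES tuple from constants.py, the new helper classifies models by prefix rather than exact name, so dated or suffixed releases match automatically. Illustrative calls (return values follow directly from the prefix logic above):

    OpenAIHandler.is_chat_model("gpt-4o-mini")         # True  ("gpt-4" prefix)
    OpenAIHandler.is_chat_model("o3-mini-2025-01-31")  # True  ("o3-mini" prefix)
    OpenAIHandler.is_chat_model("babbage-002")         # False (completion-only model)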
@@ -350,11 +356,6 @@ class OpenAIHandler(BaseMLEngine):
             "user": pred_args.get("user", None),
         }
 
-        if args.get("mode", self.default_mode) != "default" and model_name not in self.chat_completion_models:
-            raise Exception(
-                f"Conversational modes are only available for the following models: {', '.join(self.chat_completion_models)}"
-            )  # noqa
-
         if args.get("prompt_template", False):
             prompts, empty_prompt_ids = get_completed_prompts(base_template, df, strict=strict_prompt_template)
 
@@ -515,7 +516,7 @@ class OpenAIHandler(BaseMLEngine):
             return _submit_image_completion(kwargs, prompts, api_args)
         elif model_name == "embedding":
             return _submit_embedding_completion(kwargs, prompts, api_args)
-        elif
+        elif self.is_chat_model(model_name):
             if model_name == "gpt-3.5-turbo-instruct":
                 return _submit_normal_completion(kwargs, prompts, api_args)
             else:
@@ -579,13 +580,14 @@ class OpenAIHandler(BaseMLEngine):
                 tidy_comps.append(c.text.strip("\n").strip(""))
             return tidy_comps
 
-        kwargs["prompt"] = prompts
         kwargs = {**kwargs, **api_args}
 
         before_openai_query(kwargs)
-
-
-
+        responses = []
+        for prompt in prompts:
+            responses.extend(_tidy(client.completions.create(prompt=prompt, **kwargs)))
+        _log_api_call(kwargs, responses)
+        return responses
 
     def _submit_embedding_completion(kwargs: Dict, prompts: List[Text], api_args: Dict) -> List[float]:
         """
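The rewritten tail of _submit_normal_completion issues one request per prompt instead of passing the whole list in a single call. A rough equivalent of the new control flow, with `client` assumed to be an `openai.OpenAI` instance and `kwargs` already merged with the API arguments:

    responses = []
    for prompt in prompts:
        completion = client.completions.create(prompt=prompt, **kwargs)
        responses.extend(c.text.strip("\n") for c in completion.choices)  # roughly what _tidy does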
mindsdb/integrations/handlers/salesforce_handler/constants.py
@@ -22,6 +22,7 @@ def get_soql_instructions(integration_name):
     - NO subqueries in FROM clause - only relationship-based subqueries allowed
       SQL: SELECT * FROM (SELECT Name FROM Account) AS AccountNames;
       SOQL: Not supported
+    - Do not use fields that are not defined in the schema or data catalog. Always reference exact field names.
 
     **FIELD SELECTION:**
     - Always include Id field when querying
@@ -43,7 +44,10 @@ def get_soql_instructions(integration_name):
     - Special date literals: TODAY, YESTERDAY, LAST_WEEK, LAST_MONTH, LAST_QUARTER, LAST_YEAR, THIS_WEEK, THIS_MONTH, THIS_QUARTER, THIS_YEAR
       CORRECT: WHERE CreatedDate = TODAY
       CORRECT: WHERE LastModifiedDate >= LAST_MONTH
-      CORRECT: WHERE CloseDate
+      CORRECT: WHERE CloseDate >= THIS_QUARTER
+    - Date arithmetic (e.g., TODAY - 10) is not supported. Use literals like LAST_N_DAYS:10 instead.
+      CORRECT: WHERE CloseDate >= LAST_N_DAYS:10
+      INCORRECT: WHERE CloseDate >= TODAY - 10
     - LIKE operator: Only supports % wildcard, NO underscore (_) wildcard
       CORRECT: WHERE Name LIKE '%Corp%'
       CORRECT: WHERE Name LIKE 'Acme%'
@@ -69,6 +73,9 @@ def get_soql_instructions(integration_name):
       CORRECT: WHERE Services__c INCLUDES ('Consulting;Support')
       CORRECT: WHERE Services__c EXCLUDES ('Training')
       INCORRECT: WHERE Services__c = 'Consulting'
+    - Limited subquery support - only IN/NOT IN with non-correlated subqueries in WHERE clause
+      CORRECT: SELECT Id FROM Contact WHERE Id NOT IN (SELECT WhoId FROM Task)
+      INCORRECT: SELECT Id FROM Contact WHERE NOT EXISTS (SELECT 1 FROM Task WHERE WhoId = Contact.Id)
 
     **JOINS:**
     - NO explicit JOIN syntax supported
@@ -195,6 +202,7 @@ def get_soql_instructions(integration_name):
     - Multi-select picklist: SELECT Id, Name FROM Account WHERE Services__c INCLUDES ('Consulting;Support')
     - Sorting and limiting: SELECT Id, Name FROM Account ORDER BY Name ASC LIMIT 50
 
+
     ***EXECUTION INSTRUCTIONS. IMPORTANT!***
     After generating the core SOQL (and nothing else), always make sure you wrap it exactly as:
 
@@ -204,5 +212,4 @@ After generating the core SOQL (and nothing else), always make sure you wrap it exactly as:
     )
 
     Return only that wrapper call.
-
     """
mindsdb/integrations/libs/llm/config.py
@@ -37,20 +37,6 @@ class AnthropicConfig(BaseLLMConfig):
     anthropic_api_url: Optional[str]
 
 
-# See https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.anyscale.ChatAnyscale.html
-# This config does not have to be exclusively used with Langchain.
-class AnyscaleConfig(BaseLLMConfig):
-    model_name: str
-    temperature: Optional[float]
-    max_retries: Optional[int]
-    max_tokens: Optional[int]
-    anyscale_api_base: Optional[str]
-    # Inferred from ANYSCALE_API_KEY if not provided.
-    anyscale_api_key: Optional[str]
-    anyscale_proxy: Optional[str]
-    request_timeout: Optional[float]
-
-
 # See https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.litellm.ChatLiteLLM.html
 # This config does not have to be exclusively used with Langchain.
 class LiteLLMConfig(BaseLLMConfig):
mindsdb/integrations/libs/llm/utils.py
@@ -8,7 +8,6 @@ import pandas as pd
 
 from mindsdb.integrations.libs.llm.config import (
     AnthropicConfig,
-    AnyscaleConfig,
     BaseLLMConfig,
     GoogleConfig,
     LiteLLMConfig,
@@ -30,9 +29,6 @@ DEFAULT_OPENAI_MAX_RETRIES = 3
 
 DEFAULT_ANTHROPIC_MODEL = "claude-3-haiku-20240307"
 
-DEFAULT_ANYSCALE_MODEL = "meta-llama/Llama-2-7b-chat-hf"
-DEFAULT_ANYSCALE_BASE_URL = "https://api.endpoints.anyscale.com/v1"
-
 DEFAULT_GOOGLE_MODEL = "gemini-2.5-pro-preview-03-25"
 
 DEFAULT_LITELLM_MODEL = "gpt-3.5-turbo"
@@ -135,17 +131,6 @@ def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig:
             anthropic_api_key=args["api_keys"].get("anthropic", None),
             anthropic_api_url=args.get("base_url", None),
         )
-    if provider == "anyscale":
-        return AnyscaleConfig(
-            model_name=args.get("model_name", DEFAULT_ANYSCALE_MODEL),
-            temperature=temperature,
-            max_retries=args.get("max_retries", DEFAULT_OPENAI_MAX_RETRIES),
-            max_tokens=args.get("max_tokens", DEFAULT_OPENAI_MAX_TOKENS),
-            anyscale_api_base=args.get("base_url", DEFAULT_ANYSCALE_BASE_URL),
-            anyscale_api_key=args["api_keys"].get("anyscale", None),
-            anyscale_proxy=args.get("proxy", None),
-            request_timeout=args.get("request_timeout", None),
-        )
     if provider == "litellm":
         model_kwargs = {
             "api_key": args["api_keys"].get("litellm", None),
mindsdb/integrations/utilities/files/file_reader.py
@@ -10,6 +10,7 @@ from typing import List, Generator
 import filetype
 import pandas as pd
 from charset_normalizer import from_bytes
+from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
 
 from mindsdb.utilities import log
 
@@ -322,40 +323,25 @@ class FileReader(FormatDetector):
     @staticmethod
     def read_txt(file_obj: BytesIO, name: str | None = None, **kwargs) -> pd.DataFrame:
         # the lib is heavy, so import it only when needed
-        from langchain_text_splitters import RecursiveCharacterTextSplitter
 
         file_obj = decode(file_obj)
 
-        try:
-            from langchain_core.documents import Document
-        except ImportError:
-            raise FileProcessingError(
-                "To import TXT document please install 'langchain-community':\n pip install langchain-community"
-            )
         text = file_obj.read()
 
-
-        documents = [Document(page_content=text, metadata=metadata)]
+        text_splitter = TextSplitter(chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP)
 
-
-
-        )
-
-        docs = text_splitter.split_documents(documents)
-        return pd.DataFrame([{"content": doc.page_content, "metadata": doc.metadata} for doc in docs])
+        docs = text_splitter.split_text(text)
+        return pd.DataFrame([{"content": doc, "metadata": {"source_file": name, "file_format": "txt"}} for doc in docs])
 
 
     @staticmethod
     def read_pdf(file_obj: BytesIO, name: str | None = None, **kwargs) -> pd.DataFrame:
         # the libs are heavy, so import it only when needed
         import fitz  # pymupdf
-        from langchain_text_splitters import RecursiveCharacterTextSplitter
 
         with fitz.open(stream=file_obj.read()) as pdf:  # open pdf
             text = chr(12).join([page.get_text() for page in pdf])
 
-        text_splitter =
-            chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP
-        )
+        text_splitter = TextSplitter(chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP)
 
         split_text = text_splitter.split_text(text)
 
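Both readers now share the in-house TextSplitter (added in text_splitter.py) instead of langchain's RecursiveCharacterTextSplitter. A sketch of the resulting chunking path; the chunk-size constants and file name here are assumed values, only the TextSplitter API is taken from the diff:

    # Hypothetical walk-through of the new chunking path
    DEFAULT_CHUNK_SIZE = 1000    # assumed
    DEFAULT_CHUNK_OVERLAP = 200  # assumed

    splitter = TextSplitter(chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP)
    chunks = splitter.split_text("some long document ...")  # -> list[str]
    rows = [{"content": c, "metadata": {"source_file": "doc.txt", "file_format": "txt"}} for c in chunks]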
mindsdb/integrations/utilities/rag/rerankers/base_reranker.py
@@ -33,7 +33,7 @@ class BaseLLMReranker(BaseModel, ABC):
     client: Optional[AsyncOpenAI | BaseMLEngine] = None
     _semaphore: Optional[asyncio.Semaphore] = None
     max_concurrent_requests: int = 20
-    max_retries: int =
+    max_retries: int = 4
     retry_delay: float = 1.0
     request_timeout: float = 20.0  # Timeout for API requests
     early_stop: bool = True  # Whether to enable early stopping