PyPI - MindsDB - Versions diffs - 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl - Mend

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (163) hide show

mindsdb/__about__.py +1 -1
mindsdb/__main__.py +40 -29
mindsdb/api/a2a/__init__.py +1 -1
mindsdb/api/a2a/agent.py +16 -10
mindsdb/api/a2a/common/server/server.py +7 -3
mindsdb/api/a2a/common/server/task_manager.py +12 -5
mindsdb/api/a2a/common/types.py +66 -0
mindsdb/api/a2a/task_manager.py +65 -17
mindsdb/api/common/middleware.py +10 -12
mindsdb/api/executor/command_executor.py +51 -40
mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
mindsdb/api/executor/exceptions.py +29 -10
mindsdb/api/executor/planner/plan_join.py +17 -3
mindsdb/api/executor/planner/query_prepare.py +2 -20
mindsdb/api/executor/sql_query/sql_query.py +74 -74
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
mindsdb/api/executor/utilities/functions.py +6 -6
mindsdb/api/executor/utilities/sql.py +37 -20
mindsdb/api/http/gui.py +5 -11
mindsdb/api/http/initialize.py +75 -61
mindsdb/api/http/namespaces/agents.py +10 -15
mindsdb/api/http/namespaces/analysis.py +13 -20
mindsdb/api/http/namespaces/auth.py +1 -1
mindsdb/api/http/namespaces/chatbots.py +0 -5
mindsdb/api/http/namespaces/config.py +15 -11
mindsdb/api/http/namespaces/databases.py +140 -201
mindsdb/api/http/namespaces/file.py +17 -4
mindsdb/api/http/namespaces/handlers.py +17 -7
mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
mindsdb/api/http/namespaces/models.py +94 -126
mindsdb/api/http/namespaces/projects.py +13 -22
mindsdb/api/http/namespaces/sql.py +33 -25
mindsdb/api/http/namespaces/tab.py +27 -37
mindsdb/api/http/namespaces/views.py +1 -1
mindsdb/api/http/start.py +16 -10
mindsdb/api/mcp/__init__.py +2 -1
mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
mindsdb/integrations/libs/api_handler.py +10 -10
mindsdb/integrations/libs/base.py +4 -4
mindsdb/integrations/libs/llm/utils.py +2 -2
mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
mindsdb/integrations/libs/process_cache.py +132 -140
mindsdb/integrations/libs/response.py +18 -12
mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
mindsdb/integrations/utilities/files/file_reader.py +6 -7
mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
mindsdb/integrations/utilities/rag/config_loader.py +37 -26
mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
mindsdb/integrations/utilities/rag/settings.py +58 -133
mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
mindsdb/interfaces/agents/agents_controller.py +2 -3
mindsdb/interfaces/agents/constants.py +0 -2
mindsdb/interfaces/agents/litellm_server.py +34 -58
mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
mindsdb/interfaces/chatbot/polling.py +30 -18
mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
mindsdb/interfaces/database/data_handlers_cache.py +190 -0
mindsdb/interfaces/database/database.py +3 -3
mindsdb/interfaces/database/integrations.py +7 -110
mindsdb/interfaces/database/projects.py +2 -6
mindsdb/interfaces/database/views.py +1 -4
mindsdb/interfaces/file/file_controller.py +6 -6
mindsdb/interfaces/functions/controller.py +1 -1
mindsdb/interfaces/functions/to_markdown.py +2 -2
mindsdb/interfaces/jobs/jobs_controller.py +5 -9
mindsdb/interfaces/jobs/scheduler.py +3 -9
mindsdb/interfaces/knowledge_base/controller.py +244 -128
mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
mindsdb/interfaces/knowledge_base/executor.py +11 -0
mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
mindsdb/interfaces/model/model_controller.py +172 -168
mindsdb/interfaces/query_context/context_controller.py +14 -2
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
mindsdb/interfaces/skills/retrieval_tool.py +43 -50
mindsdb/interfaces/skills/skill_tool.py +2 -2
mindsdb/interfaces/skills/skills_controller.py +1 -4
mindsdb/interfaces/skills/sql_agent.py +25 -19
mindsdb/interfaces/storage/db.py +16 -6
mindsdb/interfaces/storage/fs.py +114 -169
mindsdb/interfaces/storage/json.py +19 -18
mindsdb/interfaces/tabs/tabs_controller.py +49 -72
mindsdb/interfaces/tasks/task_monitor.py +3 -9
mindsdb/interfaces/tasks/task_thread.py +7 -9
mindsdb/interfaces/triggers/trigger_task.py +7 -13
mindsdb/interfaces/triggers/triggers_controller.py +47 -52
mindsdb/migrations/migrate.py +16 -16
mindsdb/utilities/api_status.py +58 -0
mindsdb/utilities/config.py +68 -2
mindsdb/utilities/exception.py +40 -1
mindsdb/utilities/fs.py +0 -1
mindsdb/utilities/hooks/profiling.py +17 -14
mindsdb/utilities/json_encoder.py +24 -10
mindsdb/utilities/langfuse.py +40 -45
mindsdb/utilities/log.py +272 -0
mindsdb/utilities/ml_task_queue/consumer.py +52 -58
mindsdb/utilities/ml_task_queue/producer.py +26 -30
mindsdb/utilities/render/sqlalchemy_render.py +22 -20
mindsdb/utilities/starters.py +0 -10
mindsdb/utilities/utils.py +2 -2
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/METADATA +293 -276
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/RECORD +144 -158
mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
mindsdb/api/postgres/__init__.py +0 -0
mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
mindsdb/api/postgres/start.py +0 -11
mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/WHEEL +0 -0
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/licenses/LICENSE +0 -0
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/top_level.txt +0 -0

mindsdb/interfaces/knowledge_base/evaluate.py CHANGED Viewed

@@ -2,6 +2,7 @@ import json
 import math
 import re
 import time
+import copy
 from typing import List
 import pandas as pd
@@ -10,6 +11,7 @@ import datetime as dt
 from mindsdb.api.executor.sql_query.result_set import ResultSet
 from mindsdb_sql_parser import Identifier, Select, Constant, Star, parse_sql, BinaryOperation
 from mindsdb.utilities import log
+from mindsdb.utilities.config import config
 from mindsdb.interfaces.knowledge_base.llm_client import LLMClient
@@ -105,7 +107,12 @@ class EvaluateBase:
         if llm_params is None:
             llm_params = self.kb._kb.params.get("reranking_model")
-        self.llm_client = LLMClient(llm_params)
+        params = copy.deepcopy(config.get("default_llm", {}))
+        if llm_params:
+            params.update(llm_params)
+        self.llm_client = LLMClient(params)
     def generate_test_data(self, gen_params: dict) -> pd.DataFrame:
         # Extract source data (from users query or from KB itself) and call `generate` to get test data
@@ -241,6 +248,26 @@ class EvaluateBase:
         return cls(session, kb_table).run_evaluate(params)
+    def generate_question_answer(self, text: str) -> (str, str):
+        messages = [
+            {"role": "system", "content": GENERATE_QA_SYSTEM_PROMPT},
+            {"role": "user", "content": f"\n\nText:\n{text}\n\n"},
+        ]
+        answer = self.llm_client.completion(messages, json_output=True)[0]
+        # Sanitize the response by removing markdown code block formatting like ```json
+        sanitized_answer = sanitize_json_response(answer)
+        try:
+            output = json.loads(sanitized_answer)
+        except json.JSONDecodeError:
+            raise ValueError(f"Could not parse response from LLM: {answer}")
+        if "query" not in output or "reference_answer" not in output:
+            raise ValueError("Cant find question/answer in LLM response")
+        return output.get("query"), output.get("reference_answer")
 class EvaluateRerank(EvaluateBase):
     """
@@ -268,28 +295,12 @@ class EvaluateRerank(EvaluateBase):
         df["id"] = df.index
         return df
-    def generate_question_answer(self, text: str) -> (str, str):
-        messages = [
-            {"role": "system", "content": GENERATE_QA_SYSTEM_PROMPT},
-            {"role": "user", "content": f"\n\nText:\n{text}\n\n"},
-        ]
-        answer = self.llm_client.completion(messages, json_output=True)
-        # Sanitize the response by removing markdown code block formatting like ```json
-        sanitized_answer = sanitize_json_response(answer)
-        try:
-            output = json.loads(sanitized_answer)
-        except json.JSONDecodeError:
-            raise ValueError(f"Could not parse response from LLM: {answer}")
-        if "query" not in output or "reference_answer" not in output:
-            raise ValueError("Cant find question/answer in LLM response")
-        return output.get("query"), output.get("reference_answer")
     def evaluate(self, test_data: pd.DataFrame) -> pd.DataFrame:
         json_to_log_list = []
+        if {"question", "answer"} - set(test_data.columns):
+            raise KeyError(
+                f'Test data must contain "question" and "answer" columns. Columns in the provided test data: {list(test_data.columns)}'
+            )
         questions = test_data.to_dict("records")
         for i, item in enumerate(questions):
@@ -483,28 +494,12 @@ class EvaluateDocID(EvaluateBase):
         df = pd.DataFrame(qa_data)
         return df
-    def generate_question_answer(self, text: str) -> (str, str):
-        messages = [
-            {"role": "system", "content": GENERATE_QA_SYSTEM_PROMPT},
-            {"role": "user", "content": f"\n\nText:\n{text}\n\n"},
-        ]
-        answer = self.llm_client.completion(messages, json_output=True)
-        # Sanitize the response by removing markdown code block formatting like ```json
-        sanitized_answer = sanitize_json_response(answer)
-        try:
-            output = json.loads(sanitized_answer)
-        except json.JSONDecodeError:
-            raise ValueError(f"Could not parse response from LLM: {answer}")
-        if "query" not in output or "reference_answer" not in output:
-            raise ValueError("Cant find question/answer in LLM response")
-        return output.get("query"), output.get("reference_answer")
     def evaluate(self, test_data: pd.DataFrame) -> pd.DataFrame:
         stats = []
+        if {"question", "doc_id"} - set(test_data.columns):
+            raise KeyError(
+                f'Test data must contain "question" and "doc_id" columns. Columns in the provided test data: {list(test_data.columns)}'
+            )
         questions = test_data.to_dict("records")
         for i, item in enumerate(questions):

mindsdb/interfaces/knowledge_base/executor.py CHANGED Viewed

@@ -43,7 +43,18 @@ class KnowledgeBaseQueryExecutor:
         if isinstance(node, BinaryOperation):
             if isinstance(node.args[0], Identifier):
                 parts = node.args[0].parts
+                # map chunk_content to content
+                if parts[0].lower() == "chunk_content":
+                    parts[0] = self.content_column
                 if len(parts) == 1 and parts[0].lower() == self.content_column:
+                    if "LIKE" in node.op.upper():
+                        # remove '%'
+                        arg = node.args[1]
+                        if isinstance(arg, Constant) and isinstance(arg.value, str):
+                            arg.value = arg.value.strip(" %")
                     return True
         return False

mindsdb/interfaces/knowledge_base/llm_client.py CHANGED Viewed

@@ -1,11 +1,23 @@
-import copy
 import os
 from typing import List
 from openai import OpenAI, AzureOpenAI
 from mindsdb.integrations.utilities.handler_utils import get_api_key
-from mindsdb.utilities.config import config
+try:
+    from mindsdb.integrations.handlers.openai_handler.helpers import retry_with_exponential_backoff
+except ImportError:
+    def retry_with_exponential_backoff(func):
+        """
+        An empty decorator
+        """
+        def wrapper(*args, **kwargs):
+            return func(*args, **kwargs)
+        return wrapper
 class LLMClient:
@@ -14,12 +26,8 @@ class LLMClient:
     It chooses openai client or litellm handler depending on the config
     """
-    def __init__(self, llm_params: dict = None):
-        params = copy.deepcopy(config.get("default_llm", {}))
-        if llm_params:
-            params.update(llm_params)
+    def __init__(self, params: dict = None, session=None):
+        self._session = session
         self.params = params
         self.provider = params.get("provider", "openai")
@@ -27,11 +35,13 @@ class LLMClient:
         if "api_key" not in params:
             params["api_key"] = get_api_key(self.provider, params, strict=False)
+        self.engine = "openai"
         if self.provider == "azure_openai":
             azure_api_key = params.get("api_key") or os.getenv("AZURE_OPENAI_API_KEY")
             azure_api_endpoint = params.get("base_url") or os.environ.get("AZURE_OPENAI_ENDPOINT")
             azure_api_version = params.get("api_version") or os.environ.get("AZURE_OPENAI_API_VERSION")
-            self._llm_client = AzureOpenAI(
+            self.client = AzureOpenAI(
                 api_key=azure_api_key, azure_endpoint=azure_api_endpoint, api_version=azure_api_version, max_retries=2
             )
         elif self.provider == "openai":
@@ -41,34 +51,58 @@ class LLMClient:
             if base_url:
                 kwargs["base_url"] = base_url
             self.client = OpenAI(**kwargs)
+        elif self.provider == "ollama":
+            kwargs = params.copy()
+            kwargs.pop("model_name")
+            kwargs.pop("provider", None)
+            if kwargs["api_key"] is None:
+                kwargs["api_key"] = "n/a"
+            self.client = OpenAI(**kwargs)
         else:
             # try to use litellm
-            from mindsdb.api.executor.controllers.session_controller import SessionController
+            if self._session is None:
+                from mindsdb.api.executor.controllers.session_controller import SessionController
-            session = SessionController()
-            module = session.integration_controller.get_handler_module("litellm")
+                self._session = SessionController()
+            module = self._session.integration_controller.get_handler_module("litellm")
             if module is None or module.Handler is None:
                 raise ValueError(f'Unable to use "{self.provider}" provider. Litellm handler is not installed')
             self.client = module.Handler
+            self.engine = "litellm"
+    @retry_with_exponential_backoff()
+    def embeddings(self, messages: List[str]):
+        params = self.params
+        if self.engine == "openai":
+            response = self.client.embeddings.create(
+                model=params["model_name"],
+                input=messages,
+            )
+            return [item.embedding for item in response.data]
+        else:
+            kwargs = params.copy()
+            model = kwargs.pop("model_name")
+            kwargs.pop("provider", None)
+            return self.client.embeddings(self.provider, model=model, messages=messages, args=kwargs)
-    def completion(self, messages: List[dict], json_output: bool = False) -> str:
+    def completion(self, messages: List[dict], json_output: bool = False) -> List[str]:
         """
         Call LLM completion and get response
         """
         params = self.params
         params["json_output"] = json_output
-        if self.provider in ("azure_openai", "openai"):
+        if self.engine == "openai":
             response = self.client.chat.completions.create(
                 model=params["model_name"],
                 messages=messages,
             )
-            return response.choices[0].message.content
+            return [item.message.content for item in response.choices]
         else:
             kwargs = params.copy()
             model = kwargs.pop("model_name")
             kwargs.pop("provider", None)
             response = self.client.completion(self.provider, model=model, messages=messages, args=kwargs)
-            return response.choices[0].message.content
+            return [item.message.content for item in response.choices]

mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py CHANGED Viewed

@@ -1,13 +1,10 @@
-from typing import List, Dict, Any, Optional
+import ast
 import json
+from typing import List, Dict, Any, Optional
 import pandas as pd
-import ast
-from mindsdb.interfaces.knowledge_base.preprocessing.models import (
-    Document,
-    ProcessedChunk,
-    JSONChunkingConfig
-)
+from mindsdb.interfaces.knowledge_base.preprocessing.models import Document, ProcessedChunk, JSONChunkingConfig
 from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import DocumentPreprocessor
 from mindsdb.utilities import log
@@ -50,7 +47,7 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
                 chunks = self._process_json_data(json_data, doc)
                 all_chunks.extend(chunks)
             except Exception as e:
-                logger.error(f"Error processing document {doc.id}: {e}")
+                logger.exception(f"Error processing document {doc.id}:")
                 error_chunk = self._create_error_chunk(doc, str(e))
                 all_chunks.append(error_chunk)
@@ -76,8 +73,8 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
             # If JSON parsing fails, try as Python literal
             try:
                 return ast.literal_eval(doc.content)
-            except (SyntaxError, ValueError) as e:
-                logger.error(f"Error parsing content for document {doc.id}: {e}")
+            except (SyntaxError, ValueError):
+                logger.exception(f"Error parsing content for document {doc.id}:")
                 # We'll create the error chunk in the main process_documents method
                 return None
@@ -117,7 +114,7 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
         return ProcessedChunk(
             id=f"{doc.id}_error",
             content=f"Error processing document: {error_message}",
-            metadata=self._prepare_chunk_metadata(doc.id, 0, doc.metadata)
+            metadata=self._prepare_chunk_metadata(doc.id, 0, doc.metadata),
         )
     def _process_json_list(self, json_list: List, doc: Document) -> List[ProcessedChunk]:
@@ -132,20 +129,12 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
             elif isinstance(item, list):
                 # Handle nested lists by converting to string representation
                 chunk = self._create_chunk_from_primitive(
-                    json.dumps(item),
-                    doc,
-                    chunk_index=i,
-                    total_chunks=total_objects
+                    json.dumps(item), doc, chunk_index=i, total_chunks=total_objects
                 )
                 chunks.append(chunk)
             else:
                 # Handle primitive values
-                chunk = self._create_chunk_from_primitive(
-                    item,
-                    doc,
-                    chunk_index=i,
-                    total_chunks=total_objects
-                )
+                chunk = self._create_chunk_from_primitive(item, doc, chunk_index=i, total_chunks=total_objects)
                 chunks.append(chunk)
         return chunks
@@ -159,7 +148,7 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
             try:
                 json_dict = json.loads(json_dict)
             except json.JSONDecodeError:
-                logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
+                logger.exception(f"Error parsing JSON string: {json_dict[:100]}...")
                 return [self._create_error_chunk(doc, "Invalid JSON string")]
         # Filter fields based on include/exclude lists
@@ -190,31 +179,25 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
                 start_char=0,
                 end_char=len(field_content),
                 provided_id=doc.id,
-                content_column=self.config.content_column
+                content_column=self.config.content_column,
             )
             # Create and add the chunk
-            chunk = ProcessedChunk(
-                id=chunk_id,
-                content=field_content,
-                metadata=metadata
-            )
+            chunk = ProcessedChunk(id=chunk_id, content=field_content, metadata=metadata)
             chunks.append(chunk)
         return chunks
-    def _create_chunk_from_dict(self,
-                                json_dict: Dict,
-                                doc: Document,
-                                chunk_index: int,
-                                total_chunks: int) -> ProcessedChunk:
+    def _create_chunk_from_dict(
+        self, json_dict: Dict, doc: Document, chunk_index: int, total_chunks: int
+    ) -> ProcessedChunk:
         """Create a chunk from a JSON dictionary"""
         # Ensure we're working with a dictionary
         if isinstance(json_dict, str):
             try:
                 json_dict = json.loads(json_dict)
             except json.JSONDecodeError:
-                logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
+                logger.exception(f"Error parsing JSON string: {json_dict[:100]}...")
                 return self._create_error_chunk(doc, "Invalid JSON string")
         # Format the content
@@ -223,9 +206,12 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
             filtered_dict = self._filter_fields(flattened)
             content = self._dict_to_text(filtered_dict)
         else:
-            filtered_dict = {k: v for k, v in json_dict.items()
-                             if (not self.config.include_fields or k in self.config.include_fields)
-                             and k not in self.config.exclude_fields}
+            filtered_dict = {
+                k: v
+                for k, v in json_dict.items()
+                if (not self.config.include_fields or k in self.config.include_fields)
+                and k not in self.config.exclude_fields
+            }
             content = json.dumps(filtered_dict, indent=2)
         # Create metadata
@@ -241,22 +227,23 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
             start_char=0,
             end_char=len(content),
             provided_id=doc.id,
-            content_column=self.config.content_column
+            content_column=self.config.content_column,
         )
-        return ProcessedChunk(
-            id=chunk_id,
-            content=content,
-            metadata=metadata
-        )
+        return ProcessedChunk(id=chunk_id, content=content, metadata=metadata)
     def _filter_fields(self, flattened_dict: Dict) -> Dict:
         """Filter fields based on include/exclude configuration"""
         # If include_fields is specified, only keep those fields
         if self.config.include_fields:
-            filtered_dict = {k: v for k, v in flattened_dict.items()
-                             if any(k == field or k.startswith(field + self.config.nested_delimiter)
-                                    for field in self.config.include_fields)}
+            filtered_dict = {
+                k: v
+                for k, v in flattened_dict.items()
+                if any(
+                    k == field or k.startswith(field + self.config.nested_delimiter)
+                    for field in self.config.include_fields
+                )
+            }
         else:
             filtered_dict = flattened_dict.copy()
@@ -276,11 +263,7 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
         return filtered_dict
     def _create_chunk_from_primitive(
-            self,
-            value: Any,
-            doc: Document,
-            chunk_index: int = 0,
-            total_chunks: int = 1
+        self, value: Any, doc: Document, chunk_index: int = 0, total_chunks: int = 1
     ) -> ProcessedChunk:
         """Create a chunk from a primitive value"""
         content = str(value)
@@ -300,16 +283,12 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
             start_char=0,
             end_char=len(content),
             provided_id=doc.id,
-            content_column=self.config.content_column
+            content_column=self.config.content_column,
         )
-        return ProcessedChunk(
-            id=chunk_id,
-            content=content,
-            metadata=metadata
-        )
+        return ProcessedChunk(id=chunk_id, content=content, metadata=metadata)
-    def _flatten_dict(self, d: Dict, delimiter: str = '.', prefix: str = '') -> Dict:
+    def _flatten_dict(self, d: Dict, delimiter: str = ".", prefix: str = "") -> Dict:
         """Flatten a nested dictionary structure"""
         result = {}
         for k, v in d.items():
@@ -337,7 +316,7 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
                     # Format list of dictionaries
                     lines.append(f"{key}:")
                     for i, item in enumerate(value):
-                        lines.append(f"  Item {i+1}:")
+                        lines.append(f"  Item {i + 1}:")
                         for k, v in item.items():
                             lines.append(f"    {k}: {v}")
                 else:
@@ -362,7 +341,7 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
                 # Format list of dictionaries
                 lines = [f"{key}:"]
                 for i, item in enumerate(value):
-                    lines.append(f"  Item {i+1}:")
+                    lines.append(f"  Item {i + 1}:")
                     for k, v in item.items():
                         lines.append(f"    {k}: {v}")
                 return "\n".join(lines)
@@ -380,7 +359,7 @@ class JSONChunkingPreprocessor(DocumentPreprocessor):
             try:
                 json_dict = json.loads(json_dict)
             except json.JSONDecodeError:
-                logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
+                logger.exception(f"Error parsing JSON string: {json_dict[:100]}...")
                 return
         # Always flatten the dictionary for metadata extraction

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl

Potentially problematic release.

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl