MindsDB 25.6.3.0__py3-none-any.whl → 25.6.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic.

mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
  __title__ = "MindsDB"
  __package_name__ = "mindsdb"
- __version__ = "25.6.3.0"
+ __version__ = "25.6.3.1"
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
  __email__ = "jorge@mindsdb.com"
  __author__ = "MindsDB Inc"
@@ -154,7 +154,7 @@ class ProjectDataNode(DataNode):

  return DataHubResponse(data_frame=df, columns=columns_info)

- raise EntityNotExistsError(f"Can't select from {query_table} in project")
+ raise EntityNotExistsError(f"Can't select from <{query_table}> in project")
  else:
  raise NotImplementedError(f"Query not supported {query}")

@@ -64,26 +64,25 @@ def query_df_with_type_infer_fallback(query_str: str, dataframes: dict, user_fun
  pandas.columns
  """

- for name, value in dataframes.items():
- locals()[name] = value
-
- con = duckdb.connect(database=":memory:")
- if user_functions:
- user_functions.register(con)
-
- exception = None
- for sample_size in [1000, 10000, 1000000]:
- try:
- con.execute(f"set global pandas_analyze_sample={sample_size};")
- result_df = con.execute(query_str).fetchdf()
- except InvalidInputException as e:
- exception = e
+ with duckdb.connect(database=":memory:") as con:
+ if user_functions:
+ user_functions.register(con)
+
+ for name, value in dataframes.items():
+ con.register(name, value)
+
+ exception = None
+ for sample_size in [1000, 10000, 1000000]:
+ try:
+ con.execute(f"set global pandas_analyze_sample={sample_size};")
+ result_df = con.execute(query_str).fetchdf()
+ except InvalidInputException as e:
+ exception = e
+ else:
+ break
  else:
- break
- else:
- raise exception
- description = con.description
- con.close()
+ raise exception
+ description = con.description

  return result_df, description
 
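Note: the rewritten helper above stops injecting DataFrames into locals() and instead registers each frame on the DuckDB connection, with the connection wrapped in a context manager so it is always closed. A minimal sketch of that registration pattern (the frame name, column, and query below are illustrative, not taken from MindsDB):

    import duckdb
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})  # hypothetical input frame

    # Explicit registration makes the frame queryable by name regardless of the
    # caller's local scope; the with-block closes the connection automatically.
    with duckdb.connect(database=":memory:") as con:
        con.register("df", df)
        result = con.execute("SELECT SUM(a) AS total FROM df").fetchdf()
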
@@ -1,3 +1,3 @@
  pyphoenix
  phoenixdb
- protobuf==3.20.3
+ protobuf==4.25.8
@@ -111,24 +111,12 @@ class MindsDBSQL(SQLDatabase):
  )

  # Convert ExecuteAnswer to a DataFrame for easier manipulation
- df = None
- if hasattr(result, "data") and hasattr(result.data, "data_frame"):
- df = result.data.data_frame
+ if result.data is not None:
+ df = result.data.to_df()
+ return df.to_string(index=False)
+
  else:
- # Fallback to to_df when data_frame attr not available
- try:
- df = result.data.to_df()
- except Exception:
- df = None
-
- # Default behaviour (string)
- if df is not None:
- if not df.empty:
- return df.to_string(index=False)
- else:
- return "Query executed successfully, but returned no data."
-
- return str(result)
+ return "Query executed successfully, but returned no data."

  except Exception as e:
  logger.error(f"Error executing SQL command: {str(e)}\n{traceback.format_exc()}")
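Note: the simplified branch above relies on result.data.to_df() plus pandas' to_string(index=False) to format the tool output. A small sketch of that formatting step (the frame contents are illustrative):

    import pandas as pd

    df = pd.DataFrame({"name": ["alice", "bob"], "score": [0.9, 0.7]})  # example data

    # to_string(index=False) renders the frame as plain text without the index
    # column, which is the string the tool returns above.
    text = df.to_string(index=False)
    print(text)
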
@@ -9,6 +9,7 @@ import numpy as np

  from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
  from mindsdb_sql_parser.ast.mindsdb import CreatePredictor
+ from mindsdb_sql_parser import parse_sql

  from mindsdb.integrations.utilities.query_traversal import query_traversal

@@ -52,6 +53,13 @@ def get_model_params(model_params: dict, default_config_key: str):
  """
  Get model parameters by combining default config with user provided parameters.
  """
+ # If the default config key is for reranking and the switch to use the default LLM is enabled,
+ # switch to the default LLM model.
+ if default_config_key == "default_reranking_model" and config.get("default_reranking_model").get(
+ "use_default_llm", False
+ ):
+ default_config_key = "default_llm_model"
+
  combined_model_params = copy.deepcopy(config.get(default_config_key, {}))

  if model_params:
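Note: the new branch above redirects the reranking defaults to the default LLM config when use_default_llm is set. A hedged sketch of the config shape this implies; the key names come from the diff, while the provider/model values are illustrative assumptions:

    # Illustrative only: provider and model_name values are not MindsDB defaults.
    config_fragment = {
        "default_llm_model": {
            "provider": "openai",
            "model_name": "gpt-4o",
        },
        "default_reranking_model": {
            # When True, get_model_params(..., "default_reranking_model") switches
            # to the "default_llm_model" entry before merging user parameters.
            "use_default_llm": True,
        },
    }
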
@@ -97,6 +105,8 @@ def get_reranking_model_from_params(reranking_model_params: dict):
  params_copy["api_key"] = get_api_key(provider, params_copy, strict=False)
  params_copy["model"] = params_copy.pop("model_name", None)

+ params_copy.pop("use_default_llm", None)
+
  return BaseLLMReranker(**params_copy)


@@ -359,23 +369,30 @@ class KnowledgeBaseTable:

  def insert_query_result(self, query: str, project_name: str):
  """Process and insert SQL query results"""
- if not self.document_loader:
- raise ValueError("Document loader not configured")
+ ast_query = parse_sql(query)

- documents = list(self.document_loader.load_query_result(query, project_name))
- if documents:
- self.insert_documents(documents)
+ command_executor = ExecuteCommands(self.session)
+ response = command_executor.execute_command(ast_query, project_name)
+
+ if response.error_code is not None:
+ raise ValueError(f"Error executing query: {response.error_message}")
+
+ if response.data is None:
+ raise ValueError("Query returned no data")
+
+ records = response.data.records
+ df = pd.DataFrame(records)
+
+ self.insert(df)

  def insert_rows(self, rows: List[Dict]):
  """Process and insert raw data rows"""
  if not rows:
  return

- documents = [
- Document(content=row.get("content", ""), id=row.get("id"), metadata=row.get("metadata", {})) for row in rows
- ]
+ df = pd.DataFrame(rows)

- self.insert_documents(documents)
+ self.insert(df)

  def insert_documents(self, documents: List[Document]):
  """Process and insert documents with preprocessing if configured"""
@@ -2,7 +2,6 @@ import os
  from typing import List, Iterator
  from langchain_core.documents import Document as LangchainDocument
  from langchain_text_splitters import MarkdownHeaderTextSplitter
- import pandas as pd

  from mindsdb.interfaces.file.file_controller import FileController
  from mindsdb.integrations.utilities.rag.loaders.file_loader import FileLoader
@@ -20,12 +19,12 @@ class DocumentLoader:
  """Handles loading documents from various sources including SQL queries"""

  def __init__(
- self,
- file_controller: FileController,
- file_splitter: FileSplitter,
- markdown_splitter: MarkdownHeaderTextSplitter,
- file_loader_class=FileLoader,
- mysql_proxy=None
+ self,
+ file_controller: FileController,
+ file_splitter: FileSplitter,
+ markdown_splitter: MarkdownHeaderTextSplitter,
+ file_loader_class=FileLoader,
+ mysql_proxy=None,
  ):
  """
  Initialize with required dependencies
@@ -52,8 +51,8 @@ class DocumentLoader:
  for doc in loader.lazy_load():
  # Add file extension to metadata for proper splitting
  extension = os.path.splitext(file_path)[1].lower()
- doc.metadata['extension'] = extension
- doc.metadata['source'] = file_name
+ doc.metadata["extension"] = extension
+ doc.metadata["source"] = file_name

  # Use FileSplitter to handle the document based on its type
  split_docs = self.file_splitter.split_documents([doc])
@@ -62,34 +61,22 @@ class DocumentLoader:
  metadata = doc.metadata.copy()
  metadata.update(split_doc.metadata or {})

- yield Document(
- content=split_doc.page_content,
- metadata=metadata
- )
+ yield Document(content=split_doc.page_content, metadata=metadata)

  def load_web_pages(
- self,
- urls: List[str],
- crawl_depth: int,
- limit: int,
- filters: List[str] = None,
+ self,
+ urls: List[str],
+ crawl_depth: int,
+ limit: int,
+ filters: List[str] = None,
  ) -> Iterator[Document]:
  """Load and split documents from web pages"""
- websites_df = get_all_websites(
- urls,
- crawl_depth=crawl_depth,
- limit=limit,
- filters=filters
- )
+ websites_df = get_all_websites(urls, crawl_depth=crawl_depth, limit=limit, filters=filters)

  for _, row in websites_df.iterrows():
  # Create a document with HTML extension for proper splitting
  doc = LangchainDocument(
- page_content=row['text_content'],
- metadata={
- 'extension': '.html',
- 'url': row['url']
- }
+ page_content=row["text_content"], metadata={"extension": ".html", "url": row["url"]}
  )

  # Use FileSplitter to handle HTML content
@@ -98,60 +85,4 @@ class DocumentLoader:
  metadata = doc.metadata.copy()
  metadata.update(split_doc.metadata or {})

- yield Document(
- content=split_doc.page_content,
- metadata=metadata
- )
-
- def load_query_result(self, query: str, project_name: str) -> Iterator[Document]:
- """
- Load documents from SQL query results
-
- Args:
- query: SQL query to execute
- project_name: Name of the project context
-
- Returns:
- Iterator of Document objects
-
- Raises:
- ValueError: If mysql_proxy is not configured or query returns no data
- """
- if not self.mysql_proxy:
- raise ValueError("MySQL proxy not configured")
-
- if not query:
- return
-
- # Set project context and execute query
- self.mysql_proxy.set_context({'db': project_name})
- query_result = self.mysql_proxy.process_query(query)
-
- if query_result.type != 'table':
- raise ValueError('Query returned no data')
-
- # Convert query result to DataFrame
- df = query_result.data.to_df()
-
- # Process each row into a Document
- for _, row in df.iterrows():
- # Extract id, content and metadata
- content = str(row.get('content', ''))
- id = row.get('id', None)
-
- # Convert remaining columns to metadata
- metadata = {
- col: str(row[col])
- for col in df.columns
- if col != 'content' and not pd.isna(row[col])
- }
- metadata['source'] = 'query'
-
- # Split content using recursive splitter
- if content:
-
- yield Document(
- id=id,
- content=content,
- metadata=metadata
- )
+ yield Document(content=split_doc.page_content, metadata=metadata)
@@ -3,6 +3,7 @@ import re
  import json
  from pydantic import BaseModel, Field
  from langchain_core.tools import BaseTool
+ from mindsdb_sql_parser.ast import Describe, Select, Identifier, Constant, Star


  class KnowledgeBaseListToolInput(BaseModel):
@@ -55,6 +56,26 @@ class KnowledgeBaseInfoTool(BaseTool):
  except (json.JSONDecodeError, TypeError):
  pass

+ def strip(s):
+ length = -1
+ while length != len(s):
+ length = len(s)
+
+ # remove ```
+ if s.startswith("```"):
+ s = s[3:]
+ if s.endswith("```"):
+ s = s[:-3]
+
+ # remove trailing new lines
+ s = s.strip("\n")
+
+ # remove extra quotes
+ for q in ('"', "'", "`"):
+ if s.count(q) == 1:
+ s = s.strip(q)
+ return s
+
  # Finally, try the original regex pattern for $START$ and $STOP$ markers
  match = re.search(r"\$START\$(.*?)\$STOP\$", tool_input, re.DOTALL)
  if not match:
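Note: the nested strip() helper added above normalizes LLM tool input that arrives wrapped in code fences or with a stray quote. A standalone copy with a few illustrative inputs, to show the intended effect:

    def strip(s):
        # Repeat until the string stops changing.
        length = -1
        while length != len(s):
            length = len(s)
            # remove ``` fences
            if s.startswith("```"):
                s = s[3:]
            if s.endswith("```"):
                s = s[:-3]
            # remove leading/trailing newlines
            s = s.strip("\n")
            # remove a single unbalanced quote or backtick
            for q in ('"', "'", "`"):
                if s.count(q) == 1:
                    s = s.strip(q)
        return s

    assert strip("```\nmy_kb\n```") == "my_kb"   # fenced tool input
    assert strip('"my_kb') == "my_kb"            # dangling quote
    assert strip("my_kb") == "my_kb"             # already clean
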
@@ -63,12 +84,14 @@ class KnowledgeBaseInfoTool(BaseTool):
  return [kb.strip() for kb in tool_input.split(",")]
  # If it's just a single string without formatting, return it as a single item
  if tool_input.strip():
- return [tool_input.strip()]
+ return [strip(tool_input)]
  return []

  # Extract and clean the knowledge base names
  kb_names_str = match.group(1).strip()
  kb_names = re.findall(r"`([^`]+)`", kb_names_str)
+
+ kb_names = [strip(n) for n in kb_names]
  return kb_names

  def _run(self, tool_input: str) -> str:
@@ -83,7 +106,7 @@ class KnowledgeBaseInfoTool(BaseTool):
  for kb_name in kb_names:
  try:
  # Get knowledge base schema
- schema_result = self.db.run_no_throw(f"DESCRIBE KNOWLEDGE_BASE `{kb_name}`;")
+ schema_result = self.db.run_no_throw(str(Describe(kb_name, type="knowledge_base")))

  if not schema_result:
  results.append(f"Knowledge base `{kb_name}` not found or has no schema information.")
@@ -111,7 +134,9 @@ class KnowledgeBaseInfoTool(BaseTool):
  kb_info += "```\n\n"

  # Get sample data
- sample_data = self.db.run_no_throw(f"SELECT * FROM `{kb_name}` LIMIT 10;")
+ sample_data = self.db.run_no_throw(
+ str(Select(targets=[Star()], from_table=Identifier(kb_name), limit=Constant(20)))
+ )

  # Sample data
  kb_info += "### Sample Data:\n"
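Note: both queries in this tool are now built from mindsdb_sql_parser AST nodes rather than hand-formatted SQL strings, which keeps the quoting of kb_name consistent. A rough sketch of the construction; the kb_name value is hypothetical and the exact SQL text that str() renders is an assumption about the parser's output:

    from mindsdb_sql_parser.ast import Describe, Select, Identifier, Constant, Star

    kb_name = "my_kb"  # hypothetical knowledge base name

    # Mirrors the calls in the hunks above; str() is expected to render SQL text
    # along the lines of "DESCRIBE KNOWLEDGE_BASE my_kb" and
    # "SELECT * FROM my_kb LIMIT 20".
    describe_sql = str(Describe(kb_name, type="knowledge_base"))
    sample_sql = str(Select(targets=[Star()], from_table=Identifier(kb_name), limit=Constant(20)))
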
@@ -76,7 +76,7 @@ def split_table_name(table_name: str) -> List[str]:
  result.append(current.strip("`"))

  # ensure we split the table name
- result = [r.split(".") for r in result][0]
+ # result = [r.split(".") for r in result][0]

  return result