MindsDB 25.7.4.0__py3-none-any.whl → 25.8.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (65) hide show
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +13 -1
  3. mindsdb/api/a2a/agent.py +6 -16
  4. mindsdb/api/a2a/common/types.py +3 -4
  5. mindsdb/api/a2a/task_manager.py +24 -35
  6. mindsdb/api/a2a/utils.py +63 -0
  7. mindsdb/api/executor/command_executor.py +9 -15
  8. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
  9. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
  10. mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
  11. mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
  12. mindsdb/api/executor/utilities/sql.py +30 -0
  13. mindsdb/api/http/initialize.py +2 -1
  14. mindsdb/api/http/namespaces/agents.py +6 -7
  15. mindsdb/api/http/namespaces/views.py +56 -72
  16. mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
  17. mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
  18. mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
  19. mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
  20. mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
  21. mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
  22. mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
  23. mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
  24. mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
  25. mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
  26. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
  27. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
  28. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
  29. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  30. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -2
  31. mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
  32. mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
  33. mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
  34. mindsdb/integrations/handlers/salesforce_handler/constants.py +9 -2
  35. mindsdb/integrations/libs/llm/config.py +0 -14
  36. mindsdb/integrations/libs/llm/utils.py +0 -15
  37. mindsdb/integrations/utilities/files/file_reader.py +5 -19
  38. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
  39. mindsdb/interfaces/agents/agents_controller.py +83 -45
  40. mindsdb/interfaces/agents/constants.py +16 -3
  41. mindsdb/interfaces/agents/langchain_agent.py +84 -21
  42. mindsdb/interfaces/database/projects.py +111 -7
  43. mindsdb/interfaces/knowledge_base/controller.py +7 -1
  44. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
  45. mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
  46. mindsdb/interfaces/query_context/context_controller.py +14 -15
  47. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +7 -1
  48. mindsdb/interfaces/skills/skill_tool.py +7 -1
  49. mindsdb/interfaces/skills/sql_agent.py +6 -2
  50. mindsdb/utilities/config.py +2 -0
  51. mindsdb/utilities/fs.py +60 -17
  52. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/METADATA +277 -262
  53. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/RECORD +57 -56
  54. mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
  55. mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
  56. mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
  57. mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
  58. mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
  59. mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
  60. mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
  61. mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
  62. /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
  63. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/WHEEL +0 -0
  64. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/licenses/LICENSE +0 -0
  65. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,73 @@
1
+ from typing import List
2
+
3
+
4
class TextSplitter:
    def __init__(
        self,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        separators: List[str] = None,
        k_range: float = 0.5,
        k_ratio: float = 1,
    ):
        """
        Split text into chunks. The logic:
        - Get a piece of text with chunk_size and try to find the separator at the end of the piece.
        - The allowed range to find the separator is defined by k_range and k_ratio using formula:
            k_range * chunk_size / (num * k_ratio + 1)
            num - is the index of the separator in the list
        - if the separator is not in the range: switch to the next separator
        - if the found separator is in the middle of the sentence, use overlapping:
            - the found text is the current chunk
            - repeat the search with less strict k_range and k_ratio
            - the found text will be the beginning of the next chunk

        :param chunk_size: size of the chunk, which must not be exceeded
        :param chunk_overlap: the next chunk is rewound to the previous non-blank separator
            only if that rewinds fewer than this many characters
        :param separators: list of separators in order of priority
        :param k_range: defines the range to look for the separator
        :param k_ratio: defines how much to shrink the range for the next separator
        """
        if separators is None:
            separators = ["\n\n", "\n", ". ", " ", ""]
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.separators = separators
        self.k_range = k_range
        self.k_ratio = k_ratio

    def split_text(self, text: str) -> List[str]:
        """
        Split text into a list of chunks.

        Text shorter than chunk_size is returned as a single chunk (an empty
        string yields [""]).

        :param text: the text to split
        :return: list of chunks in the order they appear in the text
        :raises RuntimeError: if no separator can be found for some piece of the text
        """
        chunks = []

        while len(text) >= self.chunk_size:
            _, chunk, shift = self.get_next_chunk(text, self.k_range, self.k_ratio)
            if shift <= 0:
                # a non-advancing shift would re-process the same text forever
                raise RuntimeError("Cannot split text")
            chunks.append(chunk)
            text = text[shift:]

        # the remainder (possibly empty) is always the last chunk
        chunks.append(text)
        return chunks

    def get_next_chunk(self, text: str, k_range: float, k_ratio: float):
        """
        Cut the next chunk from the beginning of text.

        :param text: remaining text to split
        :param k_range: defines the range to look for the separator
        :param k_ratio: defines how much to shrink the range for the next separator
        :return: tuple (separator, chunk, shift): the separator that was used, the chunk text
            (without the separator) and the offset in text where the next search starts
        :raises RuntimeError: if none of the separators is found in its allowed range
        """
        chunk = text[: self.chunk_size]

        found = self._find_separator(chunk, k_range, k_ratio)
        if found is None:
            raise RuntimeError("Cannot split text")

        sep, pos = found
        shift = pos + len(sep)

        if sep.strip(" ") == "":
            # The cut is in the middle of a sentence: look once, in a wider range
            # that is the same for every separator, for a stronger separator the
            # next chunk can start after (overlapping).
            wider = self._find_separator(chunk, k_range * 1.5, 0)
            if wider is not None:
                sep2, pos2 = wider
                shift2 = pos2 + len(sep2)
                if sep2.strip(" ") != "" and shift - shift2 < self.chunk_overlap:
                    # use shift of previous separator
                    shift = shift2

        return sep, chunk[:pos], shift

    def _find_separator(self, chunk: str, k_range: float, k_ratio: float):
        """
        Return (separator, position) for the first separator, in priority order, whose last
        occurrence in chunk lies within its allowed distance from chunk_size; None if there
        is no such separator.
        """
        for num, sep in enumerate(self.separators):
            pos = chunk.rfind(sep)
            if pos == -1:
                # rfind's "not found" marker must never be used as a position
                continue

            if self.chunk_size - pos < k_range * self.chunk_size / (num * k_ratio + 1):
                return sep, pos

        return None
@@ -45,7 +45,7 @@ class RunningQuery:
45
45
  for df in dn.query_stream(query2, fetch_size=self.batch_size):
46
46
  max_track_value = self.get_max_track_value(df)
47
47
  yield df
48
- self.set_progress(df, max_track_value)
48
+ self.set_progress(max_track_value=max_track_value)
49
49
 
50
50
  else:
51
51
  while True:
@@ -59,7 +59,7 @@ class RunningQuery:
59
59
 
60
60
  max_track_value = self.get_max_track_value(df)
61
61
  yield df
62
- self.set_progress(df, max_track_value)
62
+ self.set_progress(max_track_value=max_track_value)
63
63
 
64
64
  def get_partition_query(self, step_num: int, query: Select, stream=False) -> Select:
65
65
  """
@@ -178,24 +178,23 @@ class RunningQuery:
178
178
  # stream mode
179
179
  return None
180
180
 
181
- def set_progress(self, df: pd.DataFrame, max_track_value: int):
181
+ def set_progress(self, processed_rows: int = None, max_track_value: int = None):
182
182
  """
183
183
  Store progress of the query; it is called after each batch is processed
184
184
  """
185
185
 
186
- if len(df) == 0:
187
- return
188
-
189
- self.record.processed_rows = self.record.processed_rows + len(df)
190
-
191
- cur_value = self.record.context.get("track_value")
192
- new_value = max_track_value
193
- if new_value is not None:
194
- if cur_value is None or new_value > cur_value:
195
- self.record.context["track_value"] = new_value
196
- flag_modified(self.record, "context")
186
+ if processed_rows is not None and processed_rows > 0:
187
+ self.record.processed_rows = self.record.processed_rows + processed_rows
188
+ db.session.commit()
197
189
 
198
- db.session.commit()
190
+ if max_track_value is not None:
191
+ cur_value = self.record.context.get("track_value")
192
+ new_value = max_track_value
193
+ if new_value is not None:
194
+ if cur_value is None or new_value > cur_value:
195
+ self.record.context["track_value"] = new_value
196
+ flag_modified(self.record, "context")
197
+ db.session.commit()
199
198
 
200
199
  def on_error(self, error: Exception, step_num: int, steps_data: dict):
201
200
  """
@@ -15,6 +15,7 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import (
15
15
 
16
16
 
17
17
  class MindsDBSQLToolkit(SQLDatabaseToolkit):
18
+ include_tables_tools: bool = True
18
19
  include_knowledge_base_tools: bool = True
19
20
 
20
21
  def get_tools(self, prefix="") -> List[BaseTool]:
@@ -212,8 +213,13 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
212
213
  )
213
214
 
214
215
  # Return standard SQL tools and knowledge base tools
215
- return sql_tools + [
216
+ kb_tools = [
216
217
  kb_list_tool,
217
218
  kb_info_tool,
218
219
  kb_query_tool,
219
220
  ]
221
+
222
+ if not self.include_tables_tools:
223
+ return kb_tools
224
+ else:
225
+ return sql_tools + kb_tools
@@ -347,7 +347,13 @@ class SkillToolController:
347
347
  )
348
348
  db = MindsDBSQL.custom_init(sql_agent=sql_agent)
349
349
  should_include_kb_tools = include_knowledge_bases is not None and len(include_knowledge_bases) > 0
350
- toolkit = MindsDBSQLToolkit(db=db, llm=llm, include_knowledge_base_tools=should_include_kb_tools)
350
+ should_include_tables_tools = len(databases_struct) > 0 or len(tables_list) > 0
351
+ toolkit = MindsDBSQLToolkit(
352
+ db=db,
353
+ llm=llm,
354
+ include_tables_tools=should_include_tables_tools,
355
+ include_knowledge_base_tools=should_include_kb_tools,
356
+ )
351
357
  return toolkit.get_tools()
352
358
 
353
359
  def _make_retrieval_tools(self, skill: db.Skills, llm, embedding_model):
@@ -405,6 +405,7 @@ class SQLAgent:
405
405
  tables_idx[tuple(table.parts)] = table
406
406
 
407
407
  tables = []
408
+ not_found = []
408
409
  for table_name in table_names:
409
410
  if not table_name.strip():
410
411
  continue
@@ -419,9 +420,12 @@ class SQLAgent:
419
420
  table_identifier = tables_idx.get(tuple(table_parts))
420
421
 
421
422
  if table_identifier is None:
422
- raise ValueError(f"Table {table_name} not found in the database")
423
- tables.append(table_identifier)
423
+ not_found.append(table_name)
424
+ else:
425
+ tables.append(table_identifier)
424
426
 
427
+ if not_found:
428
+ raise ValueError(f"Tables: {', '.join(not_found)} not found in the database")
425
429
  return tables
426
430
 
427
431
  def get_knowledge_base_info(self, kb_names: Optional[List[str]] = None) -> str:
@@ -599,6 +599,7 @@ class Config:
599
599
  ml_task_queue_consumer=None,
600
600
  agent=None,
601
601
  project=None,
602
+ update_gui=False,
602
603
  )
603
604
  return
604
605
 
@@ -635,6 +636,7 @@ class Config:
635
636
  help="MindsDB agent name to connect to",
636
637
  )
637
638
  parser.add_argument("--project-name", type=str, default=None, help="MindsDB project name")
639
+ parser.add_argument("--update-gui", action="store_true", default=False, help="Update GUI and exit")
638
640
 
639
641
  self._cmd_args = parser.parse_args()
640
642
 
mindsdb/utilities/fs.py CHANGED
@@ -12,6 +12,10 @@ from mindsdb.utilities import log
12
12
  logger = log.getLogger(__name__)
13
13
 
14
14
 
15
def get_tmp_dir() -> Path:
    """Return the path of the "mindsdb" folder inside the system temporary directory.

    The path is only computed here; the folder is not created.
    """
    system_tmp = Path(tempfile.gettempdir())
    return system_tmp / "mindsdb"
17
+
18
+
15
19
  def _get_process_mark_id(unified: bool = False) -> str:
16
20
  """Creates a text that can be used to identify process+thread
17
21
  Args:
@@ -26,7 +30,7 @@ def _get_process_mark_id(unified: bool = False) -> str:
26
30
 
27
31
 
28
32
  def create_process_mark(folder="learn"):
29
- p = Path(tempfile.gettempdir()).joinpath(f"mindsdb/processes/{folder}/")
33
+ p = get_tmp_dir().joinpath(f"processes/{folder}/")
30
34
  p.mkdir(parents=True, exist_ok=True)
31
35
  mark = _get_process_mark_id()
32
36
  p.joinpath(mark).touch()
@@ -43,7 +47,7 @@ def set_process_mark(folder: str, mark: str) -> None:
43
47
  Returns:
44
48
  str: process mark
45
49
  """
46
- p = Path(tempfile.gettempdir()).joinpath(f"mindsdb/processes/{folder}/")
50
+ p = get_tmp_dir().joinpath(f"processes/{folder}/")
47
51
  p.mkdir(parents=True, exist_ok=True)
48
52
  mark = f"{os.getpid()}-{threading.get_native_id()}-{mark}"
49
53
  p.joinpath(mark).touch()
@@ -53,11 +57,7 @@ def set_process_mark(folder: str, mark: str) -> None:
53
57
  def delete_process_mark(folder: str = "learn", mark: Optional[str] = None):
54
58
  if mark is None:
55
59
  mark = _get_process_mark_id()
56
- p = (
57
- Path(tempfile.gettempdir())
58
- .joinpath(f"mindsdb/processes/{folder}/")
59
- .joinpath(mark)
60
- )
60
+ p = get_tmp_dir().joinpath(f"processes/{folder}/").joinpath(mark)
61
61
  if p.exists():
62
62
  p.unlink()
63
63
 
@@ -65,7 +65,7 @@ def delete_process_mark(folder: str = "learn", mark: Optional[str] = None):
65
65
  def clean_process_marks():
66
66
  """delete all existing processes marks"""
67
67
  logger.debug("Deleting PIDs..")
68
- p = Path(tempfile.gettempdir()).joinpath("mindsdb/processes/")
68
+ p = get_tmp_dir().joinpath("processes/")
69
69
  if p.exists() is False:
70
70
  return
71
71
  for path in p.iterdir():
@@ -81,7 +81,7 @@ def get_processes_dir_files_generator() -> Tuple[Path, int, int]:
81
81
  Yields:
82
82
  Tuple[Path, int, int]: file object, process id and thread id
83
83
  """
84
- p = Path(tempfile.gettempdir()).joinpath("mindsdb/processes/")
84
+ p = get_tmp_dir().joinpath("processes/")
85
85
  if p.exists() is False:
86
86
  return
87
87
  for path in p.iterdir():
@@ -112,9 +112,7 @@ def clean_unlinked_process_marks() -> List[int]:
112
112
  try:
113
113
  next(t for t in threads if t.id == thread_id)
114
114
  except StopIteration:
115
- logger.warning(
116
- f"We have mark for process/thread {process_id}/{thread_id} but it does not exists"
117
- )
115
+ logger.warning(f"We have mark for process/thread {process_id}/{thread_id} but it does not exists")
118
116
  deleted_pids.append(process_id)
119
117
  file.unlink()
120
118
 
@@ -124,14 +122,59 @@ def clean_unlinked_process_marks() -> List[int]:
124
122
  continue
125
123
 
126
124
  except psutil.NoSuchProcess:
127
- logger.warning(
128
- f"We have mark for process/thread {process_id}/{thread_id} but it does not exists"
129
- )
125
+ logger.warning(f"We have mark for process/thread {process_id}/{thread_id} but it does not exists")
130
126
  deleted_pids.append(process_id)
131
127
  file.unlink()
132
128
  return deleted_pids
133
129
 
134
130
 
131
def create_pid_file():
    """
    Create mindsdb process pid file. Check if previous process exists and is running

    Does nothing unless the environment variable USE_PIDFILE is set to "1".
    The file is named "pid" and is placed in the directory returned by get_tmp_dir().

    Raises:
        Exception: if the PID file already exists and contains the id of another
            process that currently exists
    """

    if os.environ.get("USE_PIDFILE") != "1":
        return

    p = get_tmp_dir()
    p.mkdir(parents=True, exist_ok=True)
    pid_file = p.joinpath("pid")
    if pid_file.exists():
        pid = pid_file.read_text().strip()
        # A file that holds our own pid cannot belong to another running process:
        # it is a leftover (e.g. the pid was reused after a restart), so it is stale.
        if pid != str(os.getpid()):
            try:
                psutil.Process(int(pid))
            except (psutil.Error, ValueError):
                # no such process, or the file content is not a number: stale file
                pass
            else:
                # if process exists raise exception
                raise Exception(f"Found PID file with existing process: {pid} {pid_file}")

        logger.warning(f"Found existing PID file {pid_file}({pid}), removing")
        pid_file.unlink()

    pid_file.write_text(str(os.getpid()))
155
+
156
+
157
def delete_pid_file():
    """
    Remove existing process pid file if it matches current process

    Does nothing unless the environment variable USE_PIDFILE is set to "1".
    A missing file is ignored; a file that holds a different process id is
    left in place and a warning is logged.
    """

    if os.environ.get("USE_PIDFILE") != "1":
        return

    pid_file = get_tmp_dir().joinpath("pid")

    # Read directly instead of checking exists() first: the file may disappear
    # between the check and the read.
    try:
        pid = pid_file.read_text().strip()
    except FileNotFoundError:
        return

    if pid != str(os.getpid()):
        logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid")
        return

    # the file may have been removed since it was read
    pid_file.unlink(missing_ok=True)
176
+
177
+
135
178
  def __is_within_directory(directory, target):
136
179
  abs_directory = os.path.abspath(directory)
137
180
  abs_target = os.path.abspath(target)
@@ -141,8 +184,8 @@ def __is_within_directory(directory, target):
141
184
 
142
185
  def safe_extract(tarfile, path=".", members=None, *, numeric_owner=False):
143
186
  # for py >= 3.12
144
- if hasattr(tarfile, 'data_filter'):
145
- tarfile.extractall(path, members=members, numeric_owner=numeric_owner, filter='data')
187
+ if hasattr(tarfile, "data_filter"):
188
+ tarfile.extractall(path, members=members, numeric_owner=numeric_owner, filter="data")
146
189
  return
147
190
 
148
191
  # for py < 3.12