MindsDB 25.7.4.0__py3-none-any.whl → 25.8.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +13 -1
- mindsdb/api/a2a/agent.py +6 -16
- mindsdb/api/a2a/common/types.py +3 -4
- mindsdb/api/a2a/task_manager.py +24 -35
- mindsdb/api/a2a/utils.py +63 -0
- mindsdb/api/executor/command_executor.py +9 -15
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +2 -1
- mindsdb/api/http/namespaces/agents.py +6 -7
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -2
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/salesforce_handler/constants.py +9 -2
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +83 -45
- mindsdb/interfaces/agents/constants.py +16 -3
- mindsdb/interfaces/agents/langchain_agent.py +84 -21
- mindsdb/interfaces/database/projects.py +111 -7
- mindsdb/interfaces/knowledge_base/controller.py +7 -1
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +14 -15
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +7 -1
- mindsdb/interfaces/skills/skill_tool.py +7 -1
- mindsdb/interfaces/skills/sql_agent.py +6 -2
- mindsdb/utilities/config.py +2 -0
- mindsdb/utilities/fs.py +60 -17
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/METADATA +277 -262
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/RECORD +57 -56
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TextSplitter:
    def __init__(
        self,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        separators: List[str] = None,
        k_range: float = 0.5,
        k_ratio: float = 1,
    ):
        """
        Split text into chunks that never exceed ``chunk_size``.

        How it works:
        - Take a window of ``chunk_size`` characters and look for a separator
          near the end of the window.
        - The accepted search range for separator number ``num`` is:
              k_range * chunk_size / (num * k_ratio + 1)
          so lower-priority separators are only accepted closer to the window end.
        - If a separator falls outside its range, the next separator is tried.
        - If the chosen separator is whitespace (the cut lands mid-sentence),
          overlapping kicks in: the search is repeated with a relaxed range, and
          if that earlier cut point is within ``chunk_overlap`` characters, the
          next chunk restarts from it so context is carried over.

        :param chunk_size: size of the chunk, which must not be exceeded
        :param chunk_overlap: max distance to move the next-chunk start back when overlapping
        :param separators: list of separators in order of priority
        :param k_range: defines the range to look for the separator
        :param k_ratio: defines how much to shrink the range for the next separator
        """
        if separators is None:
            separators = ["\n\n", "\n", ". ", " ", ""]
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.separators = separators
        self.k_range = k_range
        self.k_ratio = k_ratio

    def split_text(self, text: str) -> List[str]:
        """Cut ``text`` into a list of chunks, each shorter than ``chunk_size``."""
        chunks: List[str] = []
        remaining = text
        while len(remaining) >= self.chunk_size:
            _, piece, shift = self.get_next_chunk(remaining, self.k_range, self.k_ratio)
            chunks.append(piece)
            remaining = remaining[shift:]
        # whatever is left already fits into a single chunk
        chunks.append(remaining)
        return chunks

    def get_next_chunk(self, text: str, k_range: float, k_ratio: float):
        """
        Find the best cut point in the head of ``text``.

        Returns a tuple ``(separator, chunk, shift)``: ``chunk`` is the text
        before the separator, ``shift`` is the offset at which the next search
        iteration should continue.
        """
        window = text[: self.chunk_size]
        for rank, sep in enumerate(self.separators):
            cut = window.rfind(sep)

            # Distance of the separator from the window end. NOTE(review): a
            # missing separator (rfind == -1) yields chunk_size + 1, which can
            # still pass the check once k_range has been relaxed by recursion.
            tail_distance = self.chunk_size - cut
            if tail_distance < k_range * self.chunk_size / (rank * k_ratio + 1):
                shift = cut + len(sep)
                if sep.strip(" ") == "":
                    # Whitespace cut lands mid-sentence: retry with a wider,
                    # rank-independent range and overlap from the earlier cut.
                    prev_sep, _, prev_shift = self.get_next_chunk(text, k_range * 1.5, 0)
                    if prev_sep.strip(" ") != "" and shift - prev_shift < self.chunk_overlap:
                        # use shift of the previous (stronger) separator
                        shift = prev_shift

                return sep, window[:cut], shift

        raise RuntimeError("Cannot split text")
|
|
@@ -45,7 +45,7 @@ class RunningQuery:
|
|
|
45
45
|
for df in dn.query_stream(query2, fetch_size=self.batch_size):
|
|
46
46
|
max_track_value = self.get_max_track_value(df)
|
|
47
47
|
yield df
|
|
48
|
-
self.set_progress(
|
|
48
|
+
self.set_progress(max_track_value=max_track_value)
|
|
49
49
|
|
|
50
50
|
else:
|
|
51
51
|
while True:
|
|
@@ -59,7 +59,7 @@ class RunningQuery:
|
|
|
59
59
|
|
|
60
60
|
max_track_value = self.get_max_track_value(df)
|
|
61
61
|
yield df
|
|
62
|
-
self.set_progress(
|
|
62
|
+
self.set_progress(max_track_value=max_track_value)
|
|
63
63
|
|
|
64
64
|
def get_partition_query(self, step_num: int, query: Select, stream=False) -> Select:
|
|
65
65
|
"""
|
|
@@ -178,24 +178,23 @@ class RunningQuery:
|
|
|
178
178
|
# stream mode
|
|
179
179
|
return None
|
|
180
180
|
|
|
181
|
-
def set_progress(self,
|
|
181
|
+
def set_progress(self, processed_rows: int = None, max_track_value: int = None):
|
|
182
182
|
"""
|
|
183
183
|
Store progress of the query; it is called after processing of a batch
|
|
184
184
|
"""
|
|
185
185
|
|
|
186
|
-
if
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
self.record.processed_rows = self.record.processed_rows + len(df)
|
|
190
|
-
|
|
191
|
-
cur_value = self.record.context.get("track_value")
|
|
192
|
-
new_value = max_track_value
|
|
193
|
-
if new_value is not None:
|
|
194
|
-
if cur_value is None or new_value > cur_value:
|
|
195
|
-
self.record.context["track_value"] = new_value
|
|
196
|
-
flag_modified(self.record, "context")
|
|
186
|
+
if processed_rows is not None and processed_rows > 0:
|
|
187
|
+
self.record.processed_rows = self.record.processed_rows + processed_rows
|
|
188
|
+
db.session.commit()
|
|
197
189
|
|
|
198
|
-
|
|
190
|
+
if max_track_value is not None:
|
|
191
|
+
cur_value = self.record.context.get("track_value")
|
|
192
|
+
new_value = max_track_value
|
|
193
|
+
if new_value is not None:
|
|
194
|
+
if cur_value is None or new_value > cur_value:
|
|
195
|
+
self.record.context["track_value"] = new_value
|
|
196
|
+
flag_modified(self.record, "context")
|
|
197
|
+
db.session.commit()
|
|
199
198
|
|
|
200
199
|
def on_error(self, error: Exception, step_num: int, steps_data: dict):
|
|
201
200
|
"""
|
|
@@ -15,6 +15,7 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import (
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
18
|
+
include_tables_tools: bool = True
|
|
18
19
|
include_knowledge_base_tools: bool = True
|
|
19
20
|
|
|
20
21
|
def get_tools(self, prefix="") -> List[BaseTool]:
|
|
@@ -212,8 +213,13 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
212
213
|
)
|
|
213
214
|
|
|
214
215
|
# Return standard SQL tools and knowledge base tools
|
|
215
|
-
|
|
216
|
+
kb_tools = [
|
|
216
217
|
kb_list_tool,
|
|
217
218
|
kb_info_tool,
|
|
218
219
|
kb_query_tool,
|
|
219
220
|
]
|
|
221
|
+
|
|
222
|
+
if not self.include_tables_tools:
|
|
223
|
+
return kb_tools
|
|
224
|
+
else:
|
|
225
|
+
return sql_tools + kb_tools
|
|
@@ -347,7 +347,13 @@ class SkillToolController:
|
|
|
347
347
|
)
|
|
348
348
|
db = MindsDBSQL.custom_init(sql_agent=sql_agent)
|
|
349
349
|
should_include_kb_tools = include_knowledge_bases is not None and len(include_knowledge_bases) > 0
|
|
350
|
-
|
|
350
|
+
should_include_tables_tools = len(databases_struct) > 0 or len(tables_list) > 0
|
|
351
|
+
toolkit = MindsDBSQLToolkit(
|
|
352
|
+
db=db,
|
|
353
|
+
llm=llm,
|
|
354
|
+
include_tables_tools=should_include_tables_tools,
|
|
355
|
+
include_knowledge_base_tools=should_include_kb_tools,
|
|
356
|
+
)
|
|
351
357
|
return toolkit.get_tools()
|
|
352
358
|
|
|
353
359
|
def _make_retrieval_tools(self, skill: db.Skills, llm, embedding_model):
|
|
@@ -405,6 +405,7 @@ class SQLAgent:
|
|
|
405
405
|
tables_idx[tuple(table.parts)] = table
|
|
406
406
|
|
|
407
407
|
tables = []
|
|
408
|
+
not_found = []
|
|
408
409
|
for table_name in table_names:
|
|
409
410
|
if not table_name.strip():
|
|
410
411
|
continue
|
|
@@ -419,9 +420,12 @@ class SQLAgent:
|
|
|
419
420
|
table_identifier = tables_idx.get(tuple(table_parts))
|
|
420
421
|
|
|
421
422
|
if table_identifier is None:
|
|
422
|
-
|
|
423
|
-
|
|
423
|
+
not_found.append(table_name)
|
|
424
|
+
else:
|
|
425
|
+
tables.append(table_identifier)
|
|
424
426
|
|
|
427
|
+
if not_found:
|
|
428
|
+
raise ValueError(f"Tables: {', '.join(not_found)} not found in the database")
|
|
425
429
|
return tables
|
|
426
430
|
|
|
427
431
|
def get_knowledge_base_info(self, kb_names: Optional[List[str]] = None) -> str:
|
mindsdb/utilities/config.py
CHANGED
|
@@ -599,6 +599,7 @@ class Config:
|
|
|
599
599
|
ml_task_queue_consumer=None,
|
|
600
600
|
agent=None,
|
|
601
601
|
project=None,
|
|
602
|
+
update_gui=False,
|
|
602
603
|
)
|
|
603
604
|
return
|
|
604
605
|
|
|
@@ -635,6 +636,7 @@ class Config:
|
|
|
635
636
|
help="MindsDB agent name to connect to",
|
|
636
637
|
)
|
|
637
638
|
parser.add_argument("--project-name", type=str, default=None, help="MindsDB project name")
|
|
639
|
+
parser.add_argument("--update-gui", action="store_true", default=False, help="Update GUI and exit")
|
|
638
640
|
|
|
639
641
|
self._cmd_args = parser.parse_args()
|
|
640
642
|
|
mindsdb/utilities/fs.py
CHANGED
|
@@ -12,6 +12,10 @@ from mindsdb.utilities import log
|
|
|
12
12
|
logger = log.getLogger(__name__)
|
|
13
13
|
|
|
14
14
|
|
|
15
|
+
def get_tmp_dir() -> Path:
    """Return the mindsdb scratch directory inside the system temp directory."""
    return Path(tempfile.gettempdir()) / "mindsdb"
|
|
17
|
+
|
|
18
|
+
|
|
15
19
|
def _get_process_mark_id(unified: bool = False) -> str:
|
|
16
20
|
"""Creates a text that can be used to identify process+thread
|
|
17
21
|
Args:
|
|
@@ -26,7 +30,7 @@ def _get_process_mark_id(unified: bool = False) -> str:
|
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
def create_process_mark(folder="learn"):
|
|
29
|
-
p =
|
|
33
|
+
p = get_tmp_dir().joinpath(f"processes/{folder}/")
|
|
30
34
|
p.mkdir(parents=True, exist_ok=True)
|
|
31
35
|
mark = _get_process_mark_id()
|
|
32
36
|
p.joinpath(mark).touch()
|
|
@@ -43,7 +47,7 @@ def set_process_mark(folder: str, mark: str) -> None:
|
|
|
43
47
|
Returns:
|
|
44
48
|
str: process mark
|
|
45
49
|
"""
|
|
46
|
-
p =
|
|
50
|
+
p = get_tmp_dir().joinpath(f"processes/{folder}/")
|
|
47
51
|
p.mkdir(parents=True, exist_ok=True)
|
|
48
52
|
mark = f"{os.getpid()}-{threading.get_native_id()}-{mark}"
|
|
49
53
|
p.joinpath(mark).touch()
|
|
@@ -53,11 +57,7 @@ def set_process_mark(folder: str, mark: str) -> None:
|
|
|
53
57
|
def delete_process_mark(folder: str = "learn", mark: Optional[str] = None):
|
|
54
58
|
if mark is None:
|
|
55
59
|
mark = _get_process_mark_id()
|
|
56
|
-
p = (
|
|
57
|
-
Path(tempfile.gettempdir())
|
|
58
|
-
.joinpath(f"mindsdb/processes/{folder}/")
|
|
59
|
-
.joinpath(mark)
|
|
60
|
-
)
|
|
60
|
+
p = get_tmp_dir().joinpath(f"processes/{folder}/").joinpath(mark)
|
|
61
61
|
if p.exists():
|
|
62
62
|
p.unlink()
|
|
63
63
|
|
|
@@ -65,7 +65,7 @@ def delete_process_mark(folder: str = "learn", mark: Optional[str] = None):
|
|
|
65
65
|
def clean_process_marks():
|
|
66
66
|
"""delete all existing processes marks"""
|
|
67
67
|
logger.debug("Deleting PIDs..")
|
|
68
|
-
p =
|
|
68
|
+
p = get_tmp_dir().joinpath("processes/")
|
|
69
69
|
if p.exists() is False:
|
|
70
70
|
return
|
|
71
71
|
for path in p.iterdir():
|
|
@@ -81,7 +81,7 @@ def get_processes_dir_files_generator() -> Tuple[Path, int, int]:
|
|
|
81
81
|
Yields:
|
|
82
82
|
Tuple[Path, int, int]: file object, process id and thread id
|
|
83
83
|
"""
|
|
84
|
-
p =
|
|
84
|
+
p = get_tmp_dir().joinpath("processes/")
|
|
85
85
|
if p.exists() is False:
|
|
86
86
|
return
|
|
87
87
|
for path in p.iterdir():
|
|
@@ -112,9 +112,7 @@ def clean_unlinked_process_marks() -> List[int]:
|
|
|
112
112
|
try:
|
|
113
113
|
next(t for t in threads if t.id == thread_id)
|
|
114
114
|
except StopIteration:
|
|
115
|
-
logger.warning(
|
|
116
|
-
f"We have mark for process/thread {process_id}/{thread_id} but it does not exists"
|
|
117
|
-
)
|
|
115
|
+
logger.warning(f"We have mark for process/thread {process_id}/{thread_id} but it does not exists")
|
|
118
116
|
deleted_pids.append(process_id)
|
|
119
117
|
file.unlink()
|
|
120
118
|
|
|
@@ -124,14 +122,59 @@ def clean_unlinked_process_marks() -> List[int]:
|
|
|
124
122
|
continue
|
|
125
123
|
|
|
126
124
|
except psutil.NoSuchProcess:
|
|
127
|
-
logger.warning(
|
|
128
|
-
f"We have mark for process/thread {process_id}/{thread_id} but it does not exists"
|
|
129
|
-
)
|
|
125
|
+
logger.warning(f"We have mark for process/thread {process_id}/{thread_id} but it does not exists")
|
|
130
126
|
deleted_pids.append(process_id)
|
|
131
127
|
file.unlink()
|
|
132
128
|
return deleted_pids
|
|
133
129
|
|
|
134
130
|
|
|
131
|
+
def create_pid_file():
    """
    Write the current process id into the mindsdb PID file.

    No-op unless the USE_PIDFILE environment variable equals "1". If a PID
    file left by a previous run points at a still-running process, an
    exception is raised; a stale file (dead process or unparsable pid) is
    removed before the new one is written.
    """

    if os.environ.get("USE_PIDFILE") != "1":
        return

    base_dir = get_tmp_dir()
    base_dir.mkdir(parents=True, exist_ok=True)
    pid_file = base_dir.joinpath("pid")

    if pid_file.exists():
        pid = pid_file.read_text().strip()
        try:
            # If the recorded process is still alive, refuse to start a second instance.
            psutil.Process(int(pid))
            raise Exception(f"Found PID file with existing process: {pid} {pid_file}")
        except (psutil.Error, ValueError):
            # Dead process or garbage in the file: treat the PID file as stale.
            ...

        logger.warning(f"Found existing PID file {pid_file}({pid}), removing")
        pid_file.unlink()

    pid_file.write_text(str(os.getpid()))
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def delete_pid_file():
    """
    Delete the mindsdb PID file, but only when it belongs to this process.

    No-op unless the USE_PIDFILE environment variable equals "1" or when no
    PID file exists. If the stored pid differs from the current process id,
    the file is left in place and a warning is logged.
    """

    if os.environ.get("USE_PIDFILE") != "1":
        return

    pid_file = get_tmp_dir().joinpath("pid")
    if not pid_file.exists():
        return

    stored_pid = pid_file.read_text().strip()
    if stored_pid == str(os.getpid()):
        pid_file.unlink()
    else:
        logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid")
|
|
176
|
+
|
|
177
|
+
|
|
135
178
|
def __is_within_directory(directory, target):
|
|
136
179
|
abs_directory = os.path.abspath(directory)
|
|
137
180
|
abs_target = os.path.abspath(target)
|
|
@@ -141,8 +184,8 @@ def __is_within_directory(directory, target):
|
|
|
141
184
|
|
|
142
185
|
def safe_extract(tarfile, path=".", members=None, *, numeric_owner=False):
|
|
143
186
|
# for py >= 3.12
|
|
144
|
-
if hasattr(tarfile,
|
|
145
|
-
tarfile.extractall(path, members=members, numeric_owner=numeric_owner, filter=
|
|
187
|
+
if hasattr(tarfile, "data_filter"):
|
|
188
|
+
tarfile.extractall(path, members=members, numeric_owner=numeric_owner, filter="data")
|
|
146
189
|
return
|
|
147
190
|
|
|
148
191
|
# for py < 3.12
|