MindsDB 25.4.3.2__py3-none-any.whl → 25.4.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +18 -4
- mindsdb/api/executor/data_types/response_type.py +1 -0
- mindsdb/api/executor/datahub/classes/tables_row.py +3 -10
- mindsdb/api/executor/datahub/datanodes/datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +44 -10
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +57 -38
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +39 -7
- mindsdb/api/executor/datahub/datanodes/system_tables.py +116 -109
- mindsdb/api/executor/planner/query_planner.py +10 -1
- mindsdb/api/executor/planner/steps.py +8 -2
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +5 -5
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +1 -1
- mindsdb/api/executor/sql_query/steps/insert_step.py +2 -1
- mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -3
- mindsdb/api/litellm/start.py +82 -0
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +133 -0
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +7 -2
- mindsdb/integrations/handlers/chromadb_handler/settings.py +1 -0
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +13 -4
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +14 -5
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +14 -4
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +34 -19
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +21 -18
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +14 -4
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +1 -1
- mindsdb/integrations/libs/response.py +80 -32
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +208 -13
- mindsdb/interfaces/agents/litellm_server.py +345 -0
- mindsdb/interfaces/agents/mcp_client_agent.py +252 -0
- mindsdb/interfaces/agents/run_mcp_agent.py +205 -0
- mindsdb/interfaces/knowledge_base/controller.py +17 -7
- mindsdb/interfaces/skills/skill_tool.py +7 -1
- mindsdb/interfaces/skills/sql_agent.py +8 -3
- mindsdb/utilities/config.py +8 -1
- mindsdb/utilities/starters.py +7 -0
- {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/METADATA +225 -223
- {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/RECORD +42 -39
- {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/WHEEL +1 -1
- mindsdb/integrations/handlers/snowflake_handler/tests/test_snowflake_handler.py +0 -230
- /mindsdb/{integrations/handlers/snowflake_handler/tests → api/litellm}/__init__.py +0 -0
- {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from typing import Optional, Literal
|
|
2
|
-
from dataclasses import dataclass,
|
|
2
|
+
from dataclasses import dataclass, fields
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from mindsdb_sql_parser.ast.base import ASTNode
|
|
@@ -7,11 +7,10 @@ from mindsdb_sql_parser.ast.base import ASTNode
|
|
|
7
7
|
from mindsdb.utilities import log
|
|
8
8
|
from mindsdb.utilities.config import config
|
|
9
9
|
from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
|
|
10
|
-
from mindsdb.
|
|
11
|
-
from mindsdb.api.
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
)
|
|
10
|
+
from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
|
|
11
|
+
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE, MYSQL_DATA_TYPE_COLUMNS_DEFAULT
|
|
12
|
+
from mindsdb.api.executor.datahub.classes.tables_row import TABLES_ROW_TYPE, TablesRow
|
|
13
|
+
|
|
15
14
|
|
|
16
15
|
logger = log.getLogger(__name__)
|
|
17
16
|
|
|
@@ -165,11 +164,36 @@ class TablesTable(Table):
|
|
|
165
164
|
return df
|
|
166
165
|
|
|
167
166
|
|
|
168
|
-
|
|
167
|
+
def infer_mysql_type(original_type: str) -> MYSQL_DATA_TYPE:
|
|
168
|
+
"""Infer MySQL data type from original type string from a database.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
original_type (str): The original type string from a database.
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
MYSQL_DATA_TYPE: The inferred MySQL data type.
|
|
175
|
+
"""
|
|
176
|
+
match original_type.lower():
|
|
177
|
+
case 'double precision' | 'real' | 'numeric' | 'float':
|
|
178
|
+
data_type = MYSQL_DATA_TYPE.FLOAT
|
|
179
|
+
case 'integer' | 'smallint' | 'int' | 'bigint':
|
|
180
|
+
data_type = MYSQL_DATA_TYPE.BIGINT
|
|
181
|
+
case 'timestamp without time zone' | 'timestamp with time zone' | 'date' | 'timestamp':
|
|
182
|
+
data_type = MYSQL_DATA_TYPE.DATETIME
|
|
183
|
+
case _:
|
|
184
|
+
data_type = MYSQL_DATA_TYPE.VARCHAR
|
|
185
|
+
return data_type
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@dataclass(slots=True, kw_only=True)
|
|
169
189
|
class ColumnsTableRow:
|
|
170
|
-
"""Represents a row in the COLUMNS table.
|
|
171
|
-
|
|
172
|
-
|
|
190
|
+
"""Represents a row in the MindsDB's internal INFORMATION_SCHEMA.COLUMNS table.
|
|
191
|
+
This class follows the MySQL-compatible COLUMNS table structure.
|
|
192
|
+
|
|
193
|
+
Detailed field descriptions can be found in MySQL documentation:
|
|
194
|
+
https://dev.mysql.com/doc/refman/8.4/en/information-schema-columns-table.html
|
|
195
|
+
|
|
196
|
+
NOTE: The order of attributes is significant and matches the MySQL column order.
|
|
173
197
|
"""
|
|
174
198
|
TABLE_CATALOG: Literal['def'] = 'def'
|
|
175
199
|
TABLE_SCHEMA: Optional[str] = None
|
|
@@ -192,73 +216,81 @@ class ColumnsTableRow:
|
|
|
192
216
|
PRIVILEGES: str = 'select'
|
|
193
217
|
COLUMN_COMMENT: Optional[str] = None
|
|
194
218
|
GENERATION_EXPRESSION: Optional[str] = None
|
|
219
|
+
SRS_ID: Optional[str] = None
|
|
220
|
+
# MindsDB's specific columns:
|
|
221
|
+
ORIGINAL_TYPE: Optional[str] = None
|
|
222
|
+
|
|
223
|
+
@classmethod
|
|
224
|
+
def from_is_columns_row(cls, table_schema: str, table_name: str, row: pd.Series) -> 'ColumnsTableRow':
|
|
225
|
+
"""Transform row from response of `handler.get_columns(...)` to internal information_schema.columns row.
|
|
226
|
+
|
|
227
|
+
Args:
|
|
228
|
+
table_schema (str): The name of the schema of the table which columns are described.
|
|
229
|
+
table_name (str): The name of the table which columns are described.
|
|
230
|
+
row (pd.Series): A row from the response of `handler.get_columns(...)`.
|
|
231
|
+
|
|
232
|
+
Returns:
|
|
233
|
+
ColumnsTableRow: A row in the MindsDB's internal INFORMATION_SCHEMA.COLUMNS table.
|
|
234
|
+
"""
|
|
235
|
+
original_type: str = row[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE] or ''
|
|
236
|
+
data_type: MYSQL_DATA_TYPE | None = row[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE]
|
|
237
|
+
if isinstance(data_type, MYSQL_DATA_TYPE) is False:
|
|
238
|
+
data_type = infer_mysql_type(original_type)
|
|
239
|
+
|
|
240
|
+
# region set default values depend on type
|
|
241
|
+
defaults = MYSQL_DATA_TYPE_COLUMNS_DEFAULT.get(data_type)
|
|
242
|
+
if defaults is not None:
|
|
243
|
+
for key, value in defaults.items():
|
|
244
|
+
if key in row and row[key] is None:
|
|
245
|
+
row[key] = value
|
|
246
|
+
|
|
247
|
+
# region determine COLUMN_TYPE - it is text representation of DATA_TYPE with additioan attributes
|
|
248
|
+
match data_type:
|
|
249
|
+
case MYSQL_DATA_TYPE.DECIMAL:
|
|
250
|
+
column_type = f'decimal({row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION]},{INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE})'
|
|
251
|
+
case MYSQL_DATA_TYPE.VARCHAR:
|
|
252
|
+
column_type = f'varchar({row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH]})'
|
|
253
|
+
case MYSQL_DATA_TYPE.VARBINARY:
|
|
254
|
+
column_type = f'varbinary({row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH]})'
|
|
255
|
+
case MYSQL_DATA_TYPE.BIT | MYSQL_DATA_TYPE.BINARY | MYSQL_DATA_TYPE.CHAR:
|
|
256
|
+
column_type = f'{data_type.value.lower()}(1)'
|
|
257
|
+
case MYSQL_DATA_TYPE.BOOL | MYSQL_DATA_TYPE.BOOLEAN:
|
|
258
|
+
column_type = 'tinyint(1)'
|
|
259
|
+
case _:
|
|
260
|
+
column_type = data_type.value.lower()
|
|
261
|
+
# endregion
|
|
262
|
+
|
|
263
|
+
# BOOLean types had 'tinyint' DATA_TYPE in MySQL
|
|
264
|
+
if data_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN):
|
|
265
|
+
data_type = 'tinyint'
|
|
266
|
+
else:
|
|
267
|
+
data_type = data_type.value.lower()
|
|
268
|
+
|
|
269
|
+
return cls(
|
|
270
|
+
TABLE_SCHEMA=table_schema,
|
|
271
|
+
TABLE_NAME=table_name,
|
|
272
|
+
COLUMN_NAME=row[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME],
|
|
273
|
+
ORDINAL_POSITION=row[INF_SCHEMA_COLUMNS_NAMES.ORDINAL_POSITION],
|
|
274
|
+
COLUMN_DEFAULT=row[INF_SCHEMA_COLUMNS_NAMES.COLUMN_DEFAULT],
|
|
275
|
+
IS_NULLABLE=row[INF_SCHEMA_COLUMNS_NAMES.IS_NULLABLE],
|
|
276
|
+
DATA_TYPE=data_type,
|
|
277
|
+
CHARACTER_MAXIMUM_LENGTH=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH],
|
|
278
|
+
CHARACTER_OCTET_LENGTH=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_OCTET_LENGTH],
|
|
279
|
+
NUMERIC_PRECISION=row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION],
|
|
280
|
+
NUMERIC_SCALE=row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE],
|
|
281
|
+
DATETIME_PRECISION=row[INF_SCHEMA_COLUMNS_NAMES.DATETIME_PRECISION],
|
|
282
|
+
CHARACTER_SET_NAME=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_SET_NAME],
|
|
283
|
+
COLLATION_NAME=row[INF_SCHEMA_COLUMNS_NAMES.COLLATION_NAME],
|
|
284
|
+
COLUMN_TYPE=column_type,
|
|
285
|
+
ORIGINAL_TYPE=original_type
|
|
286
|
+
)
|
|
195
287
|
|
|
196
288
|
def __post_init__(self):
|
|
197
|
-
|
|
289
|
+
"""Check if all mandatory fields are filled.
|
|
290
|
+
"""
|
|
198
291
|
mandatory_fields = ['TABLE_SCHEMA', 'TABLE_NAME', 'COLUMN_NAME']
|
|
199
292
|
if any(getattr(self, field_name) is None for field_name in mandatory_fields):
|
|
200
293
|
raise ValueError('One of mandatory fields is missed when creating ColumnsTableRow')
|
|
201
|
-
# endregion
|
|
202
|
-
|
|
203
|
-
# region set default values depend on type
|
|
204
|
-
defaults = {
|
|
205
|
-
'COLUMN_TYPE': self.DATA_TYPE
|
|
206
|
-
}
|
|
207
|
-
if MYSQL_DATA_TYPE(self.DATA_TYPE) in (
|
|
208
|
-
MYSQL_DATA_TYPE.TIMESTAMP,
|
|
209
|
-
MYSQL_DATA_TYPE.DATETIME,
|
|
210
|
-
MYSQL_DATA_TYPE.DATE
|
|
211
|
-
):
|
|
212
|
-
defaults = {
|
|
213
|
-
'DATETIME_PRECISION': 0,
|
|
214
|
-
'COLUMN_TYPE': self.DATA_TYPE
|
|
215
|
-
}
|
|
216
|
-
elif MYSQL_DATA_TYPE(self.DATA_TYPE) in (
|
|
217
|
-
MYSQL_DATA_TYPE.FLOAT,
|
|
218
|
-
MYSQL_DATA_TYPE.DOUBLE,
|
|
219
|
-
MYSQL_DATA_TYPE.DECIMAL
|
|
220
|
-
):
|
|
221
|
-
defaults = {
|
|
222
|
-
'NUMERIC_PRECISION': 12,
|
|
223
|
-
'NUMERIC_SCALE': 0,
|
|
224
|
-
'COLUMN_TYPE': self.DATA_TYPE
|
|
225
|
-
}
|
|
226
|
-
elif MYSQL_DATA_TYPE(self.DATA_TYPE) in (
|
|
227
|
-
MYSQL_DATA_TYPE.TINYINT,
|
|
228
|
-
MYSQL_DATA_TYPE.SMALLINT,
|
|
229
|
-
MYSQL_DATA_TYPE.MEDIUMINT,
|
|
230
|
-
MYSQL_DATA_TYPE.INT,
|
|
231
|
-
MYSQL_DATA_TYPE.BIGINT
|
|
232
|
-
):
|
|
233
|
-
defaults = {
|
|
234
|
-
'NUMERIC_PRECISION': 20,
|
|
235
|
-
'NUMERIC_SCALE': 0,
|
|
236
|
-
'COLUMN_TYPE': self.DATA_TYPE
|
|
237
|
-
}
|
|
238
|
-
elif MYSQL_DATA_TYPE(self.DATA_TYPE) is MYSQL_DATA_TYPE.VARCHAR:
|
|
239
|
-
defaults = {
|
|
240
|
-
'CHARACTER_MAXIMUM_LENGTH': 1024,
|
|
241
|
-
'CHARACTER_OCTET_LENGTH': 3072,
|
|
242
|
-
'CHARACTER_SET_NAME': 'utf8',
|
|
243
|
-
'COLLATION_NAME': 'utf8_bin',
|
|
244
|
-
'COLUMN_TYPE': 'varchar(1024)'
|
|
245
|
-
}
|
|
246
|
-
else:
|
|
247
|
-
# show as MYSQL_DATA_TYPE.TEXT:
|
|
248
|
-
defaults = {
|
|
249
|
-
'CHARACTER_MAXIMUM_LENGTH': 65535, # from https://bugs.mysql.com/bug.php?id=90685
|
|
250
|
-
'CHARACTER_OCTET_LENGTH': 65535, #
|
|
251
|
-
'CHARACTER_SET_NAME': 'utf8',
|
|
252
|
-
'COLLATION_NAME': 'utf8_bin',
|
|
253
|
-
'COLUMN_TYPE': 'text'
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
for key, value in defaults.items():
|
|
257
|
-
setattr(self, key, value)
|
|
258
|
-
|
|
259
|
-
self.DATA_TYPE = self.DATA_TYPE.lower()
|
|
260
|
-
self.COLUMN_TYPE = self.COLUMN_TYPE.lower()
|
|
261
|
-
# endregion
|
|
262
294
|
|
|
263
295
|
|
|
264
296
|
class ColumnsTable(Table):
|
|
@@ -266,9 +298,7 @@ class ColumnsTable(Table):
|
|
|
266
298
|
columns = [field.name for field in fields(ColumnsTableRow)]
|
|
267
299
|
|
|
268
300
|
@classmethod
|
|
269
|
-
def get_data(cls, inf_schema=None, query: ASTNode = None, **kwargs):
|
|
270
|
-
result = []
|
|
271
|
-
|
|
301
|
+
def get_data(cls, inf_schema=None, query: ASTNode = None, **kwargs) -> pd.DataFrame:
|
|
272
302
|
databases, tables_names = _get_scope(query)
|
|
273
303
|
|
|
274
304
|
if databases is None:
|
|
@@ -278,6 +308,7 @@ class ColumnsTable(Table):
|
|
|
278
308
|
'files'
|
|
279
309
|
]
|
|
280
310
|
|
|
311
|
+
result = []
|
|
281
312
|
for db_name in databases:
|
|
282
313
|
tables = {}
|
|
283
314
|
if db_name == 'information_schema':
|
|
@@ -293,43 +324,19 @@ class ColumnsTable(Table):
|
|
|
293
324
|
if tables_names is None:
|
|
294
325
|
tables_names = [t.TABLE_NAME for t in dn.get_tables()]
|
|
295
326
|
for table_name in tables_names:
|
|
296
|
-
tables[table_name] = dn.
|
|
297
|
-
|
|
298
|
-
for table_name,
|
|
299
|
-
for
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
if column_type in ('double precision', 'real', 'numeric', 'float'):
|
|
306
|
-
column_type = MYSQL_DATA_TYPE.FLOAT
|
|
307
|
-
elif column_type in ('integer', 'smallint', 'int', 'bigint'):
|
|
308
|
-
column_type = MYSQL_DATA_TYPE.BIGINT
|
|
309
|
-
elif column_type in (
|
|
310
|
-
'timestamp without time zone',
|
|
311
|
-
'timestamp with time zone',
|
|
312
|
-
'date', 'timestamp'
|
|
313
|
-
):
|
|
314
|
-
column_type = MYSQL_DATA_TYPE.DATETIME
|
|
315
|
-
else:
|
|
316
|
-
column_type = MYSQL_DATA_TYPE.VARCHAR
|
|
317
|
-
# endregion
|
|
318
|
-
|
|
319
|
-
column_row = astuple(
|
|
320
|
-
ColumnsTableRow(
|
|
321
|
-
TABLE_SCHEMA=db_name,
|
|
322
|
-
TABLE_NAME=table_name,
|
|
323
|
-
COLUMN_NAME=column_name,
|
|
324
|
-
DATA_TYPE=column_type.value,
|
|
325
|
-
ORDINAL_POSITION=i
|
|
327
|
+
tables[table_name] = dn.get_table_columns_df(table_name)
|
|
328
|
+
|
|
329
|
+
for table_name, table_columns_df in tables.items():
|
|
330
|
+
for _, row in table_columns_df.iterrows():
|
|
331
|
+
result.append(
|
|
332
|
+
ColumnsTableRow.from_is_columns_row(
|
|
333
|
+
table_schema=db_name,
|
|
334
|
+
table_name=table_name,
|
|
335
|
+
row=row
|
|
326
336
|
)
|
|
327
337
|
)
|
|
328
338
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
df = pd.DataFrame(result, columns=cls.columns)
|
|
332
|
-
return df
|
|
339
|
+
return pd.DataFrame(result, columns=cls.columns)
|
|
333
340
|
|
|
334
341
|
|
|
335
342
|
class EventsTable(Table):
|
|
@@ -656,9 +656,18 @@ class QueryPlanner:
|
|
|
656
656
|
# plan sub-select first
|
|
657
657
|
last_step = self.plan_select(query.from_select, integration=integration_name)
|
|
658
658
|
|
|
659
|
+
# possible knowledge base parameters
|
|
660
|
+
select = query.from_select
|
|
661
|
+
params = {}
|
|
662
|
+
if isinstance(select, Select) and select.using is not None:
|
|
663
|
+
for k, v in select.using.items():
|
|
664
|
+
if k.startswith('kb_'):
|
|
665
|
+
params[k] = v
|
|
666
|
+
|
|
659
667
|
self.plan.add_step(InsertToTable(
|
|
660
668
|
table=table,
|
|
661
669
|
dataframe=last_step,
|
|
670
|
+
params=params,
|
|
662
671
|
))
|
|
663
672
|
else:
|
|
664
673
|
self.plan.add_step(InsertToTable(
|
|
@@ -762,7 +771,7 @@ class QueryPlanner:
|
|
|
762
771
|
elif from_table is None:
|
|
763
772
|
# one line select
|
|
764
773
|
step = QueryStep(query, from_table=pd.DataFrame([None]))
|
|
765
|
-
self.plan.add_step(step)
|
|
774
|
+
return self.plan.add_step(step)
|
|
766
775
|
else:
|
|
767
776
|
raise PlanningException(f'Unsupported from_table {type(from_table)}')
|
|
768
777
|
|
|
@@ -200,7 +200,7 @@ class MultipleSteps(PlanStep):
|
|
|
200
200
|
|
|
201
201
|
|
|
202
202
|
class SaveToTable(PlanStep):
|
|
203
|
-
def __init__(self, table, dataframe, is_replace=False, *args, **kwargs):
|
|
203
|
+
def __init__(self, table, dataframe, is_replace=False, params=None, *args, **kwargs):
|
|
204
204
|
"""
|
|
205
205
|
Creates table if not exists and fills it with content of dataframe
|
|
206
206
|
is_replace - to drop table beforehand
|
|
@@ -209,15 +209,21 @@ class SaveToTable(PlanStep):
|
|
|
209
209
|
self.table = table
|
|
210
210
|
self.dataframe = dataframe
|
|
211
211
|
self.is_replace = is_replace
|
|
212
|
+
if params is None:
|
|
213
|
+
params = {}
|
|
214
|
+
self.params = params
|
|
212
215
|
|
|
213
216
|
|
|
214
217
|
class InsertToTable(PlanStep):
|
|
215
|
-
def __init__(self, table, dataframe=None, query=None, *args, **kwargs):
|
|
218
|
+
def __init__(self, table, dataframe=None, query=None, params=None, *args, **kwargs):
|
|
216
219
|
"""Fills table with content of dataframe"""
|
|
217
220
|
super().__init__(*args, **kwargs)
|
|
218
221
|
self.table = table
|
|
219
222
|
self.dataframe = dataframe
|
|
220
223
|
self.query = query
|
|
224
|
+
if params is None:
|
|
225
|
+
params = {}
|
|
226
|
+
self.params = params
|
|
221
227
|
|
|
222
228
|
|
|
223
229
|
class CreateTableStep(PlanStep):
|
|
@@ -95,8 +95,8 @@ class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
|
|
|
95
95
|
result = ResultSet()
|
|
96
96
|
result.is_prediction = True
|
|
97
97
|
if len(predictions) == 0:
|
|
98
|
-
|
|
99
|
-
predictions = pd.DataFrame([], columns=
|
|
98
|
+
columns_names = project_datanode.get_table_columns_names(predictor_name)
|
|
99
|
+
predictions = pd.DataFrame([], columns=columns_names)
|
|
100
100
|
|
|
101
101
|
result.from_df(
|
|
102
102
|
predictions,
|
|
@@ -173,10 +173,10 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
|
|
|
173
173
|
|
|
174
174
|
project_datanode = self.session.datahub.get(project_name)
|
|
175
175
|
if len(data) == 0:
|
|
176
|
-
|
|
177
|
-
for
|
|
176
|
+
columns_names = project_datanode.get_table_columns_names(predictor_name) + ['__mindsdb_row_id']
|
|
177
|
+
for column_name in columns_names:
|
|
178
178
|
result.add_column(Column(
|
|
179
|
-
name=
|
|
179
|
+
name=column_name,
|
|
180
180
|
database=table_name[0],
|
|
181
181
|
table_name=table_name[1],
|
|
182
182
|
table_alias=table_name[2]
|
|
@@ -182,7 +182,7 @@ class FetchDataframePartitionCall(BaseStepCall):
|
|
|
182
182
|
thread_count = get_max_thread_count()
|
|
183
183
|
|
|
184
184
|
# 3 tasks per worker during 1 batch
|
|
185
|
-
partition_size = int(run_query.batch_size / thread_count
|
|
185
|
+
partition_size = int(run_query.batch_size / thread_count)
|
|
186
186
|
# min partition size
|
|
187
187
|
if partition_size < 10:
|
|
188
188
|
partition_size = 10
|
|
@@ -20,15 +20,14 @@ class GetPredictorColumnsCall(BaseStepCall):
|
|
|
20
20
|
bind = GetPredictorColumns
|
|
21
21
|
|
|
22
22
|
def call(self, step):
|
|
23
|
-
|
|
24
23
|
mindsdb_database_name = config.get('default_project')
|
|
25
24
|
|
|
26
25
|
predictor_name = step.predictor.parts[-1]
|
|
27
26
|
dn = self.session.datahub.get(mindsdb_database_name)
|
|
28
|
-
|
|
27
|
+
columns_names = dn.get_table_columns_names(predictor_name)
|
|
29
28
|
|
|
30
29
|
data = ResultSet()
|
|
31
|
-
for column_name in
|
|
30
|
+
for column_name in columns_names:
|
|
32
31
|
data.add_column(Column(
|
|
33
32
|
name=column_name,
|
|
34
33
|
table_name=predictor_name,
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from mindsdb.utilities import log
|
|
3
|
+
from mindsdb.utilities.config import Config
|
|
4
|
+
from mindsdb.interfaces.agents.litellm_server import run_server, run_server_async
|
|
5
|
+
|
|
6
|
+
logger = log.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def start_async(verbose=False):
|
|
10
|
+
"""Start the LiteLLM server
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
verbose (bool): Whether to enable verbose logging
|
|
14
|
+
"""
|
|
15
|
+
config = Config()
|
|
16
|
+
|
|
17
|
+
# Get agent name from command line args
|
|
18
|
+
agent_name = config.cmd_args.agent
|
|
19
|
+
if not agent_name:
|
|
20
|
+
logger.error("Agent name is required for LiteLLM server. Use --agent parameter.")
|
|
21
|
+
return 1
|
|
22
|
+
|
|
23
|
+
# Get project name or use default
|
|
24
|
+
project_name = config.cmd_args.project or "mindsdb"
|
|
25
|
+
|
|
26
|
+
# Get MCP server connection details
|
|
27
|
+
mcp_host = config.get('api', {}).get('mcp', {}).get('host', '127.0.0.1')
|
|
28
|
+
mcp_port = int(config.get('api', {}).get('mcp', {}).get('port', 47337))
|
|
29
|
+
|
|
30
|
+
# Get LiteLLM server settings
|
|
31
|
+
litellm_host = config.get('api', {}).get('litellm', {}).get('host', '0.0.0.0')
|
|
32
|
+
litellm_port = int(config.get('api', {}).get('litellm', {}).get('port', 8000))
|
|
33
|
+
|
|
34
|
+
logger.info(f"Starting LiteLLM server for agent '{agent_name}' in project '{project_name}'")
|
|
35
|
+
logger.info(f"Connecting to MCP server at {mcp_host}:{mcp_port}")
|
|
36
|
+
logger.info(f"Binding to {litellm_host}:{litellm_port}")
|
|
37
|
+
|
|
38
|
+
return await run_server_async(
|
|
39
|
+
agent_name=agent_name,
|
|
40
|
+
project_name=project_name,
|
|
41
|
+
mcp_host=mcp_host,
|
|
42
|
+
mcp_port=mcp_port,
|
|
43
|
+
host=litellm_host,
|
|
44
|
+
port=litellm_port
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def start(verbose=False):
|
|
49
|
+
"""Start the LiteLLM server (synchronous wrapper)
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
verbose (bool): Whether to enable verbose logging
|
|
53
|
+
"""
|
|
54
|
+
from mindsdb.interfaces.storage import db
|
|
55
|
+
db.init()
|
|
56
|
+
|
|
57
|
+
# Run the async function in the event loop
|
|
58
|
+
loop = asyncio.new_event_loop()
|
|
59
|
+
asyncio.set_event_loop(loop)
|
|
60
|
+
result = loop.run_until_complete(start_async(verbose))
|
|
61
|
+
|
|
62
|
+
if result == 0:
|
|
63
|
+
# Run the server
|
|
64
|
+
config = Config()
|
|
65
|
+
agent_name = config.cmd_args.agent
|
|
66
|
+
project_name = config.cmd_args.project or "mindsdb"
|
|
67
|
+
mcp_host = config.get('api', {}).get('mcp', {}).get('host', '127.0.0.1')
|
|
68
|
+
mcp_port = int(config.get('api', {}).get('mcp', {}).get('port', 47337))
|
|
69
|
+
litellm_host = config.get('api', {}).get('litellm', {}).get('host', '0.0.0.0')
|
|
70
|
+
litellm_port = int(config.get('api', {}).get('litellm', {}).get('port', 8000))
|
|
71
|
+
|
|
72
|
+
return run_server(
|
|
73
|
+
agent_name=agent_name,
|
|
74
|
+
project_name=project_name,
|
|
75
|
+
mcp_host=mcp_host,
|
|
76
|
+
mcp_port=mcp_port,
|
|
77
|
+
host=litellm_host,
|
|
78
|
+
port=litellm_port
|
|
79
|
+
)
|
|
80
|
+
else:
|
|
81
|
+
logger.error("LiteLLM server initialization failed")
|
|
82
|
+
return result
|
|
@@ -179,6 +179,139 @@ class MYSQL_DATA_TYPE(enum.Enum):
|
|
|
179
179
|
BOOLEAN = 'BOOLEAN'
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
# Default values for attributes of MySQL data types as they appear in information_schema.columns
|
|
183
|
+
# These values match the MySQL v8.0.37 defaults and are used to properly represent column metadata
|
|
184
|
+
MYSQL_DATA_TYPE_COLUMNS_DEFAULT = {
|
|
185
|
+
MYSQL_DATA_TYPE.TINYINT: {
|
|
186
|
+
'NUMERIC_PRECISION': 3,
|
|
187
|
+
'NUMERIC_SCALE': 0
|
|
188
|
+
},
|
|
189
|
+
MYSQL_DATA_TYPE.SMALLINT: {
|
|
190
|
+
'NUMERIC_PRECISION': 5,
|
|
191
|
+
'NUMERIC_SCALE': 0
|
|
192
|
+
},
|
|
193
|
+
MYSQL_DATA_TYPE.MEDIUMINT: {
|
|
194
|
+
'NUMERIC_PRECISION': 7,
|
|
195
|
+
'NUMERIC_SCALE': 0
|
|
196
|
+
},
|
|
197
|
+
MYSQL_DATA_TYPE.INT: {
|
|
198
|
+
'NUMERIC_PRECISION': 10,
|
|
199
|
+
'NUMERIC_SCALE': 0
|
|
200
|
+
},
|
|
201
|
+
MYSQL_DATA_TYPE.BIGINT: {
|
|
202
|
+
'NUMERIC_PRECISION': 19,
|
|
203
|
+
'NUMERIC_SCALE': 0
|
|
204
|
+
},
|
|
205
|
+
MYSQL_DATA_TYPE.FLOAT: {
|
|
206
|
+
'NUMERIC_PRECISION': 12
|
|
207
|
+
},
|
|
208
|
+
MYSQL_DATA_TYPE.DOUBLE: {
|
|
209
|
+
'NUMERIC_PRECISION': 22
|
|
210
|
+
},
|
|
211
|
+
MYSQL_DATA_TYPE.DECIMAL: {
|
|
212
|
+
'NUMERIC_PRECISION': 10,
|
|
213
|
+
'NUMERIC_SCALE': 0,
|
|
214
|
+
'COLUMN_TYPE': 'decimal(10,0)'
|
|
215
|
+
},
|
|
216
|
+
MYSQL_DATA_TYPE.YEAR: {
|
|
217
|
+
# every column is null
|
|
218
|
+
},
|
|
219
|
+
MYSQL_DATA_TYPE.TIME: {
|
|
220
|
+
'DATETIME_PRECISION': 0
|
|
221
|
+
},
|
|
222
|
+
MYSQL_DATA_TYPE.DATE: {
|
|
223
|
+
# every column is null
|
|
224
|
+
},
|
|
225
|
+
MYSQL_DATA_TYPE.DATETIME: {
|
|
226
|
+
'DATETIME_PRECISION': 0
|
|
227
|
+
},
|
|
228
|
+
MYSQL_DATA_TYPE.TIMESTAMP: {
|
|
229
|
+
'DATETIME_PRECISION': 0
|
|
230
|
+
},
|
|
231
|
+
MYSQL_DATA_TYPE.CHAR: {
|
|
232
|
+
'CHARACTER_MAXIMUM_LENGTH': 1,
|
|
233
|
+
'CHARACTER_OCTET_LENGTH': 4,
|
|
234
|
+
'CHARACTER_SET_NAME': 'utf8',
|
|
235
|
+
'COLLATION_NAME': 'utf8_bin',
|
|
236
|
+
'COLUMN_TYPE': 'char(1)'
|
|
237
|
+
},
|
|
238
|
+
MYSQL_DATA_TYPE.BINARY: {
|
|
239
|
+
'CHARACTER_MAXIMUM_LENGTH': 1,
|
|
240
|
+
'CHARACTER_OCTET_LENGTH': 1,
|
|
241
|
+
'COLUMN_TYPE': 'binary(1)'
|
|
242
|
+
},
|
|
243
|
+
MYSQL_DATA_TYPE.VARCHAR: {
|
|
244
|
+
'CHARACTER_MAXIMUM_LENGTH': 1024, # NOTE mandatory for field creation
|
|
245
|
+
'CHARACTER_OCTET_LENGTH': 4096, # NOTE mandatory for field creation
|
|
246
|
+
'CHARACTER_SET_NAME': 'utf8',
|
|
247
|
+
'COLLATION_NAME': 'utf8_bin',
|
|
248
|
+
'COLUMN_TYPE': 'varchar(1024)'
|
|
249
|
+
},
|
|
250
|
+
MYSQL_DATA_TYPE.VARBINARY: {
|
|
251
|
+
'CHARACTER_MAXIMUM_LENGTH': 1024, # NOTE mandatory for field creation
|
|
252
|
+
'CHARACTER_OCTET_LENGTH': 1024, # NOTE mandatory for field creation
|
|
253
|
+
'COLUMN_TYPE': 'varbinary(1024)'
|
|
254
|
+
},
|
|
255
|
+
MYSQL_DATA_TYPE.TINYBLOB: {
|
|
256
|
+
'CHARACTER_MAXIMUM_LENGTH': 255,
|
|
257
|
+
'CHARACTER_OCTET_LENGTH': 255
|
|
258
|
+
},
|
|
259
|
+
MYSQL_DATA_TYPE.TINYTEXT: {
|
|
260
|
+
'CHARACTER_MAXIMUM_LENGTH': 255,
|
|
261
|
+
'CHARACTER_OCTET_LENGTH': 255,
|
|
262
|
+
'CHARACTER_SET_NAME': 'utf8',
|
|
263
|
+
'COLLATION_NAME': 'utf8_bin'
|
|
264
|
+
},
|
|
265
|
+
MYSQL_DATA_TYPE.BLOB: {
|
|
266
|
+
'CHARACTER_MAXIMUM_LENGTH': 65535,
|
|
267
|
+
'CHARACTER_OCTET_LENGTH': 65535
|
|
268
|
+
},
|
|
269
|
+
MYSQL_DATA_TYPE.TEXT: {
|
|
270
|
+
'CHARACTER_MAXIMUM_LENGTH': 65535,
|
|
271
|
+
'CHARACTER_OCTET_LENGTH': 65535,
|
|
272
|
+
'CHARACTER_SET_NAME': 'utf8',
|
|
273
|
+
'COLLATION_NAME': 'utf8_bin'
|
|
274
|
+
},
|
|
275
|
+
MYSQL_DATA_TYPE.MEDIUMBLOB: {
|
|
276
|
+
'CHARACTER_MAXIMUM_LENGTH': 16777215,
|
|
277
|
+
'CHARACTER_OCTET_LENGTH': 16777215
|
|
278
|
+
},
|
|
279
|
+
MYSQL_DATA_TYPE.MEDIUMTEXT: {
|
|
280
|
+
'CHARACTER_MAXIMUM_LENGTH': 16777215,
|
|
281
|
+
'CHARACTER_OCTET_LENGTH': 16777215,
|
|
282
|
+
'CHARACTER_SET_NAME': 'utf8',
|
|
283
|
+
'COLLATION_NAME': 'utf8_bin'
|
|
284
|
+
},
|
|
285
|
+
MYSQL_DATA_TYPE.LONGBLOB: {
|
|
286
|
+
'CHARACTER_MAXIMUM_LENGTH': 4294967295,
|
|
287
|
+
'CHARACTER_OCTET_LENGTH': 4294967295,
|
|
288
|
+
},
|
|
289
|
+
MYSQL_DATA_TYPE.LONGTEXT: {
|
|
290
|
+
'CHARACTER_MAXIMUM_LENGTH': 4294967295,
|
|
291
|
+
'CHARACTER_OCTET_LENGTH': 4294967295,
|
|
292
|
+
'CHARACTER_SET_NAME': 'utf8',
|
|
293
|
+
'COLLATION_NAME': 'utf8_bin'
|
|
294
|
+
},
|
|
295
|
+
MYSQL_DATA_TYPE.BIT: {
|
|
296
|
+
'NUMERIC_PRECISION': 1,
|
|
297
|
+
'COLUMN_TYPE': 'bit(1)'
|
|
298
|
+
# 'NUMERIC_SCALE': null
|
|
299
|
+
},
|
|
300
|
+
MYSQL_DATA_TYPE.BOOL: {
|
|
301
|
+
'DATA_TYPE': 'tinyint',
|
|
302
|
+
'NUMERIC_PRECISION': 3,
|
|
303
|
+
'NUMERIC_SCALE': 0,
|
|
304
|
+
'COLUMN_TYPE': 'tinyint(1)'
|
|
305
|
+
},
|
|
306
|
+
MYSQL_DATA_TYPE.BOOLEAN: {
|
|
307
|
+
'DATA_TYPE': 'tinyint',
|
|
308
|
+
'NUMERIC_PRECISION': 3,
|
|
309
|
+
'NUMERIC_SCALE': 0,
|
|
310
|
+
'COLUMN_TYPE': 'tinyint(1)'
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
|
|
182
315
|
# Map between data types and C types
|
|
183
316
|
# https://dev.mysql.com/doc/c-api/8.0/en/c-api-prepared-statement-type-codes.html
|
|
184
317
|
DATA_C_TYPE_MAP = {
|
|
@@ -68,6 +68,10 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
68
68
|
"persist_directory": self.persist_directory,
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
+
self.create_collection_metadata = {
|
|
72
|
+
"hnsw:space": config.distance,
|
|
73
|
+
}
|
|
74
|
+
|
|
71
75
|
self._use_handler_storage = False
|
|
72
76
|
|
|
73
77
|
self.connect()
|
|
@@ -398,7 +402,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
398
402
|
Insert/Upsert data into ChromaDB collection.
|
|
399
403
|
If records with same IDs exist, they will be updated.
|
|
400
404
|
"""
|
|
401
|
-
collection = self._client.get_or_create_collection(collection_name)
|
|
405
|
+
collection = self._client.get_or_create_collection(collection_name, metadata=self.create_collection_metadata)
|
|
402
406
|
|
|
403
407
|
# Convert metadata from string to dict if needed
|
|
404
408
|
if TableField.METADATA.value in df.columns:
|
|
@@ -484,7 +488,8 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
484
488
|
"""
|
|
485
489
|
Create a collection with the given name in the ChromaDB database.
|
|
486
490
|
"""
|
|
487
|
-
self._client.create_collection(table_name, get_or_create=if_not_exists
|
|
491
|
+
self._client.create_collection(table_name, get_or_create=if_not_exists,
|
|
492
|
+
metadata=self.create_collection_metadata)
|
|
488
493
|
self._sync()
|
|
489
494
|
|
|
490
495
|
def drop_table(self, table_name: str, if_exists=True):
|