MindsDB 25.4.3.2__py3-none-any.whl → 25.4.4.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (43)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +18 -4
  3. mindsdb/api/executor/data_types/response_type.py +1 -0
  4. mindsdb/api/executor/datahub/classes/tables_row.py +3 -10
  5. mindsdb/api/executor/datahub/datanodes/datanode.py +7 -2
  6. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +44 -10
  7. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +57 -38
  8. mindsdb/api/executor/datahub/datanodes/project_datanode.py +39 -7
  9. mindsdb/api/executor/datahub/datanodes/system_tables.py +116 -109
  10. mindsdb/api/executor/planner/query_planner.py +10 -1
  11. mindsdb/api/executor/planner/steps.py +8 -2
  12. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +5 -5
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +1 -1
  14. mindsdb/api/executor/sql_query/steps/insert_step.py +2 -1
  15. mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -3
  16. mindsdb/api/litellm/start.py +82 -0
  17. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +133 -0
  18. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +7 -2
  19. mindsdb/integrations/handlers/chromadb_handler/settings.py +1 -0
  20. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +13 -4
  21. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +14 -5
  22. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +14 -4
  23. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +34 -19
  24. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +21 -18
  25. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +14 -4
  26. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +1 -1
  27. mindsdb/integrations/libs/response.py +80 -32
  28. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +208 -13
  29. mindsdb/interfaces/agents/litellm_server.py +345 -0
  30. mindsdb/interfaces/agents/mcp_client_agent.py +252 -0
  31. mindsdb/interfaces/agents/run_mcp_agent.py +205 -0
  32. mindsdb/interfaces/knowledge_base/controller.py +17 -7
  33. mindsdb/interfaces/skills/skill_tool.py +7 -1
  34. mindsdb/interfaces/skills/sql_agent.py +8 -3
  35. mindsdb/utilities/config.py +8 -1
  36. mindsdb/utilities/starters.py +7 -0
  37. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/METADATA +225 -223
  38. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/RECORD +42 -39
  39. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/WHEEL +1 -1
  40. mindsdb/integrations/handlers/snowflake_handler/tests/test_snowflake_handler.py +0 -230
  41. /mindsdb/{integrations/handlers/snowflake_handler/tests → api/litellm}/__init__.py +0 -0
  42. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/licenses/LICENSE +0 -0
  43. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.4.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
  from typing import Optional, Literal
- from dataclasses import dataclass, astuple, fields
+ from dataclasses import dataclass, fields

  import pandas as pd
  from mindsdb_sql_parser.ast.base import ASTNode
@@ -7,11 +7,10 @@ from mindsdb_sql_parser.ast.base import ASTNode
  from mindsdb.utilities import log
  from mindsdb.utilities.config import config
  from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
- from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
- from mindsdb.api.executor.datahub.classes.tables_row import (
-     TABLES_ROW_TYPE,
-     TablesRow,
- )
+ from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
+ from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE, MYSQL_DATA_TYPE_COLUMNS_DEFAULT
+ from mindsdb.api.executor.datahub.classes.tables_row import TABLES_ROW_TYPE, TablesRow
+

  logger = log.getLogger(__name__)

@@ -165,11 +164,36 @@ class TablesTable(Table):
          return df


- @dataclass
+ def infer_mysql_type(original_type: str) -> MYSQL_DATA_TYPE:
+     """Infer the MySQL data type from a database's original type string.
+
+     Args:
+         original_type (str): The original type string from a database.
+
+     Returns:
+         MYSQL_DATA_TYPE: The inferred MySQL data type.
+     """
+     match original_type.lower():
+         case 'double precision' | 'real' | 'numeric' | 'float':
+             data_type = MYSQL_DATA_TYPE.FLOAT
+         case 'integer' | 'smallint' | 'int' | 'bigint':
+             data_type = MYSQL_DATA_TYPE.BIGINT
+         case 'timestamp without time zone' | 'timestamp with time zone' | 'date' | 'timestamp':
+             data_type = MYSQL_DATA_TYPE.DATETIME
+         case _:
+             data_type = MYSQL_DATA_TYPE.VARCHAR
+     return data_type
+
+
+ @dataclass(slots=True, kw_only=True)
  class ColumnsTableRow:
-     """Represents a row in the COLUMNS table.
-     Fields description: https://dev.mysql.com/doc/refman/8.4/en/information-schema-columns-table.html
-     NOTE: attrs order matter, don't change it.
+     """Represents a row in MindsDB's internal INFORMATION_SCHEMA.COLUMNS table.
+     This class follows the MySQL-compatible COLUMNS table structure.
+
+     Detailed field descriptions can be found in the MySQL documentation:
+     https://dev.mysql.com/doc/refman/8.4/en/information-schema-columns-table.html
+
+     NOTE: The order of attributes is significant and matches the MySQL column order.
      """
      TABLE_CATALOG: Literal['def'] = 'def'
      TABLE_SCHEMA: Optional[str] = None
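
For a quick sanity check of the new helper, a minimal usage sketch. It assumes `infer_mysql_type` is importable from `mindsdb.api.executor.datahub.datanodes.system_tables`, where this hunk appears to land; treat the import path as an assumption.

    from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
    from mindsdb.api.executor.datahub.datanodes.system_tables import infer_mysql_type  # assumed path

    # Matching is case-insensitive; anything unrecognized falls back to VARCHAR.
    assert infer_mysql_type('double precision') is MYSQL_DATA_TYPE.FLOAT
    assert infer_mysql_type('SMALLINT') is MYSQL_DATA_TYPE.BIGINT
    assert infer_mysql_type('timestamp with time zone') is MYSQL_DATA_TYPE.DATETIME
    assert infer_mysql_type('uuid') is MYSQL_DATA_TYPE.VARCHAR
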
@@ -192,73 +216,81 @@ class ColumnsTableRow:
      PRIVILEGES: str = 'select'
      COLUMN_COMMENT: Optional[str] = None
      GENERATION_EXPRESSION: Optional[str] = None
+     SRS_ID: Optional[str] = None
+     # MindsDB-specific columns:
+     ORIGINAL_TYPE: Optional[str] = None
+
+     @classmethod
+     def from_is_columns_row(cls, table_schema: str, table_name: str, row: pd.Series) -> 'ColumnsTableRow':
+         """Transform a row from the response of `handler.get_columns(...)` into an internal information_schema.columns row.
+
+         Args:
+             table_schema (str): The name of the schema of the table whose columns are described.
+             table_name (str): The name of the table whose columns are described.
+             row (pd.Series): A row from the response of `handler.get_columns(...)`.
+
+         Returns:
+             ColumnsTableRow: A row in MindsDB's internal INFORMATION_SCHEMA.COLUMNS table.
+         """
+         original_type: str = row[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE] or ''
+         data_type: MYSQL_DATA_TYPE | None = row[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE]
+         if isinstance(data_type, MYSQL_DATA_TYPE) is False:
+             data_type = infer_mysql_type(original_type)
+
+         # region set default values depending on type
+         defaults = MYSQL_DATA_TYPE_COLUMNS_DEFAULT.get(data_type)
+         if defaults is not None:
+             for key, value in defaults.items():
+                 if key in row and row[key] is None:
+                     row[key] = value
+
+         # region determine COLUMN_TYPE - it is a text representation of DATA_TYPE with additional attributes
+         match data_type:
+             case MYSQL_DATA_TYPE.DECIMAL:
+                 column_type = f'decimal({row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION]},{row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE]})'
+             case MYSQL_DATA_TYPE.VARCHAR:
+                 column_type = f'varchar({row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH]})'
+             case MYSQL_DATA_TYPE.VARBINARY:
+                 column_type = f'varbinary({row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH]})'
+             case MYSQL_DATA_TYPE.BIT | MYSQL_DATA_TYPE.BINARY | MYSQL_DATA_TYPE.CHAR:
+                 column_type = f'{data_type.value.lower()}(1)'
+             case MYSQL_DATA_TYPE.BOOL | MYSQL_DATA_TYPE.BOOLEAN:
+                 column_type = 'tinyint(1)'
+             case _:
+                 column_type = data_type.value.lower()
+         # endregion
+
+         # BOOLean types have 'tinyint' DATA_TYPE in MySQL
+         if data_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN):
+             data_type = 'tinyint'
+         else:
+             data_type = data_type.value.lower()
+
+         return cls(
+             TABLE_SCHEMA=table_schema,
+             TABLE_NAME=table_name,
+             COLUMN_NAME=row[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME],
+             ORDINAL_POSITION=row[INF_SCHEMA_COLUMNS_NAMES.ORDINAL_POSITION],
+             COLUMN_DEFAULT=row[INF_SCHEMA_COLUMNS_NAMES.COLUMN_DEFAULT],
+             IS_NULLABLE=row[INF_SCHEMA_COLUMNS_NAMES.IS_NULLABLE],
+             DATA_TYPE=data_type,
+             CHARACTER_MAXIMUM_LENGTH=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH],
+             CHARACTER_OCTET_LENGTH=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_OCTET_LENGTH],
+             NUMERIC_PRECISION=row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION],
+             NUMERIC_SCALE=row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE],
+             DATETIME_PRECISION=row[INF_SCHEMA_COLUMNS_NAMES.DATETIME_PRECISION],
+             CHARACTER_SET_NAME=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_SET_NAME],
+             COLLATION_NAME=row[INF_SCHEMA_COLUMNS_NAMES.COLLATION_NAME],
+             COLUMN_TYPE=column_type,
+             ORIGINAL_TYPE=original_type
+         )

      def __post_init__(self):
-         # region check mandatory fields
+         """Check that all mandatory fields are filled."""
          mandatory_fields = ['TABLE_SCHEMA', 'TABLE_NAME', 'COLUMN_NAME']
          if any(getattr(self, field_name) is None for field_name in mandatory_fields):
              raise ValueError('One of mandatory fields is missed when creating ColumnsTableRow')
-         # endregion
-
-         # region set default values depend on type
-         defaults = {
-             'COLUMN_TYPE': self.DATA_TYPE
-         }
-         if MYSQL_DATA_TYPE(self.DATA_TYPE) in (
-             MYSQL_DATA_TYPE.TIMESTAMP,
-             MYSQL_DATA_TYPE.DATETIME,
-             MYSQL_DATA_TYPE.DATE
-         ):
-             defaults = {
-                 'DATETIME_PRECISION': 0,
-                 'COLUMN_TYPE': self.DATA_TYPE
-             }
-         elif MYSQL_DATA_TYPE(self.DATA_TYPE) in (
-             MYSQL_DATA_TYPE.FLOAT,
-             MYSQL_DATA_TYPE.DOUBLE,
-             MYSQL_DATA_TYPE.DECIMAL
-         ):
-             defaults = {
-                 'NUMERIC_PRECISION': 12,
-                 'NUMERIC_SCALE': 0,
-                 'COLUMN_TYPE': self.DATA_TYPE
-             }
-         elif MYSQL_DATA_TYPE(self.DATA_TYPE) in (
-             MYSQL_DATA_TYPE.TINYINT,
-             MYSQL_DATA_TYPE.SMALLINT,
-             MYSQL_DATA_TYPE.MEDIUMINT,
-             MYSQL_DATA_TYPE.INT,
-             MYSQL_DATA_TYPE.BIGINT
-         ):
-             defaults = {
-                 'NUMERIC_PRECISION': 20,
-                 'NUMERIC_SCALE': 0,
-                 'COLUMN_TYPE': self.DATA_TYPE
-             }
-         elif MYSQL_DATA_TYPE(self.DATA_TYPE) is MYSQL_DATA_TYPE.VARCHAR:
-             defaults = {
-                 'CHARACTER_MAXIMUM_LENGTH': 1024,
-                 'CHARACTER_OCTET_LENGTH': 3072,
-                 'CHARACTER_SET_NAME': 'utf8',
-                 'COLLATION_NAME': 'utf8_bin',
-                 'COLUMN_TYPE': 'varchar(1024)'
-             }
-         else:
-             # show as MYSQL_DATA_TYPE.TEXT:
-             defaults = {
-                 'CHARACTER_MAXIMUM_LENGTH': 65535,  # from https://bugs.mysql.com/bug.php?id=90685
-                 'CHARACTER_OCTET_LENGTH': 65535,
-                 'CHARACTER_SET_NAME': 'utf8',
-                 'COLLATION_NAME': 'utf8_bin',
-                 'COLUMN_TYPE': 'text'
-             }
-
-         for key, value in defaults.items():
-             setattr(self, key, value)
-
-         self.DATA_TYPE = self.DATA_TYPE.lower()
-         self.COLUMN_TYPE = self.COLUMN_TYPE.lower()
-         # endregion


  class ColumnsTable(Table):
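
To make the `COLUMN_TYPE` derivation in `from_is_columns_row` concrete, a self-contained sketch of the core of the same match logic. The helper name and its default precision/scale/length arguments are illustrative stand-ins; the real method reads these values from the handler row via `INF_SCHEMA_COLUMNS_NAMES` and also covers VARBINARY, BIT, BINARY and CHAR.

    from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE

    def column_type_for(data_type: MYSQL_DATA_TYPE, precision=10, scale=0, max_len=1024) -> str:
        # Mirrors the reduced core of the match statement above.
        match data_type:
            case MYSQL_DATA_TYPE.DECIMAL:
                return f'decimal({precision},{scale})'
            case MYSQL_DATA_TYPE.VARCHAR:
                return f'varchar({max_len})'
            case MYSQL_DATA_TYPE.BOOL | MYSQL_DATA_TYPE.BOOLEAN:
                return 'tinyint(1)'
            case _:
                return data_type.value.lower()

    assert column_type_for(MYSQL_DATA_TYPE.DECIMAL) == 'decimal(10,0)'
    assert column_type_for(MYSQL_DATA_TYPE.BOOLEAN) == 'tinyint(1)'
    assert column_type_for(MYSQL_DATA_TYPE.BIGINT) == 'bigint'
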
@@ -266,9 +298,7 @@ class ColumnsTable(Table):
      columns = [field.name for field in fields(ColumnsTableRow)]

      @classmethod
-     def get_data(cls, inf_schema=None, query: ASTNode = None, **kwargs):
-         result = []
-
+     def get_data(cls, inf_schema=None, query: ASTNode = None, **kwargs) -> pd.DataFrame:
          databases, tables_names = _get_scope(query)

          if databases is None:
@@ -278,6 +308,7 @@ class ColumnsTable(Table):
              'files'
          ]

+         result = []
          for db_name in databases:
              tables = {}
              if db_name == 'information_schema':
@@ -293,43 +324,19 @@ class ColumnsTable(Table):
              if tables_names is None:
                  tables_names = [t.TABLE_NAME for t in dn.get_tables()]
              for table_name in tables_names:
-                 tables[table_name] = dn.get_table_columns(table_name)
-
-             for table_name, table_columns in tables.items():
-                 for i, column in enumerate(table_columns):
-                     column_name = column['name']
-                     column_type = column.get('type', 'text')
-
-                     # region infer type
-                     if isinstance(column_type, MYSQL_DATA_TYPE) is False:
-                         if column_type in ('double precision', 'real', 'numeric', 'float'):
-                             column_type = MYSQL_DATA_TYPE.FLOAT
-                         elif column_type in ('integer', 'smallint', 'int', 'bigint'):
-                             column_type = MYSQL_DATA_TYPE.BIGINT
-                         elif column_type in (
-                             'timestamp without time zone',
-                             'timestamp with time zone',
-                             'date', 'timestamp'
-                         ):
-                             column_type = MYSQL_DATA_TYPE.DATETIME
-                         else:
-                             column_type = MYSQL_DATA_TYPE.VARCHAR
-                     # endregion
-
-                     column_row = astuple(
-                         ColumnsTableRow(
-                             TABLE_SCHEMA=db_name,
-                             TABLE_NAME=table_name,
-                             COLUMN_NAME=column_name,
-                             DATA_TYPE=column_type.value,
-                             ORDINAL_POSITION=i
+                 tables[table_name] = dn.get_table_columns_df(table_name)
+
+             for table_name, table_columns_df in tables.items():
+                 for _, row in table_columns_df.iterrows():
+                     result.append(
+                         ColumnsTableRow.from_is_columns_row(
+                             table_schema=db_name,
+                             table_name=table_name,
+                             row=row
                          )
                      )

-                     result.append(column_row)
-
-         df = pd.DataFrame(result, columns=cls.columns)
-         return df
+         return pd.DataFrame(result, columns=cls.columns)


  class EventsTable(Table):
@@ -656,9 +656,18 @@ class QueryPlanner:
              # plan sub-select first
              last_step = self.plan_select(query.from_select, integration=integration_name)

+             # possible knowledge base parameters
+             select = query.from_select
+             params = {}
+             if isinstance(select, Select) and select.using is not None:
+                 for k, v in select.using.items():
+                     if k.startswith('kb_'):
+                         params[k] = v
+
              self.plan.add_step(InsertToTable(
                  table=table,
                  dataframe=last_step,
+                 params=params,
              ))
          else:
              self.plan.add_step(InsertToTable(
@@ -762,7 +771,7 @@ class QueryPlanner:
          elif from_table is None:
              # one line select
              step = QueryStep(query, from_table=pd.DataFrame([None]))
-             self.plan.add_step(step)
+             return self.plan.add_step(step)
          else:
              raise PlanningException(f'Unsupported from_table {type(from_table)}')
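
The first planner hunk forwards any `kb_`-prefixed keys from a sub-select's USING clause into the `InsertToTable` step. The filter reduces to a dictionary comprehension; the key names below are hypothetical:

    select_using = {'kb_batch_size': 100, 'model': 'my_model'}  # hypothetical USING values
    params = {k: v for k, v in select_using.items() if k.startswith('kb_')}
    assert params == {'kb_batch_size': 100}
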
 
@@ -200,7 +200,7 @@ class MultipleSteps(PlanStep):


  class SaveToTable(PlanStep):
-     def __init__(self, table, dataframe, is_replace=False, *args, **kwargs):
+     def __init__(self, table, dataframe, is_replace=False, params=None, *args, **kwargs):
          """
          Creates table if not exists and fills it with content of dataframe
          is_replace - to drop table beforehand
@@ -209,15 +209,21 @@ class SaveToTable(PlanStep):
          self.table = table
          self.dataframe = dataframe
          self.is_replace = is_replace
+         if params is None:
+             params = {}
+         self.params = params


  class InsertToTable(PlanStep):
-     def __init__(self, table, dataframe=None, query=None, *args, **kwargs):
+     def __init__(self, table, dataframe=None, query=None, params=None, *args, **kwargs):
          """Fills table with content of dataframe"""
          super().__init__(*args, **kwargs)
          self.table = table
          self.dataframe = dataframe
          self.query = query
+         if params is None:
+             params = {}
+         self.params = params


  class CreateTableStep(PlanStep):
@@ -95,8 +95,8 @@ class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
          result = ResultSet()
          result.is_prediction = True
          if len(predictions) == 0:
-             columns = [col['name'] for col in project_datanode.get_table_columns(predictor_name)]
-             predictions = pd.DataFrame([], columns=columns)
+             columns_names = project_datanode.get_table_columns_names(predictor_name)
+             predictions = pd.DataFrame([], columns=columns_names)

          result.from_df(
              predictions,
@@ -173,10 +173,10 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):

          project_datanode = self.session.datahub.get(project_name)
          if len(data) == 0:
-             cols = [col['name'] for col in project_datanode.get_table_columns(predictor_name)] + ['__mindsdb_row_id']
-             for col in cols:
+             columns_names = project_datanode.get_table_columns_names(predictor_name) + ['__mindsdb_row_id']
+             for column_name in columns_names:
                  result.add_column(Column(
-                     name=col,
+                     name=column_name,
                      database=table_name[0],
                      table_name=table_name[1],
                      table_alias=table_name[2]
@@ -182,7 +182,7 @@ class FetchDataframePartitionCall(BaseStepCall):
          thread_count = get_max_thread_count()

          # 3 tasks per worker during 1 batch
-         partition_size = int(run_query.batch_size / thread_count / 3)
+         partition_size = int(run_query.batch_size / thread_count)
          # min partition size
          if partition_size < 10:
              partition_size = 10
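
The effect of dropping the `/ 3` factor is easiest to see with numbers; the batch size and thread count below are hypothetical:

    batch_size, thread_count = 1000, 8
    old_partition_size = int(batch_size / thread_count / 3)  # 41
    new_partition_size = int(batch_size / thread_count)      # 125
    # Either value is still clamped to the minimum of 10 by the check that follows.
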
@@ -95,7 +95,8 @@ class InsertToTableCall(BaseStepCall):
              table_name=table_name,
              result_set=data,
              is_replace=is_replace,
-             is_create=is_create
+             is_create=is_create,
+             params=step.params
          )
          return ResultSet(affected_rows=response.affected_rows)

@@ -20,15 +20,14 @@ class GetPredictorColumnsCall(BaseStepCall):
      bind = GetPredictorColumns

      def call(self, step):
-
          mindsdb_database_name = config.get('default_project')

          predictor_name = step.predictor.parts[-1]
          dn = self.session.datahub.get(mindsdb_database_name)
-         columns = [col['name'] for col in dn.get_table_columns(predictor_name)]
+         columns_names = dn.get_table_columns_names(predictor_name)

          data = ResultSet()
-         for column_name in columns:
+         for column_name in columns_names:
              data.add_column(Column(
                  name=column_name,
                  table_name=predictor_name,
@@ -0,0 +1,82 @@
+ import asyncio
+ from mindsdb.utilities import log
+ from mindsdb.utilities.config import Config
+ from mindsdb.interfaces.agents.litellm_server import run_server, run_server_async
+
+ logger = log.getLogger(__name__)
+
+
+ async def start_async(verbose=False):
+     """Start the LiteLLM server
+
+     Args:
+         verbose (bool): Whether to enable verbose logging
+     """
+     config = Config()
+
+     # Get agent name from command line args
+     agent_name = config.cmd_args.agent
+     if not agent_name:
+         logger.error("Agent name is required for LiteLLM server. Use --agent parameter.")
+         return 1
+
+     # Get project name or use default
+     project_name = config.cmd_args.project or "mindsdb"
+
+     # Get MCP server connection details
+     mcp_host = config.get('api', {}).get('mcp', {}).get('host', '127.0.0.1')
+     mcp_port = int(config.get('api', {}).get('mcp', {}).get('port', 47337))
+
+     # Get LiteLLM server settings
+     litellm_host = config.get('api', {}).get('litellm', {}).get('host', '0.0.0.0')
+     litellm_port = int(config.get('api', {}).get('litellm', {}).get('port', 8000))
+
+     logger.info(f"Starting LiteLLM server for agent '{agent_name}' in project '{project_name}'")
+     logger.info(f"Connecting to MCP server at {mcp_host}:{mcp_port}")
+     logger.info(f"Binding to {litellm_host}:{litellm_port}")
+
+     return await run_server_async(
+         agent_name=agent_name,
+         project_name=project_name,
+         mcp_host=mcp_host,
+         mcp_port=mcp_port,
+         host=litellm_host,
+         port=litellm_port
+     )
+
+
+ def start(verbose=False):
+     """Start the LiteLLM server (synchronous wrapper)
+
+     Args:
+         verbose (bool): Whether to enable verbose logging
+     """
+     from mindsdb.interfaces.storage import db
+     db.init()
+
+     # Run the async function in the event loop
+     loop = asyncio.new_event_loop()
+     asyncio.set_event_loop(loop)
+     result = loop.run_until_complete(start_async(verbose))
+
+     if result == 0:
+         # Run the server
+         config = Config()
+         agent_name = config.cmd_args.agent
+         project_name = config.cmd_args.project or "mindsdb"
+         mcp_host = config.get('api', {}).get('mcp', {}).get('host', '127.0.0.1')
+         mcp_port = int(config.get('api', {}).get('mcp', {}).get('port', 47337))
+         litellm_host = config.get('api', {}).get('litellm', {}).get('host', '0.0.0.0')
+         litellm_port = int(config.get('api', {}).get('litellm', {}).get('port', 8000))
+
+         return run_server(
+             agent_name=agent_name,
+             project_name=project_name,
+             mcp_host=mcp_host,
+             mcp_port=mcp_port,
+             host=litellm_host,
+             port=litellm_port
+         )
+     else:
+         logger.error("LiteLLM server initialization failed")
+         return result
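
For reference, the new starter reads its endpoints from the following config shape, shown here as a Python dict with the defaults baked into the code above. The key layout is inferred from the `config.get(...)` chains, not from a documented schema:

    config_fragment = {
        'api': {
            'mcp': {'host': '127.0.0.1', 'port': 47337},   # where the MCP server is reached
            'litellm': {'host': '0.0.0.0', 'port': 8000},  # where the LiteLLM server binds
        },
    }
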
@@ -179,6 +179,139 @@ class MYSQL_DATA_TYPE(enum.Enum):
      BOOLEAN = 'BOOLEAN'


+ # Default values for attributes of MySQL data types as they appear in information_schema.columns.
+ # These values match the MySQL v8.0.37 defaults and are used to properly represent column metadata.
+ MYSQL_DATA_TYPE_COLUMNS_DEFAULT = {
+     MYSQL_DATA_TYPE.TINYINT: {
+         'NUMERIC_PRECISION': 3,
+         'NUMERIC_SCALE': 0
+     },
+     MYSQL_DATA_TYPE.SMALLINT: {
+         'NUMERIC_PRECISION': 5,
+         'NUMERIC_SCALE': 0
+     },
+     MYSQL_DATA_TYPE.MEDIUMINT: {
+         'NUMERIC_PRECISION': 7,
+         'NUMERIC_SCALE': 0
+     },
+     MYSQL_DATA_TYPE.INT: {
+         'NUMERIC_PRECISION': 10,
+         'NUMERIC_SCALE': 0
+     },
+     MYSQL_DATA_TYPE.BIGINT: {
+         'NUMERIC_PRECISION': 19,
+         'NUMERIC_SCALE': 0
+     },
+     MYSQL_DATA_TYPE.FLOAT: {
+         'NUMERIC_PRECISION': 12
+     },
+     MYSQL_DATA_TYPE.DOUBLE: {
+         'NUMERIC_PRECISION': 22
+     },
+     MYSQL_DATA_TYPE.DECIMAL: {
+         'NUMERIC_PRECISION': 10,
+         'NUMERIC_SCALE': 0,
+         'COLUMN_TYPE': 'decimal(10,0)'
+     },
+     MYSQL_DATA_TYPE.YEAR: {
+         # all attribute columns remain NULL
+     },
+     MYSQL_DATA_TYPE.TIME: {
+         'DATETIME_PRECISION': 0
+     },
+     MYSQL_DATA_TYPE.DATE: {
+         # all attribute columns remain NULL
+     },
+     MYSQL_DATA_TYPE.DATETIME: {
+         'DATETIME_PRECISION': 0
+     },
+     MYSQL_DATA_TYPE.TIMESTAMP: {
+         'DATETIME_PRECISION': 0
+     },
+     MYSQL_DATA_TYPE.CHAR: {
+         'CHARACTER_MAXIMUM_LENGTH': 1,
+         'CHARACTER_OCTET_LENGTH': 4,
+         'CHARACTER_SET_NAME': 'utf8',
+         'COLLATION_NAME': 'utf8_bin',
+         'COLUMN_TYPE': 'char(1)'
+     },
+     MYSQL_DATA_TYPE.BINARY: {
+         'CHARACTER_MAXIMUM_LENGTH': 1,
+         'CHARACTER_OCTET_LENGTH': 1,
+         'COLUMN_TYPE': 'binary(1)'
+     },
+     MYSQL_DATA_TYPE.VARCHAR: {
+         'CHARACTER_MAXIMUM_LENGTH': 1024,  # NOTE mandatory for field creation
+         'CHARACTER_OCTET_LENGTH': 4096,  # NOTE mandatory for field creation
+         'CHARACTER_SET_NAME': 'utf8',
+         'COLLATION_NAME': 'utf8_bin',
+         'COLUMN_TYPE': 'varchar(1024)'
+     },
+     MYSQL_DATA_TYPE.VARBINARY: {
+         'CHARACTER_MAXIMUM_LENGTH': 1024,  # NOTE mandatory for field creation
+         'CHARACTER_OCTET_LENGTH': 1024,  # NOTE mandatory for field creation
+         'COLUMN_TYPE': 'varbinary(1024)'
+     },
+     MYSQL_DATA_TYPE.TINYBLOB: {
+         'CHARACTER_MAXIMUM_LENGTH': 255,
+         'CHARACTER_OCTET_LENGTH': 255
+     },
+     MYSQL_DATA_TYPE.TINYTEXT: {
+         'CHARACTER_MAXIMUM_LENGTH': 255,
+         'CHARACTER_OCTET_LENGTH': 255,
+         'CHARACTER_SET_NAME': 'utf8',
+         'COLLATION_NAME': 'utf8_bin'
+     },
+     MYSQL_DATA_TYPE.BLOB: {
+         'CHARACTER_MAXIMUM_LENGTH': 65535,
+         'CHARACTER_OCTET_LENGTH': 65535
+     },
+     MYSQL_DATA_TYPE.TEXT: {
+         'CHARACTER_MAXIMUM_LENGTH': 65535,
+         'CHARACTER_OCTET_LENGTH': 65535,
+         'CHARACTER_SET_NAME': 'utf8',
+         'COLLATION_NAME': 'utf8_bin'
+     },
+     MYSQL_DATA_TYPE.MEDIUMBLOB: {
+         'CHARACTER_MAXIMUM_LENGTH': 16777215,
+         'CHARACTER_OCTET_LENGTH': 16777215
+     },
+     MYSQL_DATA_TYPE.MEDIUMTEXT: {
+         'CHARACTER_MAXIMUM_LENGTH': 16777215,
+         'CHARACTER_OCTET_LENGTH': 16777215,
+         'CHARACTER_SET_NAME': 'utf8',
+         'COLLATION_NAME': 'utf8_bin'
+     },
+     MYSQL_DATA_TYPE.LONGBLOB: {
+         'CHARACTER_MAXIMUM_LENGTH': 4294967295,
+         'CHARACTER_OCTET_LENGTH': 4294967295,
+     },
+     MYSQL_DATA_TYPE.LONGTEXT: {
+         'CHARACTER_MAXIMUM_LENGTH': 4294967295,
+         'CHARACTER_OCTET_LENGTH': 4294967295,
+         'CHARACTER_SET_NAME': 'utf8',
+         'COLLATION_NAME': 'utf8_bin'
+     },
+     MYSQL_DATA_TYPE.BIT: {
+         'NUMERIC_PRECISION': 1,
+         'COLUMN_TYPE': 'bit(1)'
+         # 'NUMERIC_SCALE': null
+     },
+     MYSQL_DATA_TYPE.BOOL: {
+         'DATA_TYPE': 'tinyint',
+         'NUMERIC_PRECISION': 3,
+         'NUMERIC_SCALE': 0,
+         'COLUMN_TYPE': 'tinyint(1)'
+     },
+     MYSQL_DATA_TYPE.BOOLEAN: {
+         'DATA_TYPE': 'tinyint',
+         'NUMERIC_PRECISION': 3,
+         'NUMERIC_SCALE': 0,
+         'COLUMN_TYPE': 'tinyint(1)'
+     }
+ }
+
+
  # Map between data types and C types
  # https://dev.mysql.com/doc/c-api/8.0/en/c-api-prepared-statement-type-codes.html
  DATA_C_TYPE_MAP = {
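
This mapping is consumed by `ColumnsTableRow.from_is_columns_row` in the system_tables.py hunk earlier: defaults only fill attributes the handler left as `None`. A minimal sketch, with a hypothetical handler row:

    from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import (
        MYSQL_DATA_TYPE,
        MYSQL_DATA_TYPE_COLUMNS_DEFAULT,
    )

    row = {'CHARACTER_MAXIMUM_LENGTH': None, 'COLLATION_NAME': 'utf8mb4_bin'}  # hypothetical
    defaults = MYSQL_DATA_TYPE_COLUMNS_DEFAULT.get(MYSQL_DATA_TYPE.VARCHAR) or {}
    for key, value in defaults.items():
        if key in row and row[key] is None:
            row[key] = value

    assert row['CHARACTER_MAXIMUM_LENGTH'] == 1024  # filled from the defaults
    assert row['COLLATION_NAME'] == 'utf8mb4_bin'   # pre-existing values are preserved
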
@@ -68,6 +68,10 @@ class ChromaDBHandler(VectorStoreHandler):
              "persist_directory": self.persist_directory,
          }

+         self.create_collection_metadata = {
+             "hnsw:space": config.distance,
+         }
+
          self._use_handler_storage = False

          self.connect()
@@ -398,7 +402,7 @@ class ChromaDBHandler(VectorStoreHandler):
          Insert/Upsert data into ChromaDB collection.
          If records with same IDs exist, they will be updated.
          """
-         collection = self._client.get_or_create_collection(collection_name)
+         collection = self._client.get_or_create_collection(collection_name, metadata=self.create_collection_metadata)

          # Convert metadata from string to dict if needed
          if TableField.METADATA.value in df.columns:
@@ -484,7 +488,8 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Create a collection with the given name in the ChromaDB database.
          """
-         self._client.create_collection(table_name, get_or_create=if_not_exists)
+         self._client.create_collection(table_name, get_or_create=if_not_exists,
+                                        metadata=self.create_collection_metadata)
          self._sync()

      def drop_table(self, table_name: str, if_exists=True):
@@ -14,6 +14,7 @@ class ChromaHandlerConfig(BaseModel):
      host: str = None
      port: str = None
      password: str = None
+     distance: str = 'cosine'

      class Config:
          extra = "forbid"
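
Taken together, the ChromaDB changes mean every collection is now created with an explicit HNSW distance function, `cosine` by default, overridable through the new `distance` connection argument. The equivalent direct client call, as a sketch against the public chromadb API (the collection name is hypothetical):

    import chromadb

    client = chromadb.Client()
    collection = client.create_collection(
        'demo_collection',                  # hypothetical name
        metadata={'hnsw:space': 'cosine'},  # what the handler now passes on create
    )
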