MindsDB 25.8.3.0__py3-none-any.whl → 25.9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (109)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +3 -45
  3. mindsdb/api/a2a/__init__.py +52 -0
  4. mindsdb/api/a2a/agent.py +11 -12
  5. mindsdb/api/a2a/common/server/server.py +17 -36
  6. mindsdb/api/a2a/common/server/task_manager.py +14 -28
  7. mindsdb/api/a2a/task_manager.py +20 -21
  8. mindsdb/api/a2a/utils.py +1 -1
  9. mindsdb/api/common/middleware.py +106 -0
  10. mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +466 -18
  11. mindsdb/api/executor/utilities/sql.py +9 -31
  12. mindsdb/api/http/initialize.py +34 -43
  13. mindsdb/api/http/namespaces/auth.py +6 -14
  14. mindsdb/api/http/namespaces/config.py +0 -2
  15. mindsdb/api/http/namespaces/default.py +74 -106
  16. mindsdb/api/http/namespaces/file.py +9 -3
  17. mindsdb/api/http/namespaces/handlers.py +77 -87
  18. mindsdb/api/http/start.py +29 -47
  19. mindsdb/api/litellm/start.py +11 -10
  20. mindsdb/api/mcp/__init__.py +165 -0
  21. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +33 -64
  22. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +86 -85
  23. mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
  24. mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
  25. mindsdb/integrations/handlers/crate_handler/crate_handler.py +3 -7
  26. mindsdb/integrations/handlers/derby_handler/derby_handler.py +32 -34
  27. mindsdb/integrations/handlers/documentdb_handler/requirements.txt +1 -0
  28. mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py +12 -13
  29. mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
  30. mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +45 -44
  31. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +101 -95
  32. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +129 -129
  33. mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +59 -43
  34. mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +38 -39
  35. mindsdb/integrations/handlers/informix_handler/informix_handler.py +5 -18
  36. mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -1
  37. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  38. mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +22 -28
  39. mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +3 -7
  40. mindsdb/integrations/handlers/mongodb_handler/mongodb_handler.py +53 -67
  41. mindsdb/integrations/handlers/mongodb_handler/requirements.txt +1 -0
  42. mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_ast.py +43 -68
  43. mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_parser.py +17 -25
  44. mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_query.py +10 -16
  45. mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_render.py +43 -69
  46. mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
  47. mindsdb/integrations/libs/base.py +1 -1
  48. mindsdb/integrations/libs/llm/config.py +15 -0
  49. mindsdb/integrations/libs/llm/utils.py +15 -0
  50. mindsdb/interfaces/agents/constants.py +1 -0
  51. mindsdb/interfaces/agents/langchain_agent.py +4 -0
  52. mindsdb/interfaces/agents/providers.py +20 -0
  53. mindsdb/interfaces/knowledge_base/controller.py +25 -7
  54. mindsdb/utilities/config.py +15 -158
  55. mindsdb/utilities/log.py +0 -25
  56. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  57. mindsdb/utilities/starters.py +0 -39
  58. {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/METADATA +269 -267
  59. {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/RECORD +62 -105
  60. mindsdb/api/a2a/__main__.py +0 -144
  61. mindsdb/api/a2a/run_a2a.py +0 -86
  62. mindsdb/api/common/check_auth.py +0 -42
  63. mindsdb/api/http/gunicorn_wrapper.py +0 -17
  64. mindsdb/api/mcp/start.py +0 -205
  65. mindsdb/api/mongo/__init__.py +0 -0
  66. mindsdb/api/mongo/classes/__init__.py +0 -5
  67. mindsdb/api/mongo/classes/query_sql.py +0 -19
  68. mindsdb/api/mongo/classes/responder.py +0 -45
  69. mindsdb/api/mongo/classes/responder_collection.py +0 -34
  70. mindsdb/api/mongo/classes/scram.py +0 -86
  71. mindsdb/api/mongo/classes/session.py +0 -23
  72. mindsdb/api/mongo/functions/__init__.py +0 -19
  73. mindsdb/api/mongo/responders/__init__.py +0 -73
  74. mindsdb/api/mongo/responders/add_shard.py +0 -13
  75. mindsdb/api/mongo/responders/aggregate.py +0 -90
  76. mindsdb/api/mongo/responders/buildinfo.py +0 -17
  77. mindsdb/api/mongo/responders/coll_stats.py +0 -63
  78. mindsdb/api/mongo/responders/company_id.py +0 -25
  79. mindsdb/api/mongo/responders/connection_status.py +0 -22
  80. mindsdb/api/mongo/responders/count.py +0 -21
  81. mindsdb/api/mongo/responders/db_stats.py +0 -32
  82. mindsdb/api/mongo/responders/delete.py +0 -105
  83. mindsdb/api/mongo/responders/describe.py +0 -23
  84. mindsdb/api/mongo/responders/end_sessions.py +0 -13
  85. mindsdb/api/mongo/responders/find.py +0 -175
  86. mindsdb/api/mongo/responders/get_cmd_line_opts.py +0 -18
  87. mindsdb/api/mongo/responders/get_free_monitoring_status.py +0 -14
  88. mindsdb/api/mongo/responders/get_parameter.py +0 -23
  89. mindsdb/api/mongo/responders/getlog.py +0 -14
  90. mindsdb/api/mongo/responders/host_info.py +0 -28
  91. mindsdb/api/mongo/responders/insert.py +0 -270
  92. mindsdb/api/mongo/responders/is_master.py +0 -20
  93. mindsdb/api/mongo/responders/is_master_lower.py +0 -13
  94. mindsdb/api/mongo/responders/list_collections.py +0 -55
  95. mindsdb/api/mongo/responders/list_databases.py +0 -37
  96. mindsdb/api/mongo/responders/list_indexes.py +0 -22
  97. mindsdb/api/mongo/responders/ping.py +0 -13
  98. mindsdb/api/mongo/responders/recv_chunk_start.py +0 -13
  99. mindsdb/api/mongo/responders/replsetgetstatus.py +0 -13
  100. mindsdb/api/mongo/responders/sasl_continue.py +0 -34
  101. mindsdb/api/mongo/responders/sasl_start.py +0 -33
  102. mindsdb/api/mongo/responders/update_range_deletions.py +0 -12
  103. mindsdb/api/mongo/responders/whatsmyuri.py +0 -18
  104. mindsdb/api/mongo/server.py +0 -388
  105. mindsdb/api/mongo/start.py +0 -15
  106. mindsdb/api/mongo/utilities/__init__.py +0 -0
  107. {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/WHEEL +0 -0
  108. {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/licenses/LICENSE +0 -0
  109. {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ from bson.objectid import ObjectId
5
5
  from mindsdb_sql_parser.ast import Select, Update, Identifier, Star, Constant, Tuple, BinaryOperation, Latest, TypeCast
6
6
  from mindsdb_sql_parser.ast.base import ASTNode
7
7
 
8
- from mindsdb.api.mongo.utilities.mongodb_query import MongoQuery
8
+ from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_query import MongoQuery
9
9
 
10
10
 
11
11
  class MongodbRender:
@@ -27,7 +27,7 @@ class MongodbRender:
27
27
  return self.select(node)
28
28
  elif isinstance(node, Update):
29
29
  return self.update(node)
30
- raise NotImplementedError(f'Unknown statement: {node.__class__.__name__}')
30
+ raise NotImplementedError(f"Unknown statement: {node.__class__.__name__}")
31
31
 
32
32
  def update(self, node: Update) -> MongoQuery:
33
33
  """
@@ -43,17 +43,8 @@ class MongodbRender:
43
43
  mquery = MongoQuery(collection)
44
44
 
45
45
  filters = self.handle_where(node.where)
46
- row = {
47
- k: v.value
48
- for k, v in node.update_columns.items()
49
- }
50
- mquery.add_step({
51
- 'method': 'update_many',
52
- 'args': [
53
- filters,
54
- {"$set": row}
55
- ]
56
- })
46
+ row = {k: v.value for k, v in node.update_columns.items()}
47
+ mquery.add_step({"method": "update_many", "args": [filters, {"$set": row}]})
57
48
  return mquery
58
49
 
59
50
  def select(self, node: Select):
@@ -67,7 +58,7 @@ class MongodbRender:
67
58
  MongoQuery: The converted MongoQuery instance.
68
59
  """
69
60
  if not isinstance(node.from_table, Identifier):
70
- raise NotImplementedError(f'Not supported from {node.from_table}')
61
+ raise NotImplementedError(f"Not supported from {node.from_table}")
71
62
 
72
63
  collection = node.from_table.parts[-1]
73
64
 
@@ -77,10 +68,10 @@ class MongodbRender:
77
68
  filters = self.handle_where(node.where)
78
69
 
79
70
  group = {}
80
- project = {'_id': 0} # Hide _id field when it has not been explicitly requested.
71
+ project = {"_id": 0} # Hide _id field when it has not been explicitly requested.
81
72
  if node.distinct:
82
73
  # Group by distinct fields.
83
- group = {'_id': {}}
74
+ group = {"_id": {}}
84
75
 
85
76
  if node.targets is not None:
86
77
  for col in node.targets:
@@ -95,12 +86,12 @@ class MongodbRender:
95
86
  else:
96
87
  alias = col.alias.parts[-1]
97
88
 
98
- project[alias] = f'${name}' # Project field.
89
+ project[alias] = f"${name}" # Project field.
99
90
 
100
91
  # Group by distinct fields.
101
92
  if node.distinct:
102
- group['_id'][name] = f'${name}' # Group field.
103
- group[name] = {'$first': f'${name}'} # Show field.
93
+ group["_id"][name] = f"${name}" # Group field.
94
+ group[name] = {"$first": f"${name}"} # Show field.
104
95
 
105
96
  elif isinstance(col, Constant):
106
97
  val = str(col.value) # Convert to string becuase it is interpreted as an index.
@@ -112,19 +103,19 @@ class MongodbRender:
112
103
 
113
104
  if node.group_by is not None:
114
105
  # TODO
115
- raise NotImplementedError(f'Group {node.group_by}')
106
+ raise NotImplementedError(f"Group {node.group_by}")
116
107
 
117
108
  sort = {}
118
109
  if node.order_by is not None:
119
110
  for col in node.order_by:
120
111
  name = col.field.parts[-1]
121
- direction = 1 if col.direction.upper() == 'ASC' else -1
112
+ direction = 1 if col.direction.upper() == "ASC" else -1
122
113
  sort[name] = direction
123
114
 
124
115
  # Compose the MongoDB query.
125
116
  mquery = MongoQuery(collection)
126
117
 
127
- method = 'aggregate'
118
+ method = "aggregate"
128
119
  arg = []
129
120
 
130
121
  # MongoDB related pipeline steps for the aggregate method.
@@ -150,10 +141,7 @@ class MongodbRender:
150
141
  if node.limit is not None:
151
142
  arg.append({"$limit": int(node.limit.value)})
152
143
 
153
- mquery.add_step({
154
- 'method': method,
155
- 'args': [arg]
156
- })
144
+ mquery.add_step({"method": method, "args": [arg]})
157
145
 
158
146
  return mquery
159
147
 
@@ -168,34 +156,34 @@ class MongodbRender:
168
156
  dict: The converted MongoDB query filters.
169
157
  """
170
158
  # TODO: UnaryOperation, function.
171
- if not type(node) in [BinaryOperation]:
172
- raise NotImplementedError(f'Not supported type {type(node)}')
159
+ if type(node) not in [BinaryOperation]:
160
+ raise NotImplementedError(f"Not supported type {type(node)}")
173
161
 
174
162
  op = node.op.lower()
175
163
  arg1, arg2 = node.args
176
164
 
177
- if op in ('and', 'or'):
165
+ if op in ("and", "or"):
178
166
  query1 = self.handle_where(arg1)
179
167
  query2 = self.handle_where(arg2)
180
168
 
181
169
  ops = {
182
- 'and': '$and',
183
- 'or': '$or',
170
+ "and": "$and",
171
+ "or": "$or",
184
172
  }
185
173
  query = {ops[op]: [query1, query2]}
186
174
  return query
187
175
 
188
176
  ops_map = {
189
- '>=': '$gte',
190
- '>': '$gt',
191
- '<': '$lt',
192
- '<=': '$lte',
193
- '<>': '$ne',
194
- '!=': '$ne',
195
- '=': '$eq',
196
- '==': '$eq',
197
- 'is': '$eq',
198
- 'is not': '$ne',
177
+ ">=": "$gte",
178
+ ">": "$gt",
179
+ "<": "$lt",
180
+ "<=": "$lte",
181
+ "<>": "$ne",
182
+ "!=": "$ne",
183
+ "=": "$eq",
184
+ "==": "$eq",
185
+ "is": "$eq",
186
+ "is not": "$ne",
199
187
  }
200
188
 
201
189
  if isinstance(arg1, Identifier):
@@ -203,35 +191,29 @@ class MongodbRender:
203
191
  # Simple operation.
204
192
  if isinstance(arg2, Constant):
205
193
  # Identifier and Constant.
206
- val = ObjectId(arg2.value) if var_name == '_id' else arg2.value
207
- if op in ('=', '=='):
194
+ val = ObjectId(arg2.value) if var_name == "_id" else arg2.value
195
+ if op in ("=", "=="):
208
196
  pass
209
197
  elif op in ops_map:
210
198
  op2 = ops_map[op]
211
199
  val = {op2: val}
212
200
  else:
213
- raise NotImplementedError(f'Not supported operator {op}')
201
+ raise NotImplementedError(f"Not supported operator {op}")
214
202
 
215
203
  return {var_name: val}
216
204
 
217
205
  # IN condition.
218
206
  elif isinstance(arg2, Tuple):
219
207
  # Should be IN, NOT IN.
220
- ops = {
221
- 'in': '$in',
222
- 'not in': '$nin'
223
- }
208
+ ops = {"in": "$in", "not in": "$nin"}
224
209
  # Must be list of Constants.
225
- values = [
226
- i.value
227
- for i in arg2.items
228
- ]
210
+ values = [i.value for i in arg2.items]
229
211
 
230
212
  if op in ops:
231
213
  op2 = ops[op]
232
214
  cond = {op2: values}
233
215
  else:
234
- raise NotImplementedError(f'Not supported operator {op}')
216
+ raise NotImplementedError(f"Not supported operator {op}")
235
217
 
236
218
  return {var_name: cond}
237
219
 
@@ -242,13 +224,9 @@ class MongodbRender:
242
224
  if op in ops_map:
243
225
  op2 = ops_map[op]
244
226
  else:
245
- raise NotImplementedError(f'Not supported operator {op}')
227
+ raise NotImplementedError(f"Not supported operator {op}")
246
228
 
247
- return {
248
- '$expr': {
249
- op2: [val1, val2]
250
- }
251
- }
229
+ return {"$expr": {op2: [val1, val2]}}
252
230
 
253
231
  def where_element_convert(self, node: Union[Identifier, Latest, Constant, TypeCast]) -> Any:
254
232
  """
@@ -265,22 +243,18 @@ class MongodbRender:
265
243
  RuntimeError: If the date format is not supported.
266
244
  """
267
245
  if isinstance(node, Identifier):
268
- return f'${node.parts[-1]}'
246
+ return f"${node.parts[-1]}"
269
247
  elif isinstance(node, Latest):
270
- return 'LATEST'
248
+ return "LATEST"
271
249
  elif isinstance(node, Constant):
272
250
  return node.value
273
- elif isinstance(node, TypeCast)\
274
- and node.type_name.upper() in ('DATE', 'DATETIME'):
275
- formats = [
276
- "%Y-%m-%d",
277
- "%Y-%m-%dT%H:%M:%S.%f"
278
- ]
251
+ elif isinstance(node, TypeCast) and node.type_name.upper() in ("DATE", "DATETIME"):
252
+ formats = ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S.%f"]
279
253
  for format in formats:
280
254
  try:
281
255
  return dt.datetime.strptime(node.arg.value, format)
282
256
  except ValueError:
283
257
  pass
284
- raise RuntimeError(f'Not supported date format. Supported: {formats}')
258
+ raise RuntimeError(f"Not supported date format. Supported: {formats}")
285
259
  else:
286
- raise NotImplementedError(f'Unknown where element {node}')
260
+ raise NotImplementedError(f"Unknown where element {node}")
@@ -1,2 +1,2 @@
1
1
  tpot<=0.11.7
2
- type_infer==0.0.23
2
+ type_infer==0.0.25
@@ -58,7 +58,7 @@ class BaseHandler:
58
58
 
59
59
  Args:
60
60
  query (Any): query in native format (str for sql databases,
61
- dict for mongo, etc)
61
+ etc)
62
62
 
63
63
  Returns:
64
64
  HandlerResponse
@@ -113,3 +113,18 @@ class WriterConfig(BaseLLMConfig):
113
113
  writer_api_key: Optional[str] = Field(default=None)
114
114
  writer_org_id: Optional[str] = Field(default=None)
115
115
  base_url: Optional[str] = Field(default=None)
116
+
117
+
118
+ # https://api.python.langchain.com/en/latest/llms/langchain_aws.llms.bedrock.BedrockLLM.html#langchain_aws.llms.bedrock.BedrockLLM
119
+ class BedrockConfig(BaseLLMConfig):
120
+ model_id: str
121
+ aws_access_key_id: Optional[str] = Field(default=None)
122
+ aws_secret_access_key: Optional[str] = Field(default=None)
123
+ aws_session_token: Optional[str] = Field(default=None)
124
+ region_name: Optional[str] = Field(default=None)
125
+ credentials_profile_name: Optional[str] = Field(default=None)
126
+ endpoint_url: Optional[str] = Field(default=None)
127
+ stop: Optional[List[str]] = Field(default=None)
128
+ temperature: Optional[float] = Field(default=0.7)
129
+ max_tokens: Optional[int] = Field(default=None)
130
+ model_kwargs: Optional[Dict[str, Any]] = Field(default=None)
@@ -16,6 +16,7 @@ from mindsdb.integrations.libs.llm.config import (
16
16
  NvidiaNIMConfig,
17
17
  MindsdbConfig,
18
18
  WriterConfig,
19
+ BedrockConfig,
19
20
  )
20
21
  from mindsdb.utilities.config import config
21
22
  from langchain_text_splitters import Language, RecursiveCharacterTextSplitter
@@ -222,6 +223,20 @@ def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig:
222
223
  writer_org_id=args.get("writer_org_id", None),
223
224
  base_url=args.get("base_url", None),
224
225
  )
226
+ if provider == "bedrock":
227
+ return BedrockConfig(
228
+ model_id=args.get("model_name"),
229
+ temperature=temperature,
230
+ max_tokens=args.get("max_tokens", None),
231
+ stop=args.get("stop", None),
232
+ base_url=args.get("endpoint_url", None),
233
+ aws_access_key_id=args.get("aws_access_key_id", None),
234
+ aws_secret_access_key=args.get("aws_secret_access_key", None),
235
+ aws_session_token=args.get("aws_session_token", None),
236
+ region_name=args.get("aws_region_name", None),
237
+ credentials_profile_name=args.get("credentials_profile_name", None),
238
+ model_kwargs=args.get("model_kwargs", None),
239
+ )
225
240
 
226
241
  raise ValueError(f"Provider {provider} is not supported.")
227
242
 
@@ -208,6 +208,7 @@ DEFAULT_TEMPERATURE = 0.0
208
208
  USER_COLUMN = "question"
209
209
  DEFAULT_EMBEDDINGS_MODEL_PROVIDER = "openai"
210
210
  DEFAULT_EMBEDDINGS_MODEL_CLASS = OpenAIEmbeddings
211
+ MAX_INSERT_BATCH_SIZE = 50_000
211
212
  DEFAULT_TIKTOKEN_MODEL_NAME = os.getenv("DEFAULT_TIKTOKEN_MODEL_NAME", "gpt-4")
212
213
  AGENT_CHUNK_POLLING_INTERVAL_SECONDS = os.getenv("AGENT_CHUNK_POLLING_INTERVAL_SECONDS", 1.0)
213
214
  DEFAULT_TEXT2SQL_DATABASE = "mindsdb"
@@ -40,6 +40,7 @@ from .mindsdb_chat_model import ChatMindsdb
40
40
  from .callback_handlers import LogCallbackHandler, ContextCaptureCallback
41
41
  from .langfuse_callback_handler import LangfuseCallbackHandler, get_skills
42
42
  from .safe_output_parser import SafeOutputParser
43
+ from .providers import get_bedrock_chat_model
43
44
 
44
45
  from mindsdb.interfaces.agents.constants import (
45
46
  OPEN_AI_CHAT_MODELS,
@@ -176,6 +177,9 @@ def create_chat_model(args: Dict):
176
177
  return ChatGoogleGenerativeAI(**model_kwargs)
177
178
  if args["provider"] == "writer":
178
179
  return ChatWriter(**model_kwargs)
180
+ if args["provider"] == "bedrock":
181
+ ChatBedrock = get_bedrock_chat_model()
182
+ return ChatBedrock(**model_kwargs)
179
183
  if args["provider"] == "mindsdb":
180
184
  return ChatMindsdb(**model_kwargs)
181
185
  raise ValueError(f"Unknown provider: {args['provider']}")
@@ -0,0 +1,20 @@
1
+ import importlib.util
2
+
3
+
4
+ def get_bedrock_chat_model():
5
+ try:
6
+ from langchain_aws.chat_models import ChatBedrock
7
+ except ModuleNotFoundError:
8
+ raise RuntimeError("bedrock connector is not installed. Please install it with `pip install langchain-aws`")
9
+
10
+ if not importlib.util.find_spec("transformers"):
11
+ raise RuntimeError(
12
+ "`transformers` module is required for bedrock to count tokens. Please install it with `pip install transformers`"
13
+ )
14
+
15
+ class ChatBedrockPatched(ChatBedrock):
16
+ def _prepare_input_and_invoke(self, *args, **kwargs):
17
+ kwargs.pop("stop_sequences", None)
18
+ return super()._prepare_input_and_invoke(*args, **kwargs)
19
+
20
+ return ChatBedrockPatched
@@ -29,7 +29,7 @@ from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embeddi
29
29
  construct_model_from_args,
30
30
  )
31
31
 
32
- from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
32
+ from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS, MAX_INSERT_BATCH_SIZE
33
33
  from mindsdb.interfaces.agents.langchain_agent import create_chat_model, get_llm_provider
34
34
  from mindsdb.interfaces.database.projects import ProjectController
35
35
  from mindsdb.interfaces.variables.variables_controller import variables_controller
@@ -245,22 +245,34 @@ class KnowledgeBaseTable:
245
245
  keyword_search_cols_and_values = []
246
246
  query_text = None
247
247
  relevance_threshold = None
248
+ relevance_threshold_allowed_operators = [
249
+ FilterOperator.GREATER_THAN_OR_EQUAL.value,
250
+ FilterOperator.GREATER_THAN.value,
251
+ ]
252
+ gt_filtering = False
248
253
  hybrid_search_enabled_flag = False
249
254
  query_conditions = db_handler.extract_conditions(query.where)
250
255
  hybrid_search_alpha = None # Default to None, meaning no alpha weighted blending
251
256
  if query_conditions is not None:
252
257
  for item in query_conditions:
253
- if item.column == "relevance" and item.op.value == FilterOperator.GREATER_THAN_OR_EQUAL.value:
258
+ if (item.column == "relevance") and (item.op.value in relevance_threshold_allowed_operators):
254
259
  try:
255
260
  relevance_threshold = float(item.value)
256
261
  # Validate range: must be between 0 and 1
257
262
  if not (0 <= relevance_threshold <= 1):
258
263
  raise ValueError(f"relevance_threshold must be between 0 and 1, got: {relevance_threshold}")
264
+ if item.op.value == FilterOperator.GREATER_THAN.value:
265
+ gt_filtering = True
259
266
  logger.debug(f"Found relevance_threshold in query: {relevance_threshold}")
260
267
  except (ValueError, TypeError) as e:
261
268
  error_msg = f"Invalid relevance_threshold value: {item.value}. {str(e)}"
262
269
  logger.error(error_msg)
263
270
  raise ValueError(error_msg)
271
+ elif (item.column == "relevance") and (item.op.value not in relevance_threshold_allowed_operators):
272
+ raise ValueError(
273
+ f"Invalid operator for relevance: {item.op.value}. Only the following operators are allowed: "
274
+ f"{','.join(relevance_threshold_allowed_operators)}."
275
+ )
264
276
  elif item.column == "reranking":
265
277
  if item.value is False or (isinstance(item.value, str) and item.value.lower() == "false"):
266
278
  disable_reranking = True
@@ -279,10 +291,6 @@ class KnowledgeBaseTable:
279
291
  if not (0 <= item.value <= 1):
280
292
  raise ValueError(f"Invalid hybrid_search_alpha value: {item.value}. Must be between 0 and 1.")
281
293
  hybrid_search_alpha = item.value
282
- elif item.column == "relevance" and item.op.value != FilterOperator.GREATER_THAN_OR_EQUAL.value:
283
- raise ValueError(
284
- f"Invalid operator for relevance: {item.op.value}. Only GREATER_THAN_OR_EQUAL is allowed."
285
- )
286
294
  elif item.column == TableField.CONTENT.value:
287
295
  query_text = item.value
288
296
 
@@ -368,6 +376,11 @@ class KnowledgeBaseTable:
368
376
  # Check if we have a rerank_model configured in KB params
369
377
  df = self.add_relevance(df, query_text, relevance_threshold, disable_reranking)
370
378
 
379
+ # if relevance filtering method is strictly GREATER THAN we filter the df
380
+ if gt_filtering:
381
+ relevance_scores = TableField.RELEVANCE.value
382
+ df = df[relevance_scores > relevance_threshold]
383
+
371
384
  return df
372
385
 
373
386
  def _get_allowed_metadata_columns(self) -> List[str] | None:
@@ -410,7 +423,7 @@ class KnowledgeBaseTable:
410
423
 
411
424
  # Filter by threshold
412
425
  scores_array = np.array(scores)
413
- df = df[scores_array > reranker.filtering_threshold]
426
+ df = df[scores_array >= reranker.filtering_threshold]
414
427
  logger.debug(f"Applied reranking with params: {reranking_model_params}")
415
428
 
416
429
  elif "distance" in df.columns:
@@ -493,6 +506,8 @@ class KnowledgeBaseTable:
493
506
  """Process and insert raw data rows"""
494
507
  if not rows:
495
508
  return
509
+ if len(rows) > MAX_INSERT_BATCH_SIZE:
510
+ raise ValueError("Input data is too large, please load data in batches")
496
511
 
497
512
  df = pd.DataFrame(rows)
498
513
 
@@ -1078,6 +1093,7 @@ class KnowledgeBaseController:
1078
1093
  raise EntityExistsError("Knowledge base already exists", name)
1079
1094
 
1080
1095
  embedding_params = get_model_params(params.get("embedding_model", {}), "default_embedding_model")
1096
+ params["embedding_model"] = embedding_params
1081
1097
 
1082
1098
  # if model_name is None: # Legacy
1083
1099
  model_name = self._create_embedding_model(
@@ -1104,6 +1120,7 @@ class KnowledgeBaseController:
1104
1120
  params["reranking_model"] = {}
1105
1121
 
1106
1122
  reranking_model_params = get_model_params(reranking_model_params, "default_reranking_model")
1123
+ params["reranking_model"] = reranking_model_params
1107
1124
  if reranking_model_params:
1108
1125
  # Get reranking model from params.
1109
1126
  # This is called here to check validaity of the parameters.
@@ -1228,6 +1245,7 @@ class KnowledgeBaseController:
1228
1245
  raise RuntimeError(f"Problem with embedding model config: {e}")
1229
1246
  return
1230
1247
 
1248
+ params = copy.deepcopy(params)
1231
1249
  if "provider" in params:
1232
1250
  engine = params.pop("provider").lower()
1233
1251