MindsDB 25.9.1.0__py3-none-any.whl → 25.9.1.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of MindsDB might be problematic; see the full advisory on the registry page for details.

@@ -245,22 +245,34 @@ class KnowledgeBaseTable:
245
245
  keyword_search_cols_and_values = []
246
246
  query_text = None
247
247
  relevance_threshold = None
248
+ relevance_threshold_allowed_operators = [
249
+ FilterOperator.GREATER_THAN_OR_EQUAL.value,
250
+ FilterOperator.GREATER_THAN.value,
251
+ ]
252
+ gt_filtering = False
248
253
  hybrid_search_enabled_flag = False
249
254
  query_conditions = db_handler.extract_conditions(query.where)
250
255
  hybrid_search_alpha = None # Default to None, meaning no alpha weighted blending
251
256
  if query_conditions is not None:
252
257
  for item in query_conditions:
253
- if item.column == "relevance" and item.op.value == FilterOperator.GREATER_THAN_OR_EQUAL.value:
258
+ if (item.column == "relevance") and (item.op.value in relevance_threshold_allowed_operators):
254
259
  try:
255
260
  relevance_threshold = float(item.value)
256
261
  # Validate range: must be between 0 and 1
257
262
  if not (0 <= relevance_threshold <= 1):
258
263
  raise ValueError(f"relevance_threshold must be between 0 and 1, got: {relevance_threshold}")
264
+ if item.op.value == FilterOperator.GREATER_THAN.value:
265
+ gt_filtering = True
259
266
  logger.debug(f"Found relevance_threshold in query: {relevance_threshold}")
260
267
  except (ValueError, TypeError) as e:
261
268
  error_msg = f"Invalid relevance_threshold value: {item.value}. {str(e)}"
262
269
  logger.error(error_msg)
263
270
  raise ValueError(error_msg)
271
+ elif (item.column == "relevance") and (item.op.value not in relevance_threshold_allowed_operators):
272
+ raise ValueError(
273
+ f"Invalid operator for relevance: {item.op.value}. Only the following operators are allowed: "
274
+ f"{','.join(relevance_threshold_allowed_operators)}."
275
+ )
264
276
  elif item.column == "reranking":
265
277
  if item.value is False or (isinstance(item.value, str) and item.value.lower() == "false"):
266
278
  disable_reranking = True
@@ -279,10 +291,6 @@ class KnowledgeBaseTable:
279
291
  if not (0 <= item.value <= 1):
280
292
  raise ValueError(f"Invalid hybrid_search_alpha value: {item.value}. Must be between 0 and 1.")
281
293
  hybrid_search_alpha = item.value
282
- elif item.column == "relevance" and item.op.value != FilterOperator.GREATER_THAN_OR_EQUAL.value:
283
- raise ValueError(
284
- f"Invalid operator for relevance: {item.op.value}. Only GREATER_THAN_OR_EQUAL is allowed."
285
- )
286
294
  elif item.column == TableField.CONTENT.value:
287
295
  query_text = item.value
288
296
 
@@ -368,6 +376,11 @@ class KnowledgeBaseTable:
368
376
  # Check if we have a rerank_model configured in KB params
369
377
  df = self.add_relevance(df, query_text, relevance_threshold, disable_reranking)
370
378
 
379
+ # if relevance filtering method is strictly GREATER THAN we filter the df
380
+ if gt_filtering:
381
+ relevance_scores = TableField.RELEVANCE.value
382
+ df = df[relevance_scores > relevance_threshold]
383
+
371
384
  return df
372
385
 
373
386
  def _get_allowed_metadata_columns(self) -> List[str] | None:
@@ -410,7 +423,7 @@ class KnowledgeBaseTable:
410
423
 
411
424
  # Filter by threshold
412
425
  scores_array = np.array(scores)
413
- df = df[scores_array > reranker.filtering_threshold]
426
+ df = df[scores_array >= reranker.filtering_threshold]
414
427
  logger.debug(f"Applied reranking with params: {reranking_model_params}")
415
428
 
416
429
  elif "distance" in df.columns:
@@ -1080,6 +1093,7 @@ class KnowledgeBaseController:
1080
1093
  raise EntityExistsError("Knowledge base already exists", name)
1081
1094
 
1082
1095
  embedding_params = get_model_params(params.get("embedding_model", {}), "default_embedding_model")
1096
+ params["embedding_model"] = embedding_params
1083
1097
 
1084
1098
  # if model_name is None: # Legacy
1085
1099
  model_name = self._create_embedding_model(
@@ -1106,6 +1120,7 @@ class KnowledgeBaseController:
1106
1120
  params["reranking_model"] = {}
1107
1121
 
1108
1122
  reranking_model_params = get_model_params(reranking_model_params, "default_reranking_model")
1123
+ params["reranking_model"] = reranking_model_params
1109
1124
  if reranking_model_params:
1110
1125
  # Get reranking model from params.
1111
1126
  # This is called here to check validaity of the parameters.
@@ -1230,6 +1245,7 @@ class KnowledgeBaseController:
1230
1245
  raise RuntimeError(f"Problem with embedding model config: {e}")
1231
1246
  return
1232
1247
 
1248
+ params = copy.deepcopy(params)
1233
1249
  if "provider" in params:
1234
1250
  engine = params.pop("provider").lower()
1235
1251
 
@@ -3,14 +3,12 @@ import sys
3
3
  import json
4
4
  import argparse
5
5
  import datetime
6
- import logging
7
6
  from pathlib import Path
8
7
  from copy import deepcopy
9
8
 
10
9
  from appdirs import user_data_dir
11
10
 
12
11
  # NOTE do not `import from mindsdb` here
13
- logger = logging.getLogger(__name__)
14
12
 
15
13
 
16
14
  def _merge_key_recursive(target_dict, source_dict, key):
@@ -161,7 +159,7 @@ class Config:
161
159
  },
162
160
  }
163
161
  },
164
- "gui": {"autoupdate": True},
162
+ "gui": {"open_on_start": True, "autoupdate": True},
165
163
  "debug": False,
166
164
  "environment": "local",
167
165
  "integrations": {},
@@ -230,6 +228,7 @@ class Config:
230
228
  "paths": {},
231
229
  "permanent_storage": {},
232
230
  "ml_task_queue": {},
231
+ "gui": {},
233
232
  }
234
233
 
235
234
  # region storage root path
@@ -304,6 +303,10 @@ class Config:
304
303
  if os.environ.get("MINDSDB_DATA_CATALOG_ENABLED", "").lower() in ("1", "true"):
305
304
  self._env_config["data_catalog"] = {"enabled": True}
306
305
 
306
+ if os.environ.get("MINDSDB_NO_STUDIO", "").lower() in ("1", "true"):
307
+ self._env_config["gui"]["open_on_start"] = False
308
+ self._env_config["gui"]["autoupdate"] = False
309
+
307
310
  def fetch_auto_config(self) -> bool:
308
311
  """Load dict readed from config.auto.json to `auto_config`.
309
312
  Do it only if `auto_config` was not loaded before or config.auto.json been changed.
@@ -335,10 +338,11 @@ class Config:
335
338
  """
336
339
  if self._user_config is None:
337
340
  cmd_args_config = self.cmd_args.config
338
- if isinstance(cmd_args_config, str):
339
- self.config_path = cmd_args_config
340
- elif isinstance(os.environ.get("MINDSDB_CONFIG_PATH"), str):
341
+ if isinstance(os.environ.get("MINDSDB_CONFIG_PATH"), str):
341
342
  self.config_path = os.environ["MINDSDB_CONFIG_PATH"]
343
+ elif isinstance(cmd_args_config, str):
344
+ self.config_path = cmd_args_config
345
+
342
346
  if self.config_path == "absent":
343
347
  self.config_path = None
344
348
  if isinstance(self.config_path, str):
@@ -364,6 +368,11 @@ class Config:
364
368
  """Merge multiple configs to one."""
365
369
  new_config = deepcopy(self._default_config)
366
370
  _merge_configs(new_config, self._user_config)
371
+
372
+ if getattr(self.cmd_args, "no_studio", None) is True:
373
+ new_config["gui"]["open_on_start"] = False
374
+ new_config["gui"]["autoupdate"] = False
375
+
367
376
  _merge_configs(new_config, self._auto_config or {})
368
377
  _merge_configs(new_config, self._env_config or {})
369
378
 
@@ -47,7 +47,7 @@ def _compile_interval(element, compiler, **kw):
47
47
  if items[1].upper().endswith("S"):
48
48
  items[1] = items[1][:-1]
49
49
 
50
- if compiler.dialect.driver in ["snowflake"]:
50
+ if compiler.dialect.driver in ["snowflake"] or compiler.dialect.name in ["postgresql"]:
51
51
  # quote all
52
52
  args = " ".join(map(str, items))
53
53
  args = f"'{args}'"
@@ -282,6 +282,12 @@ class SqlalchemyRender:
282
282
  func = functions[t.op.lower()]
283
283
  col = func(arg0, arg1)
284
284
  else:
285
+ # for unknown operators wrap arguments into parens
286
+ if isinstance(t.args[0], ast.BinaryOperation):
287
+ arg0 = arg0.self_group()
288
+ if isinstance(t.args[1], ast.BinaryOperation):
289
+ arg1 = arg1.self_group()
290
+
285
291
  col = arg0.op(t.op)(arg1)
286
292
 
287
293
  if t.alias: