MindsDB 25.2.3.0__py3-none-any.whl → 25.2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (46)
  1. {MindsDB-25.2.3.0.dist-info → MindsDB-25.2.4.0.dist-info}/METADATA +224 -243
  2. {MindsDB-25.2.3.0.dist-info → MindsDB-25.2.4.0.dist-info}/RECORD +44 -43
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +1 -11
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +4 -1
  6. mindsdb/api/http/initialize.py +8 -5
  7. mindsdb/api/http/namespaces/agents.py +0 -7
  8. mindsdb/api/http/namespaces/config.py +0 -48
  9. mindsdb/api/http/namespaces/knowledge_bases.py +1 -1
  10. mindsdb/api/http/namespaces/util.py +0 -28
  11. mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -1
  12. mindsdb/integrations/handlers/dspy_handler/requirements.txt +0 -1
  13. mindsdb/integrations/handlers/langchain_embedding_handler/requirements.txt +0 -1
  14. mindsdb/integrations/handlers/langchain_handler/requirements.txt +0 -1
  15. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +0 -1
  16. mindsdb/integrations/handlers/openai_handler/constants.py +3 -1
  17. mindsdb/integrations/handlers/openai_handler/requirements.txt +0 -1
  18. mindsdb/integrations/handlers/rag_handler/requirements.txt +0 -1
  19. mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +33 -8
  20. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +3 -2
  21. mindsdb/integrations/handlers/web_handler/web_handler.py +42 -33
  22. mindsdb/integrations/handlers/youtube_handler/__init__.py +2 -0
  23. mindsdb/integrations/handlers/youtube_handler/connection_args.py +32 -0
  24. mindsdb/integrations/libs/llm/utils.py +5 -0
  25. mindsdb/integrations/libs/process_cache.py +2 -2
  26. mindsdb/integrations/utilities/rag/chains/local_context_summarizer_chain.py +227 -0
  27. mindsdb/interfaces/agents/agents_controller.py +3 -3
  28. mindsdb/interfaces/agents/callback_handlers.py +52 -5
  29. mindsdb/interfaces/agents/langchain_agent.py +5 -3
  30. mindsdb/interfaces/database/database.py +1 -1
  31. mindsdb/interfaces/database/integrations.py +1 -1
  32. mindsdb/interfaces/jobs/scheduler.py +1 -1
  33. mindsdb/interfaces/knowledge_base/preprocessing/constants.py +2 -2
  34. mindsdb/interfaces/skills/skills_controller.py +2 -2
  35. mindsdb/interfaces/skills/sql_agent.py +6 -1
  36. mindsdb/interfaces/storage/db.py +0 -12
  37. mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py +33 -0
  38. mindsdb/utilities/config.py +1 -0
  39. mindsdb/utilities/log.py +17 -2
  40. mindsdb/utilities/ml_task_queue/consumer.py +4 -2
  41. mindsdb/utilities/render/sqlalchemy_render.py +4 -0
  42. mindsdb/utilities/log_controller.py +0 -39
  43. mindsdb/utilities/telemetry.py +0 -44
  44. {MindsDB-25.2.3.0.dist-info → MindsDB-25.2.4.0.dist-info}/LICENSE +0 -0
  45. {MindsDB-25.2.3.0.dist-info → MindsDB-25.2.4.0.dist-info}/WHEEL +0 -0
  46. {MindsDB-25.2.3.0.dist-info → MindsDB-25.2.4.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/agents/langchain_agent.py CHANGED
@@ -400,7 +400,7 @@ class LangchainAgent:
                 "max_iterations", args.get("max_iterations", DEFAULT_MAX_ITERATIONS)
             ),
             memory=memory,
-            verbose=args.get("verbose", args.get("verbose", True)),
+            verbose=args.get("verbose", args.get("verbose", False))
         )
         return agent_executor
 
@@ -435,7 +435,7 @@ class LangchainAgent:
         all_callbacks = []
 
         if self.log_callback_handler is None:
-            self.log_callback_handler = LogCallbackHandler(logger)
+            self.log_callback_handler = LogCallbackHandler(logger, verbose=args.get("verbose", True))
 
         all_callbacks.append(self.log_callback_handler)
 
@@ -599,7 +599,9 @@ AI: {response}"""
             agent_executor_finished_event.set()
 
         # Enqueue Langchain agent streaming chunks in a separate thread to not block event chunks.
-        executor_stream_thread = threading.Thread(target=stream_worker, daemon=True, args=(ctx.dump(),))
+        executor_stream_thread = threading.Thread(
+            target=stream_worker, daemon=True, args=(ctx.dump(),), name='LangchainAgent.stream_worker'
+        )
         executor_stream_thread.start()
 
         while not agent_executor_finished_event.is_set():
mindsdb/interfaces/database/database.py CHANGED
@@ -106,7 +106,7 @@ class DatabaseController:
         }
 
     def exists(self, db_name: str) -> bool:
-        return db_name in self.get_dict()
+        return db_name.lower() in self.get_dict()
 
     def get_project(self, name: str):
        return self.project_controller.get(name=name)
mindsdb/interfaces/database/integrations.py CHANGED
@@ -64,7 +64,7 @@ class HandlersCache:
         ):
             return
         self._stop_event.clear()
-        self.cleaner_thread = threading.Thread(target=self._clean)
+        self.cleaner_thread = threading.Thread(target=self._clean, name='HandlersCache.clean')
         self.cleaner_thread.daemon = True
         self.cleaner_thread.start()
 
mindsdb/interfaces/jobs/scheduler.py CHANGED
@@ -44,7 +44,7 @@ class Scheduler:
         self.q_in = queue.Queue()
         self.q_out = queue.Queue()
         self.work_thread = threading.Thread(
-            target=execute_async, args=(self.q_in, self.q_out)
+            target=execute_async, args=(self.q_in, self.q_out), name='Scheduler.execute_async'
         )
         self.work_thread.start()
 
mindsdb/interfaces/knowledge_base/preprocessing/constants.py CHANGED
@@ -6,8 +6,8 @@ DEFAULT_MARKDOWN_HEADERS = [
 ]
 
 # Limits for web crawling
-DEFAULT_CRAWL_DEPTH = 1
-DEFAULT_WEB_CRAWL_LIMIT = 100
+DEFAULT_CRAWL_DEPTH = None
+DEFAULT_WEB_CRAWL_LIMIT = 1
 DEFAULT_WEB_FILTERS = []
 
 DEFAULT_CONTEXT_DOCUMENT_LIMIT = 50
mindsdb/interfaces/skills/skills_controller.py CHANGED
@@ -1,7 +1,7 @@
 import datetime
 from typing import Dict, List, Optional
 
-from sqlalchemy import null
+from sqlalchemy import null, func
 from sqlalchemy.orm.attributes import flag_modified
 
 from mindsdb.interfaces.storage import db
@@ -33,7 +33,7 @@ class SkillsController:
 
         project = self.project_controller.get(name=project_name)
         return db.Skills.query.filter(
-            db.Skills.name == skill_name,
+            func.lower(db.Skills.name) == func.lower(skill_name),
            db.Skills.project_id == project.id,
            db.Skills.deleted_at == null()
         ).first()
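The skill lookup above becomes case-insensitive by lowering both sides of the comparison in SQL. A minimal sketch of the same pattern with generic SQLAlchemy models (the model, session, and function names below are illustrative, not MindsDB's):

from sqlalchemy import func

def get_skill(session, Skills, skill_name: str, project_id: int):
    # func.lower() renders as SQL LOWER(), so 'MySkill' and 'myskill'
    # match regardless of how the name was stored.
    return session.query(Skills).filter(
        func.lower(Skills.name) == func.lower(skill_name),
        Skills.project_id == project_id,
    ).first()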
mindsdb/interfaces/skills/sql_agent.py CHANGED
@@ -287,6 +287,7 @@ class SQLAgent:
         return info
 
     def _get_sample_rows(self, table: str, fields: List[str]) -> str:
+        logger.info(f'_get_sample_rows: table={table} fields={fields}')
         command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
         try:
             ret = self._call_engine(command)
@@ -300,7 +301,7 @@
                 map(lambda row: [truncate_value(value) for value in row], sample_rows))
             sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
         except Exception as e:
-            logger.warning(e)
+            logger.info(f'_get_sample_rows error: {e}')
             sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
 
         return sample_rows_str
@@ -347,14 +348,18 @@
 
     def get_table_info_safe(self, table_names: Optional[List[str]] = None) -> str:
         try:
+            logger.info(f'get_table_info_safe: {table_names}')
             return self.get_table_info(table_names)
         except Exception as e:
+            logger.info(f'get_table_info_safe error: {e}')
             return f"Error: {e}"
 
     def query_safe(self, command: str, fetch: str = "all") -> str:
         try:
+            logger.info(f'query_safe (fetch={fetch}): {command}')
             return self.query(command, fetch)
         except Exception as e:
+            logger.info(f'query_safe error: {e}')
             msg = f"Error: {e}"
             if 'does not exist' in msg and ' relation ' in msg:
                 msg += '\nAvailable tables: ' + ', '.join(self.get_usable_table_names())
mindsdb/interfaces/storage/db.py CHANGED
@@ -218,18 +218,6 @@ class Project(Base):
     )
 
 
-class Log(Base):
-    __tablename__ = "log"
-
-    id = Column(Integer, primary_key=True)
-    created_at = Column(DateTime, default=datetime.datetime.now)
-    log_type = Column(String)  # log, info, warning, traceback etc
-    source = Column(String)  # file + line
-    company_id = Column(Integer)
-    payload = Column(String)
-    created_at_index = Index("some_index", "created_at_index")
-
-
 class Integration(Base):
     __tablename__ = "integration"
     id = Column(Integer, primary_key=True)
mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py ADDED
@@ -0,0 +1,33 @@
+"""del_log_table
+
+Revision ID: 6ab9903fc59a
+Revises: 4943359e354a
+Create Date: 2025-02-10 16:50:27.186697
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db  # noqa
+
+# revision identifiers, used by Alembic.
+revision = '6ab9903fc59a'
+down_revision = '4943359e354a'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.drop_table('log')
+
+
+def downgrade():
+    op.create_table(
+        'log',
+        sa.Column('id', sa.INTEGER(), nullable=False),
+        sa.Column('created_at', sa.DATETIME(), nullable=True),
+        sa.Column('log_type', sa.VARCHAR(), nullable=True),
+        sa.Column('source', sa.VARCHAR(), nullable=True),
+        sa.Column('company_id', sa.INTEGER(), nullable=True),
+        sa.Column('payload', sa.VARCHAR(), nullable=True),
+        sa.PrimaryKeyConstraint('id')
+    )
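The new migration simply drops the now-unused log table and recreates it on downgrade. For anyone applying it by hand with stock Alembic (a sketch under that assumption; MindsDB normally applies its own migrations, and the alembic.ini path here is illustrative), it would look like:

# Minimal sketch (assumption: plain Alembic usage; config path is illustrative).
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.upgrade(cfg, "6ab9903fc59a")    # applies del_log_table: drops the 'log' table
command.downgrade(cfg, "4943359e354a")  # rolls back: recreates the table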
mindsdb/utilities/config.py CHANGED
@@ -149,6 +149,7 @@ class Config:
             "handlers": {
                 "console": {
                     "enabled": True,
+                    "formatter": "default",
                     "level": "INFO"  # MINDSDB_CONSOLE_LOG_LEVEL or MINDSDB_LOG_LEVEL (obsolete)
                 },
                 "file": {
mindsdb/utilities/log.py CHANGED
@@ -1,3 +1,4 @@
+import json
 import logging
 from logging.config import dictConfig
 
@@ -7,6 +8,19 @@ from mindsdb.utilities.config import config as app_config
 logging_initialized = False
 
 
+class JsonFormatter(logging.Formatter):
+    def format(self, record):
+        record_message = super().format(record)
+        log_record = {
+            'process_name': record.processName,
+            'name': record.name,
+            'message': record_message,
+            'level': record.levelname,
+            'time': record.created
+        }
+        return json.dumps(log_record)
+
+
 class ColorFormatter(logging.Formatter):
     green = "\x1b[32;20m"
     default = "\x1b[39;20m"
@@ -53,7 +67,7 @@ def configure_logging():
     if console_handler_config['enabled'] is True:
         handlers_config['console'] = {
             "class": "logging.StreamHandler",
-            "formatter": "f",
+            "formatter": console_handler_config.get('formatter', 'default'),
             "level": console_handler_config_level
         }
 
@@ -74,7 +88,8 @@
     logging_config = dict(
         version=1,
         formatters={
-            "f": {"()": ColorFormatter},
+            "default": {"()": ColorFormatter},
+            "json": {"()": JsonFormatter},
             "file": {
                 "format": "%(asctime)s %(processName)15s %(levelname)-8s %(name)s: %(message)s"
             }
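Together with the new "formatter" key in the console handler config, this registers a "json" formatter alongside the default colored one. A minimal sketch of what the JsonFormatter emits when attached to a plain handler (a standalone re-implementation for illustration; in MindsDB you would presumably select it via the console handler's "formatter" setting rather than wiring it up by hand):

import json
import logging

class JsonFormatter(logging.Formatter):
    # Mirrors the formatter added in mindsdb/utilities/log.py: one JSON object per record.
    def format(self, record):
        return json.dumps({
            'process_name': record.processName,
            'name': record.name,
            'message': super().format(record),
            'level': record.levelname,
            'time': record.created,
        })

handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
logger = logging.getLogger('demo')
logger.addHandler(handler)
logger.setLevel(logging.INFO)
logger.info('hello')
# -> {"process_name": "MainProcess", "name": "demo", "message": "hello", "level": "INFO", "time": 1739...}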
mindsdb/utilities/ml_task_queue/consumer.py CHANGED
@@ -74,7 +74,9 @@ class MLTaskConsumer(BaseRedisQueue):
 
         # region collect cpu usage statistic
         self.cpu_stat = [0] * 10
-        self._collect_cpu_stat_thread = threading.Thread(target=self._collect_cpu_stat)
+        self._collect_cpu_stat_thread = threading.Thread(
+            target=self._collect_cpu_stat, name='MLTaskConsumer._collect_cpu_stat'
+        )
         self._collect_cpu_stat_thread.start()
         # endregion
 
@@ -221,7 +223,7 @@
             if self._ready_event.is_set() is False:
                 continue
             self._ready_event.clear()
-            threading.Thread(target=self._listen).start()
+            threading.Thread(target=self._listen, name='MLTaskConsumer._listen').start()
         self.stop()
 
     def stop(self) -> None:
mindsdb/utilities/render/sqlalchemy_render.py CHANGED
@@ -63,6 +63,10 @@ class AttributedStr(str):
         obj.is_quoted = is_quoted
         return obj
 
+    def replace(self, *args):
+        obj = super().replace(*args)
+        return AttributedStr(obj, self.is_quoted)
+
 
 def get_is_quoted(identifier: ast.Identifier):
     quoted = getattr(identifier, 'is_quoted', [])
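The new replace() override exists because str.replace() returns a plain str, which would silently drop the is_quoted flag the renderer relies on. A minimal sketch of the behavior (a simplified copy of the class for illustration):

class AttributedStr(str):
    # Simplified copy of the class from sqlalchemy_render.py.
    def __new__(cls, value, is_quoted=False):
        obj = super().__new__(cls, value)
        obj.is_quoted = is_quoted
        return obj

    def replace(self, *args):
        # Re-wrap the result so the is_quoted flag survives string manipulation.
        return AttributedStr(super().replace(*args), self.is_quoted)

s = AttributedStr('my column', is_quoted=True)
renamed = s.replace(' ', '_')
print(type(renamed).__name__, renamed.is_quoted)  # AttributedStr True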
mindsdb/utilities/log_controller.py DELETED
@@ -1,39 +0,0 @@
-from mindsdb.interfaces.storage import db
-from mindsdb.utilities.context import context as ctx
-
-
-def fmt_log_record(log_record):
-    return {
-        'log_from': 'mindsdb',
-        'level': log_record.log_type,
-        'context': 'unknown',
-        'text': log_record.payload,
-        'created_at': str(log_record.created_at).split('.')[0]
-    }
-
-
-def get_logs(min_timestamp, max_timestamp, context, level, log_from, limit):
-    logs = db.session.query(db.Log).filter(
-        db.Log.company_id == ctx.company_id,
-        db.Log.created_at > min_timestamp
-    )
-
-    if max_timestamp is not None:
-        logs = logs.filter(db.Log.created_at < max_timestamp)
-
-    if context is not None:
-        # e.g. datasource/predictor and assoicated id
-        pass
-
-    if level is not None:
-        logs = logs.filter(db.Log.log_type == level)
-
-    if log_from is not None:
-        # mindsdb/native/lightwood/all
-        pass
-
-    if limit is not None:
-        logs = logs.limit(limit)
-
-    logs = [fmt_log_record(x) for x in logs]
-    return logs
mindsdb/utilities/telemetry.py DELETED
@@ -1,44 +0,0 @@
-import os
-from pathlib import Path
-
-TELEMETRY_FILE = 'telemetry.lock'
-
-
-def enable_telemetry(storage_dir):
-    os.environ['CHECK_FOR_UPDATES'] = '1'
-    path = os.path.join(storage_dir, TELEMETRY_FILE)
-    if os.path.exists(path):
-        os.remove(path)
-
-
-def disable_telemetry(storage_dir):
-    os.environ['CHECK_FOR_UPDATES'] = '0'
-    path = os.path.join(storage_dir, TELEMETRY_FILE)
-    with open(path, 'w') as _:
-        pass
-
-
-def telemetry_file_exists(storage_dir):
-    path = os.path.join(storage_dir, TELEMETRY_FILE)
-    return os.path.exists(path)
-
-
-def inject_telemetry_to_static(static_folder):
-    TEXT = '<script>localStorage.isTestUser = true;</script>'
-    index = Path(static_folder).joinpath('index.html')
-    disable_telemetry = os.getenv('CHECK_FOR_UPDATES', '1').lower() in ['0', 'false', 'False']
-    if index.is_file():
-        with open(str(index), 'rt') as f:
-            content = f.read()
-        script_index = content.find('<script>')
-        need_update = True
-        if TEXT not in content and disable_telemetry:
-            content = content[:script_index] + TEXT + content[script_index:]
-        elif not disable_telemetry and TEXT in content:
-            content = content.replace(TEXT, '')
-        else:
-            need_update = False
-
-        if need_update:
-            with open(str(index), 'wt') as f:
-                f.write(content)