MindsDB 25.4.4.0 (py3-none-any.whl) → 25.5.3.0 (py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (86)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +107 -125
  3. mindsdb/api/executor/command_executor.py +14 -3
  4. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +8 -0
  5. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +2 -1
  6. mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -13
  7. mindsdb/api/executor/planner/query_plan.py +1 -0
  8. mindsdb/api/executor/planner/query_planner.py +9 -1
  9. mindsdb/api/executor/sql_query/sql_query.py +24 -8
  10. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +21 -3
  11. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +3 -1
  12. mindsdb/api/http/initialize.py +20 -3
  13. mindsdb/api/http/namespaces/analysis.py +14 -1
  14. mindsdb/api/http/namespaces/config.py +19 -11
  15. mindsdb/api/http/namespaces/tree.py +1 -1
  16. mindsdb/api/http/start.py +7 -2
  17. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +4 -8
  18. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -4
  19. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +2 -2
  20. mindsdb/integrations/handlers/bigquery_handler/requirements.txt +1 -0
  21. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -0
  22. mindsdb/integrations/handlers/gmail_handler/requirements.txt +1 -0
  23. mindsdb/integrations/handlers/google_analytics_handler/requirements.txt +2 -1
  24. mindsdb/integrations/handlers/google_books_handler/requirements.txt +1 -1
  25. mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +1 -0
  26. mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt +1 -1
  27. mindsdb/integrations/handlers/google_fit_handler/requirements.txt +2 -0
  28. mindsdb/integrations/handlers/google_search_handler/requirements.txt +1 -1
  29. mindsdb/integrations/handlers/jira_handler/jira_handler.archived.py +75 -0
  30. mindsdb/integrations/handlers/jira_handler/jira_handler.py +113 -38
  31. mindsdb/integrations/handlers/jira_handler/jira_tables.py +229 -0
  32. mindsdb/integrations/handlers/jira_handler/requirements.txt +1 -0
  33. mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -0
  34. mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -2
  35. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  36. mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -0
  37. mindsdb/integrations/handlers/ms_one_drive_handler/requirements.txt +2 -0
  38. mindsdb/integrations/handlers/ms_teams_handler/requirements.txt +3 -1
  39. mindsdb/integrations/handlers/openai_handler/helpers.py +3 -5
  40. mindsdb/integrations/handlers/openai_handler/openai_handler.py +25 -12
  41. mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
  42. mindsdb/integrations/handlers/togetherai_handler/__about__.py +9 -0
  43. mindsdb/integrations/handlers/togetherai_handler/__init__.py +20 -0
  44. mindsdb/integrations/handlers/togetherai_handler/creation_args.py +14 -0
  45. mindsdb/integrations/handlers/togetherai_handler/icon.svg +15 -0
  46. mindsdb/integrations/handlers/togetherai_handler/model_using_args.py +5 -0
  47. mindsdb/integrations/handlers/togetherai_handler/requirements.txt +2 -0
  48. mindsdb/integrations/handlers/togetherai_handler/settings.py +33 -0
  49. mindsdb/integrations/handlers/togetherai_handler/togetherai_handler.py +234 -0
  50. mindsdb/integrations/handlers/vertex_handler/requirements.txt +1 -0
  51. mindsdb/integrations/handlers/youtube_handler/requirements.txt +1 -0
  52. mindsdb/integrations/utilities/files/file_reader.py +5 -2
  53. mindsdb/integrations/utilities/handler_utils.py +4 -0
  54. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +360 -0
  55. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +6 -346
  56. mindsdb/interfaces/agents/constants.py +14 -2
  57. mindsdb/interfaces/agents/langchain_agent.py +2 -4
  58. mindsdb/interfaces/database/projects.py +1 -7
  59. mindsdb/interfaces/functions/controller.py +14 -16
  60. mindsdb/interfaces/functions/to_markdown.py +9 -124
  61. mindsdb/interfaces/knowledge_base/controller.py +109 -92
  62. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +28 -5
  63. mindsdb/interfaces/knowledge_base/utils.py +10 -15
  64. mindsdb/interfaces/model/model_controller.py +0 -2
  65. mindsdb/interfaces/query_context/context_controller.py +55 -15
  66. mindsdb/interfaces/query_context/query_task.py +19 -0
  67. mindsdb/interfaces/skills/sql_agent.py +33 -11
  68. mindsdb/interfaces/storage/db.py +2 -2
  69. mindsdb/interfaces/tasks/task_monitor.py +5 -1
  70. mindsdb/interfaces/tasks/task_thread.py +6 -0
  71. mindsdb/migrations/migrate.py +0 -2
  72. mindsdb/migrations/versions/2025-04-22_53502b6d63bf_query_database.py +27 -0
  73. mindsdb/utilities/config.py +15 -3
  74. mindsdb/utilities/context.py +2 -1
  75. mindsdb/utilities/functions.py +0 -36
  76. mindsdb/utilities/langfuse.py +19 -10
  77. mindsdb/utilities/otel/__init__.py +9 -193
  78. mindsdb/utilities/otel/metric_handlers/__init__.py +5 -1
  79. mindsdb/utilities/otel/prepare.py +198 -0
  80. mindsdb/utilities/sql.py +83 -0
  81. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/METADATA +662 -592
  82. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/RECORD +85 -69
  83. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/WHEEL +1 -1
  84. mindsdb/api/mysql/mysql_proxy/classes/sql_statement_parser.py +0 -151
  85. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/licenses/LICENSE +0 -0
  86. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/top_level.txt +0 -0
@@ -2,343 +2,22 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import logging
5
- import math
6
- import os
7
- import random
8
- from typing import Any, Dict, List, Optional, Sequence, Tuple
5
+ from typing import Any, Dict, Optional, Sequence
9
6
 
10
7
  from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
11
8
  from langchain_core.callbacks import Callbacks, dispatch_custom_event
12
9
  from langchain_core.documents import Document
13
- from openai import AsyncOpenAI, AsyncAzureOpenAI
14
- from pydantic import field_validator
15
10
 
16
- from mindsdb.integrations.utilities.rag.settings import DEFAULT_RERANKING_MODEL, DEFAULT_LLM_ENDPOINT
11
+ from mindsdb.integrations.utilities.rag.rerankers.base_reranker import BaseLLMReranker
17
12
 
18
13
  log = logging.getLogger(__name__)
19
14
 
20
15
 
21
- class LLMReranker(BaseDocumentCompressor):
22
- filtering_threshold: float = 0.0 # Default threshold for filtering
23
- provider: str = 'openai'
24
- model: str = DEFAULT_RERANKING_MODEL # Model to use for reranking
25
- temperature: float = 0.0 # Temperature for the model
26
- api_key: Optional[str] = None
16
+ class LLMReranker(BaseDocumentCompressor, BaseLLMReranker):
27
17
  remove_irrelevant: bool = True # New flag to control removal of irrelevant documents
28
- base_url: Optional[str] = None
29
- api_version: Optional[str] = None
30
- num_docs_to_keep: Optional[int] = None # How many of the top documents to keep after reranking & compressing.
31
- method: str = "multi-class" # Scoring method: 'multi-class' or 'binary'
32
- _api_key_var: str = "OPENAI_API_KEY"
33
- client: Optional[AsyncOpenAI] = None
34
- _semaphore: Optional[asyncio.Semaphore] = None
35
- max_concurrent_requests: int = 20
36
- max_retries: int = 3
37
- retry_delay: float = 1.0
38
- request_timeout: float = 20.0 # Timeout for API requests
39
- early_stop: bool = True # Whether to enable early stopping
40
- early_stop_threshold: float = 0.8 # Confidence threshold for early stopping
41
18
 
42
- class Config:
43
- arbitrary_types_allowed = True
44
-
45
- @field_validator('provider')
46
- @classmethod
47
- def validate_provider(cls, v: str) -> str:
48
- allowed = {'openai', 'azure_openai'}
49
- v_lower = v.lower()
50
- if v_lower not in allowed:
51
- raise ValueError(f"Unsupported provider: {v}.")
52
- return v_lower
53
-
54
- def __init__(self, **kwargs):
55
- super().__init__(**kwargs)
56
- self._semaphore = asyncio.Semaphore(self.max_concurrent_requests)
57
-
58
- async def _init_client(self):
59
- if self.client is None:
60
-
61
- if self.provider == "azure_openai":
62
-
63
- azure_api_key = self.api_key or os.getenv("AZURE_OPENAI_API_KEY")
64
- azure_api_endpoint = self.base_url or os.environ.get("AZURE_OPENAI_ENDPOINT")
65
- azure_api_version = self.api_version or os.environ.get("AZURE_OPENAI_API_VERSION")
66
- self.client = AsyncAzureOpenAI(api_key=azure_api_key,
67
- azure_endpoint=azure_api_endpoint,
68
- api_version=azure_api_version,
69
- timeout=self.request_timeout,
70
- max_retries=2)
71
- elif self.provider == "openai":
72
- api_key_var: str = "OPENAI_API_KEY"
73
- openai_api_key = self.api_key or os.getenv(api_key_var)
74
- if not openai_api_key:
75
- raise ValueError(f"OpenAI API key not found in environment variable {api_key_var}")
76
-
77
- base_url = self.base_url or DEFAULT_LLM_ENDPOINT
78
- self.client = AsyncOpenAI(api_key=openai_api_key, base_url=base_url, timeout=self.request_timeout, max_retries=2)
79
-
80
- async def search_relevancy(self, query: str, document: str, custom_event: bool = True) -> Any:
81
- await self._init_client()
82
-
83
- async with self._semaphore:
84
- for attempt in range(self.max_retries):
85
- try:
86
- response = await self.client.chat.completions.create(
87
- model=self.model,
88
- messages=[
89
- {"role": "system", "content": "Rate the relevance of the document to the query. Respond with 'yes' or 'no'."},
90
- {"role": "user", "content": f"Query: {query}\nDocument: {document}\nIs this document relevant?"}
91
- ],
92
- temperature=self.temperature,
93
- n=1,
94
- logprobs=True,
95
- max_tokens=1
96
- )
97
-
98
- # Extract response and logprobs
99
- answer = response.choices[0].message.content
100
- logprob = response.choices[0].logprobs.content[0].logprob
101
- rerank_data = {
102
- "document": document,
103
- "answer": answer,
104
- "logprob": logprob
105
- }
106
-
107
- # Stream reranking update.
108
- if custom_event:
109
- dispatch_custom_event("rerank", rerank_data)
110
- return rerank_data
111
-
112
- except Exception as e:
113
- if attempt == self.max_retries - 1:
114
- log.error(f"Failed after {self.max_retries} attempts: {str(e)}")
115
- raise
116
- # Exponential backoff with jitter
117
- retry_delay = self.retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
118
- await asyncio.sleep(retry_delay)
119
-
120
- async def _rank(self, query_document_pairs: List[Tuple[str, str]], custom_event: bool = True) -> List[Tuple[str, float]]:
121
- ranked_results = []
122
-
123
- # Process in larger batches for better throughput
124
- batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
125
- for i in range(0, len(query_document_pairs), batch_size):
126
- batch = query_document_pairs[i:i + batch_size]
127
- try:
128
- results = await asyncio.gather(
129
- *[self.search_relevancy(query=query, document=document, custom_event=custom_event) for (query, document) in batch],
130
- return_exceptions=True
131
- )
132
-
133
- for idx, result in enumerate(results):
134
- if isinstance(result, Exception):
135
- log.error(f"Error processing document {i+idx}: {str(result)}")
136
- ranked_results.append((batch[idx][1], 0.0))
137
- continue
138
-
139
- answer = result["answer"]
140
- logprob = result["logprob"]
141
- prob = math.exp(logprob)
142
-
143
- # Convert answer to score using the model's confidence
144
- if answer.lower().strip() == "yes":
145
- score = prob # If yes, use the model's confidence
146
- elif answer.lower().strip() == "no":
147
- score = 1 - prob # If no, invert the confidence
148
- else:
149
- score = 0.5 * prob # For unclear answers, reduce confidence
150
-
151
- ranked_results.append((batch[idx][1], score))
152
-
153
- # Check if we should stop early
154
- try:
155
- high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
156
- can_stop_early = (
157
- self.early_stop # Early stopping is enabled
158
- and self.num_docs_to_keep # We have a target number of docs
159
- and len(high_scoring_docs) >= self.num_docs_to_keep # Found enough good docs
160
- and score >= self.early_stop_threshold # Current doc is good enough
161
- )
162
-
163
- if can_stop_early:
164
- log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
165
- return ranked_results
166
- except Exception as e:
167
- # Don't let early stopping errors stop the whole process
168
- log.warning(f"Error in early stopping check: {str(e)}")
169
-
170
- except Exception as e:
171
- log.error(f"Batch processing error: {str(e)}")
172
- continue
173
- return ranked_results
174
-
175
- async def search_relevancy_score(self, query: str, document: str) -> Any:
176
- await self._init_client()
177
-
178
- async with self._semaphore:
179
- for attempt in range(self.max_retries):
180
- try:
181
- response = await self.client.chat.completions.create(
182
- model=self.model,
183
- messages=[
184
- {"role": "system", "content": """
185
- You are an intelligent assistant that evaluates how relevant a given document chunk is to a user's search query.
186
- Your task is to analyze the similarity between the search query and the document chunk, and return **only the class label** that best represents the relevance:
187
-
188
- - "class_1": Not relevant (score between 0.0 and 0.25)
189
- - "class_2": Slightly relevant (score between 0.25 and 0.5)
190
- - "class_3": Moderately relevant (score between 0.5 and 0.75)
191
- - "class_4": Highly relevant (score between 0.75 and 1.0)
192
-
193
- Respond with only one of: "class_1", "class_2", "class_3", or "class_4".
194
-
195
- Examples:
196
-
197
- Search query: "How to reset a router to factory settings?"
198
- Document chunk: "Computers often come with customizable parental control settings."
199
- Score: class_1
200
-
201
- Search query: "Symptoms of vitamin D deficiency"
202
- Document chunk: "Vitamin D deficiency has been linked to fatigue, bone pain, and muscle weakness."
203
- Score: class_4
204
-
205
- Search query: "Best practices for onboarding remote employees"
206
- Document chunk: "An employee handbook can be useful for new hires, outlining company policies and benefits."
207
- Score: class_2
208
-
209
- Search query: "Benefits of mindfulness meditation"
210
- Document chunk: "Practicing mindfulness has shown to reduce stress and improve focus in multiple studies."
211
- Score: class_3
212
-
213
- Search query: "What is Kubernetes used for?"
214
- Document chunk: "Kubernetes is an open-source system for automating deployment, scaling, and management of containerized applications."
215
- Score: class_4
216
-
217
- Search query: "How to bake sourdough bread at home"
218
- Document chunk: "The French Revolution began in 1789 and radically transformed society."
219
- Score: class_1
220
-
221
- Search query: "Machine learning algorithms for image classification"
222
- Document chunk: "Convolutional Neural Networks (CNNs) are particularly effective in image classification tasks."
223
- Score: class_4
224
-
225
- Search query: "How to improve focus while working remotely"
226
- Document chunk: "Creating a dedicated workspace and setting a consistent schedule can significantly improve focus during remote work."
227
- Score: class_4
228
-
229
- Search query: "Carbon emissions from electric vehicles vs gas cars"
230
- Document chunk: "Electric vehicles produce zero emissions while driving, but battery production has environmental impacts."
231
- Score: class_3
232
-
233
- Search query: "Time zones in the United States"
234
- Document chunk: "The U.S. is divided into six primary time zones: Eastern, Central, Mountain, Pacific, Alaska, and Hawaii-Aleutian."
235
- Score: class_4
236
- """},
237
-
238
- {"role": "user", "content": f"""
239
- Now evaluate the following pair:
240
-
241
- Search query: {query}
242
- Document chunk: {document}
243
-
244
- Which class best represents the relevance?
245
- """}
246
- ],
247
- temperature=self.temperature,
248
- n=1,
249
- logprobs=True,
250
- top_logprobs=4,
251
- max_tokens=3
252
- )
253
-
254
- # Extract response and logprobs
255
- class_label = response.choices[0].message.content.strip()
256
- token_logprobs = response.choices[0].logprobs.content
257
- # Reconstruct the prediction and extract the top logprobs from the final token (e.g., "1")
258
- final_token_logprob = token_logprobs[-1]
259
- top_logprobs = final_token_logprob.top_logprobs
260
- # Create a map of 'class_1' -> probability, using token combinations
261
- class_probs = {}
262
- for top_token in top_logprobs:
263
- full_label = f"class_{top_token.token}"
264
- prob = math.exp(top_token.logprob)
265
- class_probs[full_label] = prob
266
- # Optional: normalize in case some are missing
267
- total_prob = sum(class_probs.values())
268
- class_probs = {k: v / total_prob for k, v in class_probs.items()}
269
- # Assign weights to classes
270
- class_weights = {
271
- "class_1": 0.25,
272
- "class_2": 0.5,
273
- "class_3": 0.75,
274
- "class_4": 1.0
275
- }
276
- # Compute the final smooth score
277
- relevance_score = sum(class_weights.get(class_label, 0) * prob for class_label, prob in class_probs.items())
278
- rerank_data = {
279
- "document": document,
280
- "answer": class_label,
281
- "relevance_score": relevance_score
282
- }
283
- return rerank_data
284
-
285
- except Exception as e:
286
- if attempt == self.max_retries - 1:
287
- log.error(f"Failed after {self.max_retries} attempts: {str(e)}")
288
- raise
289
- # Exponential backoff with jitter
290
- retry_delay = self.retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
291
- await asyncio.sleep(retry_delay)
292
-
293
- async def _rank_score(self, query_document_pairs: List[Tuple[str, str]]) -> List[Tuple[str, float]]:
294
- ranked_results = []
295
-
296
- # Process in larger batches for better throughput
297
- batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
298
- for i in range(0, len(query_document_pairs), batch_size):
299
- batch = query_document_pairs[i:i + batch_size]
300
- try:
301
- results = await asyncio.gather(
302
- *[self.search_relevancy_score(query=query, document=document) for (query, document) in batch],
303
- return_exceptions=True
304
- )
305
-
306
- for idx, result in enumerate(results):
307
- if isinstance(result, Exception):
308
- log.error(f"Error processing document {i+idx}: {str(result)}")
309
- ranked_results.append((batch[idx][1], 0.0))
310
- continue
311
-
312
- score = result["relevance_score"]
313
- if score is not None:
314
- if score > 1.0:
315
- score = 1.0
316
- elif score < 0.0:
317
- score = 0.0
318
-
319
- ranked_results.append((batch[idx][1], score))
320
- # Check if we should stop early
321
- try:
322
- high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
323
- can_stop_early = (
324
- self.early_stop # Early stopping is enabled
325
- and self.num_docs_to_keep # We have a target number of docs
326
- and len(high_scoring_docs) >= self.num_docs_to_keep # Found enough good docs
327
- and score >= self.early_stop_threshold # Current doc is good enough
328
- )
329
-
330
- if can_stop_early:
331
- log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
332
- return ranked_results
333
- except Exception as e:
334
- # Don't let early stopping errors stop the whole process
335
- log.warning(f"Error in early stopping check: {str(e)}")
336
-
337
- except Exception as e:
338
- log.error(f"Batch processing error: {str(e)}")
339
- continue
340
-
341
- return ranked_results
19
+ def _dispatch_rerank_event(self, data):
20
+ dispatch_custom_event("rerank", data)
342
21
 
343
22
  async def acompress_documents(
344
23
  self,
@@ -367,7 +46,7 @@ class LLMReranker(BaseDocumentCompressor):
367
46
  await callbacks.on_text("Starting document reranking...")
368
47
 
369
48
  # Get ranked results
370
- ranked_results = await self._rank(query_document_pairs)
49
+ ranked_results = await self._rank(query_document_pairs, rerank_callback=self._dispatch_rerank_event)
371
50
 
372
51
  # Sort by score in descending order
373
52
  ranked_results.sort(key=lambda x: x[1], reverse=True)
@@ -418,22 +97,3 @@ class LLMReranker(BaseDocumentCompressor):
418
97
  "remove_irrelevant": self.remove_irrelevant,
419
98
  "method": self.method,
420
99
  }
421
-
422
- def get_scores(self, query: str, documents: list[str], custom_event: bool = False):
423
- query_document_pairs = [(query, doc) for doc in documents]
424
- # Create event loop and run async code
425
- import asyncio
426
- try:
427
- loop = asyncio.get_running_loop()
428
- except RuntimeError:
429
- # If no running loop exists, create a new one
430
- loop = asyncio.new_event_loop()
431
- asyncio.set_event_loop(loop)
432
-
433
- if self.method == "multi-class": # default 'multi-class' method
434
- documents_and_scores = loop.run_until_complete(self._rank_score(query_document_pairs))
435
- else:
436
- documents_and_scores = loop.run_until_complete(self._rank(query_document_pairs, custom_event=custom_event))
437
-
438
- scores = [score for _, score in documents_and_scores]
439
- return scores
@@ -4,8 +4,20 @@ from langchain.agents import AgentType
4
4
  from langchain_openai import OpenAIEmbeddings
5
5
 
6
6
  from types import MappingProxyType
7
- from mindsdb.integrations.handlers.openai_handler.constants import (
8
- CHAT_MODELS as OPEN_AI_CHAT_MODELS,
7
+
8
+ # the same as
9
+ # from mindsdb.integrations.handlers.openai_handler.constants import CHAT_MODELS
10
+ OPEN_AI_CHAT_MODELS = (
11
+ 'gpt-3.5-turbo',
12
+ 'gpt-3.5-turbo-16k',
13
+ 'gpt-3.5-turbo-instruct',
14
+ 'gpt-4',
15
+ 'gpt-4-32k',
16
+ 'gpt-4-1106-preview',
17
+ 'gpt-4-0125-preview',
18
+ 'gpt-4o',
19
+ 'o3-mini',
20
+ 'o1-mini'
9
21
  )
10
22
 
11
23
  SUPPORTED_PROVIDERS = {
@@ -24,9 +24,6 @@ from langchain_core.messages.base import BaseMessage
24
24
  from langchain_core.prompts import PromptTemplate
25
25
  from langchain_core.tools import Tool
26
26
 
27
- from mindsdb.integrations.handlers.openai_handler.constants import (
28
- CHAT_MODELS as OPEN_AI_CHAT_MODELS,
29
- )
30
27
  from mindsdb.integrations.libs.llm.utils import get_llm_config
31
28
  from mindsdb.integrations.utilities.handler_utils import get_api_key
32
29
  from mindsdb.integrations.utilities.rag.settings import DEFAULT_RAG_PROMPT_TEMPLATE
@@ -42,7 +39,8 @@ from .callback_handlers import LogCallbackHandler, ContextCaptureCallback
42
39
  from .langfuse_callback_handler import LangfuseCallbackHandler, get_skills
43
40
  from .safe_output_parser import SafeOutputParser
44
41
 
45
- from .constants import (
42
+ from mindsdb.interfaces.agents.constants import (
43
+ OPEN_AI_CHAT_MODELS,
46
44
  DEFAULT_AGENT_TIMEOUT_SECONDS,
47
45
  DEFAULT_AGENT_TYPE,
48
46
  DEFAULT_EMBEDDINGS_MODEL_PROVIDER,
@@ -4,7 +4,6 @@ from typing import List, Optional
4
4
  from collections import OrderedDict
5
5
 
6
6
  import sqlalchemy as sa
7
- from sqlalchemy.orm.attributes import flag_modified
8
7
  import numpy as np
9
8
 
10
9
  from mindsdb_sql_parser.ast.base import ASTNode
@@ -457,7 +456,7 @@ class ProjectController:
457
456
  project.create(name=name)
458
457
  return project
459
458
 
460
- def update(self, id: Optional[int] = None, name: Optional[str] = None, new_name: str = None, new_metadata: dict = None) -> Project:
459
+ def update(self, id: Optional[int] = None, name: Optional[str] = None, new_name: str = None) -> Project:
461
460
  if id is not None and name is not None:
462
461
  raise ValueError("Both 'id' and 'name' can't be provided at the same time")
463
462
 
@@ -470,10 +469,5 @@ class ProjectController:
470
469
  project.name = new_name
471
470
  project.record.name = new_name
472
471
 
473
- if new_metadata is not None:
474
- project.metadata = new_metadata
475
- project.record.metadata_ = new_metadata
476
- flag_modified(project.record, 'metadata_')
477
-
478
472
  db.session.commit()
479
473
  return project
@@ -1,8 +1,9 @@
1
1
  import os
2
+ import copy
2
3
 
3
4
  from duckdb.typing import BIGINT, DOUBLE, VARCHAR, BLOB, BOOLEAN
4
- from mindsdb.interfaces.functions.to_markdown import ToMarkdown
5
5
  from mindsdb.interfaces.storage.model_fs import HandlerStorage
6
+ from mindsdb.utilities.config import config
6
7
 
7
8
 
8
9
  def python_to_duckdb_type(py_type):
@@ -158,31 +159,28 @@ class FunctionController(BYOMFunctionsController):
158
159
  return meta
159
160
 
160
161
  def to_markdown_call_function(self, node):
162
+ # load on-demand because lib is heavy
163
+ from mindsdb.interfaces.functions.to_markdown import ToMarkdown
161
164
  name = node.op.lower()
162
165
 
163
166
  if name in self.callbacks:
164
167
  return self.callbacks[name]
165
168
 
166
- def callback(file_path_or_url, use_llm):
167
- chat_model_params = self._parse_chat_model_params()
169
+ def callback(file_path_or_url):
170
+ chat_model_params = self._parse_chat_model_params('TO_MARKDOWN_FUNCTION_')
168
171
 
169
- llm_client = None
170
- llm_model = None
171
- try:
172
- from mindsdb.interfaces.agents.langchain_agent import create_chat_model
173
- llm = create_chat_model(chat_model_params)
174
- llm_client = llm.root_client
175
- llm_model = llm.model_name
176
- except Exception:
177
- pass
172
+ params_copy = copy.deepcopy(chat_model_params)
173
+ params_copy['model'] = params_copy.pop('model_name')
174
+ params_copy.pop('api_keys')
175
+ params_copy.pop('provider')
178
176
 
179
- to_markdown = ToMarkdown(use_llm, llm_client, llm_model)
180
- return to_markdown.call(file_path_or_url)
177
+ to_markdown = ToMarkdown()
178
+ return to_markdown.call(file_path_or_url, **params_copy)
181
179
 
182
180
  meta = {
183
181
  'name': name,
184
182
  'callback': callback,
185
- 'input_types': ['str', 'bool'],
183
+ 'input_types': ['str'],
186
184
  'output_type': 'str'
187
185
  }
188
186
  self.callbacks[name] = meta
@@ -192,7 +190,7 @@ class FunctionController(BYOMFunctionsController):
192
190
  """
193
191
  Parses the environment variables for chat model parameters.
194
192
  """
195
- chat_model_params = {}
193
+ chat_model_params = config.get("default_llm") or {}
196
194
  for k, v in os.environ.items():
197
195
  if k.startswith(param_prefix):
198
196
  param_name = k[len(param_prefix):]