MindsDB-25.1.2.1-py3-none-any.whl → MindsDB-25.1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic. (The original page links to more details; the link target is not preserved in this text.)

Files changed (77)
  1. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
  2. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +5 -3
  5. mindsdb/api/executor/__init__.py +0 -1
  6. mindsdb/api/executor/command_executor.py +2 -1
  7. mindsdb/api/executor/data_types/answer.py +1 -1
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
  9. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
  10. mindsdb/api/executor/sql_query/__init__.py +1 -0
  11. mindsdb/api/executor/sql_query/result_set.py +36 -21
  12. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
  13. mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
  14. mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
  15. mindsdb/api/executor/utilities/sql.py +2 -10
  16. mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
  17. mindsdb/api/http/namespaces/sql.py +3 -1
  18. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
  19. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
  20. mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
  21. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
  22. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
  24. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
  25. mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
  26. mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
  27. mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
  28. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
  29. mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
  30. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
  31. mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
  32. mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
  33. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
  34. mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
  35. mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
  36. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
  37. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
  38. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
  39. mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
  40. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
  41. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
  42. mindsdb/integrations/utilities/rag/settings.py +8 -2
  43. mindsdb/integrations/utilities/sql_utils.py +1 -1
  44. mindsdb/interfaces/agents/agents_controller.py +3 -5
  45. mindsdb/interfaces/agents/langchain_agent.py +112 -150
  46. mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
  47. mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
  48. mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
  49. mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
  50. mindsdb/interfaces/chatbot/memory.py +58 -13
  51. mindsdb/interfaces/database/projects.py +17 -15
  52. mindsdb/interfaces/database/views.py +12 -25
  53. mindsdb/interfaces/knowledge_base/controller.py +39 -15
  54. mindsdb/interfaces/model/functions.py +15 -4
  55. mindsdb/interfaces/model/model_controller.py +4 -7
  56. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
  57. mindsdb/interfaces/skills/retrieval_tool.py +10 -3
  58. mindsdb/interfaces/skills/skill_tool.py +97 -53
  59. mindsdb/interfaces/skills/sql_agent.py +77 -36
  60. mindsdb/interfaces/storage/db.py +1 -1
  61. mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
  62. mindsdb/utilities/cache.py +7 -4
  63. mindsdb/utilities/context.py +11 -1
  64. mindsdb/utilities/langfuse.py +264 -0
  65. mindsdb/utilities/log.py +20 -2
  66. mindsdb/utilities/otel/__init__.py +206 -0
  67. mindsdb/utilities/otel/logger.py +25 -0
  68. mindsdb/utilities/otel/meter.py +19 -0
  69. mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
  70. mindsdb/utilities/otel/tracer.py +16 -0
  71. mindsdb/utilities/partitioning.py +52 -0
  72. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  73. mindsdb/utilities/utils.py +34 -0
  74. mindsdb/utilities/otel.py +0 -72
  75. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
  76. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
  77. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
@@ -136,7 +136,7 @@ Output:
136
136
  )
137
137
  retry_chain = LLMChain(llm=self.llm, prompt=retry_prompt)
138
138
  # Generate rewritten query.
139
- return retry_chain.predict(
139
+ sql_query = retry_chain.predict(
140
140
  query=query,
141
141
  dialect='postgres',
142
142
  error=error,
@@ -144,6 +144,16 @@ Output:
144
144
  schema=schema,
145
145
  callbacks=run_manager.get_child() if run_manager else None
146
146
  )
147
+ query_checker_prompt = PromptTemplate(
148
+ input_variables=['dialect', 'query'],
149
+ template=self.query_checker_template
150
+ )
151
+ query_checker_chain = LLMChain(llm=self.llm, prompt=query_checker_prompt)
152
+ # Check the query & return the final result to be executed.
153
+ return query_checker_chain.predict(
154
+ dialect='postgres',
155
+ query=sql_query
156
+ )
147
157
 
148
158
  def _get_relevant_documents(
149
159
  self, query: str, *, run_manager: CallbackManagerForRetrieverRun
@@ -163,20 +173,28 @@ Output:
163
173
  # Actually execute the similarity search with metadata filters.
164
174
  document_response = self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
165
175
  num_retries = 0
166
- while document_response.resp_type == RESPONSE_TYPE.ERROR:
167
- error_msg = document_response.error_message
168
- # LLMs won't always generate a working SQL query so we should have a fallback after retrying.
169
- logger.info(f'SQL Retriever query {checked_sql_query} failed with error {error_msg}')
170
- if num_retries >= self.num_retries:
171
- logger.info('Using fallback retriever in SQL retriever.')
172
- return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager)
173
- query_to_retry = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
174
- query_to_retry_with_embeddings = query_to_retry.format(embeddings=str(embedded_query))
176
+ while num_retries < self.num_retries:
177
+ if document_response.resp_type == RESPONSE_TYPE.ERROR:
178
+ error_msg = document_response.error_message
179
+ # LLMs won't always generate a working SQL query so we should have a fallback after retrying.
180
+ logger.info(f'SQL Retriever query {checked_sql_query} failed with error {error_msg}')
181
+ checked_sql_query = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
182
+ elif len(document_response.data_frame) == 0:
183
+ error_msg = "No documents retrieved from query."
184
+ checked_sql_query = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
185
+ else:
186
+ break
187
+
188
+ checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=str(embedded_query))
175
189
  # Handle LLM output that has the ```sql delimiter possibly.
176
- query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```sql', '')
177
- query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```', '')
178
- document_response = self.vector_store_handler.native_query(query_to_retry_with_embeddings)
190
+ checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```sql', '')
191
+ checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```', '')
192
+ document_response = self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
193
+
179
194
  num_retries += 1
195
+ if num_retries >= self.num_retries:
196
+ logger.info('Using fallback retriever in SQL retriever.')
197
+ return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
180
198
 
181
199
  document_df = document_response.data_frame
182
200
  retrieved_documents = []
@@ -185,4 +203,8 @@ Output:
185
203
  document_row.get('content', ''),
186
204
  metadata=document_row.get('metadata', {})
187
205
  ))
188
- return retrieved_documents
206
+ if retrieved_documents:
207
+ return retrieved_documents
208
+ # If the SQL query constructed did not return any documents, fallback.
209
+ logger.info('No documents returned from SQL retriever. using fallback retriever.')
210
+ return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
@@ -136,7 +136,6 @@ Columns:
136
136
  "description": "Metadata for the document chunk. Always select metadata and always join with the {source_table} table on the string metadata field 'original_row_id'"
137
137
  }}
138
138
  }}
139
- ```
140
139
 
141
140
  {schema}
142
141
 
@@ -290,6 +289,8 @@ class VectorStoreConfig(BaseModel):
290
289
  collection_name: str = DEFAULT_COLLECTION_NAME
291
290
  connection_string: str = None
292
291
  kb_table: Any = None
292
+ is_sparse: bool = False
293
+ vector_size: Optional[int] = None
293
294
 
294
295
  class Config:
295
296
  arbitrary_types_allowed = True
@@ -449,8 +450,13 @@ class SummarizationConfig(BaseModel):
449
450
  class RerankerConfig(BaseModel):
450
451
  model: str = DEFAULT_RERANKING_MODEL
451
452
  base_url: str = DEFAULT_LLM_ENDPOINT
452
- filtering_threshold: float = 0.99
453
+ filtering_threshold: float = 0.5
453
454
  num_docs_to_keep: Optional[int] = None
455
+ max_concurrent_requests: int = 20
456
+ max_retries: int = 3
457
+ retry_delay: float = 1.0
458
+ early_stop: bool = True # Whether to enable early stopping
459
+ early_stop_threshold: float = 0.8 # Confidence threshold for early stopping
454
460
 
455
461
 
456
462
  class MultiHopRetrieverConfig(BaseModel):
@@ -178,7 +178,7 @@ def project_dataframe(df, targets, table_columns):
178
178
 
179
179
  # adapt column names to projection
180
180
  if len(df_col_rename) > 0:
181
- df = df.rename(columns=df_col_rename)
181
+ df.rename(columns=df_col_rename, inplace=True)
182
182
  return df
183
183
 
184
184
 
@@ -362,24 +362,22 @@ class AgentsController:
362
362
  project_name: str = 'mindsdb',
363
363
  tools: List[BaseTool] = None,
364
364
  stream: bool = False) -> Union[Iterator[object], pd.DataFrame]:
365
- '''
365
+ """
366
366
  Queries an agent to get a completion.
367
367
 
368
368
  Parameters:
369
369
  agent (db.Agents): Existing agent to get completion from
370
370
  messages (List[Dict[str, str]]): Chat history to send to the agent
371
- trace_id (str): ID of Langfuse trace to use
372
- observation_id (str): ID of parent Langfuse observation to use
373
371
  project_name (str): Project the agent belongs to (default mindsdb)
374
372
  tools (List[BaseTool]): Tools to use while getting the completion
375
- stream (bool): Whether or not to stream the response
373
+ stream (bool): Whether to stream the response
376
374
 
377
375
  Returns:
378
376
  response (Union[Iterator[object], pd.DataFrame]): Completion as a DataFrame or iterator of completion chunks
379
377
 
380
378
  Raises:
381
379
  ValueError: Agent's model does not exist.
382
- '''
380
+ """
383
381
  if stream:
384
382
  return self._get_completion_stream(
385
383
  agent,
@@ -1,8 +1,7 @@
1
1
  import json
2
2
  from concurrent.futures import as_completed, TimeoutError
3
- from typing import Dict, Iterable, List
3
+ from typing import Dict, Iterable, List, Optional
4
4
  from uuid import uuid4
5
- import os
6
5
  import re
7
6
  import numpy as np
8
7
  import pandas as pd
@@ -20,9 +19,6 @@ from langchain_nvidia_ai_endpoints import ChatNVIDIA
20
19
  from langchain_core.messages.base import BaseMessage
21
20
  from langchain_core.prompts import PromptTemplate
22
21
  from langchain_core.tools import Tool
23
- from langfuse import Langfuse
24
- from langfuse.api.resources.commons.errors.not_found_error import NotFoundError as TraceNotFoundError
25
- from langfuse.callback import CallbackHandler
26
22
 
27
23
  from mindsdb.integrations.handlers.openai_handler.constants import (
28
24
  CHAT_MODELS as OPEN_AI_CHAT_MODELS,
@@ -35,13 +31,11 @@ from mindsdb.utilities.context_executor import ContextThreadPoolExecutor
35
31
  from mindsdb.interfaces.storage import db
36
32
  from mindsdb.utilities.context import context as ctx
37
33
 
38
-
39
34
  from .mindsdb_chat_model import ChatMindsdb
40
35
  from .callback_handlers import LogCallbackHandler, ContextCaptureCallback
41
- from .langfuse_callback_handler import LangfuseCallbackHandler, get_metadata, get_tags, get_tool_usage, get_skills
36
+ from .langfuse_callback_handler import LangfuseCallbackHandler, get_skills
42
37
  from .safe_output_parser import SafeOutputParser
43
38
 
44
-
45
39
  from .constants import (
46
40
  DEFAULT_AGENT_TIMEOUT_SECONDS,
47
41
  DEFAULT_AGENT_TYPE,
@@ -62,6 +56,8 @@ from langchain_anthropic import ChatAnthropic
62
56
  from langchain_core.messages import SystemMessage
63
57
  from langchain_openai import ChatOpenAI
64
58
 
59
+ from mindsdb.utilities.langfuse import LangfuseClientWrapper
60
+
65
61
  _PARSING_ERROR_PREFIXES = [
66
62
  "An output parsing error occurred",
67
63
  "Could not parse LLM output",
@@ -207,34 +203,42 @@ def process_chunk(chunk):
207
203
 
208
204
 
209
205
  class LangchainAgent:
206
+
210
207
  def __init__(self, agent: db.Agents, model: dict = None):
208
+
211
209
  self.agent = agent
212
210
  self.model = model
213
- self.llm = None
214
- self.embedding_model = None
215
- args = agent.params.copy()
216
- args["model_name"] = agent.model_name
217
- args["provider"] = agent.provider
211
+
212
+ self.run_completion_span: Optional[object] = None
213
+ self.llm: Optional[object] = None
214
+ self.embedding_model: Optional[object] = None
215
+
216
+ self.log_callback_handler: Optional[object] = None
217
+ self.langfuse_callback_handler: Optional[object] = None # native langfuse callback handler
218
+ self.mdb_langfuse_callback_handler: Optional[object] = None # custom (see langfuse_callback_handler.py)
219
+
220
+ self.langfuse_client_wrapper = LangfuseClientWrapper()
221
+ self.args = self._initialize_args()
222
+
223
+ # Back compatibility for old models
224
+ self.provider = self.args.get("provider", get_llm_provider(self.args))
225
+
226
+ def _initialize_args(self) -> dict:
227
+ """Initialize the arguments based on the agent's parameters."""
228
+ args = self.agent.params.copy()
229
+ args["model_name"] = self.agent.model_name
230
+ args["provider"] = self.agent.provider
218
231
  args["embedding_model_provider"] = args.get(
219
232
  "embedding_model", get_embedding_model_provider(args)
220
233
  )
221
234
 
222
- self.langfuse = None
223
- if os.getenv('LANGFUSE_PUBLIC_KEY') is not None:
224
- self.langfuse = Langfuse(
225
- public_key=os.getenv('LANGFUSE_PUBLIC_KEY'),
226
- secret_key=os.getenv('LANGFUSE_SECRET_KEY'),
227
- host=os.getenv('LANGFUSE_HOST'),
228
- release=os.getenv('LANGFUSE_RELEASE', 'local'),
229
- )
230
-
231
235
  # agent is using current langchain model
232
- if agent.provider == "mindsdb":
233
- args["model_name"] = agent.model_name
236
+ if self.agent.provider == "mindsdb":
237
+ args["model_name"] = self.agent.model_name
234
238
 
235
239
  # get prompt
236
240
  prompt_template = (
237
- model["problem_definition"].get("using", {}).get("prompt_template")
241
+ self.model["problem_definition"].get("using", {}).get("prompt_template")
238
242
  )
239
243
  if prompt_template is not None:
240
244
  # only update prompt_template if it is set on the model
@@ -248,57 +252,47 @@ class LangchainAgent:
248
252
  "Please provide a `prompt_template` or set `mode=retrieval`"
249
253
  )
250
254
 
251
- self.args = args
252
- self.trace_id = None
253
- self.observation_id = None
254
- self.log_callback_handler = None
255
- self.langfuse_callback_handler = None # native langfuse callback handler
256
- self.mdb_langfuse_callback_handler = (
257
- None # custom (see langfuse_callback_handler.py)
258
- )
255
+ return args
256
+
257
+ def get_metadata(self) -> Dict:
258
+ return {
259
+ 'provider': self.provider,
260
+ 'model_name': self.args["model_name"],
261
+ 'embedding_model_provider': self.args.get('embedding_model_provider',
262
+ get_embedding_model_provider(self.args)),
263
+ 'skills': get_skills(self.agent),
264
+ 'user_id': ctx.user_id,
265
+ 'session_id': ctx.session_id,
266
+ 'company_id': ctx.company_id,
267
+ 'user_class': ctx.user_class,
268
+ 'email_confirmed': ctx.email_confirmed
269
+ }
270
+
271
+ def get_tags(self) -> List:
272
+ return [
273
+ self.provider,
274
+ ]
259
275
 
260
276
  def get_completion(self, messages, stream: bool = False):
261
277
 
262
- self.run_completion_span = None
263
- self.api_trace = None
264
- if self.langfuse:
265
-
266
- # todo we need to fix this as this assumes that the model is always langchain
267
- # since decoupling the model from langchain, we need to find a way to get the model name
268
- # this breaks retrieval agents
269
-
270
- # metadata retrieval
271
- trace_metadata = {
272
- 'provider': self.args["provider"],
273
- 'model_name': self.args["model_name"],
274
- 'embedding_model_provider': self.args.get('embedding_model_provider', get_embedding_model_provider(self.args))
275
- }
276
- trace_metadata['skills'] = get_skills(self.agent)
277
- trace_tags = get_tags(trace_metadata)
278
-
279
- # Set our user info to pass into langfuse trace, with fault tolerance in each individual one just incase on purpose
280
- trace_metadata['user_id'] = ctx.user_id
281
- trace_metadata['session_id'] = ctx.session_id
282
- trace_metadata['company_id'] = ctx.company_id
283
- trace_metadata['user_class'] = ctx.user_class
284
- trace_metadata['email_confirmed'] = ctx.email_confirmed
285
-
286
- self.api_trace = self.langfuse.trace(
287
- name='api-completion',
288
- input=messages,
289
- tags=trace_tags,
290
- metadata=trace_metadata,
291
- user_id=ctx.user_id,
292
- session_id=ctx.session_id,
293
- )
294
-
295
- self.run_completion_span = self.api_trace.span(name='run-completion', input=messages)
296
- trace_id = self.api_trace.id
297
- observation_id = self.run_completion_span.id
278
+ # Get metadata and tags to be used in the trace
279
+ metadata = self.get_metadata()
280
+ tags = self.get_tags()
281
+
282
+ # Set up trace for the API completion in Langfuse
283
+ self.langfuse_client_wrapper.setup_trace(
284
+ name='api-completion',
285
+ input=messages,
286
+ tags=tags,
287
+ metadata=metadata,
288
+ user_id=ctx.user_id,
289
+ session_id=ctx.session_id,
290
+ )
298
291
 
299
- self.trace_id = trace_id
300
- self.observation_id = observation_id
301
- logger.info(f"Langfuse trace created with ID: {trace_id}")
292
+ # Set up trace for the run completion in Langfuse
293
+ self.run_completion_span = self.langfuse_client_wrapper.start_span(
294
+ name='run-completion',
295
+ input=messages)
302
296
 
303
297
  if stream:
304
298
  return self._get_completion_stream(messages)
@@ -317,21 +311,8 @@ class LangchainAgent:
317
311
  df.iloc[:-1, df.columns.get_loc(user_column)] = None
318
312
  response = self.run_agent(df, agent, args)
319
313
 
320
- if self.run_completion_span is not None and self.api_trace is not None:
321
- self.run_completion_span.end(output=response)
322
- self.api_trace.update(output=response)
323
-
324
- # update metadata with tool usage
325
- try:
326
- # Ensure all batched traces are sent before fetching.
327
- self.langfuse.flush()
328
- trace = self.langfuse.get_trace(self.trace_id)
329
- trace_metadata['tool_usage'] = get_tool_usage(trace)
330
- self.api_trace.update(metadata=trace_metadata)
331
- except TraceNotFoundError:
332
- logger.warning(f'Langfuse trace {self.trace_id} not found')
333
- except Exception as e:
334
- logger.error(f'Something went wrong while processing Langfuse trace {self.trace_id}: {str(e)}')
314
+ # End the run completion span and update the metadata with tool usage
315
+ self.langfuse_client_wrapper.end_span(span=self.run_completion_span, output=response)
335
316
 
336
317
  return response
337
318
 
@@ -349,6 +330,7 @@ class LangchainAgent:
349
330
 
350
331
  df = pd.DataFrame(messages)
351
332
 
333
+ self.embedding_model_provider = args.get('embedding_model_provider', get_embedding_model_provider(args))
352
334
  # Back compatibility for old models
353
335
  self.provider = args.get("provider", get_llm_provider(args))
354
336
 
@@ -445,69 +427,49 @@ class LangchainAgent:
445
427
  return all_tools
446
428
 
447
429
  def _get_agent_callbacks(self, args: Dict) -> List:
430
+ all_callbacks = []
448
431
 
449
432
  if self.log_callback_handler is None:
450
433
  self.log_callback_handler = LogCallbackHandler(logger)
451
434
 
452
- all_callbacks = [self.log_callback_handler]
435
+ all_callbacks.append(self.log_callback_handler)
453
436
 
454
- langfuse_public_key = args.get(
455
- "langfuse_public_key", os.getenv("LANGFUSE_PUBLIC_KEY")
456
- )
457
- langfuse_secret_key = args.get(
458
- "langfuse_secret_key", os.getenv("LANGFUSE_SECRET_KEY")
459
- )
460
- langfuse_host = args.get("langfuse_host", os.getenv("LANGFUSE_HOST"))
461
- are_langfuse_args_present = (
462
- bool(langfuse_public_key)
463
- and bool(langfuse_secret_key)
464
- and bool(langfuse_host)
465
- )
437
+ if self.langfuse_client_wrapper.trace is None:
438
+ # Get metadata and tags to be used in the trace
439
+ metadata = self.get_metadata()
440
+ tags = self.get_tags()
466
441
 
467
- if are_langfuse_args_present:
468
- if self.langfuse_callback_handler is None:
469
- trace_name = args.get(
470
- "trace_id",
471
- (
472
- f"NativeTrace-...{self.trace_id[-7:]}"
473
- if self.trace_id is not None
474
- else "NativeTrace-MindsDB-AgentExecutor"
475
- ),
476
- )
477
- metadata = get_metadata(args)
478
- self.langfuse_callback_handler = CallbackHandler(
479
- public_key=langfuse_public_key,
480
- secret_key=langfuse_secret_key,
481
- host=langfuse_host,
482
- trace_name=trace_name,
483
- tags=get_tags(metadata),
484
- metadata=metadata,
485
- )
486
- try:
487
- # This try is critical to catch fatal errors which would otherwise prevent the agent from running properly
488
- if not self.langfuse_callback_handler.auth_check():
489
- logger.error(
490
- f"Incorrect Langfuse credentials provided to Langchain handler. Full args: {args}"
491
- )
492
- except Exception as e:
493
- logger.error(f'Something went wrong while running langfuse_callback_handler.auth_check {str(e)}')
494
-
495
- # custom tracer
496
- if self.mdb_langfuse_callback_handler is None:
497
- trace_id = args.get("trace_id", self.trace_id or None)
498
- observation_id = args.get(
499
- "observation_id", self.observation_id or uuid4().hex
500
- )
501
- langfuse = Langfuse(
502
- host=langfuse_host,
503
- public_key=langfuse_public_key,
504
- secret_key=langfuse_secret_key,
505
- )
506
- self.mdb_langfuse_callback_handler = LangfuseCallbackHandler(
507
- langfuse=langfuse,
508
- trace_id=trace_id,
509
- observation_id=observation_id,
510
- )
442
+ trace_name = "NativeTrace-MindsDB-AgentExecutor"
443
+
444
+ # Set up trace for the API completion in Langfuse
445
+ self.langfuse_client_wrapper.setup_trace(
446
+ name=trace_name,
447
+ tags=tags,
448
+ metadata=metadata,
449
+ user_id=ctx.user_id,
450
+ session_id=ctx.session_id,
451
+ )
452
+
453
+ if self.langfuse_callback_handler is None:
454
+ self.langfuse_callback_handler = self.langfuse_client_wrapper.get_langchain_handler()
455
+
456
+ # custom tracer
457
+ if self.mdb_langfuse_callback_handler is None:
458
+ trace_id = None
459
+ if self.langfuse_client_wrapper.trace is not None:
460
+ trace_id = args.get("trace_id", self.langfuse_client_wrapper.trace.id)
461
+
462
+ span_id = None
463
+ if self.run_completion_span is not None:
464
+ span_id = self.run_completion_span.id
465
+
466
+ observation_id = args.get("observation_id", span_id or uuid4().hex)
467
+
468
+ self.mdb_langfuse_callback_handler = LangfuseCallbackHandler(
469
+ langfuse=self.langfuse_client_wrapper.client,
470
+ trace_id=trace_id,
471
+ observation_id=observation_id,
472
+ )
511
473
 
512
474
  # obs: we may want to unify these; native langfuse handler provides details as a tree on a sub-step of the overarching custom one # noqa
513
475
  if self.langfuse_callback_handler is not None:
@@ -542,7 +504,8 @@ AI: {response}"""
542
504
  return_context = args.get('return_context', True)
543
505
  input_variables = re.findall(r"{{(.*?)}}", base_template)
544
506
 
545
- prompts, empty_prompt_ids = prepare_prompts(df, base_template, input_variables, args.get('user_column', USER_COLUMN))
507
+ prompts, empty_prompt_ids = prepare_prompts(df, base_template, input_variables,
508
+ args.get('user_column', USER_COLUMN))
546
509
 
547
510
  def _invoke_agent_executor_with_prompt(agent_executor, prompt):
548
511
  if not prompt:
@@ -621,7 +584,8 @@ AI: {response}"""
621
584
  if not hasattr(agent_executor, 'stream') or not callable(agent_executor.stream):
622
585
  raise AttributeError("The agent_executor does not have a 'stream' method")
623
586
 
624
- stream_iterator = agent_executor.stream(prompts[0], config={'callbacks': callbacks})
587
+ stream_iterator = agent_executor.stream(prompts[0],
588
+ config={'callbacks': callbacks})
625
589
 
626
590
  if not hasattr(stream_iterator, '__iter__'):
627
591
  raise TypeError("The stream method did not return an iterable")
@@ -642,10 +606,8 @@ AI: {response}"""
642
606
  # Yield generated SQL if available
643
607
  yield {"type": "sql", "content": self.log_callback_handler.generated_sql}
644
608
 
645
- if self.run_completion_span is not None:
646
- self.run_completion_span.end()
647
- self.api_trace.update()
648
- logger.info("Langfuse trace updated")
609
+ # End the run completion span and update the metadata with tool usage
610
+ self.langfuse_client_wrapper.end_span_stream(span=self.run_completion_span)
649
611
 
650
612
  @staticmethod
651
613
  def process_chunk(chunk):
@@ -1,7 +1,6 @@
1
1
  from typing import Any, Dict, Union, Optional, List
2
2
  from uuid import uuid4
3
3
  import datetime
4
- import os
5
4
 
6
5
  from langchain_core.callbacks.base import BaseCallbackHandler
7
6
 
@@ -122,42 +121,6 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
122
121
  return False
123
122
 
124
123
 
125
- def get_metadata(model_using: Dict) -> Dict:
126
- """ Generates initial metadata mapping from information provided in a model's `using` clause.
127
- Includes providers and model name.
128
- """
129
- metadata_keys = ['provider', 'model_name', 'embedding_model_provider'] # keeps keys relevant for tracing
130
- trace_metadata = {}
131
- for key in metadata_keys:
132
- if key in model_using:
133
- trace_metadata[key] = model_using.get(key)
134
- return trace_metadata
135
-
136
-
137
124
  def get_skills(agent: db.Agents) -> List:
138
125
  """ Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
139
126
  return [rel.skill.type for rel in agent.skills_relationships]
140
-
141
-
142
- def get_tags(metadata: Dict) -> List:
143
- """ Retrieves tags from existing langfuse metadata (built using `get_metadata` and `get_skills`), and environment variables. """
144
- trace_tags = []
145
- if os.getenv('FLASK_ENV'):
146
- trace_tags.append(os.getenv('FLASK_ENV')) # Fix: use something other than flask_env
147
- if 'provider' in metadata:
148
- trace_tags.append(metadata['provider'])
149
- return trace_tags
150
-
151
-
152
- def get_tool_usage(trace) -> Dict:
153
- """ Retrieves tool usage information from a langfuse trace.
154
- Note: assumes trace marks an action with string `AgentAction` """
155
- tool_usage = {}
156
- steps = [s.name for s in trace.observations]
157
- for step in steps:
158
- if 'AgentAction' in step:
159
- tool_name = step.split('-')[1]
160
- if tool_name not in tool_usage:
161
- tool_usage[tool_name] = 0
162
- tool_usage[tool_name] += 1
163
- return tool_usage
@@ -4,36 +4,38 @@
4
4
  """
5
5
  from typing import Any, Iterable, List, Optional
6
6
 
7
- from mindsdb.interfaces.skills.skill_tool import skill_tool
8
7
  from mindsdb.utilities import log
9
8
  from langchain_community.utilities import SQLDatabase
9
+ from mindsdb.interfaces.skills.sql_agent import SQLAgent
10
10
 
11
11
  logger = log.getLogger(__name__)
12
12
 
13
13
 
14
14
  class MindsDBSQL(SQLDatabase):
15
+ @staticmethod
16
+ def custom_init(
17
+ sql_agent: 'SQLAgent'
18
+ ) -> 'MindsDBSQL':
19
+ instance = MindsDBSQL()
20
+ instance._sql_agent = sql_agent
21
+ return instance
22
+
15
23
  """ Can't modify signature, as LangChain does a Pydantic check."""
16
24
  def __init__(
17
25
  self,
18
- engine=None,
19
- database: Optional[str] = 'mindsdb',
26
+ engine: Optional[Any] = None,
27
+ schema: Optional[str] = None,
20
28
  metadata: Optional[Any] = None,
21
29
  ignore_tables: Optional[List[str]] = None,
22
30
  include_tables: Optional[List[str]] = None,
23
31
  sample_rows_in_table_info: int = 3,
24
- schema: Optional[str] = None,
25
32
  indexes_in_table_info: bool = False,
26
33
  custom_table_info: Optional[dict] = None,
27
- view_support: Optional[bool] = True,
34
+ view_support: bool = True,
35
+ max_string_length: int = 300,
36
+ lazy_table_reflection: bool = False,
28
37
  ):
29
- # Some args above are not used in this class, but are kept for compatibility
30
-
31
- self._sql_agent = skill_tool.get_sql_agent(
32
- database,
33
- include_tables,
34
- ignore_tables,
35
- sample_rows_in_table_info
36
- )
38
+ pass
37
39
 
38
40
  @property
39
41
  def dialect(self) -> str:
@@ -4,6 +4,7 @@ from mindsdb.interfaces.agents.agents_controller import AgentsController
4
4
  from mindsdb.interfaces.chatbot.chatbot_task import ChatBotTask
5
5
  from mindsdb.interfaces.database.projects import ProjectController
6
6
  from mindsdb.interfaces.storage import db
7
+ from mindsdb.interfaces.model.functions import get_project_records
7
8
 
8
9
  from mindsdb.utilities.context import context as ctx
9
10
 
@@ -128,16 +129,11 @@ class ChatBotController:
128
129
  all_bots (List[db.ChatBots]): List of database chatbot object
129
130
  '''
130
131
 
131
- query = db.session.query(db.Project).filter_by(
132
- company_id=ctx.company_id,
133
- deleted_at=None
134
- )
135
- if project_name is not None:
136
- query = query.filter_by(name=project_name)
137
- project_names = {
138
- i.id: i.name
139
- for i in query
140
- }
132
+ project_names = {}
133
+ for project in get_project_records():
134
+ if project_name is not None and project.name != project_name:
135
+ continue
136
+ project_names[project.id] = project.name
141
137
 
142
138
  query = db.session.query(
143
139
  db.ChatBots, db.Tasks
@@ -228,9 +224,9 @@ class ChatBotController:
228
224
  raise ValueError('Need to provide either "model_name" or "agent_name" when creating a chatbot')
229
225
  if agent_name is not None:
230
226
  agent = self.agents_controller.get_agent(agent_name, project_name)
231
- model_name = agent.model_name
232
227
  if agent is None:
233
228
  raise ValueError(f"Agent with name doesn't exist: {agent_name}")
229
+ model_name = agent.model_name
234
230
  agent_id = agent.id
235
231
  else:
236
232
  # Create a new agent with the given model name.