MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
- mindsdb/integrations/utilities/rag/settings.py +8 -2
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +3 -5
- mindsdb/interfaces/agents/langchain_agent.py +112 -150
- mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/projects.py +17 -15
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +39 -15
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/interfaces/skills/skill_tool.py +97 -53
- mindsdb/interfaces/skills/sql_agent.py +77 -36
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +11 -1
- mindsdb/utilities/langfuse.py +264 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -136,7 +136,7 @@ Output:
|
|
|
136
136
|
)
|
|
137
137
|
retry_chain = LLMChain(llm=self.llm, prompt=retry_prompt)
|
|
138
138
|
# Generate rewritten query.
|
|
139
|
-
|
|
139
|
+
sql_query = retry_chain.predict(
|
|
140
140
|
query=query,
|
|
141
141
|
dialect='postgres',
|
|
142
142
|
error=error,
|
|
@@ -144,6 +144,16 @@ Output:
|
|
|
144
144
|
schema=schema,
|
|
145
145
|
callbacks=run_manager.get_child() if run_manager else None
|
|
146
146
|
)
|
|
147
|
+
query_checker_prompt = PromptTemplate(
|
|
148
|
+
input_variables=['dialect', 'query'],
|
|
149
|
+
template=self.query_checker_template
|
|
150
|
+
)
|
|
151
|
+
query_checker_chain = LLMChain(llm=self.llm, prompt=query_checker_prompt)
|
|
152
|
+
# Check the query & return the final result to be executed.
|
|
153
|
+
return query_checker_chain.predict(
|
|
154
|
+
dialect='postgres',
|
|
155
|
+
query=sql_query
|
|
156
|
+
)
|
|
147
157
|
|
|
148
158
|
def _get_relevant_documents(
|
|
149
159
|
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
|
@@ -163,20 +173,28 @@ Output:
|
|
|
163
173
|
# Actually execute the similarity search with metadata filters.
|
|
164
174
|
document_response = self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
|
|
165
175
|
num_retries = 0
|
|
166
|
-
while
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
176
|
+
while num_retries < self.num_retries:
|
|
177
|
+
if document_response.resp_type == RESPONSE_TYPE.ERROR:
|
|
178
|
+
error_msg = document_response.error_message
|
|
179
|
+
# LLMs won't always generate a working SQL query so we should have a fallback after retrying.
|
|
180
|
+
logger.info(f'SQL Retriever query {checked_sql_query} failed with error {error_msg}')
|
|
181
|
+
checked_sql_query = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
|
|
182
|
+
elif len(document_response.data_frame) == 0:
|
|
183
|
+
error_msg = "No documents retrieved from query."
|
|
184
|
+
checked_sql_query = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
|
|
185
|
+
else:
|
|
186
|
+
break
|
|
187
|
+
|
|
188
|
+
checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=str(embedded_query))
|
|
175
189
|
# Handle LLM output that has the ```sql delimiter possibly.
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
document_response = self.vector_store_handler.native_query(
|
|
190
|
+
checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```sql', '')
|
|
191
|
+
checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```', '')
|
|
192
|
+
document_response = self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
|
|
193
|
+
|
|
179
194
|
num_retries += 1
|
|
195
|
+
if num_retries >= self.num_retries:
|
|
196
|
+
logger.info('Using fallback retriever in SQL retriever.')
|
|
197
|
+
return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
|
|
180
198
|
|
|
181
199
|
document_df = document_response.data_frame
|
|
182
200
|
retrieved_documents = []
|
|
@@ -185,4 +203,8 @@ Output:
|
|
|
185
203
|
document_row.get('content', ''),
|
|
186
204
|
metadata=document_row.get('metadata', {})
|
|
187
205
|
))
|
|
188
|
-
|
|
206
|
+
if retrieved_documents:
|
|
207
|
+
return retrieved_documents
|
|
208
|
+
# If the SQL query constructed did not return any documents, fallback.
|
|
209
|
+
logger.info('No documents returned from SQL retriever. using fallback retriever.')
|
|
210
|
+
return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
|
|
@@ -136,7 +136,6 @@ Columns:
|
|
|
136
136
|
"description": "Metadata for the document chunk. Always select metadata and always join with the {source_table} table on the string metadata field 'original_row_id'"
|
|
137
137
|
}}
|
|
138
138
|
}}
|
|
139
|
-
```
|
|
140
139
|
|
|
141
140
|
{schema}
|
|
142
141
|
|
|
@@ -290,6 +289,8 @@ class VectorStoreConfig(BaseModel):
|
|
|
290
289
|
collection_name: str = DEFAULT_COLLECTION_NAME
|
|
291
290
|
connection_string: str = None
|
|
292
291
|
kb_table: Any = None
|
|
292
|
+
is_sparse: bool = False
|
|
293
|
+
vector_size: Optional[int] = None
|
|
293
294
|
|
|
294
295
|
class Config:
|
|
295
296
|
arbitrary_types_allowed = True
|
|
@@ -449,8 +450,13 @@ class SummarizationConfig(BaseModel):
|
|
|
449
450
|
class RerankerConfig(BaseModel):
|
|
450
451
|
model: str = DEFAULT_RERANKING_MODEL
|
|
451
452
|
base_url: str = DEFAULT_LLM_ENDPOINT
|
|
452
|
-
filtering_threshold: float = 0.
|
|
453
|
+
filtering_threshold: float = 0.5
|
|
453
454
|
num_docs_to_keep: Optional[int] = None
|
|
455
|
+
max_concurrent_requests: int = 20
|
|
456
|
+
max_retries: int = 3
|
|
457
|
+
retry_delay: float = 1.0
|
|
458
|
+
early_stop: bool = True # Whether to enable early stopping
|
|
459
|
+
early_stop_threshold: float = 0.8 # Confidence threshold for early stopping
|
|
454
460
|
|
|
455
461
|
|
|
456
462
|
class MultiHopRetrieverConfig(BaseModel):
|
|
@@ -362,24 +362,22 @@ class AgentsController:
|
|
|
362
362
|
project_name: str = 'mindsdb',
|
|
363
363
|
tools: List[BaseTool] = None,
|
|
364
364
|
stream: bool = False) -> Union[Iterator[object], pd.DataFrame]:
|
|
365
|
-
|
|
365
|
+
"""
|
|
366
366
|
Queries an agent to get a completion.
|
|
367
367
|
|
|
368
368
|
Parameters:
|
|
369
369
|
agent (db.Agents): Existing agent to get completion from
|
|
370
370
|
messages (List[Dict[str, str]]): Chat history to send to the agent
|
|
371
|
-
trace_id (str): ID of Langfuse trace to use
|
|
372
|
-
observation_id (str): ID of parent Langfuse observation to use
|
|
373
371
|
project_name (str): Project the agent belongs to (default mindsdb)
|
|
374
372
|
tools (List[BaseTool]): Tools to use while getting the completion
|
|
375
|
-
stream (bool): Whether
|
|
373
|
+
stream (bool): Whether to stream the response
|
|
376
374
|
|
|
377
375
|
Returns:
|
|
378
376
|
response (Union[Iterator[object], pd.DataFrame]): Completion as a DataFrame or iterator of completion chunks
|
|
379
377
|
|
|
380
378
|
Raises:
|
|
381
379
|
ValueError: Agent's model does not exist.
|
|
382
|
-
|
|
380
|
+
"""
|
|
383
381
|
if stream:
|
|
384
382
|
return self._get_completion_stream(
|
|
385
383
|
agent,
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from concurrent.futures import as_completed, TimeoutError
|
|
3
|
-
from typing import Dict, Iterable, List
|
|
3
|
+
from typing import Dict, Iterable, List, Optional
|
|
4
4
|
from uuid import uuid4
|
|
5
|
-
import os
|
|
6
5
|
import re
|
|
7
6
|
import numpy as np
|
|
8
7
|
import pandas as pd
|
|
@@ -20,9 +19,6 @@ from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
|
|
20
19
|
from langchain_core.messages.base import BaseMessage
|
|
21
20
|
from langchain_core.prompts import PromptTemplate
|
|
22
21
|
from langchain_core.tools import Tool
|
|
23
|
-
from langfuse import Langfuse
|
|
24
|
-
from langfuse.api.resources.commons.errors.not_found_error import NotFoundError as TraceNotFoundError
|
|
25
|
-
from langfuse.callback import CallbackHandler
|
|
26
22
|
|
|
27
23
|
from mindsdb.integrations.handlers.openai_handler.constants import (
|
|
28
24
|
CHAT_MODELS as OPEN_AI_CHAT_MODELS,
|
|
@@ -35,13 +31,11 @@ from mindsdb.utilities.context_executor import ContextThreadPoolExecutor
|
|
|
35
31
|
from mindsdb.interfaces.storage import db
|
|
36
32
|
from mindsdb.utilities.context import context as ctx
|
|
37
33
|
|
|
38
|
-
|
|
39
34
|
from .mindsdb_chat_model import ChatMindsdb
|
|
40
35
|
from .callback_handlers import LogCallbackHandler, ContextCaptureCallback
|
|
41
|
-
from .langfuse_callback_handler import LangfuseCallbackHandler,
|
|
36
|
+
from .langfuse_callback_handler import LangfuseCallbackHandler, get_skills
|
|
42
37
|
from .safe_output_parser import SafeOutputParser
|
|
43
38
|
|
|
44
|
-
|
|
45
39
|
from .constants import (
|
|
46
40
|
DEFAULT_AGENT_TIMEOUT_SECONDS,
|
|
47
41
|
DEFAULT_AGENT_TYPE,
|
|
@@ -62,6 +56,8 @@ from langchain_anthropic import ChatAnthropic
|
|
|
62
56
|
from langchain_core.messages import SystemMessage
|
|
63
57
|
from langchain_openai import ChatOpenAI
|
|
64
58
|
|
|
59
|
+
from mindsdb.utilities.langfuse import LangfuseClientWrapper
|
|
60
|
+
|
|
65
61
|
_PARSING_ERROR_PREFIXES = [
|
|
66
62
|
"An output parsing error occurred",
|
|
67
63
|
"Could not parse LLM output",
|
|
@@ -207,34 +203,42 @@ def process_chunk(chunk):
|
|
|
207
203
|
|
|
208
204
|
|
|
209
205
|
class LangchainAgent:
|
|
206
|
+
|
|
210
207
|
def __init__(self, agent: db.Agents, model: dict = None):
|
|
208
|
+
|
|
211
209
|
self.agent = agent
|
|
212
210
|
self.model = model
|
|
213
|
-
|
|
214
|
-
self.
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
211
|
+
|
|
212
|
+
self.run_completion_span: Optional[object] = None
|
|
213
|
+
self.llm: Optional[object] = None
|
|
214
|
+
self.embedding_model: Optional[object] = None
|
|
215
|
+
|
|
216
|
+
self.log_callback_handler: Optional[object] = None
|
|
217
|
+
self.langfuse_callback_handler: Optional[object] = None # native langfuse callback handler
|
|
218
|
+
self.mdb_langfuse_callback_handler: Optional[object] = None # custom (see langfuse_callback_handler.py)
|
|
219
|
+
|
|
220
|
+
self.langfuse_client_wrapper = LangfuseClientWrapper()
|
|
221
|
+
self.args = self._initialize_args()
|
|
222
|
+
|
|
223
|
+
# Back compatibility for old models
|
|
224
|
+
self.provider = self.args.get("provider", get_llm_provider(self.args))
|
|
225
|
+
|
|
226
|
+
def _initialize_args(self) -> dict:
|
|
227
|
+
"""Initialize the arguments based on the agent's parameters."""
|
|
228
|
+
args = self.agent.params.copy()
|
|
229
|
+
args["model_name"] = self.agent.model_name
|
|
230
|
+
args["provider"] = self.agent.provider
|
|
218
231
|
args["embedding_model_provider"] = args.get(
|
|
219
232
|
"embedding_model", get_embedding_model_provider(args)
|
|
220
233
|
)
|
|
221
234
|
|
|
222
|
-
self.langfuse = None
|
|
223
|
-
if os.getenv('LANGFUSE_PUBLIC_KEY') is not None:
|
|
224
|
-
self.langfuse = Langfuse(
|
|
225
|
-
public_key=os.getenv('LANGFUSE_PUBLIC_KEY'),
|
|
226
|
-
secret_key=os.getenv('LANGFUSE_SECRET_KEY'),
|
|
227
|
-
host=os.getenv('LANGFUSE_HOST'),
|
|
228
|
-
release=os.getenv('LANGFUSE_RELEASE', 'local'),
|
|
229
|
-
)
|
|
230
|
-
|
|
231
235
|
# agent is using current langchain model
|
|
232
|
-
if agent.provider == "mindsdb":
|
|
233
|
-
args["model_name"] = agent.model_name
|
|
236
|
+
if self.agent.provider == "mindsdb":
|
|
237
|
+
args["model_name"] = self.agent.model_name
|
|
234
238
|
|
|
235
239
|
# get prompt
|
|
236
240
|
prompt_template = (
|
|
237
|
-
model["problem_definition"].get("using", {}).get("prompt_template")
|
|
241
|
+
self.model["problem_definition"].get("using", {}).get("prompt_template")
|
|
238
242
|
)
|
|
239
243
|
if prompt_template is not None:
|
|
240
244
|
# only update prompt_template if it is set on the model
|
|
@@ -248,57 +252,47 @@ class LangchainAgent:
|
|
|
248
252
|
"Please provide a `prompt_template` or set `mode=retrieval`"
|
|
249
253
|
)
|
|
250
254
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
255
|
+
return args
|
|
256
|
+
|
|
257
|
+
def get_metadata(self) -> Dict:
|
|
258
|
+
return {
|
|
259
|
+
'provider': self.provider,
|
|
260
|
+
'model_name': self.args["model_name"],
|
|
261
|
+
'embedding_model_provider': self.args.get('embedding_model_provider',
|
|
262
|
+
get_embedding_model_provider(self.args)),
|
|
263
|
+
'skills': get_skills(self.agent),
|
|
264
|
+
'user_id': ctx.user_id,
|
|
265
|
+
'session_id': ctx.session_id,
|
|
266
|
+
'company_id': ctx.company_id,
|
|
267
|
+
'user_class': ctx.user_class,
|
|
268
|
+
'email_confirmed': ctx.email_confirmed
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
def get_tags(self) -> List:
|
|
272
|
+
return [
|
|
273
|
+
self.provider,
|
|
274
|
+
]
|
|
259
275
|
|
|
260
276
|
def get_completion(self, messages, stream: bool = False):
|
|
261
277
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
}
|
|
276
|
-
trace_metadata['skills'] = get_skills(self.agent)
|
|
277
|
-
trace_tags = get_tags(trace_metadata)
|
|
278
|
-
|
|
279
|
-
# Set our user info to pass into langfuse trace, with fault tolerance in each individual one just incase on purpose
|
|
280
|
-
trace_metadata['user_id'] = ctx.user_id
|
|
281
|
-
trace_metadata['session_id'] = ctx.session_id
|
|
282
|
-
trace_metadata['company_id'] = ctx.company_id
|
|
283
|
-
trace_metadata['user_class'] = ctx.user_class
|
|
284
|
-
trace_metadata['email_confirmed'] = ctx.email_confirmed
|
|
285
|
-
|
|
286
|
-
self.api_trace = self.langfuse.trace(
|
|
287
|
-
name='api-completion',
|
|
288
|
-
input=messages,
|
|
289
|
-
tags=trace_tags,
|
|
290
|
-
metadata=trace_metadata,
|
|
291
|
-
user_id=ctx.user_id,
|
|
292
|
-
session_id=ctx.session_id,
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
self.run_completion_span = self.api_trace.span(name='run-completion', input=messages)
|
|
296
|
-
trace_id = self.api_trace.id
|
|
297
|
-
observation_id = self.run_completion_span.id
|
|
278
|
+
# Get metadata and tags to be used in the trace
|
|
279
|
+
metadata = self.get_metadata()
|
|
280
|
+
tags = self.get_tags()
|
|
281
|
+
|
|
282
|
+
# Set up trace for the API completion in Langfuse
|
|
283
|
+
self.langfuse_client_wrapper.setup_trace(
|
|
284
|
+
name='api-completion',
|
|
285
|
+
input=messages,
|
|
286
|
+
tags=tags,
|
|
287
|
+
metadata=metadata,
|
|
288
|
+
user_id=ctx.user_id,
|
|
289
|
+
session_id=ctx.session_id,
|
|
290
|
+
)
|
|
298
291
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
292
|
+
# Set up trace for the run completion in Langfuse
|
|
293
|
+
self.run_completion_span = self.langfuse_client_wrapper.start_span(
|
|
294
|
+
name='run-completion',
|
|
295
|
+
input=messages)
|
|
302
296
|
|
|
303
297
|
if stream:
|
|
304
298
|
return self._get_completion_stream(messages)
|
|
@@ -317,21 +311,8 @@ class LangchainAgent:
|
|
|
317
311
|
df.iloc[:-1, df.columns.get_loc(user_column)] = None
|
|
318
312
|
response = self.run_agent(df, agent, args)
|
|
319
313
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
self.api_trace.update(output=response)
|
|
323
|
-
|
|
324
|
-
# update metadata with tool usage
|
|
325
|
-
try:
|
|
326
|
-
# Ensure all batched traces are sent before fetching.
|
|
327
|
-
self.langfuse.flush()
|
|
328
|
-
trace = self.langfuse.get_trace(self.trace_id)
|
|
329
|
-
trace_metadata['tool_usage'] = get_tool_usage(trace)
|
|
330
|
-
self.api_trace.update(metadata=trace_metadata)
|
|
331
|
-
except TraceNotFoundError:
|
|
332
|
-
logger.warning(f'Langfuse trace {self.trace_id} not found')
|
|
333
|
-
except Exception as e:
|
|
334
|
-
logger.error(f'Something went wrong while processing Langfuse trace {self.trace_id}: {str(e)}')
|
|
314
|
+
# End the run completion span and update the metadata with tool usage
|
|
315
|
+
self.langfuse_client_wrapper.end_span(span=self.run_completion_span, output=response)
|
|
335
316
|
|
|
336
317
|
return response
|
|
337
318
|
|
|
@@ -349,6 +330,7 @@ class LangchainAgent:
|
|
|
349
330
|
|
|
350
331
|
df = pd.DataFrame(messages)
|
|
351
332
|
|
|
333
|
+
self.embedding_model_provider = args.get('embedding_model_provider', get_embedding_model_provider(args))
|
|
352
334
|
# Back compatibility for old models
|
|
353
335
|
self.provider = args.get("provider", get_llm_provider(args))
|
|
354
336
|
|
|
@@ -445,69 +427,49 @@ class LangchainAgent:
|
|
|
445
427
|
return all_tools
|
|
446
428
|
|
|
447
429
|
def _get_agent_callbacks(self, args: Dict) -> List:
|
|
430
|
+
all_callbacks = []
|
|
448
431
|
|
|
449
432
|
if self.log_callback_handler is None:
|
|
450
433
|
self.log_callback_handler = LogCallbackHandler(logger)
|
|
451
434
|
|
|
452
|
-
all_callbacks
|
|
435
|
+
all_callbacks.append(self.log_callback_handler)
|
|
453
436
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
"langfuse_secret_key", os.getenv("LANGFUSE_SECRET_KEY")
|
|
459
|
-
)
|
|
460
|
-
langfuse_host = args.get("langfuse_host", os.getenv("LANGFUSE_HOST"))
|
|
461
|
-
are_langfuse_args_present = (
|
|
462
|
-
bool(langfuse_public_key)
|
|
463
|
-
and bool(langfuse_secret_key)
|
|
464
|
-
and bool(langfuse_host)
|
|
465
|
-
)
|
|
437
|
+
if self.langfuse_client_wrapper.trace is None:
|
|
438
|
+
# Get metadata and tags to be used in the trace
|
|
439
|
+
metadata = self.get_metadata()
|
|
440
|
+
tags = self.get_tags()
|
|
466
441
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
)
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
observation_id = args.get(
|
|
499
|
-
"observation_id", self.observation_id or uuid4().hex
|
|
500
|
-
)
|
|
501
|
-
langfuse = Langfuse(
|
|
502
|
-
host=langfuse_host,
|
|
503
|
-
public_key=langfuse_public_key,
|
|
504
|
-
secret_key=langfuse_secret_key,
|
|
505
|
-
)
|
|
506
|
-
self.mdb_langfuse_callback_handler = LangfuseCallbackHandler(
|
|
507
|
-
langfuse=langfuse,
|
|
508
|
-
trace_id=trace_id,
|
|
509
|
-
observation_id=observation_id,
|
|
510
|
-
)
|
|
442
|
+
trace_name = "NativeTrace-MindsDB-AgentExecutor"
|
|
443
|
+
|
|
444
|
+
# Set up trace for the API completion in Langfuse
|
|
445
|
+
self.langfuse_client_wrapper.setup_trace(
|
|
446
|
+
name=trace_name,
|
|
447
|
+
tags=tags,
|
|
448
|
+
metadata=metadata,
|
|
449
|
+
user_id=ctx.user_id,
|
|
450
|
+
session_id=ctx.session_id,
|
|
451
|
+
)
|
|
452
|
+
|
|
453
|
+
if self.langfuse_callback_handler is None:
|
|
454
|
+
self.langfuse_callback_handler = self.langfuse_client_wrapper.get_langchain_handler()
|
|
455
|
+
|
|
456
|
+
# custom tracer
|
|
457
|
+
if self.mdb_langfuse_callback_handler is None:
|
|
458
|
+
trace_id = None
|
|
459
|
+
if self.langfuse_client_wrapper.trace is not None:
|
|
460
|
+
trace_id = args.get("trace_id", self.langfuse_client_wrapper.trace.id)
|
|
461
|
+
|
|
462
|
+
span_id = None
|
|
463
|
+
if self.run_completion_span is not None:
|
|
464
|
+
span_id = self.run_completion_span.id
|
|
465
|
+
|
|
466
|
+
observation_id = args.get("observation_id", span_id or uuid4().hex)
|
|
467
|
+
|
|
468
|
+
self.mdb_langfuse_callback_handler = LangfuseCallbackHandler(
|
|
469
|
+
langfuse=self.langfuse_client_wrapper.client,
|
|
470
|
+
trace_id=trace_id,
|
|
471
|
+
observation_id=observation_id,
|
|
472
|
+
)
|
|
511
473
|
|
|
512
474
|
# obs: we may want to unify these; native langfuse handler provides details as a tree on a sub-step of the overarching custom one # noqa
|
|
513
475
|
if self.langfuse_callback_handler is not None:
|
|
@@ -542,7 +504,8 @@ AI: {response}"""
|
|
|
542
504
|
return_context = args.get('return_context', True)
|
|
543
505
|
input_variables = re.findall(r"{{(.*?)}}", base_template)
|
|
544
506
|
|
|
545
|
-
prompts, empty_prompt_ids = prepare_prompts(df, base_template, input_variables,
|
|
507
|
+
prompts, empty_prompt_ids = prepare_prompts(df, base_template, input_variables,
|
|
508
|
+
args.get('user_column', USER_COLUMN))
|
|
546
509
|
|
|
547
510
|
def _invoke_agent_executor_with_prompt(agent_executor, prompt):
|
|
548
511
|
if not prompt:
|
|
@@ -621,7 +584,8 @@ AI: {response}"""
|
|
|
621
584
|
if not hasattr(agent_executor, 'stream') or not callable(agent_executor.stream):
|
|
622
585
|
raise AttributeError("The agent_executor does not have a 'stream' method")
|
|
623
586
|
|
|
624
|
-
stream_iterator = agent_executor.stream(prompts[0],
|
|
587
|
+
stream_iterator = agent_executor.stream(prompts[0],
|
|
588
|
+
config={'callbacks': callbacks})
|
|
625
589
|
|
|
626
590
|
if not hasattr(stream_iterator, '__iter__'):
|
|
627
591
|
raise TypeError("The stream method did not return an iterable")
|
|
@@ -642,10 +606,8 @@ AI: {response}"""
|
|
|
642
606
|
# Yield generated SQL if available
|
|
643
607
|
yield {"type": "sql", "content": self.log_callback_handler.generated_sql}
|
|
644
608
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
self.api_trace.update()
|
|
648
|
-
logger.info("Langfuse trace updated")
|
|
609
|
+
# End the run completion span and update the metadata with tool usage
|
|
610
|
+
self.langfuse_client_wrapper.end_span_stream(span=self.run_completion_span)
|
|
649
611
|
|
|
650
612
|
@staticmethod
|
|
651
613
|
def process_chunk(chunk):
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from typing import Any, Dict, Union, Optional, List
|
|
2
2
|
from uuid import uuid4
|
|
3
3
|
import datetime
|
|
4
|
-
import os
|
|
5
4
|
|
|
6
5
|
from langchain_core.callbacks.base import BaseCallbackHandler
|
|
7
6
|
|
|
@@ -122,42 +121,6 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
|
|
|
122
121
|
return False
|
|
123
122
|
|
|
124
123
|
|
|
125
|
-
def get_metadata(model_using: Dict) -> Dict:
|
|
126
|
-
""" Generates initial metadata mapping from information provided in a model's `using` clause.
|
|
127
|
-
Includes providers and model name.
|
|
128
|
-
"""
|
|
129
|
-
metadata_keys = ['provider', 'model_name', 'embedding_model_provider'] # keeps keys relevant for tracing
|
|
130
|
-
trace_metadata = {}
|
|
131
|
-
for key in metadata_keys:
|
|
132
|
-
if key in model_using:
|
|
133
|
-
trace_metadata[key] = model_using.get(key)
|
|
134
|
-
return trace_metadata
|
|
135
|
-
|
|
136
|
-
|
|
137
124
|
def get_skills(agent: db.Agents) -> List:
|
|
138
125
|
""" Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
|
|
139
126
|
return [rel.skill.type for rel in agent.skills_relationships]
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def get_tags(metadata: Dict) -> List:
|
|
143
|
-
""" Retrieves tags from existing langfuse metadata (built using `get_metadata` and `get_skills`), and environment variables. """
|
|
144
|
-
trace_tags = []
|
|
145
|
-
if os.getenv('FLASK_ENV'):
|
|
146
|
-
trace_tags.append(os.getenv('FLASK_ENV')) # Fix: use something other than flask_env
|
|
147
|
-
if 'provider' in metadata:
|
|
148
|
-
trace_tags.append(metadata['provider'])
|
|
149
|
-
return trace_tags
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def get_tool_usage(trace) -> Dict:
|
|
153
|
-
""" Retrieves tool usage information from a langfuse trace.
|
|
154
|
-
Note: assumes trace marks an action with string `AgentAction` """
|
|
155
|
-
tool_usage = {}
|
|
156
|
-
steps = [s.name for s in trace.observations]
|
|
157
|
-
for step in steps:
|
|
158
|
-
if 'AgentAction' in step:
|
|
159
|
-
tool_name = step.split('-')[1]
|
|
160
|
-
if tool_name not in tool_usage:
|
|
161
|
-
tool_usage[tool_name] = 0
|
|
162
|
-
tool_usage[tool_name] += 1
|
|
163
|
-
return tool_usage
|
|
@@ -4,36 +4,38 @@
|
|
|
4
4
|
"""
|
|
5
5
|
from typing import Any, Iterable, List, Optional
|
|
6
6
|
|
|
7
|
-
from mindsdb.interfaces.skills.skill_tool import skill_tool
|
|
8
7
|
from mindsdb.utilities import log
|
|
9
8
|
from langchain_community.utilities import SQLDatabase
|
|
9
|
+
from mindsdb.interfaces.skills.sql_agent import SQLAgent
|
|
10
10
|
|
|
11
11
|
logger = log.getLogger(__name__)
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class MindsDBSQL(SQLDatabase):
|
|
15
|
+
@staticmethod
|
|
16
|
+
def custom_init(
|
|
17
|
+
sql_agent: 'SQLAgent'
|
|
18
|
+
) -> 'MindsDBSQL':
|
|
19
|
+
instance = MindsDBSQL()
|
|
20
|
+
instance._sql_agent = sql_agent
|
|
21
|
+
return instance
|
|
22
|
+
|
|
15
23
|
""" Can't modify signature, as LangChain does a Pydantic check."""
|
|
16
24
|
def __init__(
|
|
17
25
|
self,
|
|
18
|
-
engine=None,
|
|
19
|
-
|
|
26
|
+
engine: Optional[Any] = None,
|
|
27
|
+
schema: Optional[str] = None,
|
|
20
28
|
metadata: Optional[Any] = None,
|
|
21
29
|
ignore_tables: Optional[List[str]] = None,
|
|
22
30
|
include_tables: Optional[List[str]] = None,
|
|
23
31
|
sample_rows_in_table_info: int = 3,
|
|
24
|
-
schema: Optional[str] = None,
|
|
25
32
|
indexes_in_table_info: bool = False,
|
|
26
33
|
custom_table_info: Optional[dict] = None,
|
|
27
|
-
view_support:
|
|
34
|
+
view_support: bool = True,
|
|
35
|
+
max_string_length: int = 300,
|
|
36
|
+
lazy_table_reflection: bool = False,
|
|
28
37
|
):
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
self._sql_agent = skill_tool.get_sql_agent(
|
|
32
|
-
database,
|
|
33
|
-
include_tables,
|
|
34
|
-
ignore_tables,
|
|
35
|
-
sample_rows_in_table_info
|
|
36
|
-
)
|
|
38
|
+
pass
|
|
37
39
|
|
|
38
40
|
@property
|
|
39
41
|
def dialect(self) -> str:
|
|
@@ -4,6 +4,7 @@ from mindsdb.interfaces.agents.agents_controller import AgentsController
|
|
|
4
4
|
from mindsdb.interfaces.chatbot.chatbot_task import ChatBotTask
|
|
5
5
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
6
6
|
from mindsdb.interfaces.storage import db
|
|
7
|
+
from mindsdb.interfaces.model.functions import get_project_records
|
|
7
8
|
|
|
8
9
|
from mindsdb.utilities.context import context as ctx
|
|
9
10
|
|
|
@@ -128,16 +129,11 @@ class ChatBotController:
|
|
|
128
129
|
all_bots (List[db.ChatBots]): List of database chatbot object
|
|
129
130
|
'''
|
|
130
131
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
query = query.filter_by(name=project_name)
|
|
137
|
-
project_names = {
|
|
138
|
-
i.id: i.name
|
|
139
|
-
for i in query
|
|
140
|
-
}
|
|
132
|
+
project_names = {}
|
|
133
|
+
for project in get_project_records():
|
|
134
|
+
if project_name is not None and project.name != project_name:
|
|
135
|
+
continue
|
|
136
|
+
project_names[project.id] = project.name
|
|
141
137
|
|
|
142
138
|
query = db.session.query(
|
|
143
139
|
db.ChatBots, db.Tasks
|
|
@@ -228,9 +224,9 @@ class ChatBotController:
|
|
|
228
224
|
raise ValueError('Need to provide either "model_name" or "agent_name" when creating a chatbot')
|
|
229
225
|
if agent_name is not None:
|
|
230
226
|
agent = self.agents_controller.get_agent(agent_name, project_name)
|
|
231
|
-
model_name = agent.model_name
|
|
232
227
|
if agent is None:
|
|
233
228
|
raise ValueError(f"Agent with name doesn't exist: {agent_name}")
|
|
229
|
+
model_name = agent.model_name
|
|
234
230
|
agent_id = agent.id
|
|
235
231
|
else:
|
|
236
232
|
# Create a new agent with the given model name.
|