MindsDB 25.1.2.0__py3-none-any.whl → 25.1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/METADATA +258 -255
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/RECORD +98 -85
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/WHEEL +1 -1
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +8 -3
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +9 -26
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/http/namespaces/agents.py +3 -1
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/databricks_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/file_handler/requirements.txt +0 -4
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +8 -0
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +49 -12
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +5 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/handlers/web_handler/requirements.txt +0 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +2 -2
- mindsdb/integrations/utilities/files/__init__.py +0 -0
- mindsdb/integrations/utilities/files/file_reader.py +258 -0
- mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +2 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +8 -3
- mindsdb/integrations/utilities/rag/chains/map_reduce_summarizer_chain.py +5 -9
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +84 -20
- mindsdb/integrations/utilities/rag/rag_pipeline_builder.py +16 -1
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/__init__.py +3 -0
- mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py +85 -0
- mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py +57 -0
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +117 -48
- mindsdb/integrations/utilities/rag/settings.py +190 -17
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +18 -8
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/langchain_agent.py +124 -157
- mindsdb/interfaces/agents/langfuse_callback_handler.py +4 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +21 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/integrations.py +5 -1
- mindsdb/interfaces/database/projects.py +55 -16
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +38 -9
- mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +7 -26
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +51 -40
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/interfaces/skills/skill_tool.py +97 -54
- mindsdb/interfaces/skills/skills_controller.py +7 -3
- mindsdb/interfaces/skills/sql_agent.py +127 -41
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +11 -1
- mindsdb/utilities/langfuse.py +279 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from concurrent.futures import as_completed, TimeoutError
|
|
3
|
-
from typing import Dict, Iterable, List
|
|
3
|
+
from typing import Dict, Iterable, List, Optional
|
|
4
4
|
from uuid import uuid4
|
|
5
|
-
import os
|
|
6
5
|
import re
|
|
7
6
|
import numpy as np
|
|
8
7
|
import pandas as pd
|
|
@@ -20,9 +19,6 @@ from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
|
|
20
19
|
from langchain_core.messages.base import BaseMessage
|
|
21
20
|
from langchain_core.prompts import PromptTemplate
|
|
22
21
|
from langchain_core.tools import Tool
|
|
23
|
-
from langfuse import Langfuse
|
|
24
|
-
from langfuse.api.resources.commons.errors.not_found_error import NotFoundError as TraceNotFoundError
|
|
25
|
-
from langfuse.callback import CallbackHandler
|
|
26
22
|
|
|
27
23
|
from mindsdb.integrations.handlers.openai_handler.constants import (
|
|
28
24
|
CHAT_MODELS as OPEN_AI_CHAT_MODELS,
|
|
@@ -35,13 +31,11 @@ from mindsdb.utilities.context_executor import ContextThreadPoolExecutor
|
|
|
35
31
|
from mindsdb.interfaces.storage import db
|
|
36
32
|
from mindsdb.utilities.context import context as ctx
|
|
37
33
|
|
|
38
|
-
|
|
39
34
|
from .mindsdb_chat_model import ChatMindsdb
|
|
40
35
|
from .callback_handlers import LogCallbackHandler, ContextCaptureCallback
|
|
41
|
-
from .langfuse_callback_handler import LangfuseCallbackHandler,
|
|
36
|
+
from .langfuse_callback_handler import LangfuseCallbackHandler, get_skills
|
|
42
37
|
from .safe_output_parser import SafeOutputParser
|
|
43
38
|
|
|
44
|
-
|
|
45
39
|
from .constants import (
|
|
46
40
|
DEFAULT_AGENT_TIMEOUT_SECONDS,
|
|
47
41
|
DEFAULT_AGENT_TYPE,
|
|
@@ -55,13 +49,15 @@ from .constants import (
|
|
|
55
49
|
NVIDIA_NIM_CHAT_MODELS,
|
|
56
50
|
USER_COLUMN,
|
|
57
51
|
ASSISTANT_COLUMN,
|
|
58
|
-
CONTEXT_COLUMN
|
|
52
|
+
CONTEXT_COLUMN, TRACE_ID_COLUMN
|
|
59
53
|
)
|
|
60
54
|
from mindsdb.interfaces.skills.skill_tool import skill_tool, SkillData
|
|
61
55
|
from langchain_anthropic import ChatAnthropic
|
|
62
56
|
from langchain_core.messages import SystemMessage
|
|
63
57
|
from langchain_openai import ChatOpenAI
|
|
64
58
|
|
|
59
|
+
from mindsdb.utilities.langfuse import LangfuseClientWrapper
|
|
60
|
+
|
|
65
61
|
_PARSING_ERROR_PREFIXES = [
|
|
66
62
|
"An output parsing error occurred",
|
|
67
63
|
"Could not parse LLM output",
|
|
@@ -207,34 +203,42 @@ def process_chunk(chunk):
|
|
|
207
203
|
|
|
208
204
|
|
|
209
205
|
class LangchainAgent:
|
|
206
|
+
|
|
210
207
|
def __init__(self, agent: db.Agents, model: dict = None):
|
|
208
|
+
|
|
211
209
|
self.agent = agent
|
|
212
210
|
self.model = model
|
|
213
|
-
|
|
214
|
-
self.
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
211
|
+
|
|
212
|
+
self.run_completion_span: Optional[object] = None
|
|
213
|
+
self.llm: Optional[object] = None
|
|
214
|
+
self.embedding_model: Optional[object] = None
|
|
215
|
+
|
|
216
|
+
self.log_callback_handler: Optional[object] = None
|
|
217
|
+
self.langfuse_callback_handler: Optional[object] = None # native langfuse callback handler
|
|
218
|
+
self.mdb_langfuse_callback_handler: Optional[object] = None # custom (see langfuse_callback_handler.py)
|
|
219
|
+
|
|
220
|
+
self.langfuse_client_wrapper = LangfuseClientWrapper()
|
|
221
|
+
self.args = self._initialize_args()
|
|
222
|
+
|
|
223
|
+
# Back compatibility for old models
|
|
224
|
+
self.provider = self.args.get("provider", get_llm_provider(self.args))
|
|
225
|
+
|
|
226
|
+
def _initialize_args(self) -> dict:
|
|
227
|
+
"""Initialize the arguments based on the agent's parameters."""
|
|
228
|
+
args = self.agent.params.copy()
|
|
229
|
+
args["model_name"] = self.agent.model_name
|
|
230
|
+
args["provider"] = self.agent.provider
|
|
218
231
|
args["embedding_model_provider"] = args.get(
|
|
219
232
|
"embedding_model", get_embedding_model_provider(args)
|
|
220
233
|
)
|
|
221
234
|
|
|
222
|
-
self.langfuse = None
|
|
223
|
-
if os.getenv('LANGFUSE_PUBLIC_KEY') is not None:
|
|
224
|
-
self.langfuse = Langfuse(
|
|
225
|
-
public_key=os.getenv('LANGFUSE_PUBLIC_KEY'),
|
|
226
|
-
secret_key=os.getenv('LANGFUSE_SECRET_KEY'),
|
|
227
|
-
host=os.getenv('LANGFUSE_HOST'),
|
|
228
|
-
release=os.getenv('LANGFUSE_RELEASE', 'local'),
|
|
229
|
-
)
|
|
230
|
-
|
|
231
235
|
# agent is using current langchain model
|
|
232
|
-
if agent.provider == "mindsdb":
|
|
233
|
-
args["model_name"] = agent.model_name
|
|
236
|
+
if self.agent.provider == "mindsdb":
|
|
237
|
+
args["model_name"] = self.agent.model_name
|
|
234
238
|
|
|
235
239
|
# get prompt
|
|
236
240
|
prompt_template = (
|
|
237
|
-
model["problem_definition"].get("using", {}).get("prompt_template")
|
|
241
|
+
self.model["problem_definition"].get("using", {}).get("prompt_template")
|
|
238
242
|
)
|
|
239
243
|
if prompt_template is not None:
|
|
240
244
|
# only update prompt_template if it is set on the model
|
|
@@ -248,57 +252,47 @@ class LangchainAgent:
|
|
|
248
252
|
"Please provide a `prompt_template` or set `mode=retrieval`"
|
|
249
253
|
)
|
|
250
254
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
255
|
+
return args
|
|
256
|
+
|
|
257
|
+
def get_metadata(self) -> Dict:
|
|
258
|
+
return {
|
|
259
|
+
'provider': self.provider,
|
|
260
|
+
'model_name': self.args["model_name"],
|
|
261
|
+
'embedding_model_provider': self.args.get('embedding_model_provider',
|
|
262
|
+
get_embedding_model_provider(self.args)),
|
|
263
|
+
'skills': get_skills(self.agent),
|
|
264
|
+
'user_id': ctx.user_id,
|
|
265
|
+
'session_id': ctx.session_id,
|
|
266
|
+
'company_id': ctx.company_id,
|
|
267
|
+
'user_class': ctx.user_class,
|
|
268
|
+
'email_confirmed': ctx.email_confirmed
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
def get_tags(self) -> List:
|
|
272
|
+
return [
|
|
273
|
+
self.provider,
|
|
274
|
+
]
|
|
259
275
|
|
|
260
276
|
def get_completion(self, messages, stream: bool = False):
|
|
261
277
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
}
|
|
276
|
-
trace_metadata['skills'] = get_skills(self.agent)
|
|
277
|
-
trace_tags = get_tags(trace_metadata)
|
|
278
|
-
|
|
279
|
-
# Set our user info to pass into langfuse trace, with fault tolerance in each individual one just incase on purpose
|
|
280
|
-
trace_metadata['user_id'] = ctx.user_id
|
|
281
|
-
trace_metadata['session_id'] = ctx.session_id
|
|
282
|
-
trace_metadata['company_id'] = ctx.company_id
|
|
283
|
-
trace_metadata['user_class'] = ctx.user_class
|
|
284
|
-
trace_metadata['email_confirmed'] = ctx.email_confirmed
|
|
285
|
-
|
|
286
|
-
self.api_trace = self.langfuse.trace(
|
|
287
|
-
name='api-completion',
|
|
288
|
-
input=messages,
|
|
289
|
-
tags=trace_tags,
|
|
290
|
-
metadata=trace_metadata,
|
|
291
|
-
user_id=ctx.user_id,
|
|
292
|
-
session_id=ctx.session_id,
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
self.run_completion_span = self.api_trace.span(name='run-completion', input=messages)
|
|
296
|
-
trace_id = self.api_trace.id
|
|
297
|
-
observation_id = self.run_completion_span.id
|
|
278
|
+
# Get metadata and tags to be used in the trace
|
|
279
|
+
metadata = self.get_metadata()
|
|
280
|
+
tags = self.get_tags()
|
|
281
|
+
|
|
282
|
+
# Set up trace for the API completion in Langfuse
|
|
283
|
+
self.langfuse_client_wrapper.setup_trace(
|
|
284
|
+
name='api-completion',
|
|
285
|
+
input=messages,
|
|
286
|
+
tags=tags,
|
|
287
|
+
metadata=metadata,
|
|
288
|
+
user_id=ctx.user_id,
|
|
289
|
+
session_id=ctx.session_id,
|
|
290
|
+
)
|
|
298
291
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
292
|
+
# Set up trace for the run completion in Langfuse
|
|
293
|
+
self.run_completion_span = self.langfuse_client_wrapper.start_span(
|
|
294
|
+
name='run-completion',
|
|
295
|
+
input=messages)
|
|
302
296
|
|
|
303
297
|
if stream:
|
|
304
298
|
return self._get_completion_stream(messages)
|
|
@@ -317,21 +311,8 @@ class LangchainAgent:
|
|
|
317
311
|
df.iloc[:-1, df.columns.get_loc(user_column)] = None
|
|
318
312
|
response = self.run_agent(df, agent, args)
|
|
319
313
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
self.api_trace.update(output=response)
|
|
323
|
-
|
|
324
|
-
# update metadata with tool usage
|
|
325
|
-
try:
|
|
326
|
-
# Ensure all batched traces are sent before fetching.
|
|
327
|
-
self.langfuse.flush()
|
|
328
|
-
trace = self.langfuse.get_trace(self.trace_id)
|
|
329
|
-
trace_metadata['tool_usage'] = get_tool_usage(trace)
|
|
330
|
-
self.api_trace.update(metadata=trace_metadata)
|
|
331
|
-
except TraceNotFoundError:
|
|
332
|
-
logger.warning(f'Langfuse trace {self.trace_id} not found')
|
|
333
|
-
except Exception as e:
|
|
334
|
-
logger.error(f'Something went wrong while processing Langfuse trace {self.trace_id}: {str(e)}')
|
|
314
|
+
# End the run completion span and update the metadata with tool usage
|
|
315
|
+
self.langfuse_client_wrapper.end_span(span=self.run_completion_span, output=response)
|
|
335
316
|
|
|
336
317
|
return response
|
|
337
318
|
|
|
@@ -349,6 +330,7 @@ class LangchainAgent:
|
|
|
349
330
|
|
|
350
331
|
df = pd.DataFrame(messages)
|
|
351
332
|
|
|
333
|
+
self.embedding_model_provider = args.get('embedding_model_provider', get_embedding_model_provider(args))
|
|
352
334
|
# Back compatibility for old models
|
|
353
335
|
self.provider = args.get("provider", get_llm_provider(args))
|
|
354
336
|
|
|
@@ -389,9 +371,9 @@ class LangchainAgent:
|
|
|
389
371
|
for row in df[:-1].to_dict("records"):
|
|
390
372
|
question = row[user_column]
|
|
391
373
|
answer = row[assistant_column]
|
|
392
|
-
if question:
|
|
374
|
+
if isinstance(question, str) and len(question) > 0:
|
|
393
375
|
memory.chat_memory.add_user_message(question)
|
|
394
|
-
if answer:
|
|
376
|
+
if isinstance(answer, str) and len(answer) > 0:
|
|
395
377
|
memory.chat_memory.add_ai_message(answer)
|
|
396
378
|
|
|
397
379
|
agent_type = args.get("agent_type", DEFAULT_AGENT_TYPE)
|
|
@@ -445,69 +427,47 @@ class LangchainAgent:
|
|
|
445
427
|
return all_tools
|
|
446
428
|
|
|
447
429
|
def _get_agent_callbacks(self, args: Dict) -> List:
|
|
430
|
+
all_callbacks = []
|
|
448
431
|
|
|
449
432
|
if self.log_callback_handler is None:
|
|
450
433
|
self.log_callback_handler = LogCallbackHandler(logger)
|
|
451
434
|
|
|
452
|
-
all_callbacks
|
|
435
|
+
all_callbacks.append(self.log_callback_handler)
|
|
453
436
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
"langfuse_secret_key", os.getenv("LANGFUSE_SECRET_KEY")
|
|
459
|
-
)
|
|
460
|
-
langfuse_host = args.get("langfuse_host", os.getenv("LANGFUSE_HOST"))
|
|
461
|
-
are_langfuse_args_present = (
|
|
462
|
-
bool(langfuse_public_key)
|
|
463
|
-
and bool(langfuse_secret_key)
|
|
464
|
-
and bool(langfuse_host)
|
|
465
|
-
)
|
|
437
|
+
if self.langfuse_client_wrapper.trace is None:
|
|
438
|
+
# Get metadata and tags to be used in the trace
|
|
439
|
+
metadata = self.get_metadata()
|
|
440
|
+
tags = self.get_tags()
|
|
466
441
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
if self.mdb_langfuse_callback_handler is None:
|
|
497
|
-
trace_id = args.get("trace_id", self.trace_id or None)
|
|
498
|
-
observation_id = args.get(
|
|
499
|
-
"observation_id", self.observation_id or uuid4().hex
|
|
500
|
-
)
|
|
501
|
-
langfuse = Langfuse(
|
|
502
|
-
host=langfuse_host,
|
|
503
|
-
public_key=langfuse_public_key,
|
|
504
|
-
secret_key=langfuse_secret_key,
|
|
505
|
-
)
|
|
506
|
-
self.mdb_langfuse_callback_handler = LangfuseCallbackHandler(
|
|
507
|
-
langfuse=langfuse,
|
|
508
|
-
trace_id=trace_id,
|
|
509
|
-
observation_id=observation_id,
|
|
510
|
-
)
|
|
442
|
+
trace_name = "NativeTrace-MindsDB-AgentExecutor"
|
|
443
|
+
|
|
444
|
+
# Set up trace for the API completion in Langfuse
|
|
445
|
+
self.langfuse_client_wrapper.setup_trace(
|
|
446
|
+
name=trace_name,
|
|
447
|
+
tags=tags,
|
|
448
|
+
metadata=metadata,
|
|
449
|
+
user_id=ctx.user_id,
|
|
450
|
+
session_id=ctx.session_id,
|
|
451
|
+
)
|
|
452
|
+
|
|
453
|
+
if self.langfuse_callback_handler is None:
|
|
454
|
+
self.langfuse_callback_handler = self.langfuse_client_wrapper.get_langchain_handler()
|
|
455
|
+
|
|
456
|
+
# custom tracer
|
|
457
|
+
if self.mdb_langfuse_callback_handler is None:
|
|
458
|
+
trace_id = self.langfuse_client_wrapper.get_trace_id()
|
|
459
|
+
|
|
460
|
+
span_id = None
|
|
461
|
+
if self.run_completion_span is not None:
|
|
462
|
+
span_id = self.run_completion_span.id
|
|
463
|
+
|
|
464
|
+
observation_id = args.get("observation_id", span_id or uuid4().hex)
|
|
465
|
+
|
|
466
|
+
self.mdb_langfuse_callback_handler = LangfuseCallbackHandler(
|
|
467
|
+
langfuse=self.langfuse_client_wrapper.client,
|
|
468
|
+
trace_id=trace_id,
|
|
469
|
+
observation_id=observation_id,
|
|
470
|
+
)
|
|
511
471
|
|
|
512
472
|
# obs: we may want to unify these; native langfuse handler provides details as a tree on a sub-step of the overarching custom one # noqa
|
|
513
473
|
if self.langfuse_callback_handler is not None:
|
|
@@ -542,7 +502,8 @@ AI: {response}"""
|
|
|
542
502
|
return_context = args.get('return_context', True)
|
|
543
503
|
input_variables = re.findall(r"{{(.*?)}}", base_template)
|
|
544
504
|
|
|
545
|
-
prompts, empty_prompt_ids = prepare_prompts(df, base_template, input_variables,
|
|
505
|
+
prompts, empty_prompt_ids = prepare_prompts(df, base_template, input_variables,
|
|
506
|
+
args.get('user_column', USER_COLUMN))
|
|
546
507
|
|
|
547
508
|
def _invoke_agent_executor_with_prompt(agent_executor, prompt):
|
|
548
509
|
if not prompt:
|
|
@@ -599,6 +560,7 @@ AI: {response}"""
|
|
|
599
560
|
CONTEXT_COLUMN: [
|
|
600
561
|
json.dumps(ctx) for ctx in contexts
|
|
601
562
|
], # Serialize context to JSON string
|
|
563
|
+
TRACE_ID_COLUMN: self.langfuse_client_wrapper.get_trace_id()
|
|
602
564
|
}
|
|
603
565
|
)
|
|
604
566
|
|
|
@@ -607,6 +569,12 @@ AI: {response}"""
|
|
|
607
569
|
|
|
608
570
|
return pred_df
|
|
609
571
|
|
|
572
|
+
def add_chunk_metadata(self, chunk: Dict) -> Dict:
|
|
573
|
+
logger.debug(f'Adding metadata to chunk: {chunk}')
|
|
574
|
+
logger.debug(f'Trace ID: {self.langfuse_client_wrapper.get_trace_id()}')
|
|
575
|
+
chunk["trace_id"] = self.langfuse_client_wrapper.get_trace_id()
|
|
576
|
+
return chunk
|
|
577
|
+
|
|
610
578
|
def stream_agent(self, df: pd.DataFrame, agent_executor: AgentExecutor, args: Dict) -> Iterable[Dict]:
|
|
611
579
|
base_template = args.get('prompt_template', args['prompt_template'])
|
|
612
580
|
input_variables = re.findall(r"{{(.*?)}}", base_template)
|
|
@@ -616,21 +584,22 @@ AI: {response}"""
|
|
|
616
584
|
|
|
617
585
|
callbacks, context_callback = prepare_callbacks(self, args)
|
|
618
586
|
|
|
619
|
-
yield {"type": "start", "prompt": prompts[0]}
|
|
587
|
+
yield self.add_chunk_metadata({"type": "start", "prompt": prompts[0]})
|
|
620
588
|
|
|
621
589
|
if not hasattr(agent_executor, 'stream') or not callable(agent_executor.stream):
|
|
622
590
|
raise AttributeError("The agent_executor does not have a 'stream' method")
|
|
623
591
|
|
|
624
|
-
stream_iterator = agent_executor.stream(prompts[0],
|
|
592
|
+
stream_iterator = agent_executor.stream(prompts[0],
|
|
593
|
+
config={'callbacks': callbacks})
|
|
625
594
|
|
|
626
595
|
if not hasattr(stream_iterator, '__iter__'):
|
|
627
596
|
raise TypeError("The stream method did not return an iterable")
|
|
628
597
|
|
|
629
598
|
for chunk in stream_iterator:
|
|
630
|
-
logger.
|
|
599
|
+
logger.debug(f'Processing streaming chunk {chunk}')
|
|
631
600
|
processed_chunk = self.process_chunk(chunk)
|
|
632
601
|
logger.info(f'Processed chunk: {processed_chunk}')
|
|
633
|
-
yield processed_chunk
|
|
602
|
+
yield self.add_chunk_metadata(processed_chunk)
|
|
634
603
|
|
|
635
604
|
if return_context:
|
|
636
605
|
# Yield context if required
|
|
@@ -640,12 +609,10 @@ AI: {response}"""
|
|
|
640
609
|
|
|
641
610
|
if self.log_callback_handler.generated_sql:
|
|
642
611
|
# Yield generated SQL if available
|
|
643
|
-
yield {"type": "sql", "content": self.log_callback_handler.generated_sql}
|
|
612
|
+
yield self.add_chunk_metadata({"type": "sql", "content": self.log_callback_handler.generated_sql})
|
|
644
613
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
self.api_trace.update()
|
|
648
|
-
logger.info("Langfuse trace updated")
|
|
614
|
+
# End the run completion span and update the metadata with tool usage
|
|
615
|
+
self.langfuse_client_wrapper.end_span_stream(span=self.run_completion_span)
|
|
649
616
|
|
|
650
617
|
@staticmethod
|
|
651
618
|
def process_chunk(chunk):
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from typing import Any, Dict, Union, Optional, List
|
|
2
2
|
from uuid import uuid4
|
|
3
3
|
import datetime
|
|
4
|
-
import os
|
|
5
4
|
|
|
6
5
|
from langchain_core.callbacks.base import BaseCallbackHandler
|
|
7
6
|
|
|
@@ -67,6 +66,10 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
|
|
|
67
66
|
) -> Any:
|
|
68
67
|
"""Run when chain starts running."""
|
|
69
68
|
run_uuid = kwargs.get('run_id', uuid4()).hex
|
|
69
|
+
|
|
70
|
+
if serialized is None:
|
|
71
|
+
serialized = {}
|
|
72
|
+
|
|
70
73
|
chain_span = self.langfuse.span(
|
|
71
74
|
name=f'{serialized.get("name", "chain")}-{run_uuid}',
|
|
72
75
|
trace_id=self.trace_id,
|
|
@@ -122,42 +125,6 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
|
|
|
122
125
|
return False
|
|
123
126
|
|
|
124
127
|
|
|
125
|
-
def get_metadata(model_using: Dict) -> Dict:
|
|
126
|
-
""" Generates initial metadata mapping from information provided in a model's `using` clause.
|
|
127
|
-
Includes providers and model name.
|
|
128
|
-
"""
|
|
129
|
-
metadata_keys = ['provider', 'model_name', 'embedding_model_provider'] # keeps keys relevant for tracing
|
|
130
|
-
trace_metadata = {}
|
|
131
|
-
for key in metadata_keys:
|
|
132
|
-
if key in model_using:
|
|
133
|
-
trace_metadata[key] = model_using.get(key)
|
|
134
|
-
return trace_metadata
|
|
135
|
-
|
|
136
|
-
|
|
137
128
|
def get_skills(agent: db.Agents) -> List:
|
|
138
129
|
""" Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
|
|
139
130
|
return [rel.skill.type for rel in agent.skills_relationships]
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def get_tags(metadata: Dict) -> List:
|
|
143
|
-
""" Retrieves tags from existing langfuse metadata (built using `get_metadata` and `get_skills`), and environment variables. """
|
|
144
|
-
trace_tags = []
|
|
145
|
-
if os.getenv('FLASK_ENV'):
|
|
146
|
-
trace_tags.append(os.getenv('FLASK_ENV')) # Fix: use something other than flask_env
|
|
147
|
-
if 'provider' in metadata:
|
|
148
|
-
trace_tags.append(metadata['provider'])
|
|
149
|
-
return trace_tags
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def get_tool_usage(trace) -> Dict:
|
|
153
|
-
""" Retrieves tool usage information from a langfuse trace.
|
|
154
|
-
Note: assumes trace marks an action with string `AgentAction` """
|
|
155
|
-
tool_usage = {}
|
|
156
|
-
steps = [s.name for s in trace.observations]
|
|
157
|
-
for step in steps:
|
|
158
|
-
if 'AgentAction' in step:
|
|
159
|
-
tool_name = step.split('-')[1]
|
|
160
|
-
if tool_name not in tool_usage:
|
|
161
|
-
tool_usage[tool_name] = 0
|
|
162
|
-
tool_usage[tool_name] += 1
|
|
163
|
-
return tool_usage
|
|
@@ -4,36 +4,38 @@
|
|
|
4
4
|
"""
|
|
5
5
|
from typing import Any, Iterable, List, Optional
|
|
6
6
|
|
|
7
|
-
from mindsdb.interfaces.skills.skill_tool import skill_tool
|
|
8
7
|
from mindsdb.utilities import log
|
|
9
8
|
from langchain_community.utilities import SQLDatabase
|
|
9
|
+
from mindsdb.interfaces.skills.sql_agent import SQLAgent
|
|
10
10
|
|
|
11
11
|
logger = log.getLogger(__name__)
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class MindsDBSQL(SQLDatabase):
|
|
15
|
+
@staticmethod
|
|
16
|
+
def custom_init(
|
|
17
|
+
sql_agent: 'SQLAgent'
|
|
18
|
+
) -> 'MindsDBSQL':
|
|
19
|
+
instance = MindsDBSQL()
|
|
20
|
+
instance._sql_agent = sql_agent
|
|
21
|
+
return instance
|
|
22
|
+
|
|
15
23
|
""" Can't modify signature, as LangChain does a Pydantic check."""
|
|
16
24
|
def __init__(
|
|
17
25
|
self,
|
|
18
|
-
engine=None,
|
|
19
|
-
|
|
26
|
+
engine: Optional[Any] = None,
|
|
27
|
+
schema: Optional[str] = None,
|
|
20
28
|
metadata: Optional[Any] = None,
|
|
21
29
|
ignore_tables: Optional[List[str]] = None,
|
|
22
30
|
include_tables: Optional[List[str]] = None,
|
|
23
31
|
sample_rows_in_table_info: int = 3,
|
|
24
|
-
schema: Optional[str] = None,
|
|
25
32
|
indexes_in_table_info: bool = False,
|
|
26
33
|
custom_table_info: Optional[dict] = None,
|
|
27
|
-
view_support:
|
|
34
|
+
view_support: bool = True,
|
|
35
|
+
max_string_length: int = 300,
|
|
36
|
+
lazy_table_reflection: bool = False,
|
|
28
37
|
):
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
self._sql_agent = skill_tool.get_sql_agent(
|
|
32
|
-
database,
|
|
33
|
-
include_tables,
|
|
34
|
-
ignore_tables,
|
|
35
|
-
sample_rows_in_table_info
|
|
36
|
-
)
|
|
38
|
+
pass
|
|
37
39
|
|
|
38
40
|
@property
|
|
39
41
|
def dialect(self) -> str:
|
|
@@ -48,6 +50,12 @@ class MindsDBSQL(SQLDatabase):
|
|
|
48
50
|
return self._sql_agent.get_usable_table_names()
|
|
49
51
|
|
|
50
52
|
def get_table_info_no_throw(self, table_names: Optional[List[str]] = None) -> str:
|
|
53
|
+
for i in range(len(table_names)):
|
|
54
|
+
if '$START$' in table_names[i]:
|
|
55
|
+
table_names[i] = table_names[i].partition('$START$')[-1]
|
|
56
|
+
if '$END$' in table_names[i]:
|
|
57
|
+
table_names[i] = table_names[i].partition('$END$')[0]
|
|
58
|
+
table_names[i] = table_names[i].strip(' ')
|
|
51
59
|
return self._sql_agent.get_table_info_safe(table_names)
|
|
52
60
|
|
|
53
61
|
def run_no_throw(self, command: str, fetch: str = "all") -> str:
|
|
@@ -4,6 +4,7 @@ from mindsdb.interfaces.agents.agents_controller import AgentsController
|
|
|
4
4
|
from mindsdb.interfaces.chatbot.chatbot_task import ChatBotTask
|
|
5
5
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
6
6
|
from mindsdb.interfaces.storage import db
|
|
7
|
+
from mindsdb.interfaces.model.functions import get_project_records
|
|
7
8
|
|
|
8
9
|
from mindsdb.utilities.context import context as ctx
|
|
9
10
|
|
|
@@ -128,16 +129,11 @@ class ChatBotController:
|
|
|
128
129
|
all_bots (List[db.ChatBots]): List of database chatbot object
|
|
129
130
|
'''
|
|
130
131
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
query = query.filter_by(name=project_name)
|
|
137
|
-
project_names = {
|
|
138
|
-
i.id: i.name
|
|
139
|
-
for i in query
|
|
140
|
-
}
|
|
132
|
+
project_names = {}
|
|
133
|
+
for project in get_project_records():
|
|
134
|
+
if project_name is not None and project.name != project_name:
|
|
135
|
+
continue
|
|
136
|
+
project_names[project.id] = project.name
|
|
141
137
|
|
|
142
138
|
query = db.session.query(
|
|
143
139
|
db.ChatBots, db.Tasks
|
|
@@ -228,9 +224,9 @@ class ChatBotController:
|
|
|
228
224
|
raise ValueError('Need to provide either "model_name" or "agent_name" when creating a chatbot')
|
|
229
225
|
if agent_name is not None:
|
|
230
226
|
agent = self.agents_controller.get_agent(agent_name, project_name)
|
|
231
|
-
model_name = agent.model_name
|
|
232
227
|
if agent is None:
|
|
233
228
|
raise ValueError(f"Agent with name doesn't exist: {agent_name}")
|
|
229
|
+
model_name = agent.model_name
|
|
234
230
|
agent_id = agent.id
|
|
235
231
|
else:
|
|
236
232
|
# Create a new agent with the given model name.
|
|
@@ -53,15 +53,23 @@ class ChatBotTask(BaseTask):
|
|
|
53
53
|
|
|
54
54
|
chat_params = self.chat_handler.get_chat_config()
|
|
55
55
|
polling = chat_params['polling']['type']
|
|
56
|
+
|
|
57
|
+
memory = chat_params['memory']['type'] if 'memory' in chat_params else None
|
|
58
|
+
memory_cls = None
|
|
59
|
+
if memory:
|
|
60
|
+
memory_cls = DBMemory if memory == 'db' else HandlerMemory
|
|
61
|
+
|
|
56
62
|
if polling == 'message_count':
|
|
57
63
|
chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
|
|
58
64
|
self.chat_pooling = MessageCountPolling(self, chat_params)
|
|
59
|
-
|
|
65
|
+
# The default type for message count polling is HandlerMemory if not specified.
|
|
66
|
+
self.memory = HandlerMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
|
|
60
67
|
|
|
61
68
|
elif polling == 'realtime':
|
|
62
69
|
chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
|
|
63
70
|
self.chat_pooling = RealtimePolling(self, chat_params)
|
|
64
|
-
|
|
71
|
+
# The default type for real-time polling is DBMemory if not specified.
|
|
72
|
+
self.memory = DBMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
|
|
65
73
|
|
|
66
74
|
elif polling == 'webhook':
|
|
67
75
|
self.chat_pooling = WebhookPolling(self, chat_params)
|
|
@@ -80,11 +88,11 @@ class ChatBotTask(BaseTask):
|
|
|
80
88
|
self.chat_pooling.run(stop_event)
|
|
81
89
|
|
|
82
90
|
def on_message(self, message: ChatBotMessage, chat_id=None, chat_memory=None, table_name=None):
|
|
83
|
-
if not chat_id and chat_memory:
|
|
91
|
+
if not chat_id and not chat_memory:
|
|
84
92
|
raise Exception('chat_id or chat_memory should be provided')
|
|
85
93
|
|
|
86
94
|
try:
|
|
87
|
-
self._on_holding_message(chat_id, table_name)
|
|
95
|
+
self._on_holding_message(chat_id, chat_memory, table_name)
|
|
88
96
|
self._on_message(message, chat_id, chat_memory, table_name)
|
|
89
97
|
except (SystemExit, KeyboardInterrupt):
|
|
90
98
|
raise
|
|
@@ -93,15 +101,18 @@ class ChatBotTask(BaseTask):
|
|
|
93
101
|
logger.error(error)
|
|
94
102
|
self.set_error(str(error))
|
|
95
103
|
|
|
96
|
-
def _on_holding_message(self, chat_id: str, table_name: str = None):
|
|
104
|
+
def _on_holding_message(self, chat_id: str = None, chat_memory: BaseMemory = None, table_name: str = None):
|
|
97
105
|
"""
|
|
98
106
|
Send a message to hold the user's attention while the bot is processing the request.
|
|
99
107
|
This message will not be saved in the chat memory.
|
|
100
108
|
|
|
101
109
|
Args:
|
|
102
110
|
chat_id (str): The ID of the chat.
|
|
111
|
+
chat_memory (BaseMemory): The memory of the chat.
|
|
103
112
|
table_name (str): The name of the table.
|
|
104
113
|
"""
|
|
114
|
+
chat_id = chat_id if chat_id else chat_memory.chat_id
|
|
115
|
+
|
|
105
116
|
response_message = ChatBotMessage(
|
|
106
117
|
ChatBotMessage.Type.DIRECT,
|
|
107
118
|
HOLDING_MESSAGE,
|