MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +254 -253
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +55 -52
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +9 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +1 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +30 -12
- mindsdb/integrations/utilities/rag/settings.py +6 -2
- mindsdb/interfaces/agents/agents_controller.py +3 -5
- mindsdb/interfaces/agents/langchain_agent.py +112 -150
- mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/projects.py +17 -15
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +6 -1
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
- mindsdb/interfaces/skills/skill_tool.py +97 -53
- mindsdb/interfaces/skills/sql_agent.py +77 -36
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/context.py +2 -1
- mindsdb/utilities/langfuse.py +264 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from concurrent.futures import as_completed, TimeoutError
|
|
3
|
-
from typing import Dict, Iterable, List
|
|
3
|
+
from typing import Dict, Iterable, List, Optional
|
|
4
4
|
from uuid import uuid4
|
|
5
|
-
import os
|
|
6
5
|
import re
|
|
7
6
|
import numpy as np
|
|
8
7
|
import pandas as pd
|
|
@@ -20,9 +19,6 @@ from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
|
|
20
19
|
from langchain_core.messages.base import BaseMessage
|
|
21
20
|
from langchain_core.prompts import PromptTemplate
|
|
22
21
|
from langchain_core.tools import Tool
|
|
23
|
-
from langfuse import Langfuse
|
|
24
|
-
from langfuse.api.resources.commons.errors.not_found_error import NotFoundError as TraceNotFoundError
|
|
25
|
-
from langfuse.callback import CallbackHandler
|
|
26
22
|
|
|
27
23
|
from mindsdb.integrations.handlers.openai_handler.constants import (
|
|
28
24
|
CHAT_MODELS as OPEN_AI_CHAT_MODELS,
|
|
@@ -35,13 +31,11 @@ from mindsdb.utilities.context_executor import ContextThreadPoolExecutor
|
|
|
35
31
|
from mindsdb.interfaces.storage import db
|
|
36
32
|
from mindsdb.utilities.context import context as ctx
|
|
37
33
|
|
|
38
|
-
|
|
39
34
|
from .mindsdb_chat_model import ChatMindsdb
|
|
40
35
|
from .callback_handlers import LogCallbackHandler, ContextCaptureCallback
|
|
41
|
-
from .langfuse_callback_handler import LangfuseCallbackHandler,
|
|
36
|
+
from .langfuse_callback_handler import LangfuseCallbackHandler, get_skills
|
|
42
37
|
from .safe_output_parser import SafeOutputParser
|
|
43
38
|
|
|
44
|
-
|
|
45
39
|
from .constants import (
|
|
46
40
|
DEFAULT_AGENT_TIMEOUT_SECONDS,
|
|
47
41
|
DEFAULT_AGENT_TYPE,
|
|
@@ -62,6 +56,8 @@ from langchain_anthropic import ChatAnthropic
|
|
|
62
56
|
from langchain_core.messages import SystemMessage
|
|
63
57
|
from langchain_openai import ChatOpenAI
|
|
64
58
|
|
|
59
|
+
from mindsdb.utilities.langfuse import LangfuseClientWrapper
|
|
60
|
+
|
|
65
61
|
_PARSING_ERROR_PREFIXES = [
|
|
66
62
|
"An output parsing error occurred",
|
|
67
63
|
"Could not parse LLM output",
|
|
@@ -207,34 +203,42 @@ def process_chunk(chunk):
|
|
|
207
203
|
|
|
208
204
|
|
|
209
205
|
class LangchainAgent:
|
|
206
|
+
|
|
210
207
|
def __init__(self, agent: db.Agents, model: dict = None):
|
|
208
|
+
|
|
211
209
|
self.agent = agent
|
|
212
210
|
self.model = model
|
|
213
|
-
|
|
214
|
-
self.
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
211
|
+
|
|
212
|
+
self.run_completion_span: Optional[object] = None
|
|
213
|
+
self.llm: Optional[object] = None
|
|
214
|
+
self.embedding_model: Optional[object] = None
|
|
215
|
+
|
|
216
|
+
self.log_callback_handler: Optional[object] = None
|
|
217
|
+
self.langfuse_callback_handler: Optional[object] = None # native langfuse callback handler
|
|
218
|
+
self.mdb_langfuse_callback_handler: Optional[object] = None # custom (see langfuse_callback_handler.py)
|
|
219
|
+
|
|
220
|
+
self.langfuse_client_wrapper = LangfuseClientWrapper()
|
|
221
|
+
self.args = self._initialize_args()
|
|
222
|
+
|
|
223
|
+
# Back compatibility for old models
|
|
224
|
+
self.provider = self.args.get("provider", get_llm_provider(self.args))
|
|
225
|
+
|
|
226
|
+
def _initialize_args(self) -> dict:
|
|
227
|
+
"""Initialize the arguments based on the agent's parameters."""
|
|
228
|
+
args = self.agent.params.copy()
|
|
229
|
+
args["model_name"] = self.agent.model_name
|
|
230
|
+
args["provider"] = self.agent.provider
|
|
218
231
|
args["embedding_model_provider"] = args.get(
|
|
219
232
|
"embedding_model", get_embedding_model_provider(args)
|
|
220
233
|
)
|
|
221
234
|
|
|
222
|
-
self.langfuse = None
|
|
223
|
-
if os.getenv('LANGFUSE_PUBLIC_KEY') is not None:
|
|
224
|
-
self.langfuse = Langfuse(
|
|
225
|
-
public_key=os.getenv('LANGFUSE_PUBLIC_KEY'),
|
|
226
|
-
secret_key=os.getenv('LANGFUSE_SECRET_KEY'),
|
|
227
|
-
host=os.getenv('LANGFUSE_HOST'),
|
|
228
|
-
release=os.getenv('LANGFUSE_RELEASE', 'local'),
|
|
229
|
-
)
|
|
230
|
-
|
|
231
235
|
# agent is using current langchain model
|
|
232
|
-
if agent.provider == "mindsdb":
|
|
233
|
-
args["model_name"] = agent.model_name
|
|
236
|
+
if self.agent.provider == "mindsdb":
|
|
237
|
+
args["model_name"] = self.agent.model_name
|
|
234
238
|
|
|
235
239
|
# get prompt
|
|
236
240
|
prompt_template = (
|
|
237
|
-
model["problem_definition"].get("using", {}).get("prompt_template")
|
|
241
|
+
self.model["problem_definition"].get("using", {}).get("prompt_template")
|
|
238
242
|
)
|
|
239
243
|
if prompt_template is not None:
|
|
240
244
|
# only update prompt_template if it is set on the model
|
|
@@ -248,57 +252,47 @@ class LangchainAgent:
|
|
|
248
252
|
"Please provide a `prompt_template` or set `mode=retrieval`"
|
|
249
253
|
)
|
|
250
254
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
255
|
+
return args
|
|
256
|
+
|
|
257
|
+
def get_metadata(self) -> Dict:
|
|
258
|
+
return {
|
|
259
|
+
'provider': self.provider,
|
|
260
|
+
'model_name': self.args["model_name"],
|
|
261
|
+
'embedding_model_provider': self.args.get('embedding_model_provider',
|
|
262
|
+
get_embedding_model_provider(self.args)),
|
|
263
|
+
'skills': get_skills(self.agent),
|
|
264
|
+
'user_id': ctx.user_id,
|
|
265
|
+
'session_id': ctx.session_id,
|
|
266
|
+
'company_id': ctx.company_id,
|
|
267
|
+
'user_class': ctx.user_class,
|
|
268
|
+
'email_confirmed': ctx.email_confirmed
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
def get_tags(self) -> List:
|
|
272
|
+
return [
|
|
273
|
+
self.provider,
|
|
274
|
+
]
|
|
259
275
|
|
|
260
276
|
def get_completion(self, messages, stream: bool = False):
|
|
261
277
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
}
|
|
276
|
-
trace_metadata['skills'] = get_skills(self.agent)
|
|
277
|
-
trace_tags = get_tags(trace_metadata)
|
|
278
|
-
|
|
279
|
-
# Set our user info to pass into langfuse trace, with fault tolerance in each individual one just incase on purpose
|
|
280
|
-
trace_metadata['user_id'] = ctx.user_id
|
|
281
|
-
trace_metadata['session_id'] = ctx.session_id
|
|
282
|
-
trace_metadata['company_id'] = ctx.company_id
|
|
283
|
-
trace_metadata['user_class'] = ctx.user_class
|
|
284
|
-
trace_metadata['email_confirmed'] = ctx.email_confirmed
|
|
285
|
-
|
|
286
|
-
self.api_trace = self.langfuse.trace(
|
|
287
|
-
name='api-completion',
|
|
288
|
-
input=messages,
|
|
289
|
-
tags=trace_tags,
|
|
290
|
-
metadata=trace_metadata,
|
|
291
|
-
user_id=ctx.user_id,
|
|
292
|
-
session_id=ctx.session_id,
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
self.run_completion_span = self.api_trace.span(name='run-completion', input=messages)
|
|
296
|
-
trace_id = self.api_trace.id
|
|
297
|
-
observation_id = self.run_completion_span.id
|
|
278
|
+
# Get metadata and tags to be used in the trace
|
|
279
|
+
metadata = self.get_metadata()
|
|
280
|
+
tags = self.get_tags()
|
|
281
|
+
|
|
282
|
+
# Set up trace for the API completion in Langfuse
|
|
283
|
+
self.langfuse_client_wrapper.setup_trace(
|
|
284
|
+
name='api-completion',
|
|
285
|
+
input=messages,
|
|
286
|
+
tags=tags,
|
|
287
|
+
metadata=metadata,
|
|
288
|
+
user_id=ctx.user_id,
|
|
289
|
+
session_id=ctx.session_id,
|
|
290
|
+
)
|
|
298
291
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
292
|
+
# Set up trace for the run completion in Langfuse
|
|
293
|
+
self.run_completion_span = self.langfuse_client_wrapper.start_span(
|
|
294
|
+
name='run-completion',
|
|
295
|
+
input=messages)
|
|
302
296
|
|
|
303
297
|
if stream:
|
|
304
298
|
return self._get_completion_stream(messages)
|
|
@@ -317,21 +311,8 @@ class LangchainAgent:
|
|
|
317
311
|
df.iloc[:-1, df.columns.get_loc(user_column)] = None
|
|
318
312
|
response = self.run_agent(df, agent, args)
|
|
319
313
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
self.api_trace.update(output=response)
|
|
323
|
-
|
|
324
|
-
# update metadata with tool usage
|
|
325
|
-
try:
|
|
326
|
-
# Ensure all batched traces are sent before fetching.
|
|
327
|
-
self.langfuse.flush()
|
|
328
|
-
trace = self.langfuse.get_trace(self.trace_id)
|
|
329
|
-
trace_metadata['tool_usage'] = get_tool_usage(trace)
|
|
330
|
-
self.api_trace.update(metadata=trace_metadata)
|
|
331
|
-
except TraceNotFoundError:
|
|
332
|
-
logger.warning(f'Langfuse trace {self.trace_id} not found')
|
|
333
|
-
except Exception as e:
|
|
334
|
-
logger.error(f'Something went wrong while processing Langfuse trace {self.trace_id}: {str(e)}')
|
|
314
|
+
# End the run completion span and update the metadata with tool usage
|
|
315
|
+
self.langfuse_client_wrapper.end_span(span=self.run_completion_span, output=response)
|
|
335
316
|
|
|
336
317
|
return response
|
|
337
318
|
|
|
@@ -349,6 +330,7 @@ class LangchainAgent:
|
|
|
349
330
|
|
|
350
331
|
df = pd.DataFrame(messages)
|
|
351
332
|
|
|
333
|
+
self.embedding_model_provider = args.get('embedding_model_provider', get_embedding_model_provider(args))
|
|
352
334
|
# Back compatibility for old models
|
|
353
335
|
self.provider = args.get("provider", get_llm_provider(args))
|
|
354
336
|
|
|
@@ -445,69 +427,49 @@ class LangchainAgent:
|
|
|
445
427
|
return all_tools
|
|
446
428
|
|
|
447
429
|
def _get_agent_callbacks(self, args: Dict) -> List:
|
|
430
|
+
all_callbacks = []
|
|
448
431
|
|
|
449
432
|
if self.log_callback_handler is None:
|
|
450
433
|
self.log_callback_handler = LogCallbackHandler(logger)
|
|
451
434
|
|
|
452
|
-
all_callbacks
|
|
435
|
+
all_callbacks.append(self.log_callback_handler)
|
|
453
436
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
"langfuse_secret_key", os.getenv("LANGFUSE_SECRET_KEY")
|
|
459
|
-
)
|
|
460
|
-
langfuse_host = args.get("langfuse_host", os.getenv("LANGFUSE_HOST"))
|
|
461
|
-
are_langfuse_args_present = (
|
|
462
|
-
bool(langfuse_public_key)
|
|
463
|
-
and bool(langfuse_secret_key)
|
|
464
|
-
and bool(langfuse_host)
|
|
465
|
-
)
|
|
437
|
+
if self.langfuse_client_wrapper.trace is None:
|
|
438
|
+
# Get metadata and tags to be used in the trace
|
|
439
|
+
metadata = self.get_metadata()
|
|
440
|
+
tags = self.get_tags()
|
|
466
441
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
)
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
observation_id = args.get(
|
|
499
|
-
"observation_id", self.observation_id or uuid4().hex
|
|
500
|
-
)
|
|
501
|
-
langfuse = Langfuse(
|
|
502
|
-
host=langfuse_host,
|
|
503
|
-
public_key=langfuse_public_key,
|
|
504
|
-
secret_key=langfuse_secret_key,
|
|
505
|
-
)
|
|
506
|
-
self.mdb_langfuse_callback_handler = LangfuseCallbackHandler(
|
|
507
|
-
langfuse=langfuse,
|
|
508
|
-
trace_id=trace_id,
|
|
509
|
-
observation_id=observation_id,
|
|
510
|
-
)
|
|
442
|
+
trace_name = "NativeTrace-MindsDB-AgentExecutor"
|
|
443
|
+
|
|
444
|
+
# Set up trace for the API completion in Langfuse
|
|
445
|
+
self.langfuse_client_wrapper.setup_trace(
|
|
446
|
+
name=trace_name,
|
|
447
|
+
tags=tags,
|
|
448
|
+
metadata=metadata,
|
|
449
|
+
user_id=ctx.user_id,
|
|
450
|
+
session_id=ctx.session_id,
|
|
451
|
+
)
|
|
452
|
+
|
|
453
|
+
if self.langfuse_callback_handler is None:
|
|
454
|
+
self.langfuse_callback_handler = self.langfuse_client_wrapper.get_langchain_handler()
|
|
455
|
+
|
|
456
|
+
# custom tracer
|
|
457
|
+
if self.mdb_langfuse_callback_handler is None:
|
|
458
|
+
trace_id = None
|
|
459
|
+
if self.langfuse_client_wrapper.trace is not None:
|
|
460
|
+
trace_id = args.get("trace_id", self.langfuse_client_wrapper.trace.id)
|
|
461
|
+
|
|
462
|
+
span_id = None
|
|
463
|
+
if self.run_completion_span is not None:
|
|
464
|
+
span_id = self.run_completion_span.id
|
|
465
|
+
|
|
466
|
+
observation_id = args.get("observation_id", span_id or uuid4().hex)
|
|
467
|
+
|
|
468
|
+
self.mdb_langfuse_callback_handler = LangfuseCallbackHandler(
|
|
469
|
+
langfuse=self.langfuse_client_wrapper.client,
|
|
470
|
+
trace_id=trace_id,
|
|
471
|
+
observation_id=observation_id,
|
|
472
|
+
)
|
|
511
473
|
|
|
512
474
|
# obs: we may want to unify these; native langfuse handler provides details as a tree on a sub-step of the overarching custom one # noqa
|
|
513
475
|
if self.langfuse_callback_handler is not None:
|
|
@@ -542,7 +504,8 @@ AI: {response}"""
|
|
|
542
504
|
return_context = args.get('return_context', True)
|
|
543
505
|
input_variables = re.findall(r"{{(.*?)}}", base_template)
|
|
544
506
|
|
|
545
|
-
prompts, empty_prompt_ids = prepare_prompts(df, base_template, input_variables,
|
|
507
|
+
prompts, empty_prompt_ids = prepare_prompts(df, base_template, input_variables,
|
|
508
|
+
args.get('user_column', USER_COLUMN))
|
|
546
509
|
|
|
547
510
|
def _invoke_agent_executor_with_prompt(agent_executor, prompt):
|
|
548
511
|
if not prompt:
|
|
@@ -621,7 +584,8 @@ AI: {response}"""
|
|
|
621
584
|
if not hasattr(agent_executor, 'stream') or not callable(agent_executor.stream):
|
|
622
585
|
raise AttributeError("The agent_executor does not have a 'stream' method")
|
|
623
586
|
|
|
624
|
-
stream_iterator = agent_executor.stream(prompts[0],
|
|
587
|
+
stream_iterator = agent_executor.stream(prompts[0],
|
|
588
|
+
config={'callbacks': callbacks})
|
|
625
589
|
|
|
626
590
|
if not hasattr(stream_iterator, '__iter__'):
|
|
627
591
|
raise TypeError("The stream method did not return an iterable")
|
|
@@ -642,10 +606,8 @@ AI: {response}"""
|
|
|
642
606
|
# Yield generated SQL if available
|
|
643
607
|
yield {"type": "sql", "content": self.log_callback_handler.generated_sql}
|
|
644
608
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
self.api_trace.update()
|
|
648
|
-
logger.info("Langfuse trace updated")
|
|
609
|
+
# End the run completion span and update the metadata with tool usage
|
|
610
|
+
self.langfuse_client_wrapper.end_span_stream(span=self.run_completion_span)
|
|
649
611
|
|
|
650
612
|
@staticmethod
|
|
651
613
|
def process_chunk(chunk):
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from typing import Any, Dict, Union, Optional, List
|
|
2
2
|
from uuid import uuid4
|
|
3
3
|
import datetime
|
|
4
|
-
import os
|
|
5
4
|
|
|
6
5
|
from langchain_core.callbacks.base import BaseCallbackHandler
|
|
7
6
|
|
|
@@ -122,42 +121,6 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
|
|
|
122
121
|
return False
|
|
123
122
|
|
|
124
123
|
|
|
125
|
-
def get_metadata(model_using: Dict) -> Dict:
|
|
126
|
-
""" Generates initial metadata mapping from information provided in a model's `using` clause.
|
|
127
|
-
Includes providers and model name.
|
|
128
|
-
"""
|
|
129
|
-
metadata_keys = ['provider', 'model_name', 'embedding_model_provider'] # keeps keys relevant for tracing
|
|
130
|
-
trace_metadata = {}
|
|
131
|
-
for key in metadata_keys:
|
|
132
|
-
if key in model_using:
|
|
133
|
-
trace_metadata[key] = model_using.get(key)
|
|
134
|
-
return trace_metadata
|
|
135
|
-
|
|
136
|
-
|
|
137
124
|
def get_skills(agent: db.Agents) -> List:
|
|
138
125
|
""" Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
|
|
139
126
|
return [rel.skill.type for rel in agent.skills_relationships]
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def get_tags(metadata: Dict) -> List:
|
|
143
|
-
""" Retrieves tags from existing langfuse metadata (built using `get_metadata` and `get_skills`), and environment variables. """
|
|
144
|
-
trace_tags = []
|
|
145
|
-
if os.getenv('FLASK_ENV'):
|
|
146
|
-
trace_tags.append(os.getenv('FLASK_ENV')) # Fix: use something other than flask_env
|
|
147
|
-
if 'provider' in metadata:
|
|
148
|
-
trace_tags.append(metadata['provider'])
|
|
149
|
-
return trace_tags
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def get_tool_usage(trace) -> Dict:
|
|
153
|
-
""" Retrieves tool usage information from a langfuse trace.
|
|
154
|
-
Note: assumes trace marks an action with string `AgentAction` """
|
|
155
|
-
tool_usage = {}
|
|
156
|
-
steps = [s.name for s in trace.observations]
|
|
157
|
-
for step in steps:
|
|
158
|
-
if 'AgentAction' in step:
|
|
159
|
-
tool_name = step.split('-')[1]
|
|
160
|
-
if tool_name not in tool_usage:
|
|
161
|
-
tool_usage[tool_name] = 0
|
|
162
|
-
tool_usage[tool_name] += 1
|
|
163
|
-
return tool_usage
|
|
@@ -4,36 +4,38 @@
|
|
|
4
4
|
"""
|
|
5
5
|
from typing import Any, Iterable, List, Optional
|
|
6
6
|
|
|
7
|
-
from mindsdb.interfaces.skills.skill_tool import skill_tool
|
|
8
7
|
from mindsdb.utilities import log
|
|
9
8
|
from langchain_community.utilities import SQLDatabase
|
|
9
|
+
from mindsdb.interfaces.skills.sql_agent import SQLAgent
|
|
10
10
|
|
|
11
11
|
logger = log.getLogger(__name__)
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class MindsDBSQL(SQLDatabase):
|
|
15
|
+
@staticmethod
|
|
16
|
+
def custom_init(
|
|
17
|
+
sql_agent: 'SQLAgent'
|
|
18
|
+
) -> 'MindsDBSQL':
|
|
19
|
+
instance = MindsDBSQL()
|
|
20
|
+
instance._sql_agent = sql_agent
|
|
21
|
+
return instance
|
|
22
|
+
|
|
15
23
|
""" Can't modify signature, as LangChain does a Pydantic check."""
|
|
16
24
|
def __init__(
|
|
17
25
|
self,
|
|
18
|
-
engine=None,
|
|
19
|
-
|
|
26
|
+
engine: Optional[Any] = None,
|
|
27
|
+
schema: Optional[str] = None,
|
|
20
28
|
metadata: Optional[Any] = None,
|
|
21
29
|
ignore_tables: Optional[List[str]] = None,
|
|
22
30
|
include_tables: Optional[List[str]] = None,
|
|
23
31
|
sample_rows_in_table_info: int = 3,
|
|
24
|
-
schema: Optional[str] = None,
|
|
25
32
|
indexes_in_table_info: bool = False,
|
|
26
33
|
custom_table_info: Optional[dict] = None,
|
|
27
|
-
view_support:
|
|
34
|
+
view_support: bool = True,
|
|
35
|
+
max_string_length: int = 300,
|
|
36
|
+
lazy_table_reflection: bool = False,
|
|
28
37
|
):
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
self._sql_agent = skill_tool.get_sql_agent(
|
|
32
|
-
database,
|
|
33
|
-
include_tables,
|
|
34
|
-
ignore_tables,
|
|
35
|
-
sample_rows_in_table_info
|
|
36
|
-
)
|
|
38
|
+
pass
|
|
37
39
|
|
|
38
40
|
@property
|
|
39
41
|
def dialect(self) -> str:
|
|
@@ -4,6 +4,7 @@ from mindsdb.interfaces.agents.agents_controller import AgentsController
|
|
|
4
4
|
from mindsdb.interfaces.chatbot.chatbot_task import ChatBotTask
|
|
5
5
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
6
6
|
from mindsdb.interfaces.storage import db
|
|
7
|
+
from mindsdb.interfaces.model.functions import get_project_records
|
|
7
8
|
|
|
8
9
|
from mindsdb.utilities.context import context as ctx
|
|
9
10
|
|
|
@@ -128,16 +129,11 @@ class ChatBotController:
|
|
|
128
129
|
all_bots (List[db.ChatBots]): List of database chatbot object
|
|
129
130
|
'''
|
|
130
131
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
query = query.filter_by(name=project_name)
|
|
137
|
-
project_names = {
|
|
138
|
-
i.id: i.name
|
|
139
|
-
for i in query
|
|
140
|
-
}
|
|
132
|
+
project_names = {}
|
|
133
|
+
for project in get_project_records():
|
|
134
|
+
if project_name is not None and project.name != project_name:
|
|
135
|
+
continue
|
|
136
|
+
project_names[project.id] = project.name
|
|
141
137
|
|
|
142
138
|
query = db.session.query(
|
|
143
139
|
db.ChatBots, db.Tasks
|
|
@@ -228,9 +224,9 @@ class ChatBotController:
|
|
|
228
224
|
raise ValueError('Need to provide either "model_name" or "agent_name" when creating a chatbot')
|
|
229
225
|
if agent_name is not None:
|
|
230
226
|
agent = self.agents_controller.get_agent(agent_name, project_name)
|
|
231
|
-
model_name = agent.model_name
|
|
232
227
|
if agent is None:
|
|
233
228
|
raise ValueError(f"Agent with name doesn't exist: {agent_name}")
|
|
229
|
+
model_name = agent.model_name
|
|
234
230
|
agent_id = agent.id
|
|
235
231
|
else:
|
|
236
232
|
# Create a new agent with the given model name.
|
|
@@ -53,15 +53,23 @@ class ChatBotTask(BaseTask):
|
|
|
53
53
|
|
|
54
54
|
chat_params = self.chat_handler.get_chat_config()
|
|
55
55
|
polling = chat_params['polling']['type']
|
|
56
|
+
|
|
57
|
+
memory = chat_params['memory']['type'] if 'memory' in chat_params else None
|
|
58
|
+
memory_cls = None
|
|
59
|
+
if memory:
|
|
60
|
+
memory_cls = DBMemory if memory == 'db' else HandlerMemory
|
|
61
|
+
|
|
56
62
|
if polling == 'message_count':
|
|
57
63
|
chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
|
|
58
64
|
self.chat_pooling = MessageCountPolling(self, chat_params)
|
|
59
|
-
|
|
65
|
+
# The default type for message count polling is HandlerMemory if not specified.
|
|
66
|
+
self.memory = HandlerMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
|
|
60
67
|
|
|
61
68
|
elif polling == 'realtime':
|
|
62
69
|
chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
|
|
63
70
|
self.chat_pooling = RealtimePolling(self, chat_params)
|
|
64
|
-
|
|
71
|
+
# The default type for real-time polling is DBMemory if not specified.
|
|
72
|
+
self.memory = DBMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
|
|
65
73
|
|
|
66
74
|
elif polling == 'webhook':
|
|
67
75
|
self.chat_pooling = WebhookPolling(self, chat_params)
|
|
@@ -80,11 +88,11 @@ class ChatBotTask(BaseTask):
|
|
|
80
88
|
self.chat_pooling.run(stop_event)
|
|
81
89
|
|
|
82
90
|
def on_message(self, message: ChatBotMessage, chat_id=None, chat_memory=None, table_name=None):
|
|
83
|
-
if not chat_id and chat_memory:
|
|
91
|
+
if not chat_id and not chat_memory:
|
|
84
92
|
raise Exception('chat_id or chat_memory should be provided')
|
|
85
93
|
|
|
86
94
|
try:
|
|
87
|
-
self._on_holding_message(chat_id, table_name)
|
|
95
|
+
self._on_holding_message(chat_id, chat_memory, table_name)
|
|
88
96
|
self._on_message(message, chat_id, chat_memory, table_name)
|
|
89
97
|
except (SystemExit, KeyboardInterrupt):
|
|
90
98
|
raise
|
|
@@ -93,15 +101,18 @@ class ChatBotTask(BaseTask):
|
|
|
93
101
|
logger.error(error)
|
|
94
102
|
self.set_error(str(error))
|
|
95
103
|
|
|
96
|
-
def _on_holding_message(self, chat_id: str, table_name: str = None):
|
|
104
|
+
def _on_holding_message(self, chat_id: str = None, chat_memory: BaseMemory = None, table_name: str = None):
|
|
97
105
|
"""
|
|
98
106
|
Send a message to hold the user's attention while the bot is processing the request.
|
|
99
107
|
This message will not be saved in the chat memory.
|
|
100
108
|
|
|
101
109
|
Args:
|
|
102
110
|
chat_id (str): The ID of the chat.
|
|
111
|
+
chat_memory (BaseMemory): The memory of the chat.
|
|
103
112
|
table_name (str): The name of the table.
|
|
104
113
|
"""
|
|
114
|
+
chat_id = chat_id if chat_id else chat_memory.chat_id
|
|
115
|
+
|
|
105
116
|
response_message = ChatBotMessage(
|
|
106
117
|
ChatBotMessage.Type.DIRECT,
|
|
107
118
|
HOLDING_MESSAGE,
|