MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +254 -253
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +55 -52
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +9 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +1 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +30 -12
- mindsdb/integrations/utilities/rag/settings.py +6 -2
- mindsdb/interfaces/agents/agents_controller.py +3 -5
- mindsdb/interfaces/agents/langchain_agent.py +112 -150
- mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/projects.py +17 -15
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +6 -1
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
- mindsdb/interfaces/skills/skill_tool.py +97 -53
- mindsdb/interfaces/skills/sql_agent.py +77 -36
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/context.py +2 -1
- mindsdb/utilities/langfuse.py +264 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import typing
|
|
3
|
+
|
|
4
|
+
from mindsdb.utilities import log
|
|
5
|
+
from langfuse import Langfuse
|
|
6
|
+
from langfuse.client import StatefulSpanClient
|
|
7
|
+
from langfuse.callback import CallbackHandler
|
|
8
|
+
from langfuse.api.resources.commons.errors.not_found_error import NotFoundError as TraceNotFoundError
|
|
9
|
+
|
|
10
|
+
logger = log.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
def _env_flag(name: str) -> bool:
    """Return True when the environment variable *name* is set to "true" (case-insensitive)."""
    return os.environ.get(name, "false").lower() == "true"


# Credentials used to authenticate against the Langfuse API.
LANGFUSE_PUBLIC_KEY = os.environ.get("LANGFUSE_PUBLIC_KEY", "langfuse_public_key")
LANGFUSE_SECRET_KEY = os.environ.get("LANGFUSE_SECRET_KEY", "langfuse_secret_key")

# Base URL of the Langfuse server.
LANGFUSE_HOST = os.environ.get("LANGFUSE_HOST", "http://localhost:3000")

# Environment and release labels; the wrapper adds both to trace metadata and tags.
LANGFUSE_ENVIRONMENT = os.environ.get("LANGFUSE_ENVIRONMENT", "local")
LANGFUSE_RELEASE = os.environ.get("LANGFUSE_RELEASE", "local")

# Debug mode flag passed to the Langfuse client.
LANGFUSE_DEBUG = _env_flag("LANGFUSE_DEBUG")

# Timeout passed to the Langfuse client.
LANGFUSE_TIMEOUT = int(os.environ.get("LANGFUSE_TIMEOUT", 10))

# Sample rate passed to the Langfuse client (1.0 is logged as 100%).
LANGFUSE_SAMPLE_RATE = float(os.environ.get("LANGFUSE_SAMPLE_RATE", 1.0))

# Langfuse is off when explicitly disabled or when running in the "local" environment;
# LANGFUSE_FORCE_RUN overrides that and enables it anyway.
LANGFUSE_DISABLED = _env_flag("LANGFUSE_DISABLED") or LANGFUSE_ENVIRONMENT == "local"
LANGFUSE_FORCE_RUN = _env_flag("LANGFUSE_FORCE_RUN")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LangfuseClientWrapper:
    """
    Langfuse client wrapper. Defines Langfuse client configuration and initializes Langfuse client.

    When Langfuse is disabled (and not forced to run) no client is created and
    every public method is a no-op. Methods that need a trace are also no-ops
    until `setup_trace` has successfully created one.
    """

    def __init__(self,
                 public_key: str = LANGFUSE_PUBLIC_KEY,
                 secret_key: str = LANGFUSE_SECRET_KEY,
                 host: str = LANGFUSE_HOST,
                 environment: str = LANGFUSE_ENVIRONMENT,
                 release: str = LANGFUSE_RELEASE,
                 debug: bool = LANGFUSE_DEBUG,
                 timeout: int = LANGFUSE_TIMEOUT,
                 sample_rate: float = LANGFUSE_SAMPLE_RATE,
                 disable: bool = LANGFUSE_DISABLED,
                 force_run: bool = LANGFUSE_FORCE_RUN) -> None:
        """
        Initialize Langfuse client.

        Args:
            public_key (str): Langfuse public key.
            secret_key (str): Langfuse secret key.
            host (str): Langfuse host.
            environment (str): Environment label added to trace metadata and tags.
            release (str): Langfuse release.
            debug (bool): Langfuse debug mode.
            timeout (int): Langfuse timeout.
            sample_rate (float): Langfuse sample rate.
            disable (bool): If True, no Langfuse client is created (unless force_run is set).
            force_run (bool): If True, create the client even when disable is True.
        """

        self.public_key = public_key
        self.secret_key = secret_key
        self.host = host
        self.environment = environment
        self.release = release
        self.debug = debug
        self.timeout = timeout
        self.sample_rate = sample_rate
        self.disable = disable
        self.force_run = force_run

        self.client = None
        self.trace = None
        self.metadata = None
        self.tags = None

        # Decide from the constructor arguments (which default to the module-level
        # settings) so that an explicit disable/force_run from the caller is honored.
        if self.disable and not self.force_run:
            logger.info("Langfuse is disabled.")
            return

        logger.info("Langfuse enabled")
        # Log the effective configuration; the secret key is masked.
        logger.debug(f"LANGFUSE_PUBLIC_KEY: {public_key}")
        logger.debug(f"LANGFUSE_SECRET_KEY: {'*' * len(secret_key or '')}")
        logger.debug(f"LANGFUSE_HOST: {host}")
        logger.debug(f"LANGFUSE_ENVIRONMENT: {environment}")
        logger.debug(f"LANGFUSE_RELEASE: {release}")
        logger.debug(f"LANGFUSE_DEBUG: {debug}")
        logger.debug(f"LANGFUSE_TIMEOUT: {timeout}")
        logger.debug(f"LANGFUSE_SAMPLE_RATE: {sample_rate * 100}%")

        self.client = Langfuse(
            public_key=public_key,
            secret_key=secret_key,
            host=host,
            release=release,
            debug=debug,
            timeout=timeout,
            sample_rate=sample_rate
        )

    def setup_trace(self,
                    name: str,
                    input: typing.Optional[typing.Any] = None,
                    tags: typing.Optional[typing.List] = None,
                    metadata: typing.Optional[typing.Dict] = None,
                    user_id: str = None,
                    session_id: str = None) -> None:
        """
        Setup trace. If Langfuse is disabled, nothing will be done.

        Args:
            name (str): Trace name.
            input (Any): Trace input.
            tags (list): Trace tags.
            metadata (dict): Trace metadata.
            user_id (str): User ID.
            session_id (str): Session ID.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return

        self.set_metadata(metadata)
        self.set_tags(tags)

        try:
            self.trace = self.client.trace(
                name=name,
                input=input,
                metadata=self.metadata,
                tags=self.tags,
                user_id=user_id,
                session_id=session_id
            )
        except Exception as e:
            # The trace may not exist at this point, so report the trace name
            # instead of dereferencing self.trace.
            logger.error(f'Something went wrong while setting up Langfuse trace {name}: {str(e)}')
            return

        logger.info(f"Langfuse trace configured with ID: {self.trace.id}")

    def start_span(self,
                   name: str,
                   input: typing.Optional[typing.Any] = None) -> typing.Optional[StatefulSpanClient]:
        """
        Create span. If Langfuse is disabled or no trace is set up, nothing will be done.

        Args:
            name (str): Span name.
            input (Any): Span input.

        Returns:
            The created span, or None when no span could be created.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return None

        if self.trace is None:
            logger.debug("Langfuse trace is not created.")
            return None

        return self.trace.span(name=name, input=input)

    def end_span_stream(self,
                        span: typing.Optional[StatefulSpanClient] = None) -> None:
        """
        End span without recording an output. If Langfuse is disabled, nothing will happen.

        Args:
            span (Any): Span object.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return

        # Same guard as end_span: start_span may have returned None.
        if span is None:
            logger.debug("Langfuse span is not created.")
            return

        span.end()

        if self.trace is not None:
            self.trace.update()

    def end_span(self,
                 span: typing.Optional[StatefulSpanClient] = None,
                 output: typing.Optional[typing.Any] = None) -> None:
        """
        End span, record its output on the trace and attach tool usage to the trace metadata.
        If Langfuse is disabled, nothing will be done.

        Args:
            span (Any): Span object.
            output (Any): Span output.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return

        if span is None:
            logger.debug("Langfuse span is not created.")
            return

        span.end(output=output)

        if self.trace is None:
            logger.debug("Langfuse trace is not created.")
            return

        self.trace.update(output=output)

        metadata = self.metadata or {}

        try:
            # Ensure all batched traces are sent before fetching.
            self.client.flush()
            metadata['tool_usage'] = self._get_tool_usage()
            self.trace.update(metadata=metadata)

        except Exception as e:
            logger.error(f'Something went wrong while processing Langfuse trace {self.trace.id}: {str(e)}')

    def get_langchain_handler(self) -> typing.Optional[CallbackHandler]:
        """
        Get Langchain handler. If Langfuse is disabled or no trace is set up, returns None.
        """

        if self.client is None:
            logger.debug("Langfuse is disabled.")
            return None

        if self.trace is None:
            logger.debug("Langfuse trace is not created.")
            return None

        return self.trace.get_langchain_handler()

    def set_metadata(self, custom_metadata: dict = None) -> None:
        """
        Set trace metadata: the custom metadata plus the default `environment` and `release` keys.

        Args:
            custom_metadata (dict): Caller-supplied metadata; it is copied, not modified.
        """
        # Copy so the caller's dict is not mutated by the default keys added below.
        self.metadata = dict(custom_metadata or {})

        self.metadata["environment"] = self.environment
        self.metadata["release"] = self.release

    def set_tags(self, custom_tags: typing.Optional[typing.List] = None) -> None:
        """
        Set trace tags: the custom tags followed by the environment and release.

        Args:
            custom_tags (list): Caller-supplied tags; the list is copied, not modified.
        """
        # Copy so repeated calls with the same list do not accumulate default tags in it.
        self.tags = list(custom_tags or [])

        self.tags.append(self.environment)
        self.tags.append(self.release)

    def _get_tool_usage(self) -> typing.Dict:
        """ Retrieves tool usage information from a langfuse trace.
        Note: assumes trace marks an action with string `AgentAction`

        Returns:
            Mapping of tool name to the number of observations recorded for it.
            Empty if the trace is missing or cannot be fetched.
        """

        tool_usage = {}

        if self.client is None or self.trace is None:
            return tool_usage

        try:
            fetched_trace = self.client.get_trace(self.trace.id)
            for observation in fetched_trace.observations:
                step = observation.name
                if not step or 'AgentAction' not in step:
                    continue
                # The tool name is the second '-'-separated segment of the step name;
                # skip names without a separator instead of aborting the whole count.
                parts = step.split('-')
                if len(parts) < 2:
                    continue
                tool_name = parts[1]
                tool_usage[tool_name] = tool_usage.get(tool_name, 0) + 1
        except TraceNotFoundError:
            logger.warning(f'Langfuse trace {self.trace.id} not found')
        except Exception as e:
            logger.error(f'Something went wrong while processing Langfuse trace {self.trace.id}: {str(e)}')

        return tool_usage
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Iterable, Callable
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from mindsdb.utilities.config import Config
|
|
6
|
+
from mindsdb.utilities.context_executor import execute_in_threads
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def process_dataframe_in_partitions(df: pd.DataFrame, callback: Callable, partition_size: int) -> Iterable:
    """
    Splits dataframe into partitions and apply callback on each partition

    Partitions are consecutive row slices of at most `partition_size` rows.
    With a single worker the callback runs in the calling thread; otherwise the
    partitions are handed to `execute_in_threads`.

    :param df: input dataframe
    :param callback: function to apply on each partition
    :param partition_size: size of each partition, must be at least 1
    :return: yield results
    :raises ValueError: if partition_size is less than 1
    """

    # A non-positive size would never advance through the dataframe.
    if partition_size < 1:
        raise ValueError(f'partition_size must be a positive integer, got {partition_size}')

    # tasks
    def split_data_f(df):
        # each task is the positional-argument list for one callback call
        for start in range(0, len(df), partition_size):
            yield [df.iloc[start: start + partition_size]]

    tasks = split_data_f(df)

    # workers count
    is_cloud = Config().is_cloud
    if is_cloud:
        max_threads = int(os.getenv('MINDSDB_MAX_PARTITIONING_THREADS', 10))
    else:
        # os.cpu_count() may return None when the count cannot be determined
        max_threads = (os.cpu_count() or 1) - 2

    # don't exceed chunk_count (rounded up so a trailing partial partition is counted)
    chunk_count = (len(df) + partition_size - 1) // partition_size
    max_threads = max(1, min(max_threads, chunk_count))

    if max_threads == 1:
        # don't spawn threads
        for task in tasks:
            yield callback(*task)
    else:
        yield from execute_in_threads(callback, tasks, thread_count=max_threads)
|
|
@@ -14,6 +14,10 @@ from sqlalchemy.sql import functions as sa_fnc
|
|
|
14
14
|
from mindsdb_sql_parser import ast
|
|
15
15
|
|
|
16
16
|
|
|
17
|
+
RESERVED_WORDS = {
|
|
18
|
+
"collation"
|
|
19
|
+
}
|
|
20
|
+
|
|
17
21
|
sa_type_names = [
|
|
18
22
|
key for key, val in sa.types.__dict__.items() if hasattr(val, '__module__')
|
|
19
23
|
and val.__module__ in ('sqlalchemy.sql.sqltypes', 'sqlalchemy.sql.type_api')
|
|
@@ -98,7 +102,7 @@ class SqlalchemyRender:
|
|
|
98
102
|
# in that case use origin string
|
|
99
103
|
|
|
100
104
|
part_lower = str(sa.column(i.lower()).compile(dialect=self.dialect))
|
|
101
|
-
if part.lower() != part_lower:
|
|
105
|
+
if part.lower() != part_lower and i.lower() not in RESERVED_WORDS:
|
|
102
106
|
part = i
|
|
103
107
|
|
|
104
108
|
parts2.append(part)
|
|
@@ -506,6 +510,8 @@ class SqlalchemyRender:
|
|
|
506
510
|
condition = self.to_expression(item['condition'])
|
|
507
511
|
|
|
508
512
|
join_type = item['join_type']
|
|
513
|
+
if 'ASOF' in join_type:
|
|
514
|
+
raise NotImplementedError(f'Unsupported join type: {join_type}')
|
|
509
515
|
method = 'join'
|
|
510
516
|
is_full = False
|
|
511
517
|
if join_type == 'LEFT JOIN':
|
|
File without changes
|
|
File without changes
|
|
File without changes
|