MindsDB 25.1.2.1__py3-none-any.whl → 25.1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/METADATA +246 -255
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/RECORD +94 -83
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +8 -3
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +9 -26
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/http/namespaces/agents.py +3 -1
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/databricks_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/file_handler/requirements.txt +0 -4
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +8 -0
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +50 -16
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +5 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/handlers/web_handler/requirements.txt +0 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +2 -2
- mindsdb/integrations/utilities/files/__init__.py +0 -0
- mindsdb/integrations/utilities/files/file_reader.py +258 -0
- mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +2 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +8 -3
- mindsdb/integrations/utilities/rag/chains/map_reduce_summarizer_chain.py +5 -9
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +74 -21
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +108 -78
- mindsdb/integrations/utilities/rag/settings.py +37 -16
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +18 -8
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/langchain_agent.py +124 -157
- mindsdb/interfaces/agents/langfuse_callback_handler.py +4 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +21 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/integrations.py +5 -1
- mindsdb/interfaces/database/projects.py +55 -16
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +39 -15
- mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +7 -26
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +51 -40
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/interfaces/skills/skill_tool.py +97 -54
- mindsdb/interfaces/skills/skills_controller.py +7 -3
- mindsdb/interfaces/skills/sql_agent.py +127 -41
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +11 -1
- mindsdb/utilities/langfuse.py +279 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,20 @@
|
|
|
1
1
|
import json
|
|
2
|
-
|
|
2
|
+
import re
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
from typing import Any, List, Optional
|
|
3
5
|
|
|
4
6
|
from langchain.chains.llm import LLMChain
|
|
5
7
|
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
|
|
6
8
|
from langchain_core.documents.base import Document
|
|
7
9
|
from langchain_core.embeddings import Embeddings
|
|
10
|
+
from langchain_core.exceptions import OutputParserException
|
|
8
11
|
from langchain_core.language_models.chat_models import BaseChatModel
|
|
12
|
+
from langchain_core.output_parsers import PydanticOutputParser
|
|
9
13
|
from langchain_core.prompts import PromptTemplate
|
|
10
14
|
from langchain_core.retrievers import BaseRetriever
|
|
11
15
|
|
|
12
16
|
from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
|
|
17
|
+
from mindsdb.integrations.libs.response import HandlerResponse
|
|
13
18
|
from mindsdb.integrations.libs.vectordatabase_handler import DistanceFunction, VectorStoreHandler
|
|
14
19
|
from mindsdb.integrations.utilities.rag.settings import LLMExample, MetadataSchema, SearchKwargs
|
|
15
20
|
from mindsdb.utilities import log
|
|
@@ -17,6 +22,18 @@ from mindsdb.utilities import log
|
|
|
17
22
|
logger = log.getLogger(__name__)
|
|
18
23
|
|
|
19
24
|
|
|
25
|
+
class MetadataFilter(BaseModel):
|
|
26
|
+
'''Represents an LLM generated metadata filter to apply to a PostgreSQL query.'''
|
|
27
|
+
attribute: str = Field(description="Database column to apply filter to")
|
|
28
|
+
comparator: str = Field(description="PostgreSQL comparator to use to filter database column")
|
|
29
|
+
value: Any = Field(description="Value to use to filter database column")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class MetadataFilters(BaseModel):
|
|
33
|
+
'''List of LLM generated metadata filters to apply to a PostgreSQL query.'''
|
|
34
|
+
filters: List[MetadataFilter] = Field(description="List of PostgreSQL metadata filters to apply for user query")
|
|
35
|
+
|
|
36
|
+
|
|
20
37
|
class SQLRetriever(BaseRetriever):
|
|
21
38
|
'''Retriever that uses a LLM to generate pgvector queries to do similarity search with metadata filters.
|
|
22
39
|
|
|
@@ -25,10 +42,10 @@ class SQLRetriever(BaseRetriever):
|
|
|
25
42
|
1. Use a LLM to rewrite the user input to something more suitable for retrieval. For example:
|
|
26
43
|
"Show me documents containing how to finetune a LLM please" --> "how to finetune a LLM"
|
|
27
44
|
|
|
28
|
-
2. Use a LLM to generate
|
|
29
|
-
metadata schemas & examples are used as additional context
|
|
45
|
+
2. Use a LLM to generate structured metadata filters based on the user input. Provided
|
|
46
|
+
metadata schemas & examples are used as additional context.
|
|
30
47
|
|
|
31
|
-
3.
|
|
48
|
+
3. Generate a prepared PostgreSQL query from the structured metadata filters.
|
|
32
49
|
|
|
33
50
|
4. Actually execute the query against our vector database to retrieve documents & return them.
|
|
34
51
|
'''
|
|
@@ -37,23 +54,22 @@ class SQLRetriever(BaseRetriever):
|
|
|
37
54
|
metadata_schemas: Optional[List[MetadataSchema]] = None
|
|
38
55
|
examples: Optional[List[LLMExample]] = None
|
|
39
56
|
|
|
40
|
-
embeddings_model: Embeddings
|
|
41
57
|
rewrite_prompt_template: str
|
|
42
|
-
|
|
58
|
+
metadata_filters_prompt_template: str
|
|
59
|
+
embeddings_model: Embeddings
|
|
43
60
|
num_retries: int
|
|
44
|
-
sql_prompt_template: str
|
|
45
|
-
query_checker_template: str
|
|
46
61
|
embeddings_table: str
|
|
47
62
|
source_table: str
|
|
63
|
+
source_id_column: str = 'Id'
|
|
48
64
|
distance_function: DistanceFunction
|
|
49
65
|
search_kwargs: SearchKwargs
|
|
50
66
|
|
|
51
67
|
llm: BaseChatModel
|
|
52
68
|
|
|
53
|
-
def
|
|
69
|
+
def _prepare_metadata_prompt(self) -> PromptTemplate:
|
|
54
70
|
base_prompt_template = PromptTemplate(
|
|
55
|
-
input_variables=['
|
|
56
|
-
template=self.
|
|
71
|
+
input_variables=['format_instructions', 'schema', 'examples', 'input', 'embeddings'],
|
|
72
|
+
template=self.metadata_filters_prompt_template
|
|
57
73
|
)
|
|
58
74
|
schema_prompt_str = ''
|
|
59
75
|
if self.metadata_schemas is not None:
|
|
@@ -67,7 +83,7 @@ class SQLRetriever(BaseRetriever):
|
|
|
67
83
|
if column.values is not None:
|
|
68
84
|
column_mapping[column.name]['values'] = column.values
|
|
69
85
|
column_mapping_json_str = json.dumps(column_mapping, indent=4)
|
|
70
|
-
schema_str = f'''{i+
|
|
86
|
+
schema_str = f'''{i+1}. {schema.table} - {schema.description}
|
|
71
87
|
|
|
72
88
|
Columns:
|
|
73
89
|
```json
|
|
@@ -86,7 +102,7 @@ Output:
|
|
|
86
102
|
{example.output}
|
|
87
103
|
|
|
88
104
|
'''
|
|
89
|
-
|
|
105
|
+
examples_prompt_str += example_str
|
|
90
106
|
return base_prompt_template.partial(
|
|
91
107
|
schema=schema_prompt_str,
|
|
92
108
|
examples=examples_prompt_str
|
|
@@ -100,83 +116,93 @@ Output:
|
|
|
100
116
|
rewrite_chain = LLMChain(llm=self.llm, prompt=rewrite_prompt)
|
|
101
117
|
return rewrite_chain.predict(input=query)
|
|
102
118
|
|
|
103
|
-
def _prepare_pgvector_query(self,
|
|
104
|
-
#
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
query=query,
|
|
141
|
-
dialect='postgres',
|
|
142
|
-
error=error,
|
|
143
|
-
embeddings_table=self.embeddings_table,
|
|
144
|
-
schema=schema,
|
|
145
|
-
callbacks=run_manager.get_child() if run_manager else None
|
|
119
|
+
def _prepare_pgvector_query(self, metadata_filters: List[MetadataFilter]) -> str:
|
|
120
|
+
# Base select JOINed with document source table.
|
|
121
|
+
base_query = f'''SELECT * FROM {self.embeddings_table} AS e INNER JOIN {self.source_table} AS s ON (e.metadata->>'original_row_id')::int = s."{self.source_id_column}" '''
|
|
122
|
+
col_to_schema = {}
|
|
123
|
+
if not self.metadata_schemas:
|
|
124
|
+
return ''
|
|
125
|
+
for schema in self.metadata_schemas:
|
|
126
|
+
for col in schema.columns:
|
|
127
|
+
col_to_schema[col.name] = schema
|
|
128
|
+
joined_schemas = set()
|
|
129
|
+
for filter in metadata_filters:
|
|
130
|
+
# Join schemas before filtering.
|
|
131
|
+
schema = col_to_schema.get(filter.attribute)
|
|
132
|
+
if schema is None or schema.table in joined_schemas or schema.table == self.source_table:
|
|
133
|
+
continue
|
|
134
|
+
joined_schemas.add(schema.table)
|
|
135
|
+
base_query += schema.join + ' '
|
|
136
|
+
# Actually construct WHERE conditions from metadata filters.
|
|
137
|
+
if metadata_filters:
|
|
138
|
+
base_query += 'WHERE '
|
|
139
|
+
for i, filter in enumerate(metadata_filters):
|
|
140
|
+
value = filter.value
|
|
141
|
+
if isinstance(value, str):
|
|
142
|
+
value = f"'{value}'"
|
|
143
|
+
base_query += f'"{filter.attribute}" {filter.comparator} {value}'
|
|
144
|
+
if i < len(metadata_filters) - 1:
|
|
145
|
+
base_query += ' AND '
|
|
146
|
+
base_query += f" ORDER BY e.embeddings {self.distance_function.value[0]} '{{embeddings}}' LIMIT {self.search_kwargs.k};"
|
|
147
|
+
return base_query
|
|
148
|
+
|
|
149
|
+
def _generate_metadata_filters(self, query: str) -> List[MetadataFilter]:
|
|
150
|
+
parser = PydanticOutputParser(pydantic_object=MetadataFilters)
|
|
151
|
+
metadata_prompt = self._prepare_metadata_prompt()
|
|
152
|
+
metadata_filters_chain = LLMChain(llm=self.llm, prompt=metadata_prompt)
|
|
153
|
+
metadata_filters_output = metadata_filters_chain.predict(
|
|
154
|
+
format_instructions=parser.get_format_instructions(),
|
|
155
|
+
input=query
|
|
146
156
|
)
|
|
157
|
+
# If the LLM outputs raw JSON, use it as-is.
|
|
158
|
+
# If the LLM outputs anything including a json markdown section, use the last one.
|
|
159
|
+
json_markdown_output = re.findall(r'```json.*```', metadata_filters_output, re.DOTALL)
|
|
160
|
+
if json_markdown_output:
|
|
161
|
+
metadata_filters_output = json_markdown_output[-1]
|
|
162
|
+
# Clean the json tags.
|
|
163
|
+
metadata_filters_output = metadata_filters_output[7:]
|
|
164
|
+
metadata_filters_output = metadata_filters_output[:-3]
|
|
165
|
+
metadata_filters = parser.invoke(metadata_filters_output)
|
|
166
|
+
return metadata_filters.filters
|
|
167
|
+
|
|
168
|
+
def _prepare_and_execute_query(self, query: str, embeddings_str: str) -> HandlerResponse:
|
|
169
|
+
try:
|
|
170
|
+
metadata_filters = self._generate_metadata_filters(query)
|
|
171
|
+
checked_sql_query = self._prepare_pgvector_query(metadata_filters)
|
|
172
|
+
checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=embeddings_str)
|
|
173
|
+
return self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
|
|
174
|
+
except OutputParserException as e:
|
|
175
|
+
logger.warning(f'LLM failed to generate structured metadata filters: {str(e)}')
|
|
176
|
+
return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
|
|
177
|
+
except Exception as e:
|
|
178
|
+
logger.warning(f'Failed to prepare and execute SQL query from structured metadata: {str(e)}')
|
|
179
|
+
return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
|
|
147
180
|
|
|
148
181
|
def _get_relevant_documents(
|
|
149
182
|
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
|
150
183
|
) -> List[Document]:
|
|
151
184
|
# Rewrite query to be suitable for retrieval.
|
|
152
185
|
retrieval_query = self._prepare_retrieval_query(query)
|
|
153
|
-
|
|
154
|
-
# Generate & check the query to be executed
|
|
155
|
-
checked_sql_query = self._prepare_pgvector_query(query, run_manager)
|
|
156
|
-
|
|
157
186
|
# Embed the rewritten retrieval query & include it in the similarity search pgvector query.
|
|
158
187
|
embedded_query = self.embeddings_model.embed_query(retrieval_query)
|
|
159
|
-
checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=str(embedded_query))
|
|
160
|
-
# Handle LLM output that has the ```sql delimiter possibly.
|
|
161
|
-
checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```sql', '')
|
|
162
|
-
checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```', '')
|
|
163
188
|
# Actually execute the similarity search with metadata filters.
|
|
164
|
-
document_response = self.
|
|
189
|
+
document_response = self._prepare_and_execute_query(retrieval_query, str(embedded_query))
|
|
165
190
|
num_retries = 0
|
|
166
|
-
while
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
191
|
+
while num_retries < self.num_retries:
|
|
192
|
+
if document_response.resp_type != RESPONSE_TYPE.ERROR and len(document_response.data_frame) > 0:
|
|
193
|
+
# Successfully retrieved documents.
|
|
194
|
+
break
|
|
195
|
+
if document_response.resp_type == RESPONSE_TYPE.ERROR:
|
|
196
|
+
# LLMs won't always generate structured metadata so we should have a fallback after retrying.
|
|
197
|
+
logger.info(f'SQL Retriever query failed with error {document_response.error_message}')
|
|
198
|
+
elif len(document_response.data_frame) == 0:
|
|
199
|
+
logger.info('No documents retrieved from SQL Retriever query')
|
|
200
|
+
|
|
201
|
+
document_response = self._prepare_and_execute_query(retrieval_query, str(embedded_query))
|
|
202
|
+
num_retries += 1
|
|
170
203
|
if num_retries >= self.num_retries:
|
|
171
204
|
logger.info('Using fallback retriever in SQL retriever.')
|
|
172
|
-
return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager)
|
|
173
|
-
query_to_retry = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
|
|
174
|
-
query_to_retry_with_embeddings = query_to_retry.format(embeddings=str(embedded_query))
|
|
175
|
-
# Handle LLM output that has the ```sql delimiter possibly.
|
|
176
|
-
query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```sql', '')
|
|
177
|
-
query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```', '')
|
|
178
|
-
document_response = self.vector_store_handler.native_query(query_to_retry_with_embeddings)
|
|
179
|
-
num_retries += 1
|
|
205
|
+
return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
|
|
180
206
|
|
|
181
207
|
document_df = document_response.data_frame
|
|
182
208
|
retrieved_documents = []
|
|
@@ -185,4 +211,8 @@ Output:
|
|
|
185
211
|
document_row.get('content', ''),
|
|
186
212
|
metadata=document_row.get('metadata', {})
|
|
187
213
|
))
|
|
188
|
-
|
|
214
|
+
if retrieved_documents:
|
|
215
|
+
return retrieved_documents
|
|
216
|
+
# If the SQL query constructed did not return any documents, fallback.
|
|
217
|
+
logger.info('No documents returned from SQL retriever. using fallback retriever.')
|
|
218
|
+
return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
|
|
@@ -3,7 +3,6 @@ from typing import List, Union, Any, Optional, Dict
|
|
|
3
3
|
|
|
4
4
|
from langchain_community.vectorstores.chroma import Chroma
|
|
5
5
|
from langchain_community.vectorstores.pgvector import PGVector
|
|
6
|
-
from langchain_community.tools.sql_database.prompt import QUERY_CHECKER as DEFAULT_QUERY_CHECKER_PROMPT_TEMPLATE
|
|
7
6
|
from langchain_core.documents import Document
|
|
8
7
|
from langchain_core.embeddings import Embeddings
|
|
9
8
|
from langchain_core.language_models import BaseChatModel
|
|
@@ -94,6 +93,25 @@ Output only a single better search query and nothing else like in the example.
|
|
|
94
93
|
Here is the user input: {input}
|
|
95
94
|
'''
|
|
96
95
|
|
|
96
|
+
DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE = '''Construct a list of PostgreSQL metadata filters to filter documents in the database based on the user input.
|
|
97
|
+
|
|
98
|
+
<< INSTRUCTIONS >>
|
|
99
|
+
{format_instructions}
|
|
100
|
+
|
|
101
|
+
RETURN ONLY THE FINAL JSON. DO NOT EXPLAIN, JUST RETURN THE FINAL JSON.
|
|
102
|
+
|
|
103
|
+
<< TABLES YOU HAVE ACCESS TO >>
|
|
104
|
+
|
|
105
|
+
{schema}
|
|
106
|
+
|
|
107
|
+
<< EXAMPLES >>
|
|
108
|
+
|
|
109
|
+
{examples}
|
|
110
|
+
|
|
111
|
+
Here is the user input:
|
|
112
|
+
{input}
|
|
113
|
+
'''
|
|
114
|
+
|
|
97
115
|
DEFAULT_SQL_PROMPT_TEMPLATE = '''
|
|
98
116
|
Construct a valid {dialect} SQL query to select documents relevant to the user input.
|
|
99
117
|
Source documents are found in the {source_table} table. You may need to join with other tables to get additional document metadata.
|
|
@@ -136,7 +154,6 @@ Columns:
|
|
|
136
154
|
"description": "Metadata for the document chunk. Always select metadata and always join with the {source_table} table on the string metadata field 'original_row_id'"
|
|
137
155
|
}}
|
|
138
156
|
}}
|
|
139
|
-
```
|
|
140
157
|
|
|
141
158
|
{schema}
|
|
142
159
|
|
|
@@ -290,6 +307,8 @@ class VectorStoreConfig(BaseModel):
|
|
|
290
307
|
collection_name: str = DEFAULT_COLLECTION_NAME
|
|
291
308
|
connection_string: str = None
|
|
292
309
|
kb_table: Any = None
|
|
310
|
+
is_sparse: bool = False
|
|
311
|
+
vector_size: Optional[int] = None
|
|
293
312
|
|
|
294
313
|
class Config:
|
|
295
314
|
arbitrary_types_allowed = True
|
|
@@ -376,6 +395,13 @@ class MetadataSchema(BaseModel):
|
|
|
376
395
|
columns: List[ColumnSchema] = Field(
|
|
377
396
|
description="List of column schemas describing the metadata columns available for the table"
|
|
378
397
|
)
|
|
398
|
+
join: str = Field(
|
|
399
|
+
description="SQL join string to join this table with source documents table",
|
|
400
|
+
default=''
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
class Config:
|
|
404
|
+
frozen = True
|
|
379
405
|
|
|
380
406
|
|
|
381
407
|
class LLMExample(BaseModel):
|
|
@@ -392,19 +418,9 @@ class SQLRetrieverConfig(BaseModel):
|
|
|
392
418
|
default_factory=LLMConfig,
|
|
393
419
|
description="LLM configuration to use for generating the final SQL query for retrieval"
|
|
394
420
|
)
|
|
395
|
-
|
|
396
|
-
default=
|
|
397
|
-
description="
|
|
398
|
-
Has 'dialect', 'input', 'embeddings_table', 'source_table', 'embeddings', 'distance_function', 'schema', and 'examples' input variables.
|
|
399
|
-
"""
|
|
400
|
-
)
|
|
401
|
-
query_checker_template: str = Field(
|
|
402
|
-
default=DEFAULT_QUERY_CHECKER_PROMPT_TEMPLATE,
|
|
403
|
-
description="Prompt template to use for double checking SQL queries before execution. Has 'query' and 'dialect' input variables."
|
|
404
|
-
)
|
|
405
|
-
query_retry_template: str = Field(
|
|
406
|
-
default=DEFAULT_QUERY_RETRY_PROMPT_TEMPLATE,
|
|
407
|
-
description="Prompt template to rewrite SQL query that failed. Has 'dialect', 'query', and 'error' input variables."
|
|
421
|
+
metadata_filters_prompt_template: str = Field(
|
|
422
|
+
default=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE,
|
|
423
|
+
description="Prompt template to generate PostgreSQL metadata filters. Has 'format_instructions', 'schema', 'examples', and 'input' input variables"
|
|
408
424
|
)
|
|
409
425
|
num_retries: int = Field(
|
|
410
426
|
default=DEFAULT_NUM_QUERY_RETRIES,
|
|
@@ -449,8 +465,13 @@ class SummarizationConfig(BaseModel):
|
|
|
449
465
|
class RerankerConfig(BaseModel):
|
|
450
466
|
model: str = DEFAULT_RERANKING_MODEL
|
|
451
467
|
base_url: str = DEFAULT_LLM_ENDPOINT
|
|
452
|
-
filtering_threshold: float = 0.
|
|
468
|
+
filtering_threshold: float = 0.5
|
|
453
469
|
num_docs_to_keep: Optional[int] = None
|
|
470
|
+
max_concurrent_requests: int = 20
|
|
471
|
+
max_retries: int = 3
|
|
472
|
+
retry_delay: float = 1.0
|
|
473
|
+
early_stop: bool = True # Whether to enable early stopping
|
|
474
|
+
early_stop_threshold: float = 0.8 # Confidence threshold for early stopping
|
|
454
475
|
|
|
455
476
|
|
|
456
477
|
class MultiHopRetrieverConfig(BaseModel):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
from typing import Dict, Iterator, List, Union, Tuple
|
|
2
|
+
from typing import Dict, Iterator, List, Union, Tuple, Optional
|
|
3
3
|
|
|
4
4
|
from langchain_core.tools import BaseTool
|
|
5
5
|
from sqlalchemy.orm.attributes import flag_modified
|
|
@@ -70,7 +70,7 @@ class AgentsController:
|
|
|
70
70
|
|
|
71
71
|
return model, provider
|
|
72
72
|
|
|
73
|
-
def get_agent(self, agent_name: str, project_name: str = 'mindsdb') -> db.Agents:
|
|
73
|
+
def get_agent(self, agent_name: str, project_name: str = 'mindsdb') -> Optional[db.Agents]:
|
|
74
74
|
'''
|
|
75
75
|
Gets an agent by name.
|
|
76
76
|
|
|
@@ -79,7 +79,7 @@ class AgentsController:
|
|
|
79
79
|
project_name (str): The name of the containing project - must exist
|
|
80
80
|
|
|
81
81
|
Returns:
|
|
82
|
-
agent (db.Agents): The database agent object
|
|
82
|
+
agent (Optional[db.Agents]): The database agent object
|
|
83
83
|
'''
|
|
84
84
|
|
|
85
85
|
project = self.project_controller.get(name=project_name)
|
|
@@ -252,6 +252,16 @@ class AgentsController:
|
|
|
252
252
|
existing_agent = self.get_agent(agent_name, project_name=project_name)
|
|
253
253
|
if existing_agent is None:
|
|
254
254
|
raise EntityNotExistsError(f'Agent with name not found: {agent_name}')
|
|
255
|
+
is_demo = (existing_agent.params or {}).get('is_demo', False)
|
|
256
|
+
if (
|
|
257
|
+
is_demo and (
|
|
258
|
+
(name is not None and name != agent_name)
|
|
259
|
+
or (model_name or provider)
|
|
260
|
+
or (len(skills_to_add) > 0 or len(skills_to_remove) > 0 or len(skills_to_rewrite) > 0)
|
|
261
|
+
or (isinstance(params, dict) and len(params) > 1 and 'prompt_template' not in params)
|
|
262
|
+
)
|
|
263
|
+
):
|
|
264
|
+
raise ValueError("It is forbidden to change properties of the demo object")
|
|
255
265
|
|
|
256
266
|
if name is not None and name != agent_name:
|
|
257
267
|
# Check to see if updated name already exists
|
|
@@ -352,6 +362,8 @@ class AgentsController:
|
|
|
352
362
|
agent = self.get_agent(agent_name, project_name)
|
|
353
363
|
if agent is None:
|
|
354
364
|
raise ValueError(f'Agent with name does not exist: {agent_name}')
|
|
365
|
+
if isinstance(agent.params, dict) and agent.params.get('is_demo') is True:
|
|
366
|
+
raise ValueError('Unable to delete demo object')
|
|
355
367
|
agent.deleted_at = datetime.datetime.now()
|
|
356
368
|
db.session.commit()
|
|
357
369
|
|
|
@@ -362,24 +374,22 @@ class AgentsController:
|
|
|
362
374
|
project_name: str = 'mindsdb',
|
|
363
375
|
tools: List[BaseTool] = None,
|
|
364
376
|
stream: bool = False) -> Union[Iterator[object], pd.DataFrame]:
|
|
365
|
-
|
|
377
|
+
"""
|
|
366
378
|
Queries an agent to get a completion.
|
|
367
379
|
|
|
368
380
|
Parameters:
|
|
369
381
|
agent (db.Agents): Existing agent to get completion from
|
|
370
382
|
messages (List[Dict[str, str]]): Chat history to send to the agent
|
|
371
|
-
trace_id (str): ID of Langfuse trace to use
|
|
372
|
-
observation_id (str): ID of parent Langfuse observation to use
|
|
373
383
|
project_name (str): Project the agent belongs to (default mindsdb)
|
|
374
384
|
tools (List[BaseTool]): Tools to use while getting the completion
|
|
375
|
-
stream (bool): Whether
|
|
385
|
+
stream (bool): Whether to stream the response
|
|
376
386
|
|
|
377
387
|
Returns:
|
|
378
388
|
response (Union[Iterator[object], pd.DataFrame]): Completion as a DataFrame or iterator of completion chunks
|
|
379
389
|
|
|
380
390
|
Raises:
|
|
381
391
|
ValueError: Agent's model does not exist.
|
|
382
|
-
|
|
392
|
+
"""
|
|
383
393
|
if stream:
|
|
384
394
|
return self._get_completion_stream(
|
|
385
395
|
agent,
|
|
@@ -165,6 +165,7 @@ PROVIDER_TO_MODELS = MappingProxyType(
|
|
|
165
165
|
|
|
166
166
|
ASSISTANT_COLUMN = "answer"
|
|
167
167
|
CONTEXT_COLUMN = "context"
|
|
168
|
+
TRACE_ID_COLUMN = "trace_id"
|
|
168
169
|
DEFAULT_AGENT_TIMEOUT_SECONDS = 300
|
|
169
170
|
# These should require no additional arguments.
|
|
170
171
|
DEFAULT_AGENT_TOOLS = []
|