MindsDB 25.1.2.1__py3-none-any.whl → 25.1.5.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (95)
  1. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/METADATA +246 -255
  2. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/RECORD +94 -83
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +5 -3
  5. mindsdb/api/executor/__init__.py +0 -1
  6. mindsdb/api/executor/command_executor.py +2 -1
  7. mindsdb/api/executor/data_types/answer.py +1 -1
  8. mindsdb/api/executor/datahub/datanodes/datanode.py +1 -1
  9. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
  10. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +8 -3
  11. mindsdb/api/executor/datahub/datanodes/project_datanode.py +9 -26
  12. mindsdb/api/executor/sql_query/__init__.py +1 -0
  13. mindsdb/api/executor/sql_query/result_set.py +36 -21
  14. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
  15. mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
  16. mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
  17. mindsdb/api/executor/utilities/sql.py +2 -10
  18. mindsdb/api/http/namespaces/agents.py +3 -1
  19. mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
  20. mindsdb/api/http/namespaces/sql.py +3 -1
  21. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
  22. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
  23. mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
  24. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
  25. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
  26. mindsdb/integrations/handlers/databricks_handler/requirements.txt +1 -1
  27. mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
  28. mindsdb/integrations/handlers/file_handler/requirements.txt +0 -4
  29. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
  30. mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
  31. mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
  32. mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
  33. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
  34. mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
  35. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
  36. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +8 -0
  37. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +50 -16
  38. mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
  39. mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
  40. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
  41. mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +5 -3
  42. mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
  43. mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
  44. mindsdb/integrations/handlers/web_handler/requirements.txt +0 -1
  45. mindsdb/integrations/libs/ml_handler_process/learn_process.py +2 -2
  46. mindsdb/integrations/utilities/files/__init__.py +0 -0
  47. mindsdb/integrations/utilities/files/file_reader.py +258 -0
  48. mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +2 -1
  49. mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +8 -3
  50. mindsdb/integrations/utilities/rag/chains/map_reduce_summarizer_chain.py +5 -9
  51. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
  52. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
  53. mindsdb/integrations/utilities/rag/pipelines/rag.py +74 -21
  54. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
  55. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +108 -78
  56. mindsdb/integrations/utilities/rag/settings.py +37 -16
  57. mindsdb/integrations/utilities/sql_utils.py +1 -1
  58. mindsdb/interfaces/agents/agents_controller.py +18 -8
  59. mindsdb/interfaces/agents/constants.py +1 -0
  60. mindsdb/interfaces/agents/langchain_agent.py +124 -157
  61. mindsdb/interfaces/agents/langfuse_callback_handler.py +4 -37
  62. mindsdb/interfaces/agents/mindsdb_database_agent.py +21 -13
  63. mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
  64. mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
  65. mindsdb/interfaces/chatbot/memory.py +58 -13
  66. mindsdb/interfaces/database/integrations.py +5 -1
  67. mindsdb/interfaces/database/projects.py +55 -16
  68. mindsdb/interfaces/database/views.py +12 -25
  69. mindsdb/interfaces/knowledge_base/controller.py +39 -15
  70. mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +7 -26
  71. mindsdb/interfaces/model/functions.py +15 -4
  72. mindsdb/interfaces/model/model_controller.py +4 -7
  73. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +51 -40
  74. mindsdb/interfaces/skills/retrieval_tool.py +10 -3
  75. mindsdb/interfaces/skills/skill_tool.py +97 -54
  76. mindsdb/interfaces/skills/skills_controller.py +7 -3
  77. mindsdb/interfaces/skills/sql_agent.py +127 -41
  78. mindsdb/interfaces/storage/db.py +1 -1
  79. mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
  80. mindsdb/utilities/cache.py +7 -4
  81. mindsdb/utilities/context.py +11 -1
  82. mindsdb/utilities/langfuse.py +279 -0
  83. mindsdb/utilities/log.py +20 -2
  84. mindsdb/utilities/otel/__init__.py +206 -0
  85. mindsdb/utilities/otel/logger.py +25 -0
  86. mindsdb/utilities/otel/meter.py +19 -0
  87. mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
  88. mindsdb/utilities/otel/tracer.py +16 -0
  89. mindsdb/utilities/partitioning.py +52 -0
  90. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  91. mindsdb/utilities/utils.py +34 -0
  92. mindsdb/utilities/otel.py +0 -72
  93. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/LICENSE +0 -0
  94. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/WHEEL +0 -0
  95. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,20 @@
1
1
  import json
2
- from typing import List, Optional
2
+ import re
3
+ from pydantic import BaseModel, Field
4
+ from typing import Any, List, Optional
3
5
 
4
6
  from langchain.chains.llm import LLMChain
5
7
  from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
6
8
  from langchain_core.documents.base import Document
7
9
  from langchain_core.embeddings import Embeddings
10
+ from langchain_core.exceptions import OutputParserException
8
11
  from langchain_core.language_models.chat_models import BaseChatModel
12
+ from langchain_core.output_parsers import PydanticOutputParser
9
13
  from langchain_core.prompts import PromptTemplate
10
14
  from langchain_core.retrievers import BaseRetriever
11
15
 
12
16
  from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
17
+ from mindsdb.integrations.libs.response import HandlerResponse
13
18
  from mindsdb.integrations.libs.vectordatabase_handler import DistanceFunction, VectorStoreHandler
14
19
  from mindsdb.integrations.utilities.rag.settings import LLMExample, MetadataSchema, SearchKwargs
15
20
  from mindsdb.utilities import log
@@ -17,6 +22,18 @@ from mindsdb.utilities import log
17
22
  logger = log.getLogger(__name__)
18
23
 
19
24
 
25
+ class MetadataFilter(BaseModel):
26
+ '''Represents an LLM generated metadata filter to apply to a PostgreSQL query.'''
27
+ attribute: str = Field(description="Database column to apply filter to")
28
+ comparator: str = Field(description="PostgreSQL comparator to use to filter database column")
29
+ value: Any = Field(description="Value to use to filter database column")
30
+
31
+
32
+ class MetadataFilters(BaseModel):
33
+ '''List of LLM generated metadata filters to apply to a PostgreSQL query.'''
34
+ filters: List[MetadataFilter] = Field(description="List of PostgreSQL metadata filters to apply for user query")
35
+
36
+
20
37
  class SQLRetriever(BaseRetriever):
21
38
  '''Retriever that uses a LLM to generate pgvector queries to do similarity search with metadata filters.
22
39
 
@@ -25,10 +42,10 @@ class SQLRetriever(BaseRetriever):
25
42
  1. Use a LLM to rewrite the user input to something more suitable for retrieval. For example:
26
43
  "Show me documents containing how to finetune a LLM please" --> "how to finetune a LLM"
27
44
 
28
- 2. Use a LLM to generate a pgvector query with metadata filters based on the user input. Provided
29
- metadata schemas & examples are used as additional context to generate the query.
45
+ 2. Use a LLM to generate structured metadata filters based on the user input. Provided
46
+ metadata schemas & examples are used as additional context.
30
47
 
31
- 3. Use a LLM to double check the generated pgvector query is correct.
48
+ 3. Generate a prepared PostgreSQL query from the structured metadata filters.
32
49
 
33
50
  4. Actually execute the query against our vector database to retrieve documents & return them.
34
51
  '''
@@ -37,23 +54,22 @@ class SQLRetriever(BaseRetriever):
37
54
  metadata_schemas: Optional[List[MetadataSchema]] = None
38
55
  examples: Optional[List[LLMExample]] = None
39
56
 
40
- embeddings_model: Embeddings
41
57
  rewrite_prompt_template: str
42
- retry_prompt_template: str
58
+ metadata_filters_prompt_template: str
59
+ embeddings_model: Embeddings
43
60
  num_retries: int
44
- sql_prompt_template: str
45
- query_checker_template: str
46
61
  embeddings_table: str
47
62
  source_table: str
63
+ source_id_column: str = 'Id'
48
64
  distance_function: DistanceFunction
49
65
  search_kwargs: SearchKwargs
50
66
 
51
67
  llm: BaseChatModel
52
68
 
53
- def _prepare_sql_prompt(self) -> PromptTemplate:
69
+ def _prepare_metadata_prompt(self) -> PromptTemplate:
54
70
  base_prompt_template = PromptTemplate(
55
- input_variables=['dialect', 'input', 'embeddings_table', 'source_table', 'embeddings', 'distance_function', 'schema', 'examples'],
56
- template=self.sql_prompt_template
71
+ input_variables=['format_instructions', 'schema', 'examples', 'input', 'embeddings'],
72
+ template=self.metadata_filters_prompt_template
57
73
  )
58
74
  schema_prompt_str = ''
59
75
  if self.metadata_schemas is not None:
@@ -67,7 +83,7 @@ class SQLRetriever(BaseRetriever):
67
83
  if column.values is not None:
68
84
  column_mapping[column.name]['values'] = column.values
69
85
  column_mapping_json_str = json.dumps(column_mapping, indent=4)
70
- schema_str = f'''{i+2}. {schema.table} - {schema.description}
86
+ schema_str = f'''{i+1}. {schema.table} - {schema.description}
71
87
 
72
88
  Columns:
73
89
  ```json
@@ -86,7 +102,7 @@ Output:
86
102
  {example.output}
87
103
 
88
104
  '''
89
- examples_prompt_str += example_str
105
+ examples_prompt_str += example_str
90
106
  return base_prompt_template.partial(
91
107
  schema=schema_prompt_str,
92
108
  examples=examples_prompt_str
@@ -100,83 +116,93 @@ Output:
100
116
  rewrite_chain = LLMChain(llm=self.llm, prompt=rewrite_prompt)
101
117
  return rewrite_chain.predict(input=query)
102
118
 
103
- def _prepare_pgvector_query(self, query: str, run_manager: CallbackManagerForRetrieverRun) -> str:
104
- # Incorporate metadata schemas & examples into prompt.
105
- sql_prompt = self._prepare_sql_prompt()
106
- sql_chain = LLMChain(llm=self.llm, prompt=sql_prompt)
107
- # Generate the initial pgvector query.
108
- sql_query = sql_chain.predict(
109
- # Only pgvector & similarity search is supported.
110
- dialect='postgres',
111
- input=query,
112
- embeddings_table=self.embeddings_table,
113
- source_table=self.source_table,
114
- distance_function=self.distance_function.value[0],
115
- k=self.search_kwargs.k,
116
- callbacks=run_manager.get_child() if run_manager else None
117
- )
118
- query_checker_prompt = PromptTemplate(
119
- input_variables=['dialect', 'query'],
120
- template=self.query_checker_template
121
- )
122
- query_checker_chain = LLMChain(llm=self.llm, prompt=query_checker_prompt)
123
- # Check the query & return the final result to be executed.
124
- return query_checker_chain.predict(
125
- dialect='postgres',
126
- query=sql_query
127
- )
128
-
129
- def _prepare_retry_query(self, query: str, error: str, run_manager: CallbackManagerForRetrieverRun) -> str:
130
- sql_prompt = self._prepare_sql_prompt()
131
- # Use provided schema as context for retrying failed queries.
132
- schema = sql_prompt.partial_variables.get('schema', '')
133
- retry_prompt = PromptTemplate(
134
- input_variables=['query', 'dialect', 'error', 'embeddings_table', 'schema'],
135
- template=self.retry_prompt_template
136
- )
137
- retry_chain = LLMChain(llm=self.llm, prompt=retry_prompt)
138
- # Generate rewritten query.
139
- return retry_chain.predict(
140
- query=query,
141
- dialect='postgres',
142
- error=error,
143
- embeddings_table=self.embeddings_table,
144
- schema=schema,
145
- callbacks=run_manager.get_child() if run_manager else None
119
+ def _prepare_pgvector_query(self, metadata_filters: List[MetadataFilter]) -> str:
120
+ # Base select JOINed with document source table.
121
+ base_query = f'''SELECT * FROM {self.embeddings_table} AS e INNER JOIN {self.source_table} AS s ON (e.metadata->>'original_row_id')::int = s."{self.source_id_column}" '''
122
+ col_to_schema = {}
123
+ if not self.metadata_schemas:
124
+ return ''
125
+ for schema in self.metadata_schemas:
126
+ for col in schema.columns:
127
+ col_to_schema[col.name] = schema
128
+ joined_schemas = set()
129
+ for filter in metadata_filters:
130
+ # Join schemas before filtering.
131
+ schema = col_to_schema.get(filter.attribute)
132
+ if schema is None or schema.table in joined_schemas or schema.table == self.source_table:
133
+ continue
134
+ joined_schemas.add(schema.table)
135
+ base_query += schema.join + ' '
136
+ # Actually construct WHERE conditions from metadata filters.
137
+ if metadata_filters:
138
+ base_query += 'WHERE '
139
+ for i, filter in enumerate(metadata_filters):
140
+ value = filter.value
141
+ if isinstance(value, str):
142
+ value = f"'{value}'"
143
+ base_query += f'"{filter.attribute}" {filter.comparator} {value}'
144
+ if i < len(metadata_filters) - 1:
145
+ base_query += ' AND '
146
+ base_query += f" ORDER BY e.embeddings {self.distance_function.value[0]} '{{embeddings}}' LIMIT {self.search_kwargs.k};"
147
+ return base_query
148
+
149
+ def _generate_metadata_filters(self, query: str) -> List[MetadataFilter]:
150
+ parser = PydanticOutputParser(pydantic_object=MetadataFilters)
151
+ metadata_prompt = self._prepare_metadata_prompt()
152
+ metadata_filters_chain = LLMChain(llm=self.llm, prompt=metadata_prompt)
153
+ metadata_filters_output = metadata_filters_chain.predict(
154
+ format_instructions=parser.get_format_instructions(),
155
+ input=query
146
156
  )
157
+ # If the LLM outputs raw JSON, use it as-is.
158
+ # If the LLM outputs anything including a json markdown section, use the last one.
159
+ json_markdown_output = re.findall(r'```json.*```', metadata_filters_output, re.DOTALL)
160
+ if json_markdown_output:
161
+ metadata_filters_output = json_markdown_output[-1]
162
+ # Clean the json tags.
163
+ metadata_filters_output = metadata_filters_output[7:]
164
+ metadata_filters_output = metadata_filters_output[:-3]
165
+ metadata_filters = parser.invoke(metadata_filters_output)
166
+ return metadata_filters.filters
167
+
168
+ def _prepare_and_execute_query(self, query: str, embeddings_str: str) -> HandlerResponse:
169
+ try:
170
+ metadata_filters = self._generate_metadata_filters(query)
171
+ checked_sql_query = self._prepare_pgvector_query(metadata_filters)
172
+ checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=embeddings_str)
173
+ return self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
174
+ except OutputParserException as e:
175
+ logger.warning(f'LLM failed to generate structured metadata filters: {str(e)}')
176
+ return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
177
+ except Exception as e:
178
+ logger.warning(f'Failed to prepare and execute SQL query from structured metadata: {str(e)}')
179
+ return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
147
180
 
148
181
  def _get_relevant_documents(
149
182
  self, query: str, *, run_manager: CallbackManagerForRetrieverRun
150
183
  ) -> List[Document]:
151
184
  # Rewrite query to be suitable for retrieval.
152
185
  retrieval_query = self._prepare_retrieval_query(query)
153
-
154
- # Generate & check the query to be executed
155
- checked_sql_query = self._prepare_pgvector_query(query, run_manager)
156
-
157
186
  # Embed the rewritten retrieval query & include it in the similarity search pgvector query.
158
187
  embedded_query = self.embeddings_model.embed_query(retrieval_query)
159
- checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=str(embedded_query))
160
- # Handle LLM output that has the ```sql delimiter possibly.
161
- checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```sql', '')
162
- checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```', '')
163
188
  # Actually execute the similarity search with metadata filters.
164
- document_response = self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
189
+ document_response = self._prepare_and_execute_query(retrieval_query, str(embedded_query))
165
190
  num_retries = 0
166
- while document_response.resp_type == RESPONSE_TYPE.ERROR:
167
- error_msg = document_response.error_message
168
- # LLMs won't always generate a working SQL query so we should have a fallback after retrying.
169
- logger.info(f'SQL Retriever query {checked_sql_query} failed with error {error_msg}')
191
+ while num_retries < self.num_retries:
192
+ if document_response.resp_type != RESPONSE_TYPE.ERROR and len(document_response.data_frame) > 0:
193
+ # Successfully retrieved documents.
194
+ break
195
+ if document_response.resp_type == RESPONSE_TYPE.ERROR:
196
+ # LLMs won't always generate structured metadata so we should have a fallback after retrying.
197
+ logger.info(f'SQL Retriever query failed with error {document_response.error_message}')
198
+ elif len(document_response.data_frame) == 0:
199
+ logger.info('No documents retrieved from SQL Retriever query')
200
+
201
+ document_response = self._prepare_and_execute_query(retrieval_query, str(embedded_query))
202
+ num_retries += 1
170
203
  if num_retries >= self.num_retries:
171
204
  logger.info('Using fallback retriever in SQL retriever.')
172
- return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager)
173
- query_to_retry = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
174
- query_to_retry_with_embeddings = query_to_retry.format(embeddings=str(embedded_query))
175
- # Handle LLM output that has the ```sql delimiter possibly.
176
- query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```sql', '')
177
- query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```', '')
178
- document_response = self.vector_store_handler.native_query(query_to_retry_with_embeddings)
179
- num_retries += 1
205
+ return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
180
206
 
181
207
  document_df = document_response.data_frame
182
208
  retrieved_documents = []
@@ -185,4 +211,8 @@ Output:
185
211
  document_row.get('content', ''),
186
212
  metadata=document_row.get('metadata', {})
187
213
  ))
188
- return retrieved_documents
214
+ if retrieved_documents:
215
+ return retrieved_documents
216
+ # If the SQL query constructed did not return any documents, fallback.
217
+ logger.info('No documents returned from SQL retriever. using fallback retriever.')
218
+ return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
@@ -3,7 +3,6 @@ from typing import List, Union, Any, Optional, Dict
3
3
 
4
4
  from langchain_community.vectorstores.chroma import Chroma
5
5
  from langchain_community.vectorstores.pgvector import PGVector
6
- from langchain_community.tools.sql_database.prompt import QUERY_CHECKER as DEFAULT_QUERY_CHECKER_PROMPT_TEMPLATE
7
6
  from langchain_core.documents import Document
8
7
  from langchain_core.embeddings import Embeddings
9
8
  from langchain_core.language_models import BaseChatModel
@@ -94,6 +93,25 @@ Output only a single better search query and nothing else like in the example.
94
93
  Here is the user input: {input}
95
94
  '''
96
95
 
96
+ DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE = '''Construct a list of PostgreSQL metadata filters to filter documents in the database based on the user input.
97
+
98
+ << INSTRUCTIONS >>
99
+ {format_instructions}
100
+
101
+ RETURN ONLY THE FINAL JSON. DO NOT EXPLAIN, JUST RETURN THE FINAL JSON.
102
+
103
+ << TABLES YOU HAVE ACCESS TO >>
104
+
105
+ {schema}
106
+
107
+ << EXAMPLES >>
108
+
109
+ {examples}
110
+
111
+ Here is the user input:
112
+ {input}
113
+ '''
114
+
97
115
  DEFAULT_SQL_PROMPT_TEMPLATE = '''
98
116
  Construct a valid {dialect} SQL query to select documents relevant to the user input.
99
117
  Source documents are found in the {source_table} table. You may need to join with other tables to get additional document metadata.
@@ -136,7 +154,6 @@ Columns:
136
154
  "description": "Metadata for the document chunk. Always select metadata and always join with the {source_table} table on the string metadata field 'original_row_id'"
137
155
  }}
138
156
  }}
139
- ```
140
157
 
141
158
  {schema}
142
159
 
@@ -290,6 +307,8 @@ class VectorStoreConfig(BaseModel):
290
307
  collection_name: str = DEFAULT_COLLECTION_NAME
291
308
  connection_string: str = None
292
309
  kb_table: Any = None
310
+ is_sparse: bool = False
311
+ vector_size: Optional[int] = None
293
312
 
294
313
  class Config:
295
314
  arbitrary_types_allowed = True
@@ -376,6 +395,13 @@ class MetadataSchema(BaseModel):
376
395
  columns: List[ColumnSchema] = Field(
377
396
  description="List of column schemas describing the metadata columns available for the table"
378
397
  )
398
+ join: str = Field(
399
+ description="SQL join string to join this table with source documents table",
400
+ default=''
401
+ )
402
+
403
+ class Config:
404
+ frozen = True
379
405
 
380
406
 
381
407
  class LLMExample(BaseModel):
@@ -392,19 +418,9 @@ class SQLRetrieverConfig(BaseModel):
392
418
  default_factory=LLMConfig,
393
419
  description="LLM configuration to use for generating the final SQL query for retrieval"
394
420
  )
395
- sql_prompt_template: str = Field(
396
- default=DEFAULT_SQL_PROMPT_TEMPLATE,
397
- description="""Prompt template to generate the SQL query to execute against the vector database. Currently only pgvector is supported.
398
- Has 'dialect', 'input', 'embeddings_table', 'source_table', 'embeddings', 'distance_function', 'schema', and 'examples' input variables.
399
- """
400
- )
401
- query_checker_template: str = Field(
402
- default=DEFAULT_QUERY_CHECKER_PROMPT_TEMPLATE,
403
- description="Prompt template to use for double checking SQL queries before execution. Has 'query' and 'dialect' input variables."
404
- )
405
- query_retry_template: str = Field(
406
- default=DEFAULT_QUERY_RETRY_PROMPT_TEMPLATE,
407
- description="Prompt template to rewrite SQL query that failed. Has 'dialect', 'query', and 'error' input variables."
421
+ metadata_filters_prompt_template: str = Field(
422
+ default=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE,
423
+ description="Prompt template to generate PostgreSQL metadata filters. Has 'format_instructions', 'schema', 'examples', and 'input' input variables"
408
424
  )
409
425
  num_retries: int = Field(
410
426
  default=DEFAULT_NUM_QUERY_RETRIES,
@@ -449,8 +465,13 @@ class SummarizationConfig(BaseModel):
449
465
  class RerankerConfig(BaseModel):
450
466
  model: str = DEFAULT_RERANKING_MODEL
451
467
  base_url: str = DEFAULT_LLM_ENDPOINT
452
- filtering_threshold: float = 0.99
468
+ filtering_threshold: float = 0.5
453
469
  num_docs_to_keep: Optional[int] = None
470
+ max_concurrent_requests: int = 20
471
+ max_retries: int = 3
472
+ retry_delay: float = 1.0
473
+ early_stop: bool = True # Whether to enable early stopping
474
+ early_stop_threshold: float = 0.8 # Confidence threshold for early stopping
454
475
 
455
476
 
456
477
  class MultiHopRetrieverConfig(BaseModel):
@@ -178,7 +178,7 @@ def project_dataframe(df, targets, table_columns):
178
178
 
179
179
  # adapt column names to projection
180
180
  if len(df_col_rename) > 0:
181
- df = df.rename(columns=df_col_rename)
181
+ df.rename(columns=df_col_rename, inplace=True)
182
182
  return df
183
183
 
184
184
 
@@ -1,5 +1,5 @@
1
1
  import datetime
2
- from typing import Dict, Iterator, List, Union, Tuple
2
+ from typing import Dict, Iterator, List, Union, Tuple, Optional
3
3
 
4
4
  from langchain_core.tools import BaseTool
5
5
  from sqlalchemy.orm.attributes import flag_modified
@@ -70,7 +70,7 @@ class AgentsController:
70
70
 
71
71
  return model, provider
72
72
 
73
- def get_agent(self, agent_name: str, project_name: str = 'mindsdb') -> db.Agents:
73
+ def get_agent(self, agent_name: str, project_name: str = 'mindsdb') -> Optional[db.Agents]:
74
74
  '''
75
75
  Gets an agent by name.
76
76
 
@@ -79,7 +79,7 @@ class AgentsController:
79
79
  project_name (str): The name of the containing project - must exist
80
80
 
81
81
  Returns:
82
- agent (db.Agents): The database agent object
82
+ agent (Optional[db.Agents]): The database agent object
83
83
  '''
84
84
 
85
85
  project = self.project_controller.get(name=project_name)
@@ -252,6 +252,16 @@ class AgentsController:
252
252
  existing_agent = self.get_agent(agent_name, project_name=project_name)
253
253
  if existing_agent is None:
254
254
  raise EntityNotExistsError(f'Agent with name not found: {agent_name}')
255
+ is_demo = (existing_agent.params or {}).get('is_demo', False)
256
+ if (
257
+ is_demo and (
258
+ (name is not None and name != agent_name)
259
+ or (model_name or provider)
260
+ or (len(skills_to_add) > 0 or len(skills_to_remove) > 0 or len(skills_to_rewrite) > 0)
261
+ or (isinstance(params, dict) and len(params) > 1 and 'prompt_template' not in params)
262
+ )
263
+ ):
264
+ raise ValueError("It is forbidden to change properties of the demo object")
255
265
 
256
266
  if name is not None and name != agent_name:
257
267
  # Check to see if updated name already exists
@@ -352,6 +362,8 @@ class AgentsController:
352
362
  agent = self.get_agent(agent_name, project_name)
353
363
  if agent is None:
354
364
  raise ValueError(f'Agent with name does not exist: {agent_name}')
365
+ if isinstance(agent.params, dict) and agent.params.get('is_demo') is True:
366
+ raise ValueError('Unable to delete demo object')
355
367
  agent.deleted_at = datetime.datetime.now()
356
368
  db.session.commit()
357
369
 
@@ -362,24 +374,22 @@ class AgentsController:
362
374
  project_name: str = 'mindsdb',
363
375
  tools: List[BaseTool] = None,
364
376
  stream: bool = False) -> Union[Iterator[object], pd.DataFrame]:
365
- '''
377
+ """
366
378
  Queries an agent to get a completion.
367
379
 
368
380
  Parameters:
369
381
  agent (db.Agents): Existing agent to get completion from
370
382
  messages (List[Dict[str, str]]): Chat history to send to the agent
371
- trace_id (str): ID of Langfuse trace to use
372
- observation_id (str): ID of parent Langfuse observation to use
373
383
  project_name (str): Project the agent belongs to (default mindsdb)
374
384
  tools (List[BaseTool]): Tools to use while getting the completion
375
- stream (bool): Whether or not to stream the response
385
+ stream (bool): Whether to stream the response
376
386
 
377
387
  Returns:
378
388
  response (Union[Iterator[object], pd.DataFrame]): Completion as a DataFrame or iterator of completion chunks
379
389
 
380
390
  Raises:
381
391
  ValueError: Agent's model does not exist.
382
- '''
392
+ """
383
393
  if stream:
384
394
  return self._get_completion_stream(
385
395
  agent,
@@ -165,6 +165,7 @@ PROVIDER_TO_MODELS = MappingProxyType(
165
165
 
166
166
  ASSISTANT_COLUMN = "answer"
167
167
  CONTEXT_COLUMN = "context"
168
+ TRACE_ID_COLUMN = "trace_id"
168
169
  DEFAULT_AGENT_TIMEOUT_SECONDS = 300
169
170
  # These should require no additional arguments.
170
171
  DEFAULT_AGENT_TOOLS = []