MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0__py3-none-any.whl
This diff compares the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +40 -29
- mindsdb/api/a2a/__init__.py +1 -1
- mindsdb/api/a2a/agent.py +16 -10
- mindsdb/api/a2a/common/server/server.py +7 -3
- mindsdb/api/a2a/common/server/task_manager.py +12 -5
- mindsdb/api/a2a/common/types.py +66 -0
- mindsdb/api/a2a/task_manager.py +65 -17
- mindsdb/api/common/middleware.py +10 -12
- mindsdb/api/executor/command_executor.py +51 -40
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/planner/query_prepare.py +2 -20
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +37 -20
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +75 -61
- mindsdb/api/http/namespaces/agents.py +10 -15
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/chatbots.py +0 -5
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +17 -4
- mindsdb/api/http/namespaces/handlers.py +17 -7
- mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +16 -10
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
- mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
- mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
- mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
- mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
- mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
- mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
- mindsdb/integrations/handlers/shopify_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +80 -13
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -3
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
- mindsdb/interfaces/database/data_handlers_cache.py +190 -0
- mindsdb/interfaces/database/database.py +3 -3
- mindsdb/interfaces/database/integrations.py +7 -110
- mindsdb/interfaces/database/projects.py +2 -6
- mindsdb/interfaces/database/views.py +1 -4
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -9
- mindsdb/interfaces/jobs/scheduler.py +3 -9
- mindsdb/interfaces/knowledge_base/controller.py +244 -128
- mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
- mindsdb/interfaces/knowledge_base/executor.py +11 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +172 -168
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/skills_controller.py +1 -4
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/db.py +16 -6
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -52
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +68 -2
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/json_encoder.py +24 -10
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +22 -20
- mindsdb/utilities/starters.py +0 -10
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/METADATA +286 -267
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/RECORD +145 -159
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- mindsdb/api/postgres/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
- mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
- mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
- mindsdb/api/postgres/start.py +0 -11
- mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
- mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py

@@ -1,10 +1,10 @@
 import re
-
-from pydantic import BaseModel, Field
-from typing import List, Any, Optional, Dict, Tuple, Union, Callable
-import collections
 import math
+import logging
+import collections
+from typing import List, Any, Optional, Dict, Tuple, Union, Callable

+from pydantic import BaseModel, Field
 from langchain.chains.llm import LLMChain
 from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
 from langchain_core.documents.base import Document
@@ -39,9 +39,7 @@ class MetadataFilter(BaseModel):
     """Represents an LLM generated metadata filter to apply to a PostgreSQL query."""

     attribute: str = Field(description="Database column to apply filter to")
-    comparator: str = Field(
-        description="PostgreSQL comparator to use to filter database column"
-    )
+    comparator: str = Field(description="PostgreSQL comparator to use to filter database column")
     value: Any = Field(description="Value to use to filter database column")


@@ -56,9 +54,7 @@ class AblativeMetadataFilter(MetadataFilter):
 class MetadataFilters(BaseModel):
     """List of LLM generated metadata filters to apply to a PostgreSQL query."""

-    filters: List[MetadataFilter] = Field(
-        description="List of PostgreSQL metadata filters to apply for user query"
-    )
+    filters: List[MetadataFilter] = Field(description="List of PostgreSQL metadata filters to apply for user query")


 class SQLRetriever(BaseRetriever):
@@ -142,25 +138,17 @@ class SQLRetriever(BaseRetriever):
         elif isinstance(schema, ColumnSchema):
             collection_key = "values"
         else:
-            raise Exception(
-                "schema must be either a DatabaseSchema, TableSchema, or ColumnSchema."
-            )
+            raise Exception("schema must be either a DatabaseSchema, TableSchema, or ColumnSchema.")

         if update is not None:
-            ordered = collections.OrderedDict(
-                sorted(update.items(), key=key, reverse=True)
-            )
+            ordered = collections.OrderedDict(sorted(update.items(), key=key, reverse=True))
         else:
-            ordered = collections.OrderedDict(
-                sorted(getattr(schema, collection_key).items(), key=key, reverse=True)
-            )
+            ordered = collections.OrderedDict(sorted(getattr(schema, collection_key).items(), key=key, reverse=True))
         schema = schema.model_copy(update={collection_key: ordered})

         return schema

-    def _sort_database_schema_by_key(
-        self, database_schema: DatabaseSchema, key: Callable
-    ) -> DatabaseSchema:
+    def _sort_database_schema_by_key(self, database_schema: DatabaseSchema, key: Callable) -> DatabaseSchema:
         """Re-build schema with OrderedDicts"""
         tables = {}
         # build new tables dict
@@ -169,17 +157,11 @@ class SQLRetriever(BaseRetriever):
             # build new column dict
             for column_key, column_schema in table_schema.columns.items():
                 # sort values directly and update column schema
-                columns[column_key] = self._sort_schema_by_key(
-                    schema=column_schema, key=key
-                )
+                columns[column_key] = self._sort_schema_by_key(schema=column_schema, key=key)
             # update table schema and sort
-            tables[table_key] = self._sort_schema_by_key(
-                schema=table_schema, key=key, update=columns
-            )
+            tables[table_key] = self._sort_schema_by_key(schema=table_schema, key=key, update=columns)
         # update table schema and sort
-        database_schema = self._sort_schema_by_key(
-            schema=database_schema, key=key, update=tables
-        )
+        database_schema = self._sort_schema_by_key(schema=database_schema, key=key, update=tables)

         return database_schema

@@ -191,15 +173,12 @@ class SQLRetriever(BaseRetriever):
         boolean_system_prompt: bool = True,
         format_instructions: Optional[str] = None,
     ) -> ChatPromptTemplate:
-
         if boolean_system_prompt is True:
             system_prompt = self.boolean_system_prompt
         else:
             system_prompt = self.generative_system_prompt

-        prepared_column_prompt = self._prepare_column_prompt(
-            column_schema=column_schema, table_schema=table_schema
-        )
+        prepared_column_prompt = self._prepare_column_prompt(column_schema=column_schema, table_schema=table_schema)
         column_schema_str = (
             prepared_column_prompt.messages[1]
             .format(
@@ -290,7 +269,6 @@ Below is a list of comparison operators for constructing filters for this value
         table_schema: TableSchema,
         boolean_system_prompt: bool = True,
     ) -> ChatPromptTemplate:
-
         if boolean_system_prompt is True:
             system_prompt = self.boolean_system_prompt
         else:
@@ -312,9 +290,7 @@ Below is a list of comparison operators for constructing filters for this value
             [("system", system_prompt), ("user", self.column_prompt_template)]
         )

-        header_str = (
-            f"This schema describes a column in the {table_schema.table} table."
-        )
+        header_str = f"This schema describes a column in the {table_schema.table} table."

         value_str = """
 ## **Content**
@@ -388,26 +364,18 @@ Below is a description of the contents in this column in list format:
         )

     def _rank_schema(self, prompt: ChatPromptTemplate, query: str) -> float:
-        rank_chain = LLMChain(
-            llm=self.llm.bind(logprobs=True), prompt=prompt, return_final_only=False
-        )
+        rank_chain = LLMChain(llm=self.llm.bind(logprobs=True), prompt=prompt, return_final_only=False)
         output = rank_chain({"query": query})  # returns metadata

         # parse through metadata tokens until encountering either yes, or no.
         score = None  # a None score indicates the model output could not be parsed.
-        for content in output["full_generation"][0].message.response_metadata[
-            "logprobs"
-        ]["content"]:
+        for content in output["full_generation"][0].message.response_metadata["logprobs"]["content"]:
             # Convert answer to score using the model's confidence
             if content["token"].lower().strip() == "yes":
-                score = (
-                    1 + math.exp(content["logprob"])
-                ) / 2  # If yes, use the model's confidence
+                score = (1 + math.exp(content["logprob"])) / 2  # If yes, use the model's confidence
                 break
             elif content["token"].lower().strip() == "no":
-                score = (
-                    1 - math.exp(content["logprob"])
-                ) / 2  # If no, invert the confidence
+                score = (1 - math.exp(content["logprob"])) / 2  # If no, invert the confidence
                 break

         if score is None:
@@ -465,9 +433,7 @@ Below is a description of the contents in this column in list format:
                     table_schema=table_schema,
                     boolean_system_prompt=True,
                 )
-                column_schema.relevance = self._rank_schema(
-                    prompt=prompt, query=query
-                )
+                column_schema.relevance = self._rank_schema(prompt=prompt, query=query)

                 columns[column_key] = column_schema

@@ -512,9 +478,7 @@ Below is a description of the contents in this column in list format:
                     table_schema=table_schema,
                     boolean_system_prompt=True,
                 )
-                value_schema.relevance = self._rank_schema(
-                    prompt=prompt, query=query
-                )
+                value_schema.relevance = self._rank_schema(prompt=prompt, query=query)

                 values[value_key] = value_schema

@@ -592,19 +556,13 @@ Below is a description of the contents in this column in list format:
         for table_key, table_schema in ordered_database_schema.tables.items():
             for column_key, column_schema in table_schema.columns.items():
                 for value_key, value_schema in column_schema.values.items():
-                    ablation_value_dict[(table_key, column_key, value_key)] = (
-                        value_schema.relevance
-                    )
+                    ablation_value_dict[(table_key, column_key, value_key)] = value_schema.relevance

-        ablation_value_dict = collections.OrderedDict(
-            sorted(ablation_value_dict.items(), key=lambda x: x[1])
-        )
+        ablation_value_dict = collections.OrderedDict(sorted(ablation_value_dict.items(), key=lambda x: x[1]))

         relevance_scores = list(ablation_value_dict.values())
         if len(relevance_scores) > 0:
-            ablation_quantiles = np.quantile(
-                relevance_scores, np.linspace(0, 1, self.num_retries + 2)[1:-1]
-            )
+            ablation_quantiles = np.quantile(relevance_scores, np.linspace(0, 1, self.num_retries + 2)[1:-1])
         else:
             ablation_quantiles = None

@@ -628,11 +586,7 @@ Below is a description of the contents in this column in list format:
         ablated_filters = []
         for filter in metadata_filters:
             for key in ablated_dict.keys():
-                if (
-                    filter.schema_table in key
-                    and filter.schema_column in key
-                    and filter.schema_value in key
-                ):
+                if filter.schema_table in key and filter.schema_column in key and filter.schema_value in key:
                     ablated_filters.append(filter)

         return ablated_filters
@@ -646,9 +600,7 @@ Below is a description of the contents in this column in list format:
         pass

     def _prepare_retrieval_query(self, query: str) -> str:
-        rewrite_prompt = PromptTemplate(
-            input_variables=["input"], template=self.rewrite_prompt_template
-        )
+        rewrite_prompt = PromptTemplate(input_variables=["input"], template=self.rewrite_prompt_template)
         rewrite_chain = LLMChain(llm=self.llm, prompt=rewrite_prompt)
         return rewrite_chain.predict(input=query)

@@ -668,9 +620,7 @@ Below is a description of the contents in this column in list format:
         # Add Table JOIN statements
         join_clauses = set()
         for metadata_filter in metadata_filters:
-            join_clause = ranked_database_schema.tables[
-                metadata_filter.schema_table
-            ].join
+            join_clause = ranked_database_schema.tables[metadata_filter.schema_table].join
             if join_clause in join_clauses:
                 continue
             else:
@@ -688,12 +638,12 @@ Below is a description of the contents in this column in list format:
             if i < len(metadata_filters) - 1:
                 base_query += " AND "

-        base_query += f" ORDER BY e.embeddings {self.distance_function.value[0]} '{{embeddings}}' LIMIT {self.search_kwargs.k};"
+        base_query += (
+            f" ORDER BY e.embeddings {self.distance_function.value[0]} '{{embeddings}}' LIMIT {self.search_kwargs.k};"
+        )
         return base_query

-    def _generate_filter(
-        self, prompt: ChatPromptTemplate, query: str
-    ) -> MetadataFilter:
+    def _generate_filter(self, prompt: ChatPromptTemplate, query: str) -> MetadataFilter:
         gen_filter_chain = LLMChain(llm=self.llm, prompt=prompt)
         output = gen_filter_chain({"query": query})
         return output
@@ -714,28 +664,22 @@ Below is a description of the contents in this column in list format:
                 # must use generation if field is a dictionary of tuples or a list
                 if type(value_schema.value) in [list, dict]:
                     try:
-                        metadata_prompt: ChatPromptTemplate = (
-                            self._prepare_value_prompt(
-                                format_instructions=parser.get_format_instructions(),
-                                value_schema=value_schema,
-                                column_schema=column_schema,
-                                table_schema=table_schema,
-                                boolean_system_prompt=False,
-                            )
+                        metadata_prompt: ChatPromptTemplate = self._prepare_value_prompt(
+                            format_instructions=parser.get_format_instructions(),
+                            value_schema=value_schema,
+                            column_schema=column_schema,
+                            table_schema=table_schema,
+                            boolean_system_prompt=False,
                         )

-                        metadata_filters_chain = LLMChain(
-                            llm=self.llm, prompt=metadata_prompt
-                        )
+                        metadata_filters_chain = LLMChain(llm=self.llm, prompt=metadata_prompt)
                         metadata_filter_output = metadata_filters_chain.predict(
                             query=query,
                         )

                         # If the LLM outputs raw JSON, use it as-is.
                         # If the LLM outputs anything including a json markdown section, use the last one.
-                        json_markdown_output = re.findall(
-                            r"```json.*```", metadata_filter_output, re.DOTALL
-                        )
+                        json_markdown_output = re.findall(r"```json.*```", metadata_filter_output, re.DOTALL)
                         if json_markdown_output:
                             metadata_filter_output = json_markdown_output[-1]
                             # Clean the json tags.
@@ -754,11 +698,10 @@ Below is a description of the contents in this column in list format:
                         metadata_filter = AblativeMetadataFilter(**model_dump)
                     except OutputParserException as e:
                         logger.warning(
-                            f"LLM failed to generate structured metadata filters: {e}"
-                        )
-                        return HandlerResponse(
-                            RESPONSE_TYPE.ERROR, error_message=str(e)
+                            f"LLM failed to generate structured metadata filters: {e}",
+                            exc_info=logger.isEnabledFor(logging.DEBUG),
                         )
+                        return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
                     else:
                         metadata_filter = AblativeMetadataFilter(
                             attribute=column_schema.column,
@@ -779,24 +722,17 @@ Below is a description of the contents in this column in list format:
         embeddings_str: str,
     ) -> HandlerResponse:
         try:
-            checked_sql_query = self._prepare_pgvector_query(
-                ranked_database_schema, metadata_filters
-            )
-            checked_sql_query_with_embeddings = checked_sql_query.format(
-                embeddings=embeddings_str
-            )
-            return self.vector_store_handler.native_query(
-                checked_sql_query_with_embeddings
-            )
+            checked_sql_query = self._prepare_pgvector_query(ranked_database_schema, metadata_filters)
+            checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=embeddings_str)
+            return self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
         except Exception as e:
             logger.warning(
-                f"Failed to prepare and execute SQL query from structured metadata: {e}"
+                f"Failed to prepare and execute SQL query from structured metadata: {e}",
+                exc_info=logger.isEnabledFor(logging.DEBUG),
             )
             return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))

-    def _get_relevant_documents(
-        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
-    ) -> List[Document]:
+    def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> List[Document]:
         # Rewrite query to be suitable for retrieval.
         retrieval_query = self._prepare_retrieval_query(query)

@@ -804,14 +740,10 @@ Below is a description of the contents in this column in list format:
         embedded_query = self.embeddings_model.embed_query(retrieval_query)

         # Search for relevant filters
-        ranked_database_schema, ablation_value_dict, ablation_quantiles = (
-            self._breadth_first_search(query=query)
-        )
+        ranked_database_schema, ablation_value_dict, ablation_quantiles = self._breadth_first_search(query=query)

         # Generate metadata filters
-        metadata_filters = self._generate_metadata_filters(
-            query=query, ranked_database_schema=ranked_database_schema
-        )
+        metadata_filters = self._generate_metadata_filters(query=query, ranked_database_schema=ranked_database_schema)

         if type(metadata_filters) is list:
             # Initial Execution of the similarity search with metadata filters.
@@ -830,9 +762,7 @@ Below is a description of the contents in this column in list format:
                     break
                 elif document_response.resp_type == RESPONSE_TYPE.ERROR:
                     # LLMs won't always generate structured metadata so we should have a fallback after retrying.
-                    logger.info(
-                        f"SQL Retriever query failed with error {document_response.error_message}"
-                    )
+                    logger.info(f"SQL Retriever query failed with error {document_response.error_message}")
                 else:
                     logger.info(
                         f"SQL Retriever did not retrieve {self.min_k} documents: {len(document_response.data_frame)} documents retrieved."
@@ -867,17 +797,9 @@ Below is a description of the contents in this column in list format:
                 return retrieved_documents

             # If the SQL query constructed did not return any documents, fallback.
-            logger.info(
-                "No documents returned from SQL retriever, using fallback retriever."
-            )
-            return self.fallback_retriever._get_relevant_documents(
-                retrieval_query, run_manager=run_manager
-            )
+            logger.info("No documents returned from SQL retriever, using fallback retriever.")
+            return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
         else:
             # If no metadata fields could be generated fallback.
-            logger.info(
-                "No metadata fields were successfully generated, using fallback retriever."
-            )
-            return self.fallback_retriever._get_relevant_documents(
-                retrieval_query, run_manager=run_manager
-            )
+            logger.info("No metadata fields were successfully generated, using fallback retriever.")
+            return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
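Aside from joining statements that previously spanned multiple lines, the recurring substantive change in these hunks is that warning logs now attach the exception traceback only when debug logging is enabled, via `exc_info=logger.isEnabledFor(logging.DEBUG)`. A minimal, self-contained sketch of that pattern; the `risky_call` helper is hypothetical and not part of MindsDB:

```python
import logging

logger = logging.getLogger(__name__)


def risky_call():
    # Hypothetical stand-in for preparing/executing the pgvector query.
    raise ValueError("could not build pgvector query")


try:
    risky_call()
except Exception as e:
    # Same pattern as the diff: the message is always logged at WARNING,
    # but the full traceback (exc_info=True) is included only when the
    # logger is configured at DEBUG level or lower.
    logger.warning(
        f"Failed to prepare and execute SQL query from structured metadata: {e}",
        exc_info=logger.isEnabledFor(logging.DEBUG),
    )
```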