MindsDB 25.2.3.0__py3-none-any.whl → 25.3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +16 -11
- mindsdb/api/executor/command_executor.py +1 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -2
- mindsdb/api/executor/planner/query_planner.py +6 -2
- mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -1
- mindsdb/api/http/initialize.py +8 -5
- mindsdb/api/http/namespaces/agents.py +0 -7
- mindsdb/api/http/namespaces/config.py +0 -48
- mindsdb/api/http/namespaces/knowledge_bases.py +1 -1
- mindsdb/api/http/namespaces/util.py +0 -28
- mindsdb/api/mongo/classes/query_sql.py +2 -1
- mindsdb/api/mongo/responders/aggregate.py +2 -2
- mindsdb/api/mongo/responders/coll_stats.py +3 -2
- mindsdb/api/mongo/responders/db_stats.py +2 -1
- mindsdb/api/mongo/responders/insert.py +4 -2
- mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +5 -4
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +2 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
- mindsdb/integrations/handlers/dspy_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/gmail_handler/connection_args.py +2 -2
- mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +19 -66
- mindsdb/integrations/handlers/gmail_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/google_calendar_handler/connection_args.py +15 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +31 -41
- mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/langchain_embedding_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/openai_handler/constants.py +3 -1
- mindsdb/integrations/handlers/openai_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/rag_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +33 -8
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +3 -2
- mindsdb/integrations/handlers/web_handler/web_handler.py +42 -33
- mindsdb/integrations/handlers/youtube_handler/__init__.py +2 -0
- mindsdb/integrations/handlers/youtube_handler/connection_args.py +32 -0
- mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +2 -38
- mindsdb/integrations/libs/llm/utils.py +7 -1
- mindsdb/integrations/libs/process_cache.py +2 -2
- mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py +29 -38
- mindsdb/integrations/utilities/pydantic_utils.py +208 -0
- mindsdb/integrations/utilities/rag/chains/local_context_summarizer_chain.py +227 -0
- mindsdb/integrations/utilities/rag/pipelines/rag.py +11 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +800 -135
- mindsdb/integrations/utilities/rag/settings.py +390 -152
- mindsdb/integrations/utilities/sql_utils.py +2 -1
- mindsdb/interfaces/agents/agents_controller.py +14 -10
- mindsdb/interfaces/agents/callback_handlers.py +52 -5
- mindsdb/interfaces/agents/langchain_agent.py +5 -3
- mindsdb/interfaces/agents/mindsdb_chat_model.py +4 -2
- mindsdb/interfaces/chatbot/chatbot_controller.py +9 -8
- mindsdb/interfaces/database/database.py +3 -2
- mindsdb/interfaces/database/integrations.py +1 -1
- mindsdb/interfaces/database/projects.py +28 -2
- mindsdb/interfaces/jobs/jobs_controller.py +4 -1
- mindsdb/interfaces/jobs/scheduler.py +1 -1
- mindsdb/interfaces/knowledge_base/preprocessing/constants.py +2 -2
- mindsdb/interfaces/model/model_controller.py +5 -2
- mindsdb/interfaces/skills/retrieval_tool.py +128 -39
- mindsdb/interfaces/skills/skill_tool.py +7 -7
- mindsdb/interfaces/skills/skills_controller.py +10 -6
- mindsdb/interfaces/skills/sql_agent.py +6 -1
- mindsdb/interfaces/storage/db.py +14 -12
- mindsdb/interfaces/storage/json.py +59 -0
- mindsdb/interfaces/storage/model_fs.py +85 -3
- mindsdb/interfaces/triggers/triggers_controller.py +2 -1
- mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py +17 -3
- mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py +33 -0
- mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py +29 -0
- mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py +41 -0
- mindsdb/utilities/config.py +6 -1
- mindsdb/utilities/functions.py +11 -0
- mindsdb/utilities/log.py +17 -2
- mindsdb/utilities/ml_task_queue/consumer.py +4 -2
- mindsdb/utilities/render/sqlalchemy_render.py +4 -0
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/METADATA +226 -247
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/RECORD +83 -80
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/WHEEL +1 -1
- mindsdb/integrations/handlers/gmail_handler/utils.py +0 -45
- mindsdb/utilities/log_controller.py +0 -39
- mindsdb/utilities/telemetry.py +0 -44
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import List, Union, Any, Optional, Dict
|
|
2
|
+
from typing import List, Union, Any, Optional, Dict, OrderedDict
|
|
3
3
|
|
|
4
4
|
from langchain_community.vectorstores.chroma import Chroma
|
|
5
5
|
from langchain_community.vectorstores.pgvector import PGVector
|
|
@@ -11,7 +11,7 @@ from langchain_core.stores import BaseStore
|
|
|
11
11
|
from pydantic import BaseModel, Field, field_validator
|
|
12
12
|
from langchain_text_splitters import TextSplitter
|
|
13
13
|
|
|
14
|
-
DEFAULT_COLLECTION_NAME =
|
|
14
|
+
DEFAULT_COLLECTION_NAME = "default_collection"
|
|
15
15
|
|
|
16
16
|
# Multi retriever specific
|
|
17
17
|
DEFAULT_ID_KEY = "doc_id"
|
|
@@ -38,15 +38,15 @@ Return a JSON list with an entry for each column. Each entry should have
|
|
|
38
38
|
{{"name": "column name", "description": "column description", "type": "column data type"}}
|
|
39
39
|
\n\n{dataframe}\n\nJSON:\n
|
|
40
40
|
"""
|
|
41
|
-
DEFAULT_RAG_PROMPT_TEMPLATE =
|
|
41
|
+
DEFAULT_RAG_PROMPT_TEMPLATE = """You are an assistant for
|
|
42
42
|
question-answering tasks. Use the following pieces of retrieved context
|
|
43
43
|
to answer the question. If you don't know the answer, just say that you
|
|
44
44
|
don't know. Use two sentences maximum and keep the answer concise.
|
|
45
45
|
Question: {question}
|
|
46
46
|
Context: {context}
|
|
47
|
-
Answer:
|
|
47
|
+
Answer:"""
|
|
48
48
|
|
|
49
|
-
DEFAULT_QA_GENERATION_PROMPT_TEMPLATE =
|
|
49
|
+
DEFAULT_QA_GENERATION_PROMPT_TEMPLATE = """You are an assistant for
|
|
50
50
|
generating sample questions and answers from the given document and metadata. Given
|
|
51
51
|
a document and its metadata as context, generate a question and answer from that document and its metadata.
|
|
52
52
|
|
|
@@ -64,25 +64,25 @@ in the specified JSON format no matter what.
|
|
|
64
64
|
|
|
65
65
|
Document: {document}
|
|
66
66
|
Metadata: {metadata}
|
|
67
|
-
Answer:
|
|
67
|
+
Answer:"""
|
|
68
68
|
|
|
69
|
-
DEFAULT_MAP_PROMPT_TEMPLATE =
|
|
69
|
+
DEFAULT_MAP_PROMPT_TEMPLATE = """The following is a set of documents
|
|
70
70
|
{docs}
|
|
71
71
|
Based on this list of docs, please summarize based on the user input.
|
|
72
72
|
|
|
73
73
|
User input: {input}
|
|
74
74
|
|
|
75
|
-
Helpful Answer:
|
|
75
|
+
Helpful Answer:"""
|
|
76
76
|
|
|
77
|
-
DEFAULT_REDUCE_PROMPT_TEMPLATE =
|
|
77
|
+
DEFAULT_REDUCE_PROMPT_TEMPLATE = """The following is set of summaries:
|
|
78
78
|
{docs}
|
|
79
79
|
Take these and distill it into a final, consolidated summary related to the user input.
|
|
80
80
|
|
|
81
81
|
User input: {input}
|
|
82
82
|
|
|
83
|
-
Helpful Answer:
|
|
83
|
+
Helpful Answer:"""
|
|
84
84
|
|
|
85
|
-
DEFAULT_SEMANTIC_PROMPT_TEMPLATE =
|
|
85
|
+
DEFAULT_SEMANTIC_PROMPT_TEMPLATE = """Provide a better search query for web search engine to answer the given question.
|
|
86
86
|
|
|
87
87
|
<< EXAMPLES >>
|
|
88
88
|
1. Input: "Show me documents containing how to finetune a LLM please"
|
|
@@ -91,9 +91,9 @@ Output: "how to finetune a LLM"
|
|
|
91
91
|
Output only a single better search query and nothing else like in the example.
|
|
92
92
|
|
|
93
93
|
Here is the user input: {input}
|
|
94
|
-
|
|
94
|
+
"""
|
|
95
95
|
|
|
96
|
-
DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE =
|
|
96
|
+
DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE = """Construct a list of PostgreSQL metadata filters to filter documents in the database based on the user input.
|
|
97
97
|
|
|
98
98
|
<< INSTRUCTIONS >>
|
|
99
99
|
{format_instructions}
|
|
@@ -110,9 +110,99 @@ RETURN ONLY THE FINAL JSON. DO NOT EXPLAIN, JUST RETURN THE FINAL JSON.
|
|
|
110
110
|
|
|
111
111
|
Here is the user input:
|
|
112
112
|
{input}
|
|
113
|
-
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
DEFAULT_BOOLEAN_PROMPT_TEMPLATE = """**Task:** Determine Schema Relevance for Database Search Queries
|
|
116
|
+
|
|
117
|
+
As an expert in constructing database search queries, you are provided with database schemas detailing tables, columns, and values. Your task is to assess whether these elements can be used to effectively search the database in relation to a given user query.
|
|
118
|
+
|
|
119
|
+
**Instructions:**
|
|
120
|
+
|
|
121
|
+
- **Evaluate the Schema**:
|
|
122
|
+
- Analyze the tables, columns, and values described.
|
|
123
|
+
- Consider their potential usefulness in retrieving information pertinent to the user query.
|
|
124
|
+
|
|
125
|
+
- **Decision Criteria**:
|
|
126
|
+
- Determine if any part of the schema could assist in forming a relevant search query for the information requested.
|
|
127
|
+
|
|
128
|
+
- **Response**:
|
|
129
|
+
- Reply with a single word: 'yes' if the schema components are useful, otherwise 'no'.
|
|
130
|
+
|
|
131
|
+
**Note:** Provide your answer based solely on the relevance of the described schema to the user query."""
|
|
132
|
+
|
|
133
|
+
DEFAULT_GENERATIVE_SYSTEM_PROMPT = """You are an expert database analyst that can assist in building SQL queries by providing structured output. Follow these format instructions precisely to generate a metadata filter given the provided schema description.
|
|
134
|
+
|
|
135
|
+
## Format instructions:
|
|
136
|
+
{format_instructions}
|
|
137
|
+
"""
|
|
138
|
+
|
|
139
|
+
DEFAULT_VALUE_PROMPT_TEMPLATE = """
|
|
140
|
+
{column_schema}
|
|
141
|
+
|
|
142
|
+
# **Value Schema**
|
|
143
|
+
{header}
|
|
144
|
+
|
|
145
|
+
- The type of the value: {type}
|
|
146
|
+
|
|
147
|
+
## **Description**
|
|
148
|
+
{description}
|
|
149
|
+
|
|
150
|
+
{value}{comparator}
|
|
151
|
+
|
|
152
|
+
## **Usage**
|
|
153
|
+
{usage}
|
|
154
|
+
|
|
155
|
+
{examples}
|
|
156
|
+
|
|
157
|
+
## **Query**
|
|
158
|
+
{query}
|
|
159
|
+
|
|
160
|
+
"""
|
|
161
|
+
|
|
162
|
+
DEFAULT_COLUMN_PROMPT_TEMPLATE = """
|
|
163
|
+
{table_schema}
|
|
164
|
+
|
|
165
|
+
# **Column Schema**
|
|
166
|
+
{header}
|
|
167
|
+
|
|
168
|
+
- The column name in the database table: {column}
|
|
169
|
+
- The type of the values in this column: {type}
|
|
170
|
+
|
|
171
|
+
## **Description**
|
|
172
|
+
{description}
|
|
173
|
+
|
|
174
|
+
## **Usage**
|
|
175
|
+
{usage}
|
|
176
|
+
|
|
177
|
+
{examples}
|
|
114
178
|
|
|
115
|
-
|
|
179
|
+
## **Query**
|
|
180
|
+
{query}
|
|
181
|
+
"""
|
|
182
|
+
|
|
183
|
+
DEFAULT_TABLE_PROMPT_TEMPLATE = """# **Table Schema**
|
|
184
|
+
{header}
|
|
185
|
+
|
|
186
|
+
- The name of this table in the database: {table}
|
|
187
|
+
|
|
188
|
+
## **Description**
|
|
189
|
+
{description}
|
|
190
|
+
|
|
191
|
+
## **Usage**
|
|
192
|
+
{usage}
|
|
193
|
+
|
|
194
|
+
## **Column Descriptions**
|
|
195
|
+
Below are descriptions of each column in this table:
|
|
196
|
+
|
|
197
|
+
{columns}
|
|
198
|
+
|
|
199
|
+
{examples}
|
|
200
|
+
|
|
201
|
+
## **Query**
|
|
202
|
+
{query}
|
|
203
|
+
"""
|
|
204
|
+
|
|
205
|
+
DEFAULT_SQL_PROMPT_TEMPLATE = """
|
|
116
206
|
Construct a valid {dialect} SQL query to select documents relevant to the user input.
|
|
117
207
|
Source documents are found in the {source_table} table. You may need to join with other tables to get additional document metadata.
|
|
118
208
|
|
|
@@ -165,7 +255,7 @@ Output the {dialect} SQL query that is ready to be executed only WITHOUT ANY DEL
|
|
|
165
255
|
|
|
166
256
|
Here is the user input:
|
|
167
257
|
{input}
|
|
168
|
-
|
|
258
|
+
"""
|
|
169
259
|
|
|
170
260
|
DEFAULT_QUESTION_REFORMULATION_TEMPLATE = """Given the original question and the retrieved context,
|
|
171
261
|
analyze what additional information is needed for a complete, accurate answer.
|
|
@@ -234,7 +324,7 @@ If no additional information is needed, output an empty array [].
|
|
|
234
324
|
|
|
235
325
|
Follow-up Questions:"""
|
|
236
326
|
|
|
237
|
-
DEFAULT_QUERY_RETRY_PROMPT_TEMPLATE =
|
|
327
|
+
DEFAULT_QUERY_RETRY_PROMPT_TEMPLATE = """
|
|
238
328
|
{query}
|
|
239
329
|
|
|
240
330
|
The {dialect} query above failed with the error message: {error}.
|
|
@@ -270,14 +360,19 @@ Rewrite the query so it works.
|
|
|
270
360
|
Output the final SQL query only.
|
|
271
361
|
|
|
272
362
|
SQL Query:
|
|
273
|
-
|
|
363
|
+
"""
|
|
274
364
|
|
|
275
365
|
DEFAULT_NUM_QUERY_RETRIES = 2
|
|
276
366
|
|
|
277
367
|
|
|
278
368
|
class LLMConfig(BaseModel):
|
|
279
|
-
model_name: str = Field(
|
|
280
|
-
|
|
369
|
+
model_name: str = Field(
|
|
370
|
+
default=DEFAULT_LLM_MODEL, description="LLM model to use for generation"
|
|
371
|
+
)
|
|
372
|
+
provider: str = Field(
|
|
373
|
+
default=DEFAULT_LLM_MODEL_PROVIDER,
|
|
374
|
+
description="LLM model provider to use for generation",
|
|
375
|
+
)
|
|
281
376
|
params: Dict[str, Any] = Field(default_factory=dict)
|
|
282
377
|
|
|
283
378
|
|
|
@@ -285,20 +380,18 @@ class MultiVectorRetrieverMode(Enum):
|
|
|
285
380
|
"""
|
|
286
381
|
Enum for MultiVectorRetriever types.
|
|
287
382
|
"""
|
|
383
|
+
|
|
288
384
|
SPLIT = "split"
|
|
289
385
|
SUMMARIZE = "summarize"
|
|
290
386
|
BOTH = "both"
|
|
291
387
|
|
|
292
388
|
|
|
293
389
|
class VectorStoreType(Enum):
|
|
294
|
-
CHROMA =
|
|
295
|
-
PGVECTOR =
|
|
390
|
+
CHROMA = "chromadb"
|
|
391
|
+
PGVECTOR = "pgvector"
|
|
296
392
|
|
|
297
393
|
|
|
298
|
-
vector_store_map = {
|
|
299
|
-
VectorStoreType.CHROMA: Chroma,
|
|
300
|
-
VectorStoreType.PGVECTOR: PGVector
|
|
301
|
-
}
|
|
394
|
+
vector_store_map = {VectorStoreType.CHROMA: Chroma, VectorStoreType.PGVECTOR: PGVector}
|
|
302
395
|
|
|
303
396
|
|
|
304
397
|
class VectorStoreConfig(BaseModel):
|
|
@@ -317,6 +410,7 @@ class VectorStoreConfig(BaseModel):
|
|
|
317
410
|
|
|
318
411
|
class RetrieverType(str, Enum):
|
|
319
412
|
"""Retriever type for RAG pipeline"""
|
|
413
|
+
|
|
320
414
|
VECTOR_STORE = "vector_store"
|
|
321
415
|
AUTO = "auto"
|
|
322
416
|
MULTI = "multi"
|
|
@@ -328,137 +422,298 @@ class SearchType(Enum):
|
|
|
328
422
|
"""
|
|
329
423
|
Enum for vector store search types.
|
|
330
424
|
"""
|
|
425
|
+
|
|
331
426
|
SIMILARITY = "similarity"
|
|
332
427
|
MMR = "mmr"
|
|
333
428
|
SIMILARITY_SCORE_THRESHOLD = "similarity_score_threshold"
|
|
334
429
|
|
|
335
430
|
|
|
336
431
|
class SearchKwargs(BaseModel):
|
|
337
|
-
k: int = Field(
|
|
338
|
-
default=DEFAULT_K,
|
|
339
|
-
description="Amount of documents to return",
|
|
340
|
-
ge=1
|
|
341
|
-
)
|
|
432
|
+
k: int = Field(default=DEFAULT_K, description="Amount of documents to return", ge=1)
|
|
342
433
|
filter: Optional[Dict[str, Any]] = Field(
|
|
343
|
-
default=None,
|
|
344
|
-
description="Filter by document metadata"
|
|
434
|
+
default=None, description="Filter by document metadata"
|
|
345
435
|
)
|
|
346
436
|
# For similarity_score_threshold search type
|
|
347
437
|
score_threshold: Optional[float] = Field(
|
|
348
438
|
default=None,
|
|
349
439
|
description="Minimum relevance threshold for similarity_score_threshold search",
|
|
350
440
|
ge=0.0,
|
|
351
|
-
le=1.0
|
|
441
|
+
le=1.0,
|
|
352
442
|
)
|
|
353
443
|
# For MMR search type
|
|
354
444
|
fetch_k: Optional[int] = Field(
|
|
355
|
-
default=None,
|
|
356
|
-
description="Amount of documents to pass to MMR algorithm",
|
|
357
|
-
ge=1
|
|
445
|
+
default=None, description="Amount of documents to pass to MMR algorithm", ge=1
|
|
358
446
|
)
|
|
359
447
|
lambda_mult: Optional[float] = Field(
|
|
360
448
|
default=None,
|
|
361
449
|
description="Diversity of results returned by MMR (1=min diversity, 0=max)",
|
|
362
450
|
ge=0.0,
|
|
363
|
-
le=1.0
|
|
451
|
+
le=1.0,
|
|
364
452
|
)
|
|
365
453
|
|
|
366
454
|
def model_dump(self, *args, **kwargs):
|
|
367
455
|
# Override model_dump to exclude None values by default
|
|
368
|
-
kwargs[
|
|
456
|
+
kwargs["exclude_none"] = True
|
|
369
457
|
return super().model_dump(*args, **kwargs)
|
|
370
458
|
|
|
371
459
|
|
|
372
|
-
class
|
|
373
|
-
|
|
374
|
-
|
|
460
|
+
class LLMExample(BaseModel):
|
|
461
|
+
input: str = Field(description="User input for the example")
|
|
462
|
+
output: str = Field(
|
|
463
|
+
description="What the LLM should generate for this example's input"
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
class ValueSchema(BaseModel):
|
|
468
|
+
value: Union[
|
|
469
|
+
Union[str, int, float],
|
|
470
|
+
Dict[Union[str, int, float], str],
|
|
471
|
+
List[Union[str, int, float]],
|
|
472
|
+
] = Field(
|
|
473
|
+
description="One of the following. The value as it exists in the table column. A dict of {table_value: descriptive value, ...}, where table_value is the value in the table. A list of sample values taken from the column."
|
|
474
|
+
)
|
|
475
|
+
comparator: Optional[Union[str, List[str]]] = Field(
|
|
476
|
+
description="The posgtres sql operators used to compare two values. For example: `>`, `<`, `=`, or `%`.",
|
|
477
|
+
default="=",
|
|
375
478
|
)
|
|
376
479
|
type: str = Field(
|
|
377
|
-
description="
|
|
480
|
+
description="A valid postgres type for this value. One of: int, string, float, or bool. When numbers appear they should be of type int or float."
|
|
481
|
+
)
|
|
482
|
+
description: str = Field(description="Description of what the value represents.")
|
|
483
|
+
usage: str = Field(description="How and when to use this value for search.")
|
|
484
|
+
example_questions: Optional[List[LLMExample]] = Field(
|
|
485
|
+
default=None, description="Example questions where this value is set."
|
|
486
|
+
)
|
|
487
|
+
filter_threshold: Optional[float] = Field(
|
|
488
|
+
default=0.0,
|
|
489
|
+
description="Minimum relevance threshold to include metadata filters from this column.",
|
|
490
|
+
exclude=True,
|
|
378
491
|
)
|
|
379
|
-
|
|
380
|
-
|
|
492
|
+
priority: Optional[int] = Field(
|
|
493
|
+
default=0,
|
|
494
|
+
description="Priority level for this column, lower numbers will be processed first.",
|
|
381
495
|
)
|
|
382
|
-
|
|
496
|
+
relevance: Optional[float] = Field(
|
|
383
497
|
default=None,
|
|
384
|
-
description="
|
|
498
|
+
description="Relevance computed during search. Should not be set by the end user.",
|
|
499
|
+
exclude=True,
|
|
385
500
|
)
|
|
386
501
|
|
|
387
502
|
|
|
388
|
-
class
|
|
503
|
+
class MetadataConfig(BaseModel):
|
|
504
|
+
"""Class to configure metadata for retrieval. Only supports very basic document name lookup at the moment."""
|
|
389
505
|
table: str = Field(
|
|
390
|
-
description="
|
|
506
|
+
description="Source table for metadata."
|
|
507
|
+
)
|
|
508
|
+
max_document_context: int = Field(
|
|
509
|
+
# To work well with models with context window of 32768.
|
|
510
|
+
default=16384,
|
|
511
|
+
description="Truncate a document before using as context with an LLM if it exceeds this amount of tokens"
|
|
512
|
+
)
|
|
513
|
+
embeddings_table: str = Field(
|
|
514
|
+
default="embeddings",
|
|
515
|
+
description="Source table for embeddings"
|
|
516
|
+
)
|
|
517
|
+
id_column: str = Field(
|
|
518
|
+
default="Id",
|
|
519
|
+
description="Name of ID column in metadata table"
|
|
520
|
+
)
|
|
521
|
+
name_column: str = Field(
|
|
522
|
+
default="Title",
|
|
523
|
+
description="Name of column containing name or title of document"
|
|
391
524
|
)
|
|
392
|
-
|
|
393
|
-
|
|
525
|
+
name_column_index: Optional[str] = Field(
|
|
526
|
+
default=None,
|
|
527
|
+
description="Name of GIN index to use when looking up name."
|
|
528
|
+
)
|
|
529
|
+
content_column: str = Field(
|
|
530
|
+
default="content",
|
|
531
|
+
description="Name of column in embeddings table containing chunk content"
|
|
532
|
+
)
|
|
533
|
+
embeddings_metadata_column: str = Field(
|
|
534
|
+
default="metadata",
|
|
535
|
+
description="Name of column in embeddings table containing chunk metadata"
|
|
536
|
+
)
|
|
537
|
+
doc_id_key: str = Field(
|
|
538
|
+
default="original_row_id",
|
|
539
|
+
description="Metadata field that links an embedded chunk back to source document ID"
|
|
394
540
|
)
|
|
395
|
-
|
|
396
|
-
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
class ColumnSchema(BaseModel):
|
|
544
|
+
column: str = Field(description="Name of the column in the database")
|
|
545
|
+
type: str = Field(description="Type of the column (e.g. int, string, datetime)")
|
|
546
|
+
description: str = Field(description="Description of what the column represents")
|
|
547
|
+
usage: str = Field(description="How and when to use this Table for search.")
|
|
548
|
+
values: Optional[
|
|
549
|
+
Union[
|
|
550
|
+
OrderedDict[Union[str, int, float], ValueSchema],
|
|
551
|
+
Dict[Union[str, int, float], ValueSchema],
|
|
552
|
+
]
|
|
553
|
+
] = Field(
|
|
554
|
+
description="One of the following. A dict or ordered dict of {schema_value: ValueSchema, ...}, where schema value is the name given for this value description in the schema."
|
|
555
|
+
)
|
|
556
|
+
example_questions: Optional[List[LLMExample]] = Field(
|
|
557
|
+
default=None, description="Example questions where this table is useful."
|
|
558
|
+
)
|
|
559
|
+
max_filters: Optional[int] = Field(
|
|
560
|
+
default=1, description="Maximum number of filters to generate for this column."
|
|
561
|
+
)
|
|
562
|
+
filter_threshold: Optional[float] = Field(
|
|
563
|
+
default=0.0,
|
|
564
|
+
description="Minimum relevance threshold to include metadata filters from this column.",
|
|
565
|
+
)
|
|
566
|
+
priority: Optional[int] = Field(
|
|
567
|
+
default=1,
|
|
568
|
+
description="Priority level for this column, lower numbers will be processed first.",
|
|
569
|
+
)
|
|
570
|
+
relevance: Optional[float] = Field(
|
|
571
|
+
default=None,
|
|
572
|
+
description="Relevance computed during search. Should not be set by the end user.",
|
|
573
|
+
)
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
class TableSchema(BaseModel):
|
|
577
|
+
table: str = Field(description="Name of table in the database")
|
|
578
|
+
description: str = Field(description="Description of what the table represents")
|
|
579
|
+
usage: str = Field(description="How and when to use this Table for search.")
|
|
580
|
+
columns: Optional[
|
|
581
|
+
Union[OrderedDict[str, ColumnSchema], Dict[str, ColumnSchema]]
|
|
582
|
+
] = Field(
|
|
583
|
+
description="Dict or Ordered Dict of {column_name: ColumnSchemas} describing the metadata columns available for the table"
|
|
584
|
+
)
|
|
585
|
+
example_questions: Optional[List[LLMExample]] = Field(
|
|
586
|
+
default=None, description="Example questions where this table is useful."
|
|
397
587
|
)
|
|
398
588
|
join: str = Field(
|
|
399
589
|
description="SQL join string to join this table with source documents table",
|
|
400
|
-
default=
|
|
590
|
+
default="",
|
|
591
|
+
)
|
|
592
|
+
max_filters: Optional[int] = Field(
|
|
593
|
+
default=1, description="Maximum number of filters to generate for this table."
|
|
594
|
+
)
|
|
595
|
+
filter_threshold: Optional[float] = Field(
|
|
596
|
+
default=0.0,
|
|
597
|
+
description="Minimum relevance required to use this table to generate filters.",
|
|
598
|
+
)
|
|
599
|
+
priority: Optional[int] = Field(
|
|
600
|
+
default=1,
|
|
601
|
+
description="Priority level for this table, lower numbers will be processed first.",
|
|
602
|
+
)
|
|
603
|
+
relevance: Optional[float] = Field(
|
|
604
|
+
default=None,
|
|
605
|
+
description="Relevance computed during search. Should not be set by the end user.",
|
|
401
606
|
)
|
|
402
|
-
|
|
403
|
-
class Config:
|
|
404
|
-
frozen = True
|
|
405
607
|
|
|
406
608
|
|
|
407
|
-
class
|
|
408
|
-
|
|
409
|
-
|
|
609
|
+
class DatabaseSchema(BaseModel):
|
|
610
|
+
database: str = Field(description="Name of database in the Database")
|
|
611
|
+
description: str = Field(description="Description of what the Database represents")
|
|
612
|
+
usage: str = Field(description="How and when to use this Database for search.")
|
|
613
|
+
tables: Union[OrderedDict[str, TableSchema], Dict[str, TableSchema]] = Field(
|
|
614
|
+
description="Dict of {column_name: ColumnSchemas} describing the metadata columns available for the table"
|
|
410
615
|
)
|
|
411
|
-
|
|
412
|
-
description="
|
|
616
|
+
example_questions: Optional[List[LLMExample]] = Field(
|
|
617
|
+
default=None, description="Example questions where this Database is useful."
|
|
618
|
+
)
|
|
619
|
+
max_filters: Optional[int] = Field(
|
|
620
|
+
default=1,
|
|
621
|
+
description="Maximum number of filters to generate for this Database.",
|
|
622
|
+
)
|
|
623
|
+
filter_threshold: Optional[float] = Field(
|
|
624
|
+
default=0.0,
|
|
625
|
+
description="Minimum relevance required to use this Database to generate filters.",
|
|
626
|
+
)
|
|
627
|
+
priority: Optional[int] = Field(
|
|
628
|
+
default=0,
|
|
629
|
+
description="Priority level for this Database, lower numbers will be processed first.",
|
|
630
|
+
)
|
|
631
|
+
relevance: Optional[float] = Field(
|
|
632
|
+
default=None,
|
|
633
|
+
description="Relevance computed during search. Should not be set by the end user.",
|
|
413
634
|
)
|
|
414
635
|
|
|
415
636
|
|
|
416
637
|
class SQLRetrieverConfig(BaseModel):
|
|
417
638
|
llm_config: LLMConfig = Field(
|
|
418
639
|
default_factory=LLMConfig,
|
|
419
|
-
description="LLM configuration to use for generating the final SQL query for retrieval"
|
|
640
|
+
description="LLM configuration to use for generating the final SQL query for retrieval",
|
|
420
641
|
)
|
|
421
642
|
metadata_filters_prompt_template: str = Field(
|
|
422
643
|
default=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE,
|
|
423
|
-
description="Prompt template to generate PostgreSQL metadata filters. Has 'format_instructions', 'schema', 'examples', and 'input' input variables"
|
|
644
|
+
description="Prompt template to generate PostgreSQL metadata filters. Has 'format_instructions', 'schema', 'examples', and 'input' input variables",
|
|
424
645
|
)
|
|
425
646
|
num_retries: int = Field(
|
|
426
647
|
default=DEFAULT_NUM_QUERY_RETRIES,
|
|
427
|
-
description="How many times for an LLM to try rewriting a failed SQL query before using the fallback retriever."
|
|
648
|
+
description="How many times for an LLM to try rewriting a failed SQL query before using the fallback retriever.",
|
|
428
649
|
)
|
|
429
650
|
rewrite_prompt_template: str = Field(
|
|
430
651
|
default=DEFAULT_SEMANTIC_PROMPT_TEMPLATE,
|
|
431
|
-
description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable."
|
|
652
|
+
description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
|
|
653
|
+
)
|
|
654
|
+
table_prompt_template: str = Field(
|
|
655
|
+
default=DEFAULT_TABLE_PROMPT_TEMPLATE,
|
|
656
|
+
description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
|
|
657
|
+
)
|
|
658
|
+
column_prompt_template: str = Field(
|
|
659
|
+
default=DEFAULT_COLUMN_PROMPT_TEMPLATE,
|
|
660
|
+
description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
|
|
661
|
+
)
|
|
662
|
+
value_prompt_template: str = Field(
|
|
663
|
+
default=DEFAULT_VALUE_PROMPT_TEMPLATE,
|
|
664
|
+
description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
|
|
665
|
+
)
|
|
666
|
+
boolean_system_prompt: str = Field(
|
|
667
|
+
default=DEFAULT_BOOLEAN_PROMPT_TEMPLATE,
|
|
668
|
+
description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
|
|
669
|
+
)
|
|
670
|
+
generative_system_prompt: str = Field(
|
|
671
|
+
default=DEFAULT_GENERATIVE_SYSTEM_PROMPT,
|
|
672
|
+
description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
|
|
432
673
|
)
|
|
433
674
|
source_table: str = Field(
|
|
434
675
|
description="Name of the source table containing the original documents that were embedded"
|
|
435
676
|
)
|
|
436
|
-
|
|
677
|
+
source_id_column: str = Field(
|
|
678
|
+
description="Name of the column containing the UUID.", default="Id"
|
|
679
|
+
)
|
|
680
|
+
max_filters: Optional[int] = Field(
|
|
681
|
+
description="Maximum number of filters to generate for sql queries.", default=10
|
|
682
|
+
)
|
|
683
|
+
filter_threshold: Optional[float] = Field(
|
|
684
|
+
description="Minimum relevance required to use this Database to generate filters.",
|
|
685
|
+
default=0.0,
|
|
686
|
+
)
|
|
687
|
+
min_k: Optional[int] = Field(
|
|
688
|
+
description="Minimum number of documents accepted from a generated sql query.",
|
|
689
|
+
default=10,
|
|
690
|
+
)
|
|
691
|
+
database_schema: Optional[DatabaseSchema] = Field(
|
|
437
692
|
default=None,
|
|
438
|
-
description="
|
|
693
|
+
description="DatabaseSchema describing the database.",
|
|
439
694
|
)
|
|
440
695
|
examples: Optional[List[LLMExample]] = Field(
|
|
441
696
|
default=None,
|
|
442
|
-
description="Optional examples of final generated pgvector queries based on user input."
|
|
697
|
+
description="Optional examples of final generated pgvector queries based on user input.",
|
|
443
698
|
)
|
|
444
699
|
|
|
445
700
|
|
|
446
701
|
class SummarizationConfig(BaseModel):
|
|
447
702
|
llm_config: LLMConfig = Field(
|
|
448
703
|
default_factory=LLMConfig,
|
|
449
|
-
description="LLM configuration to use for summarization"
|
|
704
|
+
description="LLM configuration to use for summarization",
|
|
450
705
|
)
|
|
451
706
|
map_prompt_template: str = Field(
|
|
452
707
|
default=DEFAULT_MAP_PROMPT_TEMPLATE,
|
|
453
|
-
description="Prompt for an LLM to summarize a single document"
|
|
708
|
+
description="Prompt for an LLM to summarize a single document",
|
|
454
709
|
)
|
|
455
710
|
reduce_prompt_template: str = Field(
|
|
456
711
|
default=DEFAULT_REDUCE_PROMPT_TEMPLATE,
|
|
457
|
-
description="Prompt for an LLM to summarize a set of summaries of documents into one"
|
|
712
|
+
description="Prompt for an LLM to summarize a set of summaries of documents into one",
|
|
458
713
|
)
|
|
459
714
|
max_summarization_tokens: int = Field(
|
|
460
715
|
default=DEFAULT_MAX_SUMMARIZATION_TOKENS,
|
|
461
|
-
description="Max number of tokens for summarized documents"
|
|
716
|
+
description="Max number of tokens for summarized documents",
|
|
462
717
|
)
|
|
463
718
|
|
|
464
719
|
|
|
@@ -476,154 +731,122 @@ class RerankerConfig(BaseModel):
|
|
|
476
731
|
|
|
477
732
|
class MultiHopRetrieverConfig(BaseModel):
|
|
478
733
|
"""Configuration for multi-hop retrieval"""
|
|
734
|
+
|
|
479
735
|
base_retriever_type: RetrieverType = Field(
|
|
480
736
|
default=RetrieverType.VECTOR_STORE,
|
|
481
|
-
description="Type of base retriever to use for multi-hop retrieval"
|
|
737
|
+
description="Type of base retriever to use for multi-hop retrieval",
|
|
482
738
|
)
|
|
483
739
|
max_hops: int = Field(
|
|
484
|
-
default=3,
|
|
485
|
-
description="Maximum number of follow-up questions to generate",
|
|
486
|
-
ge=1
|
|
740
|
+
default=3, description="Maximum number of follow-up questions to generate", ge=1
|
|
487
741
|
)
|
|
488
742
|
reformulation_template: str = Field(
|
|
489
743
|
default=DEFAULT_QUESTION_REFORMULATION_TEMPLATE,
|
|
490
|
-
description="Template for reformulating questions"
|
|
744
|
+
description="Template for reformulating questions",
|
|
491
745
|
)
|
|
492
746
|
llm_config: LLMConfig = Field(
|
|
493
747
|
default_factory=LLMConfig,
|
|
494
|
-
description="LLM configuration to use for generating follow-up questions"
|
|
748
|
+
description="LLM configuration to use for generating follow-up questions",
|
|
495
749
|
)
|
|
496
750
|
|
|
497
751
|
|
|
498
752
|
class RAGPipelineModel(BaseModel):
|
|
499
753
|
documents: Optional[List[Document]] = Field(
|
|
500
|
-
default=None,
|
|
501
|
-
description="List of documents"
|
|
754
|
+
default=None, description="List of documents"
|
|
502
755
|
)
|
|
503
756
|
|
|
504
757
|
vector_store_config: VectorStoreConfig = Field(
|
|
505
|
-
default_factory=VectorStoreConfig,
|
|
506
|
-
description="Vector store configuration"
|
|
758
|
+
default_factory=VectorStoreConfig, description="Vector store configuration"
|
|
507
759
|
)
|
|
508
760
|
|
|
509
|
-
llm: Optional[BaseChatModel] = Field(
|
|
510
|
-
default=None,
|
|
511
|
-
description="Language model"
|
|
512
|
-
)
|
|
761
|
+
llm: Optional[BaseChatModel] = Field(default=None, description="Language model")
|
|
513
762
|
llm_model_name: str = Field(
|
|
514
|
-
default=DEFAULT_LLM_MODEL,
|
|
515
|
-
description="Language model name"
|
|
763
|
+
default=DEFAULT_LLM_MODEL, description="Language model name"
|
|
516
764
|
)
|
|
517
765
|
llm_provider: Optional[str] = Field(
|
|
518
|
-
default=None,
|
|
519
|
-
description="Language model provider"
|
|
766
|
+
default=None, description="Language model provider"
|
|
520
767
|
)
|
|
521
|
-
|
|
522
768
|
vector_store: VectorStore = Field(
|
|
523
769
|
default_factory=lambda: vector_store_map[VectorStoreConfig().vector_store_type],
|
|
524
|
-
description="Vector store"
|
|
770
|
+
description="Vector store",
|
|
525
771
|
)
|
|
526
772
|
db_connection_string: Optional[str] = Field(
|
|
527
|
-
default=None,
|
|
528
|
-
description="Database connection string"
|
|
773
|
+
default=None, description="Database connection string"
|
|
529
774
|
)
|
|
530
|
-
|
|
531
|
-
default=
|
|
532
|
-
description="
|
|
775
|
+
metadata_config: Optional[MetadataConfig] = Field(
|
|
776
|
+
default=None,
|
|
777
|
+
description="Configuration for metadata to be used for retrieval"
|
|
533
778
|
)
|
|
779
|
+
table_name: str = Field(default=DEFAULT_TEST_TABLE_NAME, description="Table name")
|
|
534
780
|
embedding_model: Optional[Embeddings] = Field(
|
|
535
|
-
default=None,
|
|
536
|
-
description="Embedding model"
|
|
781
|
+
default=None, description="Embedding model"
|
|
537
782
|
)
|
|
538
783
|
rag_prompt_template: str = Field(
|
|
539
|
-
default=DEFAULT_RAG_PROMPT_TEMPLATE,
|
|
540
|
-
description="RAG prompt template"
|
|
784
|
+
default=DEFAULT_RAG_PROMPT_TEMPLATE, description="RAG prompt template"
|
|
541
785
|
)
|
|
542
786
|
retriever_prompt_template: Optional[Union[str, dict]] = Field(
|
|
543
|
-
default=None,
|
|
544
|
-
description="Retriever prompt template"
|
|
787
|
+
default=None, description="Retriever prompt template"
|
|
545
788
|
)
|
|
546
789
|
retriever_type: RetrieverType = Field(
|
|
547
|
-
default=RetrieverType.VECTOR_STORE,
|
|
548
|
-
description="Retriever type"
|
|
790
|
+
default=RetrieverType.VECTOR_STORE, description="Retriever type"
|
|
549
791
|
)
|
|
550
792
|
search_type: SearchType = Field(
|
|
551
|
-
default=SearchType.SIMILARITY,
|
|
552
|
-
description="Type of search to perform"
|
|
793
|
+
default=SearchType.SIMILARITY, description="Type of search to perform"
|
|
553
794
|
)
|
|
554
795
|
search_kwargs: SearchKwargs = Field(
|
|
555
796
|
default_factory=SearchKwargs,
|
|
556
|
-
description="Search configuration for the retriever"
|
|
797
|
+
description="Search configuration for the retriever",
|
|
557
798
|
)
|
|
558
799
|
summarization_config: Optional[SummarizationConfig] = Field(
|
|
559
800
|
default=None,
|
|
560
|
-
description="Configuration for summarizing retrieved documents as context"
|
|
801
|
+
description="Configuration for summarizing retrieved documents as context",
|
|
561
802
|
)
|
|
562
803
|
# SQL retriever specific.
|
|
563
804
|
sql_retriever_config: Optional[SQLRetrieverConfig] = Field(
|
|
564
805
|
default=None,
|
|
565
|
-
description="Configuration for retrieving documents by generating SQL to filter by metadata & order by distance function"
|
|
806
|
+
description="Configuration for retrieving documents by generating SQL to filter by metadata & order by distance function",
|
|
566
807
|
)
|
|
567
808
|
|
|
568
809
|
# Multi retriever specific
|
|
569
810
|
multi_retriever_mode: MultiVectorRetrieverMode = Field(
|
|
570
|
-
default=MultiVectorRetrieverMode.BOTH,
|
|
571
|
-
description="Multi retriever mode"
|
|
811
|
+
default=MultiVectorRetrieverMode.BOTH, description="Multi retriever mode"
|
|
572
812
|
)
|
|
573
813
|
max_concurrency: int = Field(
|
|
574
|
-
default=DEFAULT_MAX_CONCURRENCY,
|
|
575
|
-
description="Maximum concurrency"
|
|
576
|
-
)
|
|
577
|
-
id_key: int = Field(
|
|
578
|
-
default=DEFAULT_ID_KEY,
|
|
579
|
-
description="ID key"
|
|
580
|
-
)
|
|
581
|
-
parent_store: Optional[BaseStore] = Field(
|
|
582
|
-
default=None,
|
|
583
|
-
description="Parent store"
|
|
814
|
+
default=DEFAULT_MAX_CONCURRENCY, description="Maximum concurrency"
|
|
584
815
|
)
|
|
816
|
+
id_key: int = Field(default=DEFAULT_ID_KEY, description="ID key")
|
|
817
|
+
parent_store: Optional[BaseStore] = Field(default=None, description="Parent store")
|
|
585
818
|
text_splitter: Optional[TextSplitter] = Field(
|
|
586
|
-
default=None,
|
|
587
|
-
description="Text splitter"
|
|
588
|
-
)
|
|
589
|
-
chunk_size: int = Field(
|
|
590
|
-
default=DEFAULT_CHUNK_SIZE,
|
|
591
|
-
description="Chunk size"
|
|
819
|
+
default=None, description="Text splitter"
|
|
592
820
|
)
|
|
821
|
+
chunk_size: int = Field(default=DEFAULT_CHUNK_SIZE, description="Chunk size")
|
|
593
822
|
chunk_overlap: int = Field(
|
|
594
|
-
default=DEFAULT_CHUNK_OVERLAP,
|
|
595
|
-
description="Chunk overlap"
|
|
823
|
+
default=DEFAULT_CHUNK_OVERLAP, description="Chunk overlap"
|
|
596
824
|
)
|
|
597
825
|
|
|
598
826
|
# Auto retriever specific
|
|
599
827
|
auto_retriever_filter_columns: Optional[List[str]] = Field(
|
|
600
|
-
default=None,
|
|
601
|
-
description="Filter columns"
|
|
828
|
+
default=None, description="Filter columns"
|
|
602
829
|
)
|
|
603
830
|
cardinality_threshold: int = Field(
|
|
604
|
-
default=DEFAULT_CARDINALITY_THRESHOLD,
|
|
605
|
-
description="Cardinality threshold"
|
|
831
|
+
default=DEFAULT_CARDINALITY_THRESHOLD, description="Cardinality threshold"
|
|
606
832
|
)
|
|
607
833
|
content_column_name: str = Field(
|
|
608
834
|
default=DEFAULT_CONTENT_COLUMN_NAME,
|
|
609
|
-
description="Content column name (the column we will get embeddings)"
|
|
835
|
+
description="Content column name (the column we will get embeddings)",
|
|
610
836
|
)
|
|
611
837
|
dataset_description: str = Field(
|
|
612
|
-
default=DEFAULT_DATASET_DESCRIPTION,
|
|
613
|
-
description="Description of the dataset"
|
|
838
|
+
default=DEFAULT_DATASET_DESCRIPTION, description="Description of the dataset"
|
|
614
839
|
)
|
|
615
840
|
reranker: bool = Field(
|
|
616
|
-
default=DEFAULT_RERANKER_FLAG,
|
|
617
|
-
description="Whether to use reranker"
|
|
841
|
+
default=DEFAULT_RERANKER_FLAG, description="Whether to use reranker"
|
|
618
842
|
)
|
|
619
843
|
reranker_config: RerankerConfig = Field(
|
|
620
|
-
default_factory=RerankerConfig,
|
|
621
|
-
description="Reranker configuration"
|
|
844
|
+
default_factory=RerankerConfig, description="Reranker configuration"
|
|
622
845
|
)
|
|
623
846
|
|
|
624
847
|
multi_hop_config: Optional[MultiHopRetrieverConfig] = Field(
|
|
625
848
|
default=None,
|
|
626
|
-
description="Configuration for multi-hop retrieval. Required when retriever_type is MULTI_HOP."
|
|
849
|
+
description="Configuration for multi-hop retrieval. Required when retriever_type is MULTI_HOP.",
|
|
627
850
|
)
|
|
628
851
|
|
|
629
852
|
@field_validator("multi_hop_config")
|
|
@@ -632,7 +855,9 @@ class RAGPipelineModel(BaseModel):
|
|
|
632
855
|
"""Validate that multi_hop_config is set when using multi-hop retrieval."""
|
|
633
856
|
values = info.data
|
|
634
857
|
if values.get("retriever_type") == RetrieverType.MULTI_HOP and v is None:
|
|
635
|
-
raise ValueError(
|
|
858
|
+
raise ValueError(
|
|
859
|
+
"multi_hop_config must be set when using multi-hop retrieval"
|
|
860
|
+
)
|
|
636
861
|
return v
|
|
637
862
|
|
|
638
863
|
class Config:
|
|
@@ -651,10 +876,10 @@ class RAGPipelineModel(BaseModel):
|
|
|
651
876
|
def get_field_names(cls):
|
|
652
877
|
return list(cls.model_fields.keys())
|
|
653
878
|
|
|
654
|
-
@field_validator(
|
|
879
|
+
@field_validator("search_kwargs")
|
|
655
880
|
@classmethod
|
|
656
881
|
def validate_search_kwargs(cls, v: SearchKwargs, info) -> SearchKwargs:
|
|
657
|
-
search_type = info.data.get(
|
|
882
|
+
search_type = info.data.get("search_type", SearchType.SIMILARITY)
|
|
658
883
|
|
|
659
884
|
# Validate MMR-specific parameters
|
|
660
885
|
if search_type == SearchType.MMR:
|
|
@@ -663,9 +888,13 @@ class RAGPipelineModel(BaseModel):
|
|
|
663
888
|
if v.lambda_mult is not None and (v.lambda_mult < 0 or v.lambda_mult > 1):
|
|
664
889
|
raise ValueError("lambda_mult must be between 0 and 1")
|
|
665
890
|
if v.fetch_k is None and v.lambda_mult is not None:
|
|
666
|
-
raise ValueError(
|
|
891
|
+
raise ValueError(
|
|
892
|
+
"fetch_k is required when using lambda_mult with MMR search type"
|
|
893
|
+
)
|
|
667
894
|
if v.lambda_mult is None and v.fetch_k is not None:
|
|
668
|
-
raise ValueError(
|
|
895
|
+
raise ValueError(
|
|
896
|
+
"lambda_mult is required when using fetch_k with MMR search type"
|
|
897
|
+
)
|
|
669
898
|
elif search_type != SearchType.MMR:
|
|
670
899
|
if v.fetch_k is not None:
|
|
671
900
|
raise ValueError("fetch_k is only valid for MMR search type")
|
|
@@ -674,11 +903,20 @@ class RAGPipelineModel(BaseModel):
|
|
|
674
903
|
|
|
675
904
|
# Validate similarity_score_threshold parameters
|
|
676
905
|
if search_type == SearchType.SIMILARITY_SCORE_THRESHOLD:
|
|
677
|
-
if v.score_threshold is not None and (
|
|
906
|
+
if v.score_threshold is not None and (
|
|
907
|
+
v.score_threshold < 0 or v.score_threshold > 1
|
|
908
|
+
):
|
|
678
909
|
raise ValueError("score_threshold must be between 0 and 1")
|
|
679
910
|
if v.score_threshold is None:
|
|
680
|
-
raise ValueError(
|
|
681
|
-
|
|
682
|
-
|
|
911
|
+
raise ValueError(
|
|
912
|
+
"score_threshold is required for similarity_score_threshold search type"
|
|
913
|
+
)
|
|
914
|
+
elif (
|
|
915
|
+
search_type != SearchType.SIMILARITY_SCORE_THRESHOLD
|
|
916
|
+
and v.score_threshold is not None
|
|
917
|
+
):
|
|
918
|
+
raise ValueError(
|
|
919
|
+
"score_threshold is only valid for similarity_score_threshold search type"
|
|
920
|
+
)
|
|
683
921
|
|
|
684
922
|
return v
|