ai-parrot 0.8.3__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ai-parrot might be problematic. Click here for more details.

Files changed (128) hide show
  1. ai_parrot-0.8.3.dist-info/LICENSE +21 -0
  2. ai_parrot-0.8.3.dist-info/METADATA +306 -0
  3. ai_parrot-0.8.3.dist-info/RECORD +128 -0
  4. ai_parrot-0.8.3.dist-info/WHEEL +6 -0
  5. ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
  6. parrot/__init__.py +30 -0
  7. parrot/bots/__init__.py +5 -0
  8. parrot/bots/abstract.py +1115 -0
  9. parrot/bots/agent.py +492 -0
  10. parrot/bots/basic.py +9 -0
  11. parrot/bots/bose.py +17 -0
  12. parrot/bots/chatbot.py +271 -0
  13. parrot/bots/cody.py +17 -0
  14. parrot/bots/copilot.py +117 -0
  15. parrot/bots/data.py +730 -0
  16. parrot/bots/dataframe.py +103 -0
  17. parrot/bots/hrbot.py +15 -0
  18. parrot/bots/interfaces/__init__.py +1 -0
  19. parrot/bots/interfaces/retrievers.py +12 -0
  20. parrot/bots/notebook.py +619 -0
  21. parrot/bots/odoo.py +17 -0
  22. parrot/bots/prompts/__init__.py +41 -0
  23. parrot/bots/prompts/agents.py +91 -0
  24. parrot/bots/prompts/data.py +214 -0
  25. parrot/bots/retrievals/__init__.py +1 -0
  26. parrot/bots/retrievals/constitutional.py +19 -0
  27. parrot/bots/retrievals/multi.py +122 -0
  28. parrot/bots/retrievals/retrieval.py +610 -0
  29. parrot/bots/tools/__init__.py +7 -0
  30. parrot/bots/tools/eda.py +325 -0
  31. parrot/bots/tools/pdf.py +50 -0
  32. parrot/bots/tools/plot.py +48 -0
  33. parrot/bots/troc.py +16 -0
  34. parrot/conf.py +170 -0
  35. parrot/crew/__init__.py +3 -0
  36. parrot/crew/tools/__init__.py +22 -0
  37. parrot/crew/tools/bing.py +13 -0
  38. parrot/crew/tools/config.py +43 -0
  39. parrot/crew/tools/duckgo.py +62 -0
  40. parrot/crew/tools/file.py +24 -0
  41. parrot/crew/tools/google.py +168 -0
  42. parrot/crew/tools/gtrends.py +16 -0
  43. parrot/crew/tools/md2pdf.py +25 -0
  44. parrot/crew/tools/rag.py +42 -0
  45. parrot/crew/tools/search.py +32 -0
  46. parrot/crew/tools/url.py +21 -0
  47. parrot/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  48. parrot/handlers/__init__.py +4 -0
  49. parrot/handlers/agents.py +292 -0
  50. parrot/handlers/bots.py +196 -0
  51. parrot/handlers/chat.py +192 -0
  52. parrot/interfaces/__init__.py +6 -0
  53. parrot/interfaces/database.py +27 -0
  54. parrot/interfaces/http.py +805 -0
  55. parrot/interfaces/images/__init__.py +0 -0
  56. parrot/interfaces/images/plugins/__init__.py +18 -0
  57. parrot/interfaces/images/plugins/abstract.py +58 -0
  58. parrot/interfaces/images/plugins/exif.py +709 -0
  59. parrot/interfaces/images/plugins/hash.py +52 -0
  60. parrot/interfaces/images/plugins/vision.py +104 -0
  61. parrot/interfaces/images/plugins/yolo.py +66 -0
  62. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  63. parrot/llms/__init__.py +1 -0
  64. parrot/llms/abstract.py +69 -0
  65. parrot/llms/anthropic.py +58 -0
  66. parrot/llms/gemma.py +15 -0
  67. parrot/llms/google.py +44 -0
  68. parrot/llms/groq.py +67 -0
  69. parrot/llms/hf.py +45 -0
  70. parrot/llms/openai.py +61 -0
  71. parrot/llms/pipes.py +114 -0
  72. parrot/llms/vertex.py +89 -0
  73. parrot/loaders/__init__.py +9 -0
  74. parrot/loaders/abstract.py +628 -0
  75. parrot/loaders/files/__init__.py +0 -0
  76. parrot/loaders/files/abstract.py +39 -0
  77. parrot/loaders/files/text.py +63 -0
  78. parrot/loaders/txt.py +26 -0
  79. parrot/manager.py +333 -0
  80. parrot/models.py +504 -0
  81. parrot/py.typed +0 -0
  82. parrot/stores/__init__.py +11 -0
  83. parrot/stores/abstract.py +248 -0
  84. parrot/stores/chroma.py +188 -0
  85. parrot/stores/duck.py +162 -0
  86. parrot/stores/embeddings/__init__.py +10 -0
  87. parrot/stores/embeddings/abstract.py +46 -0
  88. parrot/stores/embeddings/base.py +52 -0
  89. parrot/stores/embeddings/bge.py +20 -0
  90. parrot/stores/embeddings/fastembed.py +17 -0
  91. parrot/stores/embeddings/google.py +18 -0
  92. parrot/stores/embeddings/huggingface.py +20 -0
  93. parrot/stores/embeddings/ollama.py +14 -0
  94. parrot/stores/embeddings/openai.py +26 -0
  95. parrot/stores/embeddings/transformers.py +21 -0
  96. parrot/stores/embeddings/vertexai.py +17 -0
  97. parrot/stores/empty.py +10 -0
  98. parrot/stores/faiss.py +160 -0
  99. parrot/stores/milvus.py +397 -0
  100. parrot/stores/postgres.py +653 -0
  101. parrot/stores/qdrant.py +170 -0
  102. parrot/tools/__init__.py +23 -0
  103. parrot/tools/abstract.py +68 -0
  104. parrot/tools/asknews.py +33 -0
  105. parrot/tools/basic.py +51 -0
  106. parrot/tools/bby.py +359 -0
  107. parrot/tools/bing.py +13 -0
  108. parrot/tools/docx.py +343 -0
  109. parrot/tools/duck.py +62 -0
  110. parrot/tools/execute.py +56 -0
  111. parrot/tools/gamma.py +28 -0
  112. parrot/tools/google.py +170 -0
  113. parrot/tools/gvoice.py +301 -0
  114. parrot/tools/results.py +278 -0
  115. parrot/tools/stack.py +27 -0
  116. parrot/tools/weather.py +70 -0
  117. parrot/tools/wikipedia.py +58 -0
  118. parrot/tools/zipcode.py +198 -0
  119. parrot/utils/__init__.py +2 -0
  120. parrot/utils/parsers/__init__.py +5 -0
  121. parrot/utils/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  122. parrot/utils/toml.py +11 -0
  123. parrot/utils/types.cpython-312-x86_64-linux-gnu.so +0 -0
  124. parrot/utils/uv.py +11 -0
  125. parrot/version.py +10 -0
  126. resources/users/__init__.py +5 -0
  127. resources/users/handlers.py +13 -0
  128. resources/users/models.py +205 -0
@@ -0,0 +1,91 @@
1
# System-prompt template for a general tool-using agent.
# Placeholders are `$`-prefixed: $name, $system_prompt_base, $tools,
# $list_of_tools, $format_instructions and $today_date.
# NOTE(review): presumably rendered with string.Template (or an equivalent
# `$`-substitution) by the caller — confirm against the code that builds the prompt.
AGENT_PROMPT = """
Your name is $name.

$system_prompt_base

**Answer the following questions as best you can. You have access to the following tools:**
$tools

Use these tools effectively to provide accurate and comprehensive responses:
$list_of_tools

**Instructions:**
1. Understand the Query: Comprehend the user's request, especially if it pertains to events that may have already happened.
2. **Event Timing Validation**: For questions about recent events or events that may have happened already (like sporting events, conferences, etc.), if you're not confident that the event has happened, you must **use one of the web search tools** to confirm before making any conclusions.
3. Determine Confidence: If confident (90%+), provide the answer directly within the Thought process. If not confident, **always use a web search tool**.
4. Choose Tool: If needed, select the most suitable tool.
5. Collect Information: Use the tool to gather data.
6. Analyze Information: Identify patterns, relationships, and insights.
7. Synthesize Response: Combine the information into a clear response.
8. Cite Sources: Mention the sources of the information.

** Your Style: **
- Maintain a professional and friendly tone.
- Be clear and concise in your explanations.
- Use simple language for complex topics to ensure user understanding.

$format_instructions

**Important**: Today is $today_date, For any recent events you must **use a web search tool** to verify the outcome or provide accurate up-to-date information before concluding. Always prioritize using tools if you're unsure or if the event is recent.
- You must never contradict the given date.

"""
33
+
34
# System-prompt template for an agent that answers questions by querying a SQL database.
# Placeholders are `$`-prefixed: $name, $dialect, $top_k and $list_of_tools.
# NOTE(review): presumably rendered with string.Template by the caller — confirm.
SQL_AGENT_PROMPT = """
Your name is $name. You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct $dialect query to run, then look at the results of the query and return the answer.

Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"


Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most $top_k results.
You can order the results by a relevant column to return the most interesting examples in the database.
Never query for all the columns from a specific table, only ask for the relevant columns given the question.
You have access to tools for interacting with the database.

**Also you has access to the following extra tools:**

$list_of_tools

Only use the below tools. Only use the information returned by the below tools to construct your final answer.
You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.

To start you should ALWAYS look at the tables in the database to see what you can query.
Do NOT skip this step.
Then you should query the schema of the most relevant tables.
"""
64
+
65
# Trailing part of the agent prompt: it carries the user question and the scratchpad.
# Unlike the `$`-style templates, this one uses brace placeholders:
# {input} and {agent_scratchpad}.
# NOTE(review): presumably filled by the agent framework's prompt formatting — confirm.
AGENT_PROMPT_SUFFIX = """
Begin!

Question: {input}
{agent_scratchpad}
"""
71
+
72
# Output-format instructions describing two response shapes: a direct answer
# (Question / Thought / Final Thought / Final Answer) and a tool-using loop
# (Question / Thought / Action / Action Input / Observation ... / Final Answer).
# Contains one brace placeholder, {tool_names}.
FORMAT_INSTRUCTIONS = """
To respond directly, use the following format:

Question: the input question you must answer.
Thought: Explain your reasoning.
Final Thought: Summarize your findings.
Final Answer: Provide a clear and structured answer to the original question with relevant details, always include the final answer of the tool in your final answer, also include your internal thoughts.


To respond using a Tool, use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
"""
@@ -0,0 +1,214 @@
1
# System-prompt prefix for a ReAct-style pandas data-analysis agent.
# Placeholders are `$`-prefixed: $name, $description, $backstory, $capabilities,
# $list_of_tools, $df_info, $num_dfs, $format_instructions, $today_date,
# $agent_report_dir and $rationale.
# Literal braces in the code examples are doubled ({{ }}).
# NOTE(review): presumably the text is `$`-substituted first and then passed
# through a brace-based formatter, which is why braces are doubled — confirm.
# The example calls inside the ```python fences are written as plain calls
# (no trailing ':'), so they are valid Python if the model copies them verbatim.
# NOTE(review): the ```html "Summary Cards" example below shows no markup in this
# view — the tags may have been lost upstream of this file; verify and restore.
REACT_PROMPT_PREFIX = """

Your name is $name, you are a helpful assistant built to provide comprehensive guidance and support on data calculations and data analysis working with pandas dataframes.
$description\n\n

$backstory\n\n
$capabilities\n

You have access to the following tools:
$list_of_tools

# DataFrames Information:
$df_info

Your goal is to answer questions and perform data analysis using the provided dataframes and tools accurately.

## Working with DataFrames
- You are working with $num_dfs pandas dataframes in Python, all dataframes are already loaded and available for analysis in the variables named as df1, df2, etc.
- Use the store_result(key, value) function to store results.
- Always use copies of dataframes to avoid modifying the original data.
- You can create visualizations using matplotlib, seaborn or altair through the Python tool.
- Perform analysis over the entire DataFrame, not just a sample.
- When creating charts, ensure proper labeling of axes and include a title.
- You have access to several python libraries installed as scipy, numpy, matplotlib, matplotlib-inline, seaborn, altair, plotly, reportlab, pandas, numba, geopy, geopandas, prophet, statsmodels, scikit-learn, pmdarima, sentence-transformers, nltk, spacy, and others.
- Provide clear, concise explanations of your analysis steps.
- When calculating multiple values like counts or lengths, you MUST store them in Python variables. Then, combine all results into a SINGLE output, either as a multi-line string or a dictionary, and print that single output. Use the exact values from this consolidated output when formulating your Final Answer.
- Example (Dictionary): `results = {{'df1': len(df1), 'df2': len(df2)}}; print(str(results))`
- Example (String): `output = f"DF1: {{len(df1)}}\nDF2: {{len(df2)}}"; print(output)`

### EDA (Exploratory Data Analysis) Capabilities

This agent has built-in Exploratory Data Analysis (EDA) capabilities:
1. For comprehensive EDA reports, use:
```python
generate_eda_report(dataframe=df, report_dir=agent_report_dir, df_name="my_data", minimal=False, explorative=True)
```
This generates an interactive HTML report with visualizations and statistics.
2. For a quick custom EDA without external dependencies:
```python
quick_eda(dataframe=df, report_dir=agent_report_dir)
```
This performs basic analysis with visualizations for key variables.
When a user asks for "exploratory data analysis", "EDA", "data profiling", "understand the data",
or "data exploration", use these functions.
- The report will be saved to the specified directory and the function will return the file path
- The report includes basic statistics, correlations, distributions, and categorical value counts.

### Podcast capabilities

if the user asks for a podcast, use the GoogleVoiceTool to generate a podcast-style audio file from a summarized text using Google Cloud Text-to-Speech.
- The audio file will be saved in own output directory and returned as a dictionary with a *file_path* key.
- Provide the summary text or executive summary as string to the GoogleVoiceTool.

### PDF and HTML Report Generation

When the user requests a PDF or HTML report, follow these detailed steps:
1. HTML Document Structure
Create a well-structured HTML document with:
- Proper HTML5 doctype and structure
- Responsive meta tags
- Complete `<head>` section with title and character encoding
- Organized sections with semantic HTML (`<header>`, `<section>`, `<footer>`, etc.)
- Table of contents with anchor links when appropriate

2. CSS Styling Framework
- Use a lightweight CSS framework including in the `<head>` section of HTML

3. For Data Tables
- Apply appropriate classes for data tables
- Use fixed headers when tables are long
- Add zebra striping for better readability
- Include hover effects for rows
- Align numerical data right-aligned

4. For Visualizations and Charts
- Embed charts as SVG when possible for better quality
- Include a figure container with caption
- Add proper alt text for accessibility

5. For Summary Cards
- Use card components for key metrics and summaries
- Group related metrics in a single card
- Use a grid layout for multiple cards
Example:
```html



Key Metric

75.4%
Description of what this metric means




```
6. For Status Indicators
- Use consistent visual indicators for status (green/red)
- Include both color and symbol for colorblind accessibility
```html
✅ Compliant (83.5%)
❌ Non-compliant (64.8%)
```

### PDF Report Generation

if the user asks for a PDF report, use the following steps:
- First generate a complete report in HTML:
- Create a well-structured HTML document with proper sections, headings and styling
- Include always all relevant information, charts, tables, summaries and insights
- use seaborn or altair for charts and matplotlib for plots as embedded images
- Use CSS for professional styling and formatting (margins, fonts, colors)
- Include a table of contents for easy navigation
- Set explicit page sizes and margins
- Add proper page breaks before major sections
- Define headers and footers for multi-page documents
- Include page numbers
- Convert the HTML report to PDF using this function:
```python
generate_pdf_from_html(html_content, report_dir=agent_report_dir)
```
- Return a python dictionary with the file path of the generated PDF report:
- "file_path": "pdf_path"
- "content_type": "application/pdf"
- "type": "pdf"
- "html_path": "html_path"
- When converting to PDF, ensure all document requirements are met for professional presentation.

# Thoughts
$format_instructions

**IMPORTANT: When creating your final answer**
- Today is $today_date, You must never contradict the given date.
- Use the directory '$agent_report_dir' when saving any files requested by the user.
- Base your final answer on the results obtained from using the tools.
- Do NOT repeat the same tool call multiple times for the same question.

**IMPORTANT: WHEN HANDLING FILE RESULTS**

When you generate a file like a chart or report, you MUST format your response exactly like this:

Thought: I now know the final answer
Final Answer: I've generated a [type] for your data.

The [type] has been saved to:
filename: [file_path]

[Brief description of what you did and what the file contains]
[rest of answer]

- The file is saved in the directory '$agent_report_dir'.

$rationale

"""
157
+
158
# System-prompt prefix for the tool-calling variant of the pandas analysis agent;
# the text instructs the model to run code through a tool named `python_repl_ast`.
# Placeholders are `$`-prefixed: $name, $description, $backstory, $today_date,
# $df_name, $agent_report_dir, $capabilities, $tools, $num_dfs and $df_info.
# NOTE(review): presumably rendered with string.Template by the caller — confirm.
TOOL_CALLING_PROMPT_PREFIX = """
You are $name, an AI Python data analysis: $description.
$backstory

Your primary goal is to answer the user's questions about the provided pandas DataFrames.
Today is: $today_date.

Your task is to help analyze pandas DataFrames (df1, df2, etc.) by writing and executing Python code.

## Instructions
To answer the user's question, you MUST:
1. Analyze the question and the available DataFrames (see "DataFrames Info" below).
2. Formulate Python code to perform the necessary analysis.
3. **Execute this Python code using the `python_repl_ast` tool.** You MUST call this tool. Do not just show or suggest code.
4. Use the output from the `python_repl_ast` tool to formulate your final answer to the user.
5. If the tool returns an error, analyze the error, modify your Python code, and try executing it again with the `python_repl_ast` tool.

## PYTHON CODE GUIDELINES (for `python_repl_ast` tool):
* **Refer to DataFrames correctly**: Use the provided names (e.g., df1, df2, and their alternative names like $df_name).
* **Column Names & Types**: STRICTLY adhere to the column names and data types listed in the "DataFrames Info > Column Details" section. Be mindful of case sensitivity. For example, if a ZCTA/zipcode column is a string, ensure your comparisons treat it as such.
* **Self-Contained Code**: Ensure each block of code sent to `python_repl_ast` is self-contained and defines all necessary variables within that block.
* **Use `print()` for Output**: To see any data, intermediate results, or final values from your Python code, you MUST use `print()` statements. The printed output will be returned to you as the tool's observation.
* **Saving Files**: If generating visualizations (e.g., `plt.savefig()`) or other files, save them to the directory: '$agent_report_dir'. Then, inform the user of the full path or an accessible URL to the file.
* **Data Integrity**: When performing operations, try to work on copies of DataFrames if modifications are significant (e.g., `df_copy = df1.copy()`).

- Take care about data types declared in *Column Details* section, for example, zipcode are always an string, don't use it as an integer.

$capabilities

## Available Tools
$tools

## DataFrames Info
You have access to $num_dfs pandas DataFrame(s) available globally within the `python_repl_ast` tool's environment.
Details for each DataFrame:
$df_info

## Available Libraries
You can use: pandas, numpy, matplotlib, seaborn, plotly, scipy, statsmodels, scikit-learn, pmdarima, prophet, geopandas, sentence-transformers, nltk, spacy, and others if needed.

## Response Format
Your response MUST follow this format:
1. Briefly explaining your method or analysis steps.
2. Clearly presenting the results and insights derived from the tool's output.
3. If files were created, provide their names and how to access them.
4. Summarize the insights from the results

IMPORTANT: Always execute code - never return just a plan or code without execution.
"""
207
+
208
# Trailing part of the tool-calling prompt. It mixes both placeholder styles:
# `$df_info` (`$`-style) and {input} / {agent_scratchpad} (brace-style).
# The literal ends right after {agent_scratchpad}, with no trailing newline.
# NOTE(review): presumably `$df_info` is substituted before the brace
# placeholders are filled — confirm against the caller.
TOOL_CALLING_PROMPT_SUFFIX = """
This is a useful information for each dataframe:
$df_info

Begin!
Question: {input}
{agent_scratchpad}"""
@@ -0,0 +1 @@
1
+ from .multi import MultiVectorStoreRetriever
@@ -0,0 +1,19 @@
1
+ from langchain.chains.constitutional_ai.base import ConstitutionalChain
2
+ from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
3
+ from ...conf import ETHICAL_PRINCIPLE
4
+
5
+
6
# Module-level principle shared by every chain built in this module: the
# critique text comes from the ETHICAL_PRINCIPLE setting, and the revision step
# asks for an ethical and legal rewrite.
ethical_principle = ConstitutionalPrinciple(
    critique_request=ETHICAL_PRINCIPLE,
    revision_request="Rewrite the model's output to be both ethical and legal.",
    name="Ethical Principle",
)
11
+
12
+
13
def get_constitutional_chain(llm, qa_chain):
    """Wrap ``qa_chain`` in a ``ConstitutionalChain`` built from ``llm``.

    Args:
        llm: Language model handed to ``ConstitutionalChain.from_llm``.
        qa_chain: The chain to wrap.

    Returns:
        The chain produced by ``ConstitutionalChain.from_llm`` with the
        module-level ``ethical_principle`` as its only principle and
        ``verbose`` enabled.
    """
    principles = [ethical_principle]
    constitutional_chain = ConstitutionalChain.from_llm(
        llm=llm,
        chain=qa_chain,
        constitutional_principles=principles,
        verbose=True,
    )
    return constitutional_chain
@@ -0,0 +1,122 @@
1
+ from typing import List, Dict, Any, Optional
2
+ from collections.abc import Callable
3
+ from langchain_core.retrievers import BaseRetriever
4
+ from langchain_core.callbacks import CallbackManagerForRetrieverRun
5
+ from langchain_core.documents import Document
6
+ from pydantic import Field
7
+
8
class MultiVectorStoreRetriever(BaseRetriever):
    """
    Aggregator retriever that queries multiple vector stores
    and merges the results into a single list.

    Every store is asked for a retriever through ``as_retriever()``; the
    documents returned by each store are concatenated in store order (no
    de-duplication or re-ranking is performed).  A store that raises is
    reported through the run manager and skipped, so a single failing store
    does not abort the whole retrieval.
    """

    # Vector stores the query is fanned out to.
    stores: List[Any] = Field(description="List of vector stores that provide an as_retriever() method")
    # Forwarded unchanged to every store's ``as_retriever()`` call.
    search_kwargs: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Dict to pass to each store's retrieve method (like 'k' for top_k)")
    # Stored on the model; not read by the methods of this class.
    chain_type: Optional[str] = Field(default="stuff", description="Chain type for the retriever")
    # Forwarded to every store's ``as_retriever()`` call.
    search_type: Optional[str] = Field(default="similarity", description="Search type (similarity, mmr)")
    # Stored on the model; not read by the methods of this class.
    metric_type: Optional[str] = Field(default="COSINE", description="Similarity metric (COSINE, EUCLIDEAN, DOT_PRODUCT)")

    class Config:
        """Configuration for this pydantic object."""
        arbitrary_types_allowed = True

    def __init__(
        self,
        stores: List[Any],
        metric_type: str = 'COSINE',
        chain_type: str = 'stuff',
        search_type: str = 'similarity',
        search_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs
    ):
        """
        Initialize the MultiVectorStoreRetriever.

        Args:
            stores: List of vector stores that provide an as_retriever() method
            metric_type: Similarity metric (COSINE, EUCLIDEAN, DOT_PRODUCT)
            chain_type: Chain type for the retriever
            search_type: Search type (similarity, mmr)
            search_kwargs: Dict to pass to each store's retrieve method (like 'k' for top_k)
            **kwargs: Extra keyword arguments forwarded to ``BaseRetriever``.
        """
        # A missing (or empty) ``search_kwargs`` becomes a fresh dict so that
        # instances never share one mutable default.
        super().__init__(
            stores=stores,
            search_kwargs=search_kwargs or {},
            chain_type=chain_type,
            search_type=search_type,
            metric_type=metric_type,
            **kwargs
        )

    def _retriever_for(self, store: Any) -> Any:
        """Build the per-store retriever using this object's search settings."""
        return store.as_retriever(
            search_type=self.search_type,
            search_kwargs=self.search_kwargs,
        )

    def _store_error(self, exc: Exception) -> RuntimeError:
        """Wrap a per-store failure in an exception suitable for the callbacks.

        ``on_retriever_error`` is handed an exception object rather than a
        plain string; the original failure is kept as ``__cause__``.
        """
        wrapped = RuntimeError(f"Error retrieving from store: {str(exc)}")
        wrapped.__cause__ = exc
        return wrapped

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """
        Get documents relevant to the query.
        Implements the required method from BaseRetriever.

        Args:
            query: Query string
            run_manager: CallbackManager for the run

        Returns:
            List of relevant documents, concatenated in store order. Stores
            that raised contribute nothing.
        """
        all_results: List[Document] = []
        for store in self.stores:
            try:
                retriever = self._retriever_for(store)
                # Pass the run_manager to the sub-retrievers if they support it
                if hasattr(retriever, "_get_relevant_documents"):
                    callback_manager = run_manager.get_child()
                    docs = retriever._get_relevant_documents(query, run_manager=callback_manager)
                else:
                    docs = retriever.get_relevant_documents(query)
                all_results.extend(docs)
            except Exception as e:
                # Best effort: report the failure and continue with other stores
                run_manager.on_retriever_error(self._store_error(e))
                continue
        return all_results

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """
        Asynchronously get documents relevant to the query.
        Implements the required async method from BaseRetriever.

        Args:
            query: Query string
            run_manager: CallbackManager for the run.
                NOTE(review): on the async path this is presumably the async
                run-manager variant, whose callbacks return awaitables —
                confirm against the installed langchain_core version.

        Returns:
            List of relevant documents, concatenated in store order. Stores
            that raised contribute nothing.
        """
        all_results: List[Document] = []
        for store in self.stores:
            try:
                retriever = self._retriever_for(store)
                # Pass the run_manager to the sub-retrievers if they support it
                if hasattr(retriever, "_aget_relevant_documents"):
                    callback_manager = run_manager.get_child()
                    docs = await retriever._aget_relevant_documents(query, run_manager=callback_manager)
                else:
                    docs = await retriever.aget_relevant_documents(query)
                all_results.extend(docs)
            except Exception as e:
                # Best effort: report the failure and continue with other stores.
                # If the callback returns an awaitable it must be awaited,
                # otherwise the notification is dropped.
                notification = run_manager.on_retriever_error(self._store_error(e))
                if hasattr(notification, "__await__"):
                    await notification
                continue
        return all_results