langchain 0.3.26__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +174 -151
- langchain/agents/agent_iterator.py +50 -26
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +4 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +8 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +11 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +9 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +17 -9
- langchain/agents/json_chat/base.py +19 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +11 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +81 -71
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +2 -0
- langchain/agents/openai_functions_agent/base.py +47 -37
- langchain/agents/openai_functions_multi_agent/base.py +40 -27
- langchain/agents/openai_tools/base.py +9 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +8 -6
- langchain/agents/output_parsers/openai_functions.py +24 -9
- langchain/agents/output_parsers/openai_tools.py +16 -4
- langchain/agents/output_parsers/react_json_single_input.py +13 -5
- langchain/agents/output_parsers/react_single_input.py +18 -11
- langchain/agents/output_parsers/self_ask.py +5 -2
- langchain/agents/output_parsers/tools.py +32 -13
- langchain/agents/output_parsers/xml.py +102 -28
- langchain/agents/react/agent.py +5 -4
- langchain/agents/react/base.py +26 -17
- langchain/agents/react/output_parser.py +7 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +5 -2
- langchain/agents/self_ask_with_search/base.py +23 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +19 -11
- langchain/agents/structured_chat/output_parser.py +29 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +8 -6
- langchain/agents/tools.py +5 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +12 -6
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +17 -3
- langchain/callbacks/streaming_aiter_final_only.py +16 -5
- langchain/callbacks/streaming_stdout_final_only.py +10 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +12 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +41 -23
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +96 -56
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +30 -11
- langchain/chains/combine_documents/map_reduce.py +41 -30
- langchain/chains/combine_documents/map_rerank.py +39 -24
- langchain/chains/combine_documents/reduce.py +48 -26
- langchain/chains/combine_documents/refine.py +27 -17
- langchain/chains/combine_documents/stuff.py +24 -13
- langchain/chains/constitutional_ai/base.py +11 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +9 -4
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +108 -79
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +10 -10
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +28 -12
- langchain/chains/flare/prompts.py +2 -0
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +82 -61
- langchain/chains/llm_bash/__init__.py +3 -2
- langchain/chains/llm_checker/base.py +19 -6
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +25 -10
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +22 -7
- langchain/chains/llm_symbolic_math/__init__.py +3 -2
- langchain/chains/loading.py +155 -97
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +11 -9
- langchain/chains/natbot/base.py +11 -9
- langchain/chains/natbot/crawler.py +102 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +15 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +21 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +39 -35
- langchain/chains/openai_functions/qa_with_structure.py +22 -15
- langchain/chains/openai_functions/tagging.py +4 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +8 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +17 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +15 -6
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +21 -8
- langchain/chains/query_constructor/base.py +37 -34
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +101 -34
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +38 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +38 -22
- langchain/chains/router/embedding_router.py +15 -8
- langchain/chains/router/llm_router.py +23 -20
- langchain/chains/router/multi_prompt.py +5 -2
- langchain/chains/router/multi_retrieval_qa.py +28 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +7 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +77 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +9 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +213 -139
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blob_loaders/schema.py +1 -4
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +35 -24
- langchain/embeddings/cache.py +37 -32
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +26 -25
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +21 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +23 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +34 -20
- langchain/evaluation/exact_match/base.py +14 -1
- langchain/evaluation/loading.py +16 -11
- langchain/evaluation/parsing/base.py +20 -4
- langchain/evaluation/parsing/json_distance.py +24 -10
- langchain/evaluation/parsing/json_schema.py +13 -12
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +20 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +4 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/regex_match/base.py +9 -1
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +23 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +20 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +14 -7
- langchain/memory/buffer_window.py +2 -0
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +14 -13
- langchain/memory/entity.py +131 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +4 -3
- langchain/memory/summary.py +43 -11
- langchain/memory/summary_buffer.py +20 -8
- langchain/memory/token_buffer.py +2 -0
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +14 -7
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +10 -4
- langchain/output_parsers/fix.py +60 -53
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +59 -48
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +9 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +99 -80
- langchain/output_parsers/structured.py +21 -6
- langchain/output_parsers/yaml.py +19 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/pydantic_v1/__init__.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +5 -3
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +17 -15
- langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -0
- langchain/retrievers/document_compressors/embeddings_filter.py +24 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +8 -5
- langchain/retrievers/ensemble.py +30 -27
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +35 -27
- langchain/retrievers/multi_vector.py +24 -9
- langchain/retrievers/parent_document_retriever.py +33 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +157 -127
- langchain/retrievers/time_weighted_retriever.py +21 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/hub.py +12 -0
- langchain/runnables/openai_functions.py +12 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +2 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +9 -23
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +22 -4
- langchain/smith/evaluation/runner_utils.py +416 -247
- langchain/smith/evaluation/string_run_evaluator.py +102 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +7 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +3 -2
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA +4 -14
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/RECORD +590 -591
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/WHEEL +1 -1
- langchain/smith/evaluation/utils.py +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0
|
@@ -31,9 +31,7 @@ class MergerRetriever(BaseRetriever):
|
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
33
|
# Merge the results of the retrievers.
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
return merged_documents
|
|
34
|
+
return self.merge_documents(query, run_manager)
|
|
37
35
|
|
|
38
36
|
async def _aget_relevant_documents(
|
|
39
37
|
self,
|
|
@@ -52,12 +50,12 @@ class MergerRetriever(BaseRetriever):
|
|
|
52
50
|
"""
|
|
53
51
|
|
|
54
52
|
# Merge the results of the retrievers.
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
return merged_documents
|
|
53
|
+
return await self.amerge_documents(query, run_manager)
|
|
58
54
|
|
|
59
55
|
def merge_documents(
|
|
60
|
-
self,
|
|
56
|
+
self,
|
|
57
|
+
query: str,
|
|
58
|
+
run_manager: CallbackManagerForRetrieverRun,
|
|
61
59
|
) -> list[Document]:
|
|
62
60
|
"""
|
|
63
61
|
Merge the results of the retrievers.
|
|
@@ -82,14 +80,16 @@ class MergerRetriever(BaseRetriever):
|
|
|
82
80
|
merged_documents = []
|
|
83
81
|
max_docs = max(map(len, retriever_docs), default=0)
|
|
84
82
|
for i in range(max_docs):
|
|
85
|
-
for
|
|
83
|
+
for _retriever, doc in zip(self.retrievers, retriever_docs):
|
|
86
84
|
if i < len(doc):
|
|
87
85
|
merged_documents.append(doc[i])
|
|
88
86
|
|
|
89
87
|
return merged_documents
|
|
90
88
|
|
|
91
89
|
async def amerge_documents(
|
|
92
|
-
self,
|
|
90
|
+
self,
|
|
91
|
+
query: str,
|
|
92
|
+
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
93
93
|
) -> list[Document]:
|
|
94
94
|
"""
|
|
95
95
|
Asynchronously merge the results of the retrievers.
|
|
@@ -109,14 +109,14 @@ class MergerRetriever(BaseRetriever):
|
|
|
109
109
|
config={"callbacks": run_manager.get_child(f"retriever_{i + 1}")},
|
|
110
110
|
)
|
|
111
111
|
for i, retriever in enumerate(self.retrievers)
|
|
112
|
-
)
|
|
112
|
+
),
|
|
113
113
|
)
|
|
114
114
|
|
|
115
115
|
# Merge the results of the retrievers.
|
|
116
116
|
merged_documents = []
|
|
117
117
|
max_docs = max(map(len, retriever_docs), default=0)
|
|
118
118
|
for i in range(max_docs):
|
|
119
|
-
for
|
|
119
|
+
for _retriever, doc in zip(self.retrievers, retriever_docs):
|
|
120
120
|
if i < len(doc):
|
|
121
121
|
merged_documents.append(doc[i])
|
|
122
122
|
|
langchain/retrievers/milvus.py
CHANGED
|
@@ -14,6 +14,7 @@ from langchain_core.prompts import BasePromptTemplate
|
|
|
14
14
|
from langchain_core.prompts.prompt import PromptTemplate
|
|
15
15
|
from langchain_core.retrievers import BaseRetriever
|
|
16
16
|
from langchain_core.runnables import Runnable
|
|
17
|
+
from typing_extensions import override
|
|
17
18
|
|
|
18
19
|
from langchain.chains.llm import LLMChain
|
|
19
20
|
|
|
@@ -23,6 +24,7 @@ logger = logging.getLogger(__name__)
|
|
|
23
24
|
class LineListOutputParser(BaseOutputParser[list[str]]):
|
|
24
25
|
"""Output parser for a list of lines."""
|
|
25
26
|
|
|
27
|
+
@override
|
|
26
28
|
def parse(self, text: str) -> list[str]:
|
|
27
29
|
lines = text.strip().split("\n")
|
|
28
30
|
return list(filter(None, lines)) # Remove empty lines
|
|
@@ -31,12 +33,12 @@ class LineListOutputParser(BaseOutputParser[list[str]]):
|
|
|
31
33
|
# Default prompt
|
|
32
34
|
DEFAULT_QUERY_PROMPT = PromptTemplate(
|
|
33
35
|
input_variables=["question"],
|
|
34
|
-
template="""You are an AI language model assistant. Your task is
|
|
35
|
-
to generate 3 different versions of the given user
|
|
36
|
-
question to retrieve relevant documents from a vector database.
|
|
37
|
-
By generating multiple perspectives on the user question,
|
|
38
|
-
your goal is to help the user overcome some of the limitations
|
|
39
|
-
of distance-based similarity search. Provide these alternative
|
|
36
|
+
template="""You are an AI language model assistant. Your task is
|
|
37
|
+
to generate 3 different versions of the given user
|
|
38
|
+
question to retrieve relevant documents from a vector database.
|
|
39
|
+
By generating multiple perspectives on the user question,
|
|
40
|
+
your goal is to help the user overcome some of the limitations
|
|
41
|
+
of distance-based similarity search. Provide these alternative
|
|
40
42
|
questions separated by newlines. Original question: {question}""",
|
|
41
43
|
)
|
|
42
44
|
|
|
@@ -65,8 +67,8 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
65
67
|
retriever: BaseRetriever,
|
|
66
68
|
llm: BaseLanguageModel,
|
|
67
69
|
prompt: BasePromptTemplate = DEFAULT_QUERY_PROMPT,
|
|
68
|
-
parser_key: Optional[str] = None,
|
|
69
|
-
include_original: bool = False,
|
|
70
|
+
parser_key: Optional[str] = None, # noqa: ARG003
|
|
71
|
+
include_original: bool = False, # noqa: FBT001,FBT002
|
|
70
72
|
) -> "MultiQueryRetriever":
|
|
71
73
|
"""Initialize from llm using default template.
|
|
72
74
|
|
|
@@ -110,7 +112,9 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
110
112
|
return self.unique_union(documents)
|
|
111
113
|
|
|
112
114
|
async def agenerate_queries(
|
|
113
|
-
self,
|
|
115
|
+
self,
|
|
116
|
+
question: str,
|
|
117
|
+
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
114
118
|
) -> list[str]:
|
|
115
119
|
"""Generate queries based upon user input.
|
|
116
120
|
|
|
@@ -121,18 +125,18 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
121
125
|
List of LLM generated queries that are similar to the user input
|
|
122
126
|
"""
|
|
123
127
|
response = await self.llm_chain.ainvoke(
|
|
124
|
-
{"question": question},
|
|
128
|
+
{"question": question},
|
|
129
|
+
config={"callbacks": run_manager.get_child()},
|
|
125
130
|
)
|
|
126
|
-
if isinstance(self.llm_chain, LLMChain)
|
|
127
|
-
lines = response["text"]
|
|
128
|
-
else:
|
|
129
|
-
lines = response
|
|
131
|
+
lines = response["text"] if isinstance(self.llm_chain, LLMChain) else response
|
|
130
132
|
if self.verbose:
|
|
131
|
-
logger.info(
|
|
133
|
+
logger.info("Generated queries: %s", lines)
|
|
132
134
|
return lines
|
|
133
135
|
|
|
134
136
|
async def aretrieve_documents(
|
|
135
|
-
self,
|
|
137
|
+
self,
|
|
138
|
+
queries: list[str],
|
|
139
|
+
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
136
140
|
) -> list[Document]:
|
|
137
141
|
"""Run all LLM generated queries.
|
|
138
142
|
|
|
@@ -145,10 +149,11 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
145
149
|
document_lists = await asyncio.gather(
|
|
146
150
|
*(
|
|
147
151
|
self.retriever.ainvoke(
|
|
148
|
-
query,
|
|
152
|
+
query,
|
|
153
|
+
config={"callbacks": run_manager.get_child()},
|
|
149
154
|
)
|
|
150
155
|
for query in queries
|
|
151
|
-
)
|
|
156
|
+
),
|
|
152
157
|
)
|
|
153
158
|
return [doc for docs in document_lists for doc in docs]
|
|
154
159
|
|
|
@@ -173,7 +178,9 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
173
178
|
return self.unique_union(documents)
|
|
174
179
|
|
|
175
180
|
def generate_queries(
|
|
176
|
-
self,
|
|
181
|
+
self,
|
|
182
|
+
question: str,
|
|
183
|
+
run_manager: CallbackManagerForRetrieverRun,
|
|
177
184
|
) -> list[str]:
|
|
178
185
|
"""Generate queries based upon user input.
|
|
179
186
|
|
|
@@ -184,18 +191,18 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
184
191
|
List of LLM generated queries that are similar to the user input
|
|
185
192
|
"""
|
|
186
193
|
response = self.llm_chain.invoke(
|
|
187
|
-
{"question": question},
|
|
194
|
+
{"question": question},
|
|
195
|
+
config={"callbacks": run_manager.get_child()},
|
|
188
196
|
)
|
|
189
|
-
if isinstance(self.llm_chain, LLMChain)
|
|
190
|
-
lines = response["text"]
|
|
191
|
-
else:
|
|
192
|
-
lines = response
|
|
197
|
+
lines = response["text"] if isinstance(self.llm_chain, LLMChain) else response
|
|
193
198
|
if self.verbose:
|
|
194
|
-
logger.info(
|
|
199
|
+
logger.info("Generated queries: %s", lines)
|
|
195
200
|
return lines
|
|
196
201
|
|
|
197
202
|
def retrieve_documents(
|
|
198
|
-
self,
|
|
203
|
+
self,
|
|
204
|
+
queries: list[str],
|
|
205
|
+
run_manager: CallbackManagerForRetrieverRun,
|
|
199
206
|
) -> list[Document]:
|
|
200
207
|
"""Run all LLM generated queries.
|
|
201
208
|
|
|
@@ -208,7 +215,8 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
208
215
|
documents = []
|
|
209
216
|
for query in queries:
|
|
210
217
|
docs = self.retriever.invoke(
|
|
211
|
-
query,
|
|
218
|
+
query,
|
|
219
|
+
config={"callbacks": run_manager.get_child()},
|
|
212
220
|
)
|
|
213
221
|
documents.extend(docs)
|
|
214
222
|
return documents
|
|
@@ -10,6 +10,7 @@ from langchain_core.retrievers import BaseRetriever
|
|
|
10
10
|
from langchain_core.stores import BaseStore, ByteStore
|
|
11
11
|
from langchain_core.vectorstores import VectorStore
|
|
12
12
|
from pydantic import Field, model_validator
|
|
13
|
+
from typing_extensions import override
|
|
13
14
|
|
|
14
15
|
from langchain.storage._lc_store import create_kv_docstore
|
|
15
16
|
|
|
@@ -43,18 +44,23 @@ class MultiVectorRetriever(BaseRetriever):
|
|
|
43
44
|
|
|
44
45
|
@model_validator(mode="before")
|
|
45
46
|
@classmethod
|
|
46
|
-
def
|
|
47
|
+
def _shim_docstore(cls, values: dict) -> Any:
|
|
47
48
|
byte_store = values.get("byte_store")
|
|
48
49
|
docstore = values.get("docstore")
|
|
49
50
|
if byte_store is not None:
|
|
50
51
|
docstore = create_kv_docstore(byte_store)
|
|
51
52
|
elif docstore is None:
|
|
52
|
-
|
|
53
|
+
msg = "You must pass a `byte_store` parameter."
|
|
54
|
+
raise ValueError(msg)
|
|
53
55
|
values["docstore"] = docstore
|
|
54
56
|
return values
|
|
55
57
|
|
|
58
|
+
@override
|
|
56
59
|
def _get_relevant_documents(
|
|
57
|
-
self,
|
|
60
|
+
self,
|
|
61
|
+
query: str,
|
|
62
|
+
*,
|
|
63
|
+
run_manager: CallbackManagerForRetrieverRun,
|
|
58
64
|
) -> list[Document]:
|
|
59
65
|
"""Get documents relevant to a query.
|
|
60
66
|
Args:
|
|
@@ -65,12 +71,14 @@ class MultiVectorRetriever(BaseRetriever):
|
|
|
65
71
|
"""
|
|
66
72
|
if self.search_type == SearchType.mmr:
|
|
67
73
|
sub_docs = self.vectorstore.max_marginal_relevance_search(
|
|
68
|
-
query,
|
|
74
|
+
query,
|
|
75
|
+
**self.search_kwargs,
|
|
69
76
|
)
|
|
70
77
|
elif self.search_type == SearchType.similarity_score_threshold:
|
|
71
78
|
sub_docs_and_similarities = (
|
|
72
79
|
self.vectorstore.similarity_search_with_relevance_scores(
|
|
73
|
-
query,
|
|
80
|
+
query,
|
|
81
|
+
**self.search_kwargs,
|
|
74
82
|
)
|
|
75
83
|
)
|
|
76
84
|
sub_docs = [sub_doc for sub_doc, _ in sub_docs_and_similarities]
|
|
@@ -85,8 +93,12 @@ class MultiVectorRetriever(BaseRetriever):
|
|
|
85
93
|
docs = self.docstore.mget(ids)
|
|
86
94
|
return [d for d in docs if d is not None]
|
|
87
95
|
|
|
96
|
+
@override
|
|
88
97
|
async def _aget_relevant_documents(
|
|
89
|
-
self,
|
|
98
|
+
self,
|
|
99
|
+
query: str,
|
|
100
|
+
*,
|
|
101
|
+
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
90
102
|
) -> list[Document]:
|
|
91
103
|
"""Asynchronously get documents relevant to a query.
|
|
92
104
|
Args:
|
|
@@ -97,18 +109,21 @@ class MultiVectorRetriever(BaseRetriever):
|
|
|
97
109
|
"""
|
|
98
110
|
if self.search_type == SearchType.mmr:
|
|
99
111
|
sub_docs = await self.vectorstore.amax_marginal_relevance_search(
|
|
100
|
-
query,
|
|
112
|
+
query,
|
|
113
|
+
**self.search_kwargs,
|
|
101
114
|
)
|
|
102
115
|
elif self.search_type == SearchType.similarity_score_threshold:
|
|
103
116
|
sub_docs_and_similarities = (
|
|
104
117
|
await self.vectorstore.asimilarity_search_with_relevance_scores(
|
|
105
|
-
query,
|
|
118
|
+
query,
|
|
119
|
+
**self.search_kwargs,
|
|
106
120
|
)
|
|
107
121
|
)
|
|
108
122
|
sub_docs = [sub_doc for sub_doc, _ in sub_docs_and_similarities]
|
|
109
123
|
else:
|
|
110
124
|
sub_docs = await self.vectorstore.asimilarity_search(
|
|
111
|
-
query,
|
|
125
|
+
query,
|
|
126
|
+
**self.search_kwargs,
|
|
112
127
|
)
|
|
113
128
|
|
|
114
129
|
# We do this to maintain the order of the ids that are returned
|
|
@@ -54,6 +54,7 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
54
54
|
child_splitter=child_splitter,
|
|
55
55
|
parent_splitter=parent_splitter,
|
|
56
56
|
)
|
|
57
|
+
|
|
57
58
|
""" # noqa: E501
|
|
58
59
|
|
|
59
60
|
child_splitter: TextSplitter
|
|
@@ -66,7 +67,7 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
66
67
|
If none, then the parent documents will be the raw documents passed in."""
|
|
67
68
|
|
|
68
69
|
child_metadata_fields: Optional[Sequence[str]] = None
|
|
69
|
-
"""Metadata fields to leave in child documents. If None, leave all parent document
|
|
70
|
+
"""Metadata fields to leave in child documents. If None, leave all parent document
|
|
70
71
|
metadata.
|
|
71
72
|
"""
|
|
72
73
|
|
|
@@ -74,6 +75,7 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
74
75
|
self,
|
|
75
76
|
documents: list[Document],
|
|
76
77
|
ids: Optional[list[str]] = None,
|
|
78
|
+
*,
|
|
77
79
|
add_to_docstore: bool = True,
|
|
78
80
|
) -> tuple[list[Document], list[tuple[str, Document]]]:
|
|
79
81
|
if self.parent_splitter is not None:
|
|
@@ -81,15 +83,15 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
81
83
|
if ids is None:
|
|
82
84
|
doc_ids = [str(uuid.uuid4()) for _ in documents]
|
|
83
85
|
if not add_to_docstore:
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
)
|
|
86
|
+
msg = "If ids are not passed in, `add_to_docstore` MUST be True"
|
|
87
|
+
raise ValueError(msg)
|
|
87
88
|
else:
|
|
88
89
|
if len(documents) != len(ids):
|
|
89
|
-
|
|
90
|
+
msg = (
|
|
90
91
|
"Got uneven list of documents and ids. "
|
|
91
92
|
"If `ids` is provided, should be same length as `documents`."
|
|
92
93
|
)
|
|
94
|
+
raise ValueError(msg)
|
|
93
95
|
doc_ids = ids
|
|
94
96
|
|
|
95
97
|
docs = []
|
|
@@ -113,7 +115,7 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
113
115
|
self,
|
|
114
116
|
documents: list[Document],
|
|
115
117
|
ids: Optional[list[str]] = None,
|
|
116
|
-
add_to_docstore: bool = True,
|
|
118
|
+
add_to_docstore: bool = True, # noqa: FBT001,FBT002
|
|
117
119
|
**kwargs: Any,
|
|
118
120
|
) -> None:
|
|
119
121
|
"""Adds documents to the docstore and vectorstores.
|
|
@@ -130,7 +132,11 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
130
132
|
to set this to False if the documents are already in the docstore
|
|
131
133
|
and you don't want to re-add them.
|
|
132
134
|
"""
|
|
133
|
-
docs, full_docs = self._split_docs_for_adding(
|
|
135
|
+
docs, full_docs = self._split_docs_for_adding(
|
|
136
|
+
documents,
|
|
137
|
+
ids,
|
|
138
|
+
add_to_docstore=add_to_docstore,
|
|
139
|
+
)
|
|
134
140
|
self.vectorstore.add_documents(docs, **kwargs)
|
|
135
141
|
if add_to_docstore:
|
|
136
142
|
self.docstore.mset(full_docs)
|
|
@@ -139,10 +145,28 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
139
145
|
self,
|
|
140
146
|
documents: list[Document],
|
|
141
147
|
ids: Optional[list[str]] = None,
|
|
142
|
-
add_to_docstore: bool = True,
|
|
148
|
+
add_to_docstore: bool = True, # noqa: FBT001,FBT002
|
|
143
149
|
**kwargs: Any,
|
|
144
150
|
) -> None:
|
|
145
|
-
|
|
151
|
+
"""Adds documents to the docstore and vectorstores.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
documents: List of documents to add
|
|
155
|
+
ids: Optional list of ids for documents. If provided should be the same
|
|
156
|
+
length as the list of documents. Can be provided if parent documents
|
|
157
|
+
are already in the document store and you don't want to re-add
|
|
158
|
+
to the docstore. If not provided, random UUIDs will be used as
|
|
159
|
+
ids.
|
|
160
|
+
add_to_docstore: Boolean of whether to add documents to docstore.
|
|
161
|
+
This can be false if and only if `ids` are provided. You may want
|
|
162
|
+
to set this to False if the documents are already in the docstore
|
|
163
|
+
and you don't want to re-add them.
|
|
164
|
+
"""
|
|
165
|
+
docs, full_docs = self._split_docs_for_adding(
|
|
166
|
+
documents,
|
|
167
|
+
ids,
|
|
168
|
+
add_to_docstore=add_to_docstore,
|
|
169
|
+
)
|
|
146
170
|
await self.vectorstore.aadd_documents(docs, **kwargs)
|
|
147
171
|
if add_to_docstore:
|
|
148
172
|
await self.docstore.amset(full_docs)
|
|
@@ -71,13 +71,14 @@ class RePhraseQueryRetriever(BaseRetriever):
|
|
|
71
71
|
Relevant documents for re-phrased question
|
|
72
72
|
"""
|
|
73
73
|
re_phrased_question = self.llm_chain.invoke(
|
|
74
|
-
query,
|
|
74
|
+
query,
|
|
75
|
+
{"callbacks": run_manager.get_child()},
|
|
75
76
|
)
|
|
76
|
-
logger.info(
|
|
77
|
-
|
|
78
|
-
re_phrased_question,
|
|
77
|
+
logger.info("Re-phrased question: %s", re_phrased_question)
|
|
78
|
+
return self.retriever.invoke(
|
|
79
|
+
re_phrased_question,
|
|
80
|
+
config={"callbacks": run_manager.get_child()},
|
|
79
81
|
)
|
|
80
|
-
return docs
|
|
81
82
|
|
|
82
83
|
async def _aget_relevant_documents(
|
|
83
84
|
self,
|