langchain 0.3.26__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +174 -151
- langchain/agents/agent_iterator.py +50 -26
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +4 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +8 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +11 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +9 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +17 -9
- langchain/agents/json_chat/base.py +19 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +11 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +81 -71
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +2 -0
- langchain/agents/openai_functions_agent/base.py +47 -37
- langchain/agents/openai_functions_multi_agent/base.py +40 -27
- langchain/agents/openai_tools/base.py +9 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +8 -6
- langchain/agents/output_parsers/openai_functions.py +24 -9
- langchain/agents/output_parsers/openai_tools.py +16 -4
- langchain/agents/output_parsers/react_json_single_input.py +13 -5
- langchain/agents/output_parsers/react_single_input.py +18 -11
- langchain/agents/output_parsers/self_ask.py +5 -2
- langchain/agents/output_parsers/tools.py +32 -13
- langchain/agents/output_parsers/xml.py +102 -28
- langchain/agents/react/agent.py +5 -4
- langchain/agents/react/base.py +26 -17
- langchain/agents/react/output_parser.py +7 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +5 -2
- langchain/agents/self_ask_with_search/base.py +23 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +19 -11
- langchain/agents/structured_chat/output_parser.py +29 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +8 -6
- langchain/agents/tools.py +5 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +12 -6
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +17 -3
- langchain/callbacks/streaming_aiter_final_only.py +16 -5
- langchain/callbacks/streaming_stdout_final_only.py +10 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +12 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +41 -23
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +96 -56
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +30 -11
- langchain/chains/combine_documents/map_reduce.py +41 -30
- langchain/chains/combine_documents/map_rerank.py +39 -24
- langchain/chains/combine_documents/reduce.py +48 -26
- langchain/chains/combine_documents/refine.py +27 -17
- langchain/chains/combine_documents/stuff.py +24 -13
- langchain/chains/constitutional_ai/base.py +11 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +9 -4
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +108 -79
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +10 -10
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +28 -12
- langchain/chains/flare/prompts.py +2 -0
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +82 -61
- langchain/chains/llm_bash/__init__.py +3 -2
- langchain/chains/llm_checker/base.py +19 -6
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +25 -10
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +22 -7
- langchain/chains/llm_symbolic_math/__init__.py +3 -2
- langchain/chains/loading.py +155 -97
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +11 -9
- langchain/chains/natbot/base.py +11 -9
- langchain/chains/natbot/crawler.py +102 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +15 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +21 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +39 -35
- langchain/chains/openai_functions/qa_with_structure.py +22 -15
- langchain/chains/openai_functions/tagging.py +4 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +8 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +17 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +15 -6
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +21 -8
- langchain/chains/query_constructor/base.py +37 -34
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +101 -34
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +38 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +38 -22
- langchain/chains/router/embedding_router.py +15 -8
- langchain/chains/router/llm_router.py +23 -20
- langchain/chains/router/multi_prompt.py +5 -2
- langchain/chains/router/multi_retrieval_qa.py +28 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +7 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +77 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +9 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +213 -139
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blob_loaders/schema.py +1 -4
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +35 -24
- langchain/embeddings/cache.py +37 -32
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +26 -25
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +21 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +23 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +34 -20
- langchain/evaluation/exact_match/base.py +14 -1
- langchain/evaluation/loading.py +16 -11
- langchain/evaluation/parsing/base.py +20 -4
- langchain/evaluation/parsing/json_distance.py +24 -10
- langchain/evaluation/parsing/json_schema.py +13 -12
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +20 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +4 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/regex_match/base.py +9 -1
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +23 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +20 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +14 -7
- langchain/memory/buffer_window.py +2 -0
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +14 -13
- langchain/memory/entity.py +131 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +4 -3
- langchain/memory/summary.py +43 -11
- langchain/memory/summary_buffer.py +20 -8
- langchain/memory/token_buffer.py +2 -0
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +14 -7
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +10 -4
- langchain/output_parsers/fix.py +60 -53
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +59 -48
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +9 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +99 -80
- langchain/output_parsers/structured.py +21 -6
- langchain/output_parsers/yaml.py +19 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/pydantic_v1/__init__.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +5 -3
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +17 -15
- langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -0
- langchain/retrievers/document_compressors/embeddings_filter.py +24 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +8 -5
- langchain/retrievers/ensemble.py +30 -27
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +35 -27
- langchain/retrievers/multi_vector.py +24 -9
- langchain/retrievers/parent_document_retriever.py +33 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +157 -127
- langchain/retrievers/time_weighted_retriever.py +21 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/hub.py +12 -0
- langchain/runnables/openai_functions.py +12 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +2 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +9 -23
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +22 -4
- langchain/smith/evaluation/runner_utils.py +416 -247
- langchain/smith/evaluation/string_run_evaluator.py +102 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +7 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +3 -2
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA +4 -14
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/RECORD +590 -591
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/WHEEL +1 -1
- langchain/smith/evaluation/utils.py +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0
|
@@ -10,6 +10,7 @@ from langchain_core.documents import Document
|
|
|
10
10
|
from langchain_core.runnables.config import RunnableConfig
|
|
11
11
|
from langchain_core.utils.pydantic import create_model
|
|
12
12
|
from pydantic import BaseModel, ConfigDict, model_validator
|
|
13
|
+
from typing_extensions import override
|
|
13
14
|
|
|
14
15
|
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
|
|
15
16
|
from langchain.chains.combine_documents.reduce import ReduceDocumentsChain
|
|
@@ -98,6 +99,7 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
98
99
|
llm_chain=llm_chain,
|
|
99
100
|
reduce_documents_chain=reduce_documents_chain,
|
|
100
101
|
)
|
|
102
|
+
|
|
101
103
|
"""
|
|
102
104
|
|
|
103
105
|
llm_chain: LLMChain
|
|
@@ -111,8 +113,10 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
111
113
|
return_intermediate_steps: bool = False
|
|
112
114
|
"""Return the results of the map steps in the output."""
|
|
113
115
|
|
|
116
|
+
@override
|
|
114
117
|
def get_output_schema(
|
|
115
|
-
self,
|
|
118
|
+
self,
|
|
119
|
+
config: Optional[RunnableConfig] = None,
|
|
116
120
|
) -> type[BaseModel]:
|
|
117
121
|
if self.return_intermediate_steps:
|
|
118
122
|
return create_model(
|
|
@@ -133,7 +137,7 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
133
137
|
"""
|
|
134
138
|
_output_keys = super().output_keys
|
|
135
139
|
if self.return_intermediate_steps:
|
|
136
|
-
_output_keys = _output_keys
|
|
140
|
+
_output_keys = [*_output_keys, "intermediate_steps"]
|
|
137
141
|
return _output_keys
|
|
138
142
|
|
|
139
143
|
model_config = ConfigDict(
|
|
@@ -147,11 +151,12 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
147
151
|
"""For backwards compatibility."""
|
|
148
152
|
if "combine_document_chain" in values:
|
|
149
153
|
if "reduce_documents_chain" in values:
|
|
150
|
-
|
|
154
|
+
msg = (
|
|
151
155
|
"Both `reduce_documents_chain` and `combine_document_chain` "
|
|
152
156
|
"cannot be provided at the same time. `combine_document_chain` "
|
|
153
157
|
"is deprecated, please only provide `reduce_documents_chain`"
|
|
154
158
|
)
|
|
159
|
+
raise ValueError(msg)
|
|
155
160
|
combine_chain = values["combine_document_chain"]
|
|
156
161
|
collapse_chain = values.get("collapse_document_chain")
|
|
157
162
|
reduce_chain = ReduceDocumentsChain(
|
|
@@ -160,8 +165,7 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
160
165
|
)
|
|
161
166
|
values["reduce_documents_chain"] = reduce_chain
|
|
162
167
|
del values["combine_document_chain"]
|
|
163
|
-
|
|
164
|
-
del values["collapse_document_chain"]
|
|
168
|
+
values.pop("collapse_document_chain", None)
|
|
165
169
|
|
|
166
170
|
return values
|
|
167
171
|
|
|
@@ -179,23 +183,25 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
179
183
|
def get_default_document_variable_name(cls, values: dict) -> Any:
|
|
180
184
|
"""Get default document variable name, if not provided."""
|
|
181
185
|
if "llm_chain" not in values:
|
|
182
|
-
|
|
186
|
+
msg = "llm_chain must be provided"
|
|
187
|
+
raise ValueError(msg)
|
|
183
188
|
|
|
184
189
|
llm_chain_variables = values["llm_chain"].prompt.input_variables
|
|
185
190
|
if "document_variable_name" not in values:
|
|
186
191
|
if len(llm_chain_variables) == 1:
|
|
187
192
|
values["document_variable_name"] = llm_chain_variables[0]
|
|
188
193
|
else:
|
|
189
|
-
|
|
194
|
+
msg = (
|
|
190
195
|
"document_variable_name must be provided if there are "
|
|
191
196
|
"multiple llm_chain input_variables"
|
|
192
197
|
)
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
198
|
+
raise ValueError(msg)
|
|
199
|
+
elif values["document_variable_name"] not in llm_chain_variables:
|
|
200
|
+
msg = (
|
|
201
|
+
f"document_variable_name {values['document_variable_name']} was "
|
|
202
|
+
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
|
203
|
+
)
|
|
204
|
+
raise ValueError(msg)
|
|
199
205
|
return values
|
|
200
206
|
|
|
201
207
|
@property
|
|
@@ -204,26 +210,25 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
204
210
|
if isinstance(self.reduce_documents_chain, ReduceDocumentsChain):
|
|
205
211
|
if self.reduce_documents_chain.collapse_documents_chain:
|
|
206
212
|
return self.reduce_documents_chain.collapse_documents_chain
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
)
|
|
213
|
+
return self.reduce_documents_chain.combine_documents_chain
|
|
214
|
+
msg = (
|
|
215
|
+
f"`reduce_documents_chain` is of type "
|
|
216
|
+
f"{type(self.reduce_documents_chain)} so it does not have "
|
|
217
|
+
f"this attribute."
|
|
218
|
+
)
|
|
219
|
+
raise ValueError(msg)
|
|
215
220
|
|
|
216
221
|
@property
|
|
217
222
|
def combine_document_chain(self) -> BaseCombineDocumentsChain:
|
|
218
223
|
"""Kept for backward compatibility."""
|
|
219
224
|
if isinstance(self.reduce_documents_chain, ReduceDocumentsChain):
|
|
220
225
|
return self.reduce_documents_chain.combine_documents_chain
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
226
|
+
msg = (
|
|
227
|
+
f"`reduce_documents_chain` is of type "
|
|
228
|
+
f"{type(self.reduce_documents_chain)} so it does not have "
|
|
229
|
+
f"this attribute."
|
|
230
|
+
)
|
|
231
|
+
raise ValueError(msg)
|
|
227
232
|
|
|
228
233
|
def combine_docs(
|
|
229
234
|
self,
|
|
@@ -249,7 +254,10 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
249
254
|
for i, r in enumerate(map_results)
|
|
250
255
|
]
|
|
251
256
|
result, extra_return_dict = self.reduce_documents_chain.combine_docs(
|
|
252
|
-
result_docs,
|
|
257
|
+
result_docs,
|
|
258
|
+
token_max=token_max,
|
|
259
|
+
callbacks=callbacks,
|
|
260
|
+
**kwargs,
|
|
253
261
|
)
|
|
254
262
|
if self.return_intermediate_steps:
|
|
255
263
|
intermediate_steps = [r[question_result_key] for r in map_results]
|
|
@@ -270,7 +278,7 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
270
278
|
"""
|
|
271
279
|
map_results = await self.llm_chain.aapply(
|
|
272
280
|
# FYI - this is parallelized and so it is fast.
|
|
273
|
-
[{
|
|
281
|
+
[{self.document_variable_name: d.page_content, **kwargs} for d in docs],
|
|
274
282
|
callbacks=callbacks,
|
|
275
283
|
)
|
|
276
284
|
question_result_key = self.llm_chain.output_key
|
|
@@ -280,7 +288,10 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
280
288
|
for i, r in enumerate(map_results)
|
|
281
289
|
]
|
|
282
290
|
result, extra_return_dict = await self.reduce_documents_chain.acombine_docs(
|
|
283
|
-
result_docs,
|
|
291
|
+
result_docs,
|
|
292
|
+
token_max=token_max,
|
|
293
|
+
callbacks=callbacks,
|
|
294
|
+
**kwargs,
|
|
284
295
|
)
|
|
285
296
|
if self.return_intermediate_steps:
|
|
286
297
|
intermediate_steps = [r[question_result_key] for r in map_results]
|
|
@@ -11,7 +11,7 @@ from langchain_core.documents import Document
|
|
|
11
11
|
from langchain_core.runnables.config import RunnableConfig
|
|
12
12
|
from langchain_core.utils.pydantic import create_model
|
|
13
13
|
from pydantic import BaseModel, ConfigDict, model_validator
|
|
14
|
-
from typing_extensions import Self
|
|
14
|
+
from typing_extensions import Self, override
|
|
15
15
|
|
|
16
16
|
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
|
|
17
17
|
from langchain.chains.llm import LLMChain
|
|
@@ -24,15 +24,15 @@ from langchain.output_parsers.regex import RegexParser
|
|
|
24
24
|
message=(
|
|
25
25
|
"This class is deprecated. Please see the migration guide here for "
|
|
26
26
|
"a recommended replacement: "
|
|
27
|
-
"https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain/"
|
|
27
|
+
"https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain/"
|
|
28
28
|
),
|
|
29
29
|
)
|
|
30
30
|
class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
31
31
|
"""Combining documents by mapping a chain over them, then reranking results.
|
|
32
32
|
|
|
33
33
|
This algorithm calls an LLMChain on each input document. The LLMChain is expected
|
|
34
|
-
to have an OutputParser that parses the result into both an answer (
|
|
35
|
-
and a score (
|
|
34
|
+
to have an OutputParser that parses the result into both an answer (``answer_key``)
|
|
35
|
+
and a score (``rank_key``). The answer with the highest score is then returned.
|
|
36
36
|
|
|
37
37
|
Example:
|
|
38
38
|
.. code-block:: python
|
|
@@ -69,6 +69,7 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
69
69
|
rank_key="score",
|
|
70
70
|
answer_key="answer",
|
|
71
71
|
)
|
|
72
|
+
|
|
72
73
|
"""
|
|
73
74
|
|
|
74
75
|
llm_chain: LLMChain
|
|
@@ -91,8 +92,10 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
91
92
|
extra="forbid",
|
|
92
93
|
)
|
|
93
94
|
|
|
95
|
+
@override
|
|
94
96
|
def get_output_schema(
|
|
95
|
-
self,
|
|
97
|
+
self,
|
|
98
|
+
config: Optional[RunnableConfig] = None,
|
|
96
99
|
) -> type[BaseModel]:
|
|
97
100
|
schema: dict[str, Any] = {
|
|
98
101
|
self.output_key: (str, None),
|
|
@@ -100,7 +103,7 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
100
103
|
if self.return_intermediate_steps:
|
|
101
104
|
schema["intermediate_steps"] = (list[str], None)
|
|
102
105
|
if self.metadata_keys:
|
|
103
|
-
schema.update(
|
|
106
|
+
schema.update(dict.fromkeys(self.metadata_keys, (Any, None)))
|
|
104
107
|
|
|
105
108
|
return create_model("MapRerankOutput", **schema)
|
|
106
109
|
|
|
@@ -112,7 +115,7 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
112
115
|
"""
|
|
113
116
|
_output_keys = super().output_keys
|
|
114
117
|
if self.return_intermediate_steps:
|
|
115
|
-
_output_keys = _output_keys
|
|
118
|
+
_output_keys = [*_output_keys, "intermediate_steps"]
|
|
116
119
|
if self.metadata_keys is not None:
|
|
117
120
|
_output_keys += self.metadata_keys
|
|
118
121
|
return _output_keys
|
|
@@ -122,21 +125,24 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
122
125
|
"""Validate that the combine chain outputs a dictionary."""
|
|
123
126
|
output_parser = self.llm_chain.prompt.output_parser
|
|
124
127
|
if not isinstance(output_parser, RegexParser):
|
|
125
|
-
|
|
128
|
+
msg = (
|
|
126
129
|
"Output parser of llm_chain should be a RegexParser,"
|
|
127
130
|
f" got {output_parser}"
|
|
128
131
|
)
|
|
132
|
+
raise ValueError(msg) # noqa: TRY004
|
|
129
133
|
output_keys = output_parser.output_keys
|
|
130
134
|
if self.rank_key not in output_keys:
|
|
131
|
-
|
|
135
|
+
msg = (
|
|
132
136
|
f"Got {self.rank_key} as key to rank on, but did not find "
|
|
133
137
|
f"it in the llm_chain output keys ({output_keys})"
|
|
134
138
|
)
|
|
139
|
+
raise ValueError(msg)
|
|
135
140
|
if self.answer_key not in output_keys:
|
|
136
|
-
|
|
141
|
+
msg = (
|
|
137
142
|
f"Got {self.answer_key} as key to return, but did not find "
|
|
138
143
|
f"it in the llm_chain output keys ({output_keys})"
|
|
139
144
|
)
|
|
145
|
+
raise ValueError(msg)
|
|
140
146
|
return self
|
|
141
147
|
|
|
142
148
|
@model_validator(mode="before")
|
|
@@ -144,27 +150,32 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
144
150
|
def get_default_document_variable_name(cls, values: dict) -> Any:
|
|
145
151
|
"""Get default document variable name, if not provided."""
|
|
146
152
|
if "llm_chain" not in values:
|
|
147
|
-
|
|
153
|
+
msg = "llm_chain must be provided"
|
|
154
|
+
raise ValueError(msg)
|
|
148
155
|
|
|
149
156
|
llm_chain_variables = values["llm_chain"].prompt.input_variables
|
|
150
157
|
if "document_variable_name" not in values:
|
|
151
158
|
if len(llm_chain_variables) == 1:
|
|
152
159
|
values["document_variable_name"] = llm_chain_variables[0]
|
|
153
160
|
else:
|
|
154
|
-
|
|
161
|
+
msg = (
|
|
155
162
|
"document_variable_name must be provided if there are "
|
|
156
163
|
"multiple llm_chain input_variables"
|
|
157
164
|
)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
165
|
+
raise ValueError(msg)
|
|
166
|
+
elif values["document_variable_name"] not in llm_chain_variables:
|
|
167
|
+
msg = (
|
|
168
|
+
f"document_variable_name {values['document_variable_name']} was "
|
|
169
|
+
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
|
170
|
+
)
|
|
171
|
+
raise ValueError(msg)
|
|
164
172
|
return values
|
|
165
173
|
|
|
166
174
|
def combine_docs(
|
|
167
|
-
self,
|
|
175
|
+
self,
|
|
176
|
+
docs: list[Document],
|
|
177
|
+
callbacks: Callbacks = None,
|
|
178
|
+
**kwargs: Any,
|
|
168
179
|
) -> tuple[str, dict]:
|
|
169
180
|
"""Combine documents in a map rerank manner.
|
|
170
181
|
|
|
@@ -182,13 +193,16 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
182
193
|
"""
|
|
183
194
|
results = self.llm_chain.apply_and_parse(
|
|
184
195
|
# FYI - this is parallelized and so it is fast.
|
|
185
|
-
[{
|
|
196
|
+
[{self.document_variable_name: d.page_content, **kwargs} for d in docs],
|
|
186
197
|
callbacks=callbacks,
|
|
187
198
|
)
|
|
188
199
|
return self._process_results(docs, results)
|
|
189
200
|
|
|
190
201
|
async def acombine_docs(
|
|
191
|
-
self,
|
|
202
|
+
self,
|
|
203
|
+
docs: list[Document],
|
|
204
|
+
callbacks: Callbacks = None,
|
|
205
|
+
**kwargs: Any,
|
|
192
206
|
) -> tuple[str, dict]:
|
|
193
207
|
"""Combine documents in a map rerank manner.
|
|
194
208
|
|
|
@@ -206,7 +220,7 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
206
220
|
"""
|
|
207
221
|
results = await self.llm_chain.aapply_and_parse(
|
|
208
222
|
# FYI - this is parallelized and so it is fast.
|
|
209
|
-
[{
|
|
223
|
+
[{self.document_variable_name: d.page_content, **kwargs} for d in docs],
|
|
210
224
|
callbacks=callbacks,
|
|
211
225
|
)
|
|
212
226
|
return self._process_results(docs, results)
|
|
@@ -216,9 +230,10 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
|
|
216
230
|
docs: list[Document],
|
|
217
231
|
results: Sequence[Union[str, list[str], dict[str, str]]],
|
|
218
232
|
) -> tuple[str, dict]:
|
|
219
|
-
typed_results = cast(list[dict], results)
|
|
233
|
+
typed_results = cast("list[dict]", results)
|
|
220
234
|
sorted_res = sorted(
|
|
221
|
-
zip(typed_results, docs),
|
|
235
|
+
zip(typed_results, docs),
|
|
236
|
+
key=lambda x: -int(x[0][self.rank_key]),
|
|
222
237
|
)
|
|
223
238
|
output, document = sorted_res[0]
|
|
224
239
|
extra_info = {}
|
|
@@ -27,7 +27,10 @@ class AsyncCombineDocsProtocol(Protocol):
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def split_list_of_docs(
|
|
30
|
-
docs: list[Document],
|
|
30
|
+
docs: list[Document],
|
|
31
|
+
length_func: Callable,
|
|
32
|
+
token_max: int,
|
|
33
|
+
**kwargs: Any,
|
|
31
34
|
) -> list[list[Document]]:
|
|
32
35
|
"""Split Documents into subsets that each meet a cumulative length constraint.
|
|
33
36
|
|
|
@@ -48,10 +51,11 @@ def split_list_of_docs(
|
|
|
48
51
|
_num_tokens = length_func(_sub_result_docs, **kwargs)
|
|
49
52
|
if _num_tokens > token_max:
|
|
50
53
|
if len(_sub_result_docs) == 1:
|
|
51
|
-
|
|
54
|
+
msg = (
|
|
52
55
|
"A single document was longer than the context length,"
|
|
53
56
|
" we cannot handle this."
|
|
54
57
|
)
|
|
58
|
+
raise ValueError(msg)
|
|
55
59
|
new_result_doc_list.append(_sub_result_docs[:-1])
|
|
56
60
|
_sub_result_docs = _sub_result_docs[-1:]
|
|
57
61
|
new_result_doc_list.append(_sub_result_docs)
|
|
@@ -197,6 +201,7 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
197
201
|
combine_documents_chain=combine_documents_chain,
|
|
198
202
|
collapse_documents_chain=collapse_documents_chain,
|
|
199
203
|
)
|
|
204
|
+
|
|
200
205
|
"""
|
|
201
206
|
|
|
202
207
|
combine_documents_chain: BaseCombineDocumentsChain
|
|
@@ -224,8 +229,7 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
224
229
|
def _collapse_chain(self) -> BaseCombineDocumentsChain:
|
|
225
230
|
if self.collapse_documents_chain is not None:
|
|
226
231
|
return self.collapse_documents_chain
|
|
227
|
-
|
|
228
|
-
return self.combine_documents_chain
|
|
232
|
+
return self.combine_documents_chain
|
|
229
233
|
|
|
230
234
|
def combine_docs(
|
|
231
235
|
self,
|
|
@@ -250,10 +254,15 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
250
254
|
element returned is a dictionary of other keys to return.
|
|
251
255
|
"""
|
|
252
256
|
result_docs, extra_return_dict = self._collapse(
|
|
253
|
-
docs,
|
|
257
|
+
docs,
|
|
258
|
+
token_max=token_max,
|
|
259
|
+
callbacks=callbacks,
|
|
260
|
+
**kwargs,
|
|
254
261
|
)
|
|
255
262
|
return self.combine_documents_chain.combine_docs(
|
|
256
|
-
docs=result_docs,
|
|
263
|
+
docs=result_docs,
|
|
264
|
+
callbacks=callbacks,
|
|
265
|
+
**kwargs,
|
|
257
266
|
)
|
|
258
267
|
|
|
259
268
|
async def acombine_docs(
|
|
@@ -279,10 +288,15 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
279
288
|
element returned is a dictionary of other keys to return.
|
|
280
289
|
"""
|
|
281
290
|
result_docs, extra_return_dict = await self._acollapse(
|
|
282
|
-
docs,
|
|
291
|
+
docs,
|
|
292
|
+
token_max=token_max,
|
|
293
|
+
callbacks=callbacks,
|
|
294
|
+
**kwargs,
|
|
283
295
|
)
|
|
284
296
|
return await self.combine_documents_chain.acombine_docs(
|
|
285
|
-
docs=result_docs,
|
|
297
|
+
docs=result_docs,
|
|
298
|
+
callbacks=callbacks,
|
|
299
|
+
**kwargs,
|
|
286
300
|
)
|
|
287
301
|
|
|
288
302
|
def _collapse(
|
|
@@ -298,26 +312,30 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
298
312
|
|
|
299
313
|
def _collapse_docs_func(docs: list[Document], **kwargs: Any) -> str:
|
|
300
314
|
return self._collapse_chain.run(
|
|
301
|
-
input_documents=docs,
|
|
315
|
+
input_documents=docs,
|
|
316
|
+
callbacks=callbacks,
|
|
317
|
+
**kwargs,
|
|
302
318
|
)
|
|
303
319
|
|
|
304
320
|
_token_max = token_max or self.token_max
|
|
305
321
|
retries: int = 0
|
|
306
322
|
while num_tokens is not None and num_tokens > _token_max:
|
|
307
323
|
new_result_doc_list = split_list_of_docs(
|
|
308
|
-
result_docs,
|
|
324
|
+
result_docs,
|
|
325
|
+
length_func,
|
|
326
|
+
_token_max,
|
|
327
|
+
**kwargs,
|
|
309
328
|
)
|
|
310
|
-
result_docs = [
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
329
|
+
result_docs = [
|
|
330
|
+
collapse_docs(docs_, _collapse_docs_func, **kwargs)
|
|
331
|
+
for docs_ in new_result_doc_list
|
|
332
|
+
]
|
|
314
333
|
num_tokens = length_func(result_docs, **kwargs)
|
|
315
334
|
retries += 1
|
|
316
335
|
if self.collapse_max_retries and retries == self.collapse_max_retries:
|
|
317
|
-
|
|
318
|
-
f"Exceed {self.collapse_max_retries} tries to \
|
|
336
|
+
msg = f"Exceed {self.collapse_max_retries} tries to \
|
|
319
337
|
collapse document to {_token_max} tokens."
|
|
320
|
-
)
|
|
338
|
+
raise ValueError(msg)
|
|
321
339
|
return result_docs, {}
|
|
322
340
|
|
|
323
341
|
async def _acollapse(
|
|
@@ -333,26 +351,30 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|
|
333
351
|
|
|
334
352
|
async def _collapse_docs_func(docs: list[Document], **kwargs: Any) -> str:
|
|
335
353
|
return await self._collapse_chain.arun(
|
|
336
|
-
input_documents=docs,
|
|
354
|
+
input_documents=docs,
|
|
355
|
+
callbacks=callbacks,
|
|
356
|
+
**kwargs,
|
|
337
357
|
)
|
|
338
358
|
|
|
339
359
|
_token_max = token_max or self.token_max
|
|
340
360
|
retries: int = 0
|
|
341
361
|
while num_tokens is not None and num_tokens > _token_max:
|
|
342
362
|
new_result_doc_list = split_list_of_docs(
|
|
343
|
-
result_docs,
|
|
363
|
+
result_docs,
|
|
364
|
+
length_func,
|
|
365
|
+
_token_max,
|
|
366
|
+
**kwargs,
|
|
344
367
|
)
|
|
345
|
-
result_docs = [
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
368
|
+
result_docs = [
|
|
369
|
+
await acollapse_docs(docs_, _collapse_docs_func, **kwargs)
|
|
370
|
+
for docs_ in new_result_doc_list
|
|
371
|
+
]
|
|
349
372
|
num_tokens = length_func(result_docs, **kwargs)
|
|
350
373
|
retries += 1
|
|
351
374
|
if self.collapse_max_retries and retries == self.collapse_max_retries:
|
|
352
|
-
|
|
353
|
-
f"Exceed {self.collapse_max_retries} tries to \
|
|
375
|
+
msg = f"Exceed {self.collapse_max_retries} tries to \
|
|
354
376
|
collapse document to {_token_max} tokens."
|
|
355
|
-
)
|
|
377
|
+
raise ValueError(msg)
|
|
356
378
|
return result_docs, {}
|
|
357
379
|
|
|
358
380
|
@property
|
|
@@ -27,7 +27,7 @@ def _get_default_document_prompt() -> PromptTemplate:
|
|
|
27
27
|
message=(
|
|
28
28
|
"This class is deprecated. Please see the migration guide here for "
|
|
29
29
|
"a recommended replacement: "
|
|
30
|
-
"https://python.langchain.com/docs/versions/migrating_chains/refine_docs_chain/"
|
|
30
|
+
"https://python.langchain.com/docs/versions/migrating_chains/refine_docs_chain/"
|
|
31
31
|
),
|
|
32
32
|
)
|
|
33
33
|
class RefineDocumentsChain(BaseCombineDocumentsChain):
|
|
@@ -79,6 +79,7 @@ class RefineDocumentsChain(BaseCombineDocumentsChain):
|
|
|
79
79
|
document_variable_name=document_variable_name,
|
|
80
80
|
initial_response_name=initial_response_name,
|
|
81
81
|
)
|
|
82
|
+
|
|
82
83
|
"""
|
|
83
84
|
|
|
84
85
|
initial_llm_chain: LLMChain
|
|
@@ -91,7 +92,7 @@ class RefineDocumentsChain(BaseCombineDocumentsChain):
|
|
|
91
92
|
initial_response_name: str
|
|
92
93
|
"""The variable name to format the initial response in when refining."""
|
|
93
94
|
document_prompt: BasePromptTemplate = Field(
|
|
94
|
-
default_factory=_get_default_document_prompt
|
|
95
|
+
default_factory=_get_default_document_prompt,
|
|
95
96
|
)
|
|
96
97
|
"""Prompt to use to format each document, gets passed to `format_document`."""
|
|
97
98
|
return_intermediate_steps: bool = False
|
|
@@ -105,7 +106,7 @@ class RefineDocumentsChain(BaseCombineDocumentsChain):
|
|
|
105
106
|
"""
|
|
106
107
|
_output_keys = super().output_keys
|
|
107
108
|
if self.return_intermediate_steps:
|
|
108
|
-
_output_keys = _output_keys
|
|
109
|
+
_output_keys = [*_output_keys, "intermediate_steps"]
|
|
109
110
|
return _output_keys
|
|
110
111
|
|
|
111
112
|
model_config = ConfigDict(
|
|
@@ -127,27 +128,32 @@ class RefineDocumentsChain(BaseCombineDocumentsChain):
|
|
|
127
128
|
def get_default_document_variable_name(cls, values: dict) -> Any:
|
|
128
129
|
"""Get default document variable name, if not provided."""
|
|
129
130
|
if "initial_llm_chain" not in values:
|
|
130
|
-
|
|
131
|
+
msg = "initial_llm_chain must be provided"
|
|
132
|
+
raise ValueError(msg)
|
|
131
133
|
|
|
132
134
|
llm_chain_variables = values["initial_llm_chain"].prompt.input_variables
|
|
133
135
|
if "document_variable_name" not in values:
|
|
134
136
|
if len(llm_chain_variables) == 1:
|
|
135
137
|
values["document_variable_name"] = llm_chain_variables[0]
|
|
136
138
|
else:
|
|
137
|
-
|
|
139
|
+
msg = (
|
|
138
140
|
"document_variable_name must be provided if there are "
|
|
139
141
|
"multiple llm_chain input_variables"
|
|
140
142
|
)
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
143
|
+
raise ValueError(msg)
|
|
144
|
+
elif values["document_variable_name"] not in llm_chain_variables:
|
|
145
|
+
msg = (
|
|
146
|
+
f"document_variable_name {values['document_variable_name']} was "
|
|
147
|
+
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
|
148
|
+
)
|
|
149
|
+
raise ValueError(msg)
|
|
147
150
|
return values
|
|
148
151
|
|
|
149
152
|
def combine_docs(
|
|
150
|
-
self,
|
|
153
|
+
self,
|
|
154
|
+
docs: list[Document],
|
|
155
|
+
callbacks: Callbacks = None,
|
|
156
|
+
**kwargs: Any,
|
|
151
157
|
) -> tuple[str, dict]:
|
|
152
158
|
"""Combine by mapping first chain over all, then stuffing into final chain.
|
|
153
159
|
|
|
@@ -172,7 +178,10 @@ class RefineDocumentsChain(BaseCombineDocumentsChain):
|
|
|
172
178
|
return self._construct_result(refine_steps, res)
|
|
173
179
|
|
|
174
180
|
async def acombine_docs(
|
|
175
|
-
self,
|
|
181
|
+
self,
|
|
182
|
+
docs: list[Document],
|
|
183
|
+
callbacks: Callbacks = None,
|
|
184
|
+
**kwargs: Any,
|
|
176
185
|
) -> tuple[str, dict]:
|
|
177
186
|
"""Async combine by mapping a first chain over all, then stuffing
|
|
178
187
|
into a final chain.
|
|
@@ -211,16 +220,17 @@ class RefineDocumentsChain(BaseCombineDocumentsChain):
|
|
|
211
220
|
}
|
|
212
221
|
|
|
213
222
|
def _construct_initial_inputs(
|
|
214
|
-
self,
|
|
223
|
+
self,
|
|
224
|
+
docs: list[Document],
|
|
225
|
+
**kwargs: Any,
|
|
215
226
|
) -> dict[str, Any]:
|
|
216
227
|
base_info = {"page_content": docs[0].page_content}
|
|
217
228
|
base_info.update(docs[0].metadata)
|
|
218
229
|
document_info = {k: base_info[k] for k in self.document_prompt.input_variables}
|
|
219
230
|
base_inputs: dict = {
|
|
220
|
-
self.document_variable_name: self.document_prompt.format(**document_info)
|
|
231
|
+
self.document_variable_name: self.document_prompt.format(**document_info),
|
|
221
232
|
}
|
|
222
|
-
|
|
223
|
-
return inputs
|
|
233
|
+
return {**base_inputs, **kwargs}
|
|
224
234
|
|
|
225
235
|
@property
|
|
226
236
|
def _chain_type(self) -> str:
|