langchain 0.3.26__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +174 -151
- langchain/agents/agent_iterator.py +50 -26
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +4 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +8 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +11 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +9 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +17 -9
- langchain/agents/json_chat/base.py +19 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +11 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +81 -71
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +2 -0
- langchain/agents/openai_functions_agent/base.py +47 -37
- langchain/agents/openai_functions_multi_agent/base.py +40 -27
- langchain/agents/openai_tools/base.py +9 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +8 -6
- langchain/agents/output_parsers/openai_functions.py +24 -9
- langchain/agents/output_parsers/openai_tools.py +16 -4
- langchain/agents/output_parsers/react_json_single_input.py +13 -5
- langchain/agents/output_parsers/react_single_input.py +18 -11
- langchain/agents/output_parsers/self_ask.py +5 -2
- langchain/agents/output_parsers/tools.py +32 -13
- langchain/agents/output_parsers/xml.py +102 -28
- langchain/agents/react/agent.py +5 -4
- langchain/agents/react/base.py +26 -17
- langchain/agents/react/output_parser.py +7 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +5 -2
- langchain/agents/self_ask_with_search/base.py +23 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +19 -11
- langchain/agents/structured_chat/output_parser.py +29 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +8 -6
- langchain/agents/tools.py +5 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +12 -6
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +17 -3
- langchain/callbacks/streaming_aiter_final_only.py +16 -5
- langchain/callbacks/streaming_stdout_final_only.py +10 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +12 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +41 -23
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +96 -56
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +30 -11
- langchain/chains/combine_documents/map_reduce.py +41 -30
- langchain/chains/combine_documents/map_rerank.py +39 -24
- langchain/chains/combine_documents/reduce.py +48 -26
- langchain/chains/combine_documents/refine.py +27 -17
- langchain/chains/combine_documents/stuff.py +24 -13
- langchain/chains/constitutional_ai/base.py +11 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +9 -4
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +108 -79
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +10 -10
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +28 -12
- langchain/chains/flare/prompts.py +2 -0
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +82 -61
- langchain/chains/llm_bash/__init__.py +3 -2
- langchain/chains/llm_checker/base.py +19 -6
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +25 -10
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +22 -7
- langchain/chains/llm_symbolic_math/__init__.py +3 -2
- langchain/chains/loading.py +155 -97
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +11 -9
- langchain/chains/natbot/base.py +11 -9
- langchain/chains/natbot/crawler.py +102 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +15 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +21 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +39 -35
- langchain/chains/openai_functions/qa_with_structure.py +22 -15
- langchain/chains/openai_functions/tagging.py +4 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +8 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +17 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +15 -6
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +21 -8
- langchain/chains/query_constructor/base.py +37 -34
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +101 -34
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +38 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +38 -22
- langchain/chains/router/embedding_router.py +15 -8
- langchain/chains/router/llm_router.py +23 -20
- langchain/chains/router/multi_prompt.py +5 -2
- langchain/chains/router/multi_retrieval_qa.py +28 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +7 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +77 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +9 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +213 -139
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blob_loaders/schema.py +1 -4
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +35 -24
- langchain/embeddings/cache.py +37 -32
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +26 -25
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +21 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +23 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +34 -20
- langchain/evaluation/exact_match/base.py +14 -1
- langchain/evaluation/loading.py +16 -11
- langchain/evaluation/parsing/base.py +20 -4
- langchain/evaluation/parsing/json_distance.py +24 -10
- langchain/evaluation/parsing/json_schema.py +13 -12
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +20 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +4 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/regex_match/base.py +9 -1
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +23 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +20 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +14 -7
- langchain/memory/buffer_window.py +2 -0
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +14 -13
- langchain/memory/entity.py +131 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +4 -3
- langchain/memory/summary.py +43 -11
- langchain/memory/summary_buffer.py +20 -8
- langchain/memory/token_buffer.py +2 -0
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +14 -7
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +10 -4
- langchain/output_parsers/fix.py +60 -53
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +59 -48
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +9 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +99 -80
- langchain/output_parsers/structured.py +21 -6
- langchain/output_parsers/yaml.py +19 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/pydantic_v1/__init__.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +5 -3
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +17 -15
- langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -0
- langchain/retrievers/document_compressors/embeddings_filter.py +24 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +8 -5
- langchain/retrievers/ensemble.py +30 -27
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +35 -27
- langchain/retrievers/multi_vector.py +24 -9
- langchain/retrievers/parent_document_retriever.py +33 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +157 -127
- langchain/retrievers/time_weighted_retriever.py +21 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/hub.py +12 -0
- langchain/runnables/openai_functions.py +12 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +2 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +9 -23
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +22 -4
- langchain/smith/evaluation/runner_utils.py +416 -247
- langchain/smith/evaluation/string_run_evaluator.py +102 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +7 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +3 -2
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA +4 -14
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/RECORD +590 -591
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/WHEEL +1 -1
- langchain/smith/evaluation/utils.py +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0
|
@@ -28,16 +28,17 @@ def __getattr__(name: str) -> Any:
|
|
|
28
28
|
if name in _module_lookup:
|
|
29
29
|
module = importlib.import_module(_module_lookup[name])
|
|
30
30
|
return getattr(module, name)
|
|
31
|
-
|
|
31
|
+
msg = f"module {__name__} has no attribute {name}"
|
|
32
|
+
raise AttributeError(msg)
|
|
32
33
|
|
|
33
34
|
|
|
34
35
|
__all__ = [
|
|
36
|
+
"CohereRerank",
|
|
37
|
+
"CrossEncoderReranker",
|
|
35
38
|
"DocumentCompressorPipeline",
|
|
36
39
|
"EmbeddingsFilter",
|
|
37
40
|
"FlashrankRerank",
|
|
38
|
-
"LLMListwiseRerank",
|
|
39
41
|
"LLMChainExtractor",
|
|
40
42
|
"LLMChainFilter",
|
|
41
|
-
"CohereRerank",
|
|
42
|
-
"CrossEncoderReranker",
|
|
43
|
+
"LLMListwiseRerank",
|
|
43
44
|
]
|
|
@@ -32,20 +32,23 @@ class DocumentCompressorPipeline(BaseDocumentCompressor):
|
|
|
32
32
|
if isinstance(_transformer, BaseDocumentCompressor):
|
|
33
33
|
accepts_callbacks = (
|
|
34
34
|
signature(_transformer.compress_documents).parameters.get(
|
|
35
|
-
"callbacks"
|
|
35
|
+
"callbacks",
|
|
36
36
|
)
|
|
37
37
|
is not None
|
|
38
38
|
)
|
|
39
39
|
if accepts_callbacks:
|
|
40
40
|
documents = _transformer.compress_documents(
|
|
41
|
-
documents, query, callbacks=callbacks
|
|
41
|
+
documents,
|
|
42
|
+
query,
|
|
43
|
+
callbacks=callbacks,
|
|
42
44
|
)
|
|
43
45
|
else:
|
|
44
46
|
documents = _transformer.compress_documents(documents, query)
|
|
45
47
|
elif isinstance(_transformer, BaseDocumentTransformer):
|
|
46
48
|
documents = _transformer.transform_documents(documents)
|
|
47
49
|
else:
|
|
48
|
-
|
|
50
|
+
msg = f"Got unexpected transformer type: {_transformer}"
|
|
51
|
+
raise ValueError(msg) # noqa: TRY004
|
|
49
52
|
return documents
|
|
50
53
|
|
|
51
54
|
async def acompress_documents(
|
|
@@ -59,18 +62,21 @@ class DocumentCompressorPipeline(BaseDocumentCompressor):
|
|
|
59
62
|
if isinstance(_transformer, BaseDocumentCompressor):
|
|
60
63
|
accepts_callbacks = (
|
|
61
64
|
signature(_transformer.acompress_documents).parameters.get(
|
|
62
|
-
"callbacks"
|
|
65
|
+
"callbacks",
|
|
63
66
|
)
|
|
64
67
|
is not None
|
|
65
68
|
)
|
|
66
69
|
if accepts_callbacks:
|
|
67
70
|
documents = await _transformer.acompress_documents(
|
|
68
|
-
documents, query, callbacks=callbacks
|
|
71
|
+
documents,
|
|
72
|
+
query,
|
|
73
|
+
callbacks=callbacks,
|
|
69
74
|
)
|
|
70
75
|
else:
|
|
71
76
|
documents = await _transformer.acompress_documents(documents, query)
|
|
72
77
|
elif isinstance(_transformer, BaseDocumentTransformer):
|
|
73
78
|
documents = await _transformer.atransform_documents(documents)
|
|
74
79
|
else:
|
|
75
|
-
|
|
80
|
+
msg = f"Got unexpected transformer type: {_transformer}"
|
|
81
|
+
raise ValueError(msg) # noqa: TRY004
|
|
76
82
|
return documents
|
|
@@ -12,6 +12,7 @@ from langchain_core.output_parsers import BaseOutputParser, StrOutputParser
|
|
|
12
12
|
from langchain_core.prompts import PromptTemplate
|
|
13
13
|
from langchain_core.runnables import Runnable
|
|
14
14
|
from pydantic import ConfigDict
|
|
15
|
+
from typing_extensions import override
|
|
15
16
|
|
|
16
17
|
from langchain.chains.llm import LLMChain
|
|
17
18
|
from langchain.retrievers.document_compressors.chain_extract_prompt import (
|
|
@@ -29,6 +30,7 @@ class NoOutputParser(BaseOutputParser[str]):
|
|
|
29
30
|
|
|
30
31
|
no_output_str: str = "NO_OUTPUT"
|
|
31
32
|
|
|
33
|
+
@override
|
|
32
34
|
def parse(self, text: str) -> str:
|
|
33
35
|
cleaned_text = text.strip()
|
|
34
36
|
if cleaned_text == self.no_output_str:
|
|
@@ -80,7 +82,7 @@ class LLMChainExtractor(BaseDocumentCompressor):
|
|
|
80
82
|
if len(output) == 0:
|
|
81
83
|
continue
|
|
82
84
|
compressed_docs.append(
|
|
83
|
-
Document(page_content=cast(str, output), metadata=doc.metadata)
|
|
85
|
+
Document(page_content=cast("str", output), metadata=doc.metadata),
|
|
84
86
|
)
|
|
85
87
|
return compressed_docs
|
|
86
88
|
|
|
@@ -98,7 +100,7 @@ class LLMChainExtractor(BaseDocumentCompressor):
|
|
|
98
100
|
if len(outputs[i]) == 0:
|
|
99
101
|
continue
|
|
100
102
|
compressed_docs.append(
|
|
101
|
-
Document(page_content=outputs[i], metadata=doc.metadata)
|
|
103
|
+
Document(page_content=outputs[i], metadata=doc.metadata),
|
|
102
104
|
)
|
|
103
105
|
return compressed_docs
|
|
104
106
|
|
|
@@ -108,7 +110,7 @@ class LLMChainExtractor(BaseDocumentCompressor):
|
|
|
108
110
|
llm: BaseLanguageModel,
|
|
109
111
|
prompt: Optional[PromptTemplate] = None,
|
|
110
112
|
get_input: Optional[Callable[[str, Document], str]] = None,
|
|
111
|
-
llm_chain_kwargs: Optional[dict] = None,
|
|
113
|
+
llm_chain_kwargs: Optional[dict] = None, # noqa: ARG003
|
|
112
114
|
) -> LLMChainExtractor:
|
|
113
115
|
"""Initialize from LLM."""
|
|
114
116
|
_prompt = prompt if prompt is not None else _get_default_chain_prompt()
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
prompt_template = """Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return {no_output_str}.
|
|
1
|
+
prompt_template = """Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return {no_output_str}.
|
|
3
2
|
|
|
4
3
|
Remember, *DO NOT* edit the extracted parts of the context.
|
|
5
4
|
|
|
@@ -8,4 +7,4 @@ Remember, *DO NOT* edit the extracted parts of the context.
|
|
|
8
7
|
>>>
|
|
9
8
|
{{context}}
|
|
10
9
|
>>>
|
|
11
|
-
Extracted relevant parts:"""
|
|
10
|
+
Extracted relevant parts:""" # noqa: E501
|
|
@@ -36,7 +36,7 @@ class LLMChainFilter(BaseDocumentCompressor):
|
|
|
36
36
|
"""Filter that drops documents that aren't relevant to the query."""
|
|
37
37
|
|
|
38
38
|
llm_chain: Runnable
|
|
39
|
-
"""LLM wrapper to use for filtering documents.
|
|
39
|
+
"""LLM wrapper to use for filtering documents.
|
|
40
40
|
The chain prompt is expected to have a BooleanOutputParser."""
|
|
41
41
|
|
|
42
42
|
get_input: Callable[[str, Document], dict] = default_get_input
|
|
@@ -58,7 +58,8 @@ class LLMChainFilter(BaseDocumentCompressor):
|
|
|
58
58
|
config = RunnableConfig(callbacks=callbacks)
|
|
59
59
|
outputs = zip(
|
|
60
60
|
self.llm_chain.batch(
|
|
61
|
-
[self.get_input(query, doc) for doc in documents],
|
|
61
|
+
[self.get_input(query, doc) for doc in documents],
|
|
62
|
+
config=config,
|
|
62
63
|
),
|
|
63
64
|
documents,
|
|
64
65
|
)
|
|
@@ -69,9 +70,8 @@ class LLMChainFilter(BaseDocumentCompressor):
|
|
|
69
70
|
output = output_[self.llm_chain.output_key]
|
|
70
71
|
if self.llm_chain.prompt.output_parser is not None:
|
|
71
72
|
include_doc = self.llm_chain.prompt.output_parser.parse(output)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
include_doc = output_
|
|
73
|
+
elif isinstance(output_, bool):
|
|
74
|
+
include_doc = output_
|
|
75
75
|
if include_doc:
|
|
76
76
|
filtered_docs.append(doc)
|
|
77
77
|
|
|
@@ -89,7 +89,8 @@ class LLMChainFilter(BaseDocumentCompressor):
|
|
|
89
89
|
config = RunnableConfig(callbacks=callbacks)
|
|
90
90
|
outputs = zip(
|
|
91
91
|
await self.llm_chain.abatch(
|
|
92
|
-
[self.get_input(query, doc) for doc in documents],
|
|
92
|
+
[self.get_input(query, doc) for doc in documents],
|
|
93
|
+
config=config,
|
|
93
94
|
),
|
|
94
95
|
documents,
|
|
95
96
|
)
|
|
@@ -99,9 +100,8 @@ class LLMChainFilter(BaseDocumentCompressor):
|
|
|
99
100
|
output = output_[self.llm_chain.output_key]
|
|
100
101
|
if self.llm_chain.prompt.output_parser is not None:
|
|
101
102
|
include_doc = self.llm_chain.prompt.output_parser.parse(output)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
include_doc = output_
|
|
103
|
+
elif isinstance(output_, bool):
|
|
104
|
+
include_doc = output_
|
|
105
105
|
if include_doc:
|
|
106
106
|
filtered_docs.append(doc)
|
|
107
107
|
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# flake8: noqa
|
|
2
1
|
prompt_template = """Given the following question and context, return YES if the context is relevant to the question and NO if it isn't.
|
|
3
2
|
|
|
4
3
|
> Question: {question}
|
|
@@ -6,4 +5,4 @@ prompt_template = """Given the following question and context, return YES if the
|
|
|
6
5
|
>>>
|
|
7
6
|
{context}
|
|
8
7
|
>>>
|
|
9
|
-
> Relevant (YES / NO):"""
|
|
8
|
+
> Relevant (YES / NO):""" # noqa: E501
|
|
@@ -9,10 +9,13 @@ from langchain_core.callbacks import Callbacks
|
|
|
9
9
|
from langchain_core.documents import BaseDocumentCompressor, Document
|
|
10
10
|
from langchain_core.utils import get_from_dict_or_env
|
|
11
11
|
from pydantic import ConfigDict, model_validator
|
|
12
|
+
from typing_extensions import override
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
@deprecated(
|
|
15
|
-
since="0.0.30",
|
|
16
|
+
since="0.0.30",
|
|
17
|
+
removal="1.0",
|
|
18
|
+
alternative_import="langchain_cohere.CohereRerank",
|
|
16
19
|
)
|
|
17
20
|
class CohereRerank(BaseDocumentCompressor):
|
|
18
21
|
"""Document compressor that uses `Cohere Rerank API`."""
|
|
@@ -24,7 +27,7 @@ class CohereRerank(BaseDocumentCompressor):
|
|
|
24
27
|
model: str = "rerank-english-v2.0"
|
|
25
28
|
"""Model to use for reranking."""
|
|
26
29
|
cohere_api_key: Optional[str] = None
|
|
27
|
-
"""Cohere API key. Must be specified directly or via environment variable
|
|
30
|
+
"""Cohere API key. Must be specified directly or via environment variable
|
|
28
31
|
COHERE_API_KEY."""
|
|
29
32
|
user_agent: str = "langchain"
|
|
30
33
|
"""Identifier for the application making the request."""
|
|
@@ -41,13 +44,16 @@ class CohereRerank(BaseDocumentCompressor):
|
|
|
41
44
|
if not values.get("client"):
|
|
42
45
|
try:
|
|
43
46
|
import cohere
|
|
44
|
-
except ImportError:
|
|
45
|
-
|
|
47
|
+
except ImportError as e:
|
|
48
|
+
msg = (
|
|
46
49
|
"Could not import cohere python package. "
|
|
47
50
|
"Please install it with `pip install cohere`."
|
|
48
51
|
)
|
|
52
|
+
raise ImportError(msg) from e
|
|
49
53
|
cohere_api_key = get_from_dict_or_env(
|
|
50
|
-
values,
|
|
54
|
+
values,
|
|
55
|
+
"cohere_api_key",
|
|
56
|
+
"COHERE_API_KEY",
|
|
51
57
|
)
|
|
52
58
|
client_name = values.get("user_agent", "langchain")
|
|
53
59
|
values["client"] = cohere.Client(cohere_api_key, client_name=client_name)
|
|
@@ -87,17 +93,13 @@ class CohereRerank(BaseDocumentCompressor):
|
|
|
87
93
|
max_chunks_per_doc=max_chunks_per_doc,
|
|
88
94
|
)
|
|
89
95
|
if hasattr(results, "results"):
|
|
90
|
-
results =
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
"index": res.index,
|
|
96
|
-
"relevance_score": res.relevance_score,
|
|
97
|
-
}
|
|
98
|
-
)
|
|
99
|
-
return result_dicts
|
|
96
|
+
results = results.results
|
|
97
|
+
return [
|
|
98
|
+
{"index": res.index, "relevance_score": res.relevance_score}
|
|
99
|
+
for res in results
|
|
100
|
+
]
|
|
100
101
|
|
|
102
|
+
@override
|
|
101
103
|
def compress_documents(
|
|
102
104
|
self,
|
|
103
105
|
documents: Sequence[Document],
|
|
@@ -7,6 +7,7 @@ from typing import Optional
|
|
|
7
7
|
from langchain_core.callbacks import Callbacks
|
|
8
8
|
from langchain_core.documents import BaseDocumentCompressor, Document
|
|
9
9
|
from pydantic import ConfigDict
|
|
10
|
+
from typing_extensions import override
|
|
10
11
|
|
|
11
12
|
from langchain.retrievers.document_compressors.cross_encoder import BaseCrossEncoder
|
|
12
13
|
|
|
@@ -25,6 +26,7 @@ class CrossEncoderReranker(BaseDocumentCompressor):
|
|
|
25
26
|
extra="forbid",
|
|
26
27
|
)
|
|
27
28
|
|
|
29
|
+
@override
|
|
28
30
|
def compress_documents(
|
|
29
31
|
self,
|
|
30
32
|
documents: Sequence[Document],
|
|
@@ -6,16 +6,18 @@ from langchain_core.documents import BaseDocumentCompressor, Document
|
|
|
6
6
|
from langchain_core.embeddings import Embeddings
|
|
7
7
|
from langchain_core.utils import pre_init
|
|
8
8
|
from pydantic import ConfigDict, Field
|
|
9
|
+
from typing_extensions import override
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
def _get_similarity_function() -> Callable:
|
|
12
13
|
try:
|
|
13
14
|
from langchain_community.utils.math import cosine_similarity
|
|
14
|
-
except ImportError:
|
|
15
|
-
|
|
15
|
+
except ImportError as e:
|
|
16
|
+
msg = (
|
|
16
17
|
"To use please install langchain-community "
|
|
17
18
|
"with `pip install langchain-community`."
|
|
18
19
|
)
|
|
20
|
+
raise ImportError(msg) from e
|
|
19
21
|
return cosine_similarity
|
|
20
22
|
|
|
21
23
|
|
|
@@ -45,9 +47,11 @@ class EmbeddingsFilter(BaseDocumentCompressor):
|
|
|
45
47
|
def validate_params(cls, values: dict) -> dict:
|
|
46
48
|
"""Validate similarity parameters."""
|
|
47
49
|
if values["k"] is None and values["similarity_threshold"] is None:
|
|
48
|
-
|
|
50
|
+
msg = "Must specify one of `k` or `similarity_threshold`."
|
|
51
|
+
raise ValueError(msg)
|
|
49
52
|
return values
|
|
50
53
|
|
|
54
|
+
@override
|
|
51
55
|
def compress_documents(
|
|
52
56
|
self,
|
|
53
57
|
documents: Sequence[Document],
|
|
@@ -60,21 +64,22 @@ class EmbeddingsFilter(BaseDocumentCompressor):
|
|
|
60
64
|
_get_embeddings_from_stateful_docs,
|
|
61
65
|
get_stateful_documents,
|
|
62
66
|
)
|
|
63
|
-
except ImportError:
|
|
64
|
-
|
|
67
|
+
except ImportError as e:
|
|
68
|
+
msg = (
|
|
65
69
|
"To use please install langchain-community "
|
|
66
70
|
"with `pip install langchain-community`."
|
|
67
71
|
)
|
|
72
|
+
raise ImportError(msg) from e
|
|
68
73
|
|
|
69
74
|
try:
|
|
70
75
|
import numpy as np
|
|
71
76
|
except ImportError as e:
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
) from e
|
|
77
|
+
msg = "Could not import numpy, please install with `pip install numpy`."
|
|
78
|
+
raise ImportError(msg) from e
|
|
75
79
|
stateful_documents = get_stateful_documents(documents)
|
|
76
80
|
embedded_documents = _get_embeddings_from_stateful_docs(
|
|
77
|
-
self.embeddings,
|
|
81
|
+
self.embeddings,
|
|
82
|
+
stateful_documents,
|
|
78
83
|
)
|
|
79
84
|
embedded_query = self.embeddings.embed_query(query)
|
|
80
85
|
similarity = self.similarity_fn([embedded_query], embedded_documents)[0]
|
|
@@ -83,13 +88,14 @@ class EmbeddingsFilter(BaseDocumentCompressor):
|
|
|
83
88
|
included_idxs = np.argsort(similarity)[::-1][: self.k]
|
|
84
89
|
if self.similarity_threshold is not None:
|
|
85
90
|
similar_enough = np.where(
|
|
86
|
-
similarity[included_idxs] > self.similarity_threshold
|
|
91
|
+
similarity[included_idxs] > self.similarity_threshold,
|
|
87
92
|
)
|
|
88
93
|
included_idxs = included_idxs[similar_enough]
|
|
89
94
|
for i in included_idxs:
|
|
90
95
|
stateful_documents[i].state["query_similarity_score"] = similarity[i]
|
|
91
96
|
return [stateful_documents[i] for i in included_idxs]
|
|
92
97
|
|
|
98
|
+
@override
|
|
93
99
|
async def acompress_documents(
|
|
94
100
|
self,
|
|
95
101
|
documents: Sequence[Document],
|
|
@@ -102,21 +108,22 @@ class EmbeddingsFilter(BaseDocumentCompressor):
|
|
|
102
108
|
_aget_embeddings_from_stateful_docs,
|
|
103
109
|
get_stateful_documents,
|
|
104
110
|
)
|
|
105
|
-
except ImportError:
|
|
106
|
-
|
|
111
|
+
except ImportError as e:
|
|
112
|
+
msg = (
|
|
107
113
|
"To use please install langchain-community "
|
|
108
114
|
"with `pip install langchain-community`."
|
|
109
115
|
)
|
|
116
|
+
raise ImportError(msg) from e
|
|
110
117
|
|
|
111
118
|
try:
|
|
112
119
|
import numpy as np
|
|
113
120
|
except ImportError as e:
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
) from e
|
|
121
|
+
msg = "Could not import numpy, please install with `pip install numpy`."
|
|
122
|
+
raise ImportError(msg) from e
|
|
117
123
|
stateful_documents = get_stateful_documents(documents)
|
|
118
124
|
embedded_documents = await _aget_embeddings_from_stateful_docs(
|
|
119
|
-
self.embeddings,
|
|
125
|
+
self.embeddings,
|
|
126
|
+
stateful_documents,
|
|
120
127
|
)
|
|
121
128
|
embedded_query = await self.embeddings.aembed_query(query)
|
|
122
129
|
similarity = self.similarity_fn([embedded_query], embedded_documents)[0]
|
|
@@ -125,7 +132,7 @@ class EmbeddingsFilter(BaseDocumentCompressor):
|
|
|
125
132
|
included_idxs = np.argsort(similarity)[::-1][: self.k]
|
|
126
133
|
if self.similarity_threshold is not None:
|
|
127
134
|
similar_enough = np.where(
|
|
128
|
-
similarity[included_idxs] > self.similarity_threshold
|
|
135
|
+
similarity[included_idxs] > self.similarity_threshold,
|
|
129
136
|
)
|
|
130
137
|
included_idxs = included_idxs[similar_enough]
|
|
131
138
|
for i in included_idxs:
|
|
@@ -11,7 +11,7 @@ if TYPE_CHECKING:
|
|
|
11
11
|
# Used to consolidate logic for raising deprecation warnings and
|
|
12
12
|
# handling optional imports.
|
|
13
13
|
DEPRECATED_LOOKUP = {
|
|
14
|
-
"FlashrankRerank": "langchain_community.document_compressors.flashrank_rerank"
|
|
14
|
+
"FlashrankRerank": "langchain_community.document_compressors.flashrank_rerank",
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)
|
|
@@ -70,11 +70,12 @@ class LLMListwiseRerank(BaseDocumentCompressor):
|
|
|
70
70
|
compressed_docs = reranker.compress_documents(documents, "Who is steve")
|
|
71
71
|
assert len(compressed_docs) == 3
|
|
72
72
|
assert "Steve" in compressed_docs[0].page_content
|
|
73
|
+
|
|
73
74
|
"""
|
|
74
75
|
|
|
75
76
|
reranker: Runnable[dict, list[Document]]
|
|
76
|
-
"""LLM-based reranker to use for filtering documents. Expected to take in a dict
|
|
77
|
-
with 'documents: Sequence[Document]' and 'query: str' keys and output a
|
|
77
|
+
"""LLM-based reranker to use for filtering documents. Expected to take in a dict
|
|
78
|
+
with 'documents: Sequence[Document]' and 'query: str' keys and output a
|
|
78
79
|
List[Document]."""
|
|
79
80
|
|
|
80
81
|
top_n: int = 3
|
|
@@ -92,7 +93,8 @@ class LLMListwiseRerank(BaseDocumentCompressor):
|
|
|
92
93
|
) -> Sequence[Document]:
|
|
93
94
|
"""Filter down documents based on their relevance to the query."""
|
|
94
95
|
results = self.reranker.invoke(
|
|
95
|
-
{"documents": documents, "query": query},
|
|
96
|
+
{"documents": documents, "query": query},
|
|
97
|
+
config={"callbacks": callbacks},
|
|
96
98
|
)
|
|
97
99
|
return results[: self.top_n]
|
|
98
100
|
|
|
@@ -117,9 +119,10 @@ class LLMListwiseRerank(BaseDocumentCompressor):
|
|
|
117
119
|
"""
|
|
118
120
|
|
|
119
121
|
if llm.with_structured_output == BaseLanguageModel.with_structured_output:
|
|
120
|
-
|
|
122
|
+
msg = (
|
|
121
123
|
f"llm of type {type(llm)} does not implement `with_structured_output`."
|
|
122
124
|
)
|
|
125
|
+
raise ValueError(msg)
|
|
123
126
|
|
|
124
127
|
class RankDocuments(BaseModel):
|
|
125
128
|
"""Rank the documents by their relevance to the user question.
|
|
@@ -137,6 +140,6 @@ class LLMListwiseRerank(BaseDocumentCompressor):
|
|
|
137
140
|
reranker = RunnablePassthrough.assign(
|
|
138
141
|
ranking=RunnableLambda(_get_prompt_input)
|
|
139
142
|
| _prompt
|
|
140
|
-
| llm.with_structured_output(RankDocuments)
|
|
143
|
+
| llm.with_structured_output(RankDocuments),
|
|
141
144
|
) | RunnableLambda(_parse_ranking)
|
|
142
145
|
return cls(reranker=reranker, **kwargs)
|
langchain/retrievers/ensemble.py
CHANGED
|
@@ -28,6 +28,7 @@ from langchain_core.runnables.utils import (
|
|
|
28
28
|
get_unique_config_specs,
|
|
29
29
|
)
|
|
30
30
|
from pydantic import model_validator
|
|
31
|
+
from typing_extensions import override
|
|
31
32
|
|
|
32
33
|
T = TypeVar("T")
|
|
33
34
|
H = TypeVar("H", bound=Hashable)
|
|
@@ -80,14 +81,18 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
80
81
|
|
|
81
82
|
@model_validator(mode="before")
|
|
82
83
|
@classmethod
|
|
83
|
-
def
|
|
84
|
+
def _set_weights(cls, values: dict[str, Any]) -> Any:
|
|
84
85
|
if not values.get("weights"):
|
|
85
86
|
n_retrievers = len(values["retrievers"])
|
|
86
87
|
values["weights"] = [1 / n_retrievers] * n_retrievers
|
|
87
88
|
return values
|
|
88
89
|
|
|
90
|
+
@override
|
|
89
91
|
def invoke(
|
|
90
|
-
self,
|
|
92
|
+
self,
|
|
93
|
+
input: str,
|
|
94
|
+
config: Optional[RunnableConfig] = None,
|
|
95
|
+
**kwargs: Any,
|
|
91
96
|
) -> list[Document]:
|
|
92
97
|
from langchain_core.callbacks import CallbackManager
|
|
93
98
|
|
|
@@ -111,7 +116,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
111
116
|
result = self.rank_fusion(input, run_manager=run_manager, config=config)
|
|
112
117
|
except Exception as e:
|
|
113
118
|
run_manager.on_retriever_error(e)
|
|
114
|
-
raise
|
|
119
|
+
raise
|
|
115
120
|
else:
|
|
116
121
|
run_manager.on_retriever_end(
|
|
117
122
|
result,
|
|
@@ -119,8 +124,12 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
119
124
|
)
|
|
120
125
|
return result
|
|
121
126
|
|
|
127
|
+
@override
|
|
122
128
|
async def ainvoke(
|
|
123
|
-
self,
|
|
129
|
+
self,
|
|
130
|
+
input: str,
|
|
131
|
+
config: Optional[RunnableConfig] = None,
|
|
132
|
+
**kwargs: Any,
|
|
124
133
|
) -> list[Document]:
|
|
125
134
|
from langchain_core.callbacks import AsyncCallbackManager
|
|
126
135
|
|
|
@@ -142,11 +151,13 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
142
151
|
)
|
|
143
152
|
try:
|
|
144
153
|
result = await self.arank_fusion(
|
|
145
|
-
input,
|
|
154
|
+
input,
|
|
155
|
+
run_manager=run_manager,
|
|
156
|
+
config=config,
|
|
146
157
|
)
|
|
147
158
|
except Exception as e:
|
|
148
159
|
await run_manager.on_retriever_error(e)
|
|
149
|
-
raise
|
|
160
|
+
raise
|
|
150
161
|
else:
|
|
151
162
|
await run_manager.on_retriever_end(
|
|
152
163
|
result,
|
|
@@ -171,9 +182,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
171
182
|
"""
|
|
172
183
|
|
|
173
184
|
# Get fused result of the retrievers.
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
return fused_documents
|
|
185
|
+
return self.rank_fusion(query, run_manager)
|
|
177
186
|
|
|
178
187
|
async def _aget_relevant_documents(
|
|
179
188
|
self,
|
|
@@ -192,9 +201,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
192
201
|
"""
|
|
193
202
|
|
|
194
203
|
# Get fused result of the retrievers.
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
return fused_documents
|
|
204
|
+
return await self.arank_fusion(query, run_manager)
|
|
198
205
|
|
|
199
206
|
def rank_fusion(
|
|
200
207
|
self,
|
|
@@ -219,7 +226,8 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
219
226
|
retriever.invoke(
|
|
220
227
|
query,
|
|
221
228
|
patch_config(
|
|
222
|
-
config,
|
|
229
|
+
config,
|
|
230
|
+
callbacks=run_manager.get_child(tag=f"retriever_{i + 1}"),
|
|
223
231
|
),
|
|
224
232
|
)
|
|
225
233
|
for i, retriever in enumerate(self.retrievers)
|
|
@@ -228,14 +236,12 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
228
236
|
# Enforce that retrieved docs are Documents for each list in retriever_docs
|
|
229
237
|
for i in range(len(retriever_docs)):
|
|
230
238
|
retriever_docs[i] = [
|
|
231
|
-
Document(page_content=cast(str, doc)) if isinstance(doc, str) else doc
|
|
239
|
+
Document(page_content=cast("str", doc)) if isinstance(doc, str) else doc
|
|
232
240
|
for doc in retriever_docs[i]
|
|
233
241
|
]
|
|
234
242
|
|
|
235
243
|
# apply rank fusion
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
return fused_documents
|
|
244
|
+
return self.weighted_reciprocal_rank(retriever_docs)
|
|
239
245
|
|
|
240
246
|
async def arank_fusion(
|
|
241
247
|
self,
|
|
@@ -266,7 +272,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
266
272
|
),
|
|
267
273
|
)
|
|
268
274
|
for i, retriever in enumerate(self.retrievers)
|
|
269
|
-
]
|
|
275
|
+
],
|
|
270
276
|
)
|
|
271
277
|
|
|
272
278
|
# Enforce that retrieved docs are Documents for each list in retriever_docs
|
|
@@ -277,12 +283,11 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
277
283
|
]
|
|
278
284
|
|
|
279
285
|
# apply rank fusion
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
return fused_documents
|
|
286
|
+
return self.weighted_reciprocal_rank(retriever_docs)
|
|
283
287
|
|
|
284
288
|
def weighted_reciprocal_rank(
|
|
285
|
-
self,
|
|
289
|
+
self,
|
|
290
|
+
doc_lists: list[list[Document]],
|
|
286
291
|
) -> list[Document]:
|
|
287
292
|
"""
|
|
288
293
|
Perform weighted Reciprocal Rank Fusion on multiple rank lists.
|
|
@@ -297,9 +302,8 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
297
302
|
scores in descending order.
|
|
298
303
|
"""
|
|
299
304
|
if len(doc_lists) != len(self.weights):
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
)
|
|
305
|
+
msg = "Number of rank lists must be equal to the number of weights."
|
|
306
|
+
raise ValueError(msg)
|
|
303
307
|
|
|
304
308
|
# Associate each doc's content with its RRF score for later sorting by it
|
|
305
309
|
# Duplicated contents across retrievers are collapsed & scored cumulatively
|
|
@@ -316,7 +320,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
316
320
|
|
|
317
321
|
# Docs are deduplicated by their contents then sorted by their scores
|
|
318
322
|
all_docs = chain.from_iterable(doc_lists)
|
|
319
|
-
|
|
323
|
+
return sorted(
|
|
320
324
|
unique_by_key(
|
|
321
325
|
all_docs,
|
|
322
326
|
lambda doc: (
|
|
@@ -330,4 +334,3 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
330
334
|
doc.page_content if self.id_key is None else doc.metadata[self.id_key]
|
|
331
335
|
],
|
|
332
336
|
)
|
|
333
|
-
return sorted_docs
|
|
@@ -9,7 +9,7 @@ if TYPE_CHECKING:
|
|
|
9
9
|
# Used to consolidate logic for raising deprecation warnings and
|
|
10
10
|
# handling optional imports.
|
|
11
11
|
DEPRECATED_LOOKUP = {
|
|
12
|
-
"GoogleDocumentAIWarehouseRetriever": "langchain_community.retrievers"
|
|
12
|
+
"GoogleDocumentAIWarehouseRetriever": "langchain_community.retrievers",
|
|
13
13
|
}
|
|
14
14
|
|
|
15
15
|
_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)
|
|
@@ -27,7 +27,7 @@ def __getattr__(name: str) -> Any:
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
__all__ = [
|
|
30
|
-
"GoogleVertexAISearchRetriever",
|
|
31
|
-
"GoogleVertexAIMultiTurnSearchRetriever",
|
|
32
30
|
"GoogleCloudEnterpriseSearchRetriever",
|
|
31
|
+
"GoogleVertexAIMultiTurnSearchRetriever",
|
|
32
|
+
"GoogleVertexAISearchRetriever",
|
|
33
33
|
]
|
langchain/retrievers/kendra.py
CHANGED
|
@@ -49,18 +49,18 @@ def __getattr__(name: str) -> Any:
|
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
__all__ = [
|
|
52
|
-
"clean_excerpt",
|
|
53
|
-
"combined_text",
|
|
54
|
-
"Highlight",
|
|
55
|
-
"TextWithHighLights",
|
|
56
|
-
"AdditionalResultAttributeValue",
|
|
57
52
|
"AdditionalResultAttribute",
|
|
58
|
-
"
|
|
53
|
+
"AdditionalResultAttributeValue",
|
|
54
|
+
"AmazonKendraRetriever",
|
|
59
55
|
"DocumentAttribute",
|
|
60
|
-
"
|
|
61
|
-
"
|
|
62
|
-
"RetrieveResultItem",
|
|
56
|
+
"DocumentAttributeValue",
|
|
57
|
+
"Highlight",
|
|
63
58
|
"QueryResult",
|
|
59
|
+
"QueryResultItem",
|
|
60
|
+
"ResultItem",
|
|
64
61
|
"RetrieveResult",
|
|
65
|
-
"
|
|
62
|
+
"RetrieveResultItem",
|
|
63
|
+
"TextWithHighLights",
|
|
64
|
+
"clean_excerpt",
|
|
65
|
+
"combined_text",
|
|
66
66
|
]
|