langchain 0.3.26__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +174 -151
- langchain/agents/agent_iterator.py +50 -26
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +4 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +8 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +11 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +9 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +17 -9
- langchain/agents/json_chat/base.py +19 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +11 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +81 -71
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +2 -0
- langchain/agents/openai_functions_agent/base.py +47 -37
- langchain/agents/openai_functions_multi_agent/base.py +40 -27
- langchain/agents/openai_tools/base.py +9 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +8 -6
- langchain/agents/output_parsers/openai_functions.py +24 -9
- langchain/agents/output_parsers/openai_tools.py +16 -4
- langchain/agents/output_parsers/react_json_single_input.py +13 -5
- langchain/agents/output_parsers/react_single_input.py +18 -11
- langchain/agents/output_parsers/self_ask.py +5 -2
- langchain/agents/output_parsers/tools.py +32 -13
- langchain/agents/output_parsers/xml.py +102 -28
- langchain/agents/react/agent.py +5 -4
- langchain/agents/react/base.py +26 -17
- langchain/agents/react/output_parser.py +7 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +5 -2
- langchain/agents/self_ask_with_search/base.py +23 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +19 -11
- langchain/agents/structured_chat/output_parser.py +29 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +8 -6
- langchain/agents/tools.py +5 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +12 -6
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +17 -3
- langchain/callbacks/streaming_aiter_final_only.py +16 -5
- langchain/callbacks/streaming_stdout_final_only.py +10 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +12 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +41 -23
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +96 -56
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +30 -11
- langchain/chains/combine_documents/map_reduce.py +41 -30
- langchain/chains/combine_documents/map_rerank.py +39 -24
- langchain/chains/combine_documents/reduce.py +48 -26
- langchain/chains/combine_documents/refine.py +27 -17
- langchain/chains/combine_documents/stuff.py +24 -13
- langchain/chains/constitutional_ai/base.py +11 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +9 -4
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +108 -79
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +10 -10
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +28 -12
- langchain/chains/flare/prompts.py +2 -0
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +82 -61
- langchain/chains/llm_bash/__init__.py +3 -2
- langchain/chains/llm_checker/base.py +19 -6
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +25 -10
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +22 -7
- langchain/chains/llm_symbolic_math/__init__.py +3 -2
- langchain/chains/loading.py +155 -97
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +11 -9
- langchain/chains/natbot/base.py +11 -9
- langchain/chains/natbot/crawler.py +102 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +15 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +21 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +39 -35
- langchain/chains/openai_functions/qa_with_structure.py +22 -15
- langchain/chains/openai_functions/tagging.py +4 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +8 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +17 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +15 -6
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +21 -8
- langchain/chains/query_constructor/base.py +37 -34
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +101 -34
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +38 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +38 -22
- langchain/chains/router/embedding_router.py +15 -8
- langchain/chains/router/llm_router.py +23 -20
- langchain/chains/router/multi_prompt.py +5 -2
- langchain/chains/router/multi_retrieval_qa.py +28 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +7 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +77 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +9 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +213 -139
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blob_loaders/schema.py +1 -4
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +35 -24
- langchain/embeddings/cache.py +37 -32
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +26 -25
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +21 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +23 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +34 -20
- langchain/evaluation/exact_match/base.py +14 -1
- langchain/evaluation/loading.py +16 -11
- langchain/evaluation/parsing/base.py +20 -4
- langchain/evaluation/parsing/json_distance.py +24 -10
- langchain/evaluation/parsing/json_schema.py +13 -12
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +20 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +4 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/regex_match/base.py +9 -1
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +23 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +20 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +14 -7
- langchain/memory/buffer_window.py +2 -0
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +14 -13
- langchain/memory/entity.py +131 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +4 -3
- langchain/memory/summary.py +43 -11
- langchain/memory/summary_buffer.py +20 -8
- langchain/memory/token_buffer.py +2 -0
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +14 -7
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +10 -4
- langchain/output_parsers/fix.py +60 -53
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +59 -48
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +9 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +99 -80
- langchain/output_parsers/structured.py +21 -6
- langchain/output_parsers/yaml.py +19 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/pydantic_v1/__init__.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +5 -3
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +17 -15
- langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -0
- langchain/retrievers/document_compressors/embeddings_filter.py +24 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +8 -5
- langchain/retrievers/ensemble.py +30 -27
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +35 -27
- langchain/retrievers/multi_vector.py +24 -9
- langchain/retrievers/parent_document_retriever.py +33 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +157 -127
- langchain/retrievers/time_weighted_retriever.py +21 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/hub.py +12 -0
- langchain/runnables/openai_functions.py +12 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +2 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +9 -23
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +22 -4
- langchain/smith/evaluation/runner_utils.py +416 -247
- langchain/smith/evaluation/string_run_evaluator.py +102 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +7 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +3 -2
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA +4 -14
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/RECORD +590 -591
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/WHEEL +1 -1
- langchain/smith/evaluation/utils.py +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0
@@ -9,7 +9,7 @@ if TYPE_CHECKING:
 # Used to consolidate logic for raising deprecation warnings and
 # handling optional imports.
 DEPRECATED_LOOKUP = {
-    "NucliaTextTransformer": "langchain_community.document_transformers"
+    "NucliaTextTransformer": "langchain_community.document_transformers",
 }

 _import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)
langchain/embeddings/__init__.py
CHANGED

@@ -1,7 +1,7 @@
 """**Embedding models** are wrappers around embedding models
 from different APIs and services.

-
+Embedding models can be LLMs or not.

 **Class hierarchy:**

@@ -83,17 +83,17 @@ class HypotheticalDocumentEmbedder:
     def __init__(self, *args: Any, **kwargs: Any):
         logger.warning(
             "Using a deprecated class. Please use "
-            "`from langchain.chains import HypotheticalDocumentEmbedder` instead"
+            "`from langchain.chains import HypotheticalDocumentEmbedder` instead",
         )
         from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H

-        return H(*args, **kwargs)  # type: ignore[return-value]
+        return H(*args, **kwargs)  # type: ignore[return-value]  # noqa: PLE0101

     @classmethod
     def from_llm(cls, *args: Any, **kwargs: Any) -> Any:
         logger.warning(
             "Using a deprecated class. Please use "
-            "`from langchain.chains import HypotheticalDocumentEmbedder` instead"
+            "`from langchain.chains import HypotheticalDocumentEmbedder` instead",
         )
         from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H

@@ -187,8 +187,8 @@ __all__ = [
     "ErnieEmbeddings",
     "FakeEmbeddings",
     "FastEmbedEmbeddings",
-    "GooglePalmEmbeddings",
     "GPT4AllEmbeddings",
+    "GooglePalmEmbeddings",
     "GradientEmbeddings",
     "HuggingFaceBgeEmbeddings",
     "HuggingFaceEmbeddings",
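The shim above only logs a warning and forwards to the real class, so new code should import it the way the warning message says. A minimal sketch, with the import path taken verbatim from the deprecation text (no other assumptions):

```python
# Preferred import per the deprecation warning above; the name kept in
# langchain.embeddings is only a thin shim that warns and delegates here.
from langchain.chains import HypotheticalDocumentEmbedder
```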
langchain/embeddings/base.py
CHANGED

@@ -47,10 +47,11 @@ def _parse_model_string(model_name: str) -> tuple[str, str]:
     Raises:
         ValueError: If the model string is not in the correct format or
             the provider is unsupported
+
     """
     if ":" not in model_name:
         providers = _SUPPORTED_PROVIDERS
-        raise ValueError(
+        msg = (
             f"Invalid model format '{model_name}'.\n"
             f"Model name must be in format 'provider:model-name'\n"
             f"Example valid model strings:\n"
@@ -59,49 +60,56 @@ def _parse_model_string(model_name: str) -> tuple[str, str]:
             f" - cohere:embed-english-v3.0\n"
             f"Supported providers: {providers}"
         )
+        raise ValueError(msg)

     provider, model = model_name.split(":", 1)
     provider = provider.lower().strip()
     model = model.strip()

     if provider not in _SUPPORTED_PROVIDERS:
-        raise ValueError(
+        msg = (
             f"Provider '{provider}' is not supported.\n"
             f"Supported providers and their required packages:\n"
             f"{_get_provider_list()}"
         )
+        raise ValueError(msg)
     if not model:
-        raise ValueError("Model name cannot be empty")
+        msg = "Model name cannot be empty"
+        raise ValueError(msg)
     return provider, model


 def _infer_model_and_provider(
-    model: str, *, provider: Optional[str] = None
+    model: str,
+    *,
+    provider: Optional[str] = None,
 ) -> tuple[str, str]:
     if not model.strip():
-        raise ValueError("Model name cannot be empty")
+        msg = "Model name cannot be empty"
+        raise ValueError(msg)
     if provider is None and ":" in model:
         provider, model_name = _parse_model_string(model)
     else:
-        provider = provider
         model_name = model

     if not provider:
         providers = _SUPPORTED_PROVIDERS
-        raise ValueError(
+        msg = (
             "Must specify either:\n"
             "1. A model string in format 'provider:model-name'\n"
             " Example: 'openai:text-embedding-3-small'\n"
             "2. Or explicitly set provider from: "
             f"{providers}"
         )
+        raise ValueError(msg)

     if provider not in _SUPPORTED_PROVIDERS:
-        raise ValueError(
+        msg = (
             f"Provider '{provider}' is not supported.\n"
             f"Supported providers and their required packages:\n"
             f"{_get_provider_list()}"
         )
+        raise ValueError(msg)
     return provider, model_name


@@ -109,10 +117,11 @@ def _infer_model_and_provider(
 def _check_pkg(pkg: str) -> None:
     """Check if a package is installed."""
     if not util.find_spec(pkg):
-        raise ImportError(
+        msg = (
             f"Could not import {pkg} python package. "
             f"Please install it with `pip install {pkg}`"
         )
+        raise ImportError(msg)


 def init_embeddings(
@@ -169,12 +178,14 @@ def init_embeddings(
         )

     .. versionadded:: 0.3.9
+
     """
     if not model:
         providers = _SUPPORTED_PROVIDERS.keys()
-        raise ValueError(
+        msg = (
             f"Must specify model name. Supported providers are: {', '.join(providers)}"
         )
+        raise ValueError(msg)

     provider, model_name = _infer_model_and_provider(model, provider=provider)
     pkg = _SUPPORTED_PROVIDERS[provider]
@@ -184,43 +195,43 @@ def init_embeddings(
         from langchain_openai import OpenAIEmbeddings

         return OpenAIEmbeddings(model=model_name, **kwargs)
-    elif provider == "azure_openai":
+    if provider == "azure_openai":
         from langchain_openai import AzureOpenAIEmbeddings

         return AzureOpenAIEmbeddings(model=model_name, **kwargs)
-    elif provider == "google_vertexai":
+    if provider == "google_vertexai":
         from langchain_google_vertexai import VertexAIEmbeddings

         return VertexAIEmbeddings(model=model_name, **kwargs)
-    elif provider == "bedrock":
+    if provider == "bedrock":
         from langchain_aws import BedrockEmbeddings

         return BedrockEmbeddings(model_id=model_name, **kwargs)
-    elif provider == "cohere":
+    if provider == "cohere":
         from langchain_cohere import CohereEmbeddings

         return CohereEmbeddings(model=model_name, **kwargs)
-    elif provider == "mistralai":
+    if provider == "mistralai":
         from langchain_mistralai import MistralAIEmbeddings

         return MistralAIEmbeddings(model=model_name, **kwargs)
-    elif provider == "huggingface":
+    if provider == "huggingface":
         from langchain_huggingface import HuggingFaceEmbeddings

         return HuggingFaceEmbeddings(model_name=model_name, **kwargs)
-    elif provider == "ollama":
+    if provider == "ollama":
         from langchain_ollama import OllamaEmbeddings

         return OllamaEmbeddings(model=model_name, **kwargs)
-    else:
-        raise ValueError(
-            f"Provider '{provider}' is not supported.\n"
-            f"Supported providers and their required packages:\n"
-            f"{_get_provider_list()}"
-        )
+    msg = (
+        f"Provider '{provider}' is not supported.\n"
+        f"Supported providers and their required packages:\n"
+        f"{_get_provider_list()}"
+    )
+    raise ValueError(msg)


 __all__ = [
-    "init_embeddings",
     "Embeddings",  # This one is for backwards compatibility
+    "init_embeddings",
 ]
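The error messages in the hunks above document the `provider:model-name` convention that `init_embeddings` expects, and the final hunk shows which partner package each provider routes to. A hedged usage sketch (it assumes the matching provider packages such as `langchain-openai` or `langchain-cohere` are installed; the model names are illustrative):

```python
# Sketch of init_embeddings usage based on the provider:model-name format and
# the provider routing shown in the diff above.
from langchain.embeddings import init_embeddings

# Provider encoded in the model string; routes to langchain_openai.OpenAIEmbeddings.
embedder = init_embeddings("openai:text-embedding-3-small")
vector = embedder.embed_query("hello world")

# Provider passed explicitly instead of being prefixed onto the model name.
cohere_embedder = init_embeddings("embed-english-v3.0", provider="cohere")
```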
langchain/embeddings/cache.py
CHANGED

@@ -26,16 +26,16 @@ NAMESPACE_UUID = uuid.UUID(int=1985)


 def _sha1_hash_to_uuid(text: str) -> uuid.UUID:
-    """Return a UUID derived from *text* using SHA
+    """Return a UUID derived from *text* using SHA-1 (deterministic).

-    Deterministic and fast, **but not collision
+    Deterministic and fast, **but not collision-resistant**.

     A malicious attacker could try to create two different texts that hash to the same
     UUID. This may not necessarily be an issue in the context of caching embeddings,
     but new applications should swap this out for a stronger hash function like
-    xxHash, BLAKE2 or SHA
+    xxHash, BLAKE2 or SHA-256, which are collision-resistant.
     """
-    sha1_hex = hashlib.sha1(text.encode("utf-8")).hexdigest()
+    sha1_hex = hashlib.sha1(text.encode("utf-8"), usedforsecurity=False).hexdigest()
     # Embed the hex string in `uuid5` to obtain a valid UUID.
     return uuid.uuid5(NAMESPACE_UUID, sha1_hex)

@@ -44,12 +44,12 @@ def _make_default_key_encoder(namespace: str, algorithm: str) -> Callable[[str],
     """Create a default key encoder function.

     Args:
-
-
-        *
-        *
-        *
-        *
+        namespace: Prefix that segregates keys from different embedding models.
+        algorithm:
+            * ``'sha1'`` - fast but not collision-resistant
+            * ``'blake2b'`` - cryptographically strong, faster than SHA-1
+            * ``'sha256'`` - cryptographically strong, slower than SHA-1
+            * ``'sha512'`` - cryptographically strong, slower than SHA-1

     Returns:
         A function that encodes a key using the specified algorithm.
@@ -67,7 +67,8 @@ def _make_default_key_encoder(namespace: str, algorithm: str) -> Callable[[str],
             return f"{namespace}{hashlib.sha256(key.encode('utf-8')).hexdigest()}"
         if algorithm == "sha512":
             return f"{namespace}{hashlib.sha512(key.encode('utf-8')).hexdigest()}"
-        raise ValueError(f"Unsupported algorithm: {algorithm}")
+        msg = f"Unsupported algorithm: {algorithm}"
+        raise ValueError(msg)

     return _key_encoder

@@ -79,7 +80,7 @@ def _value_serializer(value: Sequence[float]) -> bytes:

 def _value_deserializer(serialized_value: bytes) -> list[float]:
     """Deserialize a value."""
-    return cast(list[float], json.loads(serialized_value.decode()))
+    return cast("list[float]", json.loads(serialized_value.decode()))


 # The warning is global; track emission, so it appears only once.
@@ -87,15 +88,15 @@ _warned_about_sha1: bool = False


 def _warn_about_sha1_encoder() -> None:
-    """Emit a one
-    global _warned_about_sha1
+    """Emit a one-time warning about SHA-1 collision weaknesses."""
+    global _warned_about_sha1  # noqa: PLW0603
     if not _warned_about_sha1:
         warnings.warn(
-            "Using default key encoder: SHA
+            "Using default key encoder: SHA-1 is *not* collision-resistant. "
             "While acceptable for most cache scenarios, a motivated attacker "
             "can craft two different payloads that map to the same cache key. "
             "If that risk matters in your environment, supply a stronger "
-            "encoder (e.g. SHA
+            "encoder (e.g. SHA-256 or BLAKE2) via the `key_encoder` argument. "
             "If you change the key encoder, consider also creating a new cache, "
             "to avoid (the potential for) collisions with existing keys.",
             category=UserWarning,
@@ -118,7 +119,6 @@ class CacheBackedEmbeddings(Embeddings):
     embeddings too, pass in a query_embedding_store to constructor.

     Examples:
-
         .. code-block: python

             from langchain.embeddings import CacheBackedEmbeddings
@@ -154,7 +154,7 @@ class CacheBackedEmbeddings(Embeddings):
             document_embedding_store: The store to use for caching document embeddings.
             batch_size: The number of documents to embed between store updates.
             query_embedding_store: The store to use for caching query embeddings.
-                If None
+                If ``None``, query embeddings are not cached.
         """
         super().__init__()
         self.document_embedding_store = document_embedding_store
@@ -176,7 +176,7 @@ class CacheBackedEmbeddings(Embeddings):
             A list of embeddings for the given texts.
         """
         vectors: list[Union[list[float], None]] = self.document_embedding_store.mget(
-            texts
+            texts,
         )
         all_missing_indices: list[int] = [
             i for i, vector in enumerate(vectors) if vector is None
@@ -186,13 +186,14 @@ class CacheBackedEmbeddings(Embeddings):
             missing_texts = [texts[i] for i in missing_indices]
             missing_vectors = self.underlying_embeddings.embed_documents(missing_texts)
             self.document_embedding_store.mset(
-                list(zip(missing_texts, missing_vectors))
+                list(zip(missing_texts, missing_vectors)),
             )
             for index, updated_vector in zip(missing_indices, missing_vectors):
                 vectors[index] = updated_vector

         return cast(
-            list[list[float]], vectors
+            "list[list[float]]",
+            vectors,
         )  # Nones should have been resolved by now

     async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
@@ -220,23 +221,24 @@ class CacheBackedEmbeddings(Embeddings):
         for missing_indices in batch_iterate(self.batch_size, all_missing_indices):
             missing_texts = [texts[i] for i in missing_indices]
             missing_vectors = await self.underlying_embeddings.aembed_documents(
-                missing_texts
+                missing_texts,
             )
             await self.document_embedding_store.amset(
-                list(zip(missing_texts, missing_vectors))
+                list(zip(missing_texts, missing_vectors)),
             )
             for index, updated_vector in zip(missing_indices, missing_vectors):
                 vectors[index] = updated_vector

         return cast(
-            list[list[float]], vectors
+            "list[list[float]]",
+            vectors,
         )  # Nones should have been resolved by now

     def embed_query(self, text: str) -> list[float]:
         """Embed query text.

         By default, this method does not cache queries. To enable caching, set the
-
+        ``cache_query`` parameter to ``True`` when initializing the embedder.

         Args:
             text: The text to embed.
@@ -259,7 +261,7 @@ class CacheBackedEmbeddings(Embeddings):
         """Embed query text.

         By default, this method does not cache queries. To enable caching, set the
-
+        ``cache_query`` parameter to ``True`` when initializing the embedder.

         Args:
             text: The text to embed.
@@ -288,7 +290,8 @@ class CacheBackedEmbeddings(Embeddings):
         batch_size: Optional[int] = None,
         query_embedding_cache: Union[bool, ByteStore] = False,
         key_encoder: Union[
-            Callable[[str], str],
+            Callable[[str], str],
+            Literal["sha1", "blake2b", "sha256", "sha512"],
         ] = "sha1",
     ) -> CacheBackedEmbeddings:
         """On-ramp that adds the necessary serialization and encoding to the store.
@@ -298,14 +301,14 @@ class CacheBackedEmbeddings(Embeddings):
             document_embedding_cache: The cache to use for storing document embeddings.
             *,
             namespace: The namespace to use for document cache.
-
-
+                This namespace is used to avoid collisions with other caches.
+                For example, set it to the name of the embedding model used.
             batch_size: The number of documents to embed between store updates.
             query_embedding_cache: The cache to use for storing query embeddings.
                 True to use the same cache as document embeddings.
                 False to not cache query embeddings.
             key_encoder: Optional callable to encode keys. If not provided,
-                a default encoder using SHA
+                a default encoder using SHA-1 will be used. SHA-1 is not
                 collision-resistant, and a motivated attacker could craft two
                 different texts that hash to the same cache key.

@@ -327,15 +330,17 @@ class CacheBackedEmbeddings(Embeddings):
             # namespace.
             # A user can handle namespacing in directly their custom key encoder.
             if namespace:
-                raise ValueError(
+                msg = (
                     "Do not supply `namespace` when using a custom key_encoder; "
                     "add any prefixing inside the encoder itself."
                 )
+                raise ValueError(msg)
         else:
-            raise ValueError(
+            msg = (
                 "key_encoder must be either 'blake2b', 'sha1', 'sha256', 'sha512' "
                 "or a callable that encodes keys."
             )
+            raise ValueError(msg)  # noqa: TRY004

         document_embedding_store = EncoderBackedStore[str, list[float]](
             document_embedding_cache,
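The `key_encoder` plumbing above is easiest to see from the caller's side. A minimal sketch of opting out of the default SHA-1 encoder, assuming the `from_bytes_store` signature shown in the diff and an in-memory byte store (the model string is illustrative):

```python
# Passing one of the named algorithms ("blake2b", "sha256", "sha512") avoids the
# one-time SHA-1 warning emitted by _warn_about_sha1_encoder above.
from langchain.embeddings import CacheBackedEmbeddings, init_embeddings
from langchain.storage import InMemoryByteStore

store = InMemoryByteStore()
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    init_embeddings("openai:text-embedding-3-small"),
    store,
    namespace="text-embedding-3-small",  # e.g. the model name, per the docstring
    key_encoder="sha256",
)

# The second, identical text is served from the byte store instead of the API.
vectors = cached_embedder.embed_documents(["cache me", "cache me"])
```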
langchain/embeddings/huggingface.py
CHANGED

@@ -29,8 +29,8 @@ def __getattr__(name: str) -> Any:


 __all__ = [
-    "HuggingFaceEmbeddings",
-    "HuggingFaceInstructEmbeddings",
     "HuggingFaceBgeEmbeddings",
+    "HuggingFaceEmbeddings",
     "HuggingFaceInferenceAPIEmbeddings",
+    "HuggingFaceInstructEmbeddings",
 ]
langchain/evaluation/__init__.py
CHANGED

@@ -95,34 +95,34 @@ from langchain.evaluation.string_distance import (
 )

 __all__ = [
-    "EvaluatorType",
-    "ExactMatchStringEvaluator",
-    "RegexMatchStringEvaluator",
-    "PairwiseStringEvalChain",
-    "LabeledPairwiseStringEvalChain",
-    "QAEvalChain",
-    "CotQAEvalChain",
+    "AgentTrajectoryEvaluator",
     "ContextQAEvalChain",
-    "StringEvaluator",
-    "PairwiseStringEvaluator",
-    "TrajectoryEvalChain",
-    "CriteriaEvalChain",
+    "CotQAEvalChain",
     "Criteria",
+    "CriteriaEvalChain",
     "EmbeddingDistance",
     "EmbeddingDistanceEvalChain",
+    "EvaluatorType",
+    "ExactMatchStringEvaluator",
+    "JsonEditDistanceEvaluator",
+    "JsonEqualityEvaluator",
+    "JsonSchemaEvaluator",
+    "JsonValidityEvaluator",
+    "LabeledCriteriaEvalChain",
+    "LabeledPairwiseStringEvalChain",
+    "LabeledScoreStringEvalChain",
     "PairwiseEmbeddingDistanceEvalChain",
+    "PairwiseStringDistanceEvalChain",
+    "PairwiseStringEvalChain",
+    "PairwiseStringEvaluator",
+    "QAEvalChain",
+    "RegexMatchStringEvaluator",
+    "ScoreStringEvalChain",
     "StringDistance",
     "StringDistanceEvalChain",
-    "PairwiseStringDistanceEvalChain",
-    "LabeledCriteriaEvalChain",
-    "load_evaluators",
-    "load_evaluator",
+    "StringEvaluator",
+    "TrajectoryEvalChain",
     "load_dataset",
-    "AgentTrajectoryEvaluator",
-    "ScoreStringEvalChain",
-    "LabeledScoreStringEvalChain",
-    "JsonValidityEvaluator",
-    "JsonEqualityEvaluator",
-    "JsonEditDistanceEvaluator",
-    "JsonSchemaEvaluator",
+    "load_evaluator",
+    "load_evaluators",
 ]
langchain/evaluation/agents/trajectory_eval_chain.py
CHANGED

@@ -27,6 +27,7 @@ from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_core.output_parsers import BaseOutputParser
 from langchain_core.tools import BaseTool
 from pydantic import ConfigDict, Field
+from typing_extensions import override

 from langchain.chains.llm import LLMChain
 from langchain.evaluation.agents.trajectory_eval_prompt import (
@@ -35,6 +36,8 @@ from langchain.evaluation.agents.trajectory_eval_prompt import (
 )
 from langchain.evaluation.schema import AgentTrajectoryEvaluator, LLMEvalChain

+_MAX_SCORE = 5
+

 class TrajectoryEval(TypedDict):
     """A named tuple containing the score and reasoning for a trajectory."""
@@ -66,9 +69,8 @@ class TrajectoryOutputParser(BaseOutputParser):
                 if the LLM's score is not a digit in the range 1-5.
         """
         if "Score:" not in text:
-            raise OutputParserException(
-                f"Could not find score in model eval output: {text}"
-            )
+            msg = f"Could not find score in model eval output: {text}"
+            raise OutputParserException(msg)

         reasoning, score_str = text.split("Score: ", maxsplit=1)

@@ -82,16 +84,14 @@ class TrajectoryOutputParser(BaseOutputParser):
         _score = re.search(r"(\d+(\.\d+)?)", score_str)
         # If the score is not found or is a float, raise an exception.
         if _score is None or "." in _score.group(1):
-            raise OutputParserException(
-                f"Score is not an integer digit in the range 1-5: {text}"
-            )
+            msg = f"Score is not an integer digit in the range 1-5: {text}"
+            raise OutputParserException(msg)
         score = int(_score.group(1))
         # If the score is not in the range 1-5, raise an exception.
-        if not 1 <= score <= 5:
-            raise OutputParserException(
-                f"Score is not a digit in the range 1-5: {text}"
-            )
-        normalized_score = (score - 1) / 4
+        if not 1 <= score <= _MAX_SCORE:
+            msg = f"Score is not a digit in the range 1-5: {text}"
+            raise OutputParserException(msg)
+        normalized_score = (score - 1) / (_MAX_SCORE - 1)
         return TrajectoryEval(score=normalized_score, reasoning=reasoning)


@@ -140,6 +140,7 @@ class TrajectoryEvalChain(AgentTrajectoryEvaluator, LLMEvalChain):
            )
            print(result["score"])  # noqa: T201
            # 0
+
    """

    agent_tools: Optional[list[BaseTool]] = None
@@ -147,7 +148,7 @@ class TrajectoryEvalChain(AgentTrajectoryEvaluator, LLMEvalChain):
    eval_chain: LLMChain
    """The language model chain used for evaluation."""
    output_parser: TrajectoryOutputParser = Field(
-        default_factory=TrajectoryOutputParser
+        default_factory=TrajectoryOutputParser,
    )
    """The output parser used to parse the output."""
    return_reasoning: bool = False  # :meta private:
@@ -176,7 +177,7 @@ class TrajectoryEvalChain(AgentTrajectoryEvaluator, LLMEvalChain):
                f"""Tool {i}: {tool.name}
Description: {tool.description}"""
                for i, tool in enumerate(self.agent_tools, 1)
-            ]
+            ],
        )

    @staticmethod
@@ -201,7 +202,7 @@ Tool used: {action.tool}
Tool input: {action.tool_input}
Tool output: {output}"""
                for i, (action, output) in enumerate(steps, 1)
-            ]
+            ],
        )

    @staticmethod
@@ -244,13 +245,9 @@ The following is the expected answer. Use this to measure correctness:
            TrajectoryEvalChain: The TrajectoryEvalChain object.
        """
        if not isinstance(llm, BaseChatModel):
-            raise NotImplementedError(
-                "Only chat models supported by the current trajectory eval"
-            )
-        if agent_tools:
-            prompt = EVAL_CHAT_PROMPT
-        else:
-            prompt = TOOL_FREE_EVAL_CHAT_PROMPT
+            msg = "Only chat models supported by the current trajectory eval"
+            raise NotImplementedError(msg)
+        prompt = EVAL_CHAT_PROMPT if agent_tools else TOOL_FREE_EVAL_CHAT_PROMPT
        eval_chain = LLMChain(llm=llm, prompt=prompt)
        return cls(
            agent_tools=agent_tools,  # type: ignore[arg-type]
@@ -302,9 +299,10 @@ The following is the expected answer. Use this to measure correctness:
        chain_input["tool_descriptions"] = self._tools_description
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        raw_output = self.eval_chain.run(
-            chain_input, callbacks=_run_manager.get_child()
+            chain_input,
+            callbacks=_run_manager.get_child(),
        )
-        return cast(dict, self.output_parser.parse(raw_output))
+        return cast("dict", self.output_parser.parse(raw_output))

    async def _acall(
        self,
@@ -326,10 +324,12 @@ The following is the expected answer. Use this to measure correctness:
        chain_input["tool_descriptions"] = self._tools_description
        _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
        raw_output = await self.eval_chain.arun(
-            chain_input, callbacks=_run_manager.get_child()
+            chain_input,
+            callbacks=_run_manager.get_child(),
        )
-        return cast(dict, self.output_parser.parse(raw_output))
+        return cast("dict", self.output_parser.parse(raw_output))

+    @override
    def _evaluate_agent_trajectory(
        self,
        *,
@@ -372,6 +372,7 @@ The following is the expected answer. Use this to measure correctness:
            return_only_outputs=True,
        )

+    @override
    async def _aevaluate_agent_trajectory(
        self,
        *,