langchain-0.3.26-py3-none-any.whl → langchain-0.3.27-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain might be problematic. More details are available via the link on the original registry diff page.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +160 -133
- langchain/agents/agent_iterator.py +31 -14
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +2 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +6 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +9 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +7 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +16 -8
- langchain/agents/json_chat/base.py +18 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +9 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +80 -70
- langchain/agents/openai_functions_agent/base.py +46 -37
- langchain/agents/openai_functions_multi_agent/base.py +39 -26
- langchain/agents/openai_tools/base.py +8 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +6 -6
- langchain/agents/output_parsers/openai_functions.py +15 -7
- langchain/agents/output_parsers/openai_tools.py +9 -4
- langchain/agents/output_parsers/react_json_single_input.py +10 -5
- langchain/agents/output_parsers/react_single_input.py +15 -11
- langchain/agents/output_parsers/self_ask.py +3 -2
- langchain/agents/output_parsers/tools.py +18 -13
- langchain/agents/output_parsers/xml.py +99 -28
- langchain/agents/react/agent.py +4 -4
- langchain/agents/react/base.py +22 -17
- langchain/agents/react/output_parser.py +5 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +3 -2
- langchain/agents/self_ask_with_search/base.py +19 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +14 -11
- langchain/agents/structured_chat/output_parser.py +16 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +7 -6
- langchain/agents/tools.py +2 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +5 -5
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +4 -1
- langchain/callbacks/streaming_aiter_final_only.py +5 -3
- langchain/callbacks/streaming_stdout_final_only.py +5 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +1 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +36 -22
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +88 -54
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +23 -10
- langchain/chains/combine_documents/map_reduce.py +38 -30
- langchain/chains/combine_documents/map_rerank.py +33 -20
- langchain/chains/combine_documents/reduce.py +47 -26
- langchain/chains/combine_documents/refine.py +26 -17
- langchain/chains/combine_documents/stuff.py +19 -12
- langchain/chains/constitutional_ai/base.py +4 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +5 -3
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +41 -20
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +8 -9
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +26 -12
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +77 -61
- langchain/chains/llm_bash/__init__.py +2 -1
- langchain/chains/llm_checker/base.py +7 -5
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +16 -9
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +9 -6
- langchain/chains/llm_symbolic_math/__init__.py +2 -1
- langchain/chains/loading.py +151 -95
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +8 -9
- langchain/chains/natbot/base.py +8 -8
- langchain/chains/natbot/crawler.py +73 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +13 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +12 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +35 -35
- langchain/chains/openai_functions/qa_with_structure.py +19 -12
- langchain/chains/openai_functions/tagging.py +2 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +4 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +14 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +14 -5
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +17 -6
- langchain/chains/query_constructor/base.py +34 -33
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +37 -32
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +34 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +24 -20
- langchain/chains/router/embedding_router.py +12 -8
- langchain/chains/router/llm_router.py +17 -16
- langchain/chains/router/multi_prompt.py +2 -2
- langchain/chains/router/multi_retrieval_qa.py +10 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +6 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +75 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +5 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +160 -123
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +33 -24
- langchain/embeddings/cache.py +36 -31
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +23 -23
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +20 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +20 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +23 -20
- langchain/evaluation/loading.py +15 -11
- langchain/evaluation/parsing/base.py +4 -1
- langchain/evaluation/parsing/json_distance.py +5 -2
- langchain/evaluation/parsing/json_schema.py +12 -8
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +12 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +2 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +22 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +14 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +9 -7
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +13 -12
- langchain/memory/entity.py +84 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +1 -3
- langchain/memory/summary.py +13 -11
- langchain/memory/summary_buffer.py +17 -8
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +10 -5
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +5 -3
- langchain/output_parsers/fix.py +52 -52
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +43 -47
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +7 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +77 -78
- langchain/output_parsers/structured.py +11 -6
- langchain/output_parsers/yaml.py +15 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +2 -2
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +15 -15
- langchain/retrievers/document_compressors/embeddings_filter.py +21 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +7 -5
- langchain/retrievers/ensemble.py +28 -25
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +32 -26
- langchain/retrievers/multi_vector.py +20 -8
- langchain/retrievers/parent_document_retriever.py +18 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +138 -127
- langchain/retrievers/time_weighted_retriever.py +18 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/openai_functions.py +6 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +1 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +10 -7
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +11 -2
- langchain/smith/evaluation/runner_utils.py +179 -127
- langchain/smith/evaluation/string_run_evaluator.py +75 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +6 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +2 -1
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/METADATA +4 -4
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/RECORD +580 -580
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/WHEEL +1 -1
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/licenses/LICENSE +0 -0
|
@@ -98,10 +98,8 @@ class TestResult(dict):
|
|
|
98
98
|
to_drop = [
|
|
99
99
|
col
|
|
100
100
|
for col in df.columns
|
|
101
|
-
if col.startswith("inputs.")
|
|
102
|
-
or col.startswith("outputs.")
|
|
101
|
+
if col.startswith(("inputs.", "outputs.", "reference"))
|
|
103
102
|
or col in {"input", "output"}
|
|
104
|
-
or col.startswith("reference")
|
|
105
103
|
]
|
|
106
104
|
return df.describe(include="all").drop(to_drop, axis=1)
|
|
107
105
|
|
|
@@ -110,10 +108,11 @@ class TestResult(dict):
|
|
|
110
108
|
try:
|
|
111
109
|
import pandas as pd
|
|
112
110
|
except ImportError as e:
|
|
113
|
-
|
|
111
|
+
msg = (
|
|
114
112
|
"Pandas is required to convert the results to a dataframe."
|
|
115
113
|
" to install pandas, run `pip install pandas`."
|
|
116
|
-
)
|
|
114
|
+
)
|
|
115
|
+
raise ImportError(msg) from e
|
|
117
116
|
|
|
118
117
|
indices = []
|
|
119
118
|
records = []
|
|
@@ -134,7 +133,7 @@ class TestResult(dict):
|
|
|
134
133
|
if "reference" in result:
|
|
135
134
|
if isinstance(result["reference"], dict):
|
|
136
135
|
r.update(
|
|
137
|
-
{f"reference.{k}": v for k, v in result["reference"].items()}
|
|
136
|
+
{f"reference.{k}": v for k, v in result["reference"].items()},
|
|
138
137
|
)
|
|
139
138
|
else:
|
|
140
139
|
r["reference"] = result["reference"]
|
|
@@ -144,7 +143,7 @@ class TestResult(dict):
|
|
|
144
143
|
"error": result.get("Error"),
|
|
145
144
|
"execution_time": result["execution_time"],
|
|
146
145
|
"run_id": result.get("run_id"),
|
|
147
|
-
}
|
|
146
|
+
},
|
|
148
147
|
)
|
|
149
148
|
records.append(r)
|
|
150
149
|
indices.append(example_id)
|
|
@@ -161,8 +160,9 @@ class EvalError(dict):
|
|
|
161
160
|
def __getattr__(self, name: str) -> Any:
|
|
162
161
|
try:
|
|
163
162
|
return self[name]
|
|
164
|
-
except KeyError:
|
|
165
|
-
|
|
163
|
+
except KeyError as e:
|
|
164
|
+
msg = f"'EvalError' object has no attribute '{name}'"
|
|
165
|
+
raise AttributeError(msg) from e
|
|
166
166
|
|
|
167
167
|
|
|
168
168
|
def _wrap_in_chain_factory(
|
|
@@ -176,7 +176,7 @@ def _wrap_in_chain_factory(
|
|
|
176
176
|
chain_class = chain.__class__.__name__
|
|
177
177
|
if llm_or_chain_factory.memory is not None:
|
|
178
178
|
memory_class = chain.memory.__class__.__name__
|
|
179
|
-
|
|
179
|
+
msg = (
|
|
180
180
|
"Cannot directly evaluate a chain with stateful memory."
|
|
181
181
|
" To evaluate this chain, pass in a chain constructor"
|
|
182
182
|
" that initializes fresh memory each time it is called."
|
|
@@ -189,14 +189,15 @@ def _wrap_in_chain_factory(
|
|
|
189
189
|
"(memory=new_memory, ...)\n\n"
|
|
190
190
|
f'run_on_dataset("{dataset_name}", chain_constructor, ...)'
|
|
191
191
|
)
|
|
192
|
+
raise ValueError(msg)
|
|
192
193
|
return lambda: chain
|
|
193
|
-
|
|
194
|
+
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
|
194
195
|
return llm_or_chain_factory
|
|
195
|
-
|
|
196
|
+
if isinstance(llm_or_chain_factory, Runnable):
|
|
196
197
|
# Memory may exist here, but it's not elegant to check all those cases.
|
|
197
198
|
lcf = llm_or_chain_factory
|
|
198
199
|
return lambda: lcf
|
|
199
|
-
|
|
200
|
+
if callable(llm_or_chain_factory):
|
|
200
201
|
if is_traceable_function(llm_or_chain_factory):
|
|
201
202
|
runnable_ = as_runnable(cast(Callable, llm_or_chain_factory))
|
|
202
203
|
return lambda: runnable_
|
|
@@ -206,7 +207,7 @@ def _wrap_in_chain_factory(
|
|
|
206
207
|
# It's an arbitrary function, wrap it in a RunnableLambda
|
|
207
208
|
user_func = cast(Callable, llm_or_chain_factory)
|
|
208
209
|
sig = inspect.signature(user_func)
|
|
209
|
-
logger.info(
|
|
210
|
+
logger.info("Wrapping function %s as RunnableLambda.", sig)
|
|
210
211
|
wrapped = RunnableLambda(user_func)
|
|
211
212
|
return lambda: wrapped
|
|
212
213
|
constructor = cast(Callable, llm_or_chain_factory)
|
|
@@ -214,15 +215,14 @@ def _wrap_in_chain_factory(
|
|
|
214
215
|
# It's not uncommon to do an LLM constructor instead of raw LLM,
|
|
215
216
|
# so we'll unpack it for the user.
|
|
216
217
|
return _model
|
|
217
|
-
|
|
218
|
+
if is_traceable_function(cast(Callable, _model)):
|
|
218
219
|
runnable_ = as_runnable(cast(Callable, _model))
|
|
219
220
|
return lambda: runnable_
|
|
220
|
-
|
|
221
|
+
if not isinstance(_model, Runnable):
|
|
221
222
|
# This is unlikely to happen - a constructor for a model function
|
|
222
223
|
return lambda: RunnableLambda(constructor)
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
return constructor
|
|
224
|
+
# Typical correct case
|
|
225
|
+
return constructor
|
|
226
226
|
return llm_or_chain_factory
|
|
227
227
|
|
|
228
228
|
|
|
@@ -238,23 +238,24 @@ def _get_prompt(inputs: dict[str, Any]) -> str:
|
|
|
238
238
|
InputFormatError: If the input format is invalid.
|
|
239
239
|
"""
|
|
240
240
|
if not inputs:
|
|
241
|
-
|
|
241
|
+
msg = "Inputs should not be empty."
|
|
242
|
+
raise InputFormatError(msg)
|
|
242
243
|
|
|
243
244
|
prompts = []
|
|
244
245
|
if "prompt" in inputs:
|
|
245
246
|
if not isinstance(inputs["prompt"], str):
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
)
|
|
247
|
+
msg = f"Expected string for 'prompt', got {type(inputs['prompt']).__name__}"
|
|
248
|
+
raise InputFormatError(msg)
|
|
249
249
|
prompts = [inputs["prompt"]]
|
|
250
250
|
elif "prompts" in inputs:
|
|
251
251
|
if not isinstance(inputs["prompts"], list) or not all(
|
|
252
252
|
isinstance(i, str) for i in inputs["prompts"]
|
|
253
253
|
):
|
|
254
|
-
|
|
254
|
+
msg = (
|
|
255
255
|
"Expected list of strings for 'prompts',"
|
|
256
256
|
f" got {type(inputs['prompts']).__name__}"
|
|
257
257
|
)
|
|
258
|
+
raise InputFormatError(msg)
|
|
258
259
|
prompts = inputs["prompts"]
|
|
259
260
|
elif len(inputs) == 1:
|
|
260
261
|
prompt_ = next(iter(inputs.values()))
|
|
@@ -263,17 +264,15 @@ def _get_prompt(inputs: dict[str, Any]) -> str:
|
|
|
263
264
|
elif isinstance(prompt_, list) and all(isinstance(i, str) for i in prompt_):
|
|
264
265
|
prompts = prompt_
|
|
265
266
|
else:
|
|
266
|
-
|
|
267
|
+
msg = f"LLM Run expects string prompt input. Got {inputs}"
|
|
268
|
+
raise InputFormatError(msg)
|
|
267
269
|
else:
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
)
|
|
270
|
+
msg = f"LLM Run expects 'prompt' or 'prompts' in inputs. Got {inputs}"
|
|
271
|
+
raise InputFormatError(msg)
|
|
271
272
|
if len(prompts) == 1:
|
|
272
273
|
return prompts[0]
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
f"LLM Run expects single prompt input. Got {len(prompts)} prompts."
|
|
276
|
-
)
|
|
274
|
+
msg = f"LLM Run expects single prompt input. Got {len(prompts)} prompts."
|
|
275
|
+
raise InputFormatError(msg)
|
|
277
276
|
|
|
278
277
|
|
|
279
278
|
class ChatModelInput(TypedDict):
|
|
@@ -298,7 +297,8 @@ def _get_messages(inputs: dict[str, Any]) -> dict:
|
|
|
298
297
|
InputFormatError: If the input format is invalid.
|
|
299
298
|
"""
|
|
300
299
|
if not inputs:
|
|
301
|
-
|
|
300
|
+
msg = "Inputs should not be empty."
|
|
301
|
+
raise InputFormatError(msg)
|
|
302
302
|
input_copy = inputs.copy()
|
|
303
303
|
if "messages" in inputs:
|
|
304
304
|
input_copy["input"] = input_copy.pop("messages")
|
|
@@ -313,16 +313,17 @@ def _get_messages(inputs: dict[str, Any]) -> dict:
|
|
|
313
313
|
if len(raw_messages) == 1:
|
|
314
314
|
input_copy["input"] = messages_from_dict(raw_messages[0])
|
|
315
315
|
else:
|
|
316
|
-
|
|
316
|
+
msg = (
|
|
317
317
|
"Batch messages not supported. Please provide a"
|
|
318
318
|
" single list of messages."
|
|
319
319
|
)
|
|
320
|
+
raise InputFormatError(msg)
|
|
320
321
|
return input_copy
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
322
|
+
msg = (
|
|
323
|
+
f"Chat Run expects single List[dict] or List[List[dict]] 'messages'"
|
|
324
|
+
f" input. Got {inputs}"
|
|
325
|
+
)
|
|
326
|
+
raise InputFormatError(msg)
|
|
326
327
|
|
|
327
328
|
|
|
328
329
|
## Shared data validation utilities
|
|
@@ -336,20 +337,21 @@ def _validate_example_inputs_for_language_model(
|
|
|
336
337
|
isinstance(prompt_input, list)
|
|
337
338
|
and all(isinstance(msg, BaseMessage) for msg in prompt_input)
|
|
338
339
|
):
|
|
339
|
-
|
|
340
|
+
msg = (
|
|
340
341
|
"When using an input_mapper to prepare dataset example inputs"
|
|
341
342
|
" for an LLM or chat model, the output must a single string or"
|
|
342
343
|
" a list of chat messages."
|
|
343
344
|
f"\nGot: {prompt_input} of type {type(prompt_input)}."
|
|
344
345
|
)
|
|
346
|
+
raise InputFormatError(msg)
|
|
345
347
|
else:
|
|
346
348
|
try:
|
|
347
349
|
_get_prompt(first_example.inputs or {})
|
|
348
350
|
except InputFormatError:
|
|
349
351
|
try:
|
|
350
352
|
_get_messages(first_example.inputs or {})
|
|
351
|
-
except InputFormatError:
|
|
352
|
-
|
|
353
|
+
except InputFormatError as err2:
|
|
354
|
+
msg = (
|
|
353
355
|
"Example inputs do not match language model input format. "
|
|
354
356
|
"Expected a dictionary with messages or a single prompt."
|
|
355
357
|
f" Got: {first_example.inputs}"
|
|
@@ -357,6 +359,7 @@ def _validate_example_inputs_for_language_model(
|
|
|
357
359
|
" to convert the example.inputs to a compatible format"
|
|
358
360
|
" for the llm or chat model you wish to evaluate."
|
|
359
361
|
)
|
|
362
|
+
raise InputFormatError(msg) from err2
|
|
360
363
|
|
|
361
364
|
|
|
362
365
|
def _validate_example_inputs_for_chain(
|
|
@@ -369,16 +372,18 @@ def _validate_example_inputs_for_chain(
|
|
|
369
372
|
first_inputs = input_mapper(first_example.inputs or {})
|
|
370
373
|
missing_keys = set(chain.input_keys).difference(first_inputs)
|
|
371
374
|
if not isinstance(first_inputs, dict):
|
|
372
|
-
|
|
375
|
+
msg = (
|
|
373
376
|
"When using an input_mapper to prepare dataset example"
|
|
374
377
|
" inputs for a chain, the mapped value must be a dictionary."
|
|
375
378
|
f"\nGot: {first_inputs} of type {type(first_inputs)}."
|
|
376
379
|
)
|
|
380
|
+
raise InputFormatError(msg)
|
|
377
381
|
if missing_keys:
|
|
378
|
-
|
|
382
|
+
msg = (
|
|
379
383
|
"Missing keys after loading example using input_mapper."
|
|
380
384
|
f"\nExpected: {chain.input_keys}. Got: {first_inputs.keys()}"
|
|
381
385
|
)
|
|
386
|
+
raise InputFormatError(msg)
|
|
382
387
|
else:
|
|
383
388
|
first_inputs = first_example.inputs
|
|
384
389
|
missing_keys = set(chain.input_keys).difference(first_inputs)
|
|
@@ -387,13 +392,14 @@ def _validate_example_inputs_for_chain(
|
|
|
387
392
|
# Refrain from calling to validate.
|
|
388
393
|
pass
|
|
389
394
|
elif missing_keys:
|
|
390
|
-
|
|
395
|
+
msg = (
|
|
391
396
|
"Example inputs missing expected chain input keys."
|
|
392
397
|
" Please provide an input_mapper to convert the example.inputs"
|
|
393
398
|
" to a compatible format for the chain you wish to evaluate."
|
|
394
399
|
f"Expected: {chain.input_keys}. "
|
|
395
400
|
f"Got: {first_inputs.keys()}"
|
|
396
401
|
)
|
|
402
|
+
raise InputFormatError(msg)
|
|
397
403
|
|
|
398
404
|
|
|
399
405
|
def _validate_example_inputs(
|
|
@@ -410,7 +416,7 @@ def _validate_example_inputs(
|
|
|
410
416
|
# Otherwise it's a runnable
|
|
411
417
|
_validate_example_inputs_for_chain(example, chain, input_mapper)
|
|
412
418
|
elif isinstance(chain, Runnable):
|
|
413
|
-
logger.debug(
|
|
419
|
+
logger.debug("Skipping input validation for %s", chain)
|
|
414
420
|
|
|
415
421
|
|
|
416
422
|
## Shared Evaluator Setup Utilities
|
|
@@ -455,16 +461,19 @@ def _determine_input_key(
|
|
|
455
461
|
input_key = config.input_key
|
|
456
462
|
if run_inputs and input_key not in run_inputs:
|
|
457
463
|
logger.warning(
|
|
458
|
-
|
|
459
|
-
|
|
464
|
+
"Input key %s not in chain's specified input keys %s. "
|
|
465
|
+
"Evaluation behavior may be undefined.",
|
|
466
|
+
input_key,
|
|
467
|
+
run_inputs,
|
|
460
468
|
)
|
|
461
469
|
elif run_inputs and len(run_inputs) == 1:
|
|
462
470
|
input_key = run_inputs[0]
|
|
463
471
|
elif run_inputs is not None and len(run_inputs) > 1:
|
|
464
472
|
logger.warning(
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
" Specify an input_key in the RunEvalConfig to avoid this warning."
|
|
473
|
+
"Chain expects multiple input keys: %s,"
|
|
474
|
+
" Evaluator is likely to fail. Evaluation behavior may be undefined."
|
|
475
|
+
" Specify an input_key in the RunEvalConfig to avoid this warning.",
|
|
476
|
+
run_inputs,
|
|
468
477
|
)
|
|
469
478
|
|
|
470
479
|
return input_key
|
|
@@ -479,16 +488,19 @@ def _determine_prediction_key(
|
|
|
479
488
|
prediction_key = config.prediction_key
|
|
480
489
|
if run_outputs and prediction_key not in run_outputs:
|
|
481
490
|
logger.warning(
|
|
482
|
-
|
|
483
|
-
|
|
491
|
+
"Prediction key %s not in chain's specified output keys %s. "
|
|
492
|
+
"Evaluation behavior may be undefined.",
|
|
493
|
+
prediction_key,
|
|
494
|
+
run_outputs,
|
|
484
495
|
)
|
|
485
496
|
elif run_outputs and len(run_outputs) == 1:
|
|
486
497
|
prediction_key = run_outputs[0]
|
|
487
498
|
elif run_outputs is not None and len(run_outputs) > 1:
|
|
488
499
|
logger.warning(
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
" in the RunEvalConfig to avoid this warning."
|
|
500
|
+
"Chain expects multiple output keys: %s,"
|
|
501
|
+
" Evaluation behavior may be undefined. Specify a prediction_key"
|
|
502
|
+
" in the RunEvalConfig to avoid this warning.",
|
|
503
|
+
run_outputs,
|
|
492
504
|
)
|
|
493
505
|
return prediction_key
|
|
494
506
|
|
|
@@ -500,12 +512,13 @@ def _determine_reference_key(
|
|
|
500
512
|
if config.reference_key:
|
|
501
513
|
reference_key = config.reference_key
|
|
502
514
|
if example_outputs and reference_key not in example_outputs:
|
|
503
|
-
|
|
515
|
+
msg = (
|
|
504
516
|
f"Reference key {reference_key} not in Dataset"
|
|
505
517
|
f" example outputs: {example_outputs}"
|
|
506
518
|
)
|
|
519
|
+
raise ValueError(msg)
|
|
507
520
|
elif example_outputs and len(example_outputs) == 1:
|
|
508
|
-
reference_key =
|
|
521
|
+
reference_key = next(iter(example_outputs))
|
|
509
522
|
else:
|
|
510
523
|
reference_key = None
|
|
511
524
|
return reference_key
|
|
@@ -544,15 +557,17 @@ def _construct_run_evaluator(
|
|
|
544
557
|
# Assume we can decorate
|
|
545
558
|
return run_evaluator_dec(eval_config)
|
|
546
559
|
else:
|
|
547
|
-
|
|
560
|
+
msg = f"Unknown evaluator type: {type(eval_config)}"
|
|
561
|
+
raise ValueError(msg) # noqa: TRY004
|
|
548
562
|
|
|
549
563
|
if isinstance(evaluator_, StringEvaluator):
|
|
550
564
|
if evaluator_.requires_reference and reference_key is None:
|
|
551
|
-
|
|
565
|
+
msg = (
|
|
552
566
|
f"Must specify reference_key in smith_eval.RunEvalConfig to use"
|
|
553
567
|
f" evaluator of type {eval_type_tag} with"
|
|
554
568
|
f" dataset with multiple output keys: {example_outputs}."
|
|
555
569
|
)
|
|
570
|
+
raise ValueError(msg)
|
|
556
571
|
run_evaluator = smith_eval.StringRunEvaluatorChain.from_run_and_data_type(
|
|
557
572
|
evaluator_,
|
|
558
573
|
run_type,
|
|
@@ -563,18 +578,18 @@ def _construct_run_evaluator(
|
|
|
563
578
|
tags=[eval_type_tag],
|
|
564
579
|
)
|
|
565
580
|
elif isinstance(evaluator_, PairwiseStringEvaluator):
|
|
566
|
-
|
|
581
|
+
msg = (
|
|
567
582
|
f"Run evaluator for {eval_type_tag} is not implemented."
|
|
568
583
|
" PairwiseStringEvaluators compare the outputs of two different models"
|
|
569
584
|
" rather than the output of a single model."
|
|
570
585
|
" Did you mean to use a StringEvaluator instead?"
|
|
571
586
|
"\nSee: https://python.langchain.com/docs/guides/evaluation/string/"
|
|
572
587
|
)
|
|
588
|
+
raise NotImplementedError(msg)
|
|
573
589
|
|
|
574
590
|
else:
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
)
|
|
591
|
+
msg = f"Run evaluator for {eval_type_tag} is not implemented"
|
|
592
|
+
raise NotImplementedError(msg)
|
|
578
593
|
return run_evaluator
|
|
579
594
|
|
|
580
595
|
|
|
@@ -611,10 +626,13 @@ def _load_run_evaluators(
|
|
|
611
626
|
input_key, prediction_key, reference_key = None, None, None
|
|
612
627
|
if config.evaluators or (
|
|
613
628
|
config.custom_evaluators
|
|
614
|
-
and any(
|
|
629
|
+
and any(isinstance(e, StringEvaluator) for e in config.custom_evaluators)
|
|
615
630
|
):
|
|
616
631
|
input_key, prediction_key, reference_key = _get_keys(
|
|
617
|
-
config,
|
|
632
|
+
config,
|
|
633
|
+
run_inputs,
|
|
634
|
+
run_outputs,
|
|
635
|
+
example_outputs,
|
|
618
636
|
)
|
|
619
637
|
for eval_config in config.evaluators:
|
|
620
638
|
run_evaluator = _construct_run_evaluator(
|
|
@@ -641,15 +659,16 @@ def _load_run_evaluators(
|
|
|
641
659
|
input_key=input_key,
|
|
642
660
|
prediction_key=prediction_key,
|
|
643
661
|
reference_key=reference_key,
|
|
644
|
-
)
|
|
662
|
+
),
|
|
645
663
|
)
|
|
646
664
|
elif callable(custom_evaluator):
|
|
647
665
|
run_evaluators.append(run_evaluator_dec(custom_evaluator))
|
|
648
666
|
else:
|
|
649
|
-
|
|
667
|
+
msg = (
|
|
650
668
|
f"Unsupported custom evaluator: {custom_evaluator}."
|
|
651
669
|
f" Expected RunEvaluator or StringEvaluator."
|
|
652
670
|
)
|
|
671
|
+
raise ValueError(msg) # noqa: TRY004
|
|
653
672
|
|
|
654
673
|
return run_evaluators
|
|
655
674
|
|
|
@@ -683,41 +702,45 @@ async def _arun_llm(
|
|
|
683
702
|
"""
|
|
684
703
|
if input_mapper is not None:
|
|
685
704
|
prompt_or_messages = input_mapper(inputs)
|
|
686
|
-
if (
|
|
687
|
-
isinstance(prompt_or_messages,
|
|
688
|
-
or isinstance(prompt_or_messages, list)
|
|
705
|
+
if isinstance(prompt_or_messages, str) or (
|
|
706
|
+
isinstance(prompt_or_messages, list)
|
|
689
707
|
and all(isinstance(msg, BaseMessage) for msg in prompt_or_messages)
|
|
690
708
|
):
|
|
691
709
|
return await llm.ainvoke(
|
|
692
710
|
prompt_or_messages,
|
|
693
711
|
config=RunnableConfig(
|
|
694
|
-
callbacks=callbacks,
|
|
712
|
+
callbacks=callbacks,
|
|
713
|
+
tags=tags or [],
|
|
714
|
+
metadata=metadata or {},
|
|
695
715
|
),
|
|
696
716
|
)
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
717
|
+
msg = (
|
|
718
|
+
"Input mapper returned invalid format"
|
|
719
|
+
f" {prompt_or_messages}"
|
|
720
|
+
"\nExpected a single string or list of chat messages."
|
|
721
|
+
)
|
|
722
|
+
raise InputFormatError(msg)
|
|
703
723
|
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
)
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
724
|
+
try:
|
|
725
|
+
prompt = _get_prompt(inputs)
|
|
726
|
+
llm_output: Union[str, BaseMessage] = await llm.ainvoke(
|
|
727
|
+
prompt,
|
|
728
|
+
config=RunnableConfig(
|
|
729
|
+
callbacks=callbacks,
|
|
730
|
+
tags=tags or [],
|
|
731
|
+
metadata=metadata or {},
|
|
732
|
+
),
|
|
733
|
+
)
|
|
734
|
+
except InputFormatError:
|
|
735
|
+
llm_inputs = _get_messages(inputs)
|
|
736
|
+
llm_output = await llm.ainvoke(
|
|
737
|
+
**llm_inputs,
|
|
738
|
+
config=RunnableConfig(
|
|
739
|
+
callbacks=callbacks,
|
|
740
|
+
tags=tags or [],
|
|
741
|
+
metadata=metadata or {},
|
|
742
|
+
),
|
|
743
|
+
)
|
|
721
744
|
return llm_output
|
|
722
745
|
|
|
723
746
|
|
|
@@ -742,12 +765,16 @@ async def _arun_chain(
|
|
|
742
765
|
output = await chain.ainvoke(
|
|
743
766
|
val,
|
|
744
767
|
config=RunnableConfig(
|
|
745
|
-
callbacks=callbacks,
|
|
768
|
+
callbacks=callbacks,
|
|
769
|
+
tags=tags or [],
|
|
770
|
+
metadata=metadata or {},
|
|
746
771
|
),
|
|
747
772
|
)
|
|
748
773
|
else:
|
|
749
774
|
runnable_config = RunnableConfig(
|
|
750
|
-
tags=tags or [],
|
|
775
|
+
tags=tags or [],
|
|
776
|
+
callbacks=callbacks,
|
|
777
|
+
metadata=metadata or {},
|
|
751
778
|
)
|
|
752
779
|
output = await chain.ainvoke(inputs_, config=runnable_config)
|
|
753
780
|
return output
|
|
@@ -799,9 +826,11 @@ async def _arun_llm_or_chain(
|
|
|
799
826
|
result = output
|
|
800
827
|
except Exception as e:
|
|
801
828
|
logger.warning(
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
829
|
+
"%s failed for example %s with inputs %s\n%s",
|
|
830
|
+
chain_or_llm,
|
|
831
|
+
example.id,
|
|
832
|
+
example.inputs,
|
|
833
|
+
e,
|
|
805
834
|
)
|
|
806
835
|
result = EvalError(Error=e)
|
|
807
836
|
return result
|
|
@@ -837,30 +866,34 @@ def _run_llm(
|
|
|
837
866
|
# Most of this is legacy code; we could probably remove a lot of it.
|
|
838
867
|
if input_mapper is not None:
|
|
839
868
|
prompt_or_messages = input_mapper(inputs)
|
|
840
|
-
if (
|
|
841
|
-
isinstance(prompt_or_messages,
|
|
842
|
-
or isinstance(prompt_or_messages, list)
|
|
869
|
+
if isinstance(prompt_or_messages, str) or (
|
|
870
|
+
isinstance(prompt_or_messages, list)
|
|
843
871
|
and all(isinstance(msg, BaseMessage) for msg in prompt_or_messages)
|
|
844
872
|
):
|
|
845
873
|
llm_output: Union[str, BaseMessage] = llm.invoke(
|
|
846
874
|
prompt_or_messages,
|
|
847
875
|
config=RunnableConfig(
|
|
848
|
-
callbacks=callbacks,
|
|
876
|
+
callbacks=callbacks,
|
|
877
|
+
tags=tags or [],
|
|
878
|
+
metadata=metadata or {},
|
|
849
879
|
),
|
|
850
880
|
)
|
|
851
881
|
else:
|
|
852
|
-
|
|
882
|
+
msg = (
|
|
853
883
|
"Input mapper returned invalid format: "
|
|
854
884
|
f" {prompt_or_messages}"
|
|
855
885
|
"\nExpected a single string or list of chat messages."
|
|
856
886
|
)
|
|
887
|
+
raise InputFormatError(msg)
|
|
857
888
|
else:
|
|
858
889
|
try:
|
|
859
890
|
llm_prompts = _get_prompt(inputs)
|
|
860
891
|
llm_output = llm.invoke(
|
|
861
892
|
llm_prompts,
|
|
862
893
|
config=RunnableConfig(
|
|
863
|
-
callbacks=callbacks,
|
|
894
|
+
callbacks=callbacks,
|
|
895
|
+
tags=tags or [],
|
|
896
|
+
metadata=metadata or {},
|
|
864
897
|
),
|
|
865
898
|
)
|
|
866
899
|
except InputFormatError:
|
|
@@ -893,12 +926,16 @@ def _run_chain(
|
|
|
893
926
|
output = chain.invoke(
|
|
894
927
|
val,
|
|
895
928
|
config=RunnableConfig(
|
|
896
|
-
callbacks=callbacks,
|
|
929
|
+
callbacks=callbacks,
|
|
930
|
+
tags=tags or [],
|
|
931
|
+
metadata=metadata or {},
|
|
897
932
|
),
|
|
898
933
|
)
|
|
899
934
|
else:
|
|
900
935
|
runnable_config = RunnableConfig(
|
|
901
|
-
tags=tags or [],
|
|
936
|
+
tags=tags or [],
|
|
937
|
+
callbacks=callbacks,
|
|
938
|
+
metadata=metadata or {},
|
|
902
939
|
)
|
|
903
940
|
output = chain.invoke(inputs_, config=runnable_config)
|
|
904
941
|
return output
|
|
@@ -952,9 +989,12 @@ def _run_llm_or_chain(
|
|
|
952
989
|
except Exception as e:
|
|
953
990
|
error_type = type(e).__name__
|
|
954
991
|
logger.warning(
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
992
|
+
"%s failed for example %s with inputs %s\nError Type: %s, Message: %s",
|
|
993
|
+
chain_or_llm,
|
|
994
|
+
example.id,
|
|
995
|
+
example.inputs,
|
|
996
|
+
error_type,
|
|
997
|
+
e,
|
|
958
998
|
)
|
|
959
999
|
result = EvalError(Error=e)
|
|
960
1000
|
return result
|
|
@@ -974,7 +1014,8 @@ def _prepare_eval_run(
|
|
|
974
1014
|
|
|
975
1015
|
examples = list(client.list_examples(dataset_id=dataset.id, as_of=dataset_version))
|
|
976
1016
|
if not examples:
|
|
977
|
-
|
|
1017
|
+
msg = f"Dataset {dataset_name} has no example rows."
|
|
1018
|
+
raise ValueError(msg)
|
|
978
1019
|
modified_at = [ex.modified_at for ex in examples if ex.modified_at]
|
|
979
1020
|
# Should always be defined in practice when fetched,
|
|
980
1021
|
# but the typing permits None
|
|
@@ -999,7 +1040,7 @@ def _prepare_eval_run(
|
|
|
999
1040
|
)
|
|
1000
1041
|
except (HTTPError, ValueError, LangSmithError) as e:
|
|
1001
1042
|
if "already exists " not in str(e):
|
|
1002
|
-
raise
|
|
1043
|
+
raise
|
|
1003
1044
|
uid = uuid.uuid4()
|
|
1004
1045
|
example_msg = f"""
|
|
1005
1046
|
run_on_dataset(
|
|
@@ -1007,10 +1048,11 @@ run_on_dataset(
|
|
|
1007
1048
|
project_name="{project_name} - {uid}", # Update since {project_name} already exists
|
|
1008
1049
|
)
|
|
1009
1050
|
"""
|
|
1010
|
-
|
|
1051
|
+
msg = (
|
|
1011
1052
|
f"Test project {project_name} already exists. Please use a different name:"
|
|
1012
1053
|
f"\n\n{example_msg}"
|
|
1013
1054
|
)
|
|
1055
|
+
raise ValueError(msg) from e
|
|
1014
1056
|
comparison_url = dataset.url + f"/compare?selectedSessions={project.id}"
|
|
1015
1057
|
print( # noqa: T201
|
|
1016
1058
|
f"View the evaluation results for project '{project_name}'"
|
|
@@ -1081,9 +1123,9 @@ class _DatasetRunContainer:
|
|
|
1081
1123
|
run_id=None,
|
|
1082
1124
|
project_id=self.project.id,
|
|
1083
1125
|
)
|
|
1084
|
-
except Exception
|
|
1085
|
-
logger.
|
|
1086
|
-
|
|
1126
|
+
except Exception:
|
|
1127
|
+
logger.exception(
|
|
1128
|
+
"Error running batch evaluator %s", repr(evaluator)
|
|
1087
1129
|
)
|
|
1088
1130
|
return aggregate_feedback
|
|
1089
1131
|
|
|
@@ -1096,7 +1138,7 @@ class _DatasetRunContainer:
|
|
|
1096
1138
|
eval_results = callback.logged_eval_results
|
|
1097
1139
|
for (_, example_id), v in eval_results.items():
|
|
1098
1140
|
all_eval_results.setdefault(str(example_id), {}).update(
|
|
1099
|
-
{"feedback": v}
|
|
1141
|
+
{"feedback": v},
|
|
1100
1142
|
)
|
|
1101
1143
|
elif isinstance(callback, LangChainTracer):
|
|
1102
1144
|
run = callback.latest_run
|
|
@@ -1111,7 +1153,7 @@ class _DatasetRunContainer:
|
|
|
1111
1153
|
"execution_time": execution_time,
|
|
1112
1154
|
"run_id": run_id,
|
|
1113
1155
|
"run": run,
|
|
1114
|
-
}
|
|
1156
|
+
},
|
|
1115
1157
|
)
|
|
1116
1158
|
all_runs[str(callback.example_id)] = run
|
|
1117
1159
|
return cast(dict[str, _RowResult], all_eval_results), all_runs
|
|
@@ -1134,21 +1176,26 @@ class _DatasetRunContainer:
|
|
|
1134
1176
|
aggregate_metrics=aggregate_feedback,
|
|
1135
1177
|
)
|
|
1136
1178
|
|
|
1137
|
-
def finish(
|
|
1179
|
+
def finish(
|
|
1180
|
+
self,
|
|
1181
|
+
batch_results: list,
|
|
1182
|
+
verbose: bool = False, # noqa: FBT001,FBT002
|
|
1183
|
+
) -> TestResult:
|
|
1138
1184
|
results = self._collect_test_results(batch_results)
|
|
1139
1185
|
if verbose:
|
|
1140
1186
|
try:
|
|
1141
1187
|
agg_feedback = results.get_aggregate_feedback()
|
|
1142
1188
|
_display_aggregate_results(agg_feedback)
|
|
1143
1189
|
except Exception as e:
|
|
1144
|
-
logger.debug(
|
|
1190
|
+
logger.debug("Failed to print aggregate feedback: %s", e, exc_info=True)
|
|
1145
1191
|
try:
|
|
1146
1192
|
# Closing the project permits name changing and metric optimizations
|
|
1147
1193
|
self.client.update_project(
|
|
1148
|
-
self.project.id,
|
|
1194
|
+
self.project.id,
|
|
1195
|
+
end_time=datetime.now(timezone.utc),
|
|
1149
1196
|
)
|
|
1150
1197
|
except Exception as e:
|
|
1151
|
-
logger.debug(
|
|
1198
|
+
logger.debug("Failed to close project: %s", e, exc_info=True)
|
|
1152
1199
|
return results
|
|
1153
1200
|
|
|
1154
1201
|
@classmethod
|
|
@@ -1188,7 +1235,10 @@ class _DatasetRunContainer:
|
|
|
1188
1235
|
run_metadata["revision_id"] = revision_id
|
|
1189
1236
|
wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory)
|
|
1190
1237
|
run_evaluators = _setup_evaluation(
|
|
1191
|
-
wrapped_model,
|
|
1238
|
+
wrapped_model,
|
|
1239
|
+
examples,
|
|
1240
|
+
evaluation,
|
|
1241
|
+
dataset.data_type or DataType.kv,
|
|
1192
1242
|
)
|
|
1193
1243
|
_validate_example_inputs(examples[0], wrapped_model, input_mapper)
|
|
1194
1244
|
progress_bar = progress.ProgressBarCallback(len(examples))
|
|
@@ -1242,7 +1292,8 @@ def _display_aggregate_results(aggregate_results: pd.DataFrame) -> None:
|
|
|
1242
1292
|
display(aggregate_results)
|
|
1243
1293
|
else:
|
|
1244
1294
|
formatted_string = aggregate_results.to_string(
|
|
1245
|
-
float_format=lambda x: f"{x:.2f}",
|
|
1295
|
+
float_format=lambda x: f"{x:.2f}",
|
|
1296
|
+
justify="right",
|
|
1246
1297
|
)
|
|
1247
1298
|
print("\n Experiment Results:") # noqa: T201
|
|
1248
1299
|
print(formatted_string) # noqa: T201
|
|
@@ -1401,7 +1452,7 @@ def run_on_dataset(
|
|
|
1401
1452
|
),
|
|
1402
1453
|
container.examples,
|
|
1403
1454
|
container.configs,
|
|
1404
|
-
)
|
|
1455
|
+
),
|
|
1405
1456
|
)
|
|
1406
1457
|
|
|
1407
1458
|
return container.finish(batch_results, verbose=verbose)
|
|
@@ -1516,5 +1567,6 @@ or LangSmith's `RunEvaluator` classes.
|
|
|
1516
1567
|
""" # noqa: E501
|
|
1517
1568
|
run_on_dataset.__doc__ = _RUN_ON_DATASET_DOCSTRING
|
|
1518
1569
|
arun_on_dataset.__doc__ = _RUN_ON_DATASET_DOCSTRING.replace(
|
|
1519
|
-
"run_on_dataset(",
|
|
1570
|
+
"run_on_dataset(",
|
|
1571
|
+
"await arun_on_dataset(",
|
|
1520
1572
|
)
|