langchain 0.3.26__py3-none-any.whl → 0.3.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of langchain has been flagged by the registry.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +160 -133
- langchain/agents/agent_iterator.py +31 -14
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +2 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +6 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +9 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +7 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +16 -8
- langchain/agents/json_chat/base.py +18 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +9 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +80 -70
- langchain/agents/openai_functions_agent/base.py +46 -37
- langchain/agents/openai_functions_multi_agent/base.py +39 -26
- langchain/agents/openai_tools/base.py +8 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +6 -6
- langchain/agents/output_parsers/openai_functions.py +15 -7
- langchain/agents/output_parsers/openai_tools.py +9 -4
- langchain/agents/output_parsers/react_json_single_input.py +10 -5
- langchain/agents/output_parsers/react_single_input.py +15 -11
- langchain/agents/output_parsers/self_ask.py +3 -2
- langchain/agents/output_parsers/tools.py +18 -13
- langchain/agents/output_parsers/xml.py +99 -28
- langchain/agents/react/agent.py +4 -4
- langchain/agents/react/base.py +22 -17
- langchain/agents/react/output_parser.py +5 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +3 -2
- langchain/agents/self_ask_with_search/base.py +19 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +14 -11
- langchain/agents/structured_chat/output_parser.py +16 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +7 -6
- langchain/agents/tools.py +2 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +5 -5
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +4 -1
- langchain/callbacks/streaming_aiter_final_only.py +5 -3
- langchain/callbacks/streaming_stdout_final_only.py +5 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +1 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +36 -22
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +88 -54
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +23 -10
- langchain/chains/combine_documents/map_reduce.py +38 -30
- langchain/chains/combine_documents/map_rerank.py +33 -20
- langchain/chains/combine_documents/reduce.py +47 -26
- langchain/chains/combine_documents/refine.py +26 -17
- langchain/chains/combine_documents/stuff.py +19 -12
- langchain/chains/constitutional_ai/base.py +4 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +5 -3
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +41 -20
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +8 -9
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +26 -12
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +77 -61
- langchain/chains/llm_bash/__init__.py +2 -1
- langchain/chains/llm_checker/base.py +7 -5
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +16 -9
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +9 -6
- langchain/chains/llm_symbolic_math/__init__.py +2 -1
- langchain/chains/loading.py +151 -95
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +8 -9
- langchain/chains/natbot/base.py +8 -8
- langchain/chains/natbot/crawler.py +73 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +13 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +12 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +35 -35
- langchain/chains/openai_functions/qa_with_structure.py +19 -12
- langchain/chains/openai_functions/tagging.py +2 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +4 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +14 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +14 -5
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +17 -6
- langchain/chains/query_constructor/base.py +34 -33
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +37 -32
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +34 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +24 -20
- langchain/chains/router/embedding_router.py +12 -8
- langchain/chains/router/llm_router.py +17 -16
- langchain/chains/router/multi_prompt.py +2 -2
- langchain/chains/router/multi_retrieval_qa.py +10 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +6 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +75 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +5 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +160 -123
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +33 -24
- langchain/embeddings/cache.py +36 -31
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +23 -23
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +20 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +20 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +23 -20
- langchain/evaluation/loading.py +15 -11
- langchain/evaluation/parsing/base.py +4 -1
- langchain/evaluation/parsing/json_distance.py +5 -2
- langchain/evaluation/parsing/json_schema.py +12 -8
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +12 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +2 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +22 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +14 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +9 -7
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +13 -12
- langchain/memory/entity.py +84 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +1 -3
- langchain/memory/summary.py +13 -11
- langchain/memory/summary_buffer.py +17 -8
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +10 -5
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +5 -3
- langchain/output_parsers/fix.py +52 -52
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +43 -47
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +7 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +77 -78
- langchain/output_parsers/structured.py +11 -6
- langchain/output_parsers/yaml.py +15 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +2 -2
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +15 -15
- langchain/retrievers/document_compressors/embeddings_filter.py +21 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +7 -5
- langchain/retrievers/ensemble.py +28 -25
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +32 -26
- langchain/retrievers/multi_vector.py +20 -8
- langchain/retrievers/parent_document_retriever.py +18 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +138 -127
- langchain/retrievers/time_weighted_retriever.py +18 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/openai_functions.py +6 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +1 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +10 -7
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +11 -2
- langchain/smith/evaluation/runner_utils.py +179 -127
- langchain/smith/evaluation/string_run_evaluator.py +75 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +6 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +2 -1
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/METADATA +4 -4
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/RECORD +580 -580
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/WHEEL +1 -1
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/licenses/LICENSE +0 -0
langchain/evaluation/qa/eval_chain.py
CHANGED

@@ -11,6 +11,7 @@ from langchain_core.callbacks import Callbacks
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.prompts import PromptTemplate
 from pydantic import ConfigDict
+from typing_extensions import override
 
 from langchain.chains.llm import LLMChain
 from langchain.evaluation.qa.eval_prompt import CONTEXT_PROMPT, COT_PROMPT, PROMPT
@@ -23,7 +24,7 @@ def _get_score(text: str) -> Optional[tuple[str, int]]:
     if match:
         if match.group(1).upper() == "CORRECT":
             return "CORRECT", 1
-        elif match.group(1).upper() == "INCORRECT":
+        if match.group(1).upper() == "INCORRECT":
             return "INCORRECT", 0
     try:
         first_word = (
@@ -31,7 +32,7 @@ def _get_score(text: str) -> Optional[tuple[str, int]]:
         )
         if first_word.upper() == "CORRECT":
             return "CORRECT", 1
-        elif first_word.upper() == "INCORRECT":
+        if first_word.upper() == "INCORRECT":
             return "INCORRECT", 0
         last_word = (
             text.strip()
@@ -40,7 +41,7 @@ def _get_score(text: str) -> Optional[tuple[str, int]]:
         )
         if last_word.upper() == "CORRECT":
             return "CORRECT", 1
-        elif last_word.upper() == "INCORRECT":
+        if last_word.upper() == "INCORRECT":
             return "INCORRECT", 0
     except IndexError:
         pass
@@ -119,10 +120,11 @@ class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
         prompt = prompt or PROMPT
         expected_input_vars = {"query", "answer", "result"}
         if expected_input_vars != set(prompt.input_variables):
-            raise ValueError(
+            msg = (
                 f"Input variables should be {expected_input_vars}, "
                 f"but got {prompt.input_variables}"
             )
+            raise ValueError(msg)
         return cls(llm=llm, prompt=prompt, **kwargs)
 
     def evaluate(
@@ -153,6 +155,7 @@ class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
             parsed_result[RUN_KEY] = result[RUN_KEY]
         return parsed_result
 
+    @override
     def _evaluate_strings(
         self,
         *,
@@ -188,6 +191,7 @@ class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
         )
         return self._prepare_output(result)
 
+    @override
     async def _aevaluate_strings(
         self,
         *,
@@ -231,10 +235,11 @@ class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
     def _validate_input_vars(cls, prompt: PromptTemplate) -> None:
         expected_input_vars = {"query", "context", "result"}
         if expected_input_vars != set(prompt.input_variables):
-            raise ValueError(
+            msg = (
                 f"Input variables should be {expected_input_vars}, "
                 f"but got {prompt.input_variables}"
             )
+            raise ValueError(msg)
 
     @property
     def evaluation_name(self) -> str:
@@ -294,6 +299,7 @@ class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
             parsed_result[RUN_KEY] = result[RUN_KEY]
         return parsed_result
 
+    @override
     def _evaluate_strings(
         self,
         *,
@@ -315,6 +321,7 @@ class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
         )
         return self._prepare_output(result)
 
+    @override
     async def _aevaluate_strings(
         self,
         *,
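Two mechanical refactors recur throughout this release and account for most of the churn above: exception messages are assigned to a local msg variable before raising (ruff's EM101/TRY003 style), and an elif that follows a return is flattened to a plain if (RET505), which behaves identically. A minimal standalone sketch of both patterns; the validate_vars helper is hypothetical, not langchain API:

from typing import Optional


def grade(label: str) -> Optional[tuple[str, int]]:
    if label.upper() == "CORRECT":
        return "CORRECT", 1
    # Equivalent to `elif`: the previous branch always returns.
    if label.upper() == "INCORRECT":
        return "INCORRECT", 0
    return None


def validate_vars(expected: set, got: list) -> None:
    # Hypothetical helper: build the message first, keep the raise short.
    if expected != set(got):
        msg = f"Input variables should be {expected}, but got {got}"
        raise ValueError(msg)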
langchain/evaluation/qa/eval_prompt.py
CHANGED

@@ -1,4 +1,3 @@
-# flake8: noqa
 from langchain_core.prompts import PromptTemplate
 
 template = """You are a teacher grading a quiz.
@@ -10,12 +9,12 @@ STUDENT ANSWER: student's answer here
 TRUE ANSWER: true answer here
 GRADE: CORRECT or INCORRECT here
 
-Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!
+Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!
 
 QUESTION: {query}
 STUDENT ANSWER: {result}
 TRUE ANSWER: {answer}
-GRADE:"""
+GRADE:"""  # noqa: E501
 PROMPT = PromptTemplate(
     input_variables=["query", "result", "answer"], template=template
 )
@@ -29,12 +28,12 @@ CONTEXT: context the question is about here
 STUDENT ANSWER: student's answer here
 GRADE: CORRECT or INCORRECT here
 
-Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!
+Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!
 
 QUESTION: {query}
 CONTEXT: {context}
 STUDENT ANSWER: {result}
-GRADE:"""
+GRADE:"""  # noqa: E501
 CONTEXT_PROMPT = PromptTemplate(
     input_variables=["query", "context", "result"], template=context_template
 )
@@ -51,12 +50,12 @@ STUDENT ANSWER: student's answer here
 EXPLANATION: step by step reasoning here
 GRADE: CORRECT or INCORRECT here
 
-Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!
+Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!
 
 QUESTION: {query}
 CONTEXT: {context}
 STUDENT ANSWER: {result}
-EXPLANATION:"""
+EXPLANATION:"""  # noqa: E501
 COT_PROMPT = PromptTemplate(
     input_variables=["query", "context", "result"], template=cot_template
 )
@@ -72,7 +71,7 @@ template = """You are comparing a submitted answer to an expert answer on a give
 [Submission]: {result}
 ***
 [END DATA]
-Compare the content and correctness of the submitted SQL with the expert answer. Ignore any differences in whitespace, style, or output column names. The submitted answer may either be correct or incorrect. Determine which case applies. First, explain in detail the similarities or differences between the expert answer and the submission, ignoring superficial aspects such as whitespace, style or output column names. Do not state the final answer in your initial explanation. Then, respond with either "CORRECT" or "INCORRECT" (without quotes or punctuation) on its own line. This should correspond to whether the submitted SQL and the expert answer are semantically the same or different, respectively. Then, repeat your final answer on a new line."""
+Compare the content and correctness of the submitted SQL with the expert answer. Ignore any differences in whitespace, style, or output column names. The submitted answer may either be correct or incorrect. Determine which case applies. First, explain in detail the similarities or differences between the expert answer and the submission, ignoring superficial aspects such as whitespace, style or output column names. Do not state the final answer in your initial explanation. Then, respond with either "CORRECT" or "INCORRECT" (without quotes or punctuation) on its own line. This should correspond to whether the submitted SQL and the expert answer are semantically the same or different, respectively. Then, repeat your final answer on a new line."""  # noqa: E501
 
 SQL_PROMPT = PromptTemplate(
     input_variables=["query", "answer", "result"], template=template
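The prompt modules above swap a file-wide # flake8: noqa (which silences every lint rule in the whole file) for targeted # noqa: E501 comments on just the over-long template lines, so the rest of each file stays fully linted. A sketch of the difference, on a hypothetical module:

# Before: suppresses all rules on every line of the file.
# flake8: noqa

# After: suppresses only the line-length rule (E501), and only on this line.
TEMPLATE = """...one deliberately long prompt line that would otherwise fail the length check..."""  # noqa: E501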
langchain/evaluation/qa/generate_chain.py
CHANGED

@@ -13,7 +13,8 @@ from langchain.evaluation.qa.generate_prompt import PROMPT
 from langchain.output_parsers.regex import RegexParser
 
 _QA_OUTPUT_PARSER = RegexParser(
-    regex=r"QUESTION: (.*?)\n+ANSWER: (.*)", output_keys=["query", "answer"]
+    regex=r"QUESTION: (.*?)\n+ANSWER: (.*)",
+    output_keys=["query", "answer"],
 )
 
 
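For context on the reformatted constructor: RegexParser maps each regex capture group, in order, to the corresponding entry of output_keys. A rough usage sketch, assuming the parse behavior of langchain's RegexParser:

from langchain.output_parsers.regex import RegexParser

parser = RegexParser(
    regex=r"QUESTION: (.*?)\n+ANSWER: (.*)",
    output_keys=["query", "answer"],
)
# Group 1 -> "query", group 2 -> "answer".
result = parser.parse("QUESTION: What is 2 + 2?\nANSWER: 4")
# Expected: {"query": "What is 2 + 2?", "answer": "4"}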
langchain/evaluation/qa/generate_prompt.py
CHANGED

@@ -1,8 +1,6 @@
-# flake8: noqa
-from langchain.output_parsers.regex import RegexParser
 from langchain_core.prompts import PromptTemplate
 
-template = """You are a teacher coming up with questions to ask on a quiz.
+template = """You are a teacher coming up with questions to ask on a quiz.
 Given the following document, please generate a question and answer based on that document.
 
 Example Format:
@@ -16,7 +14,7 @@ These questions should be detailed and be based explicitly on information in the
 
 <Begin Document>
 {doc}
-<End Document>"""
+<End Document>"""  # noqa: E501
 PROMPT = PromptTemplate(
     input_variables=["doc"],
     template=template,
langchain/evaluation/schema.py
CHANGED
@@ -34,7 +34,7 @@ class EvaluatorType(str, Enum):
     """The pairwise string evaluator, which predicts the preferred prediction from
     between two models."""
     SCORE_STRING = "score_string"
-    """The scored string evaluator, which gives a score between 1 and 10
+    """The scored string evaluator, which gives a score between 1 and 10
     to a prediction."""
     LABELED_PAIRWISE_STRING = "labeled_pairwise_string"
     """The labeled pairwise string evaluator, which predicts the preferred prediction
@@ -109,25 +109,27 @@ class _EvalArgsMixin:
     def _check_evaluation_args(
         self,
         reference: Optional[str] = None,
-        input: Optional[str] = None,
+        input_: Optional[str] = None,
     ) -> None:
         """Check if the evaluation arguments are valid.
 
         Args:
             reference (Optional[str], optional): The reference label.
-            input (Optional[str], optional): The input string.
+            input_ (Optional[str], optional): The input string.
         Raises:
             ValueError: If the evaluator requires an input string but none is provided,
                 or if the evaluator requires a reference label but none is provided.
         """
-        if self.requires_input and input is None:
-            raise ValueError(f"{self.__class__.__name__} requires an input string.")
-        elif input is not None and not self.requires_input:
-            warn(self._skip_input_warning)
+        if self.requires_input and input_ is None:
+            msg = f"{self.__class__.__name__} requires an input string."
+            raise ValueError(msg)
+        if input_ is not None and not self.requires_input:
+            warn(self._skip_input_warning, stacklevel=3)
         if self.requires_reference and reference is None:
-            raise ValueError(f"{self.__class__.__name__} requires a reference string.")
-        elif reference is not None and not self.requires_reference:
-            warn(self._skip_reference_warning)
+            msg = f"{self.__class__.__name__} requires a reference string."
+            raise ValueError(msg)
+        if reference is not None and not self.requires_reference:
+            warn(self._skip_reference_warning, stacklevel=3)
 
 
 class StringEvaluator(_EvalArgsMixin, ABC):
@@ -150,7 +152,7 @@ class StringEvaluator(_EvalArgsMixin, ABC):
         *,
         prediction: Union[str, Any],
         reference: Optional[Union[str, Any]] = None,
-        input: Optional[Union[str, Any]] = None,
+        input: Optional[Union[str, Any]] = None,  # noqa: A002
         **kwargs: Any,
     ) -> dict:
         """Evaluate Chain or LLM output, based on optional input and label.
@@ -173,7 +175,7 @@ class StringEvaluator(_EvalArgsMixin, ABC):
         *,
         prediction: Union[str, Any],
         reference: Optional[Union[str, Any]] = None,
-        input: Optional[Union[str, Any]] = None,
+        input: Optional[Union[str, Any]] = None,  # noqa: A002
         **kwargs: Any,
     ) -> dict:
         """Asynchronously evaluate Chain or LLM output, based on optional input and label.
@@ -204,7 +206,7 @@ class StringEvaluator(_EvalArgsMixin, ABC):
         *,
         prediction: str,
         reference: Optional[str] = None,
-        input: Optional[str] = None,
+        input: Optional[str] = None,  # noqa: A002
         **kwargs: Any,
     ) -> dict:
         """Evaluate Chain or LLM output, based on optional input and label.
@@ -217,9 +219,12 @@ class StringEvaluator(_EvalArgsMixin, ABC):
         Returns:
             dict: The evaluation results containing the score or value.
         """  # noqa: E501
-        self._check_evaluation_args(reference=reference, input=input)
+        self._check_evaluation_args(reference=reference, input_=input)
         return self._evaluate_strings(
-            prediction=prediction, reference=reference, input=input, **kwargs
+            prediction=prediction,
+            reference=reference,
+            input=input,
+            **kwargs,
         )
 
     async def aevaluate_strings(
@@ -227,7 +232,7 @@ class StringEvaluator(_EvalArgsMixin, ABC):
         *,
         prediction: str,
         reference: Optional[str] = None,
-        input: Optional[str] = None,
+        input: Optional[str] = None,  # noqa: A002
         **kwargs: Any,
     ) -> dict:
         """Asynchronously evaluate Chain or LLM output, based on optional input and label.
@@ -240,9 +245,12 @@ class StringEvaluator(_EvalArgsMixin, ABC):
         Returns:
             dict: The evaluation results containing the score or value.
         """  # noqa: E501
-        self._check_evaluation_args(reference=reference, input=input)
+        self._check_evaluation_args(reference=reference, input_=input)
         return await self._aevaluate_strings(
-            prediction=prediction, reference=reference, input=input, **kwargs
+            prediction=prediction,
+            reference=reference,
+            input=input,
+            **kwargs,
         )
 
 
@@ -256,7 +264,7 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
         prediction: str,
         prediction_b: str,
         reference: Optional[str] = None,
-        input: Optional[str] = None,
+        input: Optional[str] = None,  # noqa: A002
         **kwargs: Any,
     ) -> dict:
         """Evaluate the output string pairs.
@@ -277,7 +285,7 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
         prediction: str,
         prediction_b: str,
         reference: Optional[str] = None,
-        input: Optional[str] = None,
+        input: Optional[str] = None,  # noqa: A002
         **kwargs: Any,
     ) -> dict:
         """Asynchronously evaluate the output string pairs.
@@ -307,7 +315,7 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
         prediction: str,
         prediction_b: str,
         reference: Optional[str] = None,
-        input: Optional[str] = None,
+        input: Optional[str] = None,  # noqa: A002
         **kwargs: Any,
     ) -> dict:
         """Evaluate the output string pairs.
@@ -321,7 +329,7 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
         Returns:
             dict: A dictionary containing the preference, scores, and/or other information.
         """  # noqa: E501
-        self._check_evaluation_args(reference=reference, input=input)
+        self._check_evaluation_args(reference=reference, input_=input)
         return self._evaluate_string_pairs(
             prediction=prediction,
             prediction_b=prediction_b,
@@ -336,7 +344,7 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
         prediction: str,
         prediction_b: str,
         reference: Optional[str] = None,
-        input: Optional[str] = None,
+        input: Optional[str] = None,  # noqa: A002
         **kwargs: Any,
     ) -> dict:
         """Asynchronously evaluate the output string pairs.
@@ -350,7 +358,7 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
         Returns:
             dict: A dictionary containing the preference, scores, and/or other information.
         """  # noqa: E501
-        self._check_evaluation_args(reference=reference, input=input)
+        self._check_evaluation_args(reference=reference, input_=input)
         return await self._aevaluate_string_pairs(
             prediction=prediction,
             prediction_b=prediction_b,
@@ -374,7 +382,7 @@ class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
         *,
         prediction: str,
         agent_trajectory: Sequence[tuple[AgentAction, str]],
-        input: str,
+        input: str,  # noqa: A002
         reference: Optional[str] = None,
         **kwargs: Any,
     ) -> dict:
@@ -396,7 +404,7 @@ class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
         *,
         prediction: str,
         agent_trajectory: Sequence[tuple[AgentAction, str]],
-        input: str,
+        input: str,  # noqa: A002
         reference: Optional[str] = None,
         **kwargs: Any,
     ) -> dict:
@@ -427,7 +435,7 @@ class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
         *,
         prediction: str,
         agent_trajectory: Sequence[tuple[AgentAction, str]],
-        input: str,
+        input: str,  # noqa: A002
         reference: Optional[str] = None,
         **kwargs: Any,
     ) -> dict:
@@ -443,7 +451,7 @@ class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
         Returns:
             dict: The evaluation result.
         """
-        self._check_evaluation_args(reference=reference, input=input)
+        self._check_evaluation_args(reference=reference, input_=input)
         return self._evaluate_agent_trajectory(
             prediction=prediction,
             input=input,
@@ -457,7 +465,7 @@ class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
         *,
         prediction: str,
         agent_trajectory: Sequence[tuple[AgentAction, str]],
-        input: str,
+        input: str,  # noqa: A002
         reference: Optional[str] = None,
         **kwargs: Any,
     ) -> dict:
@@ -473,7 +481,7 @@ class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
         Returns:
             dict: The evaluation result.
         """
-        self._check_evaluation_args(reference=reference, input=input)
+        self._check_evaluation_args(reference=reference, input_=input)
        return await self._aevaluate_agent_trajectory(
            prediction=prediction,
            input=input,
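Most of the schema.py churn concerns the parameter name input, which shadows the Python builtin: the private _check_evaluation_args helper renames it to input_, while the public methods keep input for backward compatibility and suppress the lint rule with # noqa: A002. The new stacklevel=3 on warn makes each warning point at the caller of the public method instead of the internal helper. A condensed sketch of the same layering, using a hypothetical Checker class:

from typing import Optional
from warnings import warn


class Checker:
    requires_input = False

    def _check_args(self, input_: Optional[str] = None) -> None:
        # Private helper: renamed parameter, no builtin shadowing.
        if input_ is not None and not self.requires_input:
            # stacklevel=3: warn() -> _check_args -> evaluate -> caller.
            warn("Ignoring input.", stacklevel=3)

    def evaluate(self, input: Optional[str] = None) -> None:  # noqa: A002
        # Public API keeps the original keyword for compatibility.
        self._check_args(input_=input)


Checker().evaluate(input="hello")  # warning is attributed to this call site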
langchain/evaluation/scoring/eval_chain.py
CHANGED

@@ -11,6 +11,7 @@ from langchain_core.language_models import BaseLanguageModel
 from langchain_core.output_parsers import BaseOutputParser
 from langchain_core.prompts.prompt import PromptTemplate
 from pydantic import ConfigDict, Field
+from typing_extensions import override
 
 from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
 from langchain.chains.llm import LLMChain
@@ -69,7 +70,7 @@ def resolve_criteria(
             Criteria.DEPTH,
         ]
         return {k.value: _SUPPORTED_CRITERIA[k] for k in _default_criteria}
-    elif isinstance(criteria, Criteria):
+    if isinstance(criteria, Criteria):
         criteria_ = {criteria.value: _SUPPORTED_CRITERIA[criteria]}
     elif isinstance(criteria, str):
         if criteria in _SUPPORTED_CRITERIA:
@@ -86,11 +87,12 @@ def resolve_criteria(
         }
     else:
         if not criteria:
-            raise ValueError(
+            msg = (
                 "Criteria cannot be empty. "
                 "Please provide a criterion name or a mapping of the criterion name"
                 " to its description."
             )
+            raise ValueError(msg)
         criteria_ = dict(criteria)
     return criteria_
 
@@ -131,12 +133,13 @@ class ScoreStringResultOutputParser(BaseOutputParser[dict]):
         if match:
             verdict = match.group(1)
 
-        if not match or verdict not in list("123456789") + ["10"]:
-            raise ValueError(
+        if not match or verdict not in [*list("123456789"), "10"]:
+            msg = (
                 f"Invalid output: {text}. "
                 "Output must contain a double bracketed string\
 with the verdict between 1 and 10."
             )
+            raise ValueError(msg)
 
         return {
             "reasoning": text,
@@ -172,7 +175,7 @@ class ScoreStringEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
 
     output_key: str = "results"  #: :meta private:
     output_parser: BaseOutputParser = Field(
-        default_factory=ScoreStringResultOutputParser
+        default_factory=ScoreStringResultOutputParser,
     )
     normalize_by: Optional[float] = None
     """The value to normalize the score by, if specified."""
@@ -259,16 +262,17 @@ class ScoreStringEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
         if not (hasattr(llm, "model_name") and not llm.model_name.startswith("gpt-4")):
             logger.warning(
                 "This chain was only tested with GPT-4. \
-Performance may be significantly worse with other models."
+Performance may be significantly worse with other models.",
             )
 
         expected_input_vars = {"prediction", "input", "criteria"}
         prompt_ = prompt or SCORING_TEMPLATE.partial(reference="")
         if expected_input_vars != set(prompt_.input_variables):
-            raise ValueError(
+            msg = (
                 f"Input variables should be {expected_input_vars}, "
                 f"but got {prompt_.input_variables}"
             )
+            raise ValueError(msg)
         criteria_ = resolve_criteria(criteria)
         criteria_str = "\n".join(
             f"{k}: {v}" if v else k for k, v in criteria_.items()
@@ -289,7 +293,7 @@ Performance may be significantly worse with other models."
     def _prepare_input(
         self,
         prediction: str,
-        input: Optional[str],
+        input_: Optional[str],
         reference: Optional[str],
     ) -> dict:
         """Prepare the input for the chain.
@@ -297,20 +301,20 @@ Performance may be significantly worse with other models."
         Args:
             prediction (str): The output string from the first model.
             prediction_b (str): The output string from the second model.
-            input (str, optional): The input or task string.
+            input_ (str, optional): The input or task string.
             reference (str, optional): The reference string, if any.
 
         Returns:
             dict: The prepared input for the chain.
 
         """
-        input_ = {
+        input_dict = {
             "prediction": prediction,
-            "input": input,
+            "input": input_,
         }
         if self.requires_reference:
-            input_["reference"] = reference
-        return input_
+            input_dict["reference"] = reference
+        return input_dict
 
     def _prepare_output(self, result: dict) -> dict:
         """Prepare the output."""
@@ -321,6 +325,7 @@ Performance may be significantly worse with other models."
             parsed["score"] = parsed["score"] / self.normalize_by
         return parsed
 
+    @override
     def _evaluate_strings(
         self,
         *,
@@ -358,7 +363,8 @@ Performance may be significantly worse with other models."
         )
         return self._prepare_output(result)
 
-    async def _aevaluate_strings(
+    @override
+    async def _aevaluate_strings(
         self,
         *,
         prediction: str,
@@ -448,10 +454,11 @@ class LabeledScoreStringEvalChain(ScoreStringEvalChain):
         }
         prompt_ = prompt or SCORING_TEMPLATE_WITH_REFERENCE
         if expected_input_vars != set(prompt_.input_variables):
-            raise ValueError(
+            msg = (
                 f"Input variables should be {expected_input_vars}, "
                 f"but got {prompt_.input_variables}"
             )
+            raise ValueError(msg)
         criteria_ = resolve_criteria(criteria)
         criteria_str = "\n".join(f"{k}: {v}" for k, v in criteria_.items()).strip()
         criteria_str = (
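The @override decorator added across these files comes from typing_extensions and is purely declarative: a type checker uses it to verify that the decorated method really overrides a base-class method, so renames or signature drift in the base are caught at check time rather than at runtime. A minimal sketch:

from typing_extensions import override


class Base:
    def _evaluate_strings(self, *, prediction: str) -> dict:
        return {"score": 0.0}


class Child(Base):
    @override  # a type checker flags this if Base._evaluate_strings is renamed
    def _evaluate_strings(self, *, prediction: str) -> dict:
        return {"score": float(bool(prediction))}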
langchain/evaluation/string_distance/base.py
CHANGED

@@ -10,6 +10,7 @@ from langchain_core.callbacks.manager import (
 )
 from langchain_core.utils import pre_init
 from pydantic import Field
+from typing_extensions import override
 
 from langchain.chains.base import Chain
 from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator
@@ -28,11 +29,12 @@ def _load_rapidfuzz() -> Any:
     """
     try:
         import rapidfuzz
-    except ImportError:
-        raise ImportError(
+    except ImportError as e:
+        msg = (
             "Please install the rapidfuzz library to use the FuzzyMatchStringEvaluator."
             "Please install it with `pip install rapidfuzz`."
         )
+        raise ImportError(msg) from e
     return rapidfuzz.distance
 
 
@@ -104,7 +106,7 @@ class _RapidFuzzChainMixin(Chain):
         return result
 
     @staticmethod
-    def _get_metric(distance: str, normalize_score: bool = False) -> Callable:
+    def _get_metric(distance: str, *, normalize_score: bool = False) -> Callable:
         """
         Get the distance metric function based on the distance type.
 
@@ -128,15 +130,15 @@ class _RapidFuzzChainMixin(Chain):
             StringDistance.INDEL: rf_distance.Indel,
         }
         if distance not in module_map:
-            raise ValueError(
+            msg = (
                 f"Invalid distance metric: {distance}"
                 f"\nMust be one of: {list(StringDistance)}"
             )
+            raise ValueError(msg)
         module = module_map[distance]
         if normalize_score:
             return module.normalized_distance
-        else:
-            return module.distance
+        return module.distance
 
     @property
     def metric(self) -> Callable:
@@ -147,7 +149,8 @@ class _RapidFuzzChainMixin(Chain):
             Callable: The distance metric function.
         """
         return _RapidFuzzChainMixin._get_metric(
-            self.distance, normalize_score=self.normalize_score
+            self.distance,
+            normalize_score=self.normalize_score,
         )
 
     def compute_metric(self, a: str, b: str) -> float:
@@ -258,6 +261,7 @@ class StringDistanceEvalChain(StringEvaluator, _RapidFuzzChainMixin):
         """
         return {"score": self.compute_metric(inputs["reference"], inputs["prediction"])}
 
+    @override
     def _evaluate_strings(
         self,
         *,
@@ -293,6 +297,7 @@ class StringDistanceEvalChain(StringEvaluator, _RapidFuzzChainMixin):
 
         return self._prepare_output(result)
 
+    @override
     async def _aevaluate_strings(
         self,
         *,
@@ -369,7 +374,7 @@ class PairwiseStringDistanceEvalChain(PairwiseStringEvaluator, _RapidFuzzChainMixin):
             Dict[str, Any]: The evaluation results containing the score.
         """
         return {
-            "score": self.compute_metric(inputs["prediction"], inputs["prediction_b"])
+            "score": self.compute_metric(inputs["prediction"], inputs["prediction_b"]),
         }
 
     async def _acall(
@@ -389,7 +394,7 @@ class PairwiseStringDistanceEvalChain(PairwiseStringEvaluator, _RapidFuzzChainMixin):
             Dict[str, Any]: The evaluation results containing the score.
         """
         return {
-            "score": self.compute_metric(inputs["prediction"], inputs["prediction_b"])
+            "score": self.compute_metric(inputs["prediction"], inputs["prediction_b"]),
         }
 
     def _evaluate_string_pairs(