langchain 0.3.26__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +174 -151
- langchain/agents/agent_iterator.py +50 -26
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +4 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +8 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +11 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +9 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +17 -9
- langchain/agents/json_chat/base.py +19 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +11 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +81 -71
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +2 -0
- langchain/agents/openai_functions_agent/base.py +47 -37
- langchain/agents/openai_functions_multi_agent/base.py +40 -27
- langchain/agents/openai_tools/base.py +9 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +8 -6
- langchain/agents/output_parsers/openai_functions.py +24 -9
- langchain/agents/output_parsers/openai_tools.py +16 -4
- langchain/agents/output_parsers/react_json_single_input.py +13 -5
- langchain/agents/output_parsers/react_single_input.py +18 -11
- langchain/agents/output_parsers/self_ask.py +5 -2
- langchain/agents/output_parsers/tools.py +32 -13
- langchain/agents/output_parsers/xml.py +102 -28
- langchain/agents/react/agent.py +5 -4
- langchain/agents/react/base.py +26 -17
- langchain/agents/react/output_parser.py +7 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +5 -2
- langchain/agents/self_ask_with_search/base.py +23 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +19 -11
- langchain/agents/structured_chat/output_parser.py +29 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +8 -6
- langchain/agents/tools.py +5 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +12 -6
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +17 -3
- langchain/callbacks/streaming_aiter_final_only.py +16 -5
- langchain/callbacks/streaming_stdout_final_only.py +10 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +12 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +41 -23
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +96 -56
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +30 -11
- langchain/chains/combine_documents/map_reduce.py +41 -30
- langchain/chains/combine_documents/map_rerank.py +39 -24
- langchain/chains/combine_documents/reduce.py +48 -26
- langchain/chains/combine_documents/refine.py +27 -17
- langchain/chains/combine_documents/stuff.py +24 -13
- langchain/chains/constitutional_ai/base.py +11 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +9 -4
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +108 -79
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +10 -10
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +28 -12
- langchain/chains/flare/prompts.py +2 -0
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +82 -61
- langchain/chains/llm_bash/__init__.py +3 -2
- langchain/chains/llm_checker/base.py +19 -6
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +25 -10
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +22 -7
- langchain/chains/llm_symbolic_math/__init__.py +3 -2
- langchain/chains/loading.py +155 -97
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +11 -9
- langchain/chains/natbot/base.py +11 -9
- langchain/chains/natbot/crawler.py +102 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +15 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +21 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +39 -35
- langchain/chains/openai_functions/qa_with_structure.py +22 -15
- langchain/chains/openai_functions/tagging.py +4 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +8 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +17 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +15 -6
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +21 -8
- langchain/chains/query_constructor/base.py +37 -34
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +101 -34
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +38 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +38 -22
- langchain/chains/router/embedding_router.py +15 -8
- langchain/chains/router/llm_router.py +23 -20
- langchain/chains/router/multi_prompt.py +5 -2
- langchain/chains/router/multi_retrieval_qa.py +28 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +7 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +77 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +9 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +213 -139
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blob_loaders/schema.py +1 -4
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +35 -24
- langchain/embeddings/cache.py +37 -32
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +26 -25
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +21 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +23 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +34 -20
- langchain/evaluation/exact_match/base.py +14 -1
- langchain/evaluation/loading.py +16 -11
- langchain/evaluation/parsing/base.py +20 -4
- langchain/evaluation/parsing/json_distance.py +24 -10
- langchain/evaluation/parsing/json_schema.py +13 -12
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +20 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +4 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/regex_match/base.py +9 -1
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +23 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +20 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +14 -7
- langchain/memory/buffer_window.py +2 -0
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +14 -13
- langchain/memory/entity.py +131 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +4 -3
- langchain/memory/summary.py +43 -11
- langchain/memory/summary_buffer.py +20 -8
- langchain/memory/token_buffer.py +2 -0
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +14 -7
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +10 -4
- langchain/output_parsers/fix.py +60 -53
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +59 -48
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +9 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +99 -80
- langchain/output_parsers/structured.py +21 -6
- langchain/output_parsers/yaml.py +19 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/pydantic_v1/__init__.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +5 -3
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +17 -15
- langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -0
- langchain/retrievers/document_compressors/embeddings_filter.py +24 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +8 -5
- langchain/retrievers/ensemble.py +30 -27
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +35 -27
- langchain/retrievers/multi_vector.py +24 -9
- langchain/retrievers/parent_document_retriever.py +33 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +157 -127
- langchain/retrievers/time_weighted_retriever.py +21 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/hub.py +12 -0
- langchain/runnables/openai_functions.py +12 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +2 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +9 -23
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +22 -4
- langchain/smith/evaluation/runner_utils.py +416 -247
- langchain/smith/evaluation/string_run_evaluator.py +102 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +7 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +3 -2
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA +4 -14
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/RECORD +590 -591
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/WHEEL +1 -1
- langchain/smith/evaluation/utils.py +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,11 @@
|
|
|
1
1
|
"""Prompt for trajectory evaluation chain."""
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
|
|
5
|
-
|
|
3
|
+
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
|
|
6
4
|
from langchain_core.prompts.chat import (
|
|
7
5
|
ChatPromptTemplate,
|
|
8
6
|
HumanMessagePromptTemplate,
|
|
9
7
|
)
|
|
10
8
|
|
|
11
|
-
|
|
12
9
|
EVAL_TEMPLATE = """An AI language model has been given access to the following set of tools to help answer a user's question.
|
|
13
10
|
|
|
14
11
|
The tools given to the AI model are:
|
|
@@ -39,7 +36,7 @@ i. Is the final answer helpful?
|
|
|
39
36
|
ii. Does the AI language use a logical sequence of tools to answer the question?
|
|
40
37
|
iii. Does the AI language model use the tools in a helpful way?
|
|
41
38
|
iv. Does the AI language model use too many steps to answer the question?
|
|
42
|
-
v. Are the appropriate tools used to answer the question?"""
|
|
39
|
+
v. Are the appropriate tools used to answer the question?""" # noqa: E501
|
|
43
40
|
|
|
44
41
|
EXAMPLE_INPUT = """An AI language model has been given access to the following set of tools to help answer a user's question.
|
|
45
42
|
|
|
@@ -84,7 +81,7 @@ i. Is the final answer helpful?
|
|
|
84
81
|
ii. Does the AI language use a logical sequence of tools to answer the question?
|
|
85
82
|
iii. Does the AI language model use the tools in a helpful way?
|
|
86
83
|
iv. Does the AI language model use too many steps to answer the question?
|
|
87
|
-
v. Are the appropriate tools used to answer the question?"""
|
|
84
|
+
v. Are the appropriate tools used to answer the question?""" # noqa: E501
|
|
88
85
|
|
|
89
86
|
EXAMPLE_OUTPUT = """First, let's evaluate the final answer. The final uses good reasoning but is wrong. 2,857 divided by 305 is not 17.5.\
|
|
90
87
|
The model should have used the calculator to figure this out. Second does the model use a logical sequence of tools to answer the question?\
|
|
@@ -92,10 +89,10 @@ The way model uses the search is not helpful. The model should have used the sea
|
|
|
92
89
|
The model didn't use the calculator tool and gave an incorrect answer. The search API should be used for current events or specific questions.\
|
|
93
90
|
The tools were not used in a helpful way. The model did not use too many steps to answer the question.\
|
|
94
91
|
The model did not use the appropriate tools to answer the question.\
|
|
95
|
-
|
|
92
|
+
|
|
96
93
|
Judgment: Given the good reasoning in the final answer but otherwise poor performance, we give the model a score of 2.
|
|
97
94
|
|
|
98
|
-
Score: 2"""
|
|
95
|
+
Score: 2""" # noqa: E501
|
|
99
96
|
|
|
100
97
|
EVAL_CHAT_PROMPT = ChatPromptTemplate.from_messages(
|
|
101
98
|
messages=[
|
|
@@ -134,7 +131,7 @@ i. Is the final answer helpful?
|
|
|
134
131
|
ii. Does the AI language use a logical sequence of tools to answer the question?
|
|
135
132
|
iii. Does the AI language model use the tools in a helpful way?
|
|
136
133
|
iv. Does the AI language model use too many steps to answer the question?
|
|
137
|
-
v. Are the appropriate tools used to answer the question?"""
|
|
134
|
+
v. Are the appropriate tools used to answer the question?""" # noqa: E501
|
|
138
135
|
|
|
139
136
|
|
|
140
137
|
TOOL_FREE_EVAL_CHAT_PROMPT = ChatPromptTemplate.from_messages(
|
|
@@ -11,6 +11,7 @@ from langchain_core.language_models import BaseLanguageModel
|
|
|
11
11
|
from langchain_core.output_parsers import BaseOutputParser
|
|
12
12
|
from langchain_core.prompts.prompt import PromptTemplate
|
|
13
13
|
from pydantic import ConfigDict, Field
|
|
14
|
+
from typing_extensions import override
|
|
14
15
|
|
|
15
16
|
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
|
|
16
17
|
from langchain.chains.llm import LLMChain
|
|
@@ -69,7 +70,7 @@ def resolve_pairwise_criteria(
|
|
|
69
70
|
Criteria.DEPTH,
|
|
70
71
|
]
|
|
71
72
|
return {k.value: _SUPPORTED_CRITERIA[k] for k in _default_criteria}
|
|
72
|
-
|
|
73
|
+
if isinstance(criteria, Criteria):
|
|
73
74
|
criteria_ = {criteria.value: _SUPPORTED_CRITERIA[criteria]}
|
|
74
75
|
elif isinstance(criteria, str):
|
|
75
76
|
if criteria in _SUPPORTED_CRITERIA:
|
|
@@ -86,11 +87,12 @@ def resolve_pairwise_criteria(
|
|
|
86
87
|
}
|
|
87
88
|
else:
|
|
88
89
|
if not criteria:
|
|
89
|
-
|
|
90
|
+
msg = (
|
|
90
91
|
"Criteria cannot be empty. "
|
|
91
92
|
"Please provide a criterion name or a mapping of the criterion name"
|
|
92
93
|
" to its description."
|
|
93
94
|
)
|
|
95
|
+
raise ValueError(msg)
|
|
94
96
|
criteria_ = dict(criteria)
|
|
95
97
|
return criteria_
|
|
96
98
|
|
|
@@ -132,11 +134,12 @@ class PairwiseStringResultOutputParser(BaseOutputParser[dict]):
|
|
|
132
134
|
verdict = match.group(1)
|
|
133
135
|
|
|
134
136
|
if not match or verdict not in {"A", "B", "C"}:
|
|
135
|
-
|
|
137
|
+
msg = (
|
|
136
138
|
f"Invalid output: {text}. "
|
|
137
139
|
"Output must contain a double bracketed string\
|
|
138
140
|
with the verdict 'A', 'B', or 'C'."
|
|
139
141
|
)
|
|
142
|
+
raise ValueError(msg)
|
|
140
143
|
# C means the models are tied. Return 'None' meaning no preference
|
|
141
144
|
verdict_ = None if verdict == "C" else verdict
|
|
142
145
|
score = {
|
|
@@ -184,10 +187,11 @@ class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain, LLMChain):
|
|
|
184
187
|
|
|
185
188
|
output_key: str = "results" #: :meta private:
|
|
186
189
|
output_parser: BaseOutputParser = Field(
|
|
187
|
-
default_factory=PairwiseStringResultOutputParser
|
|
190
|
+
default_factory=PairwiseStringResultOutputParser,
|
|
188
191
|
)
|
|
189
192
|
|
|
190
193
|
@classmethod
|
|
194
|
+
@override
|
|
191
195
|
def is_lc_serializable(cls) -> bool:
|
|
192
196
|
return False
|
|
193
197
|
|
|
@@ -256,16 +260,17 @@ class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain, LLMChain):
|
|
|
256
260
|
if not hasattr(llm, "model_name") or not llm.model_name.startswith("gpt-4"):
|
|
257
261
|
logger.warning(
|
|
258
262
|
"This chain was only tested with GPT-4. \
|
|
259
|
-
Performance may be significantly worse with other models."
|
|
263
|
+
Performance may be significantly worse with other models.",
|
|
260
264
|
)
|
|
261
265
|
|
|
262
266
|
expected_input_vars = {"prediction", "prediction_b", "input", "criteria"}
|
|
263
267
|
prompt_ = prompt or COMPARISON_TEMPLATE.partial(reference="")
|
|
264
268
|
if expected_input_vars != set(prompt_.input_variables):
|
|
265
|
-
|
|
269
|
+
msg = (
|
|
266
270
|
f"Input variables should be {expected_input_vars}, "
|
|
267
271
|
f"but got {prompt_.input_variables}"
|
|
268
272
|
)
|
|
273
|
+
raise ValueError(msg)
|
|
269
274
|
criteria_ = resolve_pairwise_criteria(criteria)
|
|
270
275
|
criteria_str = "\n".join(f"{k}: {v}" if v else k for k, v in criteria_.items())
|
|
271
276
|
criteria_str = CRITERIA_INSTRUCTIONS + criteria_str if criteria_str else ""
|
|
@@ -275,7 +280,7 @@ Performance may be significantly worse with other models."
|
|
|
275
280
|
self,
|
|
276
281
|
prediction: str,
|
|
277
282
|
prediction_b: str,
|
|
278
|
-
|
|
283
|
+
input_: Optional[str],
|
|
279
284
|
reference: Optional[str],
|
|
280
285
|
) -> dict:
|
|
281
286
|
"""Prepare the input for the chain.
|
|
@@ -283,21 +288,21 @@ Performance may be significantly worse with other models."
|
|
|
283
288
|
Args:
|
|
284
289
|
prediction (str): The output string from the first model.
|
|
285
290
|
prediction_b (str): The output string from the second model.
|
|
286
|
-
|
|
291
|
+
input_ (str, optional): The input or task string.
|
|
287
292
|
reference (str, optional): The reference string, if any.
|
|
288
293
|
|
|
289
294
|
Returns:
|
|
290
295
|
dict: The prepared input for the chain.
|
|
291
296
|
|
|
292
297
|
"""
|
|
293
|
-
|
|
298
|
+
input_dict = {
|
|
294
299
|
"prediction": prediction,
|
|
295
300
|
"prediction_b": prediction_b,
|
|
296
|
-
"input":
|
|
301
|
+
"input": input_,
|
|
297
302
|
}
|
|
298
303
|
if self.requires_reference:
|
|
299
|
-
|
|
300
|
-
return
|
|
304
|
+
input_dict["reference"] = reference
|
|
305
|
+
return input_dict
|
|
301
306
|
|
|
302
307
|
def _prepare_output(self, result: dict) -> dict:
|
|
303
308
|
"""Prepare the output."""
|
|
@@ -306,6 +311,7 @@ Performance may be significantly worse with other models."
|
|
|
306
311
|
parsed[RUN_KEY] = result[RUN_KEY]
|
|
307
312
|
return parsed
|
|
308
313
|
|
|
314
|
+
@override
|
|
309
315
|
def _evaluate_string_pairs(
|
|
310
316
|
self,
|
|
311
317
|
*,
|
|
@@ -348,6 +354,7 @@ Performance may be significantly worse with other models."
|
|
|
348
354
|
)
|
|
349
355
|
return self._prepare_output(result)
|
|
350
356
|
|
|
357
|
+
@override
|
|
351
358
|
async def _aevaluate_string_pairs(
|
|
352
359
|
self,
|
|
353
360
|
*,
|
|
@@ -444,10 +451,11 @@ class LabeledPairwiseStringEvalChain(PairwiseStringEvalChain):
|
|
|
444
451
|
}
|
|
445
452
|
prompt_ = prompt or COMPARISON_TEMPLATE_WITH_REFERENCE
|
|
446
453
|
if expected_input_vars != set(prompt_.input_variables):
|
|
447
|
-
|
|
454
|
+
msg = (
|
|
448
455
|
f"Input variables should be {expected_input_vars}, "
|
|
449
456
|
f"but got {prompt_.input_variables}"
|
|
450
457
|
)
|
|
458
|
+
raise ValueError(msg)
|
|
451
459
|
criteria_ = resolve_pairwise_criteria(criteria)
|
|
452
460
|
criteria_str = "\n".join(f"{k}: {v}" for k, v in criteria_.items())
|
|
453
461
|
criteria_str = CRITERIA_INSTRUCTIONS + criteria_str if criteria_str else ""
|
|
@@ -3,9 +3,8 @@
|
|
|
3
3
|
This prompt is used to compare two responses and evaluate which one best follows the instructions
|
|
4
4
|
and answers the question. The prompt is based on the paper from
|
|
5
5
|
Zheng, et. al. https://arxiv.org/abs/2306.05685
|
|
6
|
-
"""
|
|
6
|
+
""" # noqa: E501
|
|
7
7
|
|
|
8
|
-
# flake8: noqa
|
|
9
8
|
from langchain_core.prompts.chat import ChatPromptTemplate
|
|
10
9
|
|
|
11
10
|
SYSTEM_MESSAGE = 'Please act as an impartial judge and evaluate the quality \
|
|
@@ -10,6 +10,7 @@ from langchain_core.language_models import BaseLanguageModel
|
|
|
10
10
|
from langchain_core.output_parsers import BaseOutputParser
|
|
11
11
|
from langchain_core.prompts import BasePromptTemplate
|
|
12
12
|
from pydantic import ConfigDict, Field
|
|
13
|
+
from typing_extensions import override
|
|
13
14
|
|
|
14
15
|
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
|
|
15
16
|
from langchain.chains.llm import LLMChain
|
|
@@ -156,11 +157,12 @@ def resolve_criteria(
|
|
|
156
157
|
criteria_ = {criteria.name: criteria.critique_request}
|
|
157
158
|
else:
|
|
158
159
|
if not criteria:
|
|
159
|
-
|
|
160
|
+
msg = (
|
|
160
161
|
"Criteria cannot be empty. "
|
|
161
162
|
"Please provide a criterion name or a mapping of the criterion name"
|
|
162
163
|
" to its description."
|
|
163
164
|
)
|
|
165
|
+
raise ValueError(msg)
|
|
164
166
|
criteria_ = dict(criteria)
|
|
165
167
|
return criteria_
|
|
166
168
|
|
|
@@ -234,6 +236,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
|
|
|
234
236
|
output_key: str = "results" #: :meta private:
|
|
235
237
|
|
|
236
238
|
@classmethod
|
|
239
|
+
@override
|
|
237
240
|
def is_lc_serializable(cls) -> bool:
|
|
238
241
|
return False
|
|
239
242
|
|
|
@@ -247,6 +250,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
|
|
|
247
250
|
return False
|
|
248
251
|
|
|
249
252
|
@property
|
|
253
|
+
@override
|
|
250
254
|
def requires_input(self) -> bool:
|
|
251
255
|
return True
|
|
252
256
|
|
|
@@ -271,15 +275,17 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
|
|
|
271
275
|
|
|
272
276
|
@classmethod
|
|
273
277
|
def _resolve_prompt(
|
|
274
|
-
cls,
|
|
278
|
+
cls,
|
|
279
|
+
prompt: Optional[BasePromptTemplate] = None,
|
|
275
280
|
) -> BasePromptTemplate:
|
|
276
281
|
expected_input_vars = {"input", "output", "criteria"}
|
|
277
282
|
prompt_ = prompt or PROMPT
|
|
278
283
|
if expected_input_vars != set(prompt_.input_variables):
|
|
279
|
-
|
|
284
|
+
msg = (
|
|
280
285
|
f"Input variables should be {expected_input_vars}, "
|
|
281
286
|
f"but got {prompt_.input_variables}"
|
|
282
287
|
)
|
|
288
|
+
raise ValueError(msg)
|
|
283
289
|
return prompt_
|
|
284
290
|
|
|
285
291
|
@classmethod
|
|
@@ -360,12 +366,13 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
|
|
|
360
366
|
"""
|
|
361
367
|
prompt_ = cls._resolve_prompt(prompt)
|
|
362
368
|
if criteria == Criteria.CORRECTNESS:
|
|
363
|
-
|
|
369
|
+
msg = (
|
|
364
370
|
"Correctness should not be used in the reference-free"
|
|
365
371
|
" 'criteria' evaluator (CriteriaEvalChain)."
|
|
366
372
|
" Please use the 'labeled_criteria' evaluator"
|
|
367
373
|
" (LabeledCriteriaEvalChain) instead."
|
|
368
374
|
)
|
|
375
|
+
raise ValueError(msg)
|
|
369
376
|
criteria_ = cls.resolve_criteria(criteria)
|
|
370
377
|
criteria_str = "\n".join(f"{k}: {v}" for k, v in criteria_.items())
|
|
371
378
|
prompt_ = prompt_.partial(criteria=criteria_str)
|
|
@@ -380,16 +387,16 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
|
|
|
380
387
|
self,
|
|
381
388
|
prediction: str,
|
|
382
389
|
reference: Optional[str],
|
|
383
|
-
|
|
390
|
+
input_: Optional[str],
|
|
384
391
|
) -> dict:
|
|
385
392
|
"""Get the evaluation input."""
|
|
386
|
-
|
|
387
|
-
"input":
|
|
393
|
+
input_dict = {
|
|
394
|
+
"input": input_,
|
|
388
395
|
"output": prediction,
|
|
389
396
|
}
|
|
390
397
|
if self.requires_reference:
|
|
391
|
-
|
|
392
|
-
return
|
|
398
|
+
input_dict["reference"] = reference
|
|
399
|
+
return input_dict
|
|
393
400
|
|
|
394
401
|
def _prepare_output(self, result: dict) -> dict:
|
|
395
402
|
"""Prepare the output."""
|
|
@@ -398,6 +405,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
|
|
|
398
405
|
parsed[RUN_KEY] = result[RUN_KEY]
|
|
399
406
|
return parsed
|
|
400
407
|
|
|
408
|
+
@override
|
|
401
409
|
def _evaluate_strings(
|
|
402
410
|
self,
|
|
403
411
|
*,
|
|
@@ -453,6 +461,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
|
|
|
453
461
|
)
|
|
454
462
|
return self._prepare_output(result)
|
|
455
463
|
|
|
464
|
+
@override
|
|
456
465
|
async def _aevaluate_strings(
|
|
457
466
|
self,
|
|
458
467
|
*,
|
|
@@ -513,6 +522,7 @@ class LabeledCriteriaEvalChain(CriteriaEvalChain):
|
|
|
513
522
|
"""Criteria evaluation chain that requires references."""
|
|
514
523
|
|
|
515
524
|
@classmethod
|
|
525
|
+
@override
|
|
516
526
|
def is_lc_serializable(cls) -> bool:
|
|
517
527
|
return False
|
|
518
528
|
|
|
@@ -523,15 +533,17 @@ class LabeledCriteriaEvalChain(CriteriaEvalChain):
|
|
|
523
533
|
|
|
524
534
|
@classmethod
|
|
525
535
|
def _resolve_prompt(
|
|
526
|
-
cls,
|
|
536
|
+
cls,
|
|
537
|
+
prompt: Optional[BasePromptTemplate] = None,
|
|
527
538
|
) -> BasePromptTemplate:
|
|
528
539
|
expected_input_vars = {"input", "output", "criteria", "reference"}
|
|
529
540
|
prompt_ = prompt or PROMPT_WITH_REFERENCES
|
|
530
541
|
if expected_input_vars != set(prompt_.input_variables):
|
|
531
|
-
|
|
542
|
+
msg = (
|
|
532
543
|
f"Input variables should be {expected_input_vars}, "
|
|
533
544
|
f"but got {prompt_.input_variables}"
|
|
534
545
|
)
|
|
546
|
+
raise ValueError(msg)
|
|
535
547
|
return prompt_
|
|
536
548
|
|
|
537
549
|
@classmethod
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# flake8: noqa
|
|
2
1
|
# Credit to https://github.com/openai/evals/tree/main
|
|
3
2
|
|
|
4
3
|
from langchain_core.prompts import PromptTemplate
|
|
@@ -13,7 +12,7 @@ template = """You are assessing a submitted answer on a given task or input base
|
|
|
13
12
|
[Criteria]: {criteria}
|
|
14
13
|
***
|
|
15
14
|
[END DATA]
|
|
16
|
-
Does the submission meet the Criteria? First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer of whether the submission meets all criteria. At the end, repeat just the letter again by itself on a new line."""
|
|
15
|
+
Does the submission meet the Criteria? First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer of whether the submission meets all criteria. At the end, repeat just the letter again by itself on a new line.""" # noqa: E501
|
|
17
16
|
|
|
18
17
|
PROMPT = PromptTemplate(
|
|
19
18
|
input_variables=["input", "output", "criteria"], template=template
|
|
@@ -31,7 +30,7 @@ template = """You are assessing a submitted answer on a given task or input base
|
|
|
31
30
|
[Reference]: {reference}
|
|
32
31
|
***
|
|
33
32
|
[END DATA]
|
|
34
|
-
Does the submission meet the Criteria? First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer of whether the submission meets all criteria. At the end, repeat just the letter again by itself on a new line."""
|
|
33
|
+
Does the submission meet the Criteria? First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer of whether the submission meets all criteria. At the end, repeat just the letter again by itself on a new line.""" # noqa: E501
|
|
35
34
|
|
|
36
35
|
PROMPT_WITH_REFERENCES = PromptTemplate(
|
|
37
36
|
input_variables=["input", "output", "criteria", "reference"], template=template
|
|
@@ -14,6 +14,7 @@ from langchain_core.callbacks.manager import (
|
|
|
14
14
|
from langchain_core.embeddings import Embeddings
|
|
15
15
|
from langchain_core.utils import pre_init
|
|
16
16
|
from pydantic import ConfigDict, Field
|
|
17
|
+
from typing_extensions import override
|
|
17
18
|
|
|
18
19
|
from langchain.chains.base import Chain
|
|
19
20
|
from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator
|
|
@@ -23,12 +24,10 @@ from langchain.schema import RUN_KEY
|
|
|
23
24
|
def _import_numpy() -> Any:
|
|
24
25
|
try:
|
|
25
26
|
import numpy as np
|
|
26
|
-
|
|
27
|
-
return np
|
|
28
27
|
except ImportError as e:
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
28
|
+
msg = "Could not import numpy, please install with `pip install numpy`."
|
|
29
|
+
raise ImportError(msg) from e
|
|
30
|
+
return np
|
|
32
31
|
|
|
33
32
|
|
|
34
33
|
logger = logging.getLogger(__name__)
|
|
@@ -43,7 +42,7 @@ def _check_numpy() -> bool:
|
|
|
43
42
|
"langchain will use a pure Python implementation for embedding distance "
|
|
44
43
|
"operations, which may significantly impact performance, especially for large "
|
|
45
44
|
"datasets. For optimal speed and efficiency, consider installing NumPy: "
|
|
46
|
-
"pip install numpy"
|
|
45
|
+
"pip install numpy",
|
|
47
46
|
)
|
|
48
47
|
return False
|
|
49
48
|
|
|
@@ -63,11 +62,12 @@ def _embedding_factory() -> Embeddings:
|
|
|
63
62
|
from langchain_community.embeddings.openai import ( # type: ignore[no-redef]
|
|
64
63
|
OpenAIEmbeddings,
|
|
65
64
|
)
|
|
66
|
-
except ImportError:
|
|
67
|
-
|
|
65
|
+
except ImportError as e:
|
|
66
|
+
msg = (
|
|
68
67
|
"Could not import OpenAIEmbeddings. Please install the "
|
|
69
68
|
"OpenAIEmbeddings package using `pip install langchain-openai`."
|
|
70
69
|
)
|
|
70
|
+
raise ImportError(msg) from e
|
|
71
71
|
return OpenAIEmbeddings()
|
|
72
72
|
|
|
73
73
|
|
|
@@ -130,21 +130,23 @@ class _EmbeddingDistanceChainMixin(Chain):
|
|
|
130
130
|
pass
|
|
131
131
|
|
|
132
132
|
if not types_:
|
|
133
|
-
|
|
133
|
+
msg = (
|
|
134
134
|
"Could not import OpenAIEmbeddings. Please install the "
|
|
135
135
|
"OpenAIEmbeddings package using `pip install langchain-openai`."
|
|
136
136
|
)
|
|
137
|
+
raise ImportError(msg)
|
|
137
138
|
|
|
138
139
|
if isinstance(embeddings, tuple(types_)):
|
|
139
140
|
try:
|
|
140
141
|
import tiktoken # noqa: F401
|
|
141
|
-
except ImportError:
|
|
142
|
-
|
|
142
|
+
except ImportError as e:
|
|
143
|
+
msg = (
|
|
143
144
|
"The tiktoken library is required to use the default "
|
|
144
145
|
"OpenAI embeddings with embedding distance evaluators."
|
|
145
146
|
" Please either manually select a different Embeddings object"
|
|
146
147
|
" or install tiktoken using `pip install tiktoken`."
|
|
147
148
|
)
|
|
149
|
+
raise ImportError(msg) from e
|
|
148
150
|
return values
|
|
149
151
|
|
|
150
152
|
model_config = ConfigDict(
|
|
@@ -184,8 +186,8 @@ class _EmbeddingDistanceChainMixin(Chain):
|
|
|
184
186
|
}
|
|
185
187
|
if metric in metrics:
|
|
186
188
|
return metrics[metric]
|
|
187
|
-
|
|
188
|
-
|
|
189
|
+
msg = f"Invalid metric: {metric}"
|
|
190
|
+
raise ValueError(msg)
|
|
189
191
|
|
|
190
192
|
@staticmethod
|
|
191
193
|
def _cosine_distance(a: Any, b: Any) -> Any:
|
|
@@ -200,12 +202,13 @@ class _EmbeddingDistanceChainMixin(Chain):
|
|
|
200
202
|
"""
|
|
201
203
|
try:
|
|
202
204
|
from langchain_community.utils.math import cosine_similarity
|
|
203
|
-
except ImportError:
|
|
204
|
-
|
|
205
|
+
except ImportError as e:
|
|
206
|
+
msg = (
|
|
205
207
|
"The cosine_similarity function is required to compute cosine distance."
|
|
206
208
|
" Please install the langchain-community package using"
|
|
207
209
|
" `pip install langchain-community`."
|
|
208
210
|
)
|
|
211
|
+
raise ImportError(msg) from e
|
|
209
212
|
return 1.0 - cosine_similarity(a, b)
|
|
210
213
|
|
|
211
214
|
@staticmethod
|
|
@@ -315,6 +318,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
|
|
|
315
318
|
return True
|
|
316
319
|
|
|
317
320
|
@property
|
|
321
|
+
@override
|
|
318
322
|
def evaluation_name(self) -> str:
|
|
319
323
|
return f"embedding_{self.distance_metric.value}_distance"
|
|
320
324
|
|
|
@@ -327,6 +331,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
|
|
|
327
331
|
"""
|
|
328
332
|
return ["prediction", "reference"]
|
|
329
333
|
|
|
334
|
+
@override
|
|
330
335
|
def _call(
|
|
331
336
|
self,
|
|
332
337
|
inputs: dict[str, Any],
|
|
@@ -343,7 +348,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
|
|
|
343
348
|
Dict[str, Any]: The computed score.
|
|
344
349
|
"""
|
|
345
350
|
vectors = self.embeddings.embed_documents(
|
|
346
|
-
[inputs["prediction"], inputs["reference"]]
|
|
351
|
+
[inputs["prediction"], inputs["reference"]],
|
|
347
352
|
)
|
|
348
353
|
if _check_numpy():
|
|
349
354
|
np = _import_numpy()
|
|
@@ -351,6 +356,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
|
|
|
351
356
|
score = self._compute_score(vectors)
|
|
352
357
|
return {"score": score}
|
|
353
358
|
|
|
359
|
+
@override
|
|
354
360
|
async def _acall(
|
|
355
361
|
self,
|
|
356
362
|
inputs: dict[str, Any],
|
|
@@ -370,7 +376,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
|
|
|
370
376
|
[
|
|
371
377
|
inputs["prediction"],
|
|
372
378
|
inputs["reference"],
|
|
373
|
-
]
|
|
379
|
+
],
|
|
374
380
|
)
|
|
375
381
|
if _check_numpy():
|
|
376
382
|
np = _import_numpy()
|
|
@@ -378,6 +384,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
|
|
|
378
384
|
score = self._compute_score(vectors)
|
|
379
385
|
return {"score": score}
|
|
380
386
|
|
|
387
|
+
@override
|
|
381
388
|
def _evaluate_strings(
|
|
382
389
|
self,
|
|
383
390
|
*,
|
|
@@ -412,6 +419,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
|
|
|
412
419
|
)
|
|
413
420
|
return self._prepare_output(result)
|
|
414
421
|
|
|
422
|
+
@override
|
|
415
423
|
async def _aevaluate_strings(
|
|
416
424
|
self,
|
|
417
425
|
*,
|
|
@@ -448,7 +456,8 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
|
|
|
448
456
|
|
|
449
457
|
|
|
450
458
|
class PairwiseEmbeddingDistanceEvalChain(
|
|
451
|
-
_EmbeddingDistanceChainMixin,
|
|
459
|
+
_EmbeddingDistanceChainMixin,
|
|
460
|
+
PairwiseStringEvaluator,
|
|
452
461
|
):
|
|
453
462
|
"""Use embedding distances to score semantic difference between two predictions.
|
|
454
463
|
|
|
@@ -470,8 +479,10 @@ class PairwiseEmbeddingDistanceEvalChain(
|
|
|
470
479
|
|
|
471
480
|
@property
|
|
472
481
|
def evaluation_name(self) -> str:
|
|
482
|
+
"""Return the evaluation name."""
|
|
473
483
|
return f"pairwise_embedding_{self.distance_metric.value}_distance"
|
|
474
484
|
|
|
485
|
+
@override
|
|
475
486
|
def _call(
|
|
476
487
|
self,
|
|
477
488
|
inputs: dict[str, Any],
|
|
@@ -491,7 +502,7 @@ class PairwiseEmbeddingDistanceEvalChain(
|
|
|
491
502
|
[
|
|
492
503
|
inputs["prediction"],
|
|
493
504
|
inputs["prediction_b"],
|
|
494
|
-
]
|
|
505
|
+
],
|
|
495
506
|
)
|
|
496
507
|
if _check_numpy():
|
|
497
508
|
np = _import_numpy()
|
|
@@ -499,6 +510,7 @@ class PairwiseEmbeddingDistanceEvalChain(
|
|
|
499
510
|
score = self._compute_score(vectors)
|
|
500
511
|
return {"score": score}
|
|
501
512
|
|
|
513
|
+
@override
|
|
502
514
|
async def _acall(
|
|
503
515
|
self,
|
|
504
516
|
inputs: dict[str, Any],
|
|
@@ -518,7 +530,7 @@ class PairwiseEmbeddingDistanceEvalChain(
|
|
|
518
530
|
[
|
|
519
531
|
inputs["prediction"],
|
|
520
532
|
inputs["prediction_b"],
|
|
521
|
-
]
|
|
533
|
+
],
|
|
522
534
|
)
|
|
523
535
|
if _check_numpy():
|
|
524
536
|
np = _import_numpy()
|
|
@@ -526,6 +538,7 @@ class PairwiseEmbeddingDistanceEvalChain(
|
|
|
526
538
|
score = self._compute_score(vectors)
|
|
527
539
|
return {"score": score}
|
|
528
540
|
|
|
541
|
+
@override
|
|
529
542
|
def _evaluate_string_pairs(
|
|
530
543
|
self,
|
|
531
544
|
*,
|
|
@@ -561,6 +574,7 @@ class PairwiseEmbeddingDistanceEvalChain(
|
|
|
561
574
|
)
|
|
562
575
|
return self._prepare_output(result)
|
|
563
576
|
|
|
577
|
+
@override
|
|
564
578
|
async def _aevaluate_string_pairs(
|
|
565
579
|
self,
|
|
566
580
|
*,
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import string
|
|
2
2
|
from typing import Any
|
|
3
3
|
|
|
4
|
+
from typing_extensions import override
|
|
5
|
+
|
|
4
6
|
from langchain.evaluation.schema import StringEvaluator
|
|
5
7
|
|
|
6
8
|
|
|
@@ -27,8 +29,18 @@ class ExactMatchStringEvaluator(StringEvaluator):
|
|
|
27
29
|
ignore_case: bool = False,
|
|
28
30
|
ignore_punctuation: bool = False,
|
|
29
31
|
ignore_numbers: bool = False,
|
|
30
|
-
**
|
|
32
|
+
**_: Any,
|
|
31
33
|
):
|
|
34
|
+
"""Initialize the ExactMatchStringEvaluator.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
ignore_case: Whether to ignore case when comparing strings.
|
|
38
|
+
Defaults to False.
|
|
39
|
+
ignore_punctuation: Whether to ignore punctuation when comparing strings.
|
|
40
|
+
Defaults to False.
|
|
41
|
+
ignore_numbers: Whether to ignore numbers when comparing strings.
|
|
42
|
+
Defaults to False.
|
|
43
|
+
"""
|
|
32
44
|
super().__init__()
|
|
33
45
|
self.ignore_case = ignore_case
|
|
34
46
|
self.ignore_punctuation = ignore_punctuation
|
|
@@ -68,6 +80,7 @@ class ExactMatchStringEvaluator(StringEvaluator):
|
|
|
68
80
|
"""
|
|
69
81
|
return "exact_match"
|
|
70
82
|
|
|
83
|
+
@override
|
|
71
84
|
def _evaluate_strings( # type: ignore[override]
|
|
72
85
|
self,
|
|
73
86
|
*,
|