langchain 0.3.26__py3-none-any.whl → 0.3.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain might be problematic. Click here for more details.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +160 -133
- langchain/agents/agent_iterator.py +31 -14
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +2 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +6 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +9 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +7 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +16 -8
- langchain/agents/json_chat/base.py +18 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +9 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +80 -70
- langchain/agents/openai_functions_agent/base.py +46 -37
- langchain/agents/openai_functions_multi_agent/base.py +39 -26
- langchain/agents/openai_tools/base.py +8 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +6 -6
- langchain/agents/output_parsers/openai_functions.py +15 -7
- langchain/agents/output_parsers/openai_tools.py +9 -4
- langchain/agents/output_parsers/react_json_single_input.py +10 -5
- langchain/agents/output_parsers/react_single_input.py +15 -11
- langchain/agents/output_parsers/self_ask.py +3 -2
- langchain/agents/output_parsers/tools.py +18 -13
- langchain/agents/output_parsers/xml.py +99 -28
- langchain/agents/react/agent.py +4 -4
- langchain/agents/react/base.py +22 -17
- langchain/agents/react/output_parser.py +5 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +3 -2
- langchain/agents/self_ask_with_search/base.py +19 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +14 -11
- langchain/agents/structured_chat/output_parser.py +16 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +7 -6
- langchain/agents/tools.py +2 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +5 -5
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +4 -1
- langchain/callbacks/streaming_aiter_final_only.py +5 -3
- langchain/callbacks/streaming_stdout_final_only.py +5 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +1 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +36 -22
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +88 -54
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +23 -10
- langchain/chains/combine_documents/map_reduce.py +38 -30
- langchain/chains/combine_documents/map_rerank.py +33 -20
- langchain/chains/combine_documents/reduce.py +47 -26
- langchain/chains/combine_documents/refine.py +26 -17
- langchain/chains/combine_documents/stuff.py +19 -12
- langchain/chains/constitutional_ai/base.py +4 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +5 -3
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +41 -20
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +8 -9
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +26 -12
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +77 -61
- langchain/chains/llm_bash/__init__.py +2 -1
- langchain/chains/llm_checker/base.py +7 -5
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +16 -9
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +9 -6
- langchain/chains/llm_symbolic_math/__init__.py +2 -1
- langchain/chains/loading.py +151 -95
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +8 -9
- langchain/chains/natbot/base.py +8 -8
- langchain/chains/natbot/crawler.py +73 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +13 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +12 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +35 -35
- langchain/chains/openai_functions/qa_with_structure.py +19 -12
- langchain/chains/openai_functions/tagging.py +2 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +4 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +14 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +14 -5
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +17 -6
- langchain/chains/query_constructor/base.py +34 -33
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +37 -32
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +34 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +24 -20
- langchain/chains/router/embedding_router.py +12 -8
- langchain/chains/router/llm_router.py +17 -16
- langchain/chains/router/multi_prompt.py +2 -2
- langchain/chains/router/multi_retrieval_qa.py +10 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +6 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +75 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +5 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +160 -123
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +33 -24
- langchain/embeddings/cache.py +36 -31
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +23 -23
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +20 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +20 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +23 -20
- langchain/evaluation/loading.py +15 -11
- langchain/evaluation/parsing/base.py +4 -1
- langchain/evaluation/parsing/json_distance.py +5 -2
- langchain/evaluation/parsing/json_schema.py +12 -8
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +12 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +2 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +22 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +14 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +9 -7
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +13 -12
- langchain/memory/entity.py +84 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +1 -3
- langchain/memory/summary.py +13 -11
- langchain/memory/summary_buffer.py +17 -8
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +10 -5
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +5 -3
- langchain/output_parsers/fix.py +52 -52
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +43 -47
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +7 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +77 -78
- langchain/output_parsers/structured.py +11 -6
- langchain/output_parsers/yaml.py +15 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +2 -2
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +15 -15
- langchain/retrievers/document_compressors/embeddings_filter.py +21 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +7 -5
- langchain/retrievers/ensemble.py +28 -25
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +32 -26
- langchain/retrievers/multi_vector.py +20 -8
- langchain/retrievers/parent_document_retriever.py +18 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +138 -127
- langchain/retrievers/time_weighted_retriever.py +18 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/openai_functions.py +6 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +1 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +10 -7
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +11 -2
- langchain/smith/evaluation/runner_utils.py +179 -127
- langchain/smith/evaluation/string_run_evaluator.py +75 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +6 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +2 -1
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/METADATA +4 -4
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/RECORD +580 -580
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/WHEEL +1 -1
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.3.27.dist-info}/licenses/LICENSE +0 -0
langchain/chains/moderation.py
CHANGED
|
@@ -45,7 +45,9 @@ class OpenAIModerationChain(Chain):
|
|
|
45
45
|
def validate_environment(cls, values: dict) -> Any:
|
|
46
46
|
"""Validate that api key and python package exists in environment."""
|
|
47
47
|
openai_api_key = get_from_dict_or_env(
|
|
48
|
-
values, "openai_api_key", "OPENAI_API_KEY"
|
|
48
|
+
values,
|
|
49
|
+
"openai_api_key",
|
|
50
|
+
"OPENAI_API_KEY",
|
|
49
51
|
)
|
|
50
52
|
openai_organization = get_from_dict_or_env(
|
|
51
53
|
values,
|
|
@@ -70,11 +72,12 @@ class OpenAIModerationChain(Chain):
|
|
|
70
72
|
values["client"] = openai.OpenAI(api_key=openai_api_key)
|
|
71
73
|
values["async_client"] = openai.AsyncOpenAI(api_key=openai_api_key)
|
|
72
74
|
|
|
73
|
-
except ImportError:
|
|
74
|
-
raise ImportError(
|
|
75
|
+
except ImportError as e:
|
|
76
|
+
msg = (
|
|
75
77
|
"Could not import openai python package. "
|
|
76
78
|
"Please install it with `pip install openai`."
|
|
77
79
|
)
|
|
80
|
+
raise ImportError(msg) from e
|
|
78
81
|
return values
|
|
79
82
|
|
|
80
83
|
@property
|
|
@@ -94,16 +97,12 @@ class OpenAIModerationChain(Chain):
|
|
|
94
97
|
return [self.output_key]
|
|
95
98
|
|
|
96
99
|
def _moderate(self, text: str, results: Any) -> str:
|
|
97
|
-
if self.openai_pre_1_0:
|
|
98
|
-
condition = results["flagged"]
|
|
99
|
-
else:
|
|
100
|
-
condition = results.flagged
|
|
100
|
+
condition = results["flagged"] if self.openai_pre_1_0 else results.flagged
|
|
101
101
|
if condition:
|
|
102
102
|
error_str = "Text was found that violates OpenAI's content policy."
|
|
103
103
|
if self.error:
|
|
104
104
|
raise ValueError(error_str)
|
|
105
|
-
else:
|
|
106
|
-
return error_str
|
|
105
|
+
return error_str
|
|
107
106
|
return text
|
|
108
107
|
|
|
109
108
|
def _call(
|
langchain/chains/natbot/base.py
CHANGED
|
@@ -6,9 +6,7 @@ import warnings
|
|
|
6
6
|
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
from langchain_core._api import deprecated
|
|
9
|
-
from langchain_core.caches import BaseCache as BaseCache
|
|
10
9
|
from langchain_core.callbacks import CallbackManagerForChainRun
|
|
11
|
-
from langchain_core.callbacks import Callbacks as Callbacks
|
|
12
10
|
from langchain_core.language_models import BaseLanguageModel
|
|
13
11
|
from langchain_core.output_parsers import StrOutputParser
|
|
14
12
|
from langchain_core.runnables import Runnable
|
|
@@ -73,7 +71,8 @@ class NatBotChain(Chain):
|
|
|
73
71
|
warnings.warn(
|
|
74
72
|
"Directly instantiating an NatBotChain with an llm is deprecated. "
|
|
75
73
|
"Please instantiate with llm_chain argument or using the from_llm "
|
|
76
|
-
"class method."
|
|
74
|
+
"class method.",
|
|
75
|
+
stacklevel=5,
|
|
77
76
|
)
|
|
78
77
|
if "llm_chain" not in values and values["llm"] is not None:
|
|
79
78
|
values["llm_chain"] = PROMPT | values["llm"] | StrOutputParser()
|
|
@@ -82,15 +81,19 @@ class NatBotChain(Chain):
|
|
|
82
81
|
@classmethod
|
|
83
82
|
def from_default(cls, objective: str, **kwargs: Any) -> NatBotChain:
|
|
84
83
|
"""Load with default LLMChain."""
|
|
85
|
-
|
|
84
|
+
msg = (
|
|
86
85
|
"This method is no longer implemented. Please use from_llm."
|
|
87
86
|
"llm = OpenAI(temperature=0.5, best_of=10, n=3, max_tokens=50)"
|
|
88
87
|
"For example, NatBotChain.from_llm(llm, objective)"
|
|
89
88
|
)
|
|
89
|
+
raise NotImplementedError(msg)
|
|
90
90
|
|
|
91
91
|
@classmethod
|
|
92
92
|
def from_llm(
|
|
93
|
-
cls,
|
|
93
|
+
cls,
|
|
94
|
+
llm: BaseLanguageModel,
|
|
95
|
+
objective: str,
|
|
96
|
+
**kwargs: Any,
|
|
94
97
|
) -> NatBotChain:
|
|
95
98
|
"""Load from LLM."""
|
|
96
99
|
llm_chain = PROMPT | llm | StrOutputParser()
|
|
@@ -158,6 +161,3 @@ class NatBotChain(Chain):
|
|
|
158
161
|
@property
|
|
159
162
|
def _chain_type(self) -> str:
|
|
160
163
|
return "nat_bot_chain"
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
NatBotChain.model_rebuild()
|
|
@@ -1,23 +1,17 @@
|
|
|
1
|
-
# flake8: noqa
|
|
2
1
|
import time
|
|
3
2
|
from sys import platform
|
|
4
3
|
from typing import (
|
|
5
4
|
TYPE_CHECKING,
|
|
6
5
|
Any,
|
|
7
|
-
Dict,
|
|
8
|
-
Iterable,
|
|
9
|
-
List,
|
|
10
6
|
Optional,
|
|
11
|
-
Set,
|
|
12
|
-
Tuple,
|
|
13
7
|
TypedDict,
|
|
14
8
|
Union,
|
|
15
9
|
)
|
|
16
10
|
|
|
17
11
|
if TYPE_CHECKING:
|
|
18
|
-
from playwright.sync_api import Browser, CDPSession, Page
|
|
12
|
+
from playwright.sync_api import Browser, CDPSession, Page
|
|
19
13
|
|
|
20
|
-
black_listed_elements:
|
|
14
|
+
black_listed_elements: set[str] = {
|
|
21
15
|
"html",
|
|
22
16
|
"head",
|
|
23
17
|
"title",
|
|
@@ -40,7 +34,7 @@ class ElementInViewPort(TypedDict):
|
|
|
40
34
|
backend_node_id: int
|
|
41
35
|
node_name: Optional[str]
|
|
42
36
|
node_value: Optional[str]
|
|
43
|
-
node_meta:
|
|
37
|
+
node_meta: list[str]
|
|
44
38
|
is_clickable: bool
|
|
45
39
|
origin_x: int
|
|
46
40
|
origin_y: int
|
|
@@ -69,17 +63,18 @@ class Crawler:
|
|
|
69
63
|
def __init__(self) -> None:
|
|
70
64
|
try:
|
|
71
65
|
from playwright.sync_api import sync_playwright
|
|
72
|
-
except ImportError:
|
|
73
|
-
|
|
66
|
+
except ImportError as e:
|
|
67
|
+
msg = (
|
|
74
68
|
"Could not import playwright python package. "
|
|
75
69
|
"Please install it with `pip install playwright`."
|
|
76
70
|
)
|
|
71
|
+
raise ImportError(msg) from e
|
|
77
72
|
self.browser: Browser = (
|
|
78
73
|
sync_playwright().start().chromium.launch(headless=False)
|
|
79
74
|
)
|
|
80
75
|
self.page: Page = self.browser.new_page()
|
|
81
76
|
self.page.set_viewport_size({"width": 1280, "height": 1080})
|
|
82
|
-
self.page_element_buffer:
|
|
77
|
+
self.page_element_buffer: dict[int, ElementInViewPort]
|
|
83
78
|
self.client: CDPSession
|
|
84
79
|
|
|
85
80
|
def go_to_page(self, url: str) -> None:
|
|
@@ -90,15 +85,15 @@ class Crawler:
|
|
|
90
85
|
def scroll(self, direction: str) -> None:
|
|
91
86
|
if direction == "up":
|
|
92
87
|
self.page.evaluate(
|
|
93
|
-
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;"
|
|
88
|
+
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;" # noqa: E501
|
|
94
89
|
)
|
|
95
90
|
elif direction == "down":
|
|
96
91
|
self.page.evaluate(
|
|
97
|
-
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;"
|
|
92
|
+
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;" # noqa: E501
|
|
98
93
|
)
|
|
99
94
|
|
|
100
|
-
def click(self,
|
|
101
|
-
# Inject javascript into the page which removes the target= attribute from
|
|
95
|
+
def click(self, id_: Union[str, int]) -> None:
|
|
96
|
+
# Inject javascript into the page which removes the target= attribute from links
|
|
102
97
|
js = """
|
|
103
98
|
links = document.getElementsByTagName("a");
|
|
104
99
|
for (var i = 0; i < links.length; i++) {
|
|
@@ -107,7 +102,7 @@ class Crawler:
|
|
|
107
102
|
"""
|
|
108
103
|
self.page.evaluate(js)
|
|
109
104
|
|
|
110
|
-
element = self.page_element_buffer.get(int(
|
|
105
|
+
element = self.page_element_buffer.get(int(id_))
|
|
111
106
|
if element:
|
|
112
107
|
x: float = element["center_x"]
|
|
113
108
|
y: float = element["center_y"]
|
|
@@ -116,14 +111,14 @@ class Crawler:
|
|
|
116
111
|
else:
|
|
117
112
|
print("Could not find element") # noqa: T201
|
|
118
113
|
|
|
119
|
-
def type(self,
|
|
120
|
-
self.click(
|
|
114
|
+
def type(self, id_: Union[str, int], text: str) -> None:
|
|
115
|
+
self.click(id_)
|
|
121
116
|
self.page.keyboard.type(text)
|
|
122
117
|
|
|
123
118
|
def enter(self) -> None:
|
|
124
119
|
self.page.keyboard.press("Enter")
|
|
125
120
|
|
|
126
|
-
def crawl(self) ->
|
|
121
|
+
def crawl(self) -> list[str]:
|
|
127
122
|
page = self.page
|
|
128
123
|
page_element_buffer = self.page_element_buffer
|
|
129
124
|
start = time.time()
|
|
@@ -141,10 +136,10 @@ class Crawler:
|
|
|
141
136
|
win_right_bound: float = win_left_bound + win_width
|
|
142
137
|
win_lower_bound: float = win_upper_bound + win_height
|
|
143
138
|
|
|
144
|
-
#
|
|
145
|
-
#
|
|
146
|
-
#
|
|
147
|
-
#
|
|
139
|
+
# percentage_progress_start = (win_upper_bound / document_scroll_height) * 100
|
|
140
|
+
# percentage_progress_end = (
|
|
141
|
+
# (win_height + win_upper_bound) / document_scroll_height
|
|
142
|
+
# ) * 100
|
|
148
143
|
percentage_progress_start = 1
|
|
149
144
|
percentage_progress_end = 2
|
|
150
145
|
|
|
@@ -152,9 +147,7 @@ class Crawler:
|
|
|
152
147
|
{
|
|
153
148
|
"x": 0,
|
|
154
149
|
"y": 0,
|
|
155
|
-
"text": "[scrollbar {:0.2f}-{:0.2f}%]"
|
|
156
|
-
round(percentage_progress_start, 2), round(percentage_progress_end)
|
|
157
|
-
),
|
|
150
|
+
"text": f"[scrollbar {percentage_progress_start:0.2f}-{percentage_progress_end:0.2f}%]", # noqa: E501
|
|
158
151
|
}
|
|
159
152
|
)
|
|
160
153
|
|
|
@@ -162,34 +155,35 @@ class Crawler:
|
|
|
162
155
|
"DOMSnapshot.captureSnapshot",
|
|
163
156
|
{"computedStyles": [], "includeDOMRects": True, "includePaintOrder": True},
|
|
164
157
|
)
|
|
165
|
-
strings:
|
|
166
|
-
document:
|
|
167
|
-
nodes:
|
|
168
|
-
backend_node_id:
|
|
169
|
-
attributes:
|
|
170
|
-
node_value:
|
|
171
|
-
parent:
|
|
172
|
-
node_names:
|
|
173
|
-
is_clickable:
|
|
174
|
-
|
|
175
|
-
input_value:
|
|
176
|
-
input_value_index:
|
|
177
|
-
input_value_values:
|
|
178
|
-
|
|
179
|
-
layout:
|
|
180
|
-
layout_node_index:
|
|
181
|
-
bounds:
|
|
158
|
+
strings: dict[int, str] = tree["strings"]
|
|
159
|
+
document: dict[str, Any] = tree["documents"][0]
|
|
160
|
+
nodes: dict[str, Any] = document["nodes"]
|
|
161
|
+
backend_node_id: dict[int, int] = nodes["backendNodeId"]
|
|
162
|
+
attributes: dict[int, dict[int, Any]] = nodes["attributes"]
|
|
163
|
+
node_value: dict[int, int] = nodes["nodeValue"]
|
|
164
|
+
parent: dict[int, int] = nodes["parentIndex"]
|
|
165
|
+
node_names: dict[int, int] = nodes["nodeName"]
|
|
166
|
+
is_clickable: set[int] = set(nodes["isClickable"]["index"])
|
|
167
|
+
|
|
168
|
+
input_value: dict[str, Any] = nodes["inputValue"]
|
|
169
|
+
input_value_index: list[int] = input_value["index"]
|
|
170
|
+
input_value_values: list[int] = input_value["value"]
|
|
171
|
+
|
|
172
|
+
layout: dict[str, Any] = document["layout"]
|
|
173
|
+
layout_node_index: list[int] = layout["nodeIndex"]
|
|
174
|
+
bounds: dict[int, list[float]] = layout["bounds"]
|
|
182
175
|
|
|
183
176
|
cursor: int = 0
|
|
184
177
|
|
|
185
|
-
child_nodes:
|
|
186
|
-
elements_in_view_port:
|
|
178
|
+
child_nodes: dict[str, list[dict[str, Any]]] = {}
|
|
179
|
+
elements_in_view_port: list[ElementInViewPort] = []
|
|
187
180
|
|
|
188
|
-
anchor_ancestry:
|
|
189
|
-
button_ancestry:
|
|
181
|
+
anchor_ancestry: dict[str, tuple[bool, Optional[int]]] = {"-1": (False, None)}
|
|
182
|
+
button_ancestry: dict[str, tuple[bool, Optional[int]]] = {"-1": (False, None)}
|
|
190
183
|
|
|
191
184
|
def convert_name(
|
|
192
|
-
node_name: Optional[str],
|
|
185
|
+
node_name: Optional[str],
|
|
186
|
+
has_click_handler: Optional[bool], # noqa: FBT001
|
|
193
187
|
) -> str:
|
|
194
188
|
if node_name == "a":
|
|
195
189
|
return "link"
|
|
@@ -201,12 +195,11 @@ class Crawler:
|
|
|
201
195
|
node_name == "button" or has_click_handler
|
|
202
196
|
): # found pages that needed this quirk
|
|
203
197
|
return "button"
|
|
204
|
-
|
|
205
|
-
return "text"
|
|
198
|
+
return "text"
|
|
206
199
|
|
|
207
200
|
def find_attributes(
|
|
208
|
-
attributes:
|
|
209
|
-
) ->
|
|
201
|
+
attributes: dict[int, Any], keys: list[str]
|
|
202
|
+
) -> dict[str, str]:
|
|
210
203
|
values = {}
|
|
211
204
|
|
|
212
205
|
for [key_index, value_index] in zip(*(iter(attributes),) * 2):
|
|
@@ -225,14 +218,14 @@ class Crawler:
|
|
|
225
218
|
return values
|
|
226
219
|
|
|
227
220
|
def add_to_hash_tree(
|
|
228
|
-
hash_tree:
|
|
221
|
+
hash_tree: dict[str, tuple[bool, Optional[int]]],
|
|
229
222
|
tag: str,
|
|
230
223
|
node_id: int,
|
|
231
224
|
node_name: Optional[str],
|
|
232
225
|
parent_id: int,
|
|
233
|
-
) ->
|
|
226
|
+
) -> tuple[bool, Optional[int]]:
|
|
234
227
|
parent_id_str = str(parent_id)
|
|
235
|
-
if not
|
|
228
|
+
if parent_id_str not in hash_tree:
|
|
236
229
|
parent_name = strings[node_names[parent_id]].lower()
|
|
237
230
|
grand_parent_id = parent[parent_id]
|
|
238
231
|
|
|
@@ -242,9 +235,10 @@ class Crawler:
|
|
|
242
235
|
|
|
243
236
|
is_parent_desc_anchor, anchor_id = hash_tree[parent_id_str]
|
|
244
237
|
|
|
245
|
-
# even if the anchor is nested in another anchor, we set the "root" for all
|
|
238
|
+
# even if the anchor is nested in another anchor, we set the "root" for all
|
|
239
|
+
# descendants to be ::Self
|
|
246
240
|
if node_name == tag:
|
|
247
|
-
value:
|
|
241
|
+
value: tuple[bool, Optional[int]] = (True, node_id)
|
|
248
242
|
elif (
|
|
249
243
|
is_parent_desc_anchor
|
|
250
244
|
): # reuse the parent's anchor_id (which could be much higher in the tree)
|
|
@@ -253,7 +247,9 @@ class Crawler:
|
|
|
253
247
|
value = (
|
|
254
248
|
False,
|
|
255
249
|
None,
|
|
256
|
-
)
|
|
250
|
+
)
|
|
251
|
+
# not a descendant of an anchor, most likely it will become text, an
|
|
252
|
+
# interactive element or discarded
|
|
257
253
|
|
|
258
254
|
hash_tree[str(node_id)] = value
|
|
259
255
|
|
|
@@ -272,10 +268,10 @@ class Crawler:
|
|
|
272
268
|
)
|
|
273
269
|
|
|
274
270
|
try:
|
|
275
|
-
cursor = layout_node_index.index(
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
except:
|
|
271
|
+
cursor = layout_node_index.index(index)
|
|
272
|
+
# TODO replace this with proper cursoring, ignoring the fact this is
|
|
273
|
+
# O(n^2) for the moment
|
|
274
|
+
except ValueError:
|
|
279
275
|
continue
|
|
280
276
|
|
|
281
277
|
if node_name in black_listed_elements:
|
|
@@ -302,9 +298,10 @@ class Crawler:
|
|
|
302
298
|
if not partially_is_in_viewport:
|
|
303
299
|
continue
|
|
304
300
|
|
|
305
|
-
meta_data:
|
|
301
|
+
meta_data: list[str] = []
|
|
306
302
|
|
|
307
|
-
# inefficient to grab the same set of keys for kinds of objects, but it's
|
|
303
|
+
# inefficient to grab the same set of keys for kinds of objects, but it's
|
|
304
|
+
# fine for now
|
|
308
305
|
element_attributes = find_attributes(
|
|
309
306
|
attributes[index], ["type", "placeholder", "aria-label", "title", "alt"]
|
|
310
307
|
)
|
|
@@ -325,7 +322,7 @@ class Crawler:
|
|
|
325
322
|
|
|
326
323
|
if node_name == "#text" and ancestor_exception and ancestor_node:
|
|
327
324
|
text = strings[node_value[index]]
|
|
328
|
-
if text
|
|
325
|
+
if text in {"|", "•"}:
|
|
329
326
|
continue
|
|
330
327
|
ancestor_node.append({"type": "type", "value": text})
|
|
331
328
|
else:
|
|
@@ -355,7 +352,9 @@ class Crawler:
|
|
|
355
352
|
element_node_value = strings[node_value[index]]
|
|
356
353
|
if (
|
|
357
354
|
element_node_value == "|"
|
|
358
|
-
|
|
355
|
+
# commonly used as a separator, does not add much context - lets
|
|
356
|
+
# save ourselves some token space
|
|
357
|
+
):
|
|
359
358
|
continue
|
|
360
359
|
elif (
|
|
361
360
|
node_name == "input"
|
|
@@ -368,7 +367,7 @@ class Crawler:
|
|
|
368
367
|
element_node_value = strings[text_index]
|
|
369
368
|
|
|
370
369
|
# remove redundant elements
|
|
371
|
-
if ancestor_exception and (node_name
|
|
370
|
+
if ancestor_exception and (node_name not in {"a", "button"}):
|
|
372
371
|
continue
|
|
373
372
|
|
|
374
373
|
elements_in_view_port.append(
|
|
@@ -386,7 +385,8 @@ class Crawler:
|
|
|
386
385
|
}
|
|
387
386
|
)
|
|
388
387
|
|
|
389
|
-
# lets filter further to remove anything that does not hold any text nor has
|
|
388
|
+
# lets filter further to remove anything that does not hold any text nor has
|
|
389
|
+
# click handlers + merge text from leaf#text nodes with the parent
|
|
390
390
|
elements_of_interest = []
|
|
391
391
|
id_counter = 0
|
|
392
392
|
|
|
@@ -395,7 +395,7 @@ class Crawler:
|
|
|
395
395
|
node_name = element.get("node_name")
|
|
396
396
|
element_node_value = element.get("node_value")
|
|
397
397
|
node_is_clickable = element.get("is_clickable")
|
|
398
|
-
node_meta_data: Optional[
|
|
398
|
+
node_meta_data: Optional[list[str]] = element.get("node_meta")
|
|
399
399
|
|
|
400
400
|
inner_text = f"{element_node_value} " if element_node_value else ""
|
|
401
401
|
meta = ""
|
|
@@ -423,10 +423,7 @@ class Crawler:
|
|
|
423
423
|
# not very elegant, more like a placeholder
|
|
424
424
|
if (
|
|
425
425
|
(converted_node_name != "button" or meta == "")
|
|
426
|
-
and converted_node_name
|
|
427
|
-
and converted_node_name != "input"
|
|
428
|
-
and converted_node_name != "img"
|
|
429
|
-
and converted_node_name != "textarea"
|
|
426
|
+
and converted_node_name not in {"link", "input", "img", "textarea"}
|
|
430
427
|
) and inner_text.strip() == "":
|
|
431
428
|
continue
|
|
432
429
|
|
|
@@ -434,7 +431,7 @@ class Crawler:
|
|
|
434
431
|
|
|
435
432
|
if inner_text != "":
|
|
436
433
|
elements_of_interest.append(
|
|
437
|
-
f"""<{converted_node_name} id={id_counter}{meta}>{inner_text}</{converted_node_name}>"""
|
|
434
|
+
f"""<{converted_node_name} id={id_counter}{meta}>{inner_text}</{converted_node_name}>""" # noqa: E501
|
|
438
435
|
)
|
|
439
436
|
else:
|
|
440
437
|
elements_of_interest.append(
|
|
@@ -442,5 +439,5 @@ class Crawler:
|
|
|
442
439
|
)
|
|
443
440
|
id_counter += 1
|
|
444
441
|
|
|
445
|
-
print("Parsing time: {
|
|
442
|
+
print(f"Parsing time: {time.time() - start:0.2f} seconds") # noqa: T201
|
|
446
443
|
return elements_of_interest
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# flake8: noqa
|
|
2
1
|
from langchain_core.prompts.prompt import PromptTemplate
|
|
3
2
|
|
|
4
3
|
_PROMPT_TEMPLATE = """
|
|
@@ -115,7 +114,7 @@ CURRENT BROWSER CONTENT:
|
|
|
115
114
|
<text id=10>7:00 PM</text>
|
|
116
115
|
<text id=11>2 people</text>
|
|
117
116
|
<input id=12 alt="Location, Restaurant, or Cuisine"></input>
|
|
118
|
-
<button id=13>Let
|
|
117
|
+
<button id=13>Let's go</button>
|
|
119
118
|
<text id=14>It looks like you're in Peninsula. Not correct?</text>
|
|
120
119
|
<button id=15>Get current location</button>
|
|
121
120
|
<button id=16>Next</button>
|
|
@@ -137,7 +136,7 @@ OBJECTIVE: {objective}
|
|
|
137
136
|
CURRENT URL: {url}
|
|
138
137
|
PREVIOUS COMMAND: {previous_command}
|
|
139
138
|
YOUR COMMAND:
|
|
140
|
-
"""
|
|
139
|
+
""" # noqa: E501
|
|
141
140
|
PROMPT = PromptTemplate(
|
|
142
141
|
input_variables=["browser_content", "url", "previous_command", "objective"],
|
|
143
142
|
template=_PROMPT_TEMPLATE,
|
|
@@ -28,17 +28,17 @@ from langchain.chains.structured_output.base import (
|
|
|
28
28
|
|
|
29
29
|
__all__ = [
|
|
30
30
|
"convert_to_openai_function",
|
|
31
|
-
"create_tagging_chain",
|
|
32
|
-
"create_tagging_chain_pydantic",
|
|
33
|
-
"create_extraction_chain_pydantic",
|
|
34
|
-
"create_extraction_chain",
|
|
35
31
|
"create_citation_fuzzy_match_chain",
|
|
36
32
|
"create_citation_fuzzy_match_runnable",
|
|
37
|
-
"
|
|
33
|
+
"create_extraction_chain",
|
|
34
|
+
"create_extraction_chain_pydantic",
|
|
35
|
+
"create_openai_fn_chain",
|
|
36
|
+
"create_openai_fn_runnable", # backwards compatibility
|
|
38
37
|
"create_qa_with_sources_chain",
|
|
38
|
+
"create_qa_with_structure_chain",
|
|
39
39
|
"create_structured_output_chain",
|
|
40
|
-
"create_openai_fn_chain",
|
|
41
40
|
"create_structured_output_runnable", # backwards compatibility
|
|
42
|
-
"
|
|
41
|
+
"create_tagging_chain",
|
|
42
|
+
"create_tagging_chain_pydantic",
|
|
43
43
|
"get_openai_output_parser", # backwards compatibility
|
|
44
44
|
]
|
|
@@ -31,13 +31,13 @@ from langchain.chains.structured_output.base import (
|
|
|
31
31
|
)
|
|
32
32
|
|
|
33
33
|
__all__ = [
|
|
34
|
-
"get_openai_output_parser",
|
|
35
|
-
"create_openai_fn_runnable",
|
|
36
|
-
"create_structured_output_runnable", # deprecated
|
|
37
|
-
"create_openai_fn_chain", # deprecated
|
|
38
|
-
"create_structured_output_chain", # deprecated
|
|
39
34
|
"PYTHON_TO_JSON_TYPES", # backwards compatibility
|
|
40
35
|
"convert_to_openai_function", # backwards compatibility
|
|
36
|
+
"create_openai_fn_chain", # deprecated
|
|
37
|
+
"create_openai_fn_runnable",
|
|
38
|
+
"create_structured_output_chain", # deprecated
|
|
39
|
+
"create_structured_output_runnable", # deprecated
|
|
40
|
+
"get_openai_output_parser",
|
|
41
41
|
]
|
|
42
42
|
|
|
43
43
|
|
|
@@ -123,7 +123,8 @@ def create_openai_fn_chain(
|
|
|
123
123
|
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")
|
|
124
124
|
""" # noqa: E501
|
|
125
125
|
if not functions:
|
|
126
|
-
|
|
126
|
+
msg = "Need to pass in at least one function. Received zero."
|
|
127
|
+
raise ValueError(msg)
|
|
127
128
|
openai_functions = [convert_to_openai_function(f) for f in functions]
|
|
128
129
|
output_parser = output_parser or get_openai_output_parser(functions)
|
|
129
130
|
llm_kwargs: dict[str, Any] = {
|
|
@@ -131,7 +132,7 @@ def create_openai_fn_chain(
|
|
|
131
132
|
}
|
|
132
133
|
if len(openai_functions) == 1 and enforce_single_function_usage:
|
|
133
134
|
llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
|
|
134
|
-
|
|
135
|
+
return LLMChain(
|
|
135
136
|
llm=llm,
|
|
136
137
|
prompt=prompt,
|
|
137
138
|
output_parser=output_parser,
|
|
@@ -139,11 +140,12 @@ def create_openai_fn_chain(
|
|
|
139
140
|
output_key=output_key,
|
|
140
141
|
**kwargs,
|
|
141
142
|
)
|
|
142
|
-
return llm_chain
|
|
143
143
|
|
|
144
144
|
|
|
145
145
|
@deprecated(
|
|
146
|
-
since="0.1.1",
|
|
146
|
+
since="0.1.1",
|
|
147
|
+
removal="1.0",
|
|
148
|
+
alternative="ChatOpenAI.with_structured_output",
|
|
147
149
|
)
|
|
148
150
|
def create_structured_output_chain(
|
|
149
151
|
output_schema: Union[dict[str, Any], type[BaseModel]],
|
|
@@ -220,7 +222,8 @@ def create_structured_output_chain(
|
|
|
220
222
|
|
|
221
223
|
function = _OutputFormatter
|
|
222
224
|
output_parser = output_parser or PydanticAttrOutputFunctionsParser(
|
|
223
|
-
pydantic_schema=_OutputFormatter,
|
|
225
|
+
pydantic_schema=_OutputFormatter,
|
|
226
|
+
attr_name="output",
|
|
224
227
|
)
|
|
225
228
|
return create_openai_fn_chain(
|
|
226
229
|
[function],
|
|
@@ -88,23 +88,22 @@ def create_citation_fuzzy_match_runnable(llm: BaseChatModel) -> Runnable:
|
|
|
88
88
|
Runnable that can be used to answer questions with citations.
|
|
89
89
|
"""
|
|
90
90
|
if llm.bind_tools is BaseChatModel.bind_tools:
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
)
|
|
91
|
+
msg = "Language model must implement bind_tools to use this function."
|
|
92
|
+
raise ValueError(msg)
|
|
94
93
|
prompt = ChatPromptTemplate(
|
|
95
94
|
[
|
|
96
95
|
SystemMessage(
|
|
97
96
|
"You are a world class algorithm to answer "
|
|
98
|
-
"questions with correct and exact citations."
|
|
97
|
+
"questions with correct and exact citations.",
|
|
99
98
|
),
|
|
100
99
|
HumanMessagePromptTemplate.from_template(
|
|
101
100
|
"Answer question using the following context."
|
|
102
101
|
"\n\n{context}"
|
|
103
102
|
"\n\nQuestion: {question}"
|
|
104
103
|
"\n\nTips: Make sure to cite your sources, "
|
|
105
|
-
"and use the exact words from the context."
|
|
104
|
+
"and use the exact words from the context.",
|
|
106
105
|
),
|
|
107
|
-
]
|
|
106
|
+
],
|
|
108
107
|
)
|
|
109
108
|
return prompt | llm.with_structured_output(QuestionAnswer)
|
|
110
109
|
|
|
@@ -124,7 +123,10 @@ def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
|
|
|
124
123
|
Chain (LLMChain) that can be used to answer questions with citations.
|
|
125
124
|
"""
|
|
126
125
|
output_parser = PydanticOutputFunctionsParser(pydantic_schema=QuestionAnswer)
|
|
127
|
-
|
|
126
|
+
if hasattr(QuestionAnswer, "model_json_schema"):
|
|
127
|
+
schema = QuestionAnswer.model_json_schema()
|
|
128
|
+
else:
|
|
129
|
+
schema = QuestionAnswer.schema()
|
|
128
130
|
function = {
|
|
129
131
|
"name": schema["title"],
|
|
130
132
|
"description": schema["description"],
|
|
@@ -136,7 +138,7 @@ def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
|
|
|
136
138
|
content=(
|
|
137
139
|
"You are a world class algorithm to answer "
|
|
138
140
|
"questions with correct and exact citations."
|
|
139
|
-
)
|
|
141
|
+
),
|
|
140
142
|
),
|
|
141
143
|
HumanMessage(content="Answer question using the following context"),
|
|
142
144
|
HumanMessagePromptTemplate.from_template("{context}"),
|
|
@@ -145,15 +147,14 @@ def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
|
|
|
145
147
|
content=(
|
|
146
148
|
"Tips: Make sure to cite your sources, "
|
|
147
149
|
"and use the exact words from the context."
|
|
148
|
-
)
|
|
150
|
+
),
|
|
149
151
|
),
|
|
150
152
|
]
|
|
151
153
|
prompt = ChatPromptTemplate(messages=messages) # type: ignore[arg-type]
|
|
152
154
|
|
|
153
|
-
|
|
155
|
+
return LLMChain(
|
|
154
156
|
llm=llm,
|
|
155
157
|
prompt=prompt,
|
|
156
158
|
llm_kwargs=llm_kwargs,
|
|
157
159
|
output_parser=output_parser,
|
|
158
160
|
)
|
|
159
|
-
return chain
|