langchain 0.3.26__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +110 -96
- langchain/_api/__init__.py +2 -2
- langchain/_api/deprecation.py +3 -3
- langchain/_api/module_import.py +51 -46
- langchain/_api/path.py +1 -1
- langchain/adapters/openai.py +8 -8
- langchain/agents/__init__.py +15 -12
- langchain/agents/agent.py +174 -151
- langchain/agents/agent_iterator.py +50 -26
- langchain/agents/agent_toolkits/__init__.py +7 -6
- langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
- langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
- langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
- langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
- langchain/agents/agent_toolkits/csv/__init__.py +4 -2
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
- langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
- langchain/agents/agent_toolkits/github/toolkit.py +9 -9
- langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
- langchain/agents/agent_toolkits/json/base.py +1 -1
- langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
- langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
- langchain/agents/agent_toolkits/openapi/base.py +1 -1
- langchain/agents/agent_toolkits/openapi/planner.py +2 -2
- langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
- langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
- langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
- langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
- langchain/agents/agent_toolkits/powerbi/base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
- langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
- langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
- langchain/agents/agent_toolkits/python/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark/__init__.py +4 -2
- langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
- langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/sql/prompt.py +1 -1
- langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
- langchain/agents/agent_toolkits/vectorstore/base.py +4 -2
- langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
- langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
- langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
- langchain/agents/agent_types.py +6 -6
- langchain/agents/chat/base.py +8 -12
- langchain/agents/chat/output_parser.py +9 -6
- langchain/agents/chat/prompt.py +3 -4
- langchain/agents/conversational/base.py +11 -5
- langchain/agents/conversational/output_parser.py +4 -2
- langchain/agents/conversational/prompt.py +2 -3
- langchain/agents/conversational_chat/base.py +9 -5
- langchain/agents/conversational_chat/output_parser.py +9 -11
- langchain/agents/conversational_chat/prompt.py +5 -6
- langchain/agents/format_scratchpad/__init__.py +3 -3
- langchain/agents/format_scratchpad/log_to_messages.py +1 -1
- langchain/agents/format_scratchpad/openai_functions.py +8 -6
- langchain/agents/format_scratchpad/tools.py +5 -3
- langchain/agents/format_scratchpad/xml.py +33 -2
- langchain/agents/initialize.py +17 -9
- langchain/agents/json_chat/base.py +19 -18
- langchain/agents/json_chat/prompt.py +2 -3
- langchain/agents/load_tools.py +2 -1
- langchain/agents/loading.py +28 -18
- langchain/agents/mrkl/base.py +11 -4
- langchain/agents/mrkl/output_parser.py +17 -13
- langchain/agents/mrkl/prompt.py +1 -2
- langchain/agents/openai_assistant/base.py +81 -71
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +2 -0
- langchain/agents/openai_functions_agent/base.py +47 -37
- langchain/agents/openai_functions_multi_agent/base.py +40 -27
- langchain/agents/openai_tools/base.py +9 -8
- langchain/agents/output_parsers/__init__.py +3 -3
- langchain/agents/output_parsers/json.py +8 -6
- langchain/agents/output_parsers/openai_functions.py +24 -9
- langchain/agents/output_parsers/openai_tools.py +16 -4
- langchain/agents/output_parsers/react_json_single_input.py +13 -5
- langchain/agents/output_parsers/react_single_input.py +18 -11
- langchain/agents/output_parsers/self_ask.py +5 -2
- langchain/agents/output_parsers/tools.py +32 -13
- langchain/agents/output_parsers/xml.py +102 -28
- langchain/agents/react/agent.py +5 -4
- langchain/agents/react/base.py +26 -17
- langchain/agents/react/output_parser.py +7 -6
- langchain/agents/react/textworld_prompt.py +0 -1
- langchain/agents/react/wiki_prompt.py +14 -15
- langchain/agents/schema.py +5 -2
- langchain/agents/self_ask_with_search/base.py +23 -15
- langchain/agents/self_ask_with_search/prompt.py +0 -1
- langchain/agents/structured_chat/base.py +19 -11
- langchain/agents/structured_chat/output_parser.py +29 -18
- langchain/agents/structured_chat/prompt.py +3 -4
- langchain/agents/tool_calling_agent/base.py +8 -6
- langchain/agents/tools.py +5 -2
- langchain/agents/utils.py +2 -3
- langchain/agents/xml/base.py +12 -6
- langchain/agents/xml/prompt.py +1 -2
- langchain/cache.py +12 -12
- langchain/callbacks/__init__.py +11 -11
- langchain/callbacks/aim_callback.py +2 -2
- langchain/callbacks/argilla_callback.py +1 -1
- langchain/callbacks/arize_callback.py +1 -1
- langchain/callbacks/arthur_callback.py +1 -1
- langchain/callbacks/base.py +7 -7
- langchain/callbacks/clearml_callback.py +1 -1
- langchain/callbacks/comet_ml_callback.py +1 -1
- langchain/callbacks/confident_callback.py +1 -1
- langchain/callbacks/context_callback.py +1 -1
- langchain/callbacks/flyte_callback.py +1 -1
- langchain/callbacks/human.py +2 -2
- langchain/callbacks/infino_callback.py +1 -1
- langchain/callbacks/labelstudio_callback.py +1 -1
- langchain/callbacks/llmonitor_callback.py +1 -1
- langchain/callbacks/manager.py +5 -5
- langchain/callbacks/mlflow_callback.py +2 -2
- langchain/callbacks/openai_info.py +1 -1
- langchain/callbacks/promptlayer_callback.py +1 -1
- langchain/callbacks/sagemaker_callback.py +1 -1
- langchain/callbacks/streaming_aiter.py +17 -3
- langchain/callbacks/streaming_aiter_final_only.py +16 -5
- langchain/callbacks/streaming_stdout_final_only.py +10 -3
- langchain/callbacks/streamlit/__init__.py +3 -2
- langchain/callbacks/streamlit/mutable_expander.py +1 -1
- langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
- langchain/callbacks/tracers/__init__.py +1 -1
- langchain/callbacks/tracers/comet.py +1 -1
- langchain/callbacks/tracers/evaluation.py +1 -1
- langchain/callbacks/tracers/log_stream.py +1 -1
- langchain/callbacks/tracers/logging.py +12 -1
- langchain/callbacks/tracers/stdout.py +1 -1
- langchain/callbacks/trubrics_callback.py +1 -1
- langchain/callbacks/utils.py +4 -4
- langchain/callbacks/wandb_callback.py +1 -1
- langchain/callbacks/whylabs_callback.py +1 -1
- langchain/chains/api/base.py +41 -23
- langchain/chains/api/news_docs.py +1 -2
- langchain/chains/api/open_meteo_docs.py +1 -2
- langchain/chains/api/openapi/requests_chain.py +1 -1
- langchain/chains/api/openapi/response_chain.py +1 -1
- langchain/chains/api/podcast_docs.py +1 -2
- langchain/chains/api/prompt.py +1 -2
- langchain/chains/api/tmdb_docs.py +1 -2
- langchain/chains/base.py +96 -56
- langchain/chains/chat_vector_db/prompts.py +2 -3
- langchain/chains/combine_documents/__init__.py +1 -1
- langchain/chains/combine_documents/base.py +30 -11
- langchain/chains/combine_documents/map_reduce.py +41 -30
- langchain/chains/combine_documents/map_rerank.py +39 -24
- langchain/chains/combine_documents/reduce.py +48 -26
- langchain/chains/combine_documents/refine.py +27 -17
- langchain/chains/combine_documents/stuff.py +24 -13
- langchain/chains/constitutional_ai/base.py +11 -4
- langchain/chains/constitutional_ai/principles.py +22 -25
- langchain/chains/constitutional_ai/prompts.py +25 -28
- langchain/chains/conversation/base.py +9 -4
- langchain/chains/conversation/memory.py +5 -5
- langchain/chains/conversation/prompt.py +5 -5
- langchain/chains/conversational_retrieval/base.py +108 -79
- langchain/chains/conversational_retrieval/prompts.py +2 -3
- langchain/chains/elasticsearch_database/base.py +10 -10
- langchain/chains/elasticsearch_database/prompts.py +2 -3
- langchain/chains/ernie_functions/__init__.py +2 -2
- langchain/chains/example_generator.py +3 -1
- langchain/chains/flare/base.py +28 -12
- langchain/chains/flare/prompts.py +2 -0
- langchain/chains/graph_qa/cypher.py +2 -2
- langchain/chains/graph_qa/falkordb.py +1 -1
- langchain/chains/graph_qa/gremlin.py +1 -1
- langchain/chains/graph_qa/neptune_sparql.py +1 -1
- langchain/chains/graph_qa/prompts.py +2 -2
- langchain/chains/history_aware_retriever.py +2 -1
- langchain/chains/hyde/base.py +6 -5
- langchain/chains/hyde/prompts.py +5 -6
- langchain/chains/llm.py +82 -61
- langchain/chains/llm_bash/__init__.py +3 -2
- langchain/chains/llm_checker/base.py +19 -6
- langchain/chains/llm_checker/prompt.py +3 -4
- langchain/chains/llm_math/base.py +25 -10
- langchain/chains/llm_math/prompt.py +1 -2
- langchain/chains/llm_summarization_checker/base.py +22 -7
- langchain/chains/llm_symbolic_math/__init__.py +3 -2
- langchain/chains/loading.py +155 -97
- langchain/chains/mapreduce.py +4 -3
- langchain/chains/moderation.py +11 -9
- langchain/chains/natbot/base.py +11 -9
- langchain/chains/natbot/crawler.py +102 -76
- langchain/chains/natbot/prompt.py +2 -3
- langchain/chains/openai_functions/__init__.py +7 -7
- langchain/chains/openai_functions/base.py +15 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +21 -11
- langchain/chains/openai_functions/extraction.py +19 -19
- langchain/chains/openai_functions/openapi.py +39 -35
- langchain/chains/openai_functions/qa_with_structure.py +22 -15
- langchain/chains/openai_functions/tagging.py +4 -4
- langchain/chains/openai_tools/extraction.py +7 -8
- langchain/chains/qa_generation/base.py +8 -3
- langchain/chains/qa_generation/prompt.py +5 -5
- langchain/chains/qa_with_sources/base.py +17 -6
- langchain/chains/qa_with_sources/loading.py +16 -8
- langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
- langchain/chains/qa_with_sources/refine_prompts.py +0 -1
- langchain/chains/qa_with_sources/retrieval.py +15 -6
- langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
- langchain/chains/qa_with_sources/vector_db.py +21 -8
- langchain/chains/query_constructor/base.py +37 -34
- langchain/chains/query_constructor/ir.py +4 -4
- langchain/chains/query_constructor/parser.py +101 -34
- langchain/chains/query_constructor/prompt.py +5 -6
- langchain/chains/question_answering/chain.py +21 -10
- langchain/chains/question_answering/map_reduce_prompt.py +14 -14
- langchain/chains/question_answering/map_rerank_prompt.py +3 -3
- langchain/chains/question_answering/refine_prompts.py +2 -5
- langchain/chains/question_answering/stuff_prompt.py +5 -5
- langchain/chains/retrieval.py +1 -3
- langchain/chains/retrieval_qa/base.py +38 -27
- langchain/chains/retrieval_qa/prompt.py +1 -2
- langchain/chains/router/__init__.py +3 -3
- langchain/chains/router/base.py +38 -22
- langchain/chains/router/embedding_router.py +15 -8
- langchain/chains/router/llm_router.py +23 -20
- langchain/chains/router/multi_prompt.py +5 -2
- langchain/chains/router/multi_retrieval_qa.py +28 -5
- langchain/chains/sequential.py +30 -18
- langchain/chains/sql_database/prompt.py +14 -16
- langchain/chains/sql_database/query.py +7 -5
- langchain/chains/structured_output/__init__.py +1 -1
- langchain/chains/structured_output/base.py +77 -67
- langchain/chains/summarize/chain.py +11 -5
- langchain/chains/summarize/map_reduce_prompt.py +0 -1
- langchain/chains/summarize/stuff_prompt.py +0 -1
- langchain/chains/transform.py +9 -6
- langchain/chat_loaders/facebook_messenger.py +1 -1
- langchain/chat_loaders/langsmith.py +1 -1
- langchain/chat_loaders/utils.py +3 -3
- langchain/chat_models/__init__.py +20 -19
- langchain/chat_models/anthropic.py +1 -1
- langchain/chat_models/azureml_endpoint.py +1 -1
- langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
- langchain/chat_models/base.py +213 -139
- langchain/chat_models/bedrock.py +1 -1
- langchain/chat_models/fake.py +1 -1
- langchain/chat_models/meta.py +1 -1
- langchain/chat_models/pai_eas_endpoint.py +1 -1
- langchain/chat_models/promptlayer_openai.py +1 -1
- langchain/chat_models/volcengine_maas.py +1 -1
- langchain/docstore/base.py +1 -1
- langchain/document_loaders/__init__.py +9 -9
- langchain/document_loaders/airbyte.py +3 -3
- langchain/document_loaders/assemblyai.py +1 -1
- langchain/document_loaders/azure_blob_storage_container.py +1 -1
- langchain/document_loaders/azure_blob_storage_file.py +1 -1
- langchain/document_loaders/baiducloud_bos_file.py +1 -1
- langchain/document_loaders/base.py +1 -1
- langchain/document_loaders/blob_loaders/__init__.py +1 -1
- langchain/document_loaders/blob_loaders/schema.py +1 -4
- langchain/document_loaders/blockchain.py +1 -1
- langchain/document_loaders/chatgpt.py +1 -1
- langchain/document_loaders/college_confidential.py +1 -1
- langchain/document_loaders/confluence.py +1 -1
- langchain/document_loaders/email.py +1 -1
- langchain/document_loaders/facebook_chat.py +1 -1
- langchain/document_loaders/markdown.py +1 -1
- langchain/document_loaders/notebook.py +1 -1
- langchain/document_loaders/org_mode.py +1 -1
- langchain/document_loaders/parsers/__init__.py +1 -1
- langchain/document_loaders/parsers/docai.py +1 -1
- langchain/document_loaders/parsers/generic.py +1 -1
- langchain/document_loaders/parsers/html/__init__.py +1 -1
- langchain/document_loaders/parsers/html/bs4.py +1 -1
- langchain/document_loaders/parsers/language/cobol.py +1 -1
- langchain/document_loaders/parsers/language/python.py +1 -1
- langchain/document_loaders/parsers/msword.py +1 -1
- langchain/document_loaders/parsers/pdf.py +5 -5
- langchain/document_loaders/parsers/registry.py +1 -1
- langchain/document_loaders/pdf.py +8 -8
- langchain/document_loaders/powerpoint.py +1 -1
- langchain/document_loaders/pyspark_dataframe.py +1 -1
- langchain/document_loaders/telegram.py +2 -2
- langchain/document_loaders/tencent_cos_directory.py +1 -1
- langchain/document_loaders/unstructured.py +5 -5
- langchain/document_loaders/url_playwright.py +1 -1
- langchain/document_loaders/whatsapp_chat.py +1 -1
- langchain/document_loaders/youtube.py +2 -2
- langchain/document_transformers/__init__.py +3 -3
- langchain/document_transformers/beautiful_soup_transformer.py +1 -1
- langchain/document_transformers/doctran_text_extract.py +1 -1
- langchain/document_transformers/doctran_text_qa.py +1 -1
- langchain/document_transformers/doctran_text_translate.py +1 -1
- langchain/document_transformers/embeddings_redundant_filter.py +3 -3
- langchain/document_transformers/google_translate.py +1 -1
- langchain/document_transformers/html2text.py +1 -1
- langchain/document_transformers/nuclia_text_transform.py +1 -1
- langchain/embeddings/__init__.py +5 -5
- langchain/embeddings/base.py +35 -24
- langchain/embeddings/cache.py +37 -32
- langchain/embeddings/fake.py +1 -1
- langchain/embeddings/huggingface.py +2 -2
- langchain/evaluation/__init__.py +22 -22
- langchain/evaluation/agents/trajectory_eval_chain.py +26 -25
- langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
- langchain/evaluation/comparison/__init__.py +1 -1
- langchain/evaluation/comparison/eval_chain.py +21 -13
- langchain/evaluation/comparison/prompt.py +1 -2
- langchain/evaluation/criteria/__init__.py +1 -1
- langchain/evaluation/criteria/eval_chain.py +23 -11
- langchain/evaluation/criteria/prompt.py +2 -3
- langchain/evaluation/embedding_distance/base.py +34 -20
- langchain/evaluation/exact_match/base.py +14 -1
- langchain/evaluation/loading.py +16 -11
- langchain/evaluation/parsing/base.py +20 -4
- langchain/evaluation/parsing/json_distance.py +24 -10
- langchain/evaluation/parsing/json_schema.py +13 -12
- langchain/evaluation/qa/__init__.py +1 -1
- langchain/evaluation/qa/eval_chain.py +20 -5
- langchain/evaluation/qa/eval_prompt.py +7 -8
- langchain/evaluation/qa/generate_chain.py +4 -1
- langchain/evaluation/qa/generate_prompt.py +2 -4
- langchain/evaluation/regex_match/base.py +9 -1
- langchain/evaluation/schema.py +38 -30
- langchain/evaluation/scoring/__init__.py +1 -1
- langchain/evaluation/scoring/eval_chain.py +23 -15
- langchain/evaluation/scoring/prompt.py +0 -1
- langchain/evaluation/string_distance/base.py +20 -9
- langchain/globals.py +12 -11
- langchain/graphs/__init__.py +6 -6
- langchain/graphs/graph_document.py +1 -1
- langchain/graphs/networkx_graph.py +2 -2
- langchain/hub.py +9 -11
- langchain/indexes/__init__.py +3 -3
- langchain/indexes/_sql_record_manager.py +63 -46
- langchain/indexes/prompts/entity_extraction.py +1 -2
- langchain/indexes/prompts/entity_summarization.py +1 -2
- langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
- langchain/indexes/vectorstore.py +35 -19
- langchain/llms/__init__.py +13 -13
- langchain/llms/ai21.py +1 -1
- langchain/llms/azureml_endpoint.py +4 -4
- langchain/llms/base.py +15 -7
- langchain/llms/bedrock.py +1 -1
- langchain/llms/cloudflare_workersai.py +1 -1
- langchain/llms/gradient_ai.py +1 -1
- langchain/llms/loading.py +1 -1
- langchain/llms/openai.py +1 -1
- langchain/llms/sagemaker_endpoint.py +1 -1
- langchain/load/dump.py +1 -1
- langchain/load/load.py +1 -1
- langchain/load/serializable.py +3 -3
- langchain/memory/__init__.py +3 -3
- langchain/memory/buffer.py +14 -7
- langchain/memory/buffer_window.py +2 -0
- langchain/memory/chat_memory.py +14 -8
- langchain/memory/chat_message_histories/__init__.py +1 -1
- langchain/memory/chat_message_histories/astradb.py +1 -1
- langchain/memory/chat_message_histories/cassandra.py +1 -1
- langchain/memory/chat_message_histories/cosmos_db.py +1 -1
- langchain/memory/chat_message_histories/dynamodb.py +1 -1
- langchain/memory/chat_message_histories/elasticsearch.py +1 -1
- langchain/memory/chat_message_histories/file.py +1 -1
- langchain/memory/chat_message_histories/firestore.py +1 -1
- langchain/memory/chat_message_histories/momento.py +1 -1
- langchain/memory/chat_message_histories/mongodb.py +1 -1
- langchain/memory/chat_message_histories/neo4j.py +1 -1
- langchain/memory/chat_message_histories/postgres.py +1 -1
- langchain/memory/chat_message_histories/redis.py +1 -1
- langchain/memory/chat_message_histories/rocksetdb.py +1 -1
- langchain/memory/chat_message_histories/singlestoredb.py +1 -1
- langchain/memory/chat_message_histories/streamlit.py +1 -1
- langchain/memory/chat_message_histories/upstash_redis.py +1 -1
- langchain/memory/chat_message_histories/xata.py +1 -1
- langchain/memory/chat_message_histories/zep.py +1 -1
- langchain/memory/combined.py +14 -13
- langchain/memory/entity.py +131 -61
- langchain/memory/prompt.py +10 -11
- langchain/memory/readonly.py +0 -2
- langchain/memory/simple.py +4 -3
- langchain/memory/summary.py +43 -11
- langchain/memory/summary_buffer.py +20 -8
- langchain/memory/token_buffer.py +2 -0
- langchain/memory/utils.py +3 -2
- langchain/memory/vectorstore.py +12 -5
- langchain/memory/vectorstore_token_buffer_memory.py +5 -5
- langchain/model_laboratory.py +12 -11
- langchain/output_parsers/__init__.py +4 -4
- langchain/output_parsers/boolean.py +7 -4
- langchain/output_parsers/combining.py +14 -7
- langchain/output_parsers/datetime.py +32 -31
- langchain/output_parsers/enum.py +10 -4
- langchain/output_parsers/fix.py +60 -53
- langchain/output_parsers/format_instructions.py +6 -8
- langchain/output_parsers/json.py +2 -2
- langchain/output_parsers/list.py +2 -2
- langchain/output_parsers/loading.py +9 -9
- langchain/output_parsers/openai_functions.py +3 -3
- langchain/output_parsers/openai_tools.py +1 -1
- langchain/output_parsers/pandas_dataframe.py +59 -48
- langchain/output_parsers/prompts.py +1 -2
- langchain/output_parsers/rail_parser.py +1 -1
- langchain/output_parsers/regex.py +9 -8
- langchain/output_parsers/regex_dict.py +7 -10
- langchain/output_parsers/retry.py +99 -80
- langchain/output_parsers/structured.py +21 -6
- langchain/output_parsers/yaml.py +19 -11
- langchain/prompts/__init__.py +5 -3
- langchain/prompts/base.py +5 -5
- langchain/prompts/chat.py +8 -8
- langchain/prompts/example_selector/__init__.py +3 -1
- langchain/prompts/example_selector/semantic_similarity.py +2 -2
- langchain/prompts/few_shot.py +1 -1
- langchain/prompts/loading.py +3 -3
- langchain/prompts/prompt.py +1 -1
- langchain/pydantic_v1/__init__.py +1 -1
- langchain/retrievers/__init__.py +5 -5
- langchain/retrievers/bedrock.py +2 -2
- langchain/retrievers/bm25.py +1 -1
- langchain/retrievers/contextual_compression.py +14 -8
- langchain/retrievers/docarray.py +1 -1
- langchain/retrievers/document_compressors/__init__.py +5 -4
- langchain/retrievers/document_compressors/base.py +12 -6
- langchain/retrievers/document_compressors/chain_extract.py +5 -3
- langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
- langchain/retrievers/document_compressors/chain_filter.py +9 -9
- langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +17 -15
- langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -0
- langchain/retrievers/document_compressors/embeddings_filter.py +24 -17
- langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
- langchain/retrievers/document_compressors/listwise_rerank.py +8 -5
- langchain/retrievers/ensemble.py +30 -27
- langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
- langchain/retrievers/google_vertex_ai_search.py +2 -2
- langchain/retrievers/kendra.py +10 -10
- langchain/retrievers/llama_index.py +1 -1
- langchain/retrievers/merger_retriever.py +11 -11
- langchain/retrievers/milvus.py +1 -1
- langchain/retrievers/multi_query.py +35 -27
- langchain/retrievers/multi_vector.py +24 -9
- langchain/retrievers/parent_document_retriever.py +33 -9
- langchain/retrievers/re_phraser.py +6 -5
- langchain/retrievers/self_query/base.py +157 -127
- langchain/retrievers/time_weighted_retriever.py +21 -7
- langchain/retrievers/zilliz.py +1 -1
- langchain/runnables/hub.py +12 -0
- langchain/runnables/openai_functions.py +12 -2
- langchain/schema/__init__.py +23 -23
- langchain/schema/cache.py +1 -1
- langchain/schema/callbacks/base.py +7 -7
- langchain/schema/callbacks/manager.py +19 -19
- langchain/schema/callbacks/tracers/base.py +1 -1
- langchain/schema/callbacks/tracers/evaluation.py +1 -1
- langchain/schema/callbacks/tracers/langchain.py +1 -1
- langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
- langchain/schema/callbacks/tracers/log_stream.py +1 -1
- langchain/schema/callbacks/tracers/schemas.py +8 -8
- langchain/schema/callbacks/tracers/stdout.py +3 -3
- langchain/schema/document.py +1 -1
- langchain/schema/language_model.py +2 -2
- langchain/schema/messages.py +12 -12
- langchain/schema/output.py +3 -3
- langchain/schema/output_parser.py +3 -3
- langchain/schema/runnable/__init__.py +3 -3
- langchain/schema/runnable/base.py +9 -9
- langchain/schema/runnable/config.py +5 -5
- langchain/schema/runnable/configurable.py +1 -1
- langchain/schema/runnable/history.py +1 -1
- langchain/schema/runnable/passthrough.py +1 -1
- langchain/schema/runnable/utils.py +16 -16
- langchain/schema/vectorstore.py +1 -1
- langchain/smith/__init__.py +2 -1
- langchain/smith/evaluation/__init__.py +2 -2
- langchain/smith/evaluation/config.py +9 -23
- langchain/smith/evaluation/name_generation.py +3 -3
- langchain/smith/evaluation/progress.py +22 -4
- langchain/smith/evaluation/runner_utils.py +416 -247
- langchain/smith/evaluation/string_run_evaluator.py +102 -68
- langchain/storage/__init__.py +2 -2
- langchain/storage/_lc_store.py +4 -2
- langchain/storage/encoder_backed.py +7 -2
- langchain/storage/file_system.py +19 -16
- langchain/storage/in_memory.py +1 -1
- langchain/storage/upstash_redis.py +1 -1
- langchain/text_splitter.py +15 -15
- langchain/tools/__init__.py +28 -26
- langchain/tools/ainetwork/app.py +1 -1
- langchain/tools/ainetwork/base.py +1 -1
- langchain/tools/ainetwork/owner.py +1 -1
- langchain/tools/ainetwork/rule.py +1 -1
- langchain/tools/ainetwork/transfer.py +1 -1
- langchain/tools/ainetwork/value.py +1 -1
- langchain/tools/amadeus/closest_airport.py +1 -1
- langchain/tools/amadeus/flight_search.py +1 -1
- langchain/tools/azure_cognitive_services/__init__.py +1 -1
- langchain/tools/base.py +4 -4
- langchain/tools/bearly/tool.py +1 -1
- langchain/tools/bing_search/__init__.py +1 -1
- langchain/tools/bing_search/tool.py +1 -1
- langchain/tools/dataforseo_api_search/__init__.py +1 -1
- langchain/tools/dataforseo_api_search/tool.py +1 -1
- langchain/tools/ddg_search/tool.py +1 -1
- langchain/tools/e2b_data_analysis/tool.py +2 -2
- langchain/tools/edenai/__init__.py +1 -1
- langchain/tools/file_management/__init__.py +1 -1
- langchain/tools/file_management/copy.py +1 -1
- langchain/tools/file_management/delete.py +1 -1
- langchain/tools/gmail/__init__.py +2 -2
- langchain/tools/gmail/get_message.py +1 -1
- langchain/tools/gmail/search.py +1 -1
- langchain/tools/gmail/send_message.py +1 -1
- langchain/tools/google_finance/__init__.py +1 -1
- langchain/tools/google_finance/tool.py +1 -1
- langchain/tools/google_scholar/__init__.py +1 -1
- langchain/tools/google_scholar/tool.py +1 -1
- langchain/tools/google_search/__init__.py +1 -1
- langchain/tools/google_search/tool.py +1 -1
- langchain/tools/google_serper/__init__.py +1 -1
- langchain/tools/google_serper/tool.py +1 -1
- langchain/tools/google_trends/__init__.py +1 -1
- langchain/tools/google_trends/tool.py +1 -1
- langchain/tools/jira/tool.py +20 -1
- langchain/tools/json/tool.py +25 -3
- langchain/tools/memorize/tool.py +1 -1
- langchain/tools/multion/__init__.py +1 -1
- langchain/tools/multion/update_session.py +1 -1
- langchain/tools/office365/__init__.py +2 -2
- langchain/tools/office365/events_search.py +1 -1
- langchain/tools/office365/messages_search.py +1 -1
- langchain/tools/office365/send_event.py +1 -1
- langchain/tools/office365/send_message.py +1 -1
- langchain/tools/openapi/utils/api_models.py +6 -6
- langchain/tools/playwright/__init__.py +5 -5
- langchain/tools/playwright/click.py +1 -1
- langchain/tools/playwright/extract_hyperlinks.py +1 -1
- langchain/tools/playwright/get_elements.py +1 -1
- langchain/tools/playwright/navigate.py +1 -1
- langchain/tools/plugin.py +2 -2
- langchain/tools/powerbi/tool.py +1 -1
- langchain/tools/python/__init__.py +3 -2
- langchain/tools/reddit_search/tool.py +1 -1
- langchain/tools/render.py +2 -2
- langchain/tools/requests/tool.py +2 -2
- langchain/tools/searchapi/tool.py +1 -1
- langchain/tools/searx_search/tool.py +1 -1
- langchain/tools/slack/get_message.py +1 -1
- langchain/tools/spark_sql/tool.py +1 -1
- langchain/tools/sql_database/tool.py +1 -1
- langchain/tools/tavily_search/__init__.py +1 -1
- langchain/tools/tavily_search/tool.py +1 -1
- langchain/tools/zapier/__init__.py +1 -1
- langchain/tools/zapier/tool.py +24 -2
- langchain/utilities/__init__.py +4 -4
- langchain/utilities/arcee.py +4 -4
- langchain/utilities/clickup.py +4 -4
- langchain/utilities/dalle_image_generator.py +1 -1
- langchain/utilities/dataforseo_api_search.py +1 -1
- langchain/utilities/opaqueprompts.py +1 -1
- langchain/utilities/reddit_search.py +1 -1
- langchain/utilities/sql_database.py +1 -1
- langchain/utilities/tavily_search.py +1 -1
- langchain/utilities/vertexai.py +2 -2
- langchain/utils/__init__.py +1 -1
- langchain/utils/aiter.py +1 -1
- langchain/utils/html.py +3 -3
- langchain/utils/input.py +1 -1
- langchain/utils/iter.py +1 -1
- langchain/utils/json_schema.py +1 -3
- langchain/utils/strings.py +1 -1
- langchain/utils/utils.py +6 -6
- langchain/vectorstores/__init__.py +5 -5
- langchain/vectorstores/alibabacloud_opensearch.py +1 -1
- langchain/vectorstores/azure_cosmos_db.py +1 -1
- langchain/vectorstores/clickhouse.py +1 -1
- langchain/vectorstores/elastic_vector_search.py +1 -1
- langchain/vectorstores/elasticsearch.py +2 -2
- langchain/vectorstores/myscale.py +1 -1
- langchain/vectorstores/neo4j_vector.py +1 -1
- langchain/vectorstores/pgembedding.py +1 -1
- langchain/vectorstores/qdrant.py +1 -1
- langchain/vectorstores/redis/__init__.py +1 -1
- langchain/vectorstores/redis/base.py +1 -1
- langchain/vectorstores/redis/filters.py +4 -4
- langchain/vectorstores/redis/schema.py +6 -6
- langchain/vectorstores/sklearn.py +2 -2
- langchain/vectorstores/starrocks.py +1 -1
- langchain/vectorstores/utils.py +1 -1
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA +4 -14
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/RECORD +590 -591
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/WHEEL +1 -1
- langchain/smith/evaluation/utils.py +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
- {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0
langchain/chains/moderation.py
CHANGED
|
@@ -8,6 +8,7 @@ from langchain_core.callbacks import (
|
|
|
8
8
|
)
|
|
9
9
|
from langchain_core.utils import check_package_version, get_from_dict_or_env
|
|
10
10
|
from pydantic import Field, model_validator
|
|
11
|
+
from typing_extensions import override
|
|
11
12
|
|
|
12
13
|
from langchain.chains.base import Chain
|
|
13
14
|
|
|
@@ -26,6 +27,7 @@ class OpenAIModerationChain(Chain):
|
|
|
26
27
|
|
|
27
28
|
from langchain.chains import OpenAIModerationChain
|
|
28
29
|
moderation = OpenAIModerationChain()
|
|
30
|
+
|
|
29
31
|
"""
|
|
30
32
|
|
|
31
33
|
client: Any = None #: :meta private:
|
|
@@ -45,7 +47,9 @@ class OpenAIModerationChain(Chain):
|
|
|
45
47
|
def validate_environment(cls, values: dict) -> Any:
|
|
46
48
|
"""Validate that api key and python package exists in environment."""
|
|
47
49
|
openai_api_key = get_from_dict_or_env(
|
|
48
|
-
values, "openai_api_key", "OPENAI_API_KEY"
|
|
50
|
+
values,
|
|
51
|
+
"openai_api_key",
|
|
52
|
+
"OPENAI_API_KEY",
|
|
49
53
|
)
|
|
50
54
|
openai_organization = get_from_dict_or_env(
|
|
51
55
|
values,
|
|
@@ -70,11 +74,12 @@ class OpenAIModerationChain(Chain):
|
|
|
70
74
|
values["client"] = openai.OpenAI(api_key=openai_api_key)
|
|
71
75
|
values["async_client"] = openai.AsyncOpenAI(api_key=openai_api_key)
|
|
72
76
|
|
|
73
|
-
except ImportError:
|
|
74
|
-
raise ImportError(
|
77
|
+
except ImportError as e:
|
|
78
|
+
msg = (
|
|
75
79
|
"Could not import openai python package. "
|
|
76
80
|
"Please install it with `pip install openai`."
|
|
77
81
|
)
|
|
82
|
+
raise ImportError(msg) from e
|
|
78
83
|
return values
|
|
79
84
|
|
|
80
85
|
@property
|
|
@@ -94,18 +99,15 @@ class OpenAIModerationChain(Chain):
|
|
|
94
99
|
return [self.output_key]
|
|
95
100
|
|
|
96
101
|
def _moderate(self, text: str, results: Any) -> str:
|
|
97
|
-
if self.openai_pre_1_0:
|
|
98
|
-
condition = results["flagged"]
|
|
99
|
-
else:
|
|
100
|
-
condition = results.flagged
|
|
102
|
+
condition = results["flagged"] if self.openai_pre_1_0 else results.flagged
|
|
101
103
|
if condition:
|
|
102
104
|
error_str = "Text was found that violates OpenAI's content policy."
|
|
103
105
|
if self.error:
|
|
104
106
|
raise ValueError(error_str)
|
|
105
|
-
else:
|
106
|
-
return error_str
|
|
107
|
+
return error_str
|
|
107
108
|
return text
|
|
108
109
|
|
|
110
|
+
@override
|
|
109
111
|
def _call(
|
|
110
112
|
self,
|
|
111
113
|
inputs: dict[str, Any],
|
langchain/chains/natbot/base.py
CHANGED
|
@@ -6,9 +6,7 @@ import warnings
|
|
|
6
6
|
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
from langchain_core._api import deprecated
|
|
9
|
-
from langchain_core.caches import BaseCache as BaseCache
|
|
10
9
|
from langchain_core.callbacks import CallbackManagerForChainRun
|
|
11
|
-
from langchain_core.callbacks import Callbacks as Callbacks
|
|
12
10
|
from langchain_core.language_models import BaseLanguageModel
|
|
13
11
|
from langchain_core.output_parsers import StrOutputParser
|
|
14
12
|
from langchain_core.runnables import Runnable
|
|
@@ -49,6 +47,7 @@ class NatBotChain(Chain):
|
|
|
49
47
|
|
|
50
48
|
from langchain.chains import NatBotChain
|
|
51
49
|
natbot = NatBotChain.from_default("Buy me a new hat.")
|
|
50
|
+
|
|
52
51
|
"""
|
|
53
52
|
|
|
54
53
|
llm_chain: Runnable
|
|
@@ -68,12 +67,13 @@ class NatBotChain(Chain):
|
|
|
68
67
|
|
|
69
68
|
@model_validator(mode="before")
|
|
70
69
|
@classmethod
|
|
71
|
-
def
|
|
70
|
+
def _raise_deprecation(cls, values: dict) -> Any:
|
|
72
71
|
if "llm" in values:
|
|
73
72
|
warnings.warn(
|
|
74
73
|
"Directly instantiating an NatBotChain with an llm is deprecated. "
|
|
75
74
|
"Please instantiate with llm_chain argument or using the from_llm "
|
|
76
|
-
"class method."
|
|
75
|
+
"class method.",
|
|
76
|
+
stacklevel=5,
|
|
77
77
|
)
|
|
78
78
|
if "llm_chain" not in values and values["llm"] is not None:
|
|
79
79
|
values["llm_chain"] = PROMPT | values["llm"] | StrOutputParser()
|
|
@@ -82,15 +82,19 @@ class NatBotChain(Chain):
|
|
|
82
82
|
@classmethod
|
|
83
83
|
def from_default(cls, objective: str, **kwargs: Any) -> NatBotChain:
|
|
84
84
|
"""Load with default LLMChain."""
|
|
85
|
-
|
|
85
|
+
msg = (
|
|
86
86
|
"This method is no longer implemented. Please use from_llm."
|
|
87
87
|
"llm = OpenAI(temperature=0.5, best_of=10, n=3, max_tokens=50)"
|
|
88
88
|
"For example, NatBotChain.from_llm(llm, objective)"
|
|
89
89
|
)
|
|
90
|
+
raise NotImplementedError(msg)
|
|
90
91
|
|
|
91
92
|
@classmethod
|
|
92
93
|
def from_llm(
|
|
93
|
-
cls,
|
|
94
|
+
cls,
|
|
95
|
+
llm: BaseLanguageModel,
|
|
96
|
+
objective: str,
|
|
97
|
+
**kwargs: Any,
|
|
94
98
|
) -> NatBotChain:
|
|
95
99
|
"""Load from LLM."""
|
|
96
100
|
llm_chain = PROMPT | llm | StrOutputParser()
|
|
@@ -148,6 +152,7 @@ class NatBotChain(Chain):
|
|
|
148
152
|
|
|
149
153
|
browser_content = "...."
|
|
150
154
|
llm_command = natbot.run("www.google.com", browser_content)
|
|
155
|
+
|
|
151
156
|
"""
|
|
152
157
|
_inputs = {
|
|
153
158
|
self.input_url_key: url,
|
|
@@ -158,6 +163,3 @@ class NatBotChain(Chain):
|
|
|
158
163
|
@property
|
|
159
164
|
def _chain_type(self) -> str:
|
|
160
165
|
return "nat_bot_chain"
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
NatBotChain.model_rebuild()
|
|
@@ -1,23 +1,17 @@
|
|
|
1
|
-
# flake8: noqa
|
|
2
1
|
import time
|
|
3
2
|
from sys import platform
|
|
4
3
|
from typing import (
|
|
5
4
|
TYPE_CHECKING,
|
|
6
5
|
Any,
|
|
7
|
-
Dict,
|
|
8
|
-
Iterable,
|
|
9
|
-
List,
|
|
10
6
|
Optional,
|
|
11
|
-
Set,
|
|
12
|
-
Tuple,
|
|
13
7
|
TypedDict,
|
|
14
8
|
Union,
|
|
15
9
|
)
|
|
16
10
|
|
|
17
11
|
if TYPE_CHECKING:
|
|
18
|
-
from playwright.sync_api import Browser, CDPSession, Page
|
|
12
|
+
from playwright.sync_api import Browser, CDPSession, Page
|
|
19
13
|
|
|
20
|
-
black_listed_elements:
|
|
14
|
+
black_listed_elements: set[str] = {
|
|
21
15
|
"html",
|
|
22
16
|
"head",
|
|
23
17
|
"title",
|
|
@@ -40,7 +34,7 @@ class ElementInViewPort(TypedDict):
|
|
|
40
34
|
backend_node_id: int
|
|
41
35
|
node_name: Optional[str]
|
|
42
36
|
node_value: Optional[str]
|
|
43
|
-
node_meta:
|
|
37
|
+
node_meta: list[str]
|
|
44
38
|
is_clickable: bool
|
|
45
39
|
origin_x: int
|
|
46
40
|
origin_y: int
|
|
@@ -67,38 +61,56 @@ class Crawler:
|
|
|
67
61
|
"""
|
|
68
62
|
|
|
69
63
|
def __init__(self) -> None:
|
|
64
|
+
"""Initialize the crawler."""
|
|
70
65
|
try:
|
|
71
66
|
from playwright.sync_api import sync_playwright
|
|
72
|
-
except ImportError:
|
|
73
|
-
|
|
67
|
+
except ImportError as e:
|
|
68
|
+
msg = (
|
|
74
69
|
"Could not import playwright python package. "
|
|
75
70
|
"Please install it with `pip install playwright`."
|
|
76
71
|
)
|
|
72
|
+
raise ImportError(msg) from e
|
|
77
73
|
self.browser: Browser = (
|
|
78
74
|
sync_playwright().start().chromium.launch(headless=False)
|
|
79
75
|
)
|
|
80
76
|
self.page: Page = self.browser.new_page()
|
|
81
77
|
self.page.set_viewport_size({"width": 1280, "height": 1080})
|
|
82
|
-
self.page_element_buffer:
|
|
78
|
+
self.page_element_buffer: dict[int, ElementInViewPort]
|
|
83
79
|
self.client: CDPSession
|
|
84
80
|
|
|
85
81
|
def go_to_page(self, url: str) -> None:
|
|
82
|
+
"""Navigate to the given URL.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
url: The URL to navigate to. If it does not contain a scheme, it will be
|
|
86
|
+
prefixed with "http://".
|
|
87
|
+
"""
|
|
86
88
|
self.page.goto(url=url if "://" in url else "http://" + url)
|
|
87
89
|
self.client = self.page.context.new_cdp_session(self.page)
|
|
88
90
|
self.page_element_buffer = {}
|
|
89
91
|
|
|
90
92
|
def scroll(self, direction: str) -> None:
|
|
93
|
+
"""Scroll the page in the given direction.
|
|
94
|
+
|
|
95
|
+
Args:
|
|
96
|
+
direction: The direction to scroll in, either "up" or "down".
|
|
97
|
+
"""
|
|
91
98
|
if direction == "up":
|
|
92
99
|
self.page.evaluate(
|
|
93
|
-
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;"
|
|
100
|
+
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;" # noqa: E501
|
|
94
101
|
)
|
|
95
102
|
elif direction == "down":
|
|
96
103
|
self.page.evaluate(
|
|
97
|
-
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;"
|
|
104
|
+
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;" # noqa: E501
|
|
98
105
|
)
|
|
99
106
|
|
|
100
|
-
def click(self,
|
|
101
|
-
|
|
107
|
+
def click(self, id_: Union[str, int]) -> None:
|
|
108
|
+
"""Click on an element with the given id.
|
|
109
|
+
|
|
110
|
+
Args:
|
|
111
|
+
id_: The id of the element to click on.
|
|
112
|
+
"""
|
|
113
|
+
# Inject javascript into the page which removes the target= attribute from links
|
|
102
114
|
js = """
|
|
103
115
|
links = document.getElementsByTagName("a");
|
|
104
116
|
for (var i = 0; i < links.length; i++) {
|
|
@@ -107,7 +119,7 @@ class Crawler:
|
|
|
107
119
|
"""
|
|
108
120
|
self.page.evaluate(js)
|
|
109
121
|
|
|
110
|
-
element = self.page_element_buffer.get(int(
|
|
122
|
+
element = self.page_element_buffer.get(int(id_))
|
|
111
123
|
if element:
|
|
112
124
|
x: float = element["center_x"]
|
|
113
125
|
y: float = element["center_y"]
|
|
@@ -116,14 +128,26 @@ class Crawler:
|
|
|
116
128
|
else:
|
|
117
129
|
print("Could not find element") # noqa: T201
|
|
118
130
|
|
|
119
|
-
def type(self,
|
|
120
|
-
|
|
131
|
+
def type(self, id_: Union[str, int], text: str) -> None:
|
|
132
|
+
"""Type text into an element with the given id.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
id_: The id of the element to type into.
|
|
136
|
+
text: The text to type into the element.
|
|
137
|
+
"""
|
|
138
|
+
self.click(id_)
|
|
121
139
|
self.page.keyboard.type(text)
|
|
122
140
|
|
|
123
141
|
def enter(self) -> None:
|
|
142
|
+
"""Press the Enter key."""
|
|
124
143
|
self.page.keyboard.press("Enter")
|
|
125
144
|
|
|
126
|
-
def crawl(self) ->
|
|
145
|
+
def crawl(self) -> list[str]:
|
|
146
|
+
"""Crawl the current page.
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
A list of the elements in the viewport.
|
|
150
|
+
"""
|
|
127
151
|
page = self.page
|
|
128
152
|
page_element_buffer = self.page_element_buffer
|
|
129
153
|
start = time.time()
|
|
@@ -141,10 +165,10 @@ class Crawler:
|
|
|
141
165
|
win_right_bound: float = win_left_bound + win_width
|
|
142
166
|
win_lower_bound: float = win_upper_bound + win_height
|
|
143
167
|
|
|
144
|
-
#
|
|
145
|
-
#
|
|
146
|
-
#
|
|
147
|
-
#
|
|
168
|
+
# percentage_progress_start = (win_upper_bound / document_scroll_height) * 100
|
|
169
|
+
# percentage_progress_end = (
|
|
170
|
+
# (win_height + win_upper_bound) / document_scroll_height
|
|
171
|
+
# ) * 100
|
|
148
172
|
percentage_progress_start = 1
|
|
149
173
|
percentage_progress_end = 2
|
|
150
174
|
|
|
@@ -152,9 +176,7 @@ class Crawler:
|
|
|
152
176
|
{
|
|
153
177
|
"x": 0,
|
|
154
178
|
"y": 0,
|
|
155
|
-
"text": "[scrollbar {:0.2f}-{:0.2f}%]"
|
|
156
|
-
round(percentage_progress_start, 2), round(percentage_progress_end)
|
|
157
|
-
),
|
|
179
|
+
"text": f"[scrollbar {percentage_progress_start:0.2f}-{percentage_progress_end:0.2f}%]", # noqa: E501
|
|
158
180
|
}
|
|
159
181
|
)
|
|
160
182
|
|
|
@@ -162,34 +184,35 @@ class Crawler:
|
|
|
162
184
|
"DOMSnapshot.captureSnapshot",
|
|
163
185
|
{"computedStyles": [], "includeDOMRects": True, "includePaintOrder": True},
|
|
164
186
|
)
|
|
165
|
-
strings:
|
|
166
|
-
document:
|
|
167
|
-
nodes:
|
|
168
|
-
backend_node_id:
|
|
169
|
-
attributes:
|
|
170
|
-
node_value:
|
|
171
|
-
parent:
|
|
172
|
-
node_names:
|
|
173
|
-
is_clickable:
|
|
174
|
-
|
|
175
|
-
input_value:
|
|
176
|
-
input_value_index:
|
|
177
|
-
input_value_values:
|
|
178
|
-
|
|
179
|
-
layout:
|
|
180
|
-
layout_node_index:
|
|
181
|
-
bounds:
|
|
187
|
+
strings: dict[int, str] = tree["strings"]
|
|
188
|
+
document: dict[str, Any] = tree["documents"][0]
|
|
189
|
+
nodes: dict[str, Any] = document["nodes"]
|
|
190
|
+
backend_node_id: dict[int, int] = nodes["backendNodeId"]
|
|
191
|
+
attributes: dict[int, dict[int, Any]] = nodes["attributes"]
|
|
192
|
+
node_value: dict[int, int] = nodes["nodeValue"]
|
|
193
|
+
parent: dict[int, int] = nodes["parentIndex"]
|
|
194
|
+
node_names: dict[int, int] = nodes["nodeName"]
|
|
195
|
+
is_clickable: set[int] = set(nodes["isClickable"]["index"])
|
|
196
|
+
|
|
197
|
+
input_value: dict[str, Any] = nodes["inputValue"]
|
|
198
|
+
input_value_index: list[int] = input_value["index"]
|
|
199
|
+
input_value_values: list[int] = input_value["value"]
|
|
200
|
+
|
|
201
|
+
layout: dict[str, Any] = document["layout"]
|
|
202
|
+
layout_node_index: list[int] = layout["nodeIndex"]
|
|
203
|
+
bounds: dict[int, list[float]] = layout["bounds"]
|
|
182
204
|
|
|
183
205
|
cursor: int = 0
|
|
184
206
|
|
|
185
|
-
child_nodes:
|
|
186
|
-
elements_in_view_port:
|
|
207
|
+
child_nodes: dict[str, list[dict[str, Any]]] = {}
|
|
208
|
+
elements_in_view_port: list[ElementInViewPort] = []
|
|
187
209
|
|
|
188
|
-
anchor_ancestry:
|
|
189
|
-
button_ancestry:
|
|
210
|
+
anchor_ancestry: dict[str, tuple[bool, Optional[int]]] = {"-1": (False, None)}
|
|
211
|
+
button_ancestry: dict[str, tuple[bool, Optional[int]]] = {"-1": (False, None)}
|
|
190
212
|
|
|
191
213
|
def convert_name(
|
|
192
|
-
node_name: Optional[str],
|
|
214
|
+
node_name: Optional[str],
|
|
215
|
+
has_click_handler: Optional[bool], # noqa: FBT001
|
|
193
216
|
) -> str:
|
|
194
217
|
if node_name == "a":
|
|
195
218
|
return "link"
|
|
@@ -201,12 +224,11 @@ class Crawler:
|
|
|
201
224
|
node_name == "button" or has_click_handler
|
|
202
225
|
): # found pages that needed this quirk
|
|
203
226
|
return "button"
|
|
204
|
-
|
|
205
|
-
return "text"
|
|
227
|
+
return "text"
|
|
206
228
|
|
|
207
229
|
def find_attributes(
|
|
208
|
-
attributes:
|
|
209
|
-
) ->
|
|
230
|
+
attributes: dict[int, Any], keys: list[str]
|
|
231
|
+
) -> dict[str, str]:
|
|
210
232
|
values = {}
|
|
211
233
|
|
|
212
234
|
for [key_index, value_index] in zip(*(iter(attributes),) * 2):
|
|
@@ -225,14 +247,14 @@ class Crawler:
|
|
|
225
247
|
return values
|
|
226
248
|
|
|
227
249
|
def add_to_hash_tree(
|
|
228
|
-
hash_tree:
|
|
250
|
+
hash_tree: dict[str, tuple[bool, Optional[int]]],
|
|
229
251
|
tag: str,
|
|
230
252
|
node_id: int,
|
|
231
253
|
node_name: Optional[str],
|
|
232
254
|
parent_id: int,
|
|
233
|
-
) ->
|
|
255
|
+
) -> tuple[bool, Optional[int]]:
|
|
234
256
|
parent_id_str = str(parent_id)
|
|
235
|
-
if not
|
|
257
|
+
if parent_id_str not in hash_tree:
|
|
236
258
|
parent_name = strings[node_names[parent_id]].lower()
|
|
237
259
|
grand_parent_id = parent[parent_id]
|
|
238
260
|
|
|
@@ -242,9 +264,10 @@ class Crawler:
|
|
|
242
264
|
|
|
243
265
|
is_parent_desc_anchor, anchor_id = hash_tree[parent_id_str]
|
|
244
266
|
|
|
245
|
-
# even if the anchor is nested in another anchor, we set the "root" for all
|
|
267
|
+
# even if the anchor is nested in another anchor, we set the "root" for all
|
|
268
|
+
# descendants to be ::Self
|
|
246
269
|
if node_name == tag:
|
|
247
|
-
value:
|
|
270
|
+
value: tuple[bool, Optional[int]] = (True, node_id)
|
|
248
271
|
elif (
|
|
249
272
|
is_parent_desc_anchor
|
|
250
273
|
): # reuse the parent's anchor_id (which could be much higher in the tree)
|
|
@@ -253,7 +276,9 @@ class Crawler:
|
|
|
253
276
|
value = (
|
|
254
277
|
False,
|
|
255
278
|
None,
|
|
256
|
-
)
|
|
279
|
+
)
|
|
280
|
+
# not a descendant of an anchor, most likely it will become text, an
|
|
281
|
+
# interactive element or discarded
|
|
257
282
|
|
|
258
283
|
hash_tree[str(node_id)] = value
|
|
259
284
|
|
|
@@ -272,10 +297,10 @@ class Crawler:
|
|
|
272
297
|
)
|
|
273
298
|
|
|
274
299
|
try:
|
|
275
|
-
cursor = layout_node_index.index(
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
except:
|
|
300
|
+
cursor = layout_node_index.index(index)
|
|
301
|
+
# TODO replace this with proper cursoring, ignoring the fact this is
|
|
302
|
+
# O(n^2) for the moment
|
|
303
|
+
except ValueError:
|
|
279
304
|
continue
|
|
280
305
|
|
|
281
306
|
if node_name in black_listed_elements:
|
|
@@ -302,9 +327,10 @@ class Crawler:
|
|
|
302
327
|
if not partially_is_in_viewport:
|
|
303
328
|
continue
|
|
304
329
|
|
|
305
|
-
meta_data:
|
|
330
|
+
meta_data: list[str] = []
|
|
306
331
|
|
|
307
|
-
# inefficient to grab the same set of keys for kinds of objects, but it's
|
|
332
|
+
# inefficient to grab the same set of keys for kinds of objects, but it's
|
|
333
|
+
# fine for now
|
|
308
334
|
element_attributes = find_attributes(
|
|
309
335
|
attributes[index], ["type", "placeholder", "aria-label", "title", "alt"]
|
|
310
336
|
)
|
|
@@ -325,7 +351,7 @@ class Crawler:
|
|
|
325
351
|
|
|
326
352
|
if node_name == "#text" and ancestor_exception and ancestor_node:
|
|
327
353
|
text = strings[node_value[index]]
|
|
328
|
-
if text
|
|
354
|
+
if text in {"|", "•"}:
|
|
329
355
|
continue
|
|
330
356
|
ancestor_node.append({"type": "type", "value": text})
|
|
331
357
|
else:
|
|
@@ -355,7 +381,9 @@ class Crawler:
|
|
|
355
381
|
element_node_value = strings[node_value[index]]
|
|
356
382
|
if (
|
|
357
383
|
element_node_value == "|"
|
|
358
|
-
|
|
384
|
+
# commonly used as a separator, does not add much context - lets
|
|
385
|
+
# save ourselves some token space
|
|
386
|
+
):
|
|
359
387
|
continue
|
|
360
388
|
elif (
|
|
361
389
|
node_name == "input"
|
|
@@ -368,7 +396,7 @@ class Crawler:
|
|
|
368
396
|
element_node_value = strings[text_index]
|
|
369
397
|
|
|
370
398
|
# remove redundant elements
|
|
371
|
-
if ancestor_exception and (node_name
|
|
399
|
+
if ancestor_exception and (node_name not in {"a", "button"}):
|
|
372
400
|
continue
|
|
373
401
|
|
|
374
402
|
elements_in_view_port.append(
|
|
@@ -386,7 +414,8 @@ class Crawler:
|
|
|
386
414
|
}
|
|
387
415
|
)
|
|
388
416
|
|
|
389
|
-
# lets filter further to remove anything that does not hold any text nor has
|
|
417
|
+
# lets filter further to remove anything that does not hold any text nor has
|
|
418
|
+
# click handlers + merge text from leaf#text nodes with the parent
|
|
390
419
|
elements_of_interest = []
|
|
391
420
|
id_counter = 0
|
|
392
421
|
|
|
@@ -395,7 +424,7 @@ class Crawler:
|
|
|
395
424
|
node_name = element.get("node_name")
|
|
396
425
|
element_node_value = element.get("node_value")
|
|
397
426
|
node_is_clickable = element.get("is_clickable")
|
|
398
|
-
node_meta_data: Optional[
|
|
427
|
+
node_meta_data: Optional[list[str]] = element.get("node_meta")
|
|
399
428
|
|
|
400
429
|
inner_text = f"{element_node_value} " if element_node_value else ""
|
|
401
430
|
meta = ""
|
|
@@ -423,10 +452,7 @@ class Crawler:
|
|
|
423
452
|
# not very elegant, more like a placeholder
|
|
424
453
|
if (
|
|
425
454
|
(converted_node_name != "button" or meta == "")
|
|
426
|
-
and converted_node_name
|
|
427
|
-
and converted_node_name != "input"
|
|
428
|
-
and converted_node_name != "img"
|
|
429
|
-
and converted_node_name != "textarea"
|
|
455
|
+
and converted_node_name not in {"link", "input", "img", "textarea"}
|
|
430
456
|
) and inner_text.strip() == "":
|
|
431
457
|
continue
|
|
432
458
|
|
|
@@ -434,7 +460,7 @@ class Crawler:
|
|
|
434
460
|
|
|
435
461
|
if inner_text != "":
|
|
436
462
|
elements_of_interest.append(
|
|
437
|
-
f"""<{converted_node_name} id={id_counter}{meta}>{inner_text}</{converted_node_name}>"""
|
|
463
|
+
f"""<{converted_node_name} id={id_counter}{meta}>{inner_text}</{converted_node_name}>""" # noqa: E501
|
|
438
464
|
)
|
|
439
465
|
else:
|
|
440
466
|
elements_of_interest.append(
|
|
@@ -442,5 +468,5 @@ class Crawler:
|
|
|
442
468
|
)
|
|
443
469
|
id_counter += 1
|
|
444
470
|
|
|
445
|
-
print("Parsing time: {
|
|
471
|
+
print(f"Parsing time: {time.time() - start:0.2f} seconds") # noqa: T201
|
|
446
472
|
return elements_of_interest
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# flake8: noqa
|
|
2
1
|
from langchain_core.prompts.prompt import PromptTemplate
|
|
3
2
|
|
|
4
3
|
_PROMPT_TEMPLATE = """
|
|
@@ -115,7 +114,7 @@ CURRENT BROWSER CONTENT:
|
|
|
115
114
|
<text id=10>7:00 PM</text>
|
|
116
115
|
<text id=11>2 people</text>
|
|
117
116
|
<input id=12 alt="Location, Restaurant, or Cuisine"></input>
|
|
118
|
-
<button id=13>Let
|
|
117
|
+
<button id=13>Let's go</button>
|
|
119
118
|
<text id=14>It looks like you're in Peninsula. Not correct?</text>
|
|
120
119
|
<button id=15>Get current location</button>
|
|
121
120
|
<button id=16>Next</button>
|
|
@@ -137,7 +136,7 @@ OBJECTIVE: {objective}
|
|
|
137
136
|
CURRENT URL: {url}
|
|
138
137
|
PREVIOUS COMMAND: {previous_command}
|
|
139
138
|
YOUR COMMAND:
|
|
140
|
-
"""
|
|
139
|
+
""" # noqa: E501
|
|
141
140
|
PROMPT = PromptTemplate(
|
|
142
141
|
input_variables=["browser_content", "url", "previous_command", "objective"],
|
|
143
142
|
template=_PROMPT_TEMPLATE,
|
|
@@ -28,17 +28,17 @@ from langchain.chains.structured_output.base import (
|
|
|
28
28
|
|
|
29
29
|
__all__ = [
|
|
30
30
|
"convert_to_openai_function",
|
|
31
|
-
"create_tagging_chain",
|
|
32
|
-
"create_tagging_chain_pydantic",
|
|
33
|
-
"create_extraction_chain_pydantic",
|
|
34
|
-
"create_extraction_chain",
|
|
35
31
|
"create_citation_fuzzy_match_chain",
|
|
36
32
|
"create_citation_fuzzy_match_runnable",
|
|
37
|
-
"
|
|
33
|
+
"create_extraction_chain",
|
|
34
|
+
"create_extraction_chain_pydantic",
|
|
35
|
+
"create_openai_fn_chain",
|
|
36
|
+
"create_openai_fn_runnable", # backwards compatibility
|
|
38
37
|
"create_qa_with_sources_chain",
|
|
38
|
+
"create_qa_with_structure_chain",
|
|
39
39
|
"create_structured_output_chain",
|
|
40
|
-
"create_openai_fn_chain",
|
|
41
40
|
"create_structured_output_runnable", # backwards compatibility
|
|
42
|
-
"
|
|
41
|
+
"create_tagging_chain",
|
|
42
|
+
"create_tagging_chain_pydantic",
|
|
43
43
|
"get_openai_output_parser", # backwards compatibility
|
|
44
44
|
]
|
|
@@ -31,13 +31,13 @@ from langchain.chains.structured_output.base import (
|
|
|
31
31
|
)
|
|
32
32
|
|
|
33
33
|
__all__ = [
|
|
34
|
-
"get_openai_output_parser",
|
|
35
|
-
"create_openai_fn_runnable",
|
|
36
|
-
"create_structured_output_runnable", # deprecated
|
|
37
|
-
"create_openai_fn_chain", # deprecated
|
|
38
|
-
"create_structured_output_chain", # deprecated
|
|
39
34
|
"PYTHON_TO_JSON_TYPES", # backwards compatibility
|
|
40
35
|
"convert_to_openai_function", # backwards compatibility
|
|
36
|
+
"create_openai_fn_chain", # deprecated
|
|
37
|
+
"create_openai_fn_runnable",
|
|
38
|
+
"create_structured_output_chain", # deprecated
|
|
39
|
+
"create_structured_output_runnable", # deprecated
|
|
40
|
+
"get_openai_output_parser",
|
|
41
41
|
]
|
|
42
42
|
|
|
43
43
|
|
|
@@ -121,9 +121,11 @@ def create_openai_fn_chain(
|
|
|
121
121
|
chain = create_openai_fn_chain([RecordPerson, RecordDog], llm, prompt)
|
|
122
122
|
chain.run("Harry was a chubby brown beagle who loved chicken")
|
|
123
123
|
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")
|
|
124
|
+
|
|
124
125
|
""" # noqa: E501
|
|
125
126
|
if not functions:
|
|
126
|
-
|
|
127
|
+
msg = "Need to pass in at least one function. Received zero."
|
|
128
|
+
raise ValueError(msg)
|
|
127
129
|
openai_functions = [convert_to_openai_function(f) for f in functions]
|
|
128
130
|
output_parser = output_parser or get_openai_output_parser(functions)
|
|
129
131
|
llm_kwargs: dict[str, Any] = {
|
|
@@ -131,7 +133,7 @@ def create_openai_fn_chain(
|
|
|
131
133
|
}
|
|
132
134
|
if len(openai_functions) == 1 and enforce_single_function_usage:
|
|
133
135
|
llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
|
|
134
|
-
|
|
136
|
+
return LLMChain(
|
|
135
137
|
llm=llm,
|
|
136
138
|
prompt=prompt,
|
|
137
139
|
output_parser=output_parser,
|
|
@@ -139,11 +141,12 @@ def create_openai_fn_chain(
|
|
|
139
141
|
output_key=output_key,
|
|
140
142
|
**kwargs,
|
|
141
143
|
)
|
|
142
|
-
return llm_chain
|
|
143
144
|
|
|
144
145
|
|
|
145
146
|
@deprecated(
|
|
146
|
-
since="0.1.1",
|
|
147
|
+
since="0.1.1",
|
|
148
|
+
removal="1.0",
|
|
149
|
+
alternative="ChatOpenAI.with_structured_output",
|
|
147
150
|
)
|
|
148
151
|
def create_structured_output_chain(
|
|
149
152
|
output_schema: Union[dict[str, Any], type[BaseModel]],
|
|
@@ -201,6 +204,7 @@ def create_structured_output_chain(
|
|
|
201
204
|
chain = create_structured_output_chain(Dog, llm, prompt)
|
|
202
205
|
chain.run("Harry was a chubby brown beagle who loved chicken")
|
|
203
206
|
# -> Dog(name="Harry", color="brown", fav_food="chicken")
|
|
207
|
+
|
|
204
208
|
""" # noqa: E501
|
|
205
209
|
if isinstance(output_schema, dict):
|
|
206
210
|
function: Any = {
|
|
@@ -220,7 +224,8 @@ def create_structured_output_chain(
|
|
|
220
224
|
|
|
221
225
|
function = _OutputFormatter
|
|
222
226
|
output_parser = output_parser or PydanticAttrOutputFunctionsParser(
|
|
223
|
-
pydantic_schema=_OutputFormatter,
|
|
227
|
+
pydantic_schema=_OutputFormatter,
|
|
228
|
+
attr_name="output",
|
|
224
229
|
)
|
|
225
230
|
return create_openai_fn_chain(
|
|
226
231
|
[function],
|