aip-agents-binary 0.0.0b2__py3-none-any.whl → 0.5.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aip_agents/__init__.py +65 -0
- aip_agents/a2a/__init__.py +19 -0
- aip_agents/a2a/server/__init__.py +10 -0
- aip_agents/a2a/server/base_executor.py +1086 -0
- aip_agents/a2a/server/google_adk_executor.py +198 -0
- aip_agents/a2a/server/langflow_executor.py +180 -0
- aip_agents/a2a/server/langgraph_executor.py +270 -0
- aip_agents/a2a/types.py +232 -0
- aip_agents/agent/__init__.py +27 -0
- aip_agents/agent/base_agent.py +970 -0
- aip_agents/agent/base_langgraph_agent.py +2942 -0
- aip_agents/agent/google_adk_agent.py +926 -0
- aip_agents/agent/google_adk_constants.py +6 -0
- aip_agents/agent/hitl/__init__.py +24 -0
- aip_agents/agent/hitl/config.py +28 -0
- aip_agents/agent/hitl/langgraph_hitl_mixin.py +515 -0
- aip_agents/agent/hitl/manager.py +532 -0
- aip_agents/agent/hitl/models.py +18 -0
- aip_agents/agent/hitl/prompt/__init__.py +9 -0
- aip_agents/agent/hitl/prompt/base.py +42 -0
- aip_agents/agent/hitl/prompt/deferred.py +73 -0
- aip_agents/agent/hitl/registry.py +149 -0
- aip_agents/agent/{interface.pyi → interface.py} +70 -13
- aip_agents/agent/interfaces.py +65 -0
- aip_agents/agent/langflow_agent.py +464 -0
- aip_agents/agent/langgraph_memory_enhancer_agent.py +433 -0
- aip_agents/agent/langgraph_react_agent.py +2514 -0
- aip_agents/agent/system_instruction_context.py +34 -0
- aip_agents/clients/__init__.py +10 -0
- aip_agents/clients/langflow/__init__.py +10 -0
- aip_agents/clients/langflow/client.py +477 -0
- aip_agents/clients/langflow/types.py +18 -0
- aip_agents/constants.py +23 -0
- aip_agents/credentials/manager.py +132 -0
- aip_agents/examples/__init__.py +5 -0
- aip_agents/examples/compare_streaming_client.py +783 -0
- aip_agents/examples/compare_streaming_server.py +142 -0
- aip_agents/examples/demo_memory_recall.py +401 -0
- aip_agents/examples/hello_world_a2a_google_adk_client.py +49 -0
- aip_agents/examples/hello_world_a2a_google_adk_client_agent.py +48 -0
- aip_agents/examples/hello_world_a2a_google_adk_client_streaming.py +60 -0
- aip_agents/examples/hello_world_a2a_google_adk_server.py +79 -0
- aip_agents/examples/hello_world_a2a_langchain_client.py +39 -0
- aip_agents/examples/hello_world_a2a_langchain_client_agent.py +39 -0
- aip_agents/examples/hello_world_a2a_langchain_client_lm_invoker.py +37 -0
- aip_agents/examples/hello_world_a2a_langchain_client_streaming.py +41 -0
- aip_agents/examples/hello_world_a2a_langchain_reference_client_streaming.py +60 -0
- aip_agents/examples/hello_world_a2a_langchain_reference_server.py +105 -0
- aip_agents/examples/hello_world_a2a_langchain_server.py +79 -0
- aip_agents/examples/hello_world_a2a_langchain_server_lm_invoker.py +78 -0
- aip_agents/examples/hello_world_a2a_langflow_client.py +83 -0
- aip_agents/examples/hello_world_a2a_langflow_server.py +82 -0
- aip_agents/examples/hello_world_a2a_langgraph_artifact_client.py +73 -0
- aip_agents/examples/hello_world_a2a_langgraph_artifact_client_streaming.py +76 -0
- aip_agents/examples/hello_world_a2a_langgraph_artifact_server.py +92 -0
- aip_agents/examples/hello_world_a2a_langgraph_client.py +54 -0
- aip_agents/examples/hello_world_a2a_langgraph_client_agent.py +54 -0
- aip_agents/examples/hello_world_a2a_langgraph_client_agent_lm_invoker.py +32 -0
- aip_agents/examples/hello_world_a2a_langgraph_client_streaming.py +50 -0
- aip_agents/examples/hello_world_a2a_langgraph_client_streaming_lm_invoker.py +44 -0
- aip_agents/examples/hello_world_a2a_langgraph_client_streaming_tool_streaming.py +92 -0
- aip_agents/examples/hello_world_a2a_langgraph_server.py +84 -0
- aip_agents/examples/hello_world_a2a_langgraph_server_lm_invoker.py +79 -0
- aip_agents/examples/hello_world_a2a_langgraph_server_tool_streaming.py +132 -0
- aip_agents/examples/hello_world_a2a_mcp_langgraph.py +196 -0
- aip_agents/examples/hello_world_a2a_three_level_agent_hierarchy_client.py +244 -0
- aip_agents/examples/hello_world_a2a_three_level_agent_hierarchy_server.py +251 -0
- aip_agents/examples/hello_world_a2a_with_metadata_langchain_client.py +57 -0
- aip_agents/examples/hello_world_a2a_with_metadata_langchain_server_lm_invoker.py +80 -0
- aip_agents/examples/hello_world_google_adk.py +41 -0
- aip_agents/examples/hello_world_google_adk_mcp_http.py +34 -0
- aip_agents/examples/hello_world_google_adk_mcp_http_stream.py +40 -0
- aip_agents/examples/hello_world_google_adk_mcp_sse.py +44 -0
- aip_agents/examples/hello_world_google_adk_mcp_sse_stream.py +48 -0
- aip_agents/examples/hello_world_google_adk_mcp_stdio.py +44 -0
- aip_agents/examples/hello_world_google_adk_mcp_stdio_stream.py +48 -0
- aip_agents/examples/hello_world_google_adk_stream.py +44 -0
- aip_agents/examples/hello_world_langchain.py +28 -0
- aip_agents/examples/hello_world_langchain_lm_invoker.py +15 -0
- aip_agents/examples/hello_world_langchain_mcp_http.py +34 -0
- aip_agents/examples/hello_world_langchain_mcp_http_interactive.py +130 -0
- aip_agents/examples/hello_world_langchain_mcp_http_stream.py +42 -0
- aip_agents/examples/hello_world_langchain_mcp_multi_server.py +155 -0
- aip_agents/examples/hello_world_langchain_mcp_sse.py +34 -0
- aip_agents/examples/hello_world_langchain_mcp_sse_stream.py +40 -0
- aip_agents/examples/hello_world_langchain_mcp_stdio.py +30 -0
- aip_agents/examples/hello_world_langchain_mcp_stdio_stream.py +41 -0
- aip_agents/examples/hello_world_langchain_stream.py +36 -0
- aip_agents/examples/hello_world_langchain_stream_lm_invoker.py +39 -0
- aip_agents/examples/hello_world_langflow_agent.py +163 -0
- aip_agents/examples/hello_world_langgraph.py +39 -0
- aip_agents/examples/hello_world_langgraph_bosa_twitter.py +41 -0
- aip_agents/examples/hello_world_langgraph_mcp_http.py +31 -0
- aip_agents/examples/hello_world_langgraph_mcp_http_stream.py +34 -0
- aip_agents/examples/hello_world_langgraph_mcp_sse.py +35 -0
- aip_agents/examples/hello_world_langgraph_mcp_sse_stream.py +50 -0
- aip_agents/examples/hello_world_langgraph_mcp_stdio.py +35 -0
- aip_agents/examples/hello_world_langgraph_mcp_stdio_stream.py +50 -0
- aip_agents/examples/hello_world_langgraph_stream.py +43 -0
- aip_agents/examples/hello_world_langgraph_stream_lm_invoker.py +37 -0
- aip_agents/examples/hello_world_model_switch_cli.py +210 -0
- aip_agents/examples/hello_world_multi_agent_adk.py +75 -0
- aip_agents/examples/hello_world_multi_agent_langchain.py +54 -0
- aip_agents/examples/hello_world_multi_agent_langgraph.py +66 -0
- aip_agents/examples/hello_world_multi_agent_langgraph_lm_invoker.py +69 -0
- aip_agents/examples/hello_world_pii_logger.py +21 -0
- aip_agents/examples/hello_world_sentry.py +133 -0
- aip_agents/examples/hello_world_step_limits.py +273 -0
- aip_agents/examples/hello_world_stock_a2a_server.py +103 -0
- aip_agents/examples/hello_world_tool_output_client.py +46 -0
- aip_agents/examples/hello_world_tool_output_server.py +114 -0
- aip_agents/examples/hitl_demo.py +724 -0
- aip_agents/examples/mcp_configs/configs.py +63 -0
- aip_agents/examples/mcp_servers/common.py +76 -0
- aip_agents/examples/mcp_servers/mcp_name.py +29 -0
- aip_agents/examples/mcp_servers/mcp_server_http.py +19 -0
- aip_agents/examples/mcp_servers/mcp_server_sse.py +19 -0
- aip_agents/examples/mcp_servers/mcp_server_stdio.py +19 -0
- aip_agents/examples/mcp_servers/mcp_time.py +10 -0
- aip_agents/examples/pii_demo_langgraph_client.py +69 -0
- aip_agents/examples/pii_demo_langgraph_server.py +126 -0
- aip_agents/examples/pii_demo_multi_agent_client.py +80 -0
- aip_agents/examples/pii_demo_multi_agent_server.py +247 -0
- aip_agents/examples/todolist_planning_a2a_langchain_client.py +70 -0
- aip_agents/examples/todolist_planning_a2a_langgraph_server.py +88 -0
- aip_agents/examples/tools/__init__.py +27 -0
- aip_agents/examples/tools/{adk_arithmetic_tools.pyi → adk_arithmetic_tools.py} +12 -0
- aip_agents/examples/tools/adk_weather_tool.py +60 -0
- aip_agents/examples/tools/data_generator_tool.py +103 -0
- aip_agents/examples/tools/data_visualization_tool.py +312 -0
- aip_agents/examples/tools/image_artifact_tool.py +136 -0
- aip_agents/examples/tools/langchain_arithmetic_tools.py +26 -0
- aip_agents/examples/tools/langchain_currency_exchange_tool.py +88 -0
- aip_agents/examples/tools/langchain_graph_artifact_tool.py +172 -0
- aip_agents/examples/tools/langchain_weather_tool.py +48 -0
- aip_agents/examples/tools/langgraph_streaming_tool.py +130 -0
- aip_agents/examples/tools/mock_retrieval_tool.py +56 -0
- aip_agents/examples/tools/pii_demo_tools.py +189 -0
- aip_agents/examples/tools/random_chart_tool.py +142 -0
- aip_agents/examples/tools/serper_tool.py +202 -0
- aip_agents/examples/tools/stock_tools.py +82 -0
- aip_agents/examples/tools/table_generator_tool.py +167 -0
- aip_agents/examples/tools/time_tool.py +82 -0
- aip_agents/examples/tools/weather_forecast_tool.py +38 -0
- aip_agents/executor/agent_executor.py +473 -0
- aip_agents/executor/base.py +48 -0
- aip_agents/mcp/__init__.py +1 -0
- aip_agents/mcp/client/__init__.py +14 -0
- aip_agents/mcp/client/base_mcp_client.py +369 -0
- aip_agents/mcp/client/connection_manager.py +193 -0
- aip_agents/mcp/client/google_adk/__init__.py +11 -0
- aip_agents/mcp/client/google_adk/client.py +381 -0
- aip_agents/mcp/client/langchain/__init__.py +11 -0
- aip_agents/mcp/client/langchain/client.py +265 -0
- aip_agents/mcp/client/persistent_session.py +359 -0
- aip_agents/mcp/client/session_pool.py +351 -0
- aip_agents/mcp/client/transports.py +215 -0
- aip_agents/mcp/utils/__init__.py +7 -0
- aip_agents/mcp/utils/config_validator.py +139 -0
- aip_agents/memory/__init__.py +14 -0
- aip_agents/memory/adapters/__init__.py +10 -0
- aip_agents/memory/adapters/base_adapter.py +717 -0
- aip_agents/memory/adapters/mem0.py +84 -0
- aip_agents/memory/{base.pyi → base.py} +40 -5
- aip_agents/memory/constants.py +49 -0
- aip_agents/memory/factory.py +86 -0
- aip_agents/memory/guidance.py +20 -0
- aip_agents/memory/simple_memory.py +47 -0
- aip_agents/middleware/__init__.py +17 -0
- aip_agents/middleware/base.py +88 -0
- aip_agents/middleware/manager.py +128 -0
- aip_agents/middleware/todolist.py +274 -0
- aip_agents/schema/__init__.py +69 -0
- aip_agents/schema/a2a.py +56 -0
- aip_agents/schema/agent.py +111 -0
- aip_agents/schema/hitl.py +157 -0
- aip_agents/schema/langgraph.py +37 -0
- aip_agents/schema/model_id.py +97 -0
- aip_agents/schema/step_limit.py +108 -0
- aip_agents/schema/storage.py +40 -0
- aip_agents/sentry/__init__.py +11 -0
- aip_agents/sentry/sentry.py +151 -0
- aip_agents/storage/__init__.py +41 -0
- aip_agents/storage/{base.pyi → base.py} +30 -4
- aip_agents/storage/clients/__init__.py +12 -0
- aip_agents/storage/clients/minio_client.py +318 -0
- aip_agents/storage/config.py +62 -0
- aip_agents/storage/providers/__init__.py +15 -0
- aip_agents/storage/providers/{base.pyi → base.py} +23 -6
- aip_agents/storage/providers/{memory.pyi → memory.py} +38 -3
- aip_agents/storage/providers/object_storage.py +214 -0
- aip_agents/tools/__init__.py +6 -0
- aip_agents/tools/bosa_tools.py +105 -0
- aip_agents/tools/browser_use/__init__.py +82 -0
- aip_agents/tools/browser_use/action_parser.py +103 -0
- aip_agents/tools/browser_use/browser_use_tool.py +1112 -0
- aip_agents/tools/browser_use/llm_config.py +120 -0
- aip_agents/tools/browser_use/minio_storage.py +198 -0
- aip_agents/tools/browser_use/schemas.py +119 -0
- aip_agents/tools/browser_use/session.py +76 -0
- aip_agents/tools/browser_use/session_errors.py +132 -0
- aip_agents/tools/browser_use/steel_session_recording.py +317 -0
- aip_agents/tools/browser_use/streaming.py +813 -0
- aip_agents/tools/browser_use/structured_data_parser.py +257 -0
- aip_agents/tools/browser_use/structured_data_recovery.py +204 -0
- aip_agents/tools/browser_use/types.py +78 -0
- aip_agents/tools/code_sandbox/__init__.py +26 -0
- aip_agents/tools/code_sandbox/constant.py +13 -0
- aip_agents/tools/code_sandbox/e2b_cloud_sandbox_extended.py +257 -0
- aip_agents/tools/code_sandbox/e2b_sandbox_tool.py +411 -0
- aip_agents/tools/constants.py +165 -0
- aip_agents/tools/document_loader/__init__.py +37 -0
- aip_agents/tools/document_loader/base_reader.py +262 -0
- aip_agents/tools/document_loader/docx_reader_tool.py +53 -0
- aip_agents/tools/document_loader/excel_reader_tool.py +160 -0
- aip_agents/tools/document_loader/pdf_reader_tool.py +67 -0
- aip_agents/tools/document_loader/pdf_splitter.py +169 -0
- aip_agents/tools/gl_connector/__init__.py +5 -0
- aip_agents/tools/gl_connector/tool.py +351 -0
- aip_agents/tools/memory_search/__init__.py +22 -0
- aip_agents/tools/memory_search/base.py +200 -0
- aip_agents/tools/memory_search/mem0.py +258 -0
- aip_agents/tools/memory_search/schema.py +48 -0
- aip_agents/tools/memory_search_tool.py +26 -0
- aip_agents/tools/tool_config_injector.py +300 -0
- aip_agents/tools/web_search/__init__.py +15 -0
- aip_agents/tools/web_search/serper_tool.py +187 -0
- aip_agents/types/__init__.py +70 -0
- aip_agents/types/a2a_events.py +13 -0
- aip_agents/utils/__init__.py +79 -0
- aip_agents/utils/a2a_connector.py +1757 -0
- aip_agents/utils/artifact_helpers.py +502 -0
- aip_agents/utils/constants.py +22 -0
- aip_agents/utils/datetime/__init__.py +34 -0
- aip_agents/utils/datetime/normalization.py +231 -0
- aip_agents/utils/datetime/timezone.py +206 -0
- aip_agents/utils/env_loader.py +27 -0
- aip_agents/utils/event_handler_registry.py +58 -0
- aip_agents/utils/file_prompt_utils.py +176 -0
- aip_agents/utils/final_response_builder.py +211 -0
- aip_agents/utils/formatter_llm_client.py +231 -0
- aip_agents/utils/langgraph/__init__.py +19 -0
- aip_agents/utils/langgraph/converter.py +128 -0
- aip_agents/utils/langgraph/tool_managers/__init__.py +15 -0
- aip_agents/utils/langgraph/tool_managers/a2a_tool_manager.py +99 -0
- aip_agents/utils/langgraph/tool_managers/{base_tool_manager.pyi → base_tool_manager.py} +25 -8
- aip_agents/utils/langgraph/tool_managers/delegation_tool_manager.py +1071 -0
- aip_agents/utils/langgraph/tool_output_management.py +967 -0
- aip_agents/utils/logger.py +195 -0
- aip_agents/utils/metadata/__init__.py +27 -0
- aip_agents/utils/metadata/activity_metadata_helper.py +407 -0
- aip_agents/utils/metadata/activity_narrative/__init__.py +35 -0
- aip_agents/utils/metadata/activity_narrative/builder.py +817 -0
- aip_agents/utils/metadata/activity_narrative/constants.py +51 -0
- aip_agents/utils/metadata/activity_narrative/context.py +49 -0
- aip_agents/utils/metadata/activity_narrative/formatters.py +230 -0
- aip_agents/utils/metadata/activity_narrative/utils.py +35 -0
- aip_agents/utils/metadata/schemas/__init__.py +16 -0
- aip_agents/utils/metadata/schemas/activity_schema.py +29 -0
- aip_agents/utils/metadata/schemas/thinking_schema.py +31 -0
- aip_agents/utils/metadata/thinking_metadata_helper.py +38 -0
- aip_agents/utils/metadata_helper.py +358 -0
- aip_agents/utils/name_preprocessor/__init__.py +17 -0
- aip_agents/utils/name_preprocessor/{base_name_preprocessor.pyi → base_name_preprocessor.py} +22 -2
- aip_agents/utils/name_preprocessor/google_name_preprocessor.py +100 -0
- aip_agents/utils/name_preprocessor/name_preprocessor.py +87 -0
- aip_agents/utils/name_preprocessor/{openai_name_preprocessor.pyi → openai_name_preprocessor.py} +19 -5
- aip_agents/utils/pii/__init__.py +25 -0
- aip_agents/utils/pii/pii_handler.py +397 -0
- aip_agents/utils/pii/pii_helper.py +207 -0
- aip_agents/utils/pii/uuid_deanonymizer_mapping.py +195 -0
- aip_agents/utils/reference_helper.py +273 -0
- aip_agents/utils/sse_chunk_transformer.py +831 -0
- aip_agents/utils/step_limit_manager.py +265 -0
- aip_agents/utils/token_usage_helper.py +156 -0
- aip_agents_binary-0.5.12.dist-info/METADATA +689 -0
- aip_agents_binary-0.5.12.dist-info/RECORD +279 -0
- {aip_agents_binary-0.0.0b2.dist-info → aip_agents_binary-0.5.12.dist-info}/WHEEL +2 -1
- aip_agents_binary-0.5.12.dist-info/top_level.txt +1 -0
- aip_agents/__init__.pyi +0 -0
- aip_agents/a2a/__init__.pyi +0 -3
- aip_agents/a2a/server/__init__.pyi +0 -4
- aip_agents/a2a/server/base_executor.pyi +0 -63
- aip_agents/a2a/server/google_adk_executor.pyi +0 -51
- aip_agents/a2a/server/langgraph_executor.pyi +0 -48
- aip_agents/a2a/types.pyi +0 -125
- aip_agents/agent/__init__.pyi +0 -7
- aip_agents/agent/base_agent.pyi +0 -205
- aip_agents/agent/base_langgraph_agent.pyi +0 -164
- aip_agents/agent/google_adk_agent.pyi +0 -128
- aip_agents/agent/langgraph_react_agent.pyi +0 -131
- aip_agents/agent/types.pyi +0 -106
- aip_agents/constants.pyi +0 -6
- aip_agents/examples/__init__.pyi +0 -0
- aip_agents/examples/hello_world_a2a_google_adk_client.pyi +0 -9
- aip_agents/examples/hello_world_a2a_google_adk_client_agent.pyi +0 -9
- aip_agents/examples/hello_world_a2a_google_adk_client_streaming.pyi +0 -9
- aip_agents/examples/hello_world_a2a_google_adk_server.pyi +0 -10
- aip_agents/examples/hello_world_a2a_langchain_client.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langchain_client_agent.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langchain_client_lm_invoker.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langchain_client_streaming.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langchain_reference_client_streaming.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langchain_reference_server.pyi +0 -10
- aip_agents/examples/hello_world_a2a_langchain_server.pyi +0 -10
- aip_agents/examples/hello_world_a2a_langchain_server_lm_invoker.pyi +0 -10
- aip_agents/examples/hello_world_a2a_langgraph_artifact_client.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langgraph_artifact_client_streaming.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langgraph_artifact_server.pyi +0 -11
- aip_agents/examples/hello_world_a2a_langgraph_client.pyi +0 -9
- aip_agents/examples/hello_world_a2a_langgraph_client_agent.pyi +0 -9
- aip_agents/examples/hello_world_a2a_langgraph_client_agent_lm_invoker.pyi +0 -2
- aip_agents/examples/hello_world_a2a_langgraph_client_streaming.pyi +0 -9
- aip_agents/examples/hello_world_a2a_langgraph_client_streaming_lm_invoker.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langgraph_client_streaming_tool_streaming.pyi +0 -5
- aip_agents/examples/hello_world_a2a_langgraph_server.pyi +0 -9
- aip_agents/examples/hello_world_a2a_langgraph_server_lm_invoker.pyi +0 -10
- aip_agents/examples/hello_world_a2a_langgraph_server_tool_streaming.pyi +0 -10
- aip_agents/examples/hello_world_a2a_mcp_langgraph.pyi +0 -48
- aip_agents/examples/hello_world_a2a_multi_agent_coordinator_client.pyi +0 -15
- aip_agents/examples/hello_world_a2a_multi_agent_coordinator_client_streaming.pyi +0 -5
- aip_agents/examples/hello_world_a2a_multi_agent_coordinator_server.pyi +0 -11
- aip_agents/examples/hello_world_a2a_three_level_agent_hierarchy_client.pyi +0 -23
- aip_agents/examples/hello_world_a2a_three_level_agent_hierarchy_server.pyi +0 -17
- aip_agents/examples/hello_world_a2a_with_metadata_langchain_client.pyi +0 -5
- aip_agents/examples/hello_world_a2a_with_metadata_langchain_server_lm_invoker.pyi +0 -10
- aip_agents/examples/hello_world_google_adk.pyi +0 -5
- aip_agents/examples/hello_world_google_adk_mcp_http.pyi +0 -5
- aip_agents/examples/hello_world_google_adk_mcp_http_stream.pyi +0 -5
- aip_agents/examples/hello_world_google_adk_mcp_sse.pyi +0 -5
- aip_agents/examples/hello_world_google_adk_mcp_sse_stream.pyi +0 -5
- aip_agents/examples/hello_world_google_adk_mcp_stdio.pyi +0 -5
- aip_agents/examples/hello_world_google_adk_mcp_stdio_stream.pyi +0 -5
- aip_agents/examples/hello_world_google_adk_stream.pyi +0 -5
- aip_agents/examples/hello_world_langchain.pyi +0 -5
- aip_agents/examples/hello_world_langchain_lm_invoker.pyi +0 -2
- aip_agents/examples/hello_world_langchain_mcp_http.pyi +0 -5
- aip_agents/examples/hello_world_langchain_mcp_http_stream.pyi +0 -5
- aip_agents/examples/hello_world_langchain_mcp_sse.pyi +0 -5
- aip_agents/examples/hello_world_langchain_mcp_sse_stream.pyi +0 -5
- aip_agents/examples/hello_world_langchain_mcp_stdio.pyi +0 -5
- aip_agents/examples/hello_world_langchain_mcp_stdio_stream.pyi +0 -5
- aip_agents/examples/hello_world_langchain_stream.pyi +0 -5
- aip_agents/examples/hello_world_langchain_stream_lm_invoker.pyi +0 -5
- aip_agents/examples/hello_world_langgraph.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_bosa.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_bosa_twitter.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_mcp_http.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_mcp_http_stream.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_mcp_sse.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_mcp_sse_stream.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_mcp_stdio.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_mcp_stdio_stream.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_stream.pyi +0 -5
- aip_agents/examples/hello_world_langgraph_stream_lm_invoker.pyi +0 -5
- aip_agents/examples/hello_world_model_switch_cli.pyi +0 -15
- aip_agents/examples/hello_world_multi_agent_adk.pyi +0 -6
- aip_agents/examples/hello_world_multi_agent_langchain.pyi +0 -5
- aip_agents/examples/hello_world_multi_agent_langgraph.pyi +0 -5
- aip_agents/examples/hello_world_multi_agent_langgraph_lm_invoker.pyi +0 -5
- aip_agents/examples/hello_world_pii_logger.pyi +0 -5
- aip_agents/examples/hello_world_sentry.pyi +0 -21
- aip_agents/examples/hello_world_stock_a2a_server.pyi +0 -12
- aip_agents/examples/hello_world_tool_output_client.pyi +0 -5
- aip_agents/examples/hello_world_tool_output_server.pyi +0 -14
- aip_agents/examples/tools/__init__.pyi +0 -7
- aip_agents/examples/tools/adk_weather_tool.pyi +0 -18
- aip_agents/examples/tools/data_generator_tool.pyi +0 -15
- aip_agents/examples/tools/data_visualization_tool.pyi +0 -17
- aip_agents/examples/tools/image_artifact_tool.pyi +0 -24
- aip_agents/examples/tools/langchain_arithmetic_tools.pyi +0 -10
- aip_agents/examples/tools/langchain_currency_exchange_tool.pyi +0 -21
- aip_agents/examples/tools/langchain_graph_artifact_tool.pyi +0 -23
- aip_agents/examples/tools/langchain_weather_tool.pyi +0 -19
- aip_agents/examples/tools/langgraph_streaming_tool.pyi +0 -43
- aip_agents/examples/tools/pr_details_bosa_tool.pyi +0 -26
- aip_agents/examples/tools/serper_tool.pyi +0 -16
- aip_agents/examples/tools/stock_tools.pyi +0 -21
- aip_agents/examples/tools/table_generator_tool.pyi +0 -22
- aip_agents/examples/tools/time_tool.pyi +0 -15
- aip_agents/examples/tools/weather_forecast_tool.pyi +0 -14
- aip_agents/mcp/__init__.pyi +0 -0
- aip_agents/mcp/client/__init__.pyi +0 -4
- aip_agents/mcp/client/google_adk/__init__.pyi +0 -3
- aip_agents/mcp/client/google_adk/client.pyi +0 -38
- aip_agents/mcp/client/langchain/__init__.pyi +0 -3
- aip_agents/mcp/client/langchain/client.pyi +0 -3
- aip_agents/memory/__init__.pyi +0 -0
- aip_agents/memory/simple_memory.pyi +0 -22
- aip_agents/sentry/__init__.pyi +0 -3
- aip_agents/sentry/sentry.pyi +0 -48
- aip_agents/storage/__init__.pyi +0 -8
- aip_agents/storage/clients/__init__.pyi +0 -3
- aip_agents/storage/clients/minio_client.pyi +0 -137
- aip_agents/storage/config.pyi +0 -66
- aip_agents/storage/providers/__init__.pyi +0 -5
- aip_agents/storage/providers/object_storage.pyi +0 -98
- aip_agents/tools/__init__.pyi +0 -3
- aip_agents/tools/base.pyi +0 -44
- aip_agents/tools/base_bosa_tools.pyi +0 -12
- aip_agents/tools/bosa_connector.pyi +0 -30
- aip_agents/tools/bosa_tools.pyi +0 -37
- aip_agents/tools/bosa_tools_interface.pyi +0 -26
- aip_agents/tools/constants.pyi +0 -130
- aip_agents/tools/nested_agent_tool.pyi +0 -45
- aip_agents/tools/tool_config_injector.pyi +0 -26
- aip_agents/types/__init__.pyi +0 -3
- aip_agents/types/a2a_events.pyi +0 -74
- aip_agents/utils/__init__.pyi +0 -5
- aip_agents/utils/a2a_connector.pyi +0 -135
- aip_agents/utils/artifact_helpers.pyi +0 -179
- aip_agents/utils/langgraph/__init__.pyi +0 -3
- aip_agents/utils/langgraph/converter.pyi +0 -49
- aip_agents/utils/langgraph/tool_managers/__init__.pyi +0 -5
- aip_agents/utils/langgraph/tool_managers/a2a_tool_manager.pyi +0 -35
- aip_agents/utils/langgraph/tool_managers/delegation_tool_manager.pyi +0 -50
- aip_agents/utils/langgraph/tool_output_management.pyi +0 -310
- aip_agents/utils/logger_manager.pyi +0 -151
- aip_agents/utils/metadata_helper.pyi +0 -110
- aip_agents/utils/name_preprocessor/__init__.pyi +0 -6
- aip_agents/utils/name_preprocessor/google_name_preprocessor.pyi +0 -38
- aip_agents/utils/name_preprocessor/name_preprocessor.pyi +0 -41
- aip_agents/utils/reference_helper.pyi +0 -49
- aip_agents/utils/token_usage_helper.pyi +0 -60
- aip_agents_binary-0.0.0b2.dist-info/METADATA +0 -277
- aip_agents_binary-0.0.0b2.dist-info/RECORD +0 -157
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Constants for tools using BOSA Connector.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Saul Sayers (saul.sayers@gdplabs.id)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from enum import Enum, StrEnum
|
|
9
|
+
|
|
10
|
+
# Connection settings are read from the process environment once, at import
# time. Each one is None when the corresponding variable is not set.
BOSA_API_BASE_URL = os.environ.get("BOSA_API_BASE_URL")
BOSA_API_KEY = os.environ.get("BOSA_API_KEY")

# Fixed retry count constant; how it is applied is decided by the code that
# imports it (not visible in this module).
BOSA_FETCH_MAX_RETRIES = 3
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ToolType(StrEnum):
|
|
16
|
+
"""Tool types for BOSA Connector."""
|
|
17
|
+
|
|
18
|
+
GLLM = "gllm"
|
|
19
|
+
LANGCHAIN = "langchain"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Action(Enum):
|
|
23
|
+
"""Actions for BOSA Connector."""
|
|
24
|
+
|
|
25
|
+
GITHUB = "github"
|
|
26
|
+
GOOGLE = "google"
|
|
27
|
+
GOOGLE_DRIVE = "google_drive"
|
|
28
|
+
GOOGLE_MAIL = "google_mail"
|
|
29
|
+
TWITTER = "twitter"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class GitHubEndpoint(Enum):
|
|
33
|
+
"""GitHub endpoints for BOSA Connector."""
|
|
34
|
+
|
|
35
|
+
INTEGRATIONS = "integrations"
|
|
36
|
+
USER_HAS_INTEGRATION = "integration-exists"
|
|
37
|
+
SUCCESS_AUTHORIZE_CALLBACK = "success-authorize-callback"
|
|
38
|
+
CREATE_ISSUE_HANDLER = "create_issue"
|
|
39
|
+
GET_ISSUE_HANDLER = "get_issue"
|
|
40
|
+
LIST_ISSUES = "list_issues"
|
|
41
|
+
LIST_ISSUES_COMMENTS = "list_issues_comments"
|
|
42
|
+
SEARCH_ALL_ISSUES = "search_issues"
|
|
43
|
+
GET_COMMITS = "list_commits"
|
|
44
|
+
SEARCH_COMMITS = "search_commits"
|
|
45
|
+
GET_COLLABORATORS = "list_collaborators"
|
|
46
|
+
GET_RELEASES = "list_releases"
|
|
47
|
+
GET_CONTRIBUTORS = "list_contributors"
|
|
48
|
+
GET_LANGUAGES = "list_languages"
|
|
49
|
+
SEARCH_CONTRIBUTIONS = "search_contributions"
|
|
50
|
+
LIST_PULL_REQUESTS = "list_pull_requests"
|
|
51
|
+
SEARCH_PR = "search_pull_requests"
|
|
52
|
+
GET_PULL = "get_pull"
|
|
53
|
+
GET_ALL_CONTRIBUTOR_COMMIT_ACTIVITIES = "get_all_contributor_commit_activities"
|
|
54
|
+
GET_COMMIT_ACTIVITY = "get_the_last_year_of_commit_activity"
|
|
55
|
+
GET_WEEKLY_COMMIT_COUNTS = "get_weekly_commit_count "
|
|
56
|
+
GET_USER_CONTRIBUTION_STATISTICS = "get_user_contribution_statistics"
|
|
57
|
+
LIST_PROJECT_ITEMS = "list_project_items"
|
|
58
|
+
LIST_PROJECTS = "list_projects"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class GoogleDriveEndpoint(Enum):
|
|
62
|
+
"""Google Drive endpoints for BOSA Connector."""
|
|
63
|
+
|
|
64
|
+
INTEGRATIONS = "integrations"
|
|
65
|
+
USER_HAS_INTEGRATION = "integration-exists"
|
|
66
|
+
SUCCESS_AUTHORIZE_CALLBACK = "success-authorize-callback"
|
|
67
|
+
SEARCH_FILES = "search_files"
|
|
68
|
+
GET_FILE = "get_file"
|
|
69
|
+
CREATE_FILE = "create_file"
|
|
70
|
+
CREATE_FOLDER = "create_folder"
|
|
71
|
+
UPDATE_FILE = "update_file"
|
|
72
|
+
UPDATE_FOLDER = "update_folder"
|
|
73
|
+
COPY_FILE = "copy_file"
|
|
74
|
+
DELETE_FILE = "delete_file"
|
|
75
|
+
SUMMARIZE_FOLDER_FILES_BY_TYPE = "summarize_folder_files_by_type"
|
|
76
|
+
SUMMARIZE_TOTAL_FILES_BY_TYPE = "summarize_total_files_by_type"
|
|
77
|
+
RECENT_FILES = "list_recent_files_from_yesterday"
|
|
78
|
+
CREATE_PERMISSION = "create_permission"
|
|
79
|
+
LIST_PERMISSIONS = "list_permissions"
|
|
80
|
+
GET_PERMISSION = "get_permission"
|
|
81
|
+
UPDATE_PERMISSION = "update_permission"
|
|
82
|
+
DELETE_PERMISSION = "delete_permission"
|
|
83
|
+
DOWNLOAD_FILE = "download_file"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class GoogleDocsEndpoint(Enum):
|
|
87
|
+
"""Google Docs endpoints for BOSA Connector."""
|
|
88
|
+
|
|
89
|
+
INTEGRATIONS = "integrations"
|
|
90
|
+
USER_HAS_INTEGRATION = "integration-exists"
|
|
91
|
+
SUCCESS_AUTHORIZE_CALLBACK = "success-authorize-callback"
|
|
92
|
+
GET_DOCUMENT = "get_document"
|
|
93
|
+
LIST_DOCUMENTS = "list_documents"
|
|
94
|
+
CREATE_DOCUMENT = "create_document"
|
|
95
|
+
UPDATE_DOCUMENT = "update_document"
|
|
96
|
+
COPY_CONTENT = "copy_content"
|
|
97
|
+
UPDATE_DOCUMENT_MARKDOWN = "update_document_markdown"
|
|
98
|
+
LIST_COMMENTS = "list_comments"
|
|
99
|
+
SUMMARIZE_COMMENTS = "summarize_comments"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class GoogleEndpoint(Enum):
|
|
103
|
+
"""Google endpoints for BOSA Connector."""
|
|
104
|
+
|
|
105
|
+
INTEGRATIONS = "integrations"
|
|
106
|
+
USER_HAS_INTEGRATION = "integration-exists"
|
|
107
|
+
SUCCESS_AUTHORIZE_CALLBACK = "success-authorize-callback"
|
|
108
|
+
USERINFO = "userinfo"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class TwitterEndpoint(Enum):
|
|
112
|
+
"""Twitter endpoints for BOSA Connector."""
|
|
113
|
+
|
|
114
|
+
INTEGRATIONS = "integrations"
|
|
115
|
+
USER_HAS_INTEGRATION = "integration-exists"
|
|
116
|
+
SUCCESS_AUTHORIZE_CALLBACK = "success-authorize-callback"
|
|
117
|
+
SEARCH = "tweet_search"
|
|
118
|
+
GET_TWEETS = "get_tweets"
|
|
119
|
+
GET_THREAD = "get_thread"
|
|
120
|
+
GET_USERS = "get_users"
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class GoogleMailEndpoint(Enum):
|
|
124
|
+
"""Google Mail endpoints for BOSA Connector."""
|
|
125
|
+
|
|
126
|
+
INTEGRATIONS = "integrations"
|
|
127
|
+
USER_HAS_INTEGRATION = "integration-exists"
|
|
128
|
+
SUCCESS_AUTHORIZE_CALLBACK = "success-authorize-callback"
|
|
129
|
+
CREATE_DRAFT = "create_draft"
|
|
130
|
+
LIST_DRAFTS = "list_drafts"
|
|
131
|
+
SEND_DRAFT = "send_draft"
|
|
132
|
+
GET_DRAFT = "get_draft"
|
|
133
|
+
MODIFY_DRAFT = "modify_draft"
|
|
134
|
+
LIST_LABELS = "list_labels"
|
|
135
|
+
LABEL_STATS = "label_stats"
|
|
136
|
+
GET_LABEL_DETAILS = "get_label_details"
|
|
137
|
+
CREATE_LABELS = "create_labels"
|
|
138
|
+
MODIFY_LABELS = "modify_labels"
|
|
139
|
+
DELETE_LABELS = "delete_labels"
|
|
140
|
+
SEND_EMAIL = "send_email"
|
|
141
|
+
LIST_EMAILS = "list_emails"
|
|
142
|
+
GET_EMAIL_DETAILS = "get_email_details"
|
|
143
|
+
MODIFY_EMAIL = "modify_email"
|
|
144
|
+
DELETE_EMAIL = "delete_email"
|
|
145
|
+
TRASH_EMAIL = "trash_email"
|
|
146
|
+
UNTRASH_EMAIL = "untrash_email"
|
|
147
|
+
LIST_THREADS = "list_threads"
|
|
148
|
+
THREAD_DETAILS = "thread_details"
|
|
149
|
+
MODIFY_THREAD = "modify_thread"
|
|
150
|
+
GET_AUTO_REPLY = "get_auto_reply"
|
|
151
|
+
SET_AUTO_REPLY = "set_auto_reply"
|
|
152
|
+
GET_ATTACHMENT = "get_attachment"
|
|
153
|
+
USERINFO = "userinfo"
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class ActionEndpointMap:
    """Lookup table from an ``Action`` member to its endpoint enum class."""

    # Every Action member has exactly one entry here.
    # NOTE(review): GoogleDocsEndpoint is defined in this module but has no
    # Action member and no entry in MAP — confirm whether that is intentional.
    MAP: dict[Action, type[Enum]] = {
        Action.GITHUB: GitHubEndpoint,
        Action.GOOGLE: GoogleEndpoint,
        Action.GOOGLE_DRIVE: GoogleDriveEndpoint,
        Action.GOOGLE_MAIL: GoogleMailEndpoint,
        Action.TWITTER: TwitterEndpoint,
    }
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Document loader tools package.
|
|
2
|
+
|
|
3
|
+
This package provides tools for reading and extracting content from various document formats.
|
|
4
|
+
|
|
5
|
+
Authors:
|
|
6
|
+
Christian Trisno Sen Long Chen (christian.t.s.l.chen@gdplabs.id)
|
|
7
|
+
Douglas Raevan Faisal (douglas.raevan.faisal@gdplabs.id)
|
|
8
|
+
|
|
9
|
+
References:
|
|
10
|
+
NONE
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
# The reader tools depend on optional third-party packages, so the re-exports
# are guarded: when any of the imports fails, the package still imports, emits
# an ImportWarning with an install hint, and exposes an empty public API.
try:
    from aip_agents.tools.document_loader.base_reader import (  # noqa: F401
        BaseDocumentReaderTool,
        DocumentReaderInput,
    )
    from aip_agents.tools.document_loader.docx_reader_tool import DocxReaderTool  # noqa: F401
    from aip_agents.tools.document_loader.excel_reader_tool import ExcelReaderTool  # noqa: F401
    from aip_agents.tools.document_loader.pdf_reader_tool import PDFReaderTool  # noqa: F401
    from aip_agents.tools.document_loader.pdf_splitter import PDFSplitter  # noqa: F401
except ImportError:
    import warnings

    warnings.warn(
        "Document loader tools not available. Install with: pip install aip-agents[document-loader]", ImportWarning
    )
    __all__ = []
else:
    # Reached only when every import above succeeded.
    __all__ = [
        "BaseDocumentReaderTool",
        "DocumentReaderInput",
        "PDFReaderTool",
        "DocxReaderTool",
        "ExcelReaderTool",
        "PDFSplitter",
    ]
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""Base document reader tool.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Christian Trisno Sen Long Chen (christian.t.s.l.chen@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
https://github.com/GDP-ADMIN/gdplabs-exploration/blob/ai-agent-app/backend/aip_agents/tools/reader/base_reader.py
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import gc
|
|
11
|
+
import os
|
|
12
|
+
from abc import ABC, abstractmethod
|
|
13
|
+
|
|
14
|
+
from gllm_docproc.loader.pipeline_loader import PipelineLoader
|
|
15
|
+
from langchain_core.runnables import RunnableConfig
|
|
16
|
+
from langchain_core.tools import BaseTool
|
|
17
|
+
from pydantic import BaseModel, Field
|
|
18
|
+
from unidecode import unidecode
|
|
19
|
+
|
|
20
|
+
from aip_agents.utils.logger import get_logger
|
|
21
|
+
|
|
22
|
+
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class BaseDocumentConfig(BaseModel):
    """Base tool configuration schema for document processing with batching functionality.

    This configuration enables page-by-page batching to optimize memory usage when
    processing large document files. When batching is enabled, documents are processed
    sequentially by pages rather than loading the entire document into memory at once.

    Attributes:
        batching (bool): Enable page-by-page batching to reduce memory usage.
            When True, documents are processed page by page sequentially.
            When False, maintains current behavior of loading entire document.
            Defaults to False for backward compatibility.
        batch_size (int): Number of pages to process in each batch.
            Must be between 1 and 100 pages inclusive.
            Larger batch sizes may use more memory but could be more efficient.
            Smaller batch sizes use less memory but may have more overhead.
            Defaults to 10 for balanced memory usage and efficiency.

    Examples:
        >>> # Default configuration (no batching)
        >>> config = BaseDocumentConfig()
        >>> print(config.batching)  # False
        >>> print(config.batch_size)  # 10

        >>> # Enable batching with single page processing
        >>> config = BaseDocumentConfig(batching=True, batch_size=1)

        >>> # Enable batching with multi-page batches
        >>> config = BaseDocumentConfig(batching=True, batch_size=3)
    """

    # Opt-in switch: BaseDocumentReaderTool._run takes its batching path only
    # when this is truthy; the default keeps the whole-file read.
    batching: bool = Field(
        default=False,
        description="Enable page-by-page batching to reduce memory usage when processing large documents",
    )
    # ge/le bound the accepted values to 1..100 inclusive; pydantic rejects
    # anything outside that range when the model is validated.
    batch_size: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Number of pages to process per batch (1-100 pages)",
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class DocumentReaderInput(BaseModel):
    """Input schema for the DocumentReader tool."""

    # Required field: the ``...`` default marks it as having no default value,
    # so validation fails when the caller omits it.
    file_path: str = Field(..., description="Path to the document file to be read")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class BaseDocumentReaderTool(BaseTool, ABC):
    """Base tool to read and extract text from document files.

    Concrete readers register their loaders in ``_setup_loader`` and implement
    ``_split_file``. ``_run`` chooses between a whole-file read and a batched
    read based on the resolved tool configuration. Failures inside either read
    path are logged and returned as ``"Error reading file: ..."`` strings
    instead of being raised.
    """

    name: str = "base_document_reader_tool"
    description: str = "Read a document file and extract its text content."
    args_schema: type[BaseModel] = DocumentReaderInput
    tool_config_schema: type[BaseModel] = BaseDocumentConfig
    loader: PipelineLoader = Field(default_factory=PipelineLoader)

    def __init__(self, **kwargs):
        """Initialize the base document reader tool.

        Args:
            **kwargs: Optional field overrides forwarded unchanged to the
                parent initializer. Calling with no arguments behaves exactly
                as before.
        """
        super().__init__(**kwargs)
        self._setup_loader()

    @abstractmethod
    def _setup_loader(self) -> None:
        """Set up the specific loaders for each document type."""
        pass  # pragma: no cover

    def _run(self, file_path: str, config: RunnableConfig | None = None) -> str:
        """Run with optional batching based on configuration.

        Args:
            file_path: Path to the document file to be read
            config: Optional RunnableConfig containing tool configuration

        Returns:
            Extracted text content from the document
        """
        tool_config = None
        # get_tool_config is not defined in this class; it is used only when
        # something else (e.g. a mixin or wrapper) has provided it.
        if hasattr(self, "get_tool_config"):
            tool_config = self.get_tool_config(config)

        # Fall back to the defaults (batching disabled, batch_size=10).
        tool_config = tool_config or BaseDocumentConfig()

        logger.info(f"Batching: {tool_config.batching}, Batch size: {tool_config.batch_size}")
        if tool_config.batching:
            return self._run_with_batching(file_path, tool_config.batch_size)
        return self._run_standard(file_path)

    def _run_standard(self, file_path: str) -> str:
        """Standard processing: read the whole file in one pass.

        Args:
            file_path: Path to the document file to be read

        Returns:
            Extracted text content from the document, or an
            ``"Error reading file: ..."`` message when reading fails.
        """
        try:
            return self._process_single_file(file_path)
        except Exception as e:
            # The error is still reported to the caller as text; the log
            # record keeps the traceback that the string would otherwise lose.
            logger.exception(f"Error reading file: {file_path}")
            return f"Error reading file: {str(e)}"

    def _run_with_batching(self, file_path: str, batch_size: int) -> str:
        """Process file using batching with existing loader.

        Args:
            file_path: Path to the document file to be read
            batch_size: Number of pages to process per batch

        Returns:
            Extracted text content from the document, or an
            ``"Error reading file: ..."`` message when splitting or reading fails.
        """
        try:
            logger.info(f"Splitting file: {file_path} into {batch_size} pages per batch")
            split_files = self._split_file(file_path, batch_size)

            # _process_file_batch deletes every path it is given. A splitter
            # that performs no real split may hand back the input path itself;
            # in that case read the document in place so the caller's file is
            # never unlinked, and discard any other temporary pieces.
            source = os.path.realpath(file_path)
            if any(os.path.realpath(piece) == source for piece in split_files):
                for piece in split_files:
                    if os.path.realpath(piece) != source:
                        self._cleanup_temp_file(piece)
                return self._process_single_file(file_path)

            return self._process_file_batch(split_files)
        except Exception as e:
            logger.exception(f"Error reading file in batches: {file_path}")
            return f"Error reading file: {str(e)}"

    def _process_single_file(self, file_path: str) -> str:
        """Process a single file and return extracted text.

        Args:
            file_path: Path to the document file to be read

        Returns:
            Extracted text content from the document (ASCII-normalized)

        Raises:
            Exception: Whatever the loader or text assembly raises is propagated.
        """
        try:
            # Load and process the file
            loaded_elements = self.loader.load(file_path)
            full_text = "\n".join(element["text"] for element in loaded_elements)

            # Apply unidecode to convert non-ASCII characters to ASCII equivalents
            # This prevents encoding errors by transliterating characters like:
            # "Café" -> "Cafe", "François" -> "Francois", "北京" -> "Bei Jing"
            result = unidecode(full_text).strip()

            # Drop the large intermediates before the collection in ``finally``
            del loaded_elements
            del full_text

            return result
        finally:
            # Force garbage collection on both the success and the error path
            gc.collect()

    def _process_file_batch(self, split_files: list[str]) -> str:
        """Process a batch of split files and return combined text.

        Every path in ``split_files`` is deleted after it has been processed,
        whether or not processing succeeded, so callers must pass temporary
        files only.

        Args:
            split_files: List of temporary file paths to process

        Returns:
            Combined extracted text content from all files, followed by a
            summary of per-file errors when any occurred.
        """
        results = []
        errors = []

        for split_file in split_files:
            try:
                results.append(self._process_single_file(split_file))
            except Exception as e:
                # One failing piece must not abort the remaining pieces.
                logger.exception(f"Error processing batch file: {split_file}")
                errors.append(f"Error processing batch: {str(e)}")
            finally:
                self._cleanup_temp_file(split_file)
                # Force garbage collection after each file to minimize memory usage
                gc.collect()

        # Combine results
        full_text = "\n".join(results).strip()

        # Clear intermediate results to free memory
        del results
        gc.collect()

        if errors:
            error_summary = f"\n\nProcessing completed with {len(errors)} errors:\n" + "\n".join(errors)
            full_text += error_summary

        return full_text

    def _cleanup_temp_file(self, file_path: str) -> None:
        """Clean up temporary file.

        Removal is best-effort: a failure is logged at debug level and
        otherwise ignored.

        Args:
            file_path: Path to the temporary file to clean up
        """
        try:
            os.unlink(file_path)
        except OSError as e:
            logger.debug(f"Could not remove temporary file {file_path}: {e}")

    def cleanup_memory(self) -> None:
        """Explicitly clean up memory and force garbage collection.

        This method can be called after processing to minimize memory usage.
        While it won't reset memory to exactly 0, it will free up as much
        memory as possible by clearing internal caches and forcing garbage collection.
        """
        # Clear any cached data in the loader if it has a cleanup method
        if hasattr(self.loader, "clear_cache"):
            self.loader.clear_cache()

        # Force garbage collection multiple times to ensure cleanup
        for _ in range(3):
            gc.collect()

        # NOTE: recreating the loader and re-running _setup_loader() would be a
        # more aggressive cleanup; it is deliberately not done here because it
        # may affect performance for subsequent calls.

    @abstractmethod
    def _split_file(self, file_path: str, batch_size: int) -> list[str]:
        """Split file into temporary files for batch processing.

        The returned files are deleted after they are processed, so an
        implementation should return newly created temporary files.

        Args:
            file_path: Path to the document file to be split
            batch_size: Number of pages to include in each split file

        Returns:
            List of temporary file paths containing the split content
        """
        pass  # pragma: no cover
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Tool to read and extract text from Word (DOCX) documents.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Christian Trisno Sen Long Chen (christian.t.s.l.chen@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
https://github.com/GDP-ADMIN/gdplabs-exploration/blob/ai-agent-app/backend/aip_agents/tools/
|
|
8
|
+
reader/docx_reader_tool.py
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from gllm_docproc.loader.docx import DOCX2PythonLoader, PythonDOCXTableLoader
|
|
12
|
+
|
|
13
|
+
from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DocxReaderTool(BaseDocumentReaderTool):
    """Document reader specialised for Word (.docx) files."""

    name: str = "docx_reader_tool"
    description: str = "Read a Word document and extract its text content. Input should be the path to the Word file."

    def _setup_loader(self) -> None:
        """Register DOCX2PythonLoader, then PythonDOCXTableLoader, on the pipeline."""
        for loader_factory in (DOCX2PythonLoader, PythonDOCXTableLoader):
            self.loader.add_loader(loader_factory())

    def _run_with_batching(self, file_path: str, batch_size: int) -> str:
        """Handle a batched request by reading the whole document at once.

        DOCX splitting does not exist yet, so the batching request is served
        by the standard (non-batched) path.

        Args:
            file_path: Location of the DOCX file to read
            batch_size: Requested pages per batch; currently ignored

        Returns:
            The text extracted from the document
        """
        # TODO: implement DOCX batching with real splits before enabling batching flow.
        extracted = self._run_standard(file_path)
        return extracted

    def _split_file(self, file_path: str, batch_size: int) -> list[str]:
        """Return the DOCX file unsplit.

        Placeholder required by the abstract base class: no temporary files
        are created and the input path is handed back as the only element.

        Args:
            file_path: Location of the DOCX file
            batch_size: Requested pages per split file; currently ignored

        Returns:
            A one-element list holding ``file_path`` itself
        """
        # DOCX batching is not implemented in this feature.
        whole_document = [file_path]
        return whole_document
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Tool to read and extract content from Excel files using gllm_docproc.
|
|
2
|
+
|
|
3
|
+
This tool uses the gllm_docproc loader pipeline to extract content from Excel files,
|
|
4
|
+
providing a consistent interface with other document reader tools (PDF, DOCX).
|
|
5
|
+
|
|
6
|
+
Authors:
|
|
7
|
+
Douglas Raevan Faisal (douglas.raevan.faisal@gdplabs.id)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import zipfile
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from gllm_docproc.loader.xlsx import OpenpyxlLoader
|
|
14
|
+
|
|
15
|
+
from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool
|
|
16
|
+
from aip_agents.utils.logger import get_logger
|
|
17
|
+
|
|
18
|
+
logger = get_logger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ExcelReaderTool(BaseDocumentReaderTool):
    """Reader tool for Excel workbooks (.xlsx and .xlsm).

    Extraction is delegated to an ``OpenpyxlLoader`` registered on the
    inherited loader pipeline. Before reading, the file extension is checked;
    for ``.xlsm`` files the archive is additionally inspected for a VBA
    project entry, which is only logged. Batching is not implemented: a
    batched run is served by the standard path.

    Examples:
        >>> tool = ExcelReaderTool()
        >>> result = tool._run("/tmp/data.xlsx")
        >>> print(result)
    """

    name: str = "excel_reader_tool"
    description: str = (
        "Read an Excel file and extract its content as Markdown tables. "
        "Input should be the path to the Excel file (.xlsx or .xlsm format). "
        "Each sheet will be formatted as a Markdown table with the sheet name as a header."
    )

    def _setup_loader(self) -> None:
        """Register a single ``OpenpyxlLoader`` on the loader pipeline."""
        xlsx_loader = OpenpyxlLoader()
        self.loader.add_loader(xlsx_loader)

    def _run_with_batching(self, file_path: str, batch_size: int) -> str:
        """Handle a batched request by reading the whole workbook at once.

        Args:
            file_path: Location of the Excel file to read
            batch_size: Requested sheets per batch; currently ignored

        Returns:
            The text extracted from the workbook
        """
        # TODO: implement Excel batching with real splits before enabling batching flow.
        extracted = self._run_standard(file_path)
        return extracted

    def _split_file(self, file_path: str, batch_size: int) -> list[str]:
        """Return the Excel file unsplit.

        Placeholder required by the abstract base class: splitting a workbook
        by sheets is not implemented, so the input path is handed back as the
        only element after logging that fact.

        Args:
            file_path: Location of the Excel file
            batch_size: Requested sheets per split file; currently ignored

        Returns:
            A one-element list holding ``file_path`` itself
        """
        logger.info(f"Excel batching not implemented, processing entire file: {file_path}")
        whole_workbook = [file_path]
        return whole_workbook

    def _validate_excel_file(self, file_path: str) -> str | None:
        """Check the file extension and log macro-related findings.

        Args:
            file_path: Location of the Excel file

        Returns:
            A message describing the problem when the extension is not
            supported, otherwise None
        """
        suffix = Path(file_path).suffix
        extension = suffix.lower()

        # Reject anything that is not an Open XML workbook extension.
        if extension not in (".xlsx", ".xlsm"):
            return f"Invalid file extension: {suffix}. Only .xlsx and .xlsm files are supported."

        # Plain workbooks need no further inspection.
        if extension != ".xlsm":
            return None

        logger.warning(f"Macro-enabled file detected: {file_path}")
        # Look inside the archive for the VBA project entry; failing to open
        # or read it is not a validation error, only a debug note.
        try:
            with zipfile.ZipFile(file_path, "r") as archive:
                if "xl/vbaProject.bin" in archive.namelist():
                    logger.warning(
                        f"VBA macros found in {file_path}. "
                        "Macros will not be executed; only data will be extracted."
                    )
        except Exception as e:
            logger.debug(f"Could not check for macros: {e}")

        return None

    def _run_standard(self, file_path: str) -> str:
        """Read the workbook, translating failures into Excel-specific messages.

        Args:
            file_path: Location of the Excel file to read

        Returns:
            The text extracted from the workbook, or a message describing why
            it could not be read
        """
        problem = self._validate_excel_file(file_path)
        if problem:
            return f"Validation Error: {problem}"

        try:
            return self._process_single_file(file_path)
        except PermissionError:
            return f"Error: Permission denied accessing file: {file_path}"
        except zipfile.BadZipFile:
            return (
                "Error: File appears to be corrupted or is not a valid Excel format. "
                "Please check the file and try again."
            )
        except Exception as e:
            lowered = str(e).lower()

            def mentions(*keywords: str) -> bool:
                """Report whether the lowercased error text contains any keyword."""
                return any(keyword in lowered for keyword in keywords)

            # The checks run in this fixed order; the first match wins.
            if mentions("password", "encrypted"):
                return "Error: File is password-protected or encrypted. Please provide an unprotected file."
            if mentions("invalid", "corrupt"):
                return "Error: File appears to be corrupted or invalid Excel format. Please verify the file integrity."
            if mentions("not supported", "unsupported"):
                return f"Error: File contains unsupported features: {e}"

            # Unrecognised failure: keep the traceback in the log.
            logger.exception(f"Unexpected error processing Excel file: {file_path}")
            return f"Error processing Excel file: {e}"
|