langchain-agentx-python 0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_agentx/__init__.py +46 -0
- langchain_agentx/command/__init__.py +28 -0
- langchain_agentx/command/builtin/__init__.py +25 -0
- langchain_agentx/command/builtin/clear.py +33 -0
- langchain_agentx/command/builtin/compact.py +33 -0
- langchain_agentx/command/builtin/memory.py +37 -0
- langchain_agentx/command/builtin/reload_plugins.py +42 -0
- langchain_agentx/command/context.py +30 -0
- langchain_agentx/command/dispatcher.py +183 -0
- langchain_agentx/command/registry.py +110 -0
- langchain_agentx/command/result.py +25 -0
- langchain_agentx/command/types.py +41 -0
- langchain_agentx/config/__init__.py +14 -0
- langchain_agentx/loop/__init__.py +47 -0
- langchain_agentx/loop/config/__init__.py +20 -0
- langchain_agentx/loop/config/agent_config.py +66 -0
- langchain_agentx/loop/config/agent_loop_config.py +72 -0
- langchain_agentx/loop/config/model_context_resolver.py +105 -0
- langchain_agentx/loop/config/runtime_settings.py +50 -0
- langchain_agentx/loop/config/token_estimator.py +133 -0
- langchain_agentx/loop/context/__init__.py +66 -0
- langchain_agentx/loop/context/blocking_guard.py +97 -0
- langchain_agentx/loop/context/compaction_service.py +60 -0
- langchain_agentx/loop/context/message_utils.py +56 -0
- langchain_agentx/loop/context/pipeline.py +127 -0
- langchain_agentx/loop/context/settings.py +103 -0
- langchain_agentx/loop/context/stages/__init__.py +29 -0
- langchain_agentx/loop/context/stages/autocompact.py +140 -0
- langchain_agentx/loop/context/stages/base.py +32 -0
- langchain_agentx/loop/context/stages/collapse.py +76 -0
- langchain_agentx/loop/context/stages/microcompact.py +76 -0
- langchain_agentx/loop/context/stages/noop.py +33 -0
- langchain_agentx/loop/context/stages/snip.py +71 -0
- langchain_agentx/loop/context/stages/tool_result_budget.py +69 -0
- langchain_agentx/loop/context/types.py +79 -0
- langchain_agentx/loop/exit/__init__.py +1 -0
- langchain_agentx/loop/exit/exit_logic.py +320 -0
- langchain_agentx/loop/exit/reason_codes.py +39 -0
- langchain_agentx/loop/graph/__init__.py +5 -0
- langchain_agentx/loop/graph/builtin_loop_control.py +197 -0
- langchain_agentx/loop/graph/factory.py +1409 -0
- langchain_agentx/loop/graph/graph_edges.py +820 -0
- langchain_agentx/loop/hook/__init__.py +48 -0
- langchain_agentx/loop/hook/async_hook_runner.py +62 -0
- langchain_agentx/loop/hook/config.py +280 -0
- langchain_agentx/loop/hook/engine.py +321 -0
- langchain_agentx/loop/hook/executors/__init__.py +9 -0
- langchain_agentx/loop/hook/executors/agent.py +107 -0
- langchain_agentx/loop/hook/executors/command.py +230 -0
- langchain_agentx/loop/hook/executors/http.py +114 -0
- langchain_agentx/loop/hook/executors/prompt.py +92 -0
- langchain_agentx/loop/hook/graph_wiring.py +134 -0
- langchain_agentx/loop/hook/registry.py +262 -0
- langchain_agentx/loop/hook/trust.py +43 -0
- langchain_agentx/loop/hook/types.py +110 -0
- langchain_agentx/loop/injection/__init__.py +13 -0
- langchain_agentx/loop/injection/dedup.py +74 -0
- langchain_agentx/loop/loop_abort.py +36 -0
- langchain_agentx/loop/model/__init__.py +1 -0
- langchain_agentx/loop/model/model_node.py +648 -0
- langchain_agentx/loop/model/model_nodes.py +661 -0
- langchain_agentx/loop/model/orphan_tool_results.py +38 -0
- langchain_agentx/loop/model/retrier.py +307 -0
- langchain_agentx/loop/model/retry_bridge.py +58 -0
- langchain_agentx/loop/model/retry_events.py +35 -0
- langchain_agentx/loop/model/retry_policy.py +56 -0
- langchain_agentx/loop/model/schema_and_format.py +153 -0
- langchain_agentx/loop/model/tool_and_model_binding.py +227 -0
- langchain_agentx/loop/model/tool_call_degradation_corrector.py +443 -0
- langchain_agentx/loop/model/tool_transcript_guard.py +225 -0
- langchain_agentx/loop/prompt/__init__.py +95 -0
- langchain_agentx/loop/prompt/builder.py +61 -0
- langchain_agentx/loop/prompt/builtin.py +218 -0
- langchain_agentx/loop/prompt/compact.py +408 -0
- langchain_agentx/loop/prompt/sections.py +120 -0
- langchain_agentx/loop/runtime/__init__.py +19 -0
- langchain_agentx/loop/runtime/context.py +34 -0
- langchain_agentx/loop/runtime/context_factory.py +107 -0
- langchain_agentx/loop/runtime/subagent_execution_paths.py +68 -0
- langchain_agentx/loop/subagent/__init__.py +53 -0
- langchain_agentx/loop/subagent/async_runner.py +215 -0
- langchain_agentx/loop/subagent/context.py +209 -0
- langchain_agentx/loop/subagent/fork_worktree_notice.py +25 -0
- langchain_agentx/loop/subagent/graph.py +72 -0
- langchain_agentx/loop/subagent/orchestrator.py +391 -0
- langchain_agentx/loop/subagent/progress.py +30 -0
- langchain_agentx/loop/subagent/prompt.py +52 -0
- langchain_agentx/loop/subagent/runner.py +504 -0
- langchain_agentx/loop/subagent/transcript.py +172 -0
- langchain_agentx/memory/__init__.py +2 -0
- langchain_agentx/memory/instruction/__init__.py +12 -0
- langchain_agentx/memory/instruction/loader.py +325 -0
- langchain_agentx/memory/instruction/resolver.py +24 -0
- langchain_agentx/memory/instruction/runtime.py +83 -0
- langchain_agentx/memory/instruction/sections.py +83 -0
- langchain_agentx/memory/instruction/types.py +59 -0
- langchain_agentx/memory/memdir/__init__.py +77 -0
- langchain_agentx/memory/memdir/age.py +36 -0
- langchain_agentx/memory/memdir/agent_memory.py +380 -0
- langchain_agentx/memory/memdir/extractor.py +309 -0
- langchain_agentx/memory/memdir/loader.py +187 -0
- langchain_agentx/memory/memdir/paths.py +63 -0
- langchain_agentx/memory/memdir/recall.py +45 -0
- langchain_agentx/memory/memdir/runtime.py +43 -0
- langchain_agentx/memory/memdir/scan.py +135 -0
- langchain_agentx/memory/memdir/types.py +104 -0
- langchain_agentx/memory/session/__init__.py +76 -0
- langchain_agentx/memory/session/compact_bridge.py +208 -0
- langchain_agentx/memory/session/prompts.py +172 -0
- langchain_agentx/memory/session/session_memory.py +282 -0
- langchain_agentx/observability/__init__.py +67 -0
- langchain_agentx/observability/evaluation/__init__.py +17 -0
- langchain_agentx/observability/evaluation/checkers/__init__.py +18 -0
- langchain_agentx/observability/evaluation/checkers/base.py +34 -0
- langchain_agentx/observability/evaluation/checkers/compaction.py +38 -0
- langchain_agentx/observability/evaluation/checkers/degradation.py +50 -0
- langchain_agentx/observability/evaluation/checkers/exit_quality.py +42 -0
- langchain_agentx/observability/evaluation/checkers/session_memory.py +45 -0
- langchain_agentx/observability/evaluation/checkers/tool_behavior.py +53 -0
- langchain_agentx/observability/evaluation/retention_scheduler.py +67 -0
- langchain_agentx/observability/evaluation/service.py +102 -0
- langchain_agentx/observability/evaluation/state.py +32 -0
- langchain_agentx/observability/evaluation/store.py +258 -0
- langchain_agentx/observability/events/__init__.py +15 -0
- langchain_agentx/observability/events/langchain_agentx_event_adapter.py +832 -0
- langchain_agentx/observability/logging/__init__.py +15 -0
- langchain_agentx/observability/logging/debug_burst.py +95 -0
- langchain_agentx/observability/logging/logging_config.py +178 -0
- langchain_agentx/observability/logging/logging_contract.py +65 -0
- langchain_agentx/observability/replay/__init__.py +35 -0
- langchain_agentx/observability/replay/cli.py +91 -0
- langchain_agentx/observability/replay/service.py +83 -0
- langchain_agentx/observability/replay/store.py +278 -0
- langchain_agentx/observability/replay/ui.py +47 -0
- langchain_agentx/observability/trace/__init__.py +25 -0
- langchain_agentx/observability/trace/collector.py +560 -0
- langchain_agentx/observability/trace/event_emitter.py +183 -0
- langchain_agentx/observability/trace/hook_event_emitter.py +49 -0
- langchain_agentx/observability/trace/models.py +144 -0
- langchain_agentx/observability/trace/sqlite_store.py +873 -0
- langchain_agentx/observability/trace/trace_callback.py +295 -0
- langchain_agentx/observability/trace/trace_lifecycle_collector.py +114 -0
- langchain_agentx/plugin/__init__.py +26 -0
- langchain_agentx/plugin/builtin.py +53 -0
- langchain_agentx/plugin/config.py +113 -0
- langchain_agentx/plugin/loader.py +386 -0
- langchain_agentx/plugin/manifest.py +154 -0
- langchain_agentx/plugin/registries.py +211 -0
- langchain_agentx/plugin/types.py +142 -0
- langchain_agentx/provider/__init__.py +27 -0
- langchain_agentx/provider/anthropic.py +121 -0
- langchain_agentx/provider/compatible_chat_openai.py +86 -0
- langchain_agentx/provider/env.py +45 -0
- langchain_agentx/provider/model_profile.py +156 -0
- langchain_agentx/provider/openai.py +89 -0
- langchain_agentx/session/__init__.py +17 -0
- langchain_agentx/session/agent_session.py +320 -0
- langchain_agentx/session/conversation_factory.py +87 -0
- langchain_agentx/session/conversation_recovery.py +156 -0
- langchain_agentx/session/conversation_session.py +198 -0
- langchain_agentx/session/factory.py +143 -0
- langchain_agentx/session/protocol.py +25 -0
- langchain_agentx/task_runtime/__init__.py +113 -0
- langchain_agentx/task_runtime/core/__init__.py +51 -0
- langchain_agentx/task_runtime/core/ids.py +33 -0
- langchain_agentx/task_runtime/core/interfaces.py +115 -0
- langchain_agentx/task_runtime/core/notification_priority.py +19 -0
- langchain_agentx/task_runtime/core/types.py +136 -0
- langchain_agentx/task_runtime/integrations/__init__.py +33 -0
- langchain_agentx/task_runtime/integrations/loop_adapter.py +91 -0
- langchain_agentx/task_runtime/integrations/loop_integration.py +61 -0
- langchain_agentx/task_runtime/integrations/prefetch_providers.py +108 -0
- langchain_agentx/task_runtime/integrations/provider_factory.py +103 -0
- langchain_agentx/task_runtime/integrations/queued_command_provider.py +184 -0
- langchain_agentx/task_runtime/integrations/sqlite_queued_command_provider.py +338 -0
- langchain_agentx/task_runtime/integrations/tool_use_summary_provider.py +254 -0
- langchain_agentx/task_runtime/orchestrator/__init__.py +5 -0
- langchain_agentx/task_runtime/orchestrator/runtime.py +386 -0
- langchain_agentx/task_runtime/output/__init__.py +5 -0
- langchain_agentx/task_runtime/output/sink.py +64 -0
- langchain_agentx/task_runtime/policy/__init__.py +11 -0
- langchain_agentx/task_runtime/policy/withhold_visibility.py +32 -0
- langchain_agentx/task_runtime/queue/__init__.py +5 -0
- langchain_agentx/task_runtime/queue/in_memory.py +55 -0
- langchain_agentx/task_runtime/skill_prefetch/__init__.py +4 -0
- langchain_agentx/task_runtime/skill_prefetch/attachments.py +46 -0
- langchain_agentx/task_runtime/skill_prefetch/models.py +37 -0
- langchain_agentx/task_runtime/skill_prefetch/provider.py +344 -0
- langchain_agentx/task_runtime/store/__init__.py +6 -0
- langchain_agentx/task_runtime/store/in_memory.py +81 -0
- langchain_agentx/task_runtime/store/sqlite_store.py +281 -0
- langchain_agentx/task_runtime/tasks/__init__.py +76 -0
- langchain_agentx/task_runtime/tasks/ai_analysis/__init__.py +15 -0
- langchain_agentx/task_runtime/tasks/ai_analysis/base.py +41 -0
- langchain_agentx/task_runtime/tasks/ai_analysis/evaluation.py +67 -0
- langchain_agentx/task_runtime/tasks/ai_analysis/registry.py +36 -0
- langchain_agentx/task_runtime/tasks/ai_analysis/scheduler.py +70 -0
- langchain_agentx/task_runtime/tasks/base/__init__.py +6 -0
- langchain_agentx/task_runtime/tasks/base/contracts.py +24 -0
- langchain_agentx/task_runtime/tasks/custom/__init__.py +7 -0
- langchain_agentx/task_runtime/tasks/custom/executor.py +60 -0
- langchain_agentx/task_runtime/tasks/custom/notification.py +7 -0
- langchain_agentx/task_runtime/tasks/custom/semantics.py +13 -0
- langchain_agentx/task_runtime/tasks/custom/spec.py +33 -0
- langchain_agentx/task_runtime/tasks/dream_task/__init__.py +15 -0
- langchain_agentx/task_runtime/tasks/dream_task/executor.py +61 -0
- langchain_agentx/task_runtime/tasks/dream_task/notification.py +19 -0
- langchain_agentx/task_runtime/tasks/dream_task/semantics.py +13 -0
- langchain_agentx/task_runtime/tasks/dream_task/spec.py +35 -0
- langchain_agentx/task_runtime/tasks/dream_task/state.py +17 -0
- langchain_agentx/task_runtime/tasks/in_process_teammate/__init__.py +12 -0
- langchain_agentx/task_runtime/tasks/in_process_teammate/executor.py +36 -0
- langchain_agentx/task_runtime/tasks/in_process_teammate/notification.py +25 -0
- langchain_agentx/task_runtime/tasks/in_process_teammate/semantics.py +13 -0
- langchain_agentx/task_runtime/tasks/in_process_teammate/spec.py +63 -0
- langchain_agentx/task_runtime/tasks/local_agent/__init__.py +14 -0
- langchain_agentx/task_runtime/tasks/local_agent/executor.py +33 -0
- langchain_agentx/task_runtime/tasks/local_agent/notification.py +21 -0
- langchain_agentx/task_runtime/tasks/local_agent/runner.py +43 -0
- langchain_agentx/task_runtime/tasks/local_agent/semantics.py +13 -0
- langchain_agentx/task_runtime/tasks/local_agent/spec.py +31 -0
- langchain_agentx/task_runtime/tasks/local_bash/__init__.py +13 -0
- langchain_agentx/task_runtime/tasks/local_bash/executor.py +95 -0
- langchain_agentx/task_runtime/tasks/local_bash/notification.py +22 -0
- langchain_agentx/task_runtime/tasks/local_bash/semantics.py +13 -0
- langchain_agentx/task_runtime/tasks/local_bash/spec.py +55 -0
- langchain_agentx/task_runtime/tasks/remote_agent/__init__.py +19 -0
- langchain_agentx/task_runtime/tasks/remote_agent/backend.py +76 -0
- langchain_agentx/task_runtime/tasks/remote_agent/executor.py +37 -0
- langchain_agentx/task_runtime/tasks/remote_agent/notification.py +22 -0
- langchain_agentx/task_runtime/tasks/remote_agent/semantics.py +13 -0
- langchain_agentx/task_runtime/tasks/remote_agent/spec.py +34 -0
- langchain_agentx/task_runtime/tasks/trace_cleanup/__init__.py +19 -0
- langchain_agentx/task_runtime/tasks/trace_cleanup/bootstrap.py +95 -0
- langchain_agentx/task_runtime/tasks/trace_cleanup/executor.py +66 -0
- langchain_agentx/task_runtime/tasks/trace_cleanup/scheduler.py +169 -0
- langchain_agentx/tool_runtime/__init__.py +90 -0
- langchain_agentx/tool_runtime/adapter.py +365 -0
- langchain_agentx/tool_runtime/base.py +319 -0
- langchain_agentx/tool_runtime/errors.py +190 -0
- langchain_agentx/tool_runtime/identical_call_cache.py +110 -0
- langchain_agentx/tool_runtime/loader.py +195 -0
- langchain_agentx/tool_runtime/models.py +260 -0
- langchain_agentx/tool_runtime/permission_context.py +78 -0
- langchain_agentx/tool_runtime/pipeline.py +621 -0
- langchain_agentx/tool_runtime/policy.py +447 -0
- langchain_agentx/tool_runtime/registry.py +81 -0
- langchain_agentx/tool_runtime/resolvers/__init__.py +27 -0
- langchain_agentx/tool_runtime/resolvers/agent_session.py +125 -0
- langchain_agentx/tool_runtime/resolvers/background.py +32 -0
- langchain_agentx/tool_runtime/resolvers/base.py +20 -0
- langchain_agentx/tool_runtime/resolvers/conversation.py +22 -0
- langchain_agentx/tool_runtime/resolvers/workflow.py +73 -0
- langchain_agentx/tool_runtime/session_store.py +132 -0
- langchain_agentx/tool_runtime/smoke_test_runtime.py +294 -0
- langchain_agentx/tool_runtime/state_bridge.py +164 -0
- langchain_agentx/tools/__init__.py +26 -0
- langchain_agentx/tools/agent/__init__.py +9 -0
- langchain_agentx/tools/agent/backend.py +53 -0
- langchain_agentx/tools/agent/built_in/__init__.py +19 -0
- langchain_agentx/tools/agent/built_in/agentx_guide.py +65 -0
- langchain_agentx/tools/agent/built_in/explore.py +80 -0
- langchain_agentx/tools/agent/built_in/general.py +57 -0
- langchain_agentx/tools/agent/built_in/plan.py +89 -0
- langchain_agentx/tools/agent/built_in/statusline_setup.py +64 -0
- langchain_agentx/tools/agent/built_in/verification.py +120 -0
- langchain_agentx/tools/agent/builtin_subagent_loader.py +89 -0
- langchain_agentx/tools/agent/cwd_resolution.py +119 -0
- langchain_agentx/tools/agent/limits.py +26 -0
- langchain_agentx/tools/agent/loader.py +270 -0
- langchain_agentx/tools/agent/models.py +85 -0
- langchain_agentx/tools/agent/prompt.py +120 -0
- langchain_agentx/tools/agent/registry/__init__.py +18 -0
- langchain_agentx/tools/agent/registry/config.py +29 -0
- langchain_agentx/tools/agent/registry/registry.py +47 -0
- langchain_agentx/tools/agent/scope.py +137 -0
- langchain_agentx/tools/agent/tool.py +256 -0
- langchain_agentx/tools/bash/__init__.py +9 -0
- langchain_agentx/tools/bash/ast_security.py +571 -0
- langchain_agentx/tools/bash/backend.py +1447 -0
- langchain_agentx/tools/bash/bash_hardening.py +734 -0
- langchain_agentx/tools/bash/bash_runtime_contract.py +41 -0
- langchain_agentx/tools/bash/cwd_reporter.py +95 -0
- langchain_agentx/tools/bash/limits.py +71 -0
- langchain_agentx/tools/bash/mode_validation.py +282 -0
- langchain_agentx/tools/bash/models.py +131 -0
- langchain_agentx/tools/bash/observability.py +148 -0
- langchain_agentx/tools/bash/output_utils.py +200 -0
- langchain_agentx/tools/bash/path_security.py +2429 -0
- langchain_agentx/tools/bash/prompt.py +68 -0
- langchain_agentx/tools/bash/read_only_validation.py +589 -0
- langchain_agentx/tools/bash/result_presenter.py +324 -0
- langchain_agentx/tools/bash/sandbox_decision.py +133 -0
- langchain_agentx/tools/bash/security.py +311 -0
- langchain_agentx/tools/bash/sed_edit_parser.py +243 -0
- langchain_agentx/tools/bash/sed_validation.py +163 -0
- langchain_agentx/tools/bash/semantics.py +111 -0
- langchain_agentx/tools/bash/session_manager.py +205 -0
- langchain_agentx/tools/bash/session_runtime.py +290 -0
- langchain_agentx/tools/bash/shell_locator.py +191 -0
- langchain_agentx/tools/bash/task_runtime.py +91 -0
- langchain_agentx/tools/bash/tool.py +939 -0
- langchain_agentx/tools/bash/windows_shell_quoting.py +45 -0
- langchain_agentx/tools/glob/__init__.py +9 -0
- langchain_agentx/tools/glob/models.py +57 -0
- langchain_agentx/tools/glob/pagination.py +30 -0
- langchain_agentx/tools/glob/prompt.py +24 -0
- langchain_agentx/tools/glob/rg_list_backend.py +139 -0
- langchain_agentx/tools/glob/rg_pattern.py +44 -0
- langchain_agentx/tools/glob/tool.py +327 -0
- langchain_agentx/tools/grep/__init__.py +7 -0
- langchain_agentx/tools/grep/backend.py +375 -0
- langchain_agentx/tools/grep/models.py +127 -0
- langchain_agentx/tools/grep/prompt.py +30 -0
- langchain_agentx/tools/grep/rg_subprocess_controller.py +114 -0
- langchain_agentx/tools/grep/tool.py +475 -0
- langchain_agentx/tools/read/__init__.py +9 -0
- langchain_agentx/tools/read/backend.py +415 -0
- langchain_agentx/tools/read/limits.py +67 -0
- langchain_agentx/tools/read/models.py +156 -0
- langchain_agentx/tools/read/prompt.py +73 -0
- langchain_agentx/tools/read/tool.py +494 -0
- langchain_agentx/tools/ripgrep_plugin_exclusions.py +137 -0
- langchain_agentx/tools/skill/__init__.py +4 -0
- langchain_agentx/tools/skill/argument_substitution.py +80 -0
- langchain_agentx/tools/skill/loader.py +196 -0
- langchain_agentx/tools/skill/models.py +88 -0
- langchain_agentx/tools/skill/policy.py +80 -0
- langchain_agentx/tools/skill/prompt.py +35 -0
- langchain_agentx/tools/skill/tool.py +222 -0
- langchain_agentx/utils/__init__.py +0 -0
- langchain_agentx/utils/cwd.py +124 -0
- langchain_agentx/utils/host_platform.py +112 -0
- langchain_agentx/utils/path_hierarchy.py +48 -0
- langchain_agentx/utils/path_user_input.py +66 -0
- langchain_agentx/utils/rg_executable.py +18 -0
- langchain_agentx/utils/subprocess_text.py +101 -0
- langchain_agentx/utils/temp_paths.py +77 -0
- langchain_agentx/utils/unc_path.py +25 -0
- langchain_agentx/utils/win_reserved_paths.py +51 -0
- langchain_agentx/workflow/__init__.py +7 -0
- langchain_agentx/workflow/base.py +97 -0
- langchain_agentx/workflow/batch.py +55 -0
- langchain_agentx/workflow/dag.py +54 -0
- langchain_agentx/workspace/__init__.py +13 -0
- langchain_agentx/workspace/config.py +140 -0
- langchain_agentx/workspace/path_key_normalizer.py +30 -0
- langchain_agentx/workspace/resolver.py +74 -0
- langchain_agentx/workspace/validators.py +41 -0
- langchain_agentx_python-0.1.dist-info/LICENSE +201 -0
- langchain_agentx_python-0.1.dist-info/METADATA +513 -0
- langchain_agentx_python-0.1.dist-info/RECORD +354 -0
- langchain_agentx_python-0.1.dist-info/WHEEL +5 -0
- langchain_agentx_python-0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
"""
|
|
2
|
+
tools/read/backend.py — ReadRuntimeTool I/O 后端
|
|
3
|
+
|
|
4
|
+
职责:
|
|
5
|
+
封装文件读取的底层 I/O 操作,与工具 hook 逻辑解耦。
|
|
6
|
+
根据文件扩展名分发到对应的读取方法。
|
|
7
|
+
|
|
8
|
+
FileBackend.read() 是总入口,分发规则:
|
|
9
|
+
.png / .jpg / .jpeg / .gif / .webp → _read_image()
|
|
10
|
+
.pdf → _read_pdf()
|
|
11
|
+
.ipynb → _read_notebook()
|
|
12
|
+
其他 → _read_text()
|
|
13
|
+
|
|
14
|
+
软依赖(懒加载,缺失时给出友好错误):
|
|
15
|
+
Pillow (PIL) — 图片尺寸读取,缺失时 width/height=None
|
|
16
|
+
pdfplumber — PDF 文本提取,缺失时返回 error
|
|
17
|
+
nbformat — Notebook 解析,缺失时返回 error
|
|
18
|
+
|
|
19
|
+
对应 CC:
|
|
20
|
+
_read_image() → readImageWithTokenBudget() + detectImageFormatFromBuffer()
|
|
21
|
+
_read_text() → readFileInRange() + addLineNumbers()
|
|
22
|
+
_read_pdf() → pdfplumber(CC 走原生 base64,我们 v1 走文本提取)
|
|
23
|
+
_read_notebook() → readNotebook();超限错误提示含 Unix jq 与 Windows PowerShell 示例。
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import base64
|
|
29
|
+
import os
|
|
30
|
+
import re
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import TYPE_CHECKING
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from .models import (
|
|
36
|
+
FileUnchangedOutput,
|
|
37
|
+
ImageFileOutput,
|
|
38
|
+
NotebookFileOutput,
|
|
39
|
+
PDFFileOutput,
|
|
40
|
+
ReadToolInput,
|
|
41
|
+
ReadToolOutput,
|
|
42
|
+
TextFileOutput,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
SUPPORTED_IMAGE_EXTS: frozenset[str] = frozenset(
    ("png", "jpg", "jpeg", "gif", "webp")
)
"""Image extensions the read tool accepts (mirrors CC IMAGE_EXTENSIONS)."""

BINARY_EXTENSIONS: frozenset[str] = frozenset(
    (
        # Executables and shared libraries
        "exe", "dll", "so", "dylib", "bin", "elf",
        # Archives
        "zip", "tar", "gz", "bz2", "xz", "7z", "rar",
        # Build artifacts
        "o", "a", "class", "pyc", "pyo", "whl",
        # Media (other than images)
        "mp3", "mp4", "avi", "mov", "mkv", "wav", "flac",
        # Office documents (other than pdf / ipynb)
        "docx", "xlsx", "pptx", "odt",
        # Databases
        "db", "sqlite", "sqlite3",
        # Disk images and installers
        "iso", "img", "dmg", "pkg",
    )
)
"""Binary extensions (not text, image or PDF) that the read tool refuses to handle."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# 工具函数
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def detect_image_format(data: bytes) -> str:
    """
    Sniff an image MIME type from the leading magic bytes of ``data``.

    The file extension is never consulted. Mirrors CC
    detectImageFormatFromBuffer().

    Returns:
        "image/png", "image/jpeg", "image/gif" or "image/webp". Input that
        matches no known signature (including empty input) is reported as
        "image/jpeg".
    """
    # Formats identified purely by a fixed prefix.
    prefix_signatures = (
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
    )
    for magic, mime in prefix_signatures:
        # A slice shorter than the signature can never compare equal, so no
        # separate length check is required.
        if data[: len(magic)] == magic:
            return mime

    # WebP: bytes 0-3 are "RIFF" and bytes 8-11 are "WEBP"; bytes 4-7 are
    # not inspected.
    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
        return "image/webp"

    # Unknown signature: fall back to JPEG.
    return "image/jpeg"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def add_line_numbers(content: str, start_line: int = 1) -> str:
    """
    Prefix every line of ``content`` with its line number.

    Output format follows CC addLineNumbers(): the number is right-aligned
    in a 6-character field, followed by a tab and the original line
    (line terminator included), e.g. ``'     1\\t<line content>'``.

    Args:
        content: Text to number. Lines are delimited with
            ``str.splitlines(keepends=True)``.
        start_line: Number assigned to the first line.

    Returns:
        The numbered text, or an empty string when ``content`` is empty.
    """
    numbered: list[str] = []
    line_no = start_line
    for text in content.splitlines(keepends=True):
        numbered.append(f"{line_no:6d}\t{text}")
        line_no += 1
    return "".join(numbered)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def parse_page_range(pages_param: str | None, total_pages: int, limits: dict) -> tuple[int, int]:
    """
    Parse the PDF ``pages`` argument into ``(page_start, page_end)``.

    Both bounds are 1-indexed and inclusive. Mirrors CC parsePDFPageRange().

    Accepted forms:
        "3"    -> (3, 3)
        "1-5"  -> (1, 5)
        None   -> (1, min(total_pages, limits["pdf_max_pages"]))

    Args:
        pages_param: Raw user value; surrounding whitespace is ignored.
        total_pages: Number of pages in the document.
        limits: Must contain "pdf_max_pages", the largest number of pages
            that a single request may cover.

    Raises:
        ValueError: The value is malformed, starts below page 1, ends past
            the last page, is reversed, or spans more than the page cap.
    """
    page_cap = limits["pdf_max_pages"]

    # No explicit range: read from the first page up to the cap.
    if pages_param is None:
        return 1, min(total_pages, page_cap)

    parsed = re.fullmatch(r"(\d+)(?:-(\d+))?", pages_param.strip())
    if parsed is None:
        raise ValueError(
            f'Invalid pages format: "{pages_param}". '
            f'Use formats like "1-5", "3", or "10-20". Pages are 1-indexed.'
        )

    first_text, last_text = parsed.groups()
    first = int(first_text)
    # A single page number is a one-page range.
    last = int(last_text) if last_text else first

    if first < 1:
        raise ValueError(f"Page number must be >= 1, got {first}.")
    if last > total_pages:
        raise ValueError(
            f"Page {last} exceeds total pages ({total_pages}) in this PDF."
        )
    if first > last:
        raise ValueError(f"Invalid page range: start ({first}) > end ({last}).")

    requested = last - first + 1
    if requested > page_cap:
        raise ValueError(
            f'Page range "{pages_param}" requests {requested} pages, '
            f"exceeds maximum {page_cap} pages per request."
        )

    return first, last
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# ---------------------------------------------------------------------------
|
|
144
|
+
# FileBackend
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class FileBackend:
|
|
149
|
+
"""
|
|
150
|
+
文件读取 I/O 后端,对应 CC imageProcessor.ts 的职责定位。
|
|
151
|
+
|
|
152
|
+
read() 是工具 invoke() 的唯一 I/O 入口,负责根据文件类型分发。
|
|
153
|
+
"""
|
|
154
|
+
|
|
155
|
+
def read(
    self,
    path: str,
    inp: "ReadToolInput",
    limits: dict,
) -> "ReadToolOutput":
    """
    Entry point: pick a reader from the file extension and delegate to it.

    The extension is compared case-insensitively and without its leading
    dot. Image extensions go to ``_read_image``, "pdf" to ``_read_pdf``,
    "ipynb" to ``_read_notebook``; everything else is read as text.

    Args:
        path: File to read. Per the original note, it has already been
            realpath-normalized in normalize_input.
        inp: Tool input; ``pages`` is forwarded for PDFs, ``offset`` and
            ``limit`` for text files.
        limits: Limit settings passed through unchanged to the reader.
    """
    suffix = Path(path).suffix.lower().lstrip(".")

    if suffix in SUPPORTED_IMAGE_EXTS:
        result = self._read_image(path, limits)
    elif suffix == "pdf":
        result = self._read_pdf(path, inp.pages, limits)
    elif suffix == "ipynb":
        result = self._read_notebook(path, limits)
    else:
        result = self._read_text(path, inp.offset, inp.limit, limits)
    return result
|
|
173
|
+
|
|
174
|
+
# ------------------------------------------------------------------
|
|
175
|
+
# 文本读取
|
|
176
|
+
# ------------------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
def _read_text(
    self,
    path: str,
    offset: int,
    limit: int | None,
    limits: dict,
) -> "TextFileOutput":
    """
    Read a window of a text file and return it with line numbers.

    Mirrors CC readFileInRange() + addLineNumbers().

    Encoding: UTF-8 is tried first; on a decode error the whole file is
    re-read as Latin-1, which accepts every byte, so the read never fails
    because of encoding.

    Line model: the file is opened in text mode (universal newlines) and
    split with ``readlines()``, so only newlines delimit lines. Characters
    such as form feed, vertical tab, U+0085 or U+2028 stay inside their
    line, keeping the reported numbers aligned with editors and grep-style
    tools.

    Args:
        path: File to read.
        offset: 1-indexed first line to return; values below 1 mean 1.
        limit: Maximum number of lines; ``None`` means
            ``limits["max_lines"]``. Always capped at that value.
        limits: Must contain "max_lines" and "max_tokens".

    Returns:
        A ``TextFileOutput``. Empty files and offsets past the end yield a
        ``<system-reminder>`` warning as content instead of file text.

    Raises:
        ValueError: The selected window is estimated (chars // 4) to exceed
            ``limits["max_tokens"]``.
    """
    from .models import TextFileOutput

    # The whole file is loaded; pagination is done by slicing the line list.
    try:
        with open(path, encoding="utf-8") as f:
            all_lines = f.readlines()
    except UnicodeDecodeError:
        with open(path, encoding="latin-1", errors="replace") as f:
            all_lines = f.readlines()

    total_lines = len(all_lines)

    # Empty file
    if total_lines == 0:
        return TextFileOutput(
            file_path=path,
            content=(
                "<system-reminder>Warning: the file exists but the contents are empty."
                "</system-reminder>"
            ),
            line_start=0,
            line_end=0,
            total_lines=0,
        )

    # offset is 1-indexed; offset=0 (or anything lower) is treated as 1.
    effective_offset = max(offset, 1)
    if effective_offset > total_lines:
        return TextFileOutput(
            file_path=path,
            content=(
                f"<system-reminder>Warning: the file exists but is shorter than "
                f"the provided offset ({effective_offset}). "
                f"The file has {total_lines} lines.</system-reminder>"
            ),
            line_start=effective_offset,
            line_end=effective_offset,
            total_lines=total_lines,
        )

    # Slice: convert the 1-indexed offset to a 0-indexed start.
    start_idx = effective_offset - 1
    max_lines = limits["max_lines"]
    effective_limit = min(limit, max_lines) if limit is not None else max_lines
    end_idx = min(start_idx + effective_limit, total_lines)

    selected = all_lines[start_idx:end_idx]
    content_raw = "".join(selected)

    # Token budget check using the rough estimate chars / 4 ≈ tokens.
    max_tokens = limits["max_tokens"]
    estimated_tokens = len(content_raw) // 4
    if estimated_tokens > max_tokens:
        raise ValueError(
            f"File content ({estimated_tokens} estimated tokens) exceeds maximum "
            f"allowed tokens ({max_tokens}). "
            f"Use offset and limit parameters to read specific portions of the file."
        )

    # Number the already-split lines directly (6-wide right-aligned number,
    # then a tab). Re-splitting the joined text could count lines
    # differently from `selected` and desynchronize line_end.
    content_with_linenos = "".join(
        f"{line_no:6d}\t{line}"
        for line_no, line in enumerate(selected, start=effective_offset)
    )

    return TextFileOutput(
        file_path=path,
        content=content_with_linenos,
        line_start=effective_offset,
        line_end=effective_offset + len(selected) - 1,
        total_lines=total_lines,
        truncated=(end_idx < total_lines),
    )
|
|
265
|
+
|
|
266
|
+
# ------------------------------------------------------------------
|
|
267
|
+
# 图片读取
|
|
268
|
+
# ------------------------------------------------------------------
|
|
269
|
+
|
|
270
|
+
def _read_image(self, path: str, limits: dict) -> "ImageFileOutput":
    """
    Read an image file and return it base64-encoded.

    v1 sends the raw bytes without compression; a Pillow-based resize is
    planned for v2. Mirrors CC readImageWithTokenBudget(), minus the
    token-budget compression.

    Args:
        path: Image file to read.
        limits: Must contain "image_max_bytes", the largest accepted file
            size in bytes.

    Returns:
        An ``ImageFileOutput`` with the base64 payload, the media type
        sniffed from the file header, the size on disk, and the pixel
        dimensions when they could be determined (otherwise ``None``).

    Raises:
        ValueError: The file is empty or larger than
            ``limits["image_max_bytes"]``.
    """
    from .models import ImageFileOutput

    size = os.path.getsize(path)

    if size == 0:
        raise ValueError(f"Image file is empty: {path}")

    if size > limits["image_max_bytes"]:
        raise ValueError(
            f"Image too large ({size:,} bytes, "
            f"max {limits['image_max_bytes']:,} bytes)."
        )

    with open(path, "rb") as f:
        data = f.read()

    media_type = detect_image_format(data)

    # Dimensions are optional metadata obtained through Pillow (soft
    # dependency).
    width: int | None = None
    height: int | None = None
    try:
        from PIL import Image  # type: ignore[import]
        with Image.open(path) as img:
            width, height = img.size
    except (ImportError, OSError):
        # ImportError: Pillow is not installed.
        # OSError: Pillow could not open or identify the file
        # (UnidentifiedImageError is an OSError subclass). Either way the
        # read proceeds without dimensions, so the outcome does not depend
        # on whether Pillow happens to be installed.
        pass

    b64 = base64.b64encode(data).decode("ascii")

    return ImageFileOutput(
        file_path=path,
        base64=b64,
        media_type=media_type,
        original_size=size,
        width=width,
        height=height,
    )
|
|
315
|
+
|
|
316
|
+
# ------------------------------------------------------------------
|
|
317
|
+
# PDF 读取
|
|
318
|
+
# ------------------------------------------------------------------
|
|
319
|
+
|
|
320
|
+
def _read_pdf(
    self,
    path: str,
    pages_param: str | None,
    limits: dict,
) -> "PDFFileOutput":
    """
    Extract the text of a PDF page range using pdfplumber (v1 path).

    v2 is planned to send the PDF natively as base64 (needs multimodal model
    support) or to convert pages to images (needs pdf2image). Counterpart of
    CC extractPDFPages() + parsePDFPageRange(), text-extraction path only.

    Args:
        path: Path of the PDF file.
        pages_param: Requested page range, or None when the caller gave none.
        limits: Read limits; ``pdf_inline_threshold`` and ``pdf_max_pages``
            are read here, and the whole dict is passed to
            ``parse_page_range``.

    Returns:
        PDFFileOutput whose content joins the text of each selected page with
        a "--- Page Break ---" separator. Pages without extractable text
        contribute an empty string.

    Raises:
        ImportError: If pdfplumber is not installed.
        ValueError: If the PDF has more pages than
            ``limits["pdf_inline_threshold"]`` and no page range was given.
    """
    from .models import PDFFileOutput

    try:
        import pdfplumber  # type: ignore[import]
    except ImportError as e:
        raise ImportError(
            "PDF reading requires pdfplumber. "
            "Install with: pip install pdfplumber"
        ) from e

    with pdfplumber.open(path) as document:
        page_count = len(document.pages)

        # Long documents must be read through an explicit page range.
        threshold = limits["pdf_inline_threshold"]
        range_missing = pages_param is None
        if page_count > threshold and range_missing:
            message = (
                f"This PDF has {page_count} pages, which is too many to read at once. "
                f"Use the pages parameter to read specific page ranges "
                f'(e.g. pages="1-5"). '
                f"Maximum {limits['pdf_max_pages']} pages per request."
            )
            raise ValueError(message)

        first_page, last_page = parse_page_range(pages_param, page_count, limits)

        # The bounds are used as 1-based and inclusive, hence the -1 on the
        # slice start only.
        page_texts: list[str] = [
            page.extract_text() or ""
            for page in document.pages[first_page - 1 : last_page]
        ]

    separator = "\n\n--- Page Break ---\n\n"
    return PDFFileOutput(
        file_path=path,
        content=separator.join(page_texts),
        page_start=first_page,
        page_end=last_page,
        total_pages=page_count,
        truncated=(last_page < page_count),
    )
|
|
372
|
+
|
|
373
|
+
# ------------------------------------------------------------------
|
|
374
|
+
# Notebook 读取
|
|
375
|
+
# ------------------------------------------------------------------
|
|
376
|
+
|
|
377
|
+
def _read_notebook(self, path: str, limits: dict) -> "NotebookFileOutput":
    """
    Read a Jupyter notebook and return all of its cells.

    Counterpart of CC readNotebook() -> cells.

    Args:
        path: Path of the ``.ipynb`` file (opened as UTF-8).
        limits: Read limits; only ``limits["max_size_bytes"]`` is used here.

    Returns:
        NotebookFileOutput whose ``cells`` holds one plain ``dict`` per
        notebook cell (top-level conversion only).

    Raises:
        ImportError: If nbformat is not installed.
        ValueError: If the notebook, re-serialized to JSON, is larger than
            ``limits["max_size_bytes"]`` bytes.
    """
    from .models import NotebookFileOutput

    try:
        import nbformat  # type: ignore[import]
    except ImportError as e:
        raise ImportError(
            "Jupyter notebook reading requires nbformat. "
            "Install with: pip install nbformat"
        ) from e

    with open(path, encoding="utf-8") as handle:
        notebook = nbformat.read(handle, as_version=4)

    # The size check is done on the UTF-8 byte length of the notebook after
    # it has been serialized back to JSON.
    serialized_size = len(nbformat.writes(notebook).encode("utf-8"))
    if serialized_size > limits["max_size_bytes"]:
        quoted = repr(path)
        hint = (
            f"Notebook content ({serialized_size:,} bytes) exceeds maximum allowed size "
            f"({limits['max_size_bytes']:,} bytes). "
            "Slice the notebook JSON with platform-appropriate tooling, for example:\n"
            f" Unix / Git Bash: jq '.cells[:20]' {quoted}\n"
            f" Windows PowerShell: "
            f"(Get-Content -Raw -LiteralPath {quoted} | ConvertFrom-Json).cells[0..19]\n"
            "Or use the Bash tool with jq (where available) or a short Python one-liner."
        )
        raise ValueError(hint)

    cell_dicts = list(map(dict, notebook.cells))
    return NotebookFileOutput(file_path=path, cells=cell_dicts)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""
|
|
2
|
+
tools/read/limits.py — ReadRuntimeTool 上限配置
|
|
3
|
+
|
|
4
|
+
职责:
|
|
5
|
+
集中管理 Read 工具所有数值常量和配置读取逻辑。
|
|
6
|
+
全部支持环境变量覆盖,便于测试 mock 和生产调优。
|
|
7
|
+
|
|
8
|
+
两层上限设计(对应 CC limits.ts):
|
|
9
|
+
max_size_bytes — 读取前检查文件总大小(stat),低代价,快速 fail
|
|
10
|
+
max_tokens — 读取后检查输出 token 数,高代价但精确
|
|
11
|
+
v1 用字符数估算(chars / 4 ≈ tokens)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# 文本读取上限
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
# Characters-per-token ratio used by the v1 token estimate (chars / 4).
_CHARS_PER_TOKEN = 4


def _env_int(name: str, default: int) -> int:
    """
    Return the integer value of environment variable *name*.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is not set.

    Raises:
        ValueError: If the variable is set but is not a valid integer; the
            message names the offending variable.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


# Maximum number of lines returned by one read (CC MAX_LINES_TO_READ=2000).
DEFAULT_MAX_LINES = _env_int("READ_TOOL_MAX_LINES", 2000)

# File size cap in bytes (CC maxSizeBytes=256KB). Per the original notes this
# is enforced through a stat check in validate_input before reading.
DEFAULT_MAX_SIZE_BYTES = _env_int("READ_TOOL_MAX_SIZE_BYTES", 256 * 1024)

# Output token cap, estimated as chars / 4 (CC maxTokens=25000). Per the
# original notes, exceeding it reports MaxFileReadTokenExceededError.
DEFAULT_MAX_TOKENS = _env_int("READ_TOOL_MAX_TOKENS", 25000)

# Output character cap, i.e. max_tokens converted back to characters.
DEFAULT_MAX_RESULT_CHARS = DEFAULT_MAX_TOKENS * _CHARS_PER_TOKEN

# ---------------------------------------------------------------------------
# PDF limits
# ---------------------------------------------------------------------------

# Maximum number of PDF pages per read (CC PDF_MAX_PAGES_PER_READ=20).
PDF_MAX_PAGES_PER_READ = _env_int("READ_TOOL_PDF_MAX_PAGES", 20)

# Above this page count a `pages` argument is mandatory
# (CC PDF_AT_MENTION_INLINE_THRESHOLD=10). Deliberately not configurable
# through the environment: it is a product design decision.
PDF_AT_MENTION_INLINE_THRESHOLD = 10

# ---------------------------------------------------------------------------
# Image limits
# ---------------------------------------------------------------------------

# Image size cap in bytes (default 20MB). Images have their own cap and are
# not bound by max_size_bytes.
IMAGE_MAX_SIZE_BYTES = _env_int("READ_TOOL_IMAGE_MAX_BYTES", 20 * 1024 * 1024)


def get_read_limits() -> dict:
    """
    Return the currently effective read limits.

    The environment is consulted on every call, so a variable changed after
    this module was imported takes effect immediately. When a variable is not
    set, the corresponding module-level constant (itself resolved at import
    time) is used.

    Counterpart of getDefaultFileReadingLimits() in CC limits.ts.

    Returns:
        dict with keys ``max_lines``, ``max_size_bytes``, ``max_tokens``,
        ``max_result_chars``, ``pdf_max_pages``, ``pdf_inline_threshold`` and
        ``image_max_bytes``.

    Raises:
        ValueError: If one of the READ_TOOL_* variables is set to a
            non-integer value.
    """
    max_tokens = _env_int("READ_TOOL_MAX_TOKENS", DEFAULT_MAX_TOKENS)
    if max_tokens == DEFAULT_MAX_TOKENS:
        # No runtime override: keep the module constant so that patching it
        # (e.g. in tests) still works.
        max_result_chars = DEFAULT_MAX_RESULT_CHARS
    else:
        max_result_chars = max_tokens * _CHARS_PER_TOKEN

    return {
        "max_lines": _env_int("READ_TOOL_MAX_LINES", DEFAULT_MAX_LINES),
        "max_size_bytes": _env_int("READ_TOOL_MAX_SIZE_BYTES", DEFAULT_MAX_SIZE_BYTES),
        "max_tokens": max_tokens,
        "max_result_chars": max_result_chars,
        "pdf_max_pages": _env_int("READ_TOOL_PDF_MAX_PAGES", PDF_MAX_PAGES_PER_READ),
        "pdf_inline_threshold": PDF_AT_MENTION_INLINE_THRESHOLD,
        "image_max_bytes": _env_int("READ_TOOL_IMAGE_MAX_BYTES", IMAGE_MAX_SIZE_BYTES),
    }
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
tools/read/models.py — ReadRuntimeTool 输入输出数据模型
|
|
3
|
+
|
|
4
|
+
职责:
|
|
5
|
+
定义 Read 工具的业务级 Input / Output Pydantic 模型。
|
|
6
|
+
不引入框架级模型(tool_runtime/models.py);不含 I/O 操作。
|
|
7
|
+
|
|
8
|
+
输出模型采用 discriminated union(对应 CC outputSchema 的 z.discriminatedUnion):
|
|
9
|
+
- TextFileOutput 文本文件读取结果
|
|
10
|
+
- ImageFileOutput 图片文件读取结果(base64 编码)
|
|
11
|
+
- PDFFileOutput PDF 文件读取结果(v1 文本提取路径)
|
|
12
|
+
- NotebookFileOutput Jupyter Notebook 读取结果
|
|
13
|
+
- FileUnchangedOutput 重复读取 dedup stub(对应 CC file_unchanged)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Annotated, Literal
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel, Field
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# 输入模型
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ReadToolInput(BaseModel):
    """
    Input parameters of ReadRuntimeTool (counterpart of CC inputSchema).

    Fields:
        file_path: Absolute path of the file (required).
        offset: Start line (1-indexed; 0 means start at the top of the file).
        limit: Maximum number of lines to read (None means read to the end of
            the file or up to the max_lines cap).
        pages: PDF only; a page range such as "1-5", "3" or "10-20". Per the
            original notes, passing it for a non-PDF file is an error
            (that check is not part of this model).
    """

    # Required: no default value.
    file_path: str = Field(
        default=...,
        description="The absolute path to the file to read",
    )

    # Non-negative; defaults to 0.
    offset: int = Field(
        default=0,
        ge=0,
        description=(
            "The line number to start reading from (1-indexed). "
            "0 means start from the beginning of the file. "
            "Only provide if the file is too large to read at once."
        ),
    )

    # Strictly positive when given.
    limit: int | None = Field(
        default=None,
        gt=0,
        description=(
            "The number of lines to read. "
            "Only provide if the file is too large to read at once."
        ),
    )

    # Free-form range string; not validated by this model.
    pages: str | None = Field(
        default=None,
        description=(
            'Page range for PDF files (e.g. "1-5", "3", "10-20"). '
            "Only applicable to PDF files. Maximum 20 pages per request."
        ),
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# 输出模型(discriminated union by type)
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class TextFileOutput(BaseModel):
    """Result of reading a text file (content carries line numbers)."""

    # Discriminator tag of the output union.
    type: Literal["text"] = "text"

    file_path: str

    # Text with line numbers, formatted as ' N\t<line content>'
    # (counterpart of CC addLineNumbers()).
    content: str

    # First line actually read (1-indexed).
    line_start: int

    # Last line actually read (1-indexed).
    line_end: int

    # Total number of lines in the file.
    total_lines: int

    # Defaults to False when the producer does not set it.
    truncated: bool = False
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class ImageFileOutput(BaseModel):
    """Result of reading an image file (base64-encoded; CC type='image')."""

    # Discriminator tag of the output union.
    type: Literal["image"] = "image"

    file_path: str

    # Base64-encoded image data (ASCII string).
    base64: str

    # MIME type such as 'image/png' or 'image/jpeg' (per the original notes,
    # detected from magic bytes).
    media_type: str

    # Size of the original file in bytes.
    original_size: int

    # Image width in pixels (filled in when Pillow is available).
    width: int | None = None

    # Image height in pixels (filled in when Pillow is available).
    height: int | None = None
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class PDFFileOutput(BaseModel):
    """Result of reading a PDF file (v1: text-extraction path)."""

    # Discriminator tag of the output union.
    type: Literal["pdf"] = "pdf"

    file_path: str

    # Extracted text; pages are separated by '\n\n--- Page Break ---\n\n'.
    content: str

    # First page actually read (1-indexed).
    page_start: int

    # Last page actually read (1-indexed).
    page_end: int

    # Total number of pages in the PDF.
    total_pages: int

    # Per the original notes: whether not all pages were read because of the
    # pdf_max_pages limit. Defaults to False.
    truncated: bool = False
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class NotebookFileOutput(BaseModel):
    """Result of reading a Jupyter notebook (CC type='notebook')."""

    # Discriminator tag of the output union.
    type: Literal["notebook"] = "notebook"

    file_path: str

    # List of nbformat cells, one dict per cell (per the original notes each
    # holds fields such as cell_type / source / outputs).
    cells: list[dict]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class FileUnchangedOutput(BaseModel):
    """
    Dedup stub for a repeated read (CC type='file_unchanged').

    Per the original notes, it is returned when the same file is read again
    with the same offset/limit and an unchanged mtime; on receiving it the
    model is expected to refer to the earlier Read result instead of reading
    the file again.
    """

    # Discriminator tag of the output union.
    type: Literal["file_unchanged"] = "file_unchanged"

    file_path: str
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
# Union 类型别名
|
|
147
|
+
# ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
# Discriminated union of every Read tool output model, selected by the `type`
# field. Counterpart of CC outputSchema's z.discriminatedUnion('type', [...]).
ReadToolOutput = Annotated[
    (
        TextFileOutput
        | ImageFileOutput
        | PDFFileOutput
        | NotebookFileOutput
        | FileUnchangedOutput
    ),
    Field(discriminator="type"),
]
|