ummaya 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/bin/ummaya +10 -1
- package/bun.lock +180 -244
- package/npm-shrinkwrap.json +760 -1760
- package/package.json +39 -22
- package/prompts/manifest.yaml +1 -1
- package/prompts/system_v1.md +1 -0
- package/pyproject.toml +27 -2
- package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
- package/src/ummaya/_canonical/__init__.py +2 -0
- package/src/ummaya/_canonical/baselines.yaml +113 -0
- package/src/ummaya/engine/engine.py +29 -132
- package/src/ummaya/evidence/__init__.py +21 -2
- package/src/ummaya/evidence/dataset_contract.py +193 -0
- package/src/ummaya/evidence/document_authoring_cases.py +33 -0
- package/src/ummaya/evidence/document_harness.py +313 -0
- package/src/ummaya/evidence/document_viewer_ux.py +391 -0
- package/src/ummaya/evidence/gates.py +70 -0
- package/src/ummaya/evidence/json_types.py +20 -0
- package/src/ummaya/evidence/models.py +88 -1
- package/src/ummaya/evidence/output_payload.py +89 -0
- package/src/ummaya/evidence/payload_documents.py +233 -0
- package/src/ummaya/evidence/route_contracts.py +224 -0
- package/src/ummaya/evidence/route_helpers.py +150 -0
- package/src/ummaya/evidence/runner.py +81 -212
- package/src/ummaya/evidence/source_provenance.py +246 -0
- package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
- package/src/ummaya/evidence/tool_layer.py +39 -0
- package/src/ummaya/evidence/tool_layer_models.py +151 -0
- package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
- package/src/ummaya/ipc/document_intent_normalization.py +185 -0
- package/src/ummaya/ipc/frame_schema.py +5 -5
- package/src/ummaya/ipc/route_diagnostics.py +73 -0
- package/src/ummaya/ipc/stdio.py +1109 -477
- package/src/ummaya/llm/client.py +102 -3
- package/src/ummaya/llm/config.py +8 -3
- package/src/ummaya/primitives/__init__.py +6 -2
- package/src/ummaya/primitives/delegation.py +1 -1
- package/src/ummaya/primitives/document.py +28 -0
- package/src/ummaya/settings.py +0 -3
- package/src/ummaya/tools/discovery_bridge.py +17 -1
- package/src/ummaya/tools/documents/__init__.py +297 -0
- package/src/ummaya/tools/documents/adapter_registry.py +487 -0
- package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
- package/src/ummaya/tools/documents/artifact_store.py +454 -0
- package/src/ummaya/tools/documents/authoring.py +283 -0
- package/src/ummaya/tools/documents/baselines.py +132 -0
- package/src/ummaya/tools/documents/capability.py +331 -0
- package/src/ummaya/tools/documents/contracts.py +112 -0
- package/src/ummaya/tools/documents/conversion.py +521 -0
- package/src/ummaya/tools/documents/diff.py +275 -0
- package/src/ummaya/tools/documents/engines.py +163 -0
- package/src/ummaya/tools/documents/evaluation.py +291 -0
- package/src/ummaya/tools/documents/explicit_values.py +108 -0
- package/src/ummaya/tools/documents/fixtures.py +174 -0
- package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
- package/src/ummaya/tools/documents/formats/__init__.py +2 -0
- package/src/ummaya/tools/documents/formats/archive.py +528 -0
- package/src/ummaya/tools/documents/formats/base.py +41 -0
- package/src/ummaya/tools/documents/formats/code_file.py +211 -0
- package/src/ummaya/tools/documents/formats/data_file.py +272 -0
- package/src/ummaya/tools/documents/formats/hwp.py +284 -0
- package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
- package/src/ummaya/tools/documents/formats/odf.py +435 -0
- package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
- package/src/ummaya/tools/documents/formats/passive.py +766 -0
- package/src/ummaya/tools/documents/formats/pdf.py +702 -0
- package/src/ummaya/tools/documents/formats/text_web.py +268 -0
- package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
- package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
- package/src/ummaya/tools/documents/inspection.py +289 -0
- package/src/ummaya/tools/documents/intake.py +1079 -0
- package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
- package/src/ummaya/tools/documents/models.py +1598 -0
- package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
- package/src/ummaya/tools/documents/orchestrator.py +96 -0
- package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
- package/src/ummaya/tools/documents/patch.py +170 -0
- package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
- package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
- package/src/ummaya/tools/documents/permissions.py +110 -0
- package/src/ummaya/tools/documents/planner.py +616 -0
- package/src/ummaya/tools/documents/registry.py +2733 -0
- package/src/ummaya/tools/documents/render.py +978 -0
- package/src/ummaya/tools/documents/render_comparison.py +113 -0
- package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
- package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
- package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
- package/src/ummaya/tools/documents/reread.py +157 -0
- package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
- package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
- package/src/ummaya/tools/documents/scorecard.py +184 -0
- package/src/ummaya/tools/documents/socratic_planner.py +193 -0
- package/src/ummaya/tools/documents/style.py +48 -0
- package/src/ummaya/tools/documents/tool_defs.py +523 -0
- package/src/ummaya/tools/documents/validate.py +347 -0
- package/src/ummaya/tools/executor.py +29 -0
- package/src/ummaya/tools/live_proxy.py +0 -3
- package/src/ummaya/tools/models.py +5 -1
- package/src/ummaya/tools/register_all.py +8 -0
- package/src/ummaya/tools/registry.py +10 -1
- package/src/ummaya/tools/routing/__init__.py +59 -0
- package/src/ummaya/tools/routing/builder.py +105 -0
- package/src/ummaya/tools/routing/cards.py +29 -0
- package/src/ummaya/tools/routing/decision_service.py +534 -0
- package/src/ummaya/tools/routing/decision_types.py +74 -0
- package/src/ummaya/tools/routing/feasibility.py +122 -0
- package/src/ummaya/tools/routing/intent.py +17 -0
- package/src/ummaya/tools/routing/intent_extractor.py +207 -0
- package/src/ummaya/tools/routing/intent_patterns.py +160 -0
- package/src/ummaya/tools/routing/intent_public_data.py +150 -0
- package/src/ummaya/tools/routing/intent_types.py +48 -0
- package/src/ummaya/tools/routing/lint.py +78 -0
- package/src/ummaya/tools/routing/metadata.py +174 -0
- package/src/ummaya/tools/routing/projection.py +340 -0
- package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
- package/src/ummaya/tools/routing/schema.py +81 -0
- package/src/ummaya/tools/routing/types.py +96 -0
- package/src/ummaya/tools/routing_index.py +2 -2
- package/src/ummaya/tools/search.py +34 -746
- package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
- package/tui/bun.lock +126 -305
- package/tui/package.json +35 -22
- package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
- package/tui/src/QueryEngine.ts +12 -8
- package/tui/src/bridge/inboundAttachments.ts +3 -3
- package/tui/src/cli/handlers/auth.ts +3 -12
- package/tui/src/cli/handlers/mcp.tsx +0 -1
- package/tui/src/cli/print.ts +8 -9
- package/tui/src/commands/insights.ts +1 -1
- package/tui/src/commands/install-github-app/types.ts +8 -30
- package/tui/src/commands/plugin/types.ts +6 -28
- package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
- package/tui/src/commands/rename/generateSessionName.ts +1 -1
- package/tui/src/components/Feedback.tsx +1 -1
- package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
- package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
- package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
- package/tui/src/components/Spinner/types.ts +6 -28
- package/tui/src/components/agents/generateAgent.ts +1 -1
- package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
- package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
- package/tui/src/components/mcp/types.ts +16 -38
- package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
- package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
- package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
- package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
- package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
- package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
- package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
- package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
- package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
- package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
- package/tui/src/components/primitive/index.tsx +43 -1
- package/tui/src/components/primitive/types.ts +137 -0
- package/tui/src/components/ui/option.ts +4 -26
- package/tui/src/constants/common.ts +0 -2
- package/tui/src/constants/prompts.ts +4 -3
- package/tui/src/constants/querySource.ts +4 -26
- package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
- package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
- package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
- package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
- package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
- package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
- package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
- package/tui/src/hooks/useApiKeyVerification.ts +1 -1
- package/tui/src/hooks/useVirtualScroll.ts +1 -1
- package/tui/src/ink/ink.tsx +33 -14
- package/tui/src/ink/reconciler.ts +2 -3
- package/tui/src/ink/render-to-screen.ts +30 -10
- package/tui/src/ipc/bridge.ts +62 -15
- package/tui/src/ipc/bridgeSingleton.ts +5 -1
- package/tui/src/ipc/codec.ts +3 -3
- package/tui/src/ipc/frames.generated.ts +12 -12
- package/tui/src/ipc/llmClient.ts +151 -27
- package/tui/src/ipc/schema/frame.schema.json +1 -1
- package/tui/src/keybindings/defaultBindings.ts +4 -0
- package/tui/src/main.tsx +32 -15
- package/tui/src/native-ts/file-index/index.ts +33 -3
- package/tui/src/observability/surface.ts +2 -2
- package/tui/src/probes/toolRegistryProbe.tsx +3 -1
- package/tui/src/projectOnboardingState.ts +7 -6
- package/tui/src/query/chatMessageTypes.ts +18 -0
- package/tui/src/query/chatMessagesBuilder.ts +1 -1
- package/tui/src/query/deps.ts +1 -1
- package/tui/src/query/messageGuards.ts +106 -0
- package/tui/src/query/publicDataTerminalRepair.ts +384 -0
- package/tui/src/query/run.ts +1075 -0
- package/tui/src/query/supportBoundary.ts +168 -0
- package/tui/src/query/toolResultErrors.ts +103 -0
- package/tui/src/query/toolRunner.ts +687 -0
- package/tui/src/query/unavailableToolRepair.ts +118 -0
- package/tui/src/query.ts +9 -2186
- package/tui/src/screens/REPL.tsx +40 -29
- package/tui/src/services/api/adapterManifest.ts +4 -0
- package/tui/src/services/api/backendChat/events.ts +117 -0
- package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
- package/tui/src/services/api/backendChat/frame.ts +9 -0
- package/tui/src/services/api/backendChat/streaming.ts +430 -0
- package/tui/src/services/api/backendChat/types.ts +62 -0
- package/tui/src/services/api/backendChat.ts +1 -0
- package/tui/src/services/api/client.ts +65 -2
- package/tui/src/services/api/errorUtils.ts +5 -5
- package/tui/src/services/api/errors.ts +1 -1
- package/tui/src/services/api/logging.ts +1 -1
- package/tui/src/services/api/ummaya/evidence.ts +194 -0
- package/tui/src/services/api/ummaya/messages.ts +255 -0
- package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
- package/tui/src/services/api/ummaya/provider.ts +200 -0
- package/tui/src/services/api/ummaya/reasoning.ts +24 -0
- package/tui/src/services/api/ummaya/request.ts +200 -0
- package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
- package/tui/src/services/api/ummaya/streaming.ts +365 -0
- package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
- package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
- package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
- package/tui/src/services/api/ummaya/types.ts +110 -0
- package/tui/src/services/api/ummaya/usage.ts +30 -0
- package/tui/src/services/api/ummaya.ts +26 -418
- package/tui/src/services/api/withRetry.ts +1 -1
- package/tui/src/services/awaySummary.ts +2 -2
- package/tui/src/services/claudeAiLimits.ts +1 -1
- package/tui/src/services/compact/autoCompact.ts +1 -1
- package/tui/src/services/compact/compact.ts +1 -1
- package/tui/src/services/lsp/types.ts +8 -30
- package/tui/src/services/tips/types.ts +6 -28
- package/tui/src/services/tokenEstimation.ts +1 -1
- package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
- package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
- package/tui/src/services/tools/toolExecution.ts +94 -1
- package/tui/src/store/pendingPermissionSlot.ts +1 -1
- package/tui/src/store/session-store.ts +10 -36
- package/tui/src/stubs/any-stub.ts +15 -10
- package/tui/src/stubs/color-diff-napi.ts +37 -23
- package/tui/src/stubs/globals.d.ts +3 -3
- package/tui/src/stubs/macro-preload.ts +23 -12
- package/tui/src/tools/AdapterTool/AdapterTool.ts +1207 -714
- package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
- package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
- package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
- package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
- package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
- package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
- package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
- package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
- package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
- package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
- package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
- package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
- package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
- package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
- package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
- package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
- package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
- package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
- package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
- package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
- package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
- package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
- package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
- package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
- package/tui/src/tools/AgentTool/permissions.ts +39 -0
- package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
- package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
- package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
- package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
- package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
- package/tui/src/tools/AgentTool/runAgent.ts +1 -1
- package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
- package/tui/src/tools/AgentTool/schemas.ts +196 -0
- package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
- package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
- package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
- package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
- package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
- package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
- package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
- package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
- package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
- package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
- package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
- package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
- package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
- package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
- package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
- package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
- package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
- package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
- package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
- package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
- package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
- package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
- package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
- package/tui/src/tools/BashTool/call.ts +202 -0
- package/tui/src/tools/BashTool/callLoader.ts +35 -0
- package/tui/src/tools/BashTool/commandClassification.ts +151 -0
- package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
- package/tui/src/tools/BashTool/cwdReset.ts +33 -0
- package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
- package/tui/src/tools/BashTool/modeValidation.ts +13 -1
- package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
- package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
- package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
- package/tui/src/tools/BashTool/resultLoader.ts +29 -0
- package/tui/src/tools/BashTool/resultMapping.ts +83 -0
- package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
- package/tui/src/tools/BashTool/schemas.ts +65 -0
- package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
- package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
- package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
- package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
- package/tui/src/tools/BashTool/uiLoader.ts +37 -0
- package/tui/src/tools/BriefTool/upload.ts +1 -1
- package/tui/src/tools/CalculatorTool/parser.ts +2 -2
- package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
- package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
- package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
- package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
- package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
- package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
- package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
- package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
- package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
- package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
- package/tui/src/tools/FileEditTool/call.ts +228 -0
- package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
- package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
- package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
- package/tui/src/tools/FileWriteTool/call.ts +223 -0
- package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
- package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
- package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +25 -32
- package/tui/src/tools/LookupPrimitive/prompt.ts +0 -2
- package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
- package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
- package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
- package/tui/src/tools/NotebookEditTool/call.ts +254 -0
- package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
- package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
- package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
- package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
- package/tui/src/tools/PowerShellTool/call.ts +179 -0
- package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
- package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
- package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
- package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
- package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
- package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
- package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
- package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
- package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
- package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
- package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
- package/tui/src/tools/PowerShellTool/validation.ts +39 -0
- package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
- package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +1 -11
- package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
- package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
- package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +27 -10
- package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
- package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
- package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
- package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
- package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
- package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
- package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
- package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
- package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
- package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
- package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
- package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
- package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
- package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
- package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
- package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
- package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
- package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
- package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
- package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
- package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
- package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
- package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
- package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
- package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
- package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +2 -1
- package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
- package/tui/src/tools/WebFetchTool/call.ts +227 -0
- package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
- package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
- package/tui/src/tools/WebFetchTool/types.ts +23 -0
- package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
- package/tui/src/tools/WebFetchTool/utils.ts +1 -1
- package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
- package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
- package/tui/src/tools/WebSearchTool/call.ts +33 -0
- package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
- package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
- package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
- package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
- package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
- package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
- package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
- package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
- package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
- package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
- package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
- package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
- package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
- package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
- package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
- package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
- package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
- package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
- package/tui/src/tools/_shared/rootPrimitiveInput.ts +1 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
- package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
- package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
- package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
- package/tui/src/tools/_shared/toolChoiceRepair.ts +55 -860
- package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
- package/tui/src/tools.ts +39 -190
- package/tui/src/types/fileSuggestion.ts +4 -26
- package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
- package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
- package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
- package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
- package/tui/src/types/message.ts +80 -102
- package/tui/src/types/messageQueueTypes.ts +6 -28
- package/tui/src/types/notebook.ts +16 -38
- package/tui/src/types/statusLine.ts +4 -26
- package/tui/src/types/tools.ts +24 -46
- package/tui/src/types/utils.ts +6 -28
- package/tui/src/upstreamproxy/relay.ts +7 -3
- package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
- package/tui/src/utils/assistantMessageFactories.ts +9 -3
- package/tui/src/utils/auth.ts +129 -139
- package/tui/src/utils/bash/ast.ts +23 -23
- package/tui/src/utils/bash/bashParser.ts +5 -5
- package/tui/src/utils/billing.ts +1 -1
- package/tui/src/utils/claudeDesktop.ts +4 -4
- package/tui/src/utils/collapseReadSearch.ts +3 -3
- package/tui/src/utils/cronTasks.ts +1 -1
- package/tui/src/utils/execFileNoThrow.ts +1 -1
- package/tui/src/utils/filePersistence/types.ts +16 -38
- package/tui/src/utils/forkedAgent.ts +1 -1
- package/tui/src/utils/gracefulShutdown.ts +4 -4
- package/tui/src/utils/heapDumpService.ts +12 -8
- package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
- package/tui/src/utils/hooks/execPromptHook.ts +1 -1
- package/tui/src/utils/hooks/skillImprovement.ts +1 -1
- package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
- package/tui/src/utils/messages.ts +18 -0
- package/tui/src/utils/migrateSessions.ts +3 -3
- package/tui/src/utils/model/model.ts +6 -6
- package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
- package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
- package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
- package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
- package/tui/src/utils/plugins/pluginLoader.ts +8 -8
- package/tui/src/utils/protectedNamespace.ts +5 -3
- package/tui/src/utils/rawJsonToolCall.ts +242 -0
- package/tui/src/utils/ripgrep.ts +16 -7
- package/tui/src/utils/sessionTitle.ts +1 -1
- package/tui/src/utils/settings/permissionValidation.ts +14 -2
- package/tui/src/utils/shell/prefix.ts +1 -1
- package/tui/src/utils/sideQuery.ts +1 -1
- package/tui/src/utils/systemThemeWatcher.ts +13 -3
- package/tui/src/utils/teleport.tsx +1 -1
- package/uv.lock +426 -45
- package/tui/src/services/api/claude.ts +0 -3540
- package/tui/src/tools/_shared/directPublicDataGuard.ts +0 -362
- package/tui/src/tools/_shared/kmaAnalysisGuard.ts +0 -197
- package/tui/src/tools/_shared/kmaAviationGuard.ts +0 -70
- package/tui/src/tools/_shared/nmcAedGuard.ts +0 -234
- package/tui/src/tools/_shared/protectedCheckGuard.ts +0 -207
- package/tui/src/tools/_shared/textToolCallGuard.ts +0 -91
|
@@ -0,0 +1,1079 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
"""Fail-closed document intake for local Public AX artifacts."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import ast
|
|
7
|
+
import gzip
|
|
8
|
+
import hashlib
|
|
9
|
+
import io
|
|
10
|
+
import json
|
|
11
|
+
import tarfile
|
|
12
|
+
import zipfile
|
|
13
|
+
from pathlib import Path, PurePosixPath
|
|
14
|
+
|
|
15
|
+
import yaml
|
|
16
|
+
from defusedxml import ElementTree # type: ignore[import-untyped]
|
|
17
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
18
|
+
|
|
19
|
+
from ummaya.tools.documents.models import (
|
|
20
|
+
KNOWN_DOCUMENT_FORMAT_FAMILIES,
|
|
21
|
+
PROMOTED_RUNTIME_DOCUMENT_FORMATS,
|
|
22
|
+
BlockedReason,
|
|
23
|
+
DocumentFormat,
|
|
24
|
+
DocumentFormatFamily,
|
|
25
|
+
DocumentIntakeResult,
|
|
26
|
+
DocumentSecurityFinding,
|
|
27
|
+
KnownDocumentFormat,
|
|
28
|
+
SecurityFindingSeverity,
|
|
29
|
+
SecurityState,
|
|
30
|
+
ToolResultStatus,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DocumentIntakePolicy(BaseModel):
    """Immutable pre-parse limits for user-supplied document artifacts.

    Instances are frozen and reject unknown fields (see ``model_config``).
    With the defaults, every ``KnownDocumentFormat`` value is allowed while
    external links, macros and embedded active content are all disallowed.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Format identifier strings this policy accepts; the default is the
    # ``.value`` of every ``KnownDocumentFormat`` member.
    allowed_formats: frozenset[str] = Field(
        default_factory=lambda: frozenset(member.value for member in KnownDocumentFormat)
    )

    # Size ceilings in bytes: 50 MiB raw, 200 MiB expanded.
    max_raw_bytes: int = 50 * 1024**2
    max_expanded_bytes: int = 200 * 1024**2

    # NOTE(review): presumably the container entry-count and nesting-depth
    # limits; the enforcement code is not visible here, so confirm against it.
    max_entries: int = 5_000
    max_depth: int = 1

    # Opt-in switches for risky content; each one is off by default.
    allow_external_links: bool = False
    allow_macros: bool = False
    allow_embedded_active_content: bool = False
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Maps a dotted, lowercase file extension to its known-format identifier.
# Every key is simply "." + value, so the table is derived from the ordered
# list of identifiers; insertion order matches the listing below.
_EXTENSION_TO_KNOWN_FORMAT: dict[str, str] = {
    f".{format_name}": format_name
    for format_name in (
        "hwpx",
        "hwp",
        "hml",
        "owpml",
        "docx",
        "xlsx",
        "pptx",
        "doc",
        "xls",
        "ppt",
        "pdf",
        "pdfa",
        "odt",
        "ods",
        "odp",
        "html",
        "htm",
        "txt",
        "rtf",
        "md",
        "epub",
        "csv",
        "tsv",
        "xml",
        "rdf",
        "ttl",
        "lod",
        "json",
        "jsonl",
        "yaml",
        "yml",
        "geojson",
        "gpx",
        "kml",
        "fasta",
        "sgml",
        "dtd",
        "py",
        "png",
        "jpg",
        "jpeg",
        "gif",
        "tif",
        "tiff",
        "bmp",
        "webp",
        "shp",
        "shx",
        "dbf",
        "prj",
        "stl",
        "wav",
        "mp3",
        "mp4",
        "zip",
        "7z",
        "tar",
        "gz",
        "etc",
    )
}
|
|
112
|
+
|
|
113
|
+
# String values of every format listed in PROMOTED_RUNTIME_DOCUMENT_FORMATS.
_PROMOTED_FORMAT_VALUES = frozenset(
    promoted.value for promoted in PROMOTED_RUNTIME_DOCUMENT_FORMATS
)

# Known formats that are served by a differently named runtime format.
_KNOWN_FORMAT_RUNTIME_ALIASES: dict[str, str] = {"pdfa": "pdf"}
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _runtime_format_for_known_format(known_format: str) -> str | None:
    """Resolve a known format name to the runtime format that handles it.

    Promoted formats resolve to themselves; anything else is looked up in
    ``_KNOWN_FORMAT_RUNTIME_ALIASES``. Returns ``None`` when neither applies.
    """
    if known_format not in _PROMOTED_FORMAT_VALUES:
        return _KNOWN_FORMAT_RUNTIME_ALIASES.get(known_format)
    return known_format
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# Extension table restricted to formats that resolve to a runtime format.
# Values remain the known-format names; aliases are not rewritten here.
_EXTENSION_TO_FORMAT: dict[str, str] = {
    suffix: format_name
    for suffix, format_name in _EXTENSION_TO_KNOWN_FORMAT.items()
    if _runtime_format_for_known_format(format_name) is not None
}
|
|
130
|
+
|
|
131
|
+
# Declared MIME types accepted for each format. A caller-declared MIME type is
# compared against the set for the detected format (and, when the extension
# agrees with the content, the set for the extension's format as well).
_MIME_BY_FORMAT: dict[str, frozenset[str]] = {
    "hwpx": frozenset(
        {
            "application/haansofthwpx", "application/vnd.hancom.hwpx",
            "application/x-hwpx", "application/owpml", "application/zip",
        }
    ),
    "owpml": frozenset(
        {
            "application/owpml", "application/vnd.hancom.hwpx",
            "application/x-hwpx", "application/zip",
        }
    ),
    "hwp": frozenset(
        {
            "application/haansofthwp", "application/vnd.hancom.hwp",
            "application/x-hwp", "application/octet-stream",
        }
    ),
    "docx": frozenset(
        {
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/zip",
        }
    ),
    "pdf": frozenset({"application/pdf"}),
    "xlsx": frozenset(
        {
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/zip",
        }
    ),
    "pptx": frozenset(
        {
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/zip",
        }
    ),
    "odt": frozenset({"application/vnd.oasis.opendocument.text", "application/zip"}),
    "ods": frozenset({"application/vnd.oasis.opendocument.spreadsheet", "application/zip"}),
    "odp": frozenset({"application/vnd.oasis.opendocument.presentation", "application/zip"}),
    # Plain-text and markup formats.
    "html": frozenset({"text/html", "application/xhtml+xml"}),
    "htm": frozenset({"text/html", "application/xhtml+xml"}),
    "txt": frozenset({"text/plain"}),
    "rtf": frozenset({"application/rtf", "text/rtf"}),
    "md": frozenset({"text/markdown", "text/plain"}),
    "epub": frozenset({"application/epub+zip", "application/zip"}),
    # Structured data formats.
    "csv": frozenset({"text/csv", "text/plain"}),
    "tsv": frozenset({"text/tab-separated-values", "text/plain"}),
    "xml": frozenset({"application/xml", "text/xml"}),
    "rdf": frozenset({"application/rdf+xml", "application/xml", "text/xml"}),
    "ttl": frozenset({"text/turtle", "text/plain"}),
    "lod": frozenset({"text/plain"}),
    "json": frozenset({"application/json", "text/plain"}),
    "jsonl": frozenset({"application/x-ndjson", "application/json", "text/plain"}),
    "yaml": frozenset({"application/yaml", "text/yaml", "text/plain"}),
    "yml": frozenset({"application/yaml", "text/yaml", "text/plain"}),
    "geojson": frozenset({"application/geo+json", "application/json", "text/plain"}),
    "gpx": frozenset({"application/gpx+xml", "application/xml", "text/xml"}),
    "kml": frozenset({"application/vnd.google-earth.kml+xml", "application/xml", "text/xml"}),
    "fasta": frozenset({"text/plain"}),
    "sgml": frozenset({"text/sgml", "text/plain"}),
    "dtd": frozenset({"application/xml-dtd", "text/plain"}),
    "py": frozenset({"text/x-python", "text/plain"}),
    "hml": frozenset({"application/xml", "text/xml"}),
    # Archive containers.
    "zip": frozenset({"application/zip"}),
    "7z": frozenset({"application/x-7z-compressed", "application/7z"}),
    "tar": frozenset({"application/x-tar", "application/tar"}),
    "gz": frozenset({"application/gzip", "application/x-gzip"}),
    "etc": frozenset({"text/plain"}),
}
|
|
224
|
+
|
|
225
|
+
# The single MIME type reported in results for each detected format.
_DETECTED_MIME_BY_FORMAT: dict[str, str] = {
    # Office-style documents.
    "hwpx": "application/vnd.hancom.hwpx",
    "owpml": "application/owpml",
    "hwp": "application/vnd.hancom.hwp",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "pdf": "application/pdf",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "odt": "application/vnd.oasis.opendocument.text",
    "ods": "application/vnd.oasis.opendocument.spreadsheet",
    "odp": "application/vnd.oasis.opendocument.presentation",
    # Plain-text and markup formats.
    "html": "text/html",
    "htm": "text/html",
    "txt": "text/plain",
    "rtf": "application/rtf",
    "md": "text/markdown",
    "epub": "application/epub+zip",
    # Structured data formats.
    "csv": "text/csv",
    "tsv": "text/tab-separated-values",
    "xml": "application/xml",
    "rdf": "application/rdf+xml",
    "ttl": "text/turtle",
    "lod": "text/plain",
    "json": "application/json",
    "jsonl": "application/x-ndjson",
    "yaml": "application/yaml",
    "yml": "application/yaml",
    "geojson": "application/geo+json",
    "gpx": "application/gpx+xml",
    "kml": "application/vnd.google-earth.kml+xml",
    "fasta": "text/plain",
    "sgml": "text/sgml",
    "dtd": "application/xml-dtd",
    "py": "text/x-python",
    "hml": "application/xml",
    # Archive containers.
    "zip": "application/zip",
    "7z": "application/x-7z-compressed",
    "tar": "application/x-tar",
    "gz": "application/gzip",
    "etc": "text/plain",
}
|
|
266
|
+
|
|
267
|
+
# Contents of an OpenDocument package's ``mimetype`` entry (as raw bytes) mapped
# to the format reported for that package.
_ODF_MIMETYPE_FORMATS: dict[bytes, str] = {
    f"application/vnd.oasis.opendocument.{odf_kind}".encode("ascii"): format_name
    for odf_kind, format_name in (
        ("text", "odt"),
        ("spreadsheet", "ods"),
        ("presentation", "odp"),
    )
}
|
|
272
|
+
|
|
273
|
+
# Formats validated as UTF-8 text / web documents.
_TEXT_WEB_FORMAT_VALUES = frozenset(("html", "htm", "txt", "rtf", "md"))
# Formats validated as UTF-8 structured data.
_DATA_FORMAT_VALUES = frozenset(
    (
        "csv", "tsv", "xml", "rdf", "ttl", "lod",
        "json", "jsonl", "yaml", "yml", "geojson", "gpx",
        "kml", "fasta", "sgml", "dtd", "hml", "etc",
    )
)
# Formats validated by parsing them as Python source.
_CODE_FORMAT_VALUES = frozenset(("py",))
|
|
297
|
+
|
|
298
|
+
# Ordered (format, member names) pairs: a ZIP package is reported as the first
# format for which at least one listed member name is present.
_ZIP_FORMAT_MARKERS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("docx", ("word/document.xml",)),
    ("xlsx", ("xl/workbook.xml",)),
    ("pptx", ("ppt/presentation.xml",)),
    (
        "hwpx",
        ("Contents/section0.xml", "Contents/header.xml", "version.xml", "META-INF/manifest.xml"),
    ),
)
|
|
312
|
+
|
|
313
|
+
# Leading byte sequences used to recognize binary containers.
_OLE_SIGNATURE = bytes.fromhex("d0cf11e0a1b11ae1")
_PDF_SIGNATURE = b"%PDF-"
_ZIP_SIGNATURES = tuple(b"PK" + tail for tail in (b"\x03\x04", b"\x05\x06", b"\x07\x08"))
_SEVEN_Z_SIGNATURE = b"7z" + bytes.fromhex("bcaf271c")
|
|
317
|
+
|
|
318
|
+
# Lowercase substrings that mark a package member as macro content.
_MACRO_MARKERS = ("vbaproject.bin", "/vba", "macrosheets/", "xl4macrosheets/")

# Lowercase substrings that mark a package member as embedded active content.
_ACTIVE_CONTENT_MARKERS = ("/activex/", "/embeddings/", "oleobject", "flash", "javascript")

# Members with this suffix are scanned for external relationship targets.
_RELATIONSHIP_SUFFIX = ".rels"
|
|
334
|
+
|
|
335
|
+
# Translation from internal rejection reasons to the public blocked-reason
# values. Reasons missing from this table are used as their own public value.
_BLOCKED_REASON_BY_INTERNAL: dict[str, str] = dict(
    unsupported_input="unsupported_format",
    unsupported_extension="unsupported_format",
    known_unsupported_format="unsupported_operation",
    unsupported_compression="unsupported_format",
    nested_package="unsupported_format",
    raw_size_limit="oversized_raw_bytes",
    corrupt_package="corrupt",
    encrypted_package="encrypted",
    zip_expansion_limit="oversized_expanded_bytes",
    zip_entry_limit="package_entry_limit_exceeded",
    zip_path_traversal="path_traversal_detected",
    active_content="macro_detected",
    external_link="external_link_detected",
)
|
|
350
|
+
|
|
351
|
+
# Policy instance built with all default field values; inspect_document_intake
# falls back to it when the caller passes no policy.
DEFAULT_INTAKE_POLICY = DocumentIntakePolicy()
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def inspect_document_intake(
    source_path: str | Path,
    *,
    expected_format: str | object | None = None,
    declared_mime_type: str | None = None,
    policy: DocumentIntakePolicy | None = None,
) -> DocumentIntakeResult:
    """Validate document bytes before format-specific parsing can run.

    Checks run in a fixed order and the first failure yields a blocked result:
    file existence, extension allowlist, raw size limit, runtime format
    availability, container/package inspection, and finally agreement of the
    detected format with the expected format, the filename extension and the
    declared MIME type.

    Args:
        source_path: Local path of the document to inspect.
        expected_format: Optional format the caller expects; plain strings and
            objects exposing ``.value`` are both accepted.
        declared_mime_type: Optional MIME type claimed for the document;
            anything after the first ``;`` is ignored.
        policy: Limits to enforce; ``DEFAULT_INTAKE_POLICY`` when omitted.

    Returns:
        A ``DocumentIntakeResult`` with status ``"ok"`` when every check
        passes, otherwise a blocked result describing the first failure.
    """

    path = Path(source_path)
    active_policy = policy or DEFAULT_INTAKE_POLICY
    expected = _format_value(expected_format)
    declared_mime = _normalize_mime(declared_mime_type)

    def reject(reason: str, message: str, **details) -> DocumentIntakeResult:
        # Every rejection echoes the same request context back to the caller.
        return _blocked_result(
            path=path,
            expected_format=expected,
            declared_mime_type=declared_mime,
            reason=reason,
            message=message,
            **details,
        )

    if not (path.exists() and path.is_file()):
        return reject("unsupported_input", "Document intake requires an existing local file.")

    known_format = _EXTENSION_TO_KNOWN_FORMAT.get(path.suffix.lower())
    if known_format is None or known_format not in active_policy.allowed_formats:
        suggestions = (
            () if known_format is None else _next_safe_actions_for_known_format(known_format)
        )
        return reject(
            "unsupported_extension",
            "Document extension is not on the supported allowlist.",
            known_format=known_format,
            next_safe_actions=suggestions,
        )

    raw_size = path.stat().st_size
    if raw_size > active_policy.max_raw_bytes:
        return reject(
            "raw_size_limit",
            "Document raw byte size exceeds the intake policy.",
            known_format=known_format,
            byte_size=raw_size,
        )

    payload = path.read_bytes()
    sha256 = hashlib.sha256(payload).hexdigest()
    # Facts known once the bytes are read; attached to every later rejection.
    measured = {"known_format": known_format, "byte_size": raw_size, "sha256": sha256}

    if _runtime_format_for_known_format(known_format) is None:
        return reject(
            "known_unsupported_format",
            (
                "Document format is recognized, but no promoted runtime adapter "
                "can safely process this operation yet."
            ),
            next_safe_actions=_next_safe_actions_for_known_format(known_format),
            **measured,
        )

    detected_format, expanded_byte_size, package_reason = _detect_format(
        payload,
        active_policy,
        known_format=known_format,
    )
    if package_reason is not None:
        return reject(package_reason, _reason_message(package_reason), **measured)
    if detected_format is None:
        return reject(
            "signature_mismatch",
            "Document signature or package structure is not supported.",
            **measured,
        )

    # From here on a format was detected, so rejections also report it.
    inspected = {
        **measured,
        "detected_format": detected_format,
        "expanded_byte_size": expanded_byte_size,
    }

    if expected is not None and not _expected_format_matches(
        expected=expected,
        detected_format=detected_format,
        known_format=known_format,
    ):
        return reject(
            "signature_mismatch",
            "Expected document format does not match detected content.",
            **inspected,
        )

    if not _known_format_matches_detected(
        known_format=known_format,
        detected_format=detected_format,
    ):
        return reject(
            "signature_mismatch",
            "Filename extension does not match detected content.",
            **inspected,
        )

    if declared_mime is not None and not _declared_mime_matches_formats(
        declared_mime=declared_mime,
        known_format=known_format,
        detected_format=detected_format,
    ):
        return reject(
            "mime_mismatch",
            "Declared MIME type does not match detected document format.",
            **inspected,
        )

    return _result(
        status="ok",
        path=path,
        detected_format=_document_format(detected_format),
        known_format=_known_document_format(known_format),
        format_family=_format_family(known_format),
        expected_format=_document_format(expected),
        declared_mime_type=declared_mime,
        mime_type=_DETECTED_MIME_BY_FORMAT[detected_format],
        byte_size=raw_size,
        expanded_byte_size=expanded_byte_size,
        sha256=sha256,
        blocked_reason=None,
        findings=(),
    )
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _detect_format(
    payload: bytes, policy: DocumentIntakePolicy, *, known_format: str
) -> tuple[str | None, int, str | None]:
    """Detect the document format of ``payload`` and vet any ZIP packaging.

    Args:
        payload: Complete raw bytes of the document.
        policy: Limits and switches applied to package inspection.
        known_format: Format implied by the filename extension; it selects
            which extension-specific probes are attempted.

    Returns:
        ``(detected_format, expanded_byte_size, package_reason)``. A non-``None``
        ``package_reason`` is an internal rejection reason; ``detected_format``
        is ``None`` when nothing matched.
    """
    # Local import: reading a corrupt deflate member raises zlib.error, which
    # zipfile does not translate into BadZipFile.
    import zlib

    container_result = _detect_binary_container_format(
        payload,
        policy,
        known_format=known_format,
    )
    if container_result is not None:
        return container_result

    if not payload.startswith(_ZIP_SIGNATURES):
        # Not a ZIP: the payload is accepted as the extension's format only if
        # one of the text-based validators recognizes it.
        probes = (_is_text_web_payload, _is_data_payload, _is_code_payload)
        if any(probe(payload, known_format=known_format) for probe in probes):
            return known_format, 0, None
        return None, 0, None

    try:
        with zipfile.ZipFile(PathBytes(payload)) as package:
            members = package.infolist()
            rejection = _inspect_zip_package(package, members, policy)
            if rejection is not None:
                return None, 0, rejection

            member_names = frozenset(member.filename for member in members)
            detected = _detect_zip_format(package, member_names)
            # The marker-based probe cannot tell hwpx from owpml; trust the
            # extension for that one distinction.
            if detected == "hwpx" and known_format == "owpml":
                detected = "owpml"
            return detected, sum(member.file_size for member in members), None
    except (zipfile.BadZipFile, zlib.error, EOFError):
        # Damaged central directory, corrupt compressed data, or a truncated
        # member: all mean the package cannot be trusted.
        return None, 0, "corrupt_package"
    except NotImplementedError:
        return None, 0, "unsupported_compression"
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
def _detect_binary_container_format(
    payload: bytes,
    policy: DocumentIntakePolicy,
    *,
    known_format: str,
) -> tuple[str | None, int, str | None] | None:
    """Probe ``payload`` for non-ZIP binary containers.

    Returns a ``(detected_format, expanded_byte_size, reason)`` triple when a
    probe reaches a verdict, or ``None`` when the caller should continue with
    the remaining detectors.
    """
    # Signature probes that apply regardless of the extension.
    # NOTE(review): any payload with the OLE signature is reported as "hwp".
    if payload.startswith(_PDF_SIGNATURE):
        return "pdf", 0, None
    if payload.startswith(_OLE_SIGNATURE):
        return "hwp", 0, None

    # Probes that are only attempted for the matching extension.
    if known_format == "7z" and payload.startswith(_SEVEN_Z_SIGNATURE):
        return "7z", 0, None
    if known_format == "gz" and payload.startswith(b"\x1f\x8b"):
        return _detect_gzip_format(payload, policy)
    if known_format == "tar":
        tar_outcome = _detect_tar_format(payload, policy)
        tar_detected, _tar_size, tar_reason = tar_outcome
        if tar_detected is not None or tar_reason is not None:
            return tar_outcome
    return None
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def _is_text_web_payload(payload: bytes, *, known_format: str) -> bool:
    """Return whether ``payload`` is plausible text for a text/web format.

    The payload must contain no NUL byte and decode as UTF-8. HTML additionally
    needs one of a few tag prefixes within the first 4096 characters, and RTF
    must begin (after leading whitespace) with ``{\\rtf``.
    """
    if known_format not in _TEXT_WEB_FORMAT_VALUES or b"\x00" in payload:
        return False
    try:
        text = payload.decode("utf-8")
    except UnicodeDecodeError:
        return False

    if known_format == "rtf":
        return text.lstrip().startswith("{\\rtf")
    if known_format in ("html", "htm"):
        head = text[:4096].lower()
        return any(tag in head for tag in ("<html", "<body", "<p"))
    return True
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def _is_data_payload(payload: bytes, *, known_format: str) -> bool:
    """Return whether ``payload`` is acceptable for a structured-data format.

    The payload must contain no NUL byte and decode as UTF-8. JSON, JSON Lines,
    YAML and the XML-based formats must also parse; every other data format
    only needs to contain non-whitespace text.
    """
    if known_format not in _DATA_FORMAT_VALUES or b"\x00" in payload:
        return False
    try:
        text = payload.decode("utf-8")
    except UnicodeDecodeError:
        return False

    if known_format in ("json", "geojson"):
        return _loads_json(text)
    if known_format == "jsonl":
        # Blank lines are skipped; a file with no records at all passes.
        records = (line for line in text.splitlines() if line.strip())
        return all(_loads_json(record) for record in records)
    if known_format in ("yaml", "yml"):
        return _loads_yaml(text)
    if known_format in ("xml", "rdf", "gpx", "kml", "hml"):
        return _loads_xml(text)
    # csv, tsv and the remaining data formats have no structural check.
    return bool(text.strip())
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def _is_code_payload(payload: bytes, *, known_format: str) -> bool:
    """Return whether ``payload`` is non-empty, parseable Python source.

    The payload must belong to a code format, contain no NUL byte, decode as
    UTF-8, contain non-whitespace text and parse with ``ast.parse``.
    """
    if known_format not in _CODE_FORMAT_VALUES or b"\x00" in payload:
        return False
    try:
        source = payload.decode("utf-8")
    except UnicodeDecodeError:
        return False
    if not source.strip():
        return False
    try:
        ast.parse(source)
    except (SyntaxError, ValueError, RecursionError, MemoryError):
        # Pathologically nested source makes the parser give up with
        # RecursionError or MemoryError instead of SyntaxError; for untrusted
        # input that must count as "not valid code", not crash the intake.
        return False
    return True
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _expected_format_matches(
    *,
    expected: str,
    detected_format: str,
    known_format: str,
) -> bool:
    """Return whether the caller's expected format agrees with the document.

    It agrees when it equals the detected format or the extension's format, or
    when its runtime format is the detected format.
    """
    if expected == detected_format or expected == known_format:
        return True
    return _runtime_format_for_known_format(expected) == detected_format
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def _known_format_matches_detected(
    *,
    known_format: str,
    detected_format: str,
) -> bool:
    """Return whether the extension's format agrees with the detected format."""
    if known_format == detected_format:
        return True
    # A ".hwp" filename is tolerated for hwpx/owpml package content.
    if known_format == "hwp" and detected_format in ("hwpx", "owpml"):
        return True
    return _runtime_format_for_known_format(known_format) == detected_format
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _declared_mime_matches_formats(
    *,
    declared_mime: str,
    known_format: str,
    detected_format: str,
) -> bool:
    """Return whether ``declared_mime`` is acceptable for the document.

    The MIME types of the detected format are always acceptable; those of the
    extension's format are added when extension and content agree.
    """
    accepted = _MIME_BY_FORMAT[detected_format]
    if _known_format_matches_detected(
        known_format=known_format,
        detected_format=detected_format,
    ):
        accepted = accepted | _MIME_BY_FORMAT.get(known_format, frozenset())
    return declared_mime in accepted
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def _loads_json(payload: str) -> bool:
    """Return whether ``payload`` parses as a JSON document.

    Excessively nested input is treated as invalid rather than being allowed
    to propagate ``RecursionError`` out of the intake check.
    """
    try:
        json.loads(payload)
    except (json.JSONDecodeError, RecursionError):
        # RecursionError: the decoder recurses per nesting level, so a hostile
        # document made of thousands of "[" would otherwise crash the caller.
        return False
    return True
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
def _loads_yaml(payload: str) -> bool:
    """Return whether ``payload`` parses with ``yaml.safe_load``.

    Excessively nested input is treated as invalid rather than being allowed
    to propagate ``RecursionError`` out of the intake check.
    """
    try:
        yaml.safe_load(payload)
    except (yaml.YAMLError, RecursionError):
        # Same guard as the JSON probe: deep nesting in untrusted input must
        # fail validation instead of crashing the caller.
        return False
    return True
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def _loads_xml(payload: str) -> bool:
    """Return whether ``payload`` is well-formed XML.

    The text is re-encoded as UTF-8 and handed to ``ElementTree.fromstring``.
    """
    # NOTE(review): this parses untrusted XML; confirm the ElementTree in use
    # is hardened against entity-expansion attacks.
    encoded = payload.encode("utf-8")
    try:
        ElementTree.fromstring(encoded)
    except ElementTree.ParseError:
        return False
    else:
        return True
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def _inspect_zip_package(
    package: zipfile.ZipFile,
    entries: list[zipfile.ZipInfo],
    policy: DocumentIntakePolicy,
) -> str | None:
    """Vet a ZIP package against ``policy``.

    Returns the internal rejection reason for the first violation found, or
    ``None`` when the package passes. Per entry the checks run in this order:
    encryption flag, unsafe name, cumulative declared size, nested package,
    macro content, embedded active content. External relationships are checked
    last, over the whole package.
    """
    if len(entries) > policy.max_entries:
        return "zip_entry_limit"

    declared_total = 0
    for info in entries:
        member_name = info.filename
        if info.flag_bits & 0x1:  # bit 0 of the general-purpose flags
            return "encrypted_package"
        if _is_unsafe_package_name(member_name):
            return "zip_path_traversal"
        declared_total += info.file_size
        if declared_total > policy.max_expanded_bytes:
            return "zip_expansion_limit"
        if _is_nested_package(member_name, policy):
            return "nested_package"
        if not policy.allow_macros and _is_macro_entry(member_name):
            return "active_content"
        if not policy.allow_embedded_active_content and _is_active_content_entry(member_name):
            return "active_content"

    if policy.allow_external_links:
        return None
    return "external_link" if _has_external_relationship(package, entries) else None
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def _detect_zip_format(package: zipfile.ZipFile, names: frozenset[str]) -> str | None:
    """Identify the document format stored in a ZIP package.

    Probes run in order: OpenDocument, EPUB, the marker table, then the
    generic-ZIP fallback. Returns ``None`` when nothing matches.
    """
    odf_match = _detect_odf_format(package, names)
    if odf_match is not None:
        return odf_match
    if _detect_epub_format(package, names):
        return "epub"
    for format_name, marker_names in _ZIP_FORMAT_MARKERS:
        # A single marker member is enough to claim the format.
        if not names.isdisjoint(marker_names):
            return format_name
    return "zip" if _is_generic_zip_candidate(names) else None
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def _detect_epub_format(package: zipfile.ZipFile, names: frozenset[str]) -> bool:
    """Return whether the package's ``mimetype`` member declares EPUB.

    At most 256 bytes of the member are read, and surrounding whitespace is
    ignored in the comparison.
    """
    if "mimetype" not in names:
        return False
    try:
        with package.open("mimetype") as handle:
            declared = handle.read(256)
    except KeyError:
        return False
    return declared.strip() == b"application/epub+zip"
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def _is_generic_zip_candidate(names: frozenset[str]) -> bool:
    """Return whether a package may be reported as a plain ZIP archive.

    It qualifies when it has at least one member and none named ``mimetype``.
    """
    if not names:
        return False
    return "mimetype" not in names
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def _detect_tar_format(
    payload: bytes,
    policy: DocumentIntakePolicy,
) -> tuple[str | None, int, str | None]:
    """Probe ``payload`` as a tar archive and vet its members.

    Returns ``("tar", expanded_size, None)`` for an acceptable archive,
    ``(None, 0, reason)`` when a member violates ``policy`` or the archive has
    no members, and ``(None, 0, None)`` when the payload cannot be read as tar.
    The ``zip_*`` reason names are shared with the ZIP checks.
    """
    try:
        with tarfile.open(fileobj=io.BytesIO(payload), mode="r:*") as package:
            expanded_size = 0
            member_count = 0
            # Iterate the archive lazily. getmembers() would scan (and, for a
            # compressed tar, decompress) the entire archive before the entry
            # and expansion limits got a chance to stop a hostile input.
            for member in package:
                member_count += 1
                if member_count > policy.max_entries:
                    return None, 0, "zip_entry_limit"
                if _is_unsafe_package_name(member.name):
                    return None, 0, "zip_path_traversal"
                # Links and device nodes can point outside the extraction root.
                if member.islnk() or member.issym() or member.isdev():
                    return None, 0, "active_content"
                expanded_size += max(member.size, 0)
                if expanded_size > policy.max_expanded_bytes:
                    return None, 0, "zip_expansion_limit"
            if member_count == 0:
                return None, 0, "corrupt_package"
            return "tar", expanded_size, None
    except tarfile.TarError:
        return None, 0, None
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
def _detect_gzip_format(
    payload: bytes,
    policy: DocumentIntakePolicy,
) -> tuple[str | None, int, str | None]:
    """Probe ``payload`` as a gzip stream within the expansion limit.

    Returns ``("gz", expanded_size, None)`` on success,
    ``(None, 0, "zip_expansion_limit")`` when the decompressed data exceeds
    ``policy.max_expanded_bytes``, and ``(None, 0, "corrupt_package")`` when
    the stream is malformed or truncated.
    """
    # Local import: corrupt deflate data surfaces as zlib.error, which is
    # neither an OSError nor an EOFError.
    import zlib

    # Read one byte past the limit so an over-limit stream is detectable
    # without decompressing all of it.
    read_cap = policy.max_expanded_bytes + 1
    try:
        with gzip.GzipFile(fileobj=io.BytesIO(payload)) as package:
            expanded = package.read(read_cap)
    except (OSError, EOFError, zlib.error):
        return None, 0, "corrupt_package"
    if len(expanded) > policy.max_expanded_bytes:
        return None, 0, "zip_expansion_limit"
    return "gz", len(expanded), None
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
def _detect_odf_format(package: zipfile.ZipFile, names: frozenset[str]) -> str | None:
    """Identify an OpenDocument package from its ``mimetype`` member.

    The package must contain ``mimetype``, ``META-INF/manifest.xml`` and
    ``content.xml``. Returns the mapped format, or ``None`` when a required
    member is missing or the declared type is not in the ODF table.
    """
    required_members = ("mimetype", "META-INF/manifest.xml", "content.xml")
    if not all(member in names for member in required_members):
        return None
    try:
        with package.open("mimetype") as handle:
            declared = handle.read(256)
    except KeyError:
        return None
    return _ODF_MIMETYPE_FORMATS.get(declared.strip())
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
def _is_unsafe_package_name(name: str) -> bool:
    """Return whether an archive member name could escape an extraction root.

    Unsafe names are empty, contain a NUL or backslash, start with ``/`` or
    ``~``, are absolute POSIX paths, or contain a ``..`` path component.
    """
    if not name:
        return True
    if any(forbidden in name for forbidden in ("\x00", "\\")):
        return True
    if name[0] in "/~":
        return True
    member_path = PurePosixPath(name)
    return member_path.is_absolute() or ".." in member_path.parts
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
def _is_nested_package(name: str, policy: DocumentIntakePolicy) -> bool:
    """Return whether a member looks like a package nested inside the package.

    Nesting is only flagged while ``policy.max_depth`` is not above 1; the
    decision is based purely on the member's (case-insensitive) suffix.
    """
    package_suffixes = (".zip", ".hwpx", ".owpml", ".docx", ".xlsx", ".pptx", ".jar")
    nesting_allowed = policy.max_depth > 1
    return (not nesting_allowed) and name.lower().endswith(package_suffixes)
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def _is_macro_entry(name: str) -> bool:
    """Return whether a member name contains any macro marker.

    The name is lowercased and prefixed with ``/`` so that markers starting
    with a slash also match a top-level member.
    """
    haystack = "/" + name.lower()
    for marker in _MACRO_MARKERS:
        if marker in haystack:
            return True
    return False
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
def _is_active_content_entry(name: str) -> bool:
    """Return whether a member name contains any active-content marker.

    The name is lowercased and prefixed with ``/`` so that markers starting
    with a slash also match a top-level member.
    """
    haystack = "/" + name.lower()
    for marker in _ACTIVE_CONTENT_MARKERS:
        if marker in haystack:
            return True
    return False
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
def _has_external_relationship(package: zipfile.ZipFile, entries: list[zipfile.ZipInfo]) -> bool:
    """Return whether any relationship member points outside the package.

    Only the first 1 MiB of each member ending in ``_RELATIONSHIP_SUFFIX`` is
    scanned, case-insensitively, for an external target mode or an
    ``http``/``https`` target.
    """
    external_markers = (
        b'targetmode="external"',
        b"target='http://",
        b'target="http://',
        b"target='https://",
        b'target="https://',
    )
    scan_limit = 1024 * 1024
    for info in entries:
        if not info.filename.lower().endswith(_RELATIONSHIP_SUFFIX):
            continue
        with package.open(info) as handle:
            head = handle.read(scan_limit).lower()
        if any(marker in head for marker in external_markers):
            return True
    return False
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
def _blocked_result(
    *,
    path: Path,
    reason: str,
    message: str,
    detected_format: str | None = None,
    known_format: str | None = None,
    expected_format: str | None = None,
    declared_mime_type: str | None = None,
    byte_size: int = 0,
    expanded_byte_size: int = 0,
    sha256: str | None = None,
    next_safe_actions: tuple[str, ...] = (),
) -> DocumentIntakeResult:
    """Build a blocked intake result carrying one security finding.

    ``reason`` is an internal reason that is translated to its public blocked
    reason. When ``known_format`` is missing, the detected format stands in
    for it in the known-format and format-family fields.
    """
    public_reason = _blocked_reason(reason)
    blocking_finding = _finding(code=public_reason, severity="blocked", message=message)
    effective_known = known_format or detected_format
    reported_mime = (
        None if detected_format is None else _DETECTED_MIME_BY_FORMAT[detected_format]
    )
    return _result(
        status="blocked",
        path=path,
        detected_format=_document_format(detected_format),
        known_format=_known_document_format(effective_known),
        format_family=_format_family(effective_known),
        expected_format=_document_format(expected_format),
        declared_mime_type=declared_mime_type,
        mime_type=reported_mime,
        byte_size=byte_size,
        expanded_byte_size=expanded_byte_size,
        sha256=sha256,
        blocked_reason=public_reason,
        findings=(blocking_finding,),
        next_safe_actions=next_safe_actions,
    )
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
def _result(
    *,
    status: str,
    path: Path,
    detected_format: DocumentFormat | None,
    known_format: KnownDocumentFormat | None,
    format_family: DocumentFormatFamily | None,
    expected_format: DocumentFormat | None,
    declared_mime_type: str | None,
    mime_type: str | None,
    byte_size: int,
    expanded_byte_size: int,
    sha256: str | None,
    blocked_reason: BlockedReason | None,
    findings: tuple[DocumentSecurityFinding, ...],
    next_safe_actions: tuple[str, ...] = (),
) -> DocumentIntakeResult:
    """Assemble the ``DocumentIntakeResult`` for the ``document_inspect`` tool.

    The security state is accepted only for status ``"ok"``, and the artifact
    reference list is empty when no SHA-256 digest is available.
    """
    artifact_refs = [] if sha256 is None else [f"sha256:{sha256}"]
    security_state = SecurityState.accepted if status == "ok" else SecurityState.blocked
    detected_value = None if detected_format is None else detected_format.value
    blocked_value = None if blocked_reason is None else blocked_reason.value
    return DocumentIntakeResult(
        tool_id="document_inspect",
        correlation_id=_correlation_id(sha256),
        status=ToolResultStatus(status),
        artifact_refs=artifact_refs,
        source_path=path,
        display_name=path.name,
        detected_format=detected_format,
        known_format=known_format,
        format_family=format_family,
        expected_format=expected_format,
        declared_mime_type=declared_mime_type,
        mime_type=mime_type,
        byte_size=byte_size,
        expanded_byte_size=expanded_byte_size,
        sha256=sha256,
        security_state=security_state,
        blocked_reason=blocked_reason,
        findings=list(findings),
        next_safe_actions=list(next_safe_actions),
        text_summary=_text_summary(
            status=status,
            detected_format=detected_value,
            blocked_reason=blocked_value,
            byte_size=byte_size,
            expanded_byte_size=expanded_byte_size,
        ),
    )
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
def _finding(
    *,
    code: BlockedReason,
    severity: SecurityFindingSeverity,
    message: str,
) -> DocumentSecurityFinding:
    """Create a security finding whose id is ``security-<code value>``.

    ``code``, ``severity`` and ``message`` are passed through unchanged.
    """
    finding_id = f"security-{code.value}"
    return DocumentSecurityFinding(
        finding_id=finding_id,
        code=code,
        severity=severity,
        message=message,
    )
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def _document_format(value: str | None) -> DocumentFormat | None:
|
|
954
|
+
if value is None:
|
|
955
|
+
return None
|
|
956
|
+
try:
|
|
957
|
+
return DocumentFormat(value)
|
|
958
|
+
except ValueError:
|
|
959
|
+
return None
|
|
960
|
+
|
|
961
|
+
|
|
962
|
+
def _known_document_format(value: str | None) -> KnownDocumentFormat | None:
|
|
963
|
+
if value is None:
|
|
964
|
+
return None
|
|
965
|
+
try:
|
|
966
|
+
return KnownDocumentFormat(value)
|
|
967
|
+
except ValueError:
|
|
968
|
+
return None
|
|
969
|
+
|
|
970
|
+
|
|
971
|
+
def _format_family(value: str | None) -> DocumentFormatFamily | None:
    """Look up the format family for a raw known-format string.

    The string is first resolved with ``_known_document_format``. When that
    yields ``None`` the family is ``None`` too; otherwise the family is read
    from ``KNOWN_DOCUMENT_FORMAT_FAMILIES``.
    """
    resolved = _known_document_format(value)
    return None if resolved is None else KNOWN_DOCUMENT_FORMAT_FAMILIES[resolved]
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def _format_value(value: str | object | None) -> str | None:
|
|
979
|
+
if value is None:
|
|
980
|
+
return None
|
|
981
|
+
candidate = getattr(value, "value", value)
|
|
982
|
+
return str(candidate).lower()
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
def _normalize_mime(value: str | None) -> str | None:
|
|
986
|
+
if value is None:
|
|
987
|
+
return None
|
|
988
|
+
return value.split(";", maxsplit=1)[0].strip().lower()
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
def _blocked_reason(reason: str) -> BlockedReason:
    """Convert an internal reason code into a ``BlockedReason`` member.

    ``_BLOCKED_REASON_BY_INTERNAL`` is consulted first; codes without an entry
    are handed to ``BlockedReason`` unchanged.
    """
    public_reason = _BLOCKED_REASON_BY_INTERNAL.get(reason, reason)
    return BlockedReason(public_reason)
|
|
993
|
+
|
|
994
|
+
|
|
995
|
+
def _next_safe_actions_for_known_format(known_format: str) -> tuple[str, ...]:
    """Return the suggested follow-up actions for a known format string.

    The format is mapped to its family with ``_format_family``. Each handled
    family has a fixed pair of guidance strings; any other family, including
    ``None``, gets a single generic suggestion.
    """
    family = _format_family(known_format)

    if family is DocumentFormatFamily.odf:
        actions = (
            "Use read-only extraction after an ODF adapter passes promotion gates.",
            "Convert to a promoted editable derivative only with explicit user approval.",
        )
    elif family is DocumentFormatFamily.data_file:
        actions = (
            "Use schema or text inspection through the data-file adapter.",
            "Do not reinterpret the file as an editable public form.",
        )
    elif family is DocumentFormatFamily.image_scan:
        actions = (
            "Use OCR or visual extraction only after an image-scan adapter is promoted.",
            "Create a separate editable derivative instead of mutating the raster source.",
        )
    elif family is DocumentFormatFamily.archive:
        actions = (
            "Enumerate archive members only after secure archive routing is promoted.",
            "Do not mutate archive children in place.",
        )
    elif family is DocumentFormatFamily.legacy_office:
        actions = (
            "Use metadata-only inspection unless an explicit conversion bridge is approved.",
            "Create an editable derivative instead of mutating the legacy binary source.",
        )
    elif family is DocumentFormatFamily.geospatial_data:
        actions = (
            "Use geospatial metadata inspection or route packaged sidecars as derivatives.",
            "Do not reinterpret GIS or 3D geometry files as editable public forms.",
        )
    elif family is DocumentFormatFamily.media_asset:
        actions = (
            "Use media metadata or transcription extraction only after a local adapter "
            "is approved.",
            "Create a separate document derivative for written content.",
        )
    elif family is DocumentFormatFamily.code_file:
        actions = (
            "Use read-only source inspection for context.",
            "Do not mutate code artifacts through the public-document writer.",
        )
    else:
        # Unknown or unhandled family: fall back to the generic guidance.
        actions = ("Use a promoted format adapter or request explicit conversion to a derivative.",)

    return actions
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
def _correlation_id(sha256: str | None) -> str:
|
|
1042
|
+
suffix = sha256[:12] if sha256 is not None else "unavailable"
|
|
1043
|
+
return f"document-intake-{suffix}"
|
|
1044
|
+
|
|
1045
|
+
|
|
1046
|
+
def _text_summary(
|
|
1047
|
+
*,
|
|
1048
|
+
status: str,
|
|
1049
|
+
detected_format: str | None,
|
|
1050
|
+
blocked_reason: str | None,
|
|
1051
|
+
byte_size: int,
|
|
1052
|
+
expanded_byte_size: int,
|
|
1053
|
+
) -> str:
|
|
1054
|
+
if status == "ok":
|
|
1055
|
+
return (
|
|
1056
|
+
f"Document intake accepted {detected_format} artifact "
|
|
1057
|
+
f"({byte_size} raw bytes, {expanded_byte_size} expanded bytes)."
|
|
1058
|
+
)
|
|
1059
|
+
return f"Document intake blocked: {blocked_reason}."
|
|
1060
|
+
|
|
1061
|
+
|
|
1062
|
+
def _reason_message(reason: str) -> str:
|
|
1063
|
+
return {
|
|
1064
|
+
"corrupt_package": "Document package is corrupt or unreadable.",
|
|
1065
|
+
"unsupported_compression": "Document package uses unsupported compression.",
|
|
1066
|
+
"encrypted_package": "Encrypted document package members are blocked.",
|
|
1067
|
+
"zip_path_traversal": "Document package contains unsafe member paths.",
|
|
1068
|
+
"zip_expansion_limit": "Document package expands beyond the intake policy.",
|
|
1069
|
+
"zip_entry_limit": "Document package contains too many entries.",
|
|
1070
|
+
"nested_package": "Nested document packages are blocked at intake.",
|
|
1071
|
+
"active_content": "Document package contains macros or active content.",
|
|
1072
|
+
"external_link": "Document package contains external relationship targets.",
|
|
1073
|
+
}.get(reason, "Document failed intake security validation.")
|
|
1074
|
+
|
|
1075
|
+
|
|
1076
|
+
class PathBytes(io.BytesIO):
|
|
1077
|
+
"""BytesIO subclass with a stable name for ZipFile diagnostics."""
|
|
1078
|
+
|
|
1079
|
+
name = "<document-intake-bytes>"
|