ummaya 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -3
- package/bin/ummaya +10 -1
- package/npm-shrinkwrap.json +253 -2
- package/package.json +5 -1
- package/prompts/manifest.yaml +2 -2
- package/prompts/session_guidance_v1.md +3 -1
- package/prompts/system_v1.md +9 -7
- package/pyproject.toml +26 -7
- package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
- package/src/ummaya/_canonical/__init__.py +2 -0
- package/src/ummaya/context/builder.py +17 -11
- package/src/ummaya/engine/engine.py +30 -113
- package/src/ummaya/engine/query.py +20 -0
- package/src/ummaya/evidence/__init__.py +44 -0
- package/src/ummaya/evidence/__main__.py +7 -0
- package/src/ummaya/evidence/dataset_contract.py +193 -0
- package/src/ummaya/evidence/document_authoring_cases.py +33 -0
- package/src/ummaya/evidence/document_harness.py +313 -0
- package/src/ummaya/evidence/document_viewer_ux.py +391 -0
- package/src/ummaya/evidence/gates.py +70 -0
- package/src/ummaya/evidence/json_types.py +20 -0
- package/src/ummaya/evidence/models.py +145 -0
- package/src/ummaya/evidence/output_payload.py +89 -0
- package/src/ummaya/evidence/payload_documents.py +233 -0
- package/src/ummaya/evidence/route_contracts.py +224 -0
- package/src/ummaya/evidence/route_helpers.py +150 -0
- package/src/ummaya/evidence/runner.py +177 -0
- package/src/ummaya/evidence/source_provenance.py +246 -0
- package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
- package/src/ummaya/evidence/task_registry.py +264 -0
- package/src/ummaya/evidence/tool_layer.py +39 -0
- package/src/ummaya/evidence/tool_layer_models.py +151 -0
- package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
- package/src/ummaya/ipc/document_intent_normalization.py +185 -0
- package/src/ummaya/ipc/frame_schema.py +52 -5
- package/src/ummaya/ipc/route_diagnostics.py +73 -0
- package/src/ummaya/ipc/stdio.py +2282 -417
- package/src/ummaya/llm/client.py +234 -59
- package/src/ummaya/llm/config.py +8 -3
- package/src/ummaya/llm/reasoning.py +84 -0
- package/src/ummaya/primitives/__init__.py +6 -2
- package/src/ummaya/primitives/delegation.py +1 -1
- package/src/ummaya/primitives/document.py +28 -0
- package/src/ummaya/settings.py +0 -3
- package/src/ummaya/tools/discovery_bridge.py +34 -2
- package/src/ummaya/tools/documents/__init__.py +297 -0
- package/src/ummaya/tools/documents/adapter_registry.py +487 -0
- package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
- package/src/ummaya/tools/documents/artifact_store.py +454 -0
- package/src/ummaya/tools/documents/authoring.py +283 -0
- package/src/ummaya/tools/documents/baselines.py +114 -0
- package/src/ummaya/tools/documents/capability.py +331 -0
- package/src/ummaya/tools/documents/contracts.py +112 -0
- package/src/ummaya/tools/documents/conversion.py +521 -0
- package/src/ummaya/tools/documents/diff.py +275 -0
- package/src/ummaya/tools/documents/engines.py +163 -0
- package/src/ummaya/tools/documents/evaluation.py +291 -0
- package/src/ummaya/tools/documents/explicit_values.py +108 -0
- package/src/ummaya/tools/documents/fixtures.py +174 -0
- package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
- package/src/ummaya/tools/documents/formats/__init__.py +2 -0
- package/src/ummaya/tools/documents/formats/archive.py +528 -0
- package/src/ummaya/tools/documents/formats/base.py +41 -0
- package/src/ummaya/tools/documents/formats/code_file.py +211 -0
- package/src/ummaya/tools/documents/formats/data_file.py +272 -0
- package/src/ummaya/tools/documents/formats/hwp.py +284 -0
- package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
- package/src/ummaya/tools/documents/formats/odf.py +435 -0
- package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
- package/src/ummaya/tools/documents/formats/passive.py +766 -0
- package/src/ummaya/tools/documents/formats/pdf.py +702 -0
- package/src/ummaya/tools/documents/formats/text_web.py +268 -0
- package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
- package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
- package/src/ummaya/tools/documents/inspection.py +289 -0
- package/src/ummaya/tools/documents/intake.py +1079 -0
- package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
- package/src/ummaya/tools/documents/models.py +1598 -0
- package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
- package/src/ummaya/tools/documents/orchestrator.py +96 -0
- package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
- package/src/ummaya/tools/documents/patch.py +170 -0
- package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
- package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
- package/src/ummaya/tools/documents/permissions.py +110 -0
- package/src/ummaya/tools/documents/planner.py +616 -0
- package/src/ummaya/tools/documents/registry.py +2733 -0
- package/src/ummaya/tools/documents/render.py +978 -0
- package/src/ummaya/tools/documents/render_comparison.py +113 -0
- package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
- package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
- package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
- package/src/ummaya/tools/documents/reread.py +157 -0
- package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
- package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
- package/src/ummaya/tools/documents/scorecard.py +184 -0
- package/src/ummaya/tools/documents/socratic_planner.py +193 -0
- package/src/ummaya/tools/documents/style.py +48 -0
- package/src/ummaya/tools/documents/tool_defs.py +523 -0
- package/src/ummaya/tools/documents/validate.py +347 -0
- package/src/ummaya/tools/executor.py +61 -12
- package/src/ummaya/tools/geocoding/kakao_client.py +1 -2
- package/src/ummaya/tools/kma/apihub_catalog.py +984 -1
- package/src/ummaya/tools/kma/apihub_structured_adapter.py +86 -6
- package/src/ummaya/tools/kma/apihub_url_adapter.py +593 -0
- package/src/ummaya/tools/kma/apihub_url_catalog.py +296 -0
- package/src/ummaya/tools/live_proxy.py +0 -3
- package/src/ummaya/tools/location_adapters.py +8 -6
- package/src/ummaya/tools/manifest_metadata.py +16 -3
- package/src/ummaya/tools/models.py +5 -1
- package/src/ummaya/tools/mvp_surface.py +2 -2
- package/src/ummaya/tools/nmc/emergency_search.py +8 -6
- package/src/ummaya/tools/register_all.py +17 -0
- package/src/ummaya/tools/registry.py +10 -1
- package/src/ummaya/tools/resolve_location.py +4 -4
- package/src/ummaya/tools/routing/__init__.py +59 -0
- package/src/ummaya/tools/routing/builder.py +105 -0
- package/src/ummaya/tools/routing/cards.py +29 -0
- package/src/ummaya/tools/routing/decision_service.py +534 -0
- package/src/ummaya/tools/routing/decision_types.py +74 -0
- package/src/ummaya/tools/routing/feasibility.py +122 -0
- package/src/ummaya/tools/routing/intent.py +17 -0
- package/src/ummaya/tools/routing/intent_extractor.py +207 -0
- package/src/ummaya/tools/routing/intent_patterns.py +160 -0
- package/src/ummaya/tools/routing/intent_public_data.py +150 -0
- package/src/ummaya/tools/routing/intent_types.py +48 -0
- package/src/ummaya/tools/routing/lint.py +78 -0
- package/src/ummaya/tools/routing/metadata.py +174 -0
- package/src/ummaya/tools/routing/projection.py +340 -0
- package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
- package/src/ummaya/tools/routing/schema.py +81 -0
- package/src/ummaya/tools/routing/types.py +96 -0
- package/src/ummaya/tools/routing_index.py +2 -2
- package/src/ummaya/tools/search.py +40 -106
- package/src/ummaya/tools/verified_data_go_kr/_manifest.py +115 -25
- package/src/ummaya/tools/verified_data_go_kr/airkorea_air_quality.py +109 -4
- package/src/ummaya/tools/verified_data_go_kr/nmc_aed_site.py +108 -2
- package/src/ummaya/tools/verified_data_go_kr/pps_bid_public_info.py +174 -9
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_arrival.py +66 -3
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_location.py +12 -2
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_route.py +8 -2
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_route_station.py +114 -0
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_station.py +14 -3
- package/src/ummaya/tools/verify_canonical_map.py +21 -0
- package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
- package/tui/package.json +1 -2
- package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
- package/tui/src/QueryEngine.ts +12 -4
- package/tui/src/bridge/inboundAttachments.ts +3 -3
- package/tui/src/cli/handlers/auth.ts +4 -13
- package/tui/src/cli/handlers/mcp.tsx +3 -3
- package/tui/src/cli/print.ts +69 -18
- package/tui/src/cli/update.ts +13 -13
- package/tui/src/commands/copy/index.ts +1 -1
- package/tui/src/commands/cost/cost.ts +2 -2
- package/tui/src/commands/init-verifiers.ts +5 -5
- package/tui/src/commands/init.ts +30 -30
- package/tui/src/commands/insights.ts +44 -44
- package/tui/src/commands/install-github-app/install-github-app.tsx +2 -2
- package/tui/src/commands/install-github-app/setupGitHubActions.ts +3 -3
- package/tui/src/commands/install-github-app/types.ts +8 -30
- package/tui/src/commands/install.tsx +5 -5
- package/tui/src/commands/mcp/addCommand.ts +5 -5
- package/tui/src/commands/mcp/xaaIdpCommand.ts +2 -2
- package/tui/src/commands/plugin/ManageMarketplaces.tsx +2 -2
- package/tui/src/commands/plugin/types.ts +6 -28
- package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
- package/tui/src/commands/reasoning/index.ts +13 -0
- package/tui/src/commands/reasoning/reasoning.tsx +177 -0
- package/tui/src/commands/rename/generateSessionName.ts +1 -1
- package/tui/src/commands/thinkback/thinkback.tsx +3 -3
- package/tui/src/commands.ts +2 -0
- package/tui/src/components/Feedback.tsx +1 -1
- package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
- package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
- package/tui/src/components/Messages.tsx +2 -1
- package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
- package/tui/src/components/Spinner/types.ts +6 -28
- package/tui/src/components/Spinner.tsx +2 -2
- package/tui/src/components/agents/generateAgent.ts +1 -1
- package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
- package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
- package/tui/src/components/design-system/LoadingState.tsx +2 -2
- package/tui/src/components/mcp/types.ts +16 -38
- package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
- package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
- package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
- package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
- package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
- package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
- package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
- package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
- package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
- package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
- package/tui/src/components/primitive/index.tsx +43 -1
- package/tui/src/components/primitive/types.ts +137 -0
- package/tui/src/components/ui/option.ts +4 -26
- package/tui/src/constants/common.ts +0 -2
- package/tui/src/constants/prompts.ts +4 -3
- package/tui/src/constants/querySource.ts +4 -26
- package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
- package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
- package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
- package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
- package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
- package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
- package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
- package/tui/src/hooks/useApiKeyVerification.ts +1 -1
- package/tui/src/hooks/useVirtualScroll.ts +1 -1
- package/tui/src/ink/ink.tsx +33 -14
- package/tui/src/ink/reconciler.ts +2 -3
- package/tui/src/ink/render-to-screen.ts +30 -10
- package/tui/src/ipc/bridge.ts +62 -15
- package/tui/src/ipc/bridgeSingleton.ts +5 -1
- package/tui/src/ipc/codec.ts +29 -3
- package/tui/src/ipc/frames.generated.ts +407 -312
- package/tui/src/ipc/llmClient.ts +279 -76
- package/tui/src/ipc/llmTypes.ts +16 -1
- package/tui/src/ipc/schema/frame.schema.json +1 -3475
- package/tui/src/keybindings/defaultBindings.ts +4 -0
- package/tui/src/main.tsx +32 -11
- package/tui/src/native-ts/file-index/index.ts +33 -3
- package/tui/src/observability/surface.ts +2 -2
- package/tui/src/probes/toolRegistryProbe.tsx +3 -1
- package/tui/src/projectOnboardingState.ts +7 -6
- package/tui/src/query/chatMessageTypes.ts +18 -0
- package/tui/src/query/chatMessagesBuilder.ts +1 -1
- package/tui/src/query/deps.ts +1 -1
- package/tui/src/query/messageGuards.ts +106 -0
- package/tui/src/query/publicDataTerminalRepair.ts +384 -0
- package/tui/src/query/run.ts +1075 -0
- package/tui/src/query/supportBoundary.ts +168 -0
- package/tui/src/query/toolResultErrors.ts +103 -0
- package/tui/src/query/toolRunner.ts +687 -0
- package/tui/src/query/unavailableToolRepair.ts +118 -0
- package/tui/src/query.ts +9 -1721
- package/tui/src/screens/REPL.tsx +42 -31
- package/tui/src/services/api/adapterManifest.ts +4 -0
- package/tui/src/services/api/backendChat/events.ts +117 -0
- package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
- package/tui/src/services/api/backendChat/frame.ts +9 -0
- package/tui/src/services/api/backendChat/streaming.ts +430 -0
- package/tui/src/services/api/backendChat/types.ts +62 -0
- package/tui/src/services/api/backendChat.ts +1 -0
- package/tui/src/services/api/client.ts +98 -14
- package/tui/src/services/api/errorUtils.ts +5 -5
- package/tui/src/services/api/errors.ts +1 -1
- package/tui/src/services/api/logging.ts +1 -1
- package/tui/src/services/api/ummaya/evidence.ts +194 -0
- package/tui/src/services/api/ummaya/messages.ts +255 -0
- package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
- package/tui/src/services/api/ummaya/provider.ts +200 -0
- package/tui/src/services/api/ummaya/reasoning.ts +24 -0
- package/tui/src/services/api/ummaya/request.ts +200 -0
- package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
- package/tui/src/services/api/ummaya/streaming.ts +365 -0
- package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
- package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
- package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
- package/tui/src/services/api/ummaya/types.ts +110 -0
- package/tui/src/services/api/ummaya/usage.ts +30 -0
- package/tui/src/services/api/ummaya.ts +26 -364
- package/tui/src/services/api/withRetry.ts +1 -1
- package/tui/src/services/awaySummary.ts +2 -2
- package/tui/src/services/claudeAiLimits.ts +1 -1
- package/tui/src/services/compact/autoCompact.ts +1 -1
- package/tui/src/services/compact/compact.ts +1 -1
- package/tui/src/services/lsp/types.ts +8 -30
- package/tui/src/services/tips/types.ts +6 -28
- package/tui/src/services/tokenEstimation.ts +1 -1
- package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
- package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
- package/tui/src/services/tools/toolExecution.ts +94 -1
- package/tui/src/skills/bundled/stuck.ts +12 -12
- package/tui/src/state/AppStateStore.ts +7 -0
- package/tui/src/store/pendingPermissionSlot.ts +1 -1
- package/tui/src/store/session-store.ts +10 -36
- package/tui/src/stubs/any-stub.ts +15 -10
- package/tui/src/stubs/color-diff-napi.ts +37 -23
- package/tui/src/stubs/globals.d.ts +3 -3
- package/tui/src/stubs/macro-preload.ts +23 -12
- package/tui/src/tools/AdapterTool/AdapterTool.ts +1239 -163
- package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
- package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
- package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
- package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
- package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
- package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
- package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
- package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
- package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
- package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
- package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
- package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
- package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
- package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
- package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
- package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
- package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
- package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
- package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
- package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
- package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
- package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
- package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
- package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
- package/tui/src/tools/AgentTool/permissions.ts +39 -0
- package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
- package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
- package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
- package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
- package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
- package/tui/src/tools/AgentTool/runAgent.ts +1 -1
- package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
- package/tui/src/tools/AgentTool/schemas.ts +196 -0
- package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
- package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
- package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
- package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
- package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
- package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
- package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
- package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
- package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
- package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
- package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
- package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
- package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
- package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
- package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
- package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
- package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
- package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
- package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
- package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
- package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
- package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
- package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
- package/tui/src/tools/BashTool/call.ts +202 -0
- package/tui/src/tools/BashTool/callLoader.ts +35 -0
- package/tui/src/tools/BashTool/commandClassification.ts +151 -0
- package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
- package/tui/src/tools/BashTool/cwdReset.ts +33 -0
- package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
- package/tui/src/tools/BashTool/modeValidation.ts +13 -1
- package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
- package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
- package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
- package/tui/src/tools/BashTool/resultLoader.ts +29 -0
- package/tui/src/tools/BashTool/resultMapping.ts +83 -0
- package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
- package/tui/src/tools/BashTool/schemas.ts +65 -0
- package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
- package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
- package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
- package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
- package/tui/src/tools/BashTool/uiLoader.ts +37 -0
- package/tui/src/tools/BriefTool/upload.ts +1 -1
- package/tui/src/tools/CalculatorTool/parser.ts +2 -2
- package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
- package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
- package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
- package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
- package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
- package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
- package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
- package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
- package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
- package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
- package/tui/src/tools/FileEditTool/call.ts +228 -0
- package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
- package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
- package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
- package/tui/src/tools/FileWriteTool/call.ts +223 -0
- package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
- package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
- package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +48 -29
- package/tui/src/tools/LookupPrimitive/prompt.ts +6 -7
- package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
- package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
- package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
- package/tui/src/tools/NotebookEditTool/call.ts +254 -0
- package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
- package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
- package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
- package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
- package/tui/src/tools/PowerShellTool/call.ts +179 -0
- package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
- package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
- package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
- package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
- package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
- package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
- package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
- package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
- package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
- package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
- package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
- package/tui/src/tools/PowerShellTool/validation.ts +39 -0
- package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
- package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +30 -19
- package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
- package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
- package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +51 -18
- package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
- package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
- package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
- package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
- package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
- package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
- package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
- package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
- package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
- package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
- package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
- package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
- package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
- package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
- package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
- package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
- package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
- package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
- package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
- package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
- package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
- package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
- package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
- package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
- package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
- package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +27 -10
- package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
- package/tui/src/tools/WebFetchTool/call.ts +227 -0
- package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
- package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
- package/tui/src/tools/WebFetchTool/types.ts +23 -0
- package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
- package/tui/src/tools/WebFetchTool/utils.ts +1 -1
- package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
- package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
- package/tui/src/tools/WebSearchTool/call.ts +33 -0
- package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
- package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
- package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
- package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
- package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
- package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
- package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
- package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
- package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
- package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
- package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
- package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
- package/tui/src/tools/_shared/citizenUserText.ts +49 -0
- package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
- package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
- package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
- package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
- package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
- package/tui/src/tools/_shared/locationInputRepair.ts +112 -0
- package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
- package/tui/src/tools/_shared/rootPrimitiveInput.ts +68 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
- package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
- package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
- package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
- package/tui/src/tools/_shared/toolChoiceRepair.ts +61 -0
- package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
- package/tui/src/tools.ts +39 -190
- package/tui/src/types/fileSuggestion.ts +4 -26
- package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
- package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
- package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
- package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
- package/tui/src/types/message.ts +80 -102
- package/tui/src/types/messageQueueTypes.ts +6 -28
- package/tui/src/types/notebook.ts +16 -38
- package/tui/src/types/statusLine.ts +4 -26
- package/tui/src/types/tools.ts +24 -46
- package/tui/src/types/utils.ts +6 -28
- package/tui/src/upstreamproxy/relay.ts +7 -3
- package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
- package/tui/src/utils/assistantMessageFactories.ts +9 -3
- package/tui/src/utils/attachments.ts +1 -1
- package/tui/src/utils/auth.ts +129 -139
- package/tui/src/utils/bash/ast.ts +23 -23
- package/tui/src/utils/bash/bashParser.ts +5 -5
- package/tui/src/utils/billing.ts +1 -1
- package/tui/src/utils/collapseReadSearch.ts +3 -3
- package/tui/src/utils/cronTasks.ts +1 -1
- package/tui/src/utils/execFileNoThrow.ts +1 -1
- package/tui/src/utils/filePersistence/types.ts +16 -38
- package/tui/src/utils/forkedAgent.ts +1 -1
- package/tui/src/utils/gracefulShutdown.ts +4 -4
- package/tui/src/utils/heapDumpService.ts +12 -8
- package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
- package/tui/src/utils/hooks/execPromptHook.ts +1 -1
- package/tui/src/utils/hooks/skillImprovement.ts +1 -1
- package/tui/src/utils/kExaoneReasoning.ts +138 -0
- package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
- package/tui/src/utils/messages.ts +19 -0
- package/tui/src/utils/migrateSessions.ts +3 -3
- package/tui/src/utils/model/model.ts +6 -6
- package/tui/src/utils/multiToolLayout.ts +13 -0
- package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
- package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
- package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
- package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
- package/tui/src/utils/plugins/pluginLoader.ts +8 -8
- package/tui/src/utils/processUserInput/processSlashCommand.tsx +2 -2
- package/tui/src/utils/processUserInput/processUserInput.ts +26 -0
- package/tui/src/utils/protectedNamespace.ts +5 -3
- package/tui/src/utils/rawJsonToolCall.ts +242 -0
- package/tui/src/utils/ripgrep.ts +16 -7
- package/tui/src/utils/sessionTitle.ts +1 -1
- package/tui/src/utils/settings/applySettingsChange.ts +4 -0
- package/tui/src/utils/settings/permissionValidation.ts +14 -2
- package/tui/src/utils/settings/types.ts +9 -3
- package/tui/src/utils/shell/prefix.ts +1 -1
- package/tui/src/utils/sideQuery.ts +1 -1
- package/tui/src/utils/stats.ts +1 -1
- package/tui/src/utils/systemThemeWatcher.ts +13 -3
- package/tui/src/utils/teleport.tsx +1 -1
- package/uv.lock +394 -22
- package/assets/copilot-gate-logo.svg +0 -58
- package/assets/govon-logo.svg +0 -40
- package/src/ummaya/eval/__init__.py +0 -5
- package/src/ummaya/eval/retrieval.py +0 -713
- package/tui/src/services/api/claude.ts +0 -3510
- package/tui/src/utils/messageStream.ts +0 -186
|
@@ -0,0 +1,702 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
"""PDF adapter and AcroForm-only mutation boundary."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import io
|
|
7
|
+
import logging
|
|
8
|
+
from collections.abc import Iterator
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from decimal import Decimal
|
|
12
|
+
from enum import StrEnum
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
15
|
+
|
|
16
|
+
from pypdf import PdfReader, PdfWriter
|
|
17
|
+
from pypdf.generic import DictionaryObject, NameObject
|
|
18
|
+
|
|
19
|
+
from ummaya.tools.documents.engines import (
|
|
20
|
+
DocumentInspectionEngine,
|
|
21
|
+
DocumentMutationBlockedError,
|
|
22
|
+
DocumentMutationEngine,
|
|
23
|
+
)
|
|
24
|
+
from ummaya.tools.documents.models import (
|
|
25
|
+
BlockedReason,
|
|
26
|
+
DocumentExtraction,
|
|
27
|
+
DocumentFormat,
|
|
28
|
+
DocumentPatch,
|
|
29
|
+
FieldType,
|
|
30
|
+
FormField,
|
|
31
|
+
ImageReference,
|
|
32
|
+
KnownDocumentFormat,
|
|
33
|
+
MetadataValue,
|
|
34
|
+
OperationType,
|
|
35
|
+
ParagraphBlock,
|
|
36
|
+
ScalarValue,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
from ummaya.tools.documents.tool_defs import DocumentFieldPatch
|
|
41
|
+
|
|
42
|
+
# Engine identifiers considered for PDF work, in declaration order.
PDF_CANDIDATE_ENGINES: tuple[str, ...] = (
    "pypdf-acroform",
    "pypdfium2-render-oracle",
    "qpdf-structure-oracle",
)

# Every native AcroForm field path exposed by this module starts with this prefix.
_PDF_FIELD_PREFIX = "/acroform/fields/"
# Resource name and point size used when a Unicode form font is registered in /DR.
_PDF_KOREAN_FORM_FONT_RESOURCE = "/UMMAYA_KR"
_PDF_KOREAN_FORM_FONT_SIZE = 11.0
logger = logging.getLogger(__name__)
# pypdf logger name and message fragment targeted by the warning filter below.
_PYPDF_APPEARANCE_LOGGER = "pypdf.generic._appearance_stream"
_PYPDF_UNSUPPORTED_FONT_WARNING = "characters not supported by font encoding"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class PdfDocumentKind(StrEnum):
|
|
57
|
+
"""PDF structure class used by the AcroForm-only promotion gate."""
|
|
58
|
+
|
|
59
|
+
acroform = "acroform"
|
|
60
|
+
static = "static"
|
|
61
|
+
scanned = "scanned"
|
|
62
|
+
xfa = "xfa"
|
|
63
|
+
encrypted = "encrypted"
|
|
64
|
+
signed = "signed"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(frozen=True)
class PdfStructureProfile:
    """Immutable result of classifying a local PDF before any mutation.

    ``classify_pdf_document`` fills the counters with zero for encrypted
    documents and leaves ``field_names`` empty for kinds without form fields.
    """

    # Structural class selected by the classifier.
    kind: PdfDocumentKind
    # Number of pages reported by the reader.
    page_count: int
    # Number of field dictionaries found while walking /AcroForm /Fields.
    field_count: int
    # Total length of the text extracted from all pages.
    text_length: int
    # Number of image XObjects referenced from page resources.
    image_count: int
    # Field names as returned by the reader's field lookup.
    field_names: tuple[str, ...] = ()
    # Reason mutation is refused; ``None`` when no block applies.
    blocked_reason: BlockedReason | None = None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class PdfDocumentAdapter:
    """Adapter boundary for PDF documents, optionally backed by an inspection engine.

    Without an engine the adapter only classifies PDFs ("known-only"); with one,
    PDF is listed in ``promoted_formats`` and inspection is delegated to it.
    """

    adapter_id: str = "pypdf-acroform-adapter"
    known_formats: tuple[KnownDocumentFormat, ...] = (
        KnownDocumentFormat.pdf,
        KnownDocumentFormat.pdfa,
    )

    def __init__(
        self,
        inspection_engine: DocumentInspectionEngine | None = None,
        *,
        promote_default: bool = True,
    ) -> None:
        """Wire the adapter to an engine.

        Args:
            inspection_engine: Engine to wrap; it must be scoped to PDF.
            promote_default: When no engine is given, create a
                ``PypdfAcroFormEngine`` instead of staying known-only.

        Raises:
            ValueError: If the supplied engine is not a PDF engine.
        """
        engine = inspection_engine
        if engine is None and promote_default:
            engine = PypdfAcroFormEngine()

        self.promoted_formats: tuple[DocumentFormat, ...] = ()
        self._inspection_engine: DocumentInspectionEngine | None = None
        if engine is not None:
            self.promoted_formats = (DocumentFormat.pdf,)
            self._inspection_engine = validate_pdf_engine(engine)

    @property
    def engine_id(self) -> str:
        """Return the wrapped engine id, or the adapter id when no engine is set."""
        engine = self._inspection_engine
        return self.adapter_id if engine is None else engine.engine_id

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Inspect a PDF through the wrapped engine, or classify it when there is none."""
        engine = self._inspection_engine
        if engine is not None:
            return engine.inspect(path, artifact_id=artifact_id)

        # Known-only mode: report the structure profile without extracting content.
        return _extraction_for_profile(
            artifact_id=artifact_id,
            profile=classify_pdf_document(path),
            engine_id=self.adapter_id,
            warnings=["PDF is registered as known-only because no engine is registered."],
        )

    def normalize_fill_patches(
        self,
        patches: tuple[DocumentFieldPatch, ...],
        *,
        extraction: DocumentExtraction | None,
    ) -> tuple[DocumentFieldPatch, ...]:
        """Rewrite patch targets given as labels or bare names into native field paths.

        Targets that already start with ``/`` are kept as they are. Other targets
        are matched (case-folded, alphanumerics only) against field labels first,
        then against field names; unmatched targets are left unchanged.
        Without an extraction the patches are returned untouched.
        """
        if extraction is None:
            return patches

        path_by_label: dict[str, str] = {}
        path_by_name: dict[str, str] = {}
        for field in extraction.fields:
            native_path = field.path
            if not native_path.startswith(_PDF_FIELD_PREFIX):
                continue
            path_by_label[_field_key(field.label)] = native_path
            path_by_name[_field_key(native_path.removeprefix(_PDF_FIELD_PREFIX))] = native_path

        def _resolve_target(requested: str) -> str:
            lookup_key = _field_key(requested)
            if requested.startswith("/"):
                return requested
            return path_by_label.get(lookup_key) or path_by_name.get(lookup_key) or requested

        return tuple(
            patch.model_copy(update={"target_path": _resolve_target(patch.target_path)})
            for patch in patches
        )
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class PypdfAcroFormEngine:
    """AcroForm inspection and fill engine using pypdf, with pypdfium2 for rendering."""

    document_format = DocumentFormat.pdf
    engine_id = "pypdf-acroform"
    render_engine_id = "pypdfium2"
    render_artifact_extension = "png"
    render_mime_type = "image/png"

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Extract form fields, text lines, and image references from a PDF.

        Encrypted documents are answered from the structure profile alone; no
        page or field extraction is attempted for them.
        """
        profile = classify_pdf_document(path)
        if profile.kind is PdfDocumentKind.encrypted:
            return _extraction_for_profile(
                artifact_id=artifact_id,
                profile=profile,
                engine_id=self.engine_id,
                warnings=["Encrypted PDFs are blocked before page or field extraction."],
            )

        pdf = PdfReader(str(path), strict=False)
        form_fields = _form_fields(pdf)
        text_blocks = _paragraphs(pdf, artifact_id=artifact_id)
        image_refs = _image_references(pdf)
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=text_blocks,
            images=image_refs,
            fields=form_fields,
            metadata=_profile_metadata(profile, engine_id=self.engine_id),
            warnings=_profile_warnings(profile),
        )

    def apply_patch(self, path: Path, patch: DocumentPatch) -> bytes:
        """Fill AcroForm fields and return the new PDF bytes.

        Raises:
            DocumentMutationBlockedError: If the PDF is not classified as
                ``acroform``, or the fill does not change the rendered pages.
            ValueError: If the patch is malformed, names an unknown field, or
                the written values do not read back as expected.
        """
        profile = classify_pdf_document(path)
        if profile.kind is not PdfDocumentKind.acroform:
            _raise_profile_block(profile)

        requested = _field_values_from_patch(patch)
        known_names = set(profile.field_names)
        unknown = sorted(name for name in requested if name not in known_names)
        if unknown:
            raise ValueError(f"PDF AcroForm field not found: {unknown}")

        source = PdfReader(str(path), strict=False)
        writer = PdfWriter()
        writer.append(source)
        update_values = _field_values_for_pypdf_update(writer, requested)
        regenerate = _needs_regenerated_acroform_appearance(update_values)
        with _suppress_expected_pypdf_appearance_warning(enabled=regenerate):
            for page in writer.pages:
                writer.update_page_form_field_values(
                    page,
                    update_values,
                    auto_regenerate=regenerate,
                )

        buffer = io.BytesIO()
        writer.write(buffer)
        filled = buffer.getvalue()
        # Post-conditions: values must read back and the rendering must differ.
        _verify_acroform_values(filled, requested)
        _verify_visible_render_change(path.read_bytes(), filled)
        return filled

    def render(self, path: Path, *, artifact_id: str, output_dir: Path) -> tuple[bytes, ...]:
        """Render each page of the PDF to PNG bytes; the extra arguments are unused."""
        del artifact_id, output_dir
        return _render_pdf_pages(path.read_bytes())
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def validate_pdf_engine(engine: DocumentInspectionEngine) -> DocumentInspectionEngine:
    """Return *engine* unchanged when it declares the PDF document format.

    Raises:
        ValueError: If the engine's ``document_format`` is not PDF.
    """
    if engine.document_format is DocumentFormat.pdf:
        return engine
    raise ValueError("PDF adapter requires a pdf engine")
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def validate_pdf_mutation_engine(engine: DocumentInspectionEngine) -> DocumentMutationEngine:
    """Return *engine* when it is a PDF engine that also supports mutation.

    Raises:
        ValueError: If the engine is not scoped to PDF or is not a
            ``DocumentMutationEngine``.
    """
    validate_pdf_engine(engine)
    if isinstance(engine, DocumentMutationEngine):
        return engine
    raise ValueError("PDF adapter requires a mutation-capable engine")
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def classify_pdf_document(path: Path) -> PdfStructureProfile:
    """Read a local PDF and decide which structural kind it is.

    Checks run in this order: encrypted, XFA, signed, AcroForm, scanned, static.
    The file is only read, never written.
    """
    reader = PdfReader(str(path), strict=False)
    if reader.is_encrypted:
        # Nothing else is inspected for encrypted documents.
        return PdfStructureProfile(
            kind=PdfDocumentKind.encrypted,
            page_count=0,
            field_count=0,
            text_length=0,
            image_count=0,
            blocked_reason=BlockedReason.encrypted,
        )

    page_count = len(reader.pages)
    field_objects = _field_objects(reader)
    field_names = _field_names(reader)
    text_length = _text_length(reader)
    image_count = _image_count(reader)
    form = _acroform(reader)

    kind: PdfDocumentKind
    reason: BlockedReason | None
    if form is not None and "/XFA" in form:
        kind, reason = PdfDocumentKind.xfa, BlockedReason.xfa_detected
    elif "/Perms" in _root(reader) or _has_signature_field(field_objects):
        kind, reason = PdfDocumentKind.signed, BlockedReason.signature_detected
    elif field_objects:
        kind, reason = PdfDocumentKind.acroform, None
    else:
        # No form fields: images without any text count as scanned, the rest as static.
        if image_count > 0 and text_length == 0:
            kind, reason = PdfDocumentKind.scanned, BlockedReason.scanned_pdf
        else:
            kind, reason = PdfDocumentKind.static, BlockedReason.static_pdf
        return PdfStructureProfile(
            kind=kind,
            page_count=page_count,
            field_count=0,
            text_length=text_length,
            image_count=image_count,
            blocked_reason=reason,
        )

    return PdfStructureProfile(
        kind=kind,
        page_count=page_count,
        field_count=len(field_objects),
        text_length=text_length,
        image_count=image_count,
        field_names=field_names,
        blocked_reason=reason,
    )
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _field_values_from_patch(patch: DocumentPatch) -> dict[str, str]:
    """Turn a patch into a mapping of AcroForm field name to string value.

    A ``None`` value becomes the empty string; any other value is passed
    through ``str``. Later operations on the same field win.

    Raises:
        ValueError: If an operation is not ``set_field_value``, does not target
            the AcroForm field prefix, or has no field name after the prefix.
    """
    collected: dict[str, str] = {}
    for op in patch.operations:
        op_type = op.operation_type
        if op_type is not OperationType.set_field_value:
            raise ValueError(
                "PDF mutation supports AcroForm set_field_value operations only: "
                f"{op_type.value}"
            )
        target = op.target_path
        if not target.startswith(_PDF_FIELD_PREFIX):
            raise ValueError(f"PDF field target must start with {_PDF_FIELD_PREFIX}")
        name = target.removeprefix(_PDF_FIELD_PREFIX)
        if not name:
            raise ValueError("PDF field target is missing the AcroForm field name")
        raw_value = op.value
        collected[name] = str(raw_value) if raw_value is not None else ""
    return collected
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _verify_acroform_values(payload: bytes, field_values: dict[str, str]) -> None:
    """Re-read the written PDF and confirm each text field holds the expected value.

    Args:
        payload: Bytes of the PDF produced by the fill.
        field_values: Expected value per field name, keyed by the same names
            the caller validated against the reader's field listing.

    Raises:
        ValueError: If any expected field is absent from the re-read text
            fields or holds a different value.
    """
    reader = PdfReader(io.BytesIO(payload), strict=False)
    # Ask for fully qualified names: the expected names come from the reader's
    # field listing, while the default here is the partial /T name, which makes
    # every nested field ("parent.child") look like a mismatch.
    observed = reader.get_form_text_fields(full_qualified_name=True) or {}
    mismatches = {
        field_name: {"expected": expected, "observed": observed.get(field_name)}
        for field_name, expected in field_values.items()
        if observed.get(field_name) != expected
    }
    if mismatches:
        raise ValueError(f"PDF AcroForm re-read mismatch: {mismatches}")
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _field_values_for_pypdf_update(
    writer: PdfWriter,
    field_values: dict[str, str],
) -> dict[str, str | tuple[str, str, float]]:
    """Prepare the value mapping handed to pypdf's form update.

    When at least one value is non-ASCII and a Unicode form font can be
    registered on the writer, each non-ASCII value becomes a
    ``(value, font resource, font size)`` tuple. Otherwise a plain copy of
    the input is returned.
    """
    if not _has_non_ascii_field_value(field_values):
        return dict(field_values)

    # Registering the font mutates the writer's AcroForm default resources.
    resource_name = _register_embedded_unicode_form_font(writer)
    if resource_name is None:
        return dict(field_values)

    prepared: dict[str, str | tuple[str, str, float]] = {}
    for name, text in field_values.items():
        if _has_non_ascii_text(text):
            prepared[name] = (text, resource_name, _PDF_KOREAN_FORM_FONT_SIZE)
        else:
            prepared[name] = text
    return prepared
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _needs_regenerated_acroform_appearance(
    field_values: dict[str, str | tuple[str, str, float]],
) -> bool:
    """Report whether any plain (non-tuple) value contains non-ASCII text.

    Tuple values are skipped; only bare strings are examined.
    """
    return any(
        not isinstance(candidate, tuple) and _has_non_ascii_text(candidate)
        for candidate in field_values.values()
    )
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _has_non_ascii_field_value(field_values: dict[str, str]) -> bool:
    """Return True when at least one field value contains a non-ASCII character."""
    return any(map(_has_non_ascii_text, field_values.values()))
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _has_non_ascii_text(value: str) -> bool:
|
|
373
|
+
return any(ord(character) > 0x7F for character in value)
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def _register_embedded_unicode_form_font(writer: PdfWriter) -> str | None:
    """Expose an existing page font to form fields through the AcroForm /DR fonts.

    Returns the resource name the font was registered under, or ``None`` when
    the writer has no AcroForm dictionary or no suitable page font is found.
    Missing ``/DR`` and ``/Font`` dictionaries are created on demand.
    """
    acroform = _dict_object(writer._root_object.get("/AcroForm"))  # noqa: SLF001
    if acroform is None:
        return None
    page_font = _first_embedded_unicode_page_font(writer)
    if page_font is None:
        return None

    def _child_dict(parent: DictionaryObject, key: str) -> DictionaryObject:
        # Fetch parent[key] as a dictionary, creating and attaching it when absent.
        child = _dict_object(parent.get(key))
        if child is None:
            child = DictionaryObject()
            parent[NameObject(key)] = child
        return child

    form_fonts = _child_dict(_child_dict(acroform, "/DR"), "/Font")
    form_fonts[NameObject(_PDF_KOREAN_FORM_FONT_RESOURCE)] = page_font
    return _PDF_KOREAN_FORM_FONT_RESOURCE
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _first_embedded_unicode_page_font(writer: PdfWriter) -> object | None:
    """Return the first page font reference that passes the embedded-Unicode check.

    Pages are scanned in order; the original (possibly indirect) reference is
    returned rather than the resolved dictionary. ``None`` means no match.
    """
    for page in writer.pages:
        resources = _dict_object(page.get("/Resources"))
        if resources is None:
            continue
        page_fonts = _dict_object(resources.get("/Font"))
        if page_fonts is None:
            continue
        for candidate in page_fonts.values():
            font_dict = _dict_object(candidate)
            if font_dict is not None and _is_embedded_unicode_font(font_dict):
                return cast(object, candidate)
    return None
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def _is_embedded_unicode_font(font: DictionaryObject) -> bool:
|
|
413
|
+
subtype = str(font.get("/Subtype", ""))
|
|
414
|
+
return (
|
|
415
|
+
subtype in {"/TrueType", "/Type0"}
|
|
416
|
+
and "/ToUnicode" in font
|
|
417
|
+
and ("/FontDescriptor" in font or "/DescendantFonts" in font)
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
@contextmanager
def _suppress_expected_pypdf_appearance_warning(*, enabled: bool) -> Iterator[None]:
    """Filter the unsupported-font message on pypdf's appearance logger while active.

    With ``enabled`` false this is a no-op context. Otherwise a filter is
    attached on entry and always removed again on exit.
    """
    if enabled:
        appearance_logger = logging.getLogger(_PYPDF_APPEARANCE_LOGGER)
        installed_filter = _ExpectedPypdfAppearanceWarningFilter()
        appearance_logger.addFilter(installed_filter)
        try:
            yield
        finally:
            appearance_logger.removeFilter(installed_filter)
    else:
        yield
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
class _ExpectedPypdfAppearanceWarningFilter(logging.Filter):
    """Logging filter that drops records mentioning the unsupported-font message."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False (drop) when the formatted message contains the known text."""
        message = record.getMessage()
        return _PYPDF_UNSUPPORTED_FONT_WARNING not in message
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def _verify_visible_render_change(before_payload: bytes, after_payload: bytes) -> None:
    """Require that the filled PDF renders differently from the source PDF.

    Raises:
        DocumentMutationBlockedError: If the rendered page images of both
            payloads are identical.
    """
    rendered_before = _render_pdf_pages(before_payload)
    rendered_after = _render_pdf_pages(after_payload)
    if rendered_before != rendered_after:
        return
    raise DocumentMutationBlockedError(
        BlockedReason.validation_failed,
        "PDF AcroForm fill did not change visible page rendering.",
    )
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _render_pdf_pages(payload: bytes) -> tuple[bytes, ...]:
    """Render every page of a PDF payload to PNG bytes with pypdfium2.

    Forms are initialised on the document before rendering. Pages are rendered
    at scale 2, and every bitmap, page, and the document itself are closed
    even when rendering fails.
    """
    import pypdfium2 as pdfium  # type: ignore[import-untyped] # noqa: PLC0415

    pdf = pdfium.PdfDocument(payload)
    try:
        pdf.init_forms()
        png_pages: list[bytes] = []
        for pdf_page in pdf:
            try:
                raster = pdf_page.render(scale=2)
                try:
                    pil_image = raster.to_pil()
                    png_buffer = io.BytesIO()
                    pil_image.save(png_buffer, format="PNG")
                    png_pages.append(png_buffer.getvalue())
                finally:
                    raster.close()
            finally:
                pdf_page.close()
        return tuple(png_pages)
    finally:
        pdf.close()
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _raise_profile_block(profile: PdfStructureProfile) -> None:
    """Always raise a mutation-blocked error describing *profile*.

    The profile's own blocked reason is used when set; otherwise the error
    carries ``validation_failed``.

    Raises:
        DocumentMutationBlockedError: Unconditionally.
    """
    reason = profile.blocked_reason
    if not reason:
        reason = BlockedReason.validation_failed
    message = (
        f"PDF mutation blocked: {profile.kind.value} PDF cannot be edited through AcroForm fill."
    )
    raise DocumentMutationBlockedError(reason, message)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def _extraction_for_profile(
    *,
    artifact_id: str,
    profile: PdfStructureProfile,
    engine_id: str,
    warnings: list[str],
) -> DocumentExtraction:
    """Build a content-free extraction that carries only metadata and warnings.

    Caller-supplied warnings come first, followed by the profile's own
    warnings; the input list is not modified.
    """
    metadata = _profile_metadata(profile, engine_id=engine_id)
    combined_warnings = list(warnings)
    combined_warnings.extend(_profile_warnings(profile))
    return DocumentExtraction(
        artifact_id=artifact_id,
        metadata=metadata,
        warnings=combined_warnings,
    )
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def _profile_metadata(
    profile: PdfStructureProfile,
    *,
    engine_id: str,
) -> dict[str, MetadataValue]:
    """Describe a structure profile as a flat metadata mapping.

    The mapping holds the identity of the engine, the profile's counters, and
    fixed policy values describing the AcroForm-only mutation boundary.
    """
    identity: dict[str, MetadataValue] = {
        "format": DocumentFormat.pdf.value,
        "engine_id": engine_id,
        "pdf_kind": profile.kind.value,
    }
    counters: dict[str, MetadataValue] = {
        "page_count": profile.page_count,
        "field_count": profile.field_count,
        "text_length": profile.text_length,
        "image_count": profile.image_count,
    }
    # Constant policy values; they do not depend on the profile.
    policy: dict[str, MetadataValue] = {
        "mutation_policy": "acroform_only",
        "render_oracle": PypdfAcroFormEngine.render_engine_id,
        "template_overlay_capability": "requires_template_baseline",
        "template_overlay_available": False,
        "template_overlay_required_evidence": (
            "baseline_bounding_boxes_and_pypdfium2_render_comparison"
        ),
    }
    return {**identity, **counters, **policy}
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def _profile_warnings(profile: PdfStructureProfile) -> list[str]:
    """Return the reviewer-facing warnings that apply to a profile's kind.

    AcroForm and encrypted profiles each get a single fixed message. Every
    other kind gets a "blocked" message, and static or scanned PDFs also get
    a note that template overlay is deferred.
    """
    kind = profile.kind
    if kind is PdfDocumentKind.acroform:
        return [
            "PDF AcroForm fill is promoted; static, XFA, encrypted, and signed mutation is blocked."
        ]
    if kind is PdfDocumentKind.encrypted:
        return ["Encrypted PDFs are blocked for inspection and mutation."]

    messages = [f"{kind.value} PDF mutation is blocked by the AcroForm-only gate."]
    if kind in (PdfDocumentKind.static, PdfDocumentKind.scanned):
        messages.append(
            "PDF template overlay is deferred until a public-form baseline provides "
            "field bounding boxes and pypdfium2 render-comparison evidence."
        )
    return messages
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def _form_fields(reader: PdfReader) -> list[FormField]:
    """Describe every AcroForm field the reader exposes as a ``FormField``.

    The label prefers ``/TU``, then ``/T``, then the field name. The current
    value comes from the text-field listing when the field appears there and
    otherwise from the field's ``/V`` entry. ``required`` is always ``False``
    and ``source_confidence`` is always ``1``.
    """
    extracted_fields = reader.get_fields() or {}
    # Key the text values by fully qualified name so they line up with the
    # field names iterated below. The default partial /T keys never match a
    # nested field ("parent.child") and can collide between siblings.
    text_fields = reader.get_form_text_fields(full_qualified_name=True) or {}
    fields: list[FormField] = []
    for field_name, raw_field in extracted_fields.items():
        field = cast(dict[str, Any], raw_field)
        fields.append(
            FormField(
                field_id=f"pdf-field-{_safe_field_id(field_name)}",
                label=str(field.get("/TU") or field.get("/T") or field_name),
                path=f"{_PDF_FIELD_PREFIX}{field_name}",
                field_type=_field_type(field.get("/FT")),
                required=False,
                current_value=text_fields.get(field_name, _scalar_field_value(field.get("/V"))),
                allowed_values=_allowed_values(field.get("/Opt")),
                source_confidence=Decimal("1"),
            )
        )
    return fields
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def _paragraphs(reader: PdfReader, *, artifact_id: str) -> list[ParagraphBlock]:
    """Emit one ``ParagraphBlock`` per non-empty text line, page by page.

    Page and line numbers start at 1 and are zero-padded to three digits in
    the block id; line numbering restarts on every page.
    """
    return [
        ParagraphBlock(
            block_id=f"pdf-page-{page_number:03d}-line-{line_number:03d}",
            text=line_text,
            source_path=f"{artifact_id}/pages/{page_number}/text[{line_number}]",
        )
        for page_number, page in enumerate(reader.pages, start=1)
        for line_number, line_text in enumerate(
            _non_empty_lines(page.extract_text() or ""), start=1
        )
    ]
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _image_references(reader: PdfReader) -> list[ImageReference]:
    """List the image XObjects referenced from each page's resources.

    Only entries directly under ``/Resources /XObject`` whose ``/Subtype`` is
    ``/Image`` are reported; pages without such a dictionary are skipped.
    """
    found: list[ImageReference] = []
    for page_number, page in enumerate(reader.pages, start=1):
        resources = _dict_object(page.get("/Resources"))
        if resources is None:
            continue
        xobjects = _dict_object(resources.get("/XObject"))
        if xobjects is None:
            continue
        for xobject_name, xobject_ref in xobjects.items():
            xobject = _dict_object(xobject_ref)
            if xobject is None:
                continue
            if str(xobject.get("/Subtype")) != "/Image":
                continue
            found.append(
                ImageReference(
                    image_id=f"pdf-page-{page_number:03d}-{_safe_field_id(str(xobject_name))}",
                    source_path=f"/pages/{page_number}/resources/xobject/{xobject_name}",
                    content_type="application/pdf-image-xobject",
                )
            )
    return found
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def _non_empty_lines(text: str) -> list[str]:
|
|
594
|
+
return [line.strip() for line in text.splitlines() if line.strip()]
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _root(reader: PdfReader) -> DictionaryObject:
    """Return the resolved object behind the trailer's ``/Root`` entry."""
    catalog = _resolve(reader.trailer["/Root"])
    return cast(DictionaryObject, catalog)
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def _acroform(reader: PdfReader) -> DictionaryObject | None:
    """Return the catalog's ``/AcroForm`` dictionary, or ``None`` when it is absent."""
    catalog = _root(reader)
    return _dict_object(catalog.get("/AcroForm"))
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def _field_objects(reader: PdfReader) -> tuple[DictionaryObject, ...]:
    """Collect every dictionary reachable from ``/AcroForm /Fields`` via ``/Kids``.

    Returns an empty tuple when the document has no AcroForm dictionary.
    """
    form = _acroform(reader)
    if form is None:
        return ()
    top_level = form.get("/Fields", ())
    return tuple(_walk_field_objects(top_level))
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def _walk_field_objects(fields: object) -> list[DictionaryObject]:
    """Flatten a field tree depth-first, parents before their ``/Kids``.

    Entries that do not resolve to a dictionary are skipped.
    """
    flattened: list[DictionaryObject] = []
    for entry in cast(Any, fields):
        node = _dict_object(entry)
        if node is None:
            continue
        flattened.append(node)
        children = node.get("/Kids")
        if children is not None:
            flattened += _walk_field_objects(children)
    return flattened
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
def _field_names(reader: PdfReader) -> tuple[str, ...]:
|
|
627
|
+
fields = reader.get_fields() or {}
|
|
628
|
+
return tuple(str(field_name) for field_name in fields)
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _has_signature_field(fields: tuple[DictionaryObject, ...]) -> bool:
|
|
632
|
+
return any(str(field.get("/FT")) == "/Sig" for field in fields)
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _text_length(reader: PdfReader) -> int:
|
|
636
|
+
total = 0
|
|
637
|
+
for page in reader.pages:
|
|
638
|
+
try:
|
|
639
|
+
total += len(page.extract_text() or "")
|
|
640
|
+
except Exception as exc: # pragma: no cover - malformed page evidence path.
|
|
641
|
+
logger.warning("PDF page text extraction failed during classification: %s", exc)
|
|
642
|
+
continue
|
|
643
|
+
return total
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def _image_count(reader: PdfReader) -> int:
    """Count the image references found on the reader's pages."""
    references = _image_references(reader)
    return len(references)
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _dict_object(value: object) -> DictionaryObject | None:
    """Resolve *value* and return it when it is a dictionary, else ``None``.

    Both pypdf ``DictionaryObject`` instances and plain ``dict`` instances
    are accepted.
    """
    candidate = _resolve(value)
    if isinstance(candidate, (DictionaryObject, dict)):
        return cast(DictionaryObject, candidate)
    return None
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def _resolve(value: object) -> object:
|
|
660
|
+
if hasattr(value, "get_object"):
|
|
661
|
+
return cast(Any, value).get_object()
|
|
662
|
+
return value
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _field_type(raw_type: object) -> FieldType:
|
|
666
|
+
field_type = str(raw_type)
|
|
667
|
+
if field_type == "/Tx":
|
|
668
|
+
return "text"
|
|
669
|
+
if field_type == "/Btn":
|
|
670
|
+
return "checkbox"
|
|
671
|
+
if field_type == "/Ch":
|
|
672
|
+
return "choice"
|
|
673
|
+
if field_type == "/Sig":
|
|
674
|
+
return "signature"
|
|
675
|
+
return "unknown"
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def _scalar_field_value(value: object) -> str | int | bool | None:
|
|
679
|
+
if value is None:
|
|
680
|
+
return None
|
|
681
|
+
if isinstance(value, str | int | bool):
|
|
682
|
+
return value
|
|
683
|
+
return str(value)
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def _allowed_values(value: object) -> list[ScalarValue]:
    """Normalise a field's ``/Opt`` entry into a flat list of scalar options.

    ``None`` gives an empty list and a single scalar gives a one-item list.
    For a list or tuple, each entry is resolved first. An entry that is itself
    a non-empty list or tuple is treated as an ``[export value, display text]``
    pair and contributes its export value, instead of the repr of the pair.
    Any other input is stringified into a one-item list.
    """
    if value is None:
        return []
    if isinstance(value, str | int | bool):
        return [value]
    if isinstance(value, list | tuple):
        options: list[ScalarValue] = []
        for item in value:
            entry = _resolve(item)
            if isinstance(entry, list | tuple) and entry:
                # The first element of a pair is the value stored in /V on selection.
                entry = _resolve(entry[0])
            options.append(_scalar_field_value(entry))
        return options
    return [str(value)]
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def _field_key(value: str) -> str:
|
|
697
|
+
return "".join(ch for ch in value.casefold() if ch.isalnum())
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def _safe_field_id(value: str) -> str:
|
|
701
|
+
safe = "".join(ch if ch.isalnum() else "-" for ch in value.lower()).strip("-")
|
|
702
|
+
return safe or "field"
|