ummaya 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/bin/ummaya +10 -1
- package/npm-shrinkwrap.json +253 -2
- package/package.json +5 -1
- package/prompts/manifest.yaml +1 -1
- package/prompts/system_v1.md +1 -0
- package/pyproject.toml +26 -2
- package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
- package/src/ummaya/_canonical/__init__.py +2 -0
- package/src/ummaya/engine/engine.py +29 -132
- package/src/ummaya/evidence/__init__.py +21 -2
- package/src/ummaya/evidence/dataset_contract.py +193 -0
- package/src/ummaya/evidence/document_authoring_cases.py +33 -0
- package/src/ummaya/evidence/document_harness.py +313 -0
- package/src/ummaya/evidence/document_viewer_ux.py +391 -0
- package/src/ummaya/evidence/gates.py +70 -0
- package/src/ummaya/evidence/json_types.py +20 -0
- package/src/ummaya/evidence/models.py +88 -1
- package/src/ummaya/evidence/output_payload.py +89 -0
- package/src/ummaya/evidence/payload_documents.py +233 -0
- package/src/ummaya/evidence/route_contracts.py +224 -0
- package/src/ummaya/evidence/route_helpers.py +150 -0
- package/src/ummaya/evidence/runner.py +81 -212
- package/src/ummaya/evidence/source_provenance.py +246 -0
- package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
- package/src/ummaya/evidence/tool_layer.py +39 -0
- package/src/ummaya/evidence/tool_layer_models.py +151 -0
- package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
- package/src/ummaya/ipc/document_intent_normalization.py +185 -0
- package/src/ummaya/ipc/frame_schema.py +5 -5
- package/src/ummaya/ipc/route_diagnostics.py +73 -0
- package/src/ummaya/ipc/stdio.py +1109 -477
- package/src/ummaya/llm/client.py +102 -3
- package/src/ummaya/llm/config.py +8 -3
- package/src/ummaya/primitives/__init__.py +6 -2
- package/src/ummaya/primitives/delegation.py +1 -1
- package/src/ummaya/primitives/document.py +28 -0
- package/src/ummaya/settings.py +0 -3
- package/src/ummaya/tools/discovery_bridge.py +17 -1
- package/src/ummaya/tools/documents/__init__.py +297 -0
- package/src/ummaya/tools/documents/adapter_registry.py +487 -0
- package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
- package/src/ummaya/tools/documents/artifact_store.py +454 -0
- package/src/ummaya/tools/documents/authoring.py +283 -0
- package/src/ummaya/tools/documents/baselines.py +114 -0
- package/src/ummaya/tools/documents/capability.py +331 -0
- package/src/ummaya/tools/documents/contracts.py +112 -0
- package/src/ummaya/tools/documents/conversion.py +521 -0
- package/src/ummaya/tools/documents/diff.py +275 -0
- package/src/ummaya/tools/documents/engines.py +163 -0
- package/src/ummaya/tools/documents/evaluation.py +291 -0
- package/src/ummaya/tools/documents/explicit_values.py +108 -0
- package/src/ummaya/tools/documents/fixtures.py +174 -0
- package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
- package/src/ummaya/tools/documents/formats/__init__.py +2 -0
- package/src/ummaya/tools/documents/formats/archive.py +528 -0
- package/src/ummaya/tools/documents/formats/base.py +41 -0
- package/src/ummaya/tools/documents/formats/code_file.py +211 -0
- package/src/ummaya/tools/documents/formats/data_file.py +272 -0
- package/src/ummaya/tools/documents/formats/hwp.py +284 -0
- package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
- package/src/ummaya/tools/documents/formats/odf.py +435 -0
- package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
- package/src/ummaya/tools/documents/formats/passive.py +766 -0
- package/src/ummaya/tools/documents/formats/pdf.py +702 -0
- package/src/ummaya/tools/documents/formats/text_web.py +268 -0
- package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
- package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
- package/src/ummaya/tools/documents/inspection.py +289 -0
- package/src/ummaya/tools/documents/intake.py +1079 -0
- package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
- package/src/ummaya/tools/documents/models.py +1598 -0
- package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
- package/src/ummaya/tools/documents/orchestrator.py +96 -0
- package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
- package/src/ummaya/tools/documents/patch.py +170 -0
- package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
- package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
- package/src/ummaya/tools/documents/permissions.py +110 -0
- package/src/ummaya/tools/documents/planner.py +616 -0
- package/src/ummaya/tools/documents/registry.py +2733 -0
- package/src/ummaya/tools/documents/render.py +978 -0
- package/src/ummaya/tools/documents/render_comparison.py +113 -0
- package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
- package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
- package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
- package/src/ummaya/tools/documents/reread.py +157 -0
- package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
- package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
- package/src/ummaya/tools/documents/scorecard.py +184 -0
- package/src/ummaya/tools/documents/socratic_planner.py +193 -0
- package/src/ummaya/tools/documents/style.py +48 -0
- package/src/ummaya/tools/documents/tool_defs.py +523 -0
- package/src/ummaya/tools/documents/validate.py +347 -0
- package/src/ummaya/tools/executor.py +29 -0
- package/src/ummaya/tools/live_proxy.py +0 -3
- package/src/ummaya/tools/models.py +5 -1
- package/src/ummaya/tools/register_all.py +8 -0
- package/src/ummaya/tools/registry.py +10 -1
- package/src/ummaya/tools/routing/__init__.py +59 -0
- package/src/ummaya/tools/routing/builder.py +105 -0
- package/src/ummaya/tools/routing/cards.py +29 -0
- package/src/ummaya/tools/routing/decision_service.py +534 -0
- package/src/ummaya/tools/routing/decision_types.py +74 -0
- package/src/ummaya/tools/routing/feasibility.py +122 -0
- package/src/ummaya/tools/routing/intent.py +17 -0
- package/src/ummaya/tools/routing/intent_extractor.py +207 -0
- package/src/ummaya/tools/routing/intent_patterns.py +160 -0
- package/src/ummaya/tools/routing/intent_public_data.py +150 -0
- package/src/ummaya/tools/routing/intent_types.py +48 -0
- package/src/ummaya/tools/routing/lint.py +78 -0
- package/src/ummaya/tools/routing/metadata.py +174 -0
- package/src/ummaya/tools/routing/projection.py +340 -0
- package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
- package/src/ummaya/tools/routing/schema.py +81 -0
- package/src/ummaya/tools/routing/types.py +96 -0
- package/src/ummaya/tools/routing_index.py +2 -2
- package/src/ummaya/tools/search.py +34 -746
- package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
- package/tui/package.json +1 -1
- package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
- package/tui/src/QueryEngine.ts +12 -8
- package/tui/src/bridge/inboundAttachments.ts +3 -3
- package/tui/src/cli/handlers/auth.ts +3 -12
- package/tui/src/cli/print.ts +7 -7
- package/tui/src/commands/insights.ts +1 -1
- package/tui/src/commands/install-github-app/types.ts +8 -30
- package/tui/src/commands/plugin/types.ts +6 -28
- package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
- package/tui/src/commands/rename/generateSessionName.ts +1 -1
- package/tui/src/components/Feedback.tsx +1 -1
- package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
- package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
- package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
- package/tui/src/components/Spinner/types.ts +6 -28
- package/tui/src/components/agents/generateAgent.ts +1 -1
- package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
- package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
- package/tui/src/components/mcp/types.ts +16 -38
- package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
- package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
- package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
- package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
- package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
- package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
- package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
- package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
- package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
- package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
- package/tui/src/components/primitive/index.tsx +43 -1
- package/tui/src/components/primitive/types.ts +137 -0
- package/tui/src/components/ui/option.ts +4 -26
- package/tui/src/constants/common.ts +0 -2
- package/tui/src/constants/prompts.ts +4 -3
- package/tui/src/constants/querySource.ts +4 -26
- package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
- package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
- package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
- package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
- package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
- package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
- package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
- package/tui/src/hooks/useApiKeyVerification.ts +1 -1
- package/tui/src/hooks/useVirtualScroll.ts +1 -1
- package/tui/src/ink/ink.tsx +33 -14
- package/tui/src/ink/reconciler.ts +2 -3
- package/tui/src/ink/render-to-screen.ts +30 -10
- package/tui/src/ipc/bridge.ts +62 -15
- package/tui/src/ipc/bridgeSingleton.ts +5 -1
- package/tui/src/ipc/codec.ts +3 -3
- package/tui/src/ipc/frames.generated.ts +12 -12
- package/tui/src/ipc/llmClient.ts +151 -27
- package/tui/src/ipc/schema/frame.schema.json +1 -1
- package/tui/src/keybindings/defaultBindings.ts +4 -0
- package/tui/src/main.tsx +29 -11
- package/tui/src/native-ts/file-index/index.ts +33 -3
- package/tui/src/observability/surface.ts +2 -2
- package/tui/src/probes/toolRegistryProbe.tsx +3 -1
- package/tui/src/projectOnboardingState.ts +7 -6
- package/tui/src/query/chatMessageTypes.ts +18 -0
- package/tui/src/query/chatMessagesBuilder.ts +1 -1
- package/tui/src/query/deps.ts +1 -1
- package/tui/src/query/messageGuards.ts +106 -0
- package/tui/src/query/publicDataTerminalRepair.ts +384 -0
- package/tui/src/query/run.ts +1075 -0
- package/tui/src/query/supportBoundary.ts +168 -0
- package/tui/src/query/toolResultErrors.ts +103 -0
- package/tui/src/query/toolRunner.ts +687 -0
- package/tui/src/query/unavailableToolRepair.ts +118 -0
- package/tui/src/query.ts +9 -2186
- package/tui/src/screens/REPL.tsx +40 -29
- package/tui/src/services/api/adapterManifest.ts +4 -0
- package/tui/src/services/api/backendChat/events.ts +117 -0
- package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
- package/tui/src/services/api/backendChat/frame.ts +9 -0
- package/tui/src/services/api/backendChat/streaming.ts +430 -0
- package/tui/src/services/api/backendChat/types.ts +62 -0
- package/tui/src/services/api/backendChat.ts +1 -0
- package/tui/src/services/api/client.ts +65 -2
- package/tui/src/services/api/errorUtils.ts +5 -5
- package/tui/src/services/api/errors.ts +1 -1
- package/tui/src/services/api/logging.ts +1 -1
- package/tui/src/services/api/ummaya/evidence.ts +194 -0
- package/tui/src/services/api/ummaya/messages.ts +255 -0
- package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
- package/tui/src/services/api/ummaya/provider.ts +200 -0
- package/tui/src/services/api/ummaya/reasoning.ts +24 -0
- package/tui/src/services/api/ummaya/request.ts +200 -0
- package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
- package/tui/src/services/api/ummaya/streaming.ts +365 -0
- package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
- package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
- package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
- package/tui/src/services/api/ummaya/types.ts +110 -0
- package/tui/src/services/api/ummaya/usage.ts +30 -0
- package/tui/src/services/api/ummaya.ts +26 -418
- package/tui/src/services/api/withRetry.ts +1 -1
- package/tui/src/services/awaySummary.ts +2 -2
- package/tui/src/services/claudeAiLimits.ts +1 -1
- package/tui/src/services/compact/autoCompact.ts +1 -1
- package/tui/src/services/compact/compact.ts +1 -1
- package/tui/src/services/lsp/types.ts +8 -30
- package/tui/src/services/tips/types.ts +6 -28
- package/tui/src/services/tokenEstimation.ts +1 -1
- package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
- package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
- package/tui/src/services/tools/toolExecution.ts +94 -1
- package/tui/src/store/pendingPermissionSlot.ts +1 -1
- package/tui/src/store/session-store.ts +10 -36
- package/tui/src/stubs/any-stub.ts +15 -10
- package/tui/src/stubs/color-diff-napi.ts +37 -23
- package/tui/src/stubs/globals.d.ts +3 -3
- package/tui/src/stubs/macro-preload.ts +23 -12
- package/tui/src/tools/AdapterTool/AdapterTool.ts +1207 -714
- package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
- package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
- package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
- package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
- package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
- package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
- package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
- package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
- package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
- package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
- package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
- package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
- package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
- package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
- package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
- package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
- package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
- package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
- package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
- package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
- package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
- package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
- package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
- package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
- package/tui/src/tools/AgentTool/permissions.ts +39 -0
- package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
- package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
- package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
- package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
- package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
- package/tui/src/tools/AgentTool/runAgent.ts +1 -1
- package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
- package/tui/src/tools/AgentTool/schemas.ts +196 -0
- package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
- package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
- package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
- package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
- package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
- package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
- package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
- package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
- package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
- package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
- package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
- package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
- package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
- package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
- package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
- package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
- package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
- package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
- package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
- package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
- package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
- package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
- package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
- package/tui/src/tools/BashTool/call.ts +202 -0
- package/tui/src/tools/BashTool/callLoader.ts +35 -0
- package/tui/src/tools/BashTool/commandClassification.ts +151 -0
- package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
- package/tui/src/tools/BashTool/cwdReset.ts +33 -0
- package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
- package/tui/src/tools/BashTool/modeValidation.ts +13 -1
- package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
- package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
- package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
- package/tui/src/tools/BashTool/resultLoader.ts +29 -0
- package/tui/src/tools/BashTool/resultMapping.ts +83 -0
- package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
- package/tui/src/tools/BashTool/schemas.ts +65 -0
- package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
- package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
- package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
- package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
- package/tui/src/tools/BashTool/uiLoader.ts +37 -0
- package/tui/src/tools/BriefTool/upload.ts +1 -1
- package/tui/src/tools/CalculatorTool/parser.ts +2 -2
- package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
- package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
- package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
- package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
- package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
- package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
- package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
- package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
- package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
- package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
- package/tui/src/tools/FileEditTool/call.ts +228 -0
- package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
- package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
- package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
- package/tui/src/tools/FileWriteTool/call.ts +223 -0
- package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
- package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
- package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +25 -32
- package/tui/src/tools/LookupPrimitive/prompt.ts +0 -2
- package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
- package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
- package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
- package/tui/src/tools/NotebookEditTool/call.ts +254 -0
- package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
- package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
- package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
- package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
- package/tui/src/tools/PowerShellTool/call.ts +179 -0
- package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
- package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
- package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
- package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
- package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
- package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
- package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
- package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
- package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
- package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
- package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
- package/tui/src/tools/PowerShellTool/validation.ts +39 -0
- package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
- package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +1 -11
- package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
- package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
- package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +27 -10
- package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
- package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
- package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
- package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
- package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
- package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
- package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
- package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
- package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
- package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
- package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
- package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
- package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
- package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
- package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
- package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
- package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
- package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
- package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
- package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
- package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
- package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
- package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
- package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
- package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
- package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +2 -1
- package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
- package/tui/src/tools/WebFetchTool/call.ts +227 -0
- package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
- package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
- package/tui/src/tools/WebFetchTool/types.ts +23 -0
- package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
- package/tui/src/tools/WebFetchTool/utils.ts +1 -1
- package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
- package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
- package/tui/src/tools/WebSearchTool/call.ts +33 -0
- package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
- package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
- package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
- package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
- package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
- package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
- package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
- package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
- package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
- package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
- package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
- package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
- package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
- package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
- package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
- package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
- package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
- package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
- package/tui/src/tools/_shared/rootPrimitiveInput.ts +1 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
- package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
- package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
- package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
- package/tui/src/tools/_shared/toolChoiceRepair.ts +55 -860
- package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
- package/tui/src/tools.ts +39 -190
- package/tui/src/types/fileSuggestion.ts +4 -26
- package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
- package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
- package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
- package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
- package/tui/src/types/message.ts +80 -102
- package/tui/src/types/messageQueueTypes.ts +6 -28
- package/tui/src/types/notebook.ts +16 -38
- package/tui/src/types/statusLine.ts +4 -26
- package/tui/src/types/tools.ts +24 -46
- package/tui/src/types/utils.ts +6 -28
- package/tui/src/upstreamproxy/relay.ts +7 -3
- package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
- package/tui/src/utils/assistantMessageFactories.ts +9 -3
- package/tui/src/utils/auth.ts +129 -139
- package/tui/src/utils/bash/ast.ts +23 -23
- package/tui/src/utils/bash/bashParser.ts +5 -5
- package/tui/src/utils/billing.ts +1 -1
- package/tui/src/utils/collapseReadSearch.ts +3 -3
- package/tui/src/utils/cronTasks.ts +1 -1
- package/tui/src/utils/execFileNoThrow.ts +1 -1
- package/tui/src/utils/filePersistence/types.ts +16 -38
- package/tui/src/utils/forkedAgent.ts +1 -1
- package/tui/src/utils/gracefulShutdown.ts +4 -4
- package/tui/src/utils/heapDumpService.ts +12 -8
- package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
- package/tui/src/utils/hooks/execPromptHook.ts +1 -1
- package/tui/src/utils/hooks/skillImprovement.ts +1 -1
- package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
- package/tui/src/utils/messages.ts +18 -0
- package/tui/src/utils/migrateSessions.ts +3 -3
- package/tui/src/utils/model/model.ts +6 -6
- package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
- package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
- package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
- package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
- package/tui/src/utils/plugins/pluginLoader.ts +8 -8
- package/tui/src/utils/protectedNamespace.ts +5 -3
- package/tui/src/utils/rawJsonToolCall.ts +242 -0
- package/tui/src/utils/ripgrep.ts +16 -7
- package/tui/src/utils/sessionTitle.ts +1 -1
- package/tui/src/utils/settings/permissionValidation.ts +14 -2
- package/tui/src/utils/shell/prefix.ts +1 -1
- package/tui/src/utils/sideQuery.ts +1 -1
- package/tui/src/utils/systemThemeWatcher.ts +13 -3
- package/tui/src/utils/teleport.tsx +1 -1
- package/uv.lock +400 -14
- package/tui/src/services/api/claude.ts +0 -3540
- package/tui/src/tools/_shared/directPublicDataGuard.ts +0 -362
- package/tui/src/tools/_shared/kmaAnalysisGuard.ts +0 -197
- package/tui/src/tools/_shared/kmaAviationGuard.ts +0 -70
- package/tui/src/tools/_shared/nmcAedGuard.ts +0 -234
- package/tui/src/tools/_shared/protectedCheckGuard.ts +0 -207
- package/tui/src/tools/_shared/textToolCallGuard.ts +0 -91
|
@@ -0,0 +1,2733 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
"""Registry wiring and execution orchestration for document harness tools."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import contextlib
|
|
7
|
+
import hashlib
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import secrets
|
|
11
|
+
import tempfile
|
|
12
|
+
import unicodedata
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from difflib import SequenceMatcher
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, cast
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel
|
|
19
|
+
|
|
20
|
+
from ummaya.tools.documents.adapter_registry import (
|
|
21
|
+
DocumentAdapterRegistry,
|
|
22
|
+
build_document_adapter_registry_from_engine_registry,
|
|
23
|
+
)
|
|
24
|
+
from ummaya.tools.documents.artifact_store import ArtifactStoreError, DocumentArtifactStore
|
|
25
|
+
from ummaya.tools.documents.baselines import (
|
|
26
|
+
ConformanceBaselineCatalog,
|
|
27
|
+
load_conformance_baselines,
|
|
28
|
+
)
|
|
29
|
+
from ummaya.tools.documents.conversion import (
|
|
30
|
+
DocumentConversionRegistry,
|
|
31
|
+
UnsupportedDocumentConversionError,
|
|
32
|
+
build_default_document_conversion_registry,
|
|
33
|
+
)
|
|
34
|
+
from ummaya.tools.documents.engines import (
|
|
35
|
+
DocumentEngineRegistry,
|
|
36
|
+
build_default_document_engine_registry,
|
|
37
|
+
)
|
|
38
|
+
from ummaya.tools.documents.formats.base import DocumentFormatAdapter
|
|
39
|
+
from ummaya.tools.documents.models import (
|
|
40
|
+
ArtifactLineage,
|
|
41
|
+
AutonomousFillPlan,
|
|
42
|
+
BlockedReason,
|
|
43
|
+
DocumentArtifact,
|
|
44
|
+
DocumentDiff,
|
|
45
|
+
DocumentExtraction,
|
|
46
|
+
DocumentFormat,
|
|
47
|
+
DocumentPatch,
|
|
48
|
+
DocumentPatchOperation,
|
|
49
|
+
DocumentSavedExport,
|
|
50
|
+
DocumentToolResult,
|
|
51
|
+
DocumentWorkflowStep,
|
|
52
|
+
DocumentWorkflowStepStatus,
|
|
53
|
+
OperationType,
|
|
54
|
+
RenderArtifactRecord,
|
|
55
|
+
ToolResultStatus,
|
|
56
|
+
)
|
|
57
|
+
from ummaya.tools.documents.orchestrator import (
|
|
58
|
+
DocumentInspectionOrchestrator,
|
|
59
|
+
DocumentOrchestrator,
|
|
60
|
+
)
|
|
61
|
+
from ummaya.tools.documents.patch import apply_document_patch, copy_for_edit
|
|
62
|
+
from ummaya.tools.documents.pdfa_conformance import (
|
|
63
|
+
PdfaConformanceBridge,
|
|
64
|
+
PdfaConformanceBridgeError,
|
|
65
|
+
build_default_pdfa_conformance_bridge,
|
|
66
|
+
)
|
|
67
|
+
from ummaya.tools.documents.planner import plan_autonomous_fill
|
|
68
|
+
from ummaya.tools.documents.render import render_document_evidence
|
|
69
|
+
from ummaya.tools.documents.runtime_authoring import (
|
|
70
|
+
issue_authoring_drafts_for_unapproved_patches,
|
|
71
|
+
preview_editable_derivative,
|
|
72
|
+
unapproved_narrative_patch_targets,
|
|
73
|
+
)
|
|
74
|
+
from ummaya.tools.documents.runtime_authoring_bundle import IssuedAuthoringDraft
|
|
75
|
+
from ummaya.tools.documents.tool_defs import (
|
|
76
|
+
DOCUMENT_TOOL_IDS,
|
|
77
|
+
DocumentApplyFillRequest,
|
|
78
|
+
DocumentApplyStyleRequest,
|
|
79
|
+
DocumentCopyForEditRequest,
|
|
80
|
+
DocumentExtractRequest,
|
|
81
|
+
DocumentFieldPatch,
|
|
82
|
+
DocumentFormSchemaRequest,
|
|
83
|
+
DocumentInspectRequest,
|
|
84
|
+
DocumentLocator,
|
|
85
|
+
DocumentPrimitiveRequest,
|
|
86
|
+
DocumentRenderRequest,
|
|
87
|
+
DocumentSaveRequest,
|
|
88
|
+
DocumentStylePatch,
|
|
89
|
+
DocumentValidatePublicFormRequest,
|
|
90
|
+
build_document_tool_definitions,
|
|
91
|
+
needs_input_document_tool_result,
|
|
92
|
+
unsupported_document_tool_result,
|
|
93
|
+
)
|
|
94
|
+
from ummaya.tools.documents.validate import validate_public_form
|
|
95
|
+
from ummaya.tools.executor import ToolExecutor
|
|
96
|
+
from ummaya.tools.registry import ToolRegistry
|
|
97
|
+
|
|
98
|
+
# Case-insensitive match for document-format names and generic document words
# (Korean: 양식 "form", 서식 "template/format", 파일 "file", 문서 "document").
# NOTE(review): presumably removed from file stems as noise before name
# matching -- usage is outside this section; confirm at the call site.
_DOCUMENT_STEM_NOISE_RE = re.compile(
    r"(?:hwpx|hwp|docx|pdf|xlsx|pptx|양식|서식|파일|문서)",
    re.IGNORECASE,
)
# Case-insensitive match for a path-like token that starts with "~", "/",
# "./" or "../", contains no whitespace, quotes, backticks, angle brackets or
# pipes, and ends in a supported document extension at a word boundary.
_EXPLICIT_LOCAL_DOCUMENT_PATH_RE = re.compile(
    r"(?:~|/|\.{1,2}/)[^\s\"'`<>|]+\.(?:hwpx|hwp|doc|docx|pdf|xls|xlsx|ppt|pptx)\b",
    re.IGNORECASE,
)
# Case-insensitive match for save/export wording (Korean 저장 "save",
# 내보내 "export").
_DOCUMENT_SAVE_INTENT_RE = re.compile(r"(저장|내보내|export|save)", re.IGNORECASE)
# NOTE(review): presumably the minimum score for accepting a local document
# candidate -- the scoring code is outside this section; confirm.
_MIN_LOCAL_DOCUMENT_CANDIDATE_SCORE = 0.58
# NOTE(review): presumably caps the length of the copy-for-edit ``reason``
# text -- confirm against ``_copy_for_edit_reason``.
_COPY_FOR_EDIT_REASON_MAX_LENGTH = 300
# Case-insensitive match for wording such as 알아서 ("on your own"),
# 문서 내용 ("document content"), 내용을 파악 ("grasp the content"),
# 다음 주차 ("next week"), "autonomous" or "infer".
_AUTONOMOUS_FILL_INSTRUCTION_RE = re.compile(
    r"(알아서|문서\s*내용|내용을\s*파악|다음\s*주차|autonomous|infer)",
    re.IGNORECASE,
)
# Matches a target ending in table(s)/<n>/row(s)/<n>/cell(s)/<n> or
# table/<n>/r<n>c<n>, anchored at the start of the string or after a "/".
_DOCX_TABLE_FILL_TARGET_RE = re.compile(
    r"(?:^|/)tables?/\d+/rows?/\d+/cells?/\d+$|(?:^|/)table/\d+/r\d+c\d+$"
)
# Matches exactly /sheets/<name>/cells/<ref>, where <ref> is one to three
# ASCII letters followed by digits.
_XLSX_CELL_FILL_TARGET_RE = re.compile(r"^/sheets/[^/]+/cells/[A-Za-z]{1,3}\d+$")
# Matches exactly /slides/<n>/tables/<n>/rows/<n>/cells/<n>.
_PPTX_TABLE_FILL_TARGET_RE = re.compile(r"^/slides/\d+/tables/\d+/rows/\d+/cells/\d+$")
# Matches exactly Contents/section<n>.xml#table[<k>]/r<k>c<k>, where each <k>
# is a positive integer without leading zeros.
_HWPX_TABLE_CELL_SOURCE_RE = re.compile(
    r"^Contents/section[0-9]+\.xml#table\[[1-9][0-9]*\]/"
    r"r[1-9][0-9]*c[1-9][0-9]*$"
)
# Source format -> format of its editable derivative.
_EDITABLE_DERIVATIVE_FORMAT_BY_SOURCE: dict[DocumentFormat, DocumentFormat] = {
    DocumentFormat.hwp: DocumentFormat.hwpx,
    DocumentFormat.doc: DocumentFormat.docx,
    DocumentFormat.xls: DocumentFormat.xlsx,
    DocumentFormat.ppt: DocumentFormat.pptx,
}
# Source format -> (source label, derivative label), both upper-case.
_DERIVATIVE_LABEL_BY_FORMAT: dict[DocumentFormat, tuple[str, str]] = {
    DocumentFormat.hwp: ("HWP", "HWPX"),
    DocumentFormat.doc: ("DOC", "DOCX"),
    DocumentFormat.xls: ("XLS", "XLSX"),
    DocumentFormat.ppt: ("PPT", "PPTX"),
}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class DocumentToolRuntime:
|
|
137
|
+
"""Session-local runtime state for document harness tool execution."""
|
|
138
|
+
|
|
139
|
+
def __init__(
    self,
    *,
    session_id: str = "default",
    artifact_root: str | Path | None = None,
    engine_registry: DocumentEngineRegistry | None = None,
    adapter_registry: DocumentAdapterRegistry | None = None,
    conversion_registry: DocumentConversionRegistry | None = None,
    orchestrator: DocumentInspectionOrchestrator | None = None,
    baseline_catalog: ConformanceBaselineCatalog | None = None,
    pdfa_conformance_bridge: PdfaConformanceBridge | None = None,
    enable_default_pdfa_conformance_bridge: bool = True,
) -> None:
    """Wire up the artifact store, registries, and per-session caches.

    Every collaborator is optional. A default is built only when the
    argument is ``None``; an explicitly supplied object is always used, even
    if it evaluates falsy (for example an empty registry).

    Args:
        session_id: Forwarded to ``DocumentArtifactStore``.
        artifact_root: Forwarded to ``DocumentArtifactStore`` as ``root``.
        engine_registry: Engine registry; defaults to
            ``build_default_document_engine_registry()``.
        adapter_registry: Adapter registry; defaults to one derived from
            the resolved engine registry.
        conversion_registry: Conversion registry; defaults to
            ``build_default_document_conversion_registry()``.
        orchestrator: Orchestrator; defaults to a ``DocumentOrchestrator``
            built from the resolved adapter and engine registries.
        baseline_catalog: Conformance baselines; defaults to
            ``load_conformance_baselines()``.
        pdfa_conformance_bridge: Explicit PDF/A conformance bridge.
        enable_default_pdfa_conformance_bridge: When no bridge is given,
            build the default one if true, otherwise leave it ``None``.
    """
    self.store = DocumentArtifactStore(session_id=session_id, root=artifact_root)
    self.engine_registry = (
        engine_registry
        if engine_registry is not None
        else build_default_document_engine_registry()
    )
    self.conversion_registry = (
        conversion_registry
        if conversion_registry is not None
        else build_default_document_conversion_registry()
    )
    # Derived from the resolved engine registry so both stay in sync.
    self.adapter_registry = (
        adapter_registry
        if adapter_registry is not None
        else build_document_adapter_registry_from_engine_registry(self.engine_registry)
    )
    self.orchestrator: DocumentInspectionOrchestrator | DocumentOrchestrator = (
        orchestrator
        if orchestrator is not None
        else DocumentOrchestrator(
            adapter_registry=self.adapter_registry,
            engine_registry=self.engine_registry,
        )
    )
    self.baseline_catalog = (
        baseline_catalog if baseline_catalog is not None else load_conformance_baselines()
    )
    if pdfa_conformance_bridge is not None:
        self.pdfa_conformance_bridge = pdfa_conformance_bridge
    elif enable_default_pdfa_conformance_bridge:
        self.pdfa_conformance_bridge = build_default_pdfa_conformance_bridge()
    else:
        self.pdfa_conformance_bridge = None
    # Session-local caches, keyed by artifact id (or draft id for drafts).
    self._artifacts: dict[str, DocumentArtifact] = {}
    self._extractions: dict[str, DocumentExtraction] = {}
    self._diffs_by_artifact_id: dict[str, DocumentDiff] = {}
    self._issued_authoring_drafts: dict[str, IssuedAuthoringDraft] = {}
|
|
181
|
+
|
|
182
|
+
async def handle(self, tool_id: str, request: BaseModel) -> dict[str, Any]:
    """Route a validated request to the handler registered for ``tool_id``.

    Unknown tool ids produce an unsupported-tool result with the correlation
    id ``"unknown"``. The result is passed through
    ``_with_runtime_workflow_steps`` and returned as a JSON-mode dump.
    """
    # Lazy callables: only the selected handler runs, and each keeps the
    # same request cast the handler's signature expects.
    handlers = {
        "document": lambda: self.document(cast(DocumentPrimitiveRequest, request)),
        "document_inspect": lambda: self.inspect(cast(DocumentInspectRequest, request)),
        "document_extract": lambda: self.extract(cast(DocumentExtractRequest, request)),
        "document_form_schema": lambda: self.form_schema(
            cast(DocumentFormSchemaRequest, request)
        ),
        "document_copy_for_edit": lambda: self.copy_for_edit(
            cast(DocumentCopyForEditRequest, request)
        ),
        "document_apply_fill": lambda: self.apply_fill(
            cast(DocumentApplyFillRequest, request)
        ),
        "document_apply_style": lambda: self.apply_style(
            cast(DocumentApplyStyleRequest, request)
        ),
        "document_render": lambda: self.render(cast(DocumentRenderRequest, request)),
        "document_validate_public_form": lambda: self.validate_public_form(
            cast(DocumentValidatePublicFormRequest, request)
        ),
        "document_save": lambda: self.save(cast(DocumentSaveRequest, request)),
    }
    run_handler = handlers.get(tool_id)
    if run_handler is None:
        outcome = unsupported_document_tool_result(
            tool_id=tool_id,
            correlation_id="unknown",
            message=f"Unknown document harness tool: {tool_id}.",
        )
    else:
        outcome = run_handler()
    return self._with_runtime_workflow_steps(outcome).model_dump(mode="json")
|
|
211
|
+
|
|
212
|
+
def document(self, request: DocumentPrimitiveRequest) -> DocumentToolResult:  # noqa: C901
    """Run one model-facing document operation through internal stages.

    Stages, in order:

    1. Resolve ``request.document`` to a stored artifact.
    2. ``inspect`` / ``extract``: return the artifact's extraction.
    3. ``style``: copy for edit, then apply ``request.styles``.
    4. ``fill`` / ``validate`` / ``save``: optionally derive patches from an
       autonomous fill plan, gate narrative patches on an approved draft,
       copy for edit, then apply the fill/style patch.
    5. Render the mutated artifact.
    6. Validate against ``request.template_id`` when one is given.
    7. Save when a destination path or display name is available.

    Args:
        request: The primitive request. This method reads ``document``,
            ``correlation_id``, ``operation``, ``instruction``, ``patches``,
            ``styles``, ``approved_draft_id``, ``approved_draft_sha256``,
            ``template_id``, ``destination_path`` and
            ``destination_display_name``.

    Returns:
        The final result with ``tool_id`` set to ``"document"`` on the
        success path. A failing stage short-circuits: its result is passed
        through ``_document_result_from_stage`` and returned. Missing input
        yields a needs-input result.
    """
    # Resolution returns either an artifact or a DocumentToolResult.
    source_or_read = self._resolve_artifact_for_read(
        request.document,
        request.correlation_id,
        tool_id="document_inspect",
    )
    if isinstance(source_or_read, DocumentToolResult):
        # An ok result here carries read output but no artifact to mutate;
        # re-label it as coming from the "document" primitive.
        if source_or_read.status is ToolResultStatus.ok and request.operation in {
            "inspect",
            "extract",
        }:
            return _with_workflow_steps(
                source_or_read.model_copy(
                    update={
                        "tool_id": "document",
                        "correlation_id": request.correlation_id,
                    }
                ),
                artifacts=self._artifacts,
            )
        if (
            source_or_read.status is ToolResultStatus.ok
            and _is_attachment_context_extraction(source_or_read.extraction)
            and request.operation in {"fill", "save"}
        ):
            return self._attachment_context_derivative_result(request, source_or_read)
        # Any other operation on an ok read-only result is refused.
        if source_or_read.status is ToolResultStatus.ok:
            return unsupported_document_tool_result(
                tool_id="document",
                correlation_id=request.correlation_id,
                artifact_refs=tuple(source_or_read.artifact_refs),
                message=(
                    "Document format is known-only and read-only in the current "
                    "harness capability profile; mutation, render, and save are "
                    "not promoted for this artifact."
                ),
                reason=BlockedReason.unsupported_operation,
            )
        # Non-ok resolution result: hand it back via the stage adapter.
        return _document_result_from_stage(
            source_or_read,
            correlation_id=request.correlation_id,
        )
    source = source_or_read

    if request.operation in {"inspect", "extract"}:
        extraction = self._extraction_for_artifact(source, request.correlation_id)
        return _with_workflow_steps(
            DocumentToolResult(
                tool_id="document",
                correlation_id=request.correlation_id,
                status=ToolResultStatus.ok,
                artifact_refs=[source.artifact_id],
                extraction=extraction,
                text_summary="Document inspection completed through the document primitive.",
            ),
            artifacts=self._artifacts,
        )

    # State shared by the mutation branches and the later stages.
    mutation_result: DocumentToolResult | None = None
    working_artifact_id: str | None = None
    autonomous_plan: AutonomousFillPlan | None = None
    autonomous_save_path: str | None = None
    autonomous_save_display_name: str | None = None
    if request.operation == "style":
        if not request.styles:
            return needs_input_document_tool_result(
                tool_id="document",
                correlation_id=request.correlation_id,
                artifact_refs=(source.artifact_id,),
                message="Document style operation requires at least one bounded style patch.",
            )
        copy_result = self.copy_for_edit(
            DocumentCopyForEditRequest(
                correlation_id=request.correlation_id,
                document=DocumentLocator(artifact_id=source.artifact_id),
                reason=_copy_for_edit_reason(request.instruction),
            )
        )
        if copy_result.status is not ToolResultStatus.ok:
            return _document_result_from_stage(
                copy_result,
                correlation_id=request.correlation_id,
            )
        # copy_for_edit lists the source first and the working copy last.
        working_artifact_id = copy_result.artifact_refs[-1]
        mutation_result = self.apply_style(
            DocumentApplyStyleRequest(
                correlation_id=request.correlation_id,
                document=DocumentLocator(artifact_id=working_artifact_id),
                styles=request.styles,
            )
        )
    elif request.operation in {"fill", "validate", "save"}:
        planning_artifact = source
        planning_format = planning_artifact.format
        planning_extraction: DocumentExtraction | None = None
        candidate_patches = request.patches
        candidate_style_patches = request.styles
        # Plan autonomously when no explicit patches were given, or when the
        # helper prefers a plan for this instruction.
        if not candidate_patches or _should_prefer_autonomous_fill_plan(
            request.instruction,
            candidate_patches,
        ):
            derivative_format = _editable_derivative_format(source.format)
            if derivative_format is not None:
                # Plan against a preview of the editable derivative rather
                # than the source format.
                preview = preview_editable_derivative(
                    source,
                    correlation_id=request.correlation_id,
                    derivative_format=derivative_format,
                    conversion_registry=self.conversion_registry,
                    adapter_registry=self.adapter_registry,
                )
                if isinstance(preview, DocumentToolResult):
                    return _document_result_from_stage(
                        preview,
                        correlation_id=request.correlation_id,
                    )
                planning_extraction = preview.extraction
                planning_format = preview.document_format
            else:
                planning_extraction = self._extraction_for_artifact(
                    planning_artifact,
                    request.correlation_id,
                )
            document_ir = self.orchestrator.build_document_ir(
                artifact_id=planning_artifact.artifact_id,
                document_format=planning_format,
                extraction=planning_extraction,
            )
            autonomous_plan = plan_autonomous_fill(
                document_ir,
                instruction=request.instruction,
            )
            # The plan's patches replace any explicit patches on the request.
            candidate_patches = _fill_patches_from_autonomous_plan(autonomous_plan)
            missing_required_slot_ids = _missing_required_unfilled_slot_ids(autonomous_plan)
            if missing_required_slot_ids:
                return needs_input_document_tool_result(
                    tool_id="document",
                    correlation_id=request.correlation_id,
                    artifact_refs=(source.artifact_id,),
                    message=(
                        "Document autonomous fill cannot proceed while required "
                        "slot(s) lack safe candidate values: "
                        f"{', '.join(missing_required_slot_ids)}."
                    ),
                )
            if autonomous_plan.requires_human_review and not candidate_patches:
                return needs_input_document_tool_result(
                    tool_id="document",
                    correlation_id=request.correlation_id,
                    artifact_refs=(source.artifact_id,),
                    message=(
                        "Document autonomous fill requires human review for "
                        "blocked or missing slot(s): "
                        f"{', '.join(autonomous_plan.blocked_slot_ids)}."
                    ),
                )
            # Explicit styles on the request win over plan-derived styles.
            if not candidate_style_patches:
                candidate_style_patches = _style_patches_from_autonomous_plan(autonomous_plan)
            if autonomous_plan.save_intent is not None:
                autonomous_save_path = autonomous_plan.save_intent.destination_path
                autonomous_save_display_name = (
                    autonomous_plan.save_intent.destination_display_name
                )
        if not candidate_patches:
            return needs_input_document_tool_result(
                tool_id="document",
                correlation_id=request.correlation_id,
                artifact_refs=(source.artifact_id,),
                message=(
                    "Document fill operation requires at least one explicit patch "
                    "or a safe autonomous fill plan."
                ),
            )
        # Explicit-patch path: no extraction has been computed yet.
        if planning_extraction is None:
            derivative_format = _editable_derivative_format(source.format)
            if derivative_format is not None:
                preview = preview_editable_derivative(
                    source,
                    correlation_id=request.correlation_id,
                    derivative_format=derivative_format,
                    conversion_registry=self.conversion_registry,
                    adapter_registry=self.adapter_registry,
                )
                if isinstance(preview, DocumentToolResult):
                    return _document_result_from_stage(
                        preview,
                        correlation_id=request.correlation_id,
                    )
                planning_extraction = preview.extraction
                planning_format = preview.document_format
            else:
                planning_extraction = self._extraction_for_artifact(
                    source,
                    request.correlation_id,
                )
        # Approval gate: runs before any working copy is created.
        unapproved_targets = unapproved_narrative_patch_targets(
            candidate_patches,
            extraction=planning_extraction,
            approved_draft_id=request.approved_draft_id,
            approved_draft_sha256=request.approved_draft_sha256,
            issued_drafts=tuple(self._issued_authoring_drafts.values()),
        )
        if unapproved_targets:
            issued_drafts = issue_authoring_drafts_for_unapproved_patches(
                candidate_patches,
                extraction=planning_extraction,
            )
            # Remember issued drafts so a later request can reference them.
            for draft in issued_drafts:
                self._issued_authoring_drafts[draft.draft_id] = draft
            return needs_input_document_tool_result(
                tool_id="document",
                correlation_id=request.correlation_id,
                artifact_refs=(source.artifact_id,),
                message=(
                    "Narrative document patch requires an approved draft hash before "
                    f"mutation: {', '.join(unapproved_targets)}."
                    f"{_authoring_draft_approval_message(issued_drafts)}"
                ),
            )
        if working_artifact_id is None:
            copy_result = self.copy_for_edit(
                DocumentCopyForEditRequest(
                    correlation_id=request.correlation_id,
                    document=DocumentLocator(artifact_id=source.artifact_id),
                    reason=_copy_for_edit_reason(request.instruction),
                )
            )
            if copy_result.status is not ToolResultStatus.ok:
                return _document_result_from_stage(
                    copy_result,
                    correlation_id=request.correlation_id,
                )
            working_artifact_id = copy_result.artifact_refs[-1]
        working_artifact = self._artifact_by_id(
            working_artifact_id,
            request.correlation_id,
        )
        if isinstance(working_artifact, DocumentToolResult):
            return _document_result_from_stage(
                working_artifact,
                correlation_id=request.correlation_id,
            )
        # Still None only if a preview above carried no extraction.
        if planning_extraction is None:
            planning_extraction = self._extraction_for_artifact(
                working_artifact,
                request.correlation_id,
            )
        patches = _document_primitive_fill_patches(
            candidate_patches,
            adapter=self.adapter_registry.require_promoted(working_artifact.format),
            extraction=planning_extraction,
        )
        if not patches:
            return needs_input_document_tool_result(
                tool_id="document",
                correlation_id=request.correlation_id,
                artifact_refs=(source.artifact_id,),
                message=(
                    "Document fill operation could not map any natural-language patch "
                    "target to extracted document fields."
                ),
            )
        mutation_result = self._apply_patch_result(
            tool_id="document_apply_fill",
            correlation_id=request.correlation_id,
            working=working_artifact,
            patch=_fill_style_patch(
                correlation_id=request.correlation_id,
                patches=patches,
                styles=candidate_style_patches,
                working=working_artifact,
            ),
        )

    # Neither branch above ran: the operation is not one this method handles.
    if mutation_result is None:
        artifact_refs: tuple[str, ...] = (source.artifact_id,)
        if working_artifact_id is not None:
            artifact_refs = (source.artifact_id, working_artifact_id)
        return unsupported_document_tool_result(
            tool_id="document",
            correlation_id=request.correlation_id,
            artifact_refs=artifact_refs,
            message=f"Unsupported document primitive operation: {request.operation}.",
        )
    if working_artifact_id is None:
        return unsupported_document_tool_result(
            tool_id="document",
            correlation_id=request.correlation_id,
            artifact_refs=(source.artifact_id,),
            message="Document mutation did not create a working copy.",
        )
    if mutation_result.status is not ToolResultStatus.ok:
        return _document_result_from_stage(
            mutation_result,
            correlation_id=request.correlation_id,
        )
    # The last ref of the mutation result is rendered, validated and saved.
    derivative_artifact_id = mutation_result.artifact_refs[-1]

    render_result = self.render(
        DocumentRenderRequest(
            correlation_id=request.correlation_id,
            document=DocumentLocator(artifact_id=derivative_artifact_id),
        )
    )
    if render_result.status is not ToolResultStatus.ok:
        return _document_result_from_stage(render_result, correlation_id=request.correlation_id)

    render_artifact_refs = _unique_artifact_refs(
        [source.artifact_id, working_artifact_id, *render_result.artifact_refs]
    )
    text_summary = "Document edit completed with automatic compact diff review evidence."
    if autonomous_plan is not None and autonomous_plan.blocked_slot_ids:
        text_summary = (
            f"{text_summary} Human review is still required for skipped "
            f"slot(s): {', '.join(autonomous_plan.blocked_slot_ids)}."
        )
    # The render result is the base of the final result.
    result = render_result.model_copy(
        update={
            "tool_id": "document",
            "artifact_refs": render_artifact_refs,
            "text_summary": text_summary,
        }
    )

    if request.template_id is not None:
        validation_result = self.validate_public_form(
            DocumentValidatePublicFormRequest(
                correlation_id=request.correlation_id,
                document=DocumentLocator(artifact_id=derivative_artifact_id),
                template_id=request.template_id,
            )
        )
        # Only the report is merged; the validation status is not inspected
        # here.
        result = result.model_copy(
            update={"validation_report": validation_result.validation_report}
        )

    # Destination precedence: explicit request field, then the autonomous
    # plan's save intent, then a path taken from the instruction.
    destination_path = (
        request.destination_path
        or autonomous_save_path
        or _explicit_save_path_from_instruction(
            request.instruction,
            source_artifact=source,
        )
    )
    if request.destination_display_name is not None or destination_path is not None:
        destination_display_name = (
            request.destination_display_name or autonomous_save_display_name
        )
        if destination_display_name is None:
            # Fall back to the file name of the destination path.
            destination_display_name = Path(cast(str, destination_path)).name
        save_result = self.save(
            DocumentSaveRequest(
                correlation_id=request.correlation_id,
                document=DocumentLocator(artifact_id=derivative_artifact_id),
                destination_display_name=destination_display_name,
                destination_path=destination_path,
            )
        )
        if save_result.status is not ToolResultStatus.ok:
            return _document_result_from_stage(
                save_result,
                correlation_id=request.correlation_id,
            )
        result = result.model_copy(
            update={
                "artifact_refs": _unique_artifact_refs(
                    [*result.artifact_refs, *save_result.artifact_refs]
                ),
                "saved_exports": save_result.saved_exports,
                "workflow_steps": _merge_save_workflow_steps(
                    result.workflow_steps,
                    save_result.workflow_steps,
                ),
            }
        )
    return result
|
|
588
|
+
|
|
589
|
+
def inspect(self, request: DocumentInspectRequest) -> DocumentToolResult:
    """Inspect a document locator and register its source artifact.

    A locator without a path is looked up by artifact id in the session.
    A locator with a path is inspected on disk through the orchestrator
    and, once its format is known, stored as a source artifact together
    with its extraction.
    """
    locator = request.document
    correlation_id = request.correlation_id

    guard_result = self._ambiguous_locator_result(
        locator,
        correlation_id,
        tool_id="document_inspect",
    )
    if guard_result is not None:
        return guard_result

    if locator.path is None:
        # Artifact-id locator: answer from what the session already holds.
        known_artifact = self._artifact_by_id(locator.artifact_id, correlation_id)
        if isinstance(known_artifact, DocumentToolResult):
            return known_artifact
        known_extraction = self._extraction_for_artifact(known_artifact, correlation_id)
        return DocumentToolResult(
            tool_id="document_inspect",
            correlation_id=correlation_id,
            status=ToolResultStatus.ok,
            artifact_refs=[known_artifact.artifact_id],
            extraction=known_extraction,
            text_summary="Document artifact is already available in the local harness store.",
        )

    source_path = Path(locator.path).expanduser()
    if not source_path.is_file():
        return _missing_local_document_result(
            source_path,
            correlation_id=correlation_id,
            tool_id="document_inspect",
            expected_format=locator.expected_format,
        )

    inspected = self.orchestrator.inspect_local_path(
        source_path,
        expected_format=locator.expected_format,
        correlation_id=correlation_id,
    )
    if inspected.status is not ToolResultStatus.ok:
        # Re-issue the orchestrator's failure under this tool's id.
        return DocumentToolResult(
            tool_id="document_inspect",
            correlation_id=correlation_id,
            status=inspected.status,
            artifact_refs=inspected.artifact_refs,
            extraction=inspected.extraction,
            findings=inspected.findings,
            text_summary=inspected.text_summary,
            blocked_reason=inspected.blocked_reason,
        )

    def ok_result(artifact_refs: list[str]) -> DocumentToolResult:
        """Build the ok result that forwards the orchestrator's output."""
        return DocumentToolResult(
            tool_id="document_inspect",
            correlation_id=correlation_id,
            status=ToolResultStatus.ok,
            artifact_refs=artifact_refs,
            extraction=inspected.extraction,
            findings=inspected.findings,
            text_summary=inspected.text_summary,
        )

    try:
        document_format = _format_from_extraction_or_suffix(inspected.extraction, source_path)
    except ValueError:
        expected_format = locator.expected_format
        if expected_format is None:
            # Format unknown: report the inspection but register no artifact.
            return ok_result([])
        document_format = expected_format

    new_artifact_id = _source_artifact_id(correlation_id)
    registered = self._source_artifact_for_inspected_path(
        artifact_id=new_artifact_id,
        source_path=source_path,
        document_format=document_format,
        correlation_id=correlation_id,
    )
    if isinstance(registered, DocumentToolResult):
        return registered

    self._artifacts[registered.artifact_id] = registered
    if inspected.extraction is not None:
        self._extractions[registered.artifact_id] = inspected.extraction
    return ok_result([registered.artifact_id])
|
|
676
|
+
|
|
677
|
+
def extract(self, request: DocumentExtractRequest) -> DocumentToolResult:
    """Return the filtered extraction for a source or derivative artifact.

    When resolution yields a result instead of an artifact, an ok result
    that carries an extraction is filtered and re-issued; any other result
    is returned unchanged.
    """
    resolved = self._resolve_artifact_for_read(
        request.document,
        request.correlation_id,
        tool_id="document_extract",
    )
    if isinstance(resolved, DocumentToolResult):
        if resolved.status is not ToolResultStatus.ok or resolved.extraction is None:
            return resolved
        unfiltered = resolved.extraction
        refs = resolved.artifact_refs
        summary = "Document extraction returned normalized known-only local content."
    else:
        unfiltered = self._extraction_for_artifact(resolved, request.correlation_id)
        refs = [resolved.artifact_id]
        summary = "Document extraction returned normalized local content."

    # Both paths apply the caller's include_* flags the same way.
    filtered = _filtered_extraction(
        unfiltered,
        include_tables=request.include_tables,
        include_images=request.include_images,
        include_fields=request.include_fields,
    )
    return DocumentToolResult(
        tool_id="document_extract",
        correlation_id=request.correlation_id,
        status=ToolResultStatus.ok,
        artifact_refs=refs,
        extraction=filtered,
        text_summary=summary,
    )
|
|
718
|
+
|
|
719
|
+
def form_schema(self, request: DocumentFormSchemaRequest) -> DocumentToolResult:
    """Expose an artifact's fields as the model-facing form schema.

    The schema is a ``DocumentExtraction`` carrying only the artifact id,
    fields, metadata and warnings of the full extraction.
    """
    resolved = self._resolve_artifact_for_read(
        request.document,
        request.correlation_id,
        tool_id="document_form_schema",
    )
    if isinstance(resolved, DocumentToolResult):
        return resolved

    full_extraction = self._extraction_for_artifact(resolved, request.correlation_id)
    schema = DocumentExtraction(
        artifact_id=full_extraction.artifact_id,
        fields=full_extraction.fields,
        metadata=full_extraction.metadata,
        warnings=full_extraction.warnings,
    )
    field_count = len(schema.fields)
    return DocumentToolResult(
        tool_id="document_form_schema",
        correlation_id=request.correlation_id,
        status=ToolResultStatus.ok,
        artifact_refs=[resolved.artifact_id],
        extraction=schema,
        text_summary=f"Returned {field_count} public-form field(s).",
    )
|
|
743
|
+
|
|
744
|
+
def copy_for_edit(self, request: DocumentCopyForEditRequest) -> DocumentToolResult:
    """Create a working copy of a source artifact for editing.

    Formats that have an editable derivative format are delegated to
    ``_copy_source_for_edit_as_derivative``. Other formats are copied
    as-is, and the source's extraction is cached under the new artifact id.
    """
    source = self._resolve_artifact_for_read(
        request.document,
        request.correlation_id,
        tool_id="document_copy_for_edit",
    )
    if isinstance(source, DocumentToolResult):
        return source

    target_format = _editable_derivative_format(source.format)
    if target_format is not None:
        return self._copy_source_for_edit_as_derivative(
            source,
            request,
            derivative_format=target_format,
        )

    working_id = _generated_artifact_id("working", request.correlation_id)
    # Module-level ``copy_for_edit`` helper, not this method.
    working_copy = copy_for_edit(
        self.store,
        source,
        artifact_id=working_id,
        destination_name=f"{working_id}.{source.format.value}",
    )
    self._artifacts[working_copy.artifact_id] = working_copy
    source_extraction = self._extraction_for_artifact(source, request.correlation_id)
    self._extractions[working_copy.artifact_id] = source_extraction
    return DocumentToolResult(
        tool_id="document_copy_for_edit",
        correlation_id=request.correlation_id,
        status=ToolResultStatus.ok,
        artifact_refs=[source.artifact_id, working_copy.artifact_id],
        text_summary="Created a local working copy for document editing.",
    )
|
|
779
|
+
|
|
780
|
+
def _copy_source_for_edit_as_derivative(
    self,
    source: DocumentArtifact,
    request: DocumentCopyForEditRequest,
    *,
    derivative_format: DocumentFormat,
) -> DocumentToolResult:
    """Convert ``source`` to ``derivative_format`` and store it as a working copy.

    Returns an unsupported result when no conversion engine is registered
    for the format pair, or when the engine raises ``ValueError`` during
    conversion. On success the derivative and its extraction are cached.
    """
    correlation_id = request.correlation_id
    source_refs = (source.artifact_id,)
    source_label, derivative_label = _conversion_labels(source.format, derivative_format)

    try:
        engine = self.conversion_registry.require(source.format, derivative_format)
    except UnsupportedDocumentConversionError:
        missing_message = _conversion_missing_message(
            source_format=source.format,
            derivative_format=derivative_format,
            source_label=source_label,
            derivative_label=derivative_label,
        )
        return unsupported_document_tool_result(
            tool_id="document_copy_for_edit",
            correlation_id=correlation_id,
            artifact_refs=source_refs,
            message=missing_message,
        )

    working_id = _generated_artifact_id("working", correlation_id)
    try:
        payload = engine.convert_for_edit(source)
    except ValueError as exc:
        return unsupported_document_tool_result(
            tool_id="document_copy_for_edit",
            correlation_id=correlation_id,
            artifact_refs=source_refs,
            message=f"{source_label} to {derivative_label} conversion failed validation: {exc}",
            reason=BlockedReason.validation_failed,
        )

    derivative = self.store.write_derivative(
        source,
        artifact_id=working_id,
        lineage=ArtifactLineage.working_copy,
        destination_name=f"{working_id}.{derivative_format.value}",
        payload=payload,
        document_format=derivative_format,
        mime_type=_mime_for_format(derivative_format),
        expanded_byte_size=len(payload),
    )
    self._artifacts[derivative.artifact_id] = derivative
    # Extract from the converted derivative itself, not from the source.
    derivative_extraction = self._extraction_for_artifact(derivative, correlation_id)
    self._extractions[derivative.artifact_id] = derivative_extraction

    summary = (
        f"Converted {source_label} to editable {derivative_label} derivative "
        "for document editing "
        f"through {engine.engine_id}."
    )
    return DocumentToolResult(
        tool_id="document_copy_for_edit",
        correlation_id=correlation_id,
        status=ToolResultStatus.ok,
        artifact_refs=[source.artifact_id, derivative.artifact_id],
        text_summary=summary,
    )
|
|
839
|
+
|
|
840
|
+
def apply_fill(self, request: DocumentApplyFillRequest) -> DocumentToolResult:
    """Normalize the requested fill patches and apply them to a working derivative.

    Returns the ``DocumentToolResult`` produced by write resolution unchanged
    when the locator does not resolve to a writable artifact; otherwise returns
    whatever ``_apply_patch_result`` reports for the normalized patch.
    """
    tool_id = "document_apply_fill"
    correlation_id = request.correlation_id
    target = self._resolve_artifact_for_write(
        request.document,
        correlation_id,
        tool_id=tool_id,
    )
    if isinstance(target, DocumentToolResult):
        # Resolution already produced the result to hand back to the caller.
        return target

    current_extraction = self._extraction_for_artifact(target, correlation_id)
    format_adapter = self.adapter_registry.require_promoted(target.format)
    normalized_patches = format_adapter.normalize_fill_patches(
        request.patches,
        extraction=current_extraction,
    )
    # The patch is built from a copy of the request so the caller's model is untouched.
    fill_request = request.model_copy(update={"patches": normalized_patches})
    return self._apply_patch_result(
        tool_id=tool_id,
        correlation_id=correlation_id,
        working=target,
        patch=_fill_patch(fill_request, target),
    )
|
|
867
|
+
|
|
868
|
+
def apply_style(self, request: DocumentApplyStyleRequest) -> DocumentToolResult:
    """Build a style patch from the request and apply it to a working derivative.

    Returns the ``DocumentToolResult`` produced by write resolution unchanged
    when the locator does not resolve to a writable artifact.
    """
    tool_id = "document_apply_style"
    correlation_id = request.correlation_id
    target = self._resolve_artifact_for_write(
        request.document,
        correlation_id,
        tool_id=tool_id,
    )
    if isinstance(target, DocumentToolResult):
        return target
    return self._apply_patch_result(
        tool_id=tool_id,
        correlation_id=correlation_id,
        working=target,
        patch=_style_patch(request, target),
    )
|
|
884
|
+
|
|
885
|
+
def render(self, request: DocumentRenderRequest) -> DocumentToolResult:
    """Produce local render evidence for a document artifact.

    The artifact is resolved for reading. When a diff is recorded for it, the
    diff's source artifact is resolved and passed along as the baseline; if the
    render yields records, the diff is updated with the render outputs, cached
    and persisted. Resolution results are returned unchanged, and a diff that
    fails to load yields a failed ``document_diff_lookup`` result.
    """
    correlation_id = request.correlation_id
    target = self._resolve_artifact_for_read(
        request.document,
        correlation_id,
        tool_id="document_render",
    )
    if isinstance(target, DocumentToolResult):
        return target

    try:
        recorded_diff = self._diff_for_artifact(target.artifact_id)
    except ArtifactStoreError as exc:
        return DocumentToolResult(
            tool_id="document_diff_lookup",
            correlation_id=correlation_id,
            status=ToolResultStatus.failed,
            artifact_refs=[target.artifact_id],
            text_summary=f"Document diff metadata failed validation: {exc}",
        )

    baseline_artifact: DocumentArtifact | None = None
    if recorded_diff is not None:
        resolved_baseline = self._artifact_by_id(recorded_diff.source_artifact_id, correlation_id)
        if isinstance(resolved_baseline, DocumentToolResult):
            # The baseline lookup failed; surface its result instead of rendering.
            return resolved_baseline
        baseline_artifact = resolved_baseline

    evidence = render_document_evidence(
        self.store,
        target,
        engine_registry=self.engine_registry,
        correlation_id=correlation_id,
        artifact_id_prefix=_generated_artifact_id("render", correlation_id),
        diff=recorded_diff,
        baseline_artifact=baseline_artifact,
    )

    if recorded_diff is not None and evidence.records:
        render_fields = {
            "render_artifacts": evidence.records,
            "baseline_render_artifacts": evidence.baseline_records,
            "changed_viewports": evidence.changed_viewports,
            "viewport_cameras": evidence.viewport_cameras,
        }
        recorded_diff = recorded_diff.model_copy(update=render_fields)
        # Keep the in-memory cache and the store in step with the enriched diff.
        self._diffs_by_artifact_id[target.artifact_id] = recorded_diff
        self.store.store_diff(recorded_diff)

    outcome = DocumentToolResult(
        tool_id="document_render",
        correlation_id=correlation_id,
        status=evidence.status,
        artifact_refs=[target.artifact_id, *evidence.artifact_refs],
        promotion_gate_result=evidence.promotion_gate_result,
        diff=recorded_diff,
        render_artifacts=evidence.records,
        text_summary=evidence.text_summary,
        blocked_reason=evidence.blocked_reason,
    )
    return _with_workflow_steps(
        outcome,
        artifacts=self._artifacts,
        render_records=evidence.records,
    )
|
|
946
|
+
|
|
947
|
+
def validate_public_form(
    self,
    request: DocumentValidatePublicFormRequest,
) -> DocumentToolResult:
    """Check a document artifact against a baseline from the conformance catalog.

    Returns the read-resolution result unchanged when the locator cannot be
    resolved, and an unsupported result with ``validation_failed`` when the
    catalog raises ``KeyError`` for ``request.template_id``.
    """
    tool_id = "document_validate_public_form"
    correlation_id = request.correlation_id
    target = self._resolve_artifact_for_read(
        request.document,
        correlation_id,
        tool_id=tool_id,
    )
    if isinstance(target, DocumentToolResult):
        return target

    try:
        template_baseline = self.baseline_catalog.by_template_id(request.template_id)
    except KeyError:
        return unsupported_document_tool_result(
            tool_id=tool_id,
            correlation_id=correlation_id,
            artifact_refs=(target.artifact_id,),
            message=f"Unknown public-form baseline: {request.template_id}.",
            reason=BlockedReason.validation_failed,
        )

    current_extraction = self._extraction_for_artifact(target, correlation_id)
    return validate_public_form(
        current_extraction,
        baseline=template_baseline,
        artifact_id=target.artifact_id,
        correlation_id=correlation_id,
    )
|
|
976
|
+
|
|
977
|
+
def save(self, request: DocumentSaveRequest) -> DocumentToolResult:
    """Write a derivative out as an export artifact, optionally as PDF/A.

    Steps, in order: resolve the artifact for writing, check the requested
    destination, read the artifact bytes, run the PDF/A bridge when a PDF/A
    export was requested, write the explicit local export when
    ``destination_path`` is given, store the export derivative, then look up
    the recorded diff for the result. Each guard returns its own result and
    stops the sequence.
    """
    tool_id = "document_save"
    correlation_id = request.correlation_id
    reviewed = self._resolve_artifact_for_write(
        request.document,
        correlation_id,
        tool_id=tool_id,
    )
    if isinstance(reviewed, DocumentToolResult):
        return reviewed

    def _blocked(message: str, reason: object) -> DocumentToolResult:
        # Every refusal below shares the same tool id, correlation id and artifact ref.
        return unsupported_document_tool_result(
            tool_id="document_save",
            correlation_id=correlation_id,
            artifact_refs=(reviewed.artifact_id,),
            message=message,
            reason=reason,
        )

    wants_pdfa = _pdfa_export_requested(
        reviewed,
        destination_display_name=request.destination_display_name,
        destination_path=request.destination_path,
    )
    destination_refusal = _blocked_local_export_destination_result(
        request,
        reviewed,
        allow_pdfa_alias=wants_pdfa,
    )
    if destination_refusal is not None:
        return destination_refusal

    export_bytes = Path(reviewed.source_path).read_bytes()
    pdfa_note = ""
    if wants_pdfa:
        if reviewed.format is not DocumentFormat.pdf:
            return _blocked(
                "PDF/A export is only available for PDF derivatives.",
                BlockedReason.extension_mismatch,
            )
        if self.pdfa_conformance_bridge is None:
            return _blocked(
                (
                    "PDF/A export requires a local Ghostscript PDF/A exporter "
                    "and veraPDF post-write validator."
                ),
                BlockedReason.validation_failed,
            )
        try:
            pdfa_outcome = self.pdfa_conformance_bridge.export_pdfa(export_bytes)
        except PdfaConformanceBridgeError as exc:
            return _blocked(
                f"PDF/A post-write conformance gate failed: {exc}",
                BlockedReason.validation_failed,
            )
        # From here on the bridge's output is what gets written.
        export_bytes = pdfa_outcome.payload
        pdfa_note = (
            " PDF/A post-write conformance passed through "
            f"{pdfa_outcome.report.exporter_id} and "
            f"{pdfa_outcome.report.validator_id}."
        )

    export_id = _generated_artifact_id("export", correlation_id)
    saved_exports: tuple[DocumentSavedExport, ...] = ()
    if request.destination_path is not None:
        try:
            explicit_export = _write_explicit_local_export(
                reviewed,
                export_artifact_id=export_id,
                payload=export_bytes,
                destination_path=request.destination_path,
                allow_pdfa_alias=wants_pdfa,
            )
        except _LocalExportBlockedError as exc:
            return _blocked(str(exc), exc.reason)
        saved_exports = (explicit_export,)

    exported = self.store.write_derivative(
        reviewed,
        artifact_id=export_id,
        lineage=ArtifactLineage.export,
        destination_name=request.destination_display_name,
        payload=export_bytes,
        document_format=reviewed.format,
        mime_type=reviewed.mime_type,
        expanded_byte_size=len(export_bytes),
    )
    self._artifacts[exported.artifact_id] = exported
    self._extractions[exported.artifact_id] = self._extraction_for_artifact(
        exported,
        correlation_id,
    )

    # NOTE(review): this lookup runs after the export has been written, so a
    # failure here is reported although the export already exists - confirm intended.
    try:
        recorded_diff = self._diff_for_artifact(reviewed.artifact_id)
    except ArtifactStoreError as exc:
        return DocumentToolResult(
            tool_id="document_diff_lookup",
            correlation_id=correlation_id,
            status=ToolResultStatus.failed,
            artifact_refs=[reviewed.artifact_id],
            text_summary=f"Document diff metadata failed validation: {exc}",
        )

    summary = "Saved local export artifact for human review or external handoff." + pdfa_note
    outcome = DocumentToolResult(
        tool_id="document_save",
        correlation_id=correlation_id,
        status=ToolResultStatus.ok,
        artifact_refs=[reviewed.artifact_id, exported.artifact_id],
        diff=recorded_diff,
        saved_exports=saved_exports,
        text_summary=summary,
    )
    return _with_workflow_steps(outcome, artifacts=self._artifacts)
|
|
1097
|
+
|
|
1098
|
+
def _apply_patch_result(
    self,
    *,
    tool_id: str,
    correlation_id: str,
    working: DocumentArtifact,
    patch: DocumentPatch,
) -> DocumentToolResult:
    """Apply ``patch`` to ``working`` and translate the outcome into a tool result.

    On success the new derivative, its extraction and (when present) its diff
    are cached, and the diff is persisted to the store. When the patch does not
    report ``ok`` or yields no derivative, the patch status and summary are
    returned, with ``unsupported_operation`` as the blocked reason if the patch
    supplied none.
    """
    new_artifact_id = _generated_artifact_id("derivative", correlation_id)
    outcome = apply_document_patch(
        self.store,
        working,
        patch,
        engine_registry=self.engine_registry,
        artifact_id=new_artifact_id,
        destination_name=f"{new_artifact_id}.{working.format.value}",
    )

    succeeded = outcome.status is ToolResultStatus.ok and outcome.derivative_artifact is not None
    if not succeeded:
        refusal = DocumentToolResult(
            tool_id=tool_id,
            correlation_id=correlation_id,
            status=outcome.status,
            artifact_refs=[working.artifact_id],
            text_summary=outcome.text_summary,
            blocked_reason=outcome.blocked_reason or BlockedReason.unsupported_operation,
        )
        return _with_workflow_steps(refusal)

    produced = outcome.derivative_artifact
    produced_id = produced.artifact_id
    self._artifacts[produced_id] = produced
    self._extractions[produced_id] = self._extraction_for_artifact(produced, correlation_id)
    if outcome.diff is not None:
        self._diffs_by_artifact_id[produced_id] = outcome.diff
        self.store.store_diff(outcome.diff)

    applied = DocumentToolResult(
        tool_id=tool_id,
        correlation_id=correlation_id,
        status=ToolResultStatus.ok,
        artifact_refs=[working.artifact_id, produced_id],
        diff=outcome.diff,
        text_summary=outcome.text_summary,
    )
    return _with_workflow_steps(applied, artifacts=self._artifacts)
|
|
1145
|
+
|
|
1146
|
+
def _resolve_artifact_for_read(
    self,
    document: Any,
    correlation_id: str,
    *,
    tool_id: str,
) -> DocumentArtifact | DocumentToolResult:
    """Turn a document locator into an artifact, or a result explaining why not.

    A locator carrying both ``artifact_id`` and ``path`` is rejected. Tools
    listed in ``_ARTIFACT_ID_REQUIRED_TOOL_IDS`` are asked for an
    ``artifact_id`` when only a path is given. Otherwise the artifact is looked
    up by id, or the path is inspected first and the first artifact reference
    of that inspection is looked up.
    """
    ambiguity = self._ambiguous_locator_result(
        document,
        correlation_id,
        tool_id=tool_id,
    )
    if ambiguity is not None:
        return ambiguity

    requires_known_artifact = tool_id in _ARTIFACT_ID_REQUIRED_TOOL_IDS
    if requires_known_artifact and document.artifact_id is None and document.path is not None:
        return needs_input_document_tool_result(
            tool_id=tool_id,
            correlation_id=correlation_id,
            message=(
                "Call document_inspect first and pass the returned artifact_id "
                f"before {tool_id}."
            ),
        )

    known_id = document.artifact_id
    if known_id is not None:
        return self._artifact_by_id(known_id, correlation_id)

    # No id supplied: inspect the locator and resolve whatever it registered.
    inspection = self.inspect(
        DocumentInspectRequest(correlation_id=correlation_id, document=document)
    )
    inspected_ok = inspection.status is ToolResultStatus.ok and bool(inspection.artifact_refs)
    if not inspected_ok:
        return inspection
    return self._artifact_by_id(inspection.artifact_refs[0], correlation_id)
|
|
1183
|
+
|
|
1184
|
+
def _resolve_artifact_for_write(
    self,
    document: Any,
    correlation_id: str,
    *,
    tool_id: str,
) -> DocumentArtifact | DocumentToolResult:
    """Resolve a locator like ``_resolve_artifact_for_read`` but refuse source artifacts.

    An artifact whose lineage is ``ArtifactLineage.source`` yields a
    ``permission_denied`` result reported under ``document_write_boundary``.
    """
    resolved = self._resolve_artifact_for_read(
        document,
        correlation_id,
        tool_id=tool_id,
    )
    # Pass through both read-resolution results and any non-source artifact.
    if isinstance(resolved, DocumentToolResult) or resolved.lineage is not ArtifactLineage.source:
        return resolved
    return unsupported_document_tool_result(
        tool_id="document_write_boundary",
        correlation_id=correlation_id,
        artifact_refs=(resolved.artifact_id,),
        message="Document writes require a working copy created by document_copy_for_edit.",
        reason=BlockedReason.permission_denied,
    )
|
|
1207
|
+
|
|
1208
|
+
def _ambiguous_locator_result(
|
|
1209
|
+
self,
|
|
1210
|
+
document: Any,
|
|
1211
|
+
correlation_id: str,
|
|
1212
|
+
*,
|
|
1213
|
+
tool_id: str,
|
|
1214
|
+
) -> DocumentToolResult | None:
|
|
1215
|
+
artifact_id = getattr(document, "artifact_id", None)
|
|
1216
|
+
path = getattr(document, "path", None)
|
|
1217
|
+
if artifact_id is None or path is None:
|
|
1218
|
+
return None
|
|
1219
|
+
return needs_input_document_tool_result(
|
|
1220
|
+
tool_id=tool_id,
|
|
1221
|
+
correlation_id=correlation_id,
|
|
1222
|
+
artifact_refs=(artifact_id,),
|
|
1223
|
+
message=(
|
|
1224
|
+
"Document locator is ambiguous: pass artifact_id for an existing "
|
|
1225
|
+
"local artifact or path for first inspection, not both."
|
|
1226
|
+
),
|
|
1227
|
+
)
|
|
1228
|
+
|
|
1229
|
+
def _artifact_by_id(
|
|
1230
|
+
self,
|
|
1231
|
+
artifact_id: str | None,
|
|
1232
|
+
correlation_id: str,
|
|
1233
|
+
) -> DocumentArtifact | DocumentToolResult:
|
|
1234
|
+
if artifact_id is None:
|
|
1235
|
+
return needs_input_document_tool_result(
|
|
1236
|
+
tool_id="document_artifact_lookup",
|
|
1237
|
+
correlation_id=correlation_id,
|
|
1238
|
+
message="artifact_id is required for this document tool call.",
|
|
1239
|
+
)
|
|
1240
|
+
artifact = self._artifacts.get(artifact_id)
|
|
1241
|
+
if artifact is None:
|
|
1242
|
+
try:
|
|
1243
|
+
artifact = self.store.load_artifact(artifact_id)
|
|
1244
|
+
except ArtifactStoreError as exc:
|
|
1245
|
+
return DocumentToolResult(
|
|
1246
|
+
tool_id="document_artifact_lookup",
|
|
1247
|
+
correlation_id=correlation_id,
|
|
1248
|
+
status=ToolResultStatus.failed,
|
|
1249
|
+
artifact_refs=[artifact_id],
|
|
1250
|
+
text_summary=f"Document artifact metadata failed validation: {exc}",
|
|
1251
|
+
)
|
|
1252
|
+
if artifact is None:
|
|
1253
|
+
return needs_input_document_tool_result(
|
|
1254
|
+
tool_id="document_artifact_lookup",
|
|
1255
|
+
correlation_id=correlation_id,
|
|
1256
|
+
artifact_refs=(artifact_id,),
|
|
1257
|
+
message=f"Unknown local document artifact: {artifact_id}.",
|
|
1258
|
+
)
|
|
1259
|
+
self._artifacts[artifact.artifact_id] = artifact
|
|
1260
|
+
return artifact
|
|
1261
|
+
|
|
1262
|
+
def _diff_for_artifact(self, artifact_id: str) -> DocumentDiff | None:
|
|
1263
|
+
diff = self._diffs_by_artifact_id.get(artifact_id)
|
|
1264
|
+
if diff is not None:
|
|
1265
|
+
return diff
|
|
1266
|
+
diff = self.store.load_diff(artifact_id)
|
|
1267
|
+
if diff is not None:
|
|
1268
|
+
self._diffs_by_artifact_id[artifact_id] = diff
|
|
1269
|
+
return diff
|
|
1270
|
+
|
|
1271
|
+
def _extraction_for_artifact(
|
|
1272
|
+
self,
|
|
1273
|
+
artifact: DocumentArtifact,
|
|
1274
|
+
correlation_id: str,
|
|
1275
|
+
) -> DocumentExtraction:
|
|
1276
|
+
extraction = self._extractions.get(artifact.artifact_id)
|
|
1277
|
+
if extraction is not None:
|
|
1278
|
+
return extraction
|
|
1279
|
+
adapter = self.adapter_registry.require_promoted(artifact.format)
|
|
1280
|
+
extraction = adapter.inspect(Path(artifact.source_path), artifact_id=correlation_id)
|
|
1281
|
+
self._extractions[artifact.artifact_id] = extraction
|
|
1282
|
+
return extraction
|
|
1283
|
+
|
|
1284
|
+
def _existing_source_artifact_for_path(
|
|
1285
|
+
self,
|
|
1286
|
+
*,
|
|
1287
|
+
artifact_id: str,
|
|
1288
|
+
source_path: Path,
|
|
1289
|
+
document_format: DocumentFormat,
|
|
1290
|
+
) -> DocumentArtifact | None:
|
|
1291
|
+
artifact = self._artifacts.get(artifact_id)
|
|
1292
|
+
if artifact is None:
|
|
1293
|
+
artifact = self.store.load_artifact(artifact_id)
|
|
1294
|
+
if artifact is None:
|
|
1295
|
+
return None
|
|
1296
|
+
source = source_path.expanduser().resolve()
|
|
1297
|
+
source_sha256 = hashlib.sha256(source.read_bytes()).hexdigest()
|
|
1298
|
+
if (
|
|
1299
|
+
artifact.lineage is ArtifactLineage.source
|
|
1300
|
+
and artifact.format is document_format
|
|
1301
|
+
and artifact.display_name == source.name
|
|
1302
|
+
and artifact.sha256 == source_sha256
|
|
1303
|
+
):
|
|
1304
|
+
self._artifacts[artifact.artifact_id] = artifact
|
|
1305
|
+
return artifact
|
|
1306
|
+
return None
|
|
1307
|
+
|
|
1308
|
+
def _source_artifact_for_inspected_path(
    self,
    *,
    artifact_id: str,
    source_path: Path,
    document_format: DocumentFormat,
    correlation_id: str,
) -> DocumentArtifact | DocumentToolResult:
    """Reuse a matching source artifact for ``source_path`` or store a new one.

    When the store raises ``ArtifactStoreError`` while storing, a failed
    ``document_inspect`` result with ``validation_failed`` is returned.
    """
    reusable = self._existing_source_artifact_for_path(
        artifact_id=artifact_id,
        source_path=source_path,
        document_format=document_format,
    )
    if reusable is not None:
        return reusable

    try:
        stored = self.store.store_source(
            source_path,
            artifact_id=artifact_id,
            document_format=document_format,
            mime_type=_mime_for_format(document_format),
        )
    except ArtifactStoreError as exc:
        return DocumentToolResult(
            tool_id="document_inspect",
            correlation_id=correlation_id,
            status=ToolResultStatus.failed,
            artifact_refs=[artifact_id],
            text_summary=f"Document source artifact storage failed: {exc}",
            blocked_reason=BlockedReason.validation_failed,
        )
    return stored
|
|
1339
|
+
|
|
1340
|
+
def _with_runtime_workflow_steps(self, result: DocumentToolResult) -> DocumentToolResult:
    """Delegate to ``_with_workflow_steps`` using this runtime's artifact cache."""
    known_artifacts = self._artifacts
    return _with_workflow_steps(result, artifacts=known_artifacts)
|
|
1342
|
+
|
|
1343
|
+
def _attachment_context_derivative_result(
    self,
    request: DocumentPrimitiveRequest,
    source_result: DocumentToolResult,
) -> DocumentToolResult:
    """Build a Markdown derivative from an attachment's extraction and render it.

    The extraction carried by ``source_result`` is turned into a Markdown
    payload, stored as a new source artifact, copied for editing and rendered.
    When the request asks for a save (operation ``"save"`` or any destination
    field set) the working copy is also saved and the save outputs are merged
    into the returned result. The first stage that does not report ``ok`` is
    returned through ``_document_result_from_stage``.
    """
    correlation_id = request.correlation_id
    extraction = source_result.extraction
    if extraction is None or request.document.path is None:
        return unsupported_document_tool_result(
            tool_id="document",
            correlation_id=correlation_id,
            artifact_refs=tuple(source_result.artifact_refs),
            message=(
                "Attachment-context derivative creation requires a local source path "
                "with extraction metadata."
            ),
            reason=BlockedReason.unsupported_operation,
        )

    attachment_path = Path(request.document.path).expanduser().resolve()
    markdown_text = _attachment_context_markdown_payload(
        extraction,
        source_path=attachment_path,
        instruction=request.instruction,
    )
    markdown_bytes = markdown_text.encode("utf-8")
    generated_id = _generated_artifact_id(
        "source",
        f"{correlation_id}-attachment-context",
    )
    generated_name = _attachment_context_display_name(attachment_path)

    # The payload is staged in a throwaway directory and handed to the store from there.
    with tempfile.TemporaryDirectory(prefix="ummaya-attachment-context-") as staging_dir:
        staged_file = Path(staging_dir) / generated_name
        staged_file.write_bytes(markdown_bytes)
        generated_source = self.store.store_source(
            staged_file,
            artifact_id=generated_id,
            document_format=DocumentFormat.md,
            mime_type=_mime_for_format(DocumentFormat.md),
            display_name=generated_name,
        )
        self._artifacts[generated_source.artifact_id] = generated_source
        self._extractions[generated_source.artifact_id] = self._extraction_for_artifact(
            generated_source,
            correlation_id,
        )

    copy_request = DocumentCopyForEditRequest(
        correlation_id=correlation_id,
        document=DocumentLocator(artifact_id=generated_source.artifact_id),
        reason=_copy_for_edit_reason(request.instruction),
    )
    copy_stage = self.copy_for_edit(copy_request)
    if copy_stage.status is not ToolResultStatus.ok:
        return _document_result_from_stage(copy_stage, correlation_id=correlation_id)
    # The working copy is the last artifact reference reported by the copy stage.
    working_id = copy_stage.artifact_refs[-1]

    render_request = DocumentRenderRequest(
        correlation_id=correlation_id,
        document=DocumentLocator(artifact_id=working_id),
    )
    render_stage = self.render(render_request)
    if render_stage.status is not ToolResultStatus.ok:
        return _document_result_from_stage(render_stage, correlation_id=correlation_id)

    combined_refs = _unique_artifact_refs(
        [
            generated_source.artifact_id,
            working_id,
            *render_stage.artifact_refs,
        ]
    )
    combined = render_stage.model_copy(
        update={
            "tool_id": "document",
            "artifact_refs": combined_refs,
            "extraction": extraction,
            "text_summary": (
                "Attachment context derivative document created with local render evidence."
            ),
        }
    )

    save_requested = (
        request.operation == "save"
        or request.destination_display_name is not None
        or request.destination_path is not None
    )
    if not save_requested:
        return combined

    # Display name preference: explicit name, then destination file name, then generated name.
    export_name = request.destination_display_name
    if export_name is None:
        if request.destination_path is not None:
            export_name = Path(request.destination_path).name
        else:
            export_name = generated_name

    save_request = DocumentSaveRequest(
        correlation_id=correlation_id,
        document=DocumentLocator(artifact_id=working_id),
        destination_display_name=export_name,
        destination_path=request.destination_path,
    )
    save_stage = self.save(save_request)
    if save_stage.status is not ToolResultStatus.ok:
        return _document_result_from_stage(
            save_stage,
            correlation_id=correlation_id,
        )

    return combined.model_copy(
        update={
            "artifact_refs": _unique_artifact_refs(
                [*combined.artifact_refs, *save_stage.artifact_refs]
            ),
            "saved_exports": save_stage.saved_exports,
            "workflow_steps": _merge_save_workflow_steps(
                combined.workflow_steps,
                save_stage.workflow_steps,
            ),
        }
    )
|
|
1460
|
+
|
|
1461
|
+
|
|
1462
|
+
class _SessionDocumentRuntimePool:
|
|
1463
|
+
"""Lazily allocate one document runtime per caller session."""
|
|
1464
|
+
|
|
1465
|
+
def __init__(
|
|
1466
|
+
self,
|
|
1467
|
+
*,
|
|
1468
|
+
artifact_root: str | Path | None = None,
|
|
1469
|
+
engine_registry: DocumentEngineRegistry | None = None,
|
|
1470
|
+
adapter_registry: DocumentAdapterRegistry | None = None,
|
|
1471
|
+
conversion_registry: DocumentConversionRegistry | None = None,
|
|
1472
|
+
baseline_catalog: ConformanceBaselineCatalog | None = None,
|
|
1473
|
+
) -> None:
|
|
1474
|
+
self._artifact_root = artifact_root
|
|
1475
|
+
self._engine_registry = engine_registry
|
|
1476
|
+
self._adapter_registry = adapter_registry
|
|
1477
|
+
self._conversion_registry = conversion_registry
|
|
1478
|
+
self._baseline_catalog = baseline_catalog
|
|
1479
|
+
self._runtimes: dict[str, DocumentToolRuntime] = {}
|
|
1480
|
+
|
|
1481
|
+
def runtime_for(self, session_identity: object | None) -> DocumentToolRuntime:
|
|
1482
|
+
session_id = _runtime_session_id(session_identity)
|
|
1483
|
+
runtime = self._runtimes.get(session_id)
|
|
1484
|
+
if runtime is None:
|
|
1485
|
+
runtime = DocumentToolRuntime(
|
|
1486
|
+
session_id=session_id,
|
|
1487
|
+
artifact_root=self._artifact_root,
|
|
1488
|
+
engine_registry=self._engine_registry,
|
|
1489
|
+
adapter_registry=self._adapter_registry,
|
|
1490
|
+
conversion_registry=self._conversion_registry,
|
|
1491
|
+
baseline_catalog=self._baseline_catalog,
|
|
1492
|
+
)
|
|
1493
|
+
self._runtimes[session_id] = runtime
|
|
1494
|
+
return runtime
|
|
1495
|
+
|
|
1496
|
+
|
|
1497
|
+
def register_document_tools(
    registry: ToolRegistry,
    executor: ToolExecutor,
    *,
    runtime: DocumentToolRuntime | None = None,
    artifact_root: str | Path | None = None,
    engine_registry: DocumentEngineRegistry | None = None,
    adapter_registry: DocumentAdapterRegistry | None = None,
    conversion_registry: DocumentConversionRegistry | None = None,
    baseline_catalog: ConformanceBaselineCatalog | None = None,
) -> None:
    """Register every document tool definition and wire its executor adapter.

    With an explicit ``runtime`` each tool gets a plain adapter that delegates
    to that runtime. Without one, a ``_SessionDocumentRuntimePool`` is built
    from the remaining keyword arguments and each tool gets a session adapter
    that picks the runtime for the calling session.
    """
    runtime_pool = (
        None
        if runtime is not None
        else _SessionDocumentRuntimePool(
            artifact_root=artifact_root,
            engine_registry=engine_registry,
            adapter_registry=adapter_registry,
            conversion_registry=conversion_registry,
            baseline_catalog=baseline_catalog,
        )
    )

    for tool in build_document_tool_definitions():
        registry.register(tool)

        if runtime is None:
            assert runtime_pool is not None
            active_pool = runtime_pool

            # Defaults bind the current tool id and pool at definition time.
            async def _session_adapter(
                inp: BaseModel,
                session_identity: object | None,
                *,
                _tool_id: str = tool.id,
                _runtime_pool: _SessionDocumentRuntimePool = active_pool,
            ) -> dict[str, Any]:
                return await _runtime_pool.runtime_for(session_identity).handle(_tool_id, inp)

            executor.register_session_adapter(tool.id, _session_adapter)
        else:

            # The default binds the current tool id at definition time.
            async def _adapter(inp: BaseModel, *, _tool_id: str = tool.id) -> dict[str, Any]:
                return await runtime.handle(_tool_id, inp)

            executor.register_adapter(tool.id, _adapter)
|
|
1543
|
+
|
|
1544
|
+
|
|
1545
|
+
_SAFE_RUNTIME_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.-]{0,127}$")
|
|
1546
|
+
|
|
1547
|
+
|
|
1548
|
+
def _runtime_session_id(session_identity: object | None) -> str:
|
|
1549
|
+
if session_identity is None:
|
|
1550
|
+
return "anonymous"
|
|
1551
|
+
raw = str(session_identity).strip() or "anonymous"
|
|
1552
|
+
if _SAFE_RUNTIME_SESSION_ID_RE.fullmatch(raw):
|
|
1553
|
+
return raw
|
|
1554
|
+
digest = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:12]
|
|
1555
|
+
label = re.sub(r"[^A-Za-z0-9_.-]+", "-", raw).strip("._-")[:48] or "session"
|
|
1556
|
+
return f"{label}-{digest}"
|
|
1557
|
+
|
|
1558
|
+
|
|
1559
|
+
def _source_artifact_id(correlation_id: str) -> str:
    """Return the generated artifact id for ``correlation_id`` under the ``source`` prefix."""
    source_id = _generated_artifact_id("source", correlation_id)
    return source_id
|
|
1561
|
+
|
|
1562
|
+
|
|
1563
|
+
_SAFE_ARTIFACT_COMPONENT_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.-]{0,127}$")
_MAX_ARTIFACT_COMPONENT_LENGTH = 128


def _generated_artifact_id(prefix: str, correlation_id: str) -> str:
    """Derive a store-safe artifact id from a prefix and correlation text.

    ``<prefix>-<correlation>`` is used directly when it fits the length limit
    and matches ``_SAFE_ARTIFACT_COMPONENT_RE``. Otherwise the id becomes
    ``<prefix>-<slug>-<digest>``, where the slug is a sanitized form of the
    correlation text and the digest is the first 12 hex characters of its
    SHA-256.
    """
    head = _ascii_component(prefix, fallback="artifact", max_length=32)
    correlation_text = correlation_id.strip()
    direct = f"{head}-{correlation_text}"
    fits = len(direct) <= _MAX_ARTIFACT_COMPONENT_LENGTH
    if fits and _SAFE_ARTIFACT_COMPONENT_RE.fullmatch(direct):
        return direct

    fingerprint = hashlib.sha256(correlation_text.encode("utf-8")).hexdigest()[:12]
    # Two hyphens join the three parts, so they are subtracted from the budget.
    slug_budget = _MAX_ARTIFACT_COMPONENT_LENGTH - len(head) - len(fingerprint) - 2
    slug = _ascii_component(correlation_text, fallback="corr", max_length=slug_budget)
    return "-".join((head, slug, fingerprint))
|
|
1581
|
+
|
|
1582
|
+
|
|
1583
|
+
def _ascii_component(value: str, *, fallback: str, max_length: int) -> str:
    """Reduce *value* to a short ASCII component made of letters, digits, ``_``, ``.`` and ``-``.

    Runs of any other character become a single ``-``, repeated dashes are
    collapsed, and leading/trailing ``.``, ``_`` and ``-`` are removed.

    Args:
        value: Free-form text to sanitize.
        fallback: Used when nothing usable remains, and prepended when the
            component does not start with an ASCII letter or digit. The
            result can only be guaranteed to start with a letter or digit if
            *fallback* does.
        max_length: Maximum length of the result. Values below 1 are treated
            as 1 so the result is never empty and never sliced from the end.

    Returns:
        The sanitized component, at most ``max(max_length, 1)`` characters.
    """
    # Clamp once and use the same limit for both cuts. Previously only the
    # first cut was clamped, so a zero limit returned "" and a negative limit
    # sliced characters off the end instead of capping the length.
    limit = max(max_length, 1)
    component = re.sub(r"[^A-Za-z0-9_.-]+", "-", value.strip()).strip("._-")
    component = re.sub(r"-{2,}", "-", component)
    if not component:
        component = fallback
    if not re.match(r"^[A-Za-z0-9]", component):
        component = f"{fallback}-{component}"
    # Truncation can expose a trailing separator, so strip again afterwards.
    component = component[:limit].strip("._-")
    if not component:
        component = fallback
    if not re.match(r"^[A-Za-z0-9]", component):
        component = f"{fallback}-{component}"
    return component[:limit]
|
|
1596
|
+
|
|
1597
|
+
|
|
1598
|
+
def _format_from_extraction_or_suffix(
    extraction: DocumentExtraction | None,
    source_path: Path,
) -> DocumentFormat:
    """Pick the document format from extraction metadata, else from the file suffix.

    A string ``"format"`` entry in the extraction metadata wins. Otherwise the
    lower-cased suffix of *source_path*, without its leading dot, is coerced.
    """
    if extraction is not None:
        declared = extraction.metadata.get("format")
        if isinstance(declared, str):
            return _coerce_document_format(declared)
    extension = source_path.suffix.lower().lstrip(".")
    return _coerce_document_format(extension)
|
|
1605
|
+
|
|
1606
|
+
|
|
1607
|
+
def _coerce_document_format(value: object) -> DocumentFormat:
    """Return *value* as a ``DocumentFormat``.

    Existing ``DocumentFormat`` members pass through, strings are handed to
    the ``DocumentFormat`` constructor, and any other type raises
    ``ValueError``.
    """
    if isinstance(value, DocumentFormat):
        return value
    if not isinstance(value, str):
        raise ValueError(f"Unsupported document format value: {value!r}")
    return DocumentFormat(value)
|
|
1613
|
+
|
|
1614
|
+
|
|
1615
|
+
# "mutation_policy" metadata values that mark an extraction as attachment
# context rather than an editable document.
_ATTACHMENT_CONTEXT_MUTATION_POLICIES = frozenset(
    (
        "extraction_only",
        "metadata_only_geospatial_asset",
        "metadata_only_media_asset",
    )
)


def _is_attachment_context_extraction(extraction: DocumentExtraction | None) -> bool:
    """Tell whether *extraction* carries an attachment-context mutation policy.

    Returns ``False`` when there is no extraction at all.
    """
    if extraction is not None:
        policy = extraction.metadata.get("mutation_policy")
        return policy in _ATTACHMENT_CONTEXT_MUTATION_POLICIES
    return False
|
|
1628
|
+
|
|
1629
|
+
|
|
1630
|
+
def _attachment_context_display_name(source_path: Path) -> str:
    """Name the Markdown context derivative after the attachment's sanitized stem."""
    stem = _ascii_component(source_path.stem, fallback="attachment", max_length=80)
    return stem + "-context.md"
|
|
1633
|
+
|
|
1634
|
+
|
|
1635
|
+
def _attachment_context_markdown_payload(
    extraction: DocumentExtraction,
    *,
    source_path: Path,
    instruction: str,
) -> str:
    """Render the Markdown "attachment context derivative" for an attachment.

    The document lists the source file name and SHA-256, selected extraction
    metadata, every extracted image reference, up to twelve extracted
    paragraphs, fixed runtime-boundary notes, the stripped user instruction
    and any extraction warnings.

    Args:
        extraction: Extraction whose metadata, images, paragraphs and
            warnings are reported.
        source_path: Attachment file; it is read only to compute its hash.
        instruction: User instruction echoed into the document.

    Returns:
        The Markdown text, lines joined with ``"\n"``.

    Raises:
        OSError: If *source_path* cannot be opened or read.
    """
    # Hash in bounded chunks instead of loading the whole attachment: the
    # metadata-only policies cover media and geospatial assets, which can be
    # far larger than is sensible to hold in memory just for a digest.
    hasher = hashlib.sha256()
    with source_path.open("rb") as stream:
        for chunk in iter(lambda: stream.read(1024 * 1024), b""):
            hasher.update(chunk)
    source_sha256 = hasher.hexdigest()
    known_format = _metadata_as_text(extraction.metadata.get("known_format"))
    mutation_policy = _metadata_as_text(extraction.metadata.get("mutation_policy"))
    byte_size = _metadata_as_text(extraction.metadata.get("byte_size"))
    lines = [
        "# Attachment Context Derivative",
        "",
        "This generated document records attachment evidence for public-document authoring.",
        "It does not mutate the original attachment file.",
        "",
        "## Source",
        f"- source_file: {source_path.name}",
        f"- source_sha256: {source_sha256}",
        f"- known_format: {known_format}",
        f"- byte_size: {byte_size}",
        f"- mutation_policy: {mutation_policy}",
        "",
        "## Extracted References",
    ]
    if extraction.images:
        for image in extraction.images:
            lines.append(f"- image: {image.image_id} ({image.content_type})")
    if extraction.paragraphs:
        # Only the first twelve paragraphs are echoed into the context file.
        for paragraph in extraction.paragraphs[:12]:
            lines.append(f"- paragraph: {paragraph.text}")
    if not extraction.images and not extraction.paragraphs:
        lines.append("- metadata-only attachment; no document text was extracted.")
    lines.extend(
        [
            "",
            "## Runtime Boundaries",
            "- OCR text: not available; no OCR runtime was applied.",
            "- Geospatial feature extraction: not available unless a vetted GDAL bridge is active.",
            (
                "- Media transcript: not available unless a vetted "
                "ffprobe/transcription bridge is active."
            ),
            "",
            "## User Instruction",
            instruction.strip(),
            "",
        ]
    )
    if extraction.warnings:
        lines.extend(["## Warnings", *[f"- {warning}" for warning in extraction.warnings], ""])
    return "\n".join(lines)
|
|
1687
|
+
|
|
1688
|
+
|
|
1689
|
+
def _metadata_as_text(value: object) -> str:
    """Render a metadata value as text, using ``"unknown"`` for ``None``."""
    return "unknown" if value is None else str(value)
|
|
1693
|
+
|
|
1694
|
+
|
|
1695
|
+
def _filtered_extraction(
    extraction: DocumentExtraction,
    *,
    include_tables: bool,
    include_images: bool,
    include_fields: bool,
) -> DocumentExtraction:
    """Return *extraction* with every section the caller did not ask for emptied.

    When all three sections are requested the original object is returned
    unchanged; otherwise a copy is made whose excluded sections are ``[]``.
    """
    requested = {
        "tables": include_tables,
        "images": include_images,
        "fields": include_fields,
    }
    cleared: dict[str, list[object]] = {
        section: [] for section, keep in requested.items() if not keep
    }
    if cleared:
        return extraction.model_copy(update=cleared)
    return extraction
|
|
1712
|
+
|
|
1713
|
+
|
|
1714
|
+
def _missing_local_document_result(
    path: Path,
    *,
    correlation_id: str,
    tool_id: str,
    expected_format: DocumentFormat | None,
) -> DocumentToolResult:
    """Build the needs-input result for a document path that does not exist.

    The message names the missing path and lists at most five similarly named
    files from the same directory, or says that none were found.
    """
    matches = _matching_local_document_candidates(path, expected_format=expected_format)
    if matches:
        detail = ["Matching local candidates require explicit selection:"]
        detail.extend(f"- {match}" for match in matches[:5])
    else:
        detail = ["No matching local document candidates were found in the requested directory."]
    message = "\n".join([f"Document path does not exist: {path}.", *detail])
    return needs_input_document_tool_result(
        tool_id=tool_id,
        correlation_id=correlation_id,
        message=message,
    )
|
|
1733
|
+
|
|
1734
|
+
|
|
1735
|
+
def _matching_local_document_candidates(
    path: Path,
    *,
    expected_format: DocumentFormat | None,
) -> list[Path]:
    """List files beside *path* whose normalized stem resembles the requested one.

    Only regular files with an accepted suffix are considered: the expected
    format's suffix when one is given, otherwise the suffix of every
    ``DocumentFormat`` member. Results are ordered by descending match score,
    then by file name. An empty list is returned when the parent is not a
    directory or the requested stem normalizes to nothing.
    """
    directory = path.parent
    if not directory.is_dir():
        return []
    if expected_format is None:
        suffixes = tuple(f".{document_format.value}" for document_format in DocumentFormat)
    else:
        suffixes = (f".{expected_format.value}",)
    wanted_stem = _normalized_document_stem(path.stem)
    if not wanted_stem:
        return []

    ranked: list[tuple[float, Path]] = []
    for entry in sorted(directory.iterdir(), key=lambda item: item.name):
        if entry.is_file() and entry.suffix.lower() in suffixes:
            entry_stem = _normalized_document_stem(entry.stem)
            score = _document_stem_match_score(wanted_stem, entry_stem)
            if score >= _MIN_LOCAL_DOCUMENT_CANDIDATE_SCORE:
                ranked.append((score, entry))
    ranked.sort(key=lambda pair: (-pair[0], pair[1].name))
    return [entry for _score, entry in ranked]
|
|
1766
|
+
|
|
1767
|
+
|
|
1768
|
+
def _normalized_document_stem(value: str) -> str:
    """Normalize a file stem for fuzzy comparison.

    The stem is NFC-normalized and case-folded, reduced to ASCII digits,
    lower-case ASCII letters and Hangul syllables, and finally stripped of
    whatever ``_DOCUMENT_STEM_NOISE_RE`` matches.
    """
    folded = unicodedata.normalize("NFC", value).casefold()
    kept = re.sub(r"[^0-9a-z가-힣]+", "", folded)
    return _DOCUMENT_STEM_NOISE_RE.sub("", kept)
|
|
1772
|
+
|
|
1773
|
+
|
|
1774
|
+
def _document_stem_match_score(requested_stem: str, candidate_stem: str) -> float:
    """Score how closely two normalized stems match, from 0.0 to 1.0.

    An empty stem scores 0.0, containment in either direction scores 1.0, and
    everything else gets the ``SequenceMatcher`` similarity ratio.
    """
    if not (requested_stem and candidate_stem):
        return 0.0
    contained = requested_stem in candidate_stem or candidate_stem in requested_stem
    if contained:
        return 1.0
    return SequenceMatcher(None, requested_stem, candidate_stem).ratio()
|
|
1780
|
+
|
|
1781
|
+
|
|
1782
|
+
def _mime_for_format(document_format: Any) -> str:
    """Map a document format to its MIME type.

    The lookup key is the argument's ``value`` attribute when it has one,
    otherwise the argument itself, converted to ``str``. Unknown keys map to
    ``"application/octet-stream"``.
    """
    key = str(getattr(document_format, "value", document_format))
    mime_by_format = {
        "hwpx": "application/owpml",
        "owpml": "application/owpml",
        "hwp": "application/x-hwp",
        "doc": "application/msword",
        "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "pdf": "application/pdf",
        "xls": "application/vnd.ms-excel",
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "ppt": "application/vnd.ms-powerpoint",
        "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "md": "text/markdown",
        "epub": "application/epub+zip",
        "zip": "application/zip",
        "tar": "application/x-tar",
        "gz": "application/gzip",
    }
    if key in mime_by_format:
        return mime_by_format[key]
    return "application/octet-stream"
|
|
1801
|
+
|
|
1802
|
+
|
|
1803
|
+
def _editable_derivative_format(document_format: DocumentFormat) -> DocumentFormat | None:
    """Look up the editable derivative format registered for *document_format*.

    Returns ``None`` when ``_EDITABLE_DERIVATIVE_FORMAT_BY_SOURCE`` has no
    entry for the format.
    """
    derivative = _EDITABLE_DERIVATIVE_FORMAT_BY_SOURCE.get(document_format)
    return derivative
|
|
1805
|
+
|
|
1806
|
+
|
|
1807
|
+
def _conversion_labels(
    source_format: DocumentFormat,
    derivative_format: DocumentFormat,
) -> tuple[str, str]:
    """Return the ``(source, derivative)`` display labels for a conversion.

    Labels registered in ``_DERIVATIVE_LABEL_BY_FORMAT`` for the source format
    win; otherwise both format values are upper-cased.
    """
    generic = (source_format.value.upper(), derivative_format.value.upper())
    return _DERIVATIVE_LABEL_BY_FORMAT.get(source_format, generic)
|
|
1815
|
+
|
|
1816
|
+
|
|
1817
|
+
def _conversion_missing_message(
    *,
    source_format: DocumentFormat,
    derivative_format: DocumentFormat,
    source_label: str,
    derivative_label: str,
) -> str:
    """Explain that a legacy binary source must be converted before editing.

    HWP sources get a fixed HWP-to-HWPX message; every other source gets a
    message built from the supplied labels and format values.
    """
    if source_format is not DocumentFormat.hwp:
        return (
            f"{source_label} binary direct writing is blocked. {source_label} to "
            f"{derivative_label} conversion is required before editing legacy Office "
            f"files. Direct {source_label} binary working copies remain blocked. "
            "Install or register a vetted local LibreOffice/soffice conversion bridge "
            f"for {source_format.value} -> {derivative_format.value}."
        )
    return (
        "HWP binary direct writing is blocked. HWP to HWPX conversion is "
        "required before editing legacy HWP files. Direct HWP binary working "
        "copies remain blocked. Use a HWPX or DOCX editable template, or "
        "register a vetted local HWP to HWPX conversion engine."
    )
|
|
1838
|
+
|
|
1839
|
+
|
|
1840
|
+
def _document_primitive_fill_patches(
    patches: tuple[DocumentFieldPatch, ...],
    *,
    adapter: DocumentFormatAdapter,
    extraction: DocumentExtraction | None,
) -> tuple[DocumentFieldPatch, ...]:
    """Delegate fill-patch normalization to the format adapter."""
    normalized = adapter.normalize_fill_patches(patches, extraction=extraction)
    return normalized
|
|
1847
|
+
|
|
1848
|
+
|
|
1849
|
+
def _should_prefer_autonomous_fill_plan(
    instruction: str,
    patches: tuple[DocumentFieldPatch, ...],
) -> bool:
    """Decide whether deterministic planning should replace model-supplied patches.

    True only when patches were supplied and the instruction matches
    ``_AUTONOMOUS_FILL_INSTRUCTION_RE``.
    """
    if not patches:
        return False
    return _AUTONOMOUS_FILL_INSTRUCTION_RE.search(instruction) is not None
|
|
1855
|
+
|
|
1856
|
+
|
|
1857
|
+
def _authoring_draft_approval_message(
    issued_drafts: tuple[IssuedAuthoringDraft, ...],
) -> str:
    """Return the retry hint for the first issued draft, or ``""`` when none exist.

    The hint starts with a space so it can be appended to another sentence.
    """
    if issued_drafts:
        first = issued_drafts[0]
        return (
            " After user approval, retry with "
            f"approved_draft_id={first.draft_id} and "
            f"approved_draft_sha256={first.draft_sha256}."
        )
    return ""
|
|
1868
|
+
|
|
1869
|
+
|
|
1870
|
+
def _copy_for_edit_reason(instruction: str) -> str:
    """Bound a long instruction to the copy-for-edit audit field.

    Whitespace runs are collapsed to single spaces. Text longer than
    ``_COPY_FOR_EDIT_REASON_MAX_LENGTH`` is cut so that, with a trailing
    ellipsis character, it does not exceed that limit.
    """
    collapsed = " ".join(instruction.split())
    limit = _COPY_FOR_EDIT_REASON_MAX_LENGTH
    if len(collapsed) > limit:
        ellipsis = "…"
        return collapsed[: limit - len(ellipsis)].rstrip() + ellipsis
    return collapsed
|
|
1877
|
+
|
|
1878
|
+
|
|
1879
|
+
def _fill_patches_from_autonomous_plan(
    plan: AutonomousFillPlan,
) -> tuple[DocumentFieldPatch, ...]:
    """Convert the plan's fillable slots into field patches.

    Protected slots and slots without a candidate value are left out.
    """
    field_patches: list[DocumentFieldPatch] = []
    for slot in plan.slots:
        if slot.protected or slot.candidate_value is None:
            continue
        field_patches.append(
            DocumentFieldPatch(
                target_path=slot.source_anchor.format_path,
                value=slot.candidate_value,
            )
        )
    return tuple(field_patches)
|
|
1890
|
+
|
|
1891
|
+
|
|
1892
|
+
def _style_patches_from_autonomous_plan(
    plan: AutonomousFillPlan,
) -> tuple[DocumentStylePatch, ...]:
    """Convert each style intent of the plan into a style patch, in plan order."""
    style_patches: list[DocumentStylePatch] = []
    for intent in plan.style_intents:
        style_patches.append(
            DocumentStylePatch(
                target_path=intent.target_path,
                font_family=intent.style.font_family,
                font_size_pt=intent.style.font_size_pt,
                bold=intent.style.bold,
                italic=intent.style.italic,
                underline=intent.style.underline,
                font_color_rgb=intent.style.font_color_rgb,
                fill_color_rgb=intent.style.fill_color_rgb,
                alignment=intent.style.alignment,
            )
        )
    return tuple(style_patches)
|
|
1909
|
+
|
|
1910
|
+
|
|
1911
|
+
def _missing_required_unfilled_slot_ids(plan: AutonomousFillPlan) -> tuple[str, ...]:
    """Return ids of blocked slots that are required, unprotected and unfilled.

    The ids keep the order of ``plan.slots``.
    """
    blocked = set(plan.blocked_slot_ids)
    missing: list[str] = []
    for slot in plan.slots:
        if slot.slot_id not in blocked:
            continue
        if not slot.required or slot.protected:
            continue
        if slot.candidate_value is None:
            missing.append(slot.slot_id)
    return tuple(missing)
|
|
1921
|
+
|
|
1922
|
+
|
|
1923
|
+
def _fill_patch(
    request: DocumentApplyFillRequest,
    working: DocumentArtifact,
) -> DocumentPatch:
    """Build the working-copy patch that applies the request's field patches.

    Operations are numbered from 1 in request order and typed for the working
    artifact's format.
    """
    target_format = working.format
    fill_operations = [
        _field_patch_operation(field_patch, index=position, document_format=target_format)
        for position, field_patch in enumerate(request.patches, start=1)
    ]
    return DocumentPatch(
        patch_id=f"fill-{request.correlation_id}",
        target_artifact_id=working.artifact_id,
        operations=fill_operations,
        dry_run=request.dry_run,
        expected_format=target_format,
        destination_policy="working_copy",
    )
|
|
1942
|
+
|
|
1943
|
+
|
|
1944
|
+
def _fill_style_patch(
    *,
    correlation_id: str,
    patches: tuple[DocumentFieldPatch, ...],
    styles: tuple[DocumentStylePatch, ...],
    working: DocumentArtifact,
) -> DocumentPatch:
    """Build one working-copy patch holding fill operations followed by style operations.

    Each group is numbered from 1 independently. The patch is never a dry run.
    """
    target_format = working.format
    fill_operations = [
        _field_patch_operation(field_patch, index=position, document_format=target_format)
        for position, field_patch in enumerate(patches, start=1)
    ]
    style_operations = [
        _style_patch_operation(style_patch, index=position, document_format=target_format)
        for position, style_patch in enumerate(styles, start=1)
    ]
    return DocumentPatch(
        patch_id=f"fill-style-{correlation_id}",
        target_artifact_id=working.artifact_id,
        operations=[*fill_operations, *style_operations],
        dry_run=False,
        expected_format=target_format,
        destination_policy="working_copy",
    )
|
|
1971
|
+
|
|
1972
|
+
|
|
1973
|
+
def _style_patch(request: DocumentApplyStyleRequest, working: DocumentArtifact) -> DocumentPatch:
    """Build the working-copy patch that applies the request's style patches.

    Operations are numbered from 1 in request order.
    """
    target_format = working.format
    style_operations = [
        _style_patch_operation(style_patch, index=position, document_format=target_format)
        for position, style_patch in enumerate(request.styles, start=1)
    ]
    return DocumentPatch(
        patch_id=f"style-{request.correlation_id}",
        target_artifact_id=working.artifact_id,
        operations=style_operations,
        dry_run=request.dry_run,
        expected_format=target_format,
        destination_policy="working_copy",
    )
|
|
1985
|
+
|
|
1986
|
+
|
|
1987
|
+
def _field_patch_operation(
    item: DocumentFieldPatch,
    *,
    index: int,
    document_format: DocumentFormat,
) -> DocumentPatchOperation:
    """Wrap one field patch as a patch operation with id ``fill-NNN``.

    The operation type depends on the target path and the document format.
    """
    kind = _field_patch_operation_type(item.target_path, document_format=document_format)
    return DocumentPatchOperation(
        operation_id=f"fill-{index:03d}",
        operation_type=kind,
        target_path=item.target_path,
        value=item.value,
    )
|
|
2002
|
+
|
|
2003
|
+
|
|
2004
|
+
def _field_patch_operation_type(
    target_path: str,
    *,
    document_format: DocumentFormat,
) -> OperationType:
    """Choose between a table-cell write and a plain field write.

    A target counts as a table cell when the format-specific pattern for
    XLSX, DOCX, PPTX or HWPX/OWPML recognizes *target_path*.
    """
    is_table_cell = (
        (document_format is DocumentFormat.xlsx and _XLSX_CELL_FILL_TARGET_RE.match(target_path))
        or (
            document_format is DocumentFormat.docx
            and _DOCX_TABLE_FILL_TARGET_RE.search(target_path)
        )
        or (
            document_format is DocumentFormat.pptx
            and _PPTX_TABLE_FILL_TARGET_RE.match(target_path)
        )
        or (
            document_format in {DocumentFormat.hwpx, DocumentFormat.owpml}
            and _HWPX_TABLE_CELL_SOURCE_RE.match(target_path)
        )
    )
    if is_table_cell:
        return OperationType.set_table_cell
    return OperationType.set_field_value
|
|
2020
|
+
|
|
2021
|
+
|
|
2022
|
+
def _style_patch_operation(
    item: DocumentStylePatch,
    *,
    index: int,
    document_format: DocumentFormat,
) -> DocumentPatchOperation:
    """Wrap one style patch as a patch operation with id ``style-NNN``.

    The same ``style-NNN`` value is also used as the style descriptor's id.
    """
    tag = f"style-{index:03d}"
    kind = _style_patch_operation_type(item.target_path, document_format=document_format)
    return DocumentPatchOperation(
        operation_id=tag,
        operation_type=kind,
        target_path=item.target_path,
        style=item.to_style_descriptor(style_id=tag),
    )
|
|
2037
|
+
|
|
2038
|
+
|
|
2039
|
+
def _style_patch_operation_type(
    target_path: str,
    *,
    document_format: DocumentFormat,
) -> OperationType:
    """Choose the style operation type for a target path.

    XLSX cell targets and DOCX table targets use cell styling, DOCX paths
    containing ``/runs/`` use run styling, and everything else uses paragraph
    styling.
    """
    if document_format is DocumentFormat.docx:
        # Run targets take precedence over table targets for DOCX.
        if "/runs/" in target_path:
            return OperationType.set_run_style
        if _DOCX_TABLE_FILL_TARGET_RE.search(target_path):
            return OperationType.set_cell_style
    elif document_format is DocumentFormat.xlsx and _XLSX_CELL_FILL_TARGET_RE.match(target_path):
        return OperationType.set_cell_style
    return OperationType.set_paragraph_style
|
|
2051
|
+
|
|
2052
|
+
|
|
2053
|
+
# Ordered (step id, display label) pairs of the document workflow.
_WORKFLOW_DEFINITION: tuple[tuple[str, str], ...] = (
    ("inspect", "Inspect"),
    ("field_schema", "Field schema"),
    ("working_copy", "Working copy"),
    ("fill_style", "Fill/style"),
    ("diff", "Diff"),
    ("render", "Render"),
    ("validate", "Validate"),
    ("save", "Save"),
)

# Position of each step id within _WORKFLOW_DEFINITION.
_WORKFLOW_STEP_INDEX = {
    entry[0]: position for position, entry in enumerate(_WORKFLOW_DEFINITION)
}

# Workflow step that each document tool id reports against.
_TOOL_WORKFLOW_STEP_ID = {
    "document_inspect": "inspect",
    "document_extract": "field_schema",
    "document_form_schema": "field_schema",
    "document_copy_for_edit": "working_copy",
    "document_apply_fill": "fill_style",
    "document_apply_style": "fill_style",
    "document_render": "render",
    "document_validate_public_form": "validate",
    "document_save": "save",
}

_ARTIFACT_ID_REQUIRED_TOOL_IDS = frozenset(
    (
        "document_copy_for_edit",
        "document_apply_fill",
        "document_apply_style",
        "document_render",
        "document_validate_public_form",
        "document_save",
    )
)
|
|
2090
|
+
|
|
2091
|
+
|
|
2092
|
+
def _with_workflow_steps(
    result: DocumentToolResult,
    *,
    artifacts: dict[str, DocumentArtifact] | None = None,
    render_records: tuple[RenderArtifactRecord, ...] = (),
) -> DocumentToolResult:
    """Attach derived workflow steps to *result* unless it already has some.

    The result is returned unchanged when it already carries workflow steps
    or when no steps can be derived for its tool id.
    """
    if result.workflow_steps:
        return result
    derived = _workflow_steps_for_result(
        result,
        artifacts={} if not artifacts else artifacts,
        render_records=render_records,
    )
    if derived:
        return result.model_copy(update={"workflow_steps": derived})
    return result
|
|
2108
|
+
|
|
2109
|
+
|
|
2110
|
+
def _workflow_steps_for_result(
    result: DocumentToolResult,
    *,
    artifacts: dict[str, DocumentArtifact],
    render_records: tuple[RenderArtifactRecord, ...],
) -> list[DocumentWorkflowStep]:
    """Derive the full workflow step list implied by a single tool result.

    Every step of ``_WORKFLOW_DEFINITION`` starts as pending and is then
    adjusted according to the result status:

    * ok: delegated to ``_mark_ok_workflow_statuses``.
    * blocked / failed: earlier steps become completed, the ``save`` step
      becomes skipped, and the tool's own step becomes blocked / failed.
    * needs_input: the tool's own step becomes current.

    Args:
        result: Tool result whose ``tool_id`` selects the current step.
        artifacts: Known artifacts by id, used to resolve step hashes.
        render_records: Render records used for render step id and hash.

    Returns:
        One ``DocumentWorkflowStep`` per workflow step, or ``[]`` when the
        tool id has no workflow step.
    """
    current_step_id = _TOOL_WORKFLOW_STEP_ID.get(result.tool_id)
    if current_step_id is None:
        return []
    current_index = _WORKFLOW_STEP_INDEX[current_step_id]
    save_index = _WORKFLOW_STEP_INDEX["save"]
    statuses = [DocumentWorkflowStepStatus.pending for _step in _WORKFLOW_DEFINITION]

    if result.status is ToolResultStatus.ok:
        _mark_ok_workflow_statuses(result, statuses, current_index)
    elif result.status is ToolResultStatus.blocked:
        for index in range(_completed_before_blocked_step(current_step_id) + 1):
            statuses[index] = DocumentWorkflowStepStatus.completed
        # Apply the "save skipped" default first and the current step's own
        # status last. When the blocked tool is document_save, both writes hit
        # the same slot, and the step must read as blocked, not skipped.
        statuses[save_index] = DocumentWorkflowStepStatus.skipped
        statuses[current_index] = DocumentWorkflowStepStatus.blocked
    elif result.status is ToolResultStatus.failed:
        for index in range(current_index):
            statuses[index] = DocumentWorkflowStepStatus.completed
        # Same ordering as above so a failed save is reported as failed.
        statuses[save_index] = DocumentWorkflowStepStatus.skipped
        statuses[current_index] = DocumentWorkflowStepStatus.failed
    elif result.status is ToolResultStatus.needs_input:
        statuses[current_index] = DocumentWorkflowStepStatus.current

    return [
        _workflow_step(
            result,
            step_id=step_id,
            label=label,
            status=statuses[index],
            artifacts=artifacts,
            render_records=render_records,
        )
        for index, (step_id, label) in enumerate(_WORKFLOW_DEFINITION)
    ]
|
|
2148
|
+
|
|
2149
|
+
|
|
2150
|
+
def _mark_ok_workflow_statuses(
    result: DocumentToolResult,
    statuses: list[DocumentWorkflowStepStatus],
    current_index: int,
) -> None:
    """Mark, in place, the steps that a successful result proves completed.

    For ``document_save`` the steps through ``diff`` (when the result has a
    diff) or through ``working_copy`` are completed, plus ``save`` itself.
    For other tools the steps through ``_completed_workflow_index`` are
    completed.
    """
    done = DocumentWorkflowStepStatus.completed
    if result.tool_id != "document_save":
        for position in range(_completed_workflow_index(result, current_index) + 1):
            statuses[position] = done
        return

    anchor = "diff" if result.diff is not None else "working_copy"
    for position in range(_WORKFLOW_STEP_INDEX[anchor] + 1):
        statuses[position] = done
    statuses[_WORKFLOW_STEP_INDEX["save"]] = done
|
|
2169
|
+
|
|
2170
|
+
|
|
2171
|
+
def _workflow_step(
    result: DocumentToolResult,
    *,
    step_id: str,
    label: str,
    status: DocumentWorkflowStepStatus,
    artifacts: dict[str, DocumentArtifact],
    render_records: tuple[RenderArtifactRecord, ...],
) -> DocumentWorkflowStep:
    """Assemble one workflow step with its linked artifact id, hash and detail."""
    linked_id = _workflow_artifact_id(result, step_id, render_records)
    linked_sha256 = _workflow_artifact_sha256(
        linked_id,
        artifacts=artifacts,
        render_records=render_records,
    )
    detail = _workflow_detail(result, step_id)
    return DocumentWorkflowStep(
        step_id=step_id,
        label=label,
        status=status,
        artifact_id=linked_id,
        artifact_sha256=linked_sha256,
        detail=detail,
    )
|
|
2194
|
+
|
|
2195
|
+
|
|
2196
|
+
def _workflow_artifact_id(
    result: DocumentToolResult,
    step_id: str,
    render_records: tuple[RenderArtifactRecord, ...],
) -> str | None:
    """Resolve the artifact id to show for *step_id*, if any.

    Save results use their own mapping. Otherwise the diff's derivative
    artifact serves the ``fill_style`` and ``diff`` steps, the first render
    record serves the ``render`` step, and the result's artifact refs are the
    last resort.
    """
    if result.tool_id == "document_save":
        return _save_workflow_artifact_id(result, step_id)
    if step_id in ("fill_style", "diff") and result.diff is not None:
        return result.diff.derivative_artifact_id
    if step_id == "render" and render_records:
        return render_records[0].render_artifact_id
    return _workflow_artifact_id_from_refs(result, step_id)
|
|
2210
|
+
|
|
2211
|
+
|
|
2212
|
+
def _save_workflow_artifact_id(result: DocumentToolResult, step_id: str) -> str | None:
    """Map a workflow step of a save result to one of its artifact refs.

    The ``save`` step uses the second ref when there is one. The steps from
    ``inspect`` through ``diff`` use the first ref. Any other step gets
    ``None``.
    """
    refs = result.artifact_refs
    if step_id == "save" and len(refs) > 1:
        return refs[1]
    if step_id not in {"inspect", "field_schema", "working_copy", "fill_style", "diff"}:
        return None
    return refs[0] if refs else None
|
|
2218
|
+
|
|
2219
|
+
|
|
2220
|
+
def _workflow_artifact_id_from_refs(
    result: DocumentToolResult,
    step_id: str,
) -> str | None:
    """Pick an artifact id for *step_id* from the result's artifact refs.

    Render and validate results map ``working_copy``, ``fill_style`` and
    ``diff`` to the first ref, except that ``working_copy`` prefers the
    diff's source artifact when a diff exists. For other results ``inspect``
    and ``field_schema`` use the first ref and ``working_copy`` uses the
    second. A render result's ``render`` step uses the first ref.
    """
    refs = result.artifact_refs
    first_ref = refs[0] if refs else None
    from_review_tool = result.tool_id in {"document_render", "document_validate_public_form"}

    if from_review_tool and step_id in {"working_copy", "fill_style", "diff"}:
        if step_id == "working_copy" and result.diff is not None:
            return result.diff.source_artifact_id
        return first_ref
    if step_id in {"inspect", "field_schema"}:
        return first_ref
    if step_id == "working_copy":
        return refs[1] if len(refs) > 1 else None
    if step_id == "render" and result.tool_id == "document_render":
        return first_ref
    return None
|
|
2239
|
+
|
|
2240
|
+
|
|
2241
|
+
def _workflow_artifact_sha256(
    artifact_id: str | None,
    *,
    artifacts: dict[str, DocumentArtifact],
    render_records: tuple[RenderArtifactRecord, ...],
) -> str | None:
    """Look up the SHA-256 recorded for *artifact_id*.

    Known artifacts are consulted first, then the render records in order.
    ``None`` is returned for a missing id or an id found in neither place.
    """
    if artifact_id is None:
        return None
    known = artifacts.get(artifact_id)
    if known is not None:
        return known.sha256
    return next(
        (
            record.render_sha256
            for record in render_records
            if record.render_artifact_id == artifact_id
        ),
        None,
    )
|
|
2256
|
+
|
|
2257
|
+
|
|
2258
|
+
def _workflow_detail(result: DocumentToolResult, step_id: str) -> str | None:
    """Return the detail text for a workflow step, if it has one.

    The ``diff`` step reports the diff id. The ``render`` step reports the
    first hard gate failure, or the promotion state value when there are no
    failures. Every other case yields ``None``.
    """
    if step_id == "diff":
        return result.diff.diff_id if result.diff is not None else None
    if step_id == "render":
        gate = result.promotion_gate_result
        if gate is not None:
            failures = gate.hard_gate_failures
            return failures[0] if failures else gate.promotion_state.value
    return None
|
|
2265
|
+
|
|
2266
|
+
|
|
2267
|
+
def _completed_workflow_index(result: DocumentToolResult, current_index: int) -> int:
    """Return the index of the last step a successful result completes.

    Fill and style results that carry a diff complete everything through the
    ``diff`` step; any other result completes through *current_index*.
    """
    applied_edit = result.tool_id in {"document_apply_fill", "document_apply_style"}
    if applied_edit and result.diff is not None:
        return _WORKFLOW_STEP_INDEX["diff"]
    return current_index
|
|
2274
|
+
|
|
2275
|
+
|
|
2276
|
+
def _completed_before_blocked_step(current_step_id: str) -> int:
    """Return the index of the last step treated as completed before a blocked step.

    A blocked ``render`` step only vouches for steps through ``working_copy``.
    Any other step vouches for everything before it, which is ``-1`` (nothing)
    for the first step.
    """
    if current_step_id != "render":
        return max(_WORKFLOW_STEP_INDEX[current_step_id] - 1, -1)
    return _WORKFLOW_STEP_INDEX["working_copy"]
|
|
2280
|
+
|
|
2281
|
+
|
|
2282
|
+
def _document_result_from_stage(
    result: DocumentToolResult,
    *,
    correlation_id: str,
) -> DocumentToolResult:
    """Copy a stage result, relabelled as the ``document`` tool with *correlation_id*."""
    overrides = {"tool_id": "document", "correlation_id": correlation_id}
    return result.model_copy(update=overrides)
|
|
2288
|
+
|
|
2289
|
+
|
|
2290
|
+
def _unique_artifact_refs(values: list[str]) -> list[str]:
    """Drop duplicate refs while keeping the first occurrence of each, in order."""
    # dict preserves insertion order, so its keys are the first-seen values.
    return list(dict.fromkeys(values))
|
|
2299
|
+
|
|
2300
|
+
|
|
2301
|
+
def _merge_save_workflow_steps(
    base_steps: list[DocumentWorkflowStep],
    save_steps: list[DocumentWorkflowStep],
) -> list[DocumentWorkflowStep]:
    """Return *base_steps* with its ``save`` step replaced by the one from *save_steps*.

    All other steps are kept as they are. The base ``save`` step is kept too
    when *save_steps* has no ``save`` step.
    """
    replacements = {candidate.step_id: candidate for candidate in save_steps}
    return [
        replacements.get("save", original) if original.step_id == "save" else original
        for original in base_steps
    ]
|
|
2313
|
+
|
|
2314
|
+
|
|
2315
|
+
def _explicit_save_path_from_instruction(
    instruction: str,
    *,
    source_artifact: DocumentArtifact,
) -> str | None:
    """Extract an explicit save destination from the instruction text.

    Nothing is returned unless the instruction matches
    ``_DOCUMENT_SAVE_INTENT_RE``. Paths found by
    ``_EXPLICIT_LOCAL_DOCUMENT_PATH_RE`` are resolved; the source path itself
    and paths whose suffix is neither the source format's nor its editable
    derivative's are ignored. The last remaining path wins.
    """
    if _DOCUMENT_SAVE_INTENT_RE.search(instruction) is None:
        return None

    origin = Path(source_artifact.source_path).expanduser().resolve()
    permitted = {f".{source_artifact.format.value}"}
    derivative = _editable_derivative_format(source_artifact.format)
    if derivative is not None:
        permitted.add(f".{derivative.value}")

    chosen: Path | None = None
    for found in _EXPLICIT_LOCAL_DOCUMENT_PATH_RE.finditer(instruction):
        # Trailing sentence punctuation and closing brackets are not part of the path.
        target = Path(found.group(0).rstrip(".,;:)]})")).expanduser().resolve()
        if target != origin and target.suffix.lower() in permitted:
            chosen = target
    return None if chosen is None else str(chosen)
|
|
2338
|
+
|
|
2339
|
+
|
|
2340
|
+
class _LocalExportBlockedError(ValueError):
    """Signal that an explicit local export path is unsafe or incompatible.

    Attributes:
        reason: The ``BlockedReason`` supplied by the code that raised.
    """

    def __init__(self, reason: BlockedReason, message: str) -> None:
        """Record ``reason`` and pass ``message`` on to ``ValueError``."""
        self.reason = reason
        super().__init__(message)
|
|
2346
|
+
|
|
2347
|
+
|
|
2348
|
+
@dataclass(frozen=True, slots=True)
class _LocalExportTempFile:
    """Immutable record describing the temp file staged for a local export."""

    # Temp file location as a path (destination parent joined with ``name``).
    path: Path
    # Bare entry name of the temp file, used with ``dir_fd``-relative os calls.
    name: str
    # Open OS-level descriptor for the temp file; ``_write_explicit_local_export``
    # closes it once publishing has finished.
    fd: int
|
|
2353
|
+
|
|
2354
|
+
|
|
2355
|
+
def _write_explicit_local_export(
    source_artifact: DocumentArtifact,
    *,
    export_artifact_id: str,
    payload: bytes,
    destination_path: str,
    allow_pdfa_alias: bool = False,
) -> DocumentSavedExport:
    """Write ``payload`` to an explicit local destination without clobbering.

    The payload is staged in a temp file inside the destination's parent
    directory and then published under the destination name. All directory
    operations go through a descriptor opened on the parent directory.

    Args:
        source_artifact: Artifact the export derives from; its ``format`` drives
            destination validation and its ``artifact_id`` is recorded.
        export_artifact_id: Identifier recorded for the exported artifact.
        payload: Exact bytes to write.
        destination_path: Requested destination (``~`` is expanded by validation).
        allow_pdfa_alias: Forwarded to destination validation.

    Returns:
        A ``DocumentSavedExport`` with the destination path, the SHA-256 and
        byte size of ``payload``, and ``overwrite_existing=False``.

    Raises:
        _LocalExportBlockedError: If the destination fails validation, the
            parent directory is unavailable or changes, or publishing fails.
    """
    destination = _validated_local_export_destination(
        destination_path,
        document_format=source_artifact.format,
        allow_pdfa_alias=allow_pdfa_alias,
    )
    destination.parent.mkdir(parents=True, exist_ok=True)
    _raise_if_local_export_parent_is_symlink(destination)
    parent_fd = _open_local_export_parent(destination.parent)
    try:
        # This check runs inside the ``try`` so that ``parent_fd`` is closed
        # even when the parent directory was swapped right after it was opened.
        _raise_if_local_export_parent_changed(parent_fd, destination)
        temp_file = _write_tempfile_for_local_export(destination, payload, parent_fd)
        try:
            _publish_tempfile_without_clobber(temp_file, destination, parent_fd)
            _fsync_directory_fd_best_effort(parent_fd)
        finally:
            try:
                # Always drop the temp entry: first relative to the directory
                # descriptor, then by path as a best-effort fallback.
                _cleanup_local_export_entry_by_fd(temp_file.name, parent_fd)
                with contextlib.suppress(OSError):
                    if temp_file.path.exists():
                        temp_file.path.unlink()
            finally:
                os.close(temp_file.fd)
    finally:
        os.close(parent_fd)
    return DocumentSavedExport(
        export_artifact_id=export_artifact_id,
        source_artifact_id=source_artifact.artifact_id,
        local_path=destination,
        sha256=hashlib.sha256(payload).hexdigest(),
        byte_size=len(payload),
        overwrite_existing=False,
    )
|
|
2395
|
+
|
|
2396
|
+
|
|
2397
|
+
def _blocked_local_export_destination_result(
    request: DocumentSaveRequest,
    artifact: DocumentArtifact,
    *,
    allow_pdfa_alias: bool = False,
) -> DocumentToolResult | None:
    """Pre-check a save request's explicit destination.

    Args:
        request: Save request; only ``destination_path`` and ``correlation_id``
            are read.
        artifact: Artifact being saved; supplies the format and artifact id.
        allow_pdfa_alias: Forwarded to destination validation.

    Returns:
        ``None`` when the request has no destination path or the path passes
        validation; otherwise the ``document_save`` tool result built from the
        validation error's message and reason.
    """
    requested_path = request.destination_path
    if requested_path is None:
        return None
    try:
        _validated_local_export_destination(
            requested_path,
            document_format=artifact.format,
            allow_pdfa_alias=allow_pdfa_alias,
        )
    except _LocalExportBlockedError as blocked:
        return unsupported_document_tool_result(
            tool_id="document_save",
            correlation_id=request.correlation_id,
            artifact_refs=(artifact.artifact_id,),
            message=str(blocked),
            reason=blocked.reason,
        )
    else:
        return None
|
|
2420
|
+
|
|
2421
|
+
|
|
2422
|
+
def _validated_local_export_destination(
    destination_path: str,
    *,
    document_format: DocumentFormat,
    allow_pdfa_alias: bool = False,
) -> Path:
    """Validate an explicit local export destination and return it as a path.

    Args:
        destination_path: Requested destination; ``~`` is expanded and a
            relative path is anchored at the current working directory.
        document_format: Format of the document being exported; decides which
            file suffixes are accepted.
        allow_pdfa_alias: When true and the format is PDF, ``.pdfa`` is also
            accepted.

    Returns:
        The absolute (not symlink-resolved) destination path.

    Raises:
        _LocalExportBlockedError: If the destination or one of its ancestors is
            a symbolic link, the name or any path component is hidden/invalid,
            the destination already exists, or the suffix is not accepted.
    """
    destination = _absolute_local_export_path(Path(destination_path).expanduser())
    if destination.is_symlink():
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export destination is a symbolic link: {destination}",
        )
    _raise_if_local_export_path_has_symlinked_ancestor(destination)

    leaf = destination.name
    if leaf.startswith(".") or leaf in {"", ".", ".."}:
        raise _LocalExportBlockedError(
            BlockedReason.hidden_destination,
            f"Document local export destination is hidden or invalid: {destination}",
        )

    for component in destination.parts:
        # The root marker and a bare "." are not treated as hidden components.
        if component in {"/", "."}:
            continue
        if component.startswith("."):
            raise _LocalExportBlockedError(
                BlockedReason.hidden_destination,
                f"Document local export destination contains a hidden path component: {destination}",
            )

    if destination.exists():
        if destination.is_dir():
            raise _LocalExportBlockedError(
                BlockedReason.validation_failed,
                f"Document local export destination is a directory: {destination}",
            )
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export destination already exists: {destination}",
        )

    permitted_suffixes = {f".{document_format.value}"}
    if document_format in {DocumentFormat.hwpx, DocumentFormat.owpml}:
        # These two formats accept each other's suffix.
        permitted_suffixes |= {".hwpx", ".owpml"}
    if allow_pdfa_alias and document_format is DocumentFormat.pdf:
        permitted_suffixes.add(".pdfa")

    if destination.suffix.lower() in permitted_suffixes:
        return destination
    raise _LocalExportBlockedError(
        BlockedReason.extension_mismatch,
        "Document local export destination extension must match "
        f"{' or '.join(sorted(permitted_suffixes))}: {destination}",
    )
|
|
2471
|
+
|
|
2472
|
+
|
|
2473
|
+
def _absolute_local_export_path(path: Path) -> Path:
    """Return ``path`` unchanged if absolute, else joined onto the working directory.

    No symlink resolution or ``..`` normalisation is performed.
    """
    return path if path.is_absolute() else Path.cwd() / path
|
|
2477
|
+
|
|
2478
|
+
|
|
2479
|
+
def _pdfa_export_requested(
    artifact: DocumentArtifact,
    *,
    destination_display_name: str,
    destination_path: str | None,
) -> bool:
    """Tell whether a PDF artifact is being exported under a ``.pdfa`` name.

    Args:
        artifact: Artifact being exported; non-PDF artifacts always yield ``False``.
        destination_display_name: Display name requested for the export.
        destination_path: Optional explicit destination path.

    Returns:
        ``True`` when the artifact is a PDF and the destination display name,
        the destination path, or the artifact's own display name ends in
        ``.pdfa`` (case-insensitive); ``False`` otherwise.
    """
    if artifact.format is not DocumentFormat.pdf:
        return False
    pdfa_suffix = ".pdfa"
    # ``or`` keeps the original check order and stops at the first hit.
    return (
        Path(destination_display_name).suffix.lower() == pdfa_suffix
        or (
            destination_path is not None
            and Path(destination_path).expanduser().suffix.lower() == pdfa_suffix
        )
        or Path(artifact.display_name).suffix.lower() == pdfa_suffix
    )
|
|
2495
|
+
|
|
2496
|
+
|
|
2497
|
+
def _write_tempfile_for_local_export(
    destination: Path,
    payload: bytes,
    parent_fd: int,
) -> _LocalExportTempFile:
    """Create a fresh temp file next to ``destination`` and write ``payload`` to it.

    The temp entry is created relative to ``parent_fd`` with exclusive-create
    flags (plus ``O_NOFOLLOW`` where the platform defines it) and mode
    ``0o600``. The parent directory is re-checked before and after the write.

    Args:
        destination: Final export path; only its parent and name are used.
        payload: Bytes to write and fsync.
        parent_fd: Open descriptor of ``destination.parent``.

    Returns:
        A ``_LocalExportTempFile`` whose ``fd`` is still open; the caller owns
        it and must close it.

    Raises:
        _LocalExportBlockedError: If the temp entry cannot be created or the
            parent directory changed while writing.
        OSError: If writing or syncing the payload fails.
    """
    temp_name = _local_export_temp_name(destination)
    temp_path = destination.parent / temp_name
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
    if hasattr(os, "O_NOFOLLOW"):
        open_flags |= os.O_NOFOLLOW
    try:
        file_fd = os.open(temp_name, open_flags, 0o600, dir_fd=parent_fd)
    except FileExistsError as exc:
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export temp path already exists: {temp_path}",
        ) from exc
    except OSError as exc:
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export temp path is unavailable: {temp_path}",
        ) from exc
    # From here on ``file_fd`` is always a valid descriptor: ``os.open`` either
    # returned one or raised into a handler above that re-raises.
    try:
        _raise_if_local_export_parent_changed(parent_fd, destination)
        _write_all_to_fd(file_fd, payload)
        os.fsync(file_fd)
        _raise_if_local_export_parent_changed(parent_fd, destination)
    except (OSError, _LocalExportBlockedError):
        # A failing close must neither skip removal of the temp entry nor
        # replace the error that brought us here.
        with contextlib.suppress(OSError):
            os.close(file_fd)
        _cleanup_local_export_entry_by_fd(temp_name, parent_fd)
        with contextlib.suppress(OSError):
            if temp_path.exists():
                temp_path.unlink()
        raise
    return _LocalExportTempFile(path=temp_path, name=temp_name, fd=file_fd)
|
|
2539
|
+
|
|
2540
|
+
|
|
2541
|
+
def _local_export_temp_name(destination: Path) -> str:
    """Build a hidden, randomised temp entry name for ``destination``.

    Returns:
        ``.<destination name>.<16 hex chars>.tmp``; the hex part comes from
        ``secrets.token_hex(8)``.
    """
    nonce = secrets.token_hex(8)
    # The leading empty piece yields the leading dot of the name.
    return ".".join(("", destination.name, nonce, "tmp"))
|
|
2543
|
+
|
|
2544
|
+
|
|
2545
|
+
def _write_all_to_fd(file_fd: int, payload: bytes) -> None:
    """Write every byte of ``payload`` to ``file_fd``, retrying short writes.

    Args:
        file_fd: Open, writable OS-level file descriptor.
        payload: Bytes to write; an empty payload writes nothing.

    Raises:
        OSError: Propagated from ``os.write``.
    """
    buffer = memoryview(payload)
    total = len(buffer)
    offset = 0
    while offset < total:
        # ``os.write`` may accept fewer bytes than offered; resume after them.
        offset += os.write(file_fd, buffer[offset:])
|
|
2550
|
+
|
|
2551
|
+
|
|
2552
|
+
def _publish_tempfile_without_clobber(
    temp_file: _LocalExportTempFile,
    destination: Path,
    parent_fd: int,
) -> None:
    """Hard-link the staged temp file to ``destination.name`` inside ``parent_fd``.

    Both link endpoints are resolved relative to ``parent_fd``. An existing
    destination entry surfaces as ``FileExistsError`` and is reported instead
    of being replaced. The temp entry is only removed here on some failure
    paths; on success it is left in place.

    Args:
        temp_file: Staged temp file (entry name, path, and open descriptor).
        destination: Final export path; its name is the link target.
        parent_fd: Open descriptor of ``destination.parent``.

    Raises:
        _LocalExportBlockedError: If the parent directory no longer matches
            ``parent_fd``, the temp entry no longer refers to ``temp_file.fd``,
            the link cannot be created, or a post-link check fails.
    """
    _raise_if_local_export_parent_changed(parent_fd, destination)
    # The temp entry must still be the very file we hold open (same device and
    # inode); otherwise it was replaced after it was written.
    if not _local_export_entry_matches_open_file(parent_fd, temp_file.name, temp_file.fd):
        _cleanup_local_export_entry_by_fd(temp_file.name, parent_fd)
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export temp path changed during publish: {temp_file.path}",
        )
    try:
        os.link(
            temp_file.name,
            destination.name,
            src_dir_fd=parent_fd,
            dst_dir_fd=parent_fd,
            follow_symlinks=False,
        )
        # Post-link checks: each failure removes both the new destination entry
        # and the temp entry before raising. An OSError raised by those unlinks
        # is handled by the ``except`` clauses below.
        if not _local_export_parent_is_current(parent_fd, destination.parent):
            _unlink_local_export_entry_by_fd(destination.name, parent_fd)
            _unlink_local_export_entry_by_fd(temp_file.name, parent_fd)
            raise _LocalExportBlockedError(
                BlockedReason.validation_failed,
                f"Document local export destination changed during publish: {destination}",
            )
        # Path-based existence check of the freshly linked destination.
        if not destination.exists():
            _unlink_local_export_entry_by_fd(destination.name, parent_fd)
            _unlink_local_export_entry_by_fd(temp_file.name, parent_fd)
            raise _LocalExportBlockedError(
                BlockedReason.validation_failed,
                f"Document local export destination is unavailable after publish: {destination}",
            )
        # The destination entry must be the same file as the open temp descriptor.
        if not _local_export_entry_matches_open_file(parent_fd, destination.name, temp_file.fd):
            _unlink_local_export_entry_by_fd(destination.name, parent_fd)
            _unlink_local_export_entry_by_fd(temp_file.name, parent_fd)
            raise _LocalExportBlockedError(
                BlockedReason.validation_failed,
                f"Document local export temp path changed during publish: {temp_file.path}",
            )
    except FileExistsError as exc:
        # Only remove the destination entry when it is our own file; an entry
        # that belongs to someone else is left untouched.
        if _local_export_entry_matches_open_file(parent_fd, destination.name, temp_file.fd):
            _unlink_local_export_entry_by_fd(destination.name, parent_fd)
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export destination already exists: {destination}",
        ) from exc
    except FileNotFoundError as exc:
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export destination changed during publish: {destination}",
        ) from exc
    except OSError as exc:
        # Any other OS-level failure: drop the temp entry (best effort) and report.
        _cleanup_local_export_entry_by_fd(temp_file.name, parent_fd)
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export destination changed during publish: {destination}",
        ) from exc
|
|
2611
|
+
|
|
2612
|
+
|
|
2613
|
+
def _raise_if_local_export_parent_is_symlink(destination: Path) -> None:
    """Raise if any ancestor directory of ``destination`` is a symbolic link.

    Thin alias: delegates directly to
    ``_raise_if_local_export_path_has_symlinked_ancestor``.
    """
    _raise_if_local_export_path_has_symlinked_ancestor(destination)
|
|
2615
|
+
|
|
2616
|
+
|
|
2617
|
+
def _raise_if_local_export_path_has_symlinked_ancestor(destination: Path) -> None:
    """Reject ``destination`` when any directory above it is a symbolic link.

    Ancestors are examined from the outermost one down to the direct parent;
    ``destination`` itself is not examined.

    Raises:
        _LocalExportBlockedError: Naming the first symlinked ancestor found.
    """
    parent = destination.parent
    # ``parents`` lists nearest-first; reverse it to walk from the root downwards.
    for ancestor in (*reversed(parent.parents), parent):
        if not ancestor.is_symlink():
            continue
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export destination parent is a symbolic link: {ancestor}",
        )
|
|
2625
|
+
|
|
2626
|
+
|
|
2627
|
+
def _local_export_path_has_symlinked_ancestor(destination: Path) -> bool:
    """Boolean form of ``_raise_if_local_export_path_has_symlinked_ancestor``.

    Returns:
        ``True`` when that check raises ``_LocalExportBlockedError``,
        ``False`` when it passes.
    """
    try:
        _raise_if_local_export_path_has_symlinked_ancestor(destination)
    except _LocalExportBlockedError:
        return True
    else:
        return False
|
|
2633
|
+
|
|
2634
|
+
|
|
2635
|
+
def _open_local_export_parent(directory: Path) -> int:
    """Open ``directory`` one path component at a time and return its descriptor.

    The first component is opened directly; each later component is opened
    relative to the previous descriptor, adding ``O_NOFOLLOW`` where the
    platform defines it. ``O_DIRECTORY`` is likewise added to every open when
    available.

    Args:
        directory: Directory to open; a relative path is anchored at the
            current working directory first.

    Returns:
        An open read-only descriptor for ``directory``; the caller must close it.

    Raises:
        _LocalExportBlockedError: If the path has no components or any
            component cannot be opened.
    """
    root_flags = os.O_RDONLY | getattr(os, "O_DIRECTORY", 0)
    directory = _absolute_local_export_path(directory)
    step_flags = root_flags | getattr(os, "O_NOFOLLOW", 0)

    components = directory.parts
    if not components:
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export destination parent is unavailable: {directory}",
        )

    walk_fd: int | None = None
    try:
        walk_fd = os.open(components[0], root_flags)
        for component in components[1:]:
            child_fd = os.open(component, step_flags, dir_fd=walk_fd)
            # The previous level is no longer needed once its child is open.
            os.close(walk_fd)
            walk_fd = child_fd
        return walk_fd
    except OSError as exc:
        if walk_fd is not None:
            os.close(walk_fd)
        raise _LocalExportBlockedError(
            BlockedReason.validation_failed,
            f"Document local export destination parent is unavailable: {directory}",
        ) from exc
|
|
2664
|
+
|
|
2665
|
+
|
|
2666
|
+
def _raise_if_local_export_parent_changed(parent_fd: int, destination: Path) -> None:
    """Raise unless ``destination.parent`` is still the directory behind ``parent_fd``.

    Raises:
        _LocalExportBlockedError: When ``_local_export_parent_is_current``
            reports a mismatch.
    """
    parent = destination.parent
    if _local_export_parent_is_current(parent_fd, parent):
        return
    raise _LocalExportBlockedError(
        BlockedReason.validation_failed,
        f"Document local export destination parent changed during publish: {parent}",
    )
|
|
2673
|
+
|
|
2674
|
+
|
|
2675
|
+
def _local_export_parent_is_current(parent_fd: int, directory: Path) -> bool:
    """Check that ``directory`` still names the directory opened as ``parent_fd``.

    Returns:
        ``False`` when ``directory`` or any of its ancestors is a symbolic
        link, when either stat call fails, or when the device/inode pairs
        differ; ``True`` otherwise.
    """
    # Probing a fictitious child makes the ancestor check cover ``directory``
    # itself in addition to everything above it.
    probe = directory / "__ummaya_parent_probe__"
    if _local_export_path_has_symlinked_ancestor(probe):
        return False
    try:
        path_stat = directory.stat()
        fd_stat = os.fstat(parent_fd)
    except OSError:
        return False
    # ``samestat`` compares exactly the device and inode numbers.
    return os.path.samestat(path_stat, fd_stat)
|
|
2684
|
+
|
|
2685
|
+
|
|
2686
|
+
def _local_export_entry_matches_open_file(
    parent_fd: int,
    entry_name: str,
    file_fd: int,
) -> bool:
    """Tell whether ``entry_name`` in ``parent_fd`` is the file open as ``file_fd``.

    The entry is stat'ed relative to ``parent_fd`` without following symlinks.

    Returns:
        ``True`` when both refer to the same device and inode; ``False`` when
        they differ or either stat call fails.
    """
    try:
        by_name = os.stat(entry_name, dir_fd=parent_fd, follow_symlinks=False)
        by_fd = os.fstat(file_fd)
    except OSError:
        return False
    # ``samestat`` compares exactly the device and inode numbers.
    return os.path.samestat(by_name, by_fd)
|
|
2697
|
+
|
|
2698
|
+
|
|
2699
|
+
def _unlink_local_export_entry_by_fd(entry_name: str, parent_fd: int) -> None:
    """Remove ``entry_name`` relative to ``parent_fd``; a missing entry is fine.

    Raises:
        OSError: Any unlink failure other than ``FileNotFoundError``.
    """
    with contextlib.suppress(FileNotFoundError):
        os.unlink(entry_name, dir_fd=parent_fd)
|
|
2704
|
+
|
|
2705
|
+
|
|
2706
|
+
def _cleanup_local_export_entry_by_fd(entry_name: str, parent_fd: int) -> None:
    """Best-effort removal of ``entry_name`` relative to ``parent_fd``.

    Unlike ``_unlink_local_export_entry_by_fd`` this never raises for OS-level
    failures: every ``OSError`` is deliberately ignored.
    """
    try:
        os.unlink(entry_name, dir_fd=parent_fd)
    except OSError:
        # Cleanup must not mask whatever error the caller is already handling.
        return
|
|
2709
|
+
|
|
2710
|
+
|
|
2711
|
+
def _fsync_directory_best_effort(directory: Path) -> None:
    """Try to fsync ``directory``; every ``OSError`` from open or fsync is ignored.

    The descriptor opened here is always closed again.
    """
    try:
        handle = os.open(directory, os.O_RDONLY)
    except OSError:
        return
    try:
        with contextlib.suppress(OSError):
            os.fsync(handle)
    finally:
        os.close(handle)
|
|
2722
|
+
|
|
2723
|
+
|
|
2724
|
+
def _fsync_directory_fd_best_effort(directory_fd: int) -> None:
    """Try to fsync an already-open directory descriptor, ignoring ``OSError``.

    The descriptor is not closed here.
    """
    try:
        os.fsync(directory_fd)
    except OSError:
        # Syncing is an optional durability step; failure is not an error.
        return
|
|
2727
|
+
|
|
2728
|
+
|
|
2729
|
+
# Names exported by a star-import of this module.
__all__ = [
    "DOCUMENT_TOOL_IDS",
    "DocumentToolRuntime",
    "register_document_tools",
]
|