ummaya 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/bin/ummaya +10 -1
- package/npm-shrinkwrap.json +253 -2
- package/package.json +5 -1
- package/prompts/manifest.yaml +1 -1
- package/prompts/system_v1.md +1 -0
- package/pyproject.toml +26 -2
- package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
- package/src/ummaya/_canonical/__init__.py +2 -0
- package/src/ummaya/engine/engine.py +29 -132
- package/src/ummaya/evidence/__init__.py +21 -2
- package/src/ummaya/evidence/dataset_contract.py +193 -0
- package/src/ummaya/evidence/document_authoring_cases.py +33 -0
- package/src/ummaya/evidence/document_harness.py +313 -0
- package/src/ummaya/evidence/document_viewer_ux.py +391 -0
- package/src/ummaya/evidence/gates.py +70 -0
- package/src/ummaya/evidence/json_types.py +20 -0
- package/src/ummaya/evidence/models.py +88 -1
- package/src/ummaya/evidence/output_payload.py +89 -0
- package/src/ummaya/evidence/payload_documents.py +233 -0
- package/src/ummaya/evidence/route_contracts.py +224 -0
- package/src/ummaya/evidence/route_helpers.py +150 -0
- package/src/ummaya/evidence/runner.py +81 -212
- package/src/ummaya/evidence/source_provenance.py +246 -0
- package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
- package/src/ummaya/evidence/tool_layer.py +39 -0
- package/src/ummaya/evidence/tool_layer_models.py +151 -0
- package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
- package/src/ummaya/ipc/document_intent_normalization.py +185 -0
- package/src/ummaya/ipc/frame_schema.py +5 -5
- package/src/ummaya/ipc/route_diagnostics.py +73 -0
- package/src/ummaya/ipc/stdio.py +1109 -477
- package/src/ummaya/llm/client.py +102 -3
- package/src/ummaya/llm/config.py +8 -3
- package/src/ummaya/primitives/__init__.py +6 -2
- package/src/ummaya/primitives/delegation.py +1 -1
- package/src/ummaya/primitives/document.py +28 -0
- package/src/ummaya/settings.py +0 -3
- package/src/ummaya/tools/discovery_bridge.py +17 -1
- package/src/ummaya/tools/documents/__init__.py +297 -0
- package/src/ummaya/tools/documents/adapter_registry.py +487 -0
- package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
- package/src/ummaya/tools/documents/artifact_store.py +454 -0
- package/src/ummaya/tools/documents/authoring.py +283 -0
- package/src/ummaya/tools/documents/baselines.py +114 -0
- package/src/ummaya/tools/documents/capability.py +331 -0
- package/src/ummaya/tools/documents/contracts.py +112 -0
- package/src/ummaya/tools/documents/conversion.py +521 -0
- package/src/ummaya/tools/documents/diff.py +275 -0
- package/src/ummaya/tools/documents/engines.py +163 -0
- package/src/ummaya/tools/documents/evaluation.py +291 -0
- package/src/ummaya/tools/documents/explicit_values.py +108 -0
- package/src/ummaya/tools/documents/fixtures.py +174 -0
- package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
- package/src/ummaya/tools/documents/formats/__init__.py +2 -0
- package/src/ummaya/tools/documents/formats/archive.py +528 -0
- package/src/ummaya/tools/documents/formats/base.py +41 -0
- package/src/ummaya/tools/documents/formats/code_file.py +211 -0
- package/src/ummaya/tools/documents/formats/data_file.py +272 -0
- package/src/ummaya/tools/documents/formats/hwp.py +284 -0
- package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
- package/src/ummaya/tools/documents/formats/odf.py +435 -0
- package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
- package/src/ummaya/tools/documents/formats/passive.py +766 -0
- package/src/ummaya/tools/documents/formats/pdf.py +702 -0
- package/src/ummaya/tools/documents/formats/text_web.py +268 -0
- package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
- package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
- package/src/ummaya/tools/documents/inspection.py +289 -0
- package/src/ummaya/tools/documents/intake.py +1079 -0
- package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
- package/src/ummaya/tools/documents/models.py +1598 -0
- package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
- package/src/ummaya/tools/documents/orchestrator.py +96 -0
- package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
- package/src/ummaya/tools/documents/patch.py +170 -0
- package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
- package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
- package/src/ummaya/tools/documents/permissions.py +110 -0
- package/src/ummaya/tools/documents/planner.py +616 -0
- package/src/ummaya/tools/documents/registry.py +2733 -0
- package/src/ummaya/tools/documents/render.py +978 -0
- package/src/ummaya/tools/documents/render_comparison.py +113 -0
- package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
- package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
- package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
- package/src/ummaya/tools/documents/reread.py +157 -0
- package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
- package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
- package/src/ummaya/tools/documents/scorecard.py +184 -0
- package/src/ummaya/tools/documents/socratic_planner.py +193 -0
- package/src/ummaya/tools/documents/style.py +48 -0
- package/src/ummaya/tools/documents/tool_defs.py +523 -0
- package/src/ummaya/tools/documents/validate.py +347 -0
- package/src/ummaya/tools/executor.py +29 -0
- package/src/ummaya/tools/live_proxy.py +0 -3
- package/src/ummaya/tools/models.py +5 -1
- package/src/ummaya/tools/register_all.py +8 -0
- package/src/ummaya/tools/registry.py +10 -1
- package/src/ummaya/tools/routing/__init__.py +59 -0
- package/src/ummaya/tools/routing/builder.py +105 -0
- package/src/ummaya/tools/routing/cards.py +29 -0
- package/src/ummaya/tools/routing/decision_service.py +534 -0
- package/src/ummaya/tools/routing/decision_types.py +74 -0
- package/src/ummaya/tools/routing/feasibility.py +122 -0
- package/src/ummaya/tools/routing/intent.py +17 -0
- package/src/ummaya/tools/routing/intent_extractor.py +207 -0
- package/src/ummaya/tools/routing/intent_patterns.py +160 -0
- package/src/ummaya/tools/routing/intent_public_data.py +150 -0
- package/src/ummaya/tools/routing/intent_types.py +48 -0
- package/src/ummaya/tools/routing/lint.py +78 -0
- package/src/ummaya/tools/routing/metadata.py +174 -0
- package/src/ummaya/tools/routing/projection.py +340 -0
- package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
- package/src/ummaya/tools/routing/schema.py +81 -0
- package/src/ummaya/tools/routing/types.py +96 -0
- package/src/ummaya/tools/routing_index.py +2 -2
- package/src/ummaya/tools/search.py +34 -746
- package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
- package/tui/package.json +1 -1
- package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
- package/tui/src/QueryEngine.ts +12 -8
- package/tui/src/bridge/inboundAttachments.ts +3 -3
- package/tui/src/cli/handlers/auth.ts +3 -12
- package/tui/src/cli/print.ts +7 -7
- package/tui/src/commands/insights.ts +1 -1
- package/tui/src/commands/install-github-app/types.ts +8 -30
- package/tui/src/commands/plugin/types.ts +6 -28
- package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
- package/tui/src/commands/rename/generateSessionName.ts +1 -1
- package/tui/src/components/Feedback.tsx +1 -1
- package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
- package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
- package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
- package/tui/src/components/Spinner/types.ts +6 -28
- package/tui/src/components/agents/generateAgent.ts +1 -1
- package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
- package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
- package/tui/src/components/mcp/types.ts +16 -38
- package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
- package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
- package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
- package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
- package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
- package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
- package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
- package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
- package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
- package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
- package/tui/src/components/primitive/index.tsx +43 -1
- package/tui/src/components/primitive/types.ts +137 -0
- package/tui/src/components/ui/option.ts +4 -26
- package/tui/src/constants/common.ts +0 -2
- package/tui/src/constants/prompts.ts +4 -3
- package/tui/src/constants/querySource.ts +4 -26
- package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
- package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
- package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
- package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
- package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
- package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
- package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
- package/tui/src/hooks/useApiKeyVerification.ts +1 -1
- package/tui/src/hooks/useVirtualScroll.ts +1 -1
- package/tui/src/ink/ink.tsx +33 -14
- package/tui/src/ink/reconciler.ts +2 -3
- package/tui/src/ink/render-to-screen.ts +30 -10
- package/tui/src/ipc/bridge.ts +62 -15
- package/tui/src/ipc/bridgeSingleton.ts +5 -1
- package/tui/src/ipc/codec.ts +3 -3
- package/tui/src/ipc/frames.generated.ts +12 -12
- package/tui/src/ipc/llmClient.ts +151 -27
- package/tui/src/ipc/schema/frame.schema.json +1 -1
- package/tui/src/keybindings/defaultBindings.ts +4 -0
- package/tui/src/main.tsx +29 -11
- package/tui/src/native-ts/file-index/index.ts +33 -3
- package/tui/src/observability/surface.ts +2 -2
- package/tui/src/probes/toolRegistryProbe.tsx +3 -1
- package/tui/src/projectOnboardingState.ts +7 -6
- package/tui/src/query/chatMessageTypes.ts +18 -0
- package/tui/src/query/chatMessagesBuilder.ts +1 -1
- package/tui/src/query/deps.ts +1 -1
- package/tui/src/query/messageGuards.ts +106 -0
- package/tui/src/query/publicDataTerminalRepair.ts +384 -0
- package/tui/src/query/run.ts +1075 -0
- package/tui/src/query/supportBoundary.ts +168 -0
- package/tui/src/query/toolResultErrors.ts +103 -0
- package/tui/src/query/toolRunner.ts +687 -0
- package/tui/src/query/unavailableToolRepair.ts +118 -0
- package/tui/src/query.ts +9 -2186
- package/tui/src/screens/REPL.tsx +40 -29
- package/tui/src/services/api/adapterManifest.ts +4 -0
- package/tui/src/services/api/backendChat/events.ts +117 -0
- package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
- package/tui/src/services/api/backendChat/frame.ts +9 -0
- package/tui/src/services/api/backendChat/streaming.ts +430 -0
- package/tui/src/services/api/backendChat/types.ts +62 -0
- package/tui/src/services/api/backendChat.ts +1 -0
- package/tui/src/services/api/client.ts +65 -2
- package/tui/src/services/api/errorUtils.ts +5 -5
- package/tui/src/services/api/errors.ts +1 -1
- package/tui/src/services/api/logging.ts +1 -1
- package/tui/src/services/api/ummaya/evidence.ts +194 -0
- package/tui/src/services/api/ummaya/messages.ts +255 -0
- package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
- package/tui/src/services/api/ummaya/provider.ts +200 -0
- package/tui/src/services/api/ummaya/reasoning.ts +24 -0
- package/tui/src/services/api/ummaya/request.ts +200 -0
- package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
- package/tui/src/services/api/ummaya/streaming.ts +365 -0
- package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
- package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
- package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
- package/tui/src/services/api/ummaya/types.ts +110 -0
- package/tui/src/services/api/ummaya/usage.ts +30 -0
- package/tui/src/services/api/ummaya.ts +26 -418
- package/tui/src/services/api/withRetry.ts +1 -1
- package/tui/src/services/awaySummary.ts +2 -2
- package/tui/src/services/claudeAiLimits.ts +1 -1
- package/tui/src/services/compact/autoCompact.ts +1 -1
- package/tui/src/services/compact/compact.ts +1 -1
- package/tui/src/services/lsp/types.ts +8 -30
- package/tui/src/services/tips/types.ts +6 -28
- package/tui/src/services/tokenEstimation.ts +1 -1
- package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
- package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
- package/tui/src/services/tools/toolExecution.ts +94 -1
- package/tui/src/store/pendingPermissionSlot.ts +1 -1
- package/tui/src/store/session-store.ts +10 -36
- package/tui/src/stubs/any-stub.ts +15 -10
- package/tui/src/stubs/color-diff-napi.ts +37 -23
- package/tui/src/stubs/globals.d.ts +3 -3
- package/tui/src/stubs/macro-preload.ts +23 -12
- package/tui/src/tools/AdapterTool/AdapterTool.ts +1207 -714
- package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
- package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
- package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
- package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
- package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
- package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
- package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
- package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
- package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
- package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
- package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
- package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
- package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
- package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
- package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
- package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
- package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
- package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
- package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
- package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
- package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
- package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
- package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
- package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
- package/tui/src/tools/AgentTool/permissions.ts +39 -0
- package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
- package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
- package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
- package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
- package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
- package/tui/src/tools/AgentTool/runAgent.ts +1 -1
- package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
- package/tui/src/tools/AgentTool/schemas.ts +196 -0
- package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
- package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
- package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
- package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
- package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
- package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
- package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
- package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
- package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
- package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
- package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
- package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
- package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
- package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
- package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
- package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
- package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
- package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
- package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
- package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
- package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
- package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
- package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
- package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
- package/tui/src/tools/BashTool/call.ts +202 -0
- package/tui/src/tools/BashTool/callLoader.ts +35 -0
- package/tui/src/tools/BashTool/commandClassification.ts +151 -0
- package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
- package/tui/src/tools/BashTool/cwdReset.ts +33 -0
- package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
- package/tui/src/tools/BashTool/modeValidation.ts +13 -1
- package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
- package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
- package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
- package/tui/src/tools/BashTool/resultLoader.ts +29 -0
- package/tui/src/tools/BashTool/resultMapping.ts +83 -0
- package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
- package/tui/src/tools/BashTool/schemas.ts +65 -0
- package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
- package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
- package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
- package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
- package/tui/src/tools/BashTool/uiLoader.ts +37 -0
- package/tui/src/tools/BriefTool/upload.ts +1 -1
- package/tui/src/tools/CalculatorTool/parser.ts +2 -2
- package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
- package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
- package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
- package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
- package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
- package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
- package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
- package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
- package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
- package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
- package/tui/src/tools/FileEditTool/call.ts +228 -0
- package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
- package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
- package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
- package/tui/src/tools/FileWriteTool/call.ts +223 -0
- package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
- package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
- package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +25 -32
- package/tui/src/tools/LookupPrimitive/prompt.ts +0 -2
- package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
- package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
- package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
- package/tui/src/tools/NotebookEditTool/call.ts +254 -0
- package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
- package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
- package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
- package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
- package/tui/src/tools/PowerShellTool/call.ts +179 -0
- package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
- package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
- package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
- package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
- package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
- package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
- package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
- package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
- package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
- package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
- package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
- package/tui/src/tools/PowerShellTool/validation.ts +39 -0
- package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
- package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +1 -11
- package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
- package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
- package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +27 -10
- package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
- package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
- package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
- package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
- package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
- package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
- package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
- package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
- package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
- package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
- package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
- package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
- package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
- package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
- package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
- package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
- package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
- package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
- package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
- package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
- package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
- package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
- package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
- package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
- package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
- package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +2 -1
- package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
- package/tui/src/tools/WebFetchTool/call.ts +227 -0
- package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
- package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
- package/tui/src/tools/WebFetchTool/types.ts +23 -0
- package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
- package/tui/src/tools/WebFetchTool/utils.ts +1 -1
- package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
- package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
- package/tui/src/tools/WebSearchTool/call.ts +33 -0
- package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
- package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
- package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
- package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
- package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
- package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
- package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
- package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
- package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
- package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
- package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
- package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
- package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
- package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
- package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
- package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
- package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
- package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
- package/tui/src/tools/_shared/rootPrimitiveInput.ts +1 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
- package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
- package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
- package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
- package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
- package/tui/src/tools/_shared/toolChoiceRepair.ts +55 -860
- package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
- package/tui/src/tools.ts +39 -190
- package/tui/src/types/fileSuggestion.ts +4 -26
- package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
- package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
- package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
- package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
- package/tui/src/types/message.ts +80 -102
- package/tui/src/types/messageQueueTypes.ts +6 -28
- package/tui/src/types/notebook.ts +16 -38
- package/tui/src/types/statusLine.ts +4 -26
- package/tui/src/types/tools.ts +24 -46
- package/tui/src/types/utils.ts +6 -28
- package/tui/src/upstreamproxy/relay.ts +7 -3
- package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
- package/tui/src/utils/assistantMessageFactories.ts +9 -3
- package/tui/src/utils/auth.ts +129 -139
- package/tui/src/utils/bash/ast.ts +23 -23
- package/tui/src/utils/bash/bashParser.ts +5 -5
- package/tui/src/utils/billing.ts +1 -1
- package/tui/src/utils/collapseReadSearch.ts +3 -3
- package/tui/src/utils/cronTasks.ts +1 -1
- package/tui/src/utils/execFileNoThrow.ts +1 -1
- package/tui/src/utils/filePersistence/types.ts +16 -38
- package/tui/src/utils/forkedAgent.ts +1 -1
- package/tui/src/utils/gracefulShutdown.ts +4 -4
- package/tui/src/utils/heapDumpService.ts +12 -8
- package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
- package/tui/src/utils/hooks/execPromptHook.ts +1 -1
- package/tui/src/utils/hooks/skillImprovement.ts +1 -1
- package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
- package/tui/src/utils/messages.ts +18 -0
- package/tui/src/utils/migrateSessions.ts +3 -3
- package/tui/src/utils/model/model.ts +6 -6
- package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
- package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
- package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
- package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
- package/tui/src/utils/plugins/pluginLoader.ts +8 -8
- package/tui/src/utils/protectedNamespace.ts +5 -3
- package/tui/src/utils/rawJsonToolCall.ts +242 -0
- package/tui/src/utils/ripgrep.ts +16 -7
- package/tui/src/utils/sessionTitle.ts +1 -1
- package/tui/src/utils/settings/permissionValidation.ts +14 -2
- package/tui/src/utils/shell/prefix.ts +1 -1
- package/tui/src/utils/sideQuery.ts +1 -1
- package/tui/src/utils/systemThemeWatcher.ts +13 -3
- package/tui/src/utils/teleport.tsx +1 -1
- package/uv.lock +400 -14
- package/tui/src/services/api/claude.ts +0 -3540
- package/tui/src/tools/_shared/directPublicDataGuard.ts +0 -362
- package/tui/src/tools/_shared/kmaAnalysisGuard.ts +0 -197
- package/tui/src/tools/_shared/kmaAviationGuard.ts +0 -70
- package/tui/src/tools/_shared/nmcAedGuard.ts +0 -234
- package/tui/src/tools/_shared/protectedCheckGuard.ts +0 -207
- package/tui/src/tools/_shared/textToolCallGuard.ts +0 -91
|
@@ -0,0 +1,1837 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
"""HWPX engine-adapter boundary."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import io
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import shutil
|
|
11
|
+
import subprocess
|
|
12
|
+
import unicodedata
|
|
13
|
+
import xml.etree.ElementTree as ET
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from decimal import Decimal
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import TYPE_CHECKING, cast
|
|
18
|
+
from zipfile import ZIP_STORED, BadZipFile, ZipFile, ZipInfo
|
|
19
|
+
|
|
20
|
+
from defusedxml import ElementTree # type: ignore[import-untyped]
|
|
21
|
+
|
|
22
|
+
from ummaya.tools.documents.engines import DocumentInspectionEngine, DocumentMutationEngine
|
|
23
|
+
from ummaya.tools.documents.models import (
|
|
24
|
+
BorderDescriptor,
|
|
25
|
+
DocumentExtraction,
|
|
26
|
+
DocumentFormat,
|
|
27
|
+
DocumentPatch,
|
|
28
|
+
FormField,
|
|
29
|
+
KnownDocumentFormat,
|
|
30
|
+
OperationType,
|
|
31
|
+
ParagraphBlock,
|
|
32
|
+
StyleAlignment,
|
|
33
|
+
StyleDescriptor,
|
|
34
|
+
TableBlock,
|
|
35
|
+
TableCell,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from ummaya.tools.documents.tool_defs import DocumentFieldPatch
|
|
40
|
+
|
|
41
|
+
# Engine identifiers recognised as candidates for HWPX handling.
# NOTE(review): whether the tuple order encodes a preference is not visible
# in this part of the file -- confirm against the consumers before reordering.
HWPX_CANDIDATE_ENGINES: tuple[str, ...] = (
    "hwpx-package-text",
    "rhwp-node-wasm",
    "python-hwpx",
    "hwpx-mcp-server",
    "rhwp",
    "direct-owpml-oracle",
)
|
|
49
|
+
|
|
50
|
+
# Matches exactly "/hwpx/text[N]" where N is a positive integer written
# without a leading zero; N is captured as the "index" group.
_TEXT_TARGET_RE = re.compile(r"^/hwpx/text\[(?P<index>[1-9][0-9]*)\]$")

# Matches a table-cell alias: an optional "/body/section[N]" prefix, then
# "/table[T]/", then one of two cell spellings:
#   * "cells[R][C]" -> groups "row_bracket" / "col_bracket"
#   * "cell[R,C]"   -> groups "row_csv" / "col_csv"
# Only the pair belonging to the spelling that matched is populated; the other
# pair is None. All numbers are positive and have no leading zero.
_HWPX_TABLE_CELL_ALIAS_RE = re.compile(
    r"^(?:/body/section\[[1-9][0-9]*\])?/table\[(?P<table>[1-9][0-9]*)\]/"
    r"(?:(?:cells\[(?P<row_bracket>[1-9][0-9]*)\]\[(?P<col_bracket>[1-9][0-9]*)\])|"
    r"(?:cell\[(?P<row_csv>[1-9][0-9]*),(?P<col_csv>[1-9][0-9]*)\]))$"
)

# Matches a package-relative cell reference of the form
# "Contents/section<digits>.xml#table[T]/r<R>c<C>" and captures the groups
# "member", "table", "row" and "column". The section digits may be zero or
# carry leading zeros; table/row/column may not.
_HWPX_TABLE_CELL_SOURCE_RE = re.compile(
    r"^(?P<member>Contents/section[0-9]+\.xml)#table\[(?P<table>[1-9][0-9]*)\]/"
    r"r(?P<row>[1-9][0-9]*)c(?P<column>[1-9][0-9]*)$"
)

# Finds a "YYYY.MM.DD ~ YYYY.MM.DD" span (whitespace around "~" is optional)
# delimited by word boundaries. The pattern is not anchored to the whole string.
_HWPX_ACTIVITY_PERIOD_VALUE_RE = re.compile(
    r"\b[0-9]{4}\.[0-9]{2}\.[0-9]{2}\s*~\s*[0-9]{4}\.[0-9]{2}\.[0-9]{2}\b"
)

# One to three ASCII digits. The pattern itself is unanchored, so whether it
# validates a whole value depends on the matching method the caller uses.
_DOCUMENT_WEEK_VALUE_RE = re.compile(r"[0-9]{1,3}")

# Leading part of the section member names inside the package
# (the same "Contents/section" prefix used by _HWPX_TABLE_CELL_SOURCE_RE).
_SECTION_PREFIX = "Contents/section"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Document formats an engine may declare and still be accepted by the HWPX
# adapter checks: the HWPX package format and OWPML.
_HWPX_COMPATIBLE_FORMATS = frozenset((DocumentFormat.hwpx, DocumentFormat.owpml))
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(frozen=True)
class _HwpXTextRecord:
    """Immutable pairing of one XML element with its three style identifiers.

    Every style identifier is optional and is ``None`` when not set.
    """

    # The parsed XML element this record describes.
    element: ET.Element
    # Character-level style identifier, if any.
    char_style_id: str | None
    # Paragraph-level style identifier, if any.
    para_style_id: str | None
    # Named-style identifier, if any.
    named_style_id: str | None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class _HwpXTableCellTarget:
    """Immutable, hashable address of a single table cell.

    Being a frozen dataclass, instances compare by value and can be used as
    dictionary keys.
    """

    # Name of the package member holding the table.
    # NOTE(review): presumably a "Contents/section*.xml" entry -- confirm.
    member: str
    # Position of the table.
    # NOTE(review): the path regexes suggest 1-based indices -- confirm.
    table_index: int
    # Row position of the cell within the table.
    row_index: int
    # Column position of the cell within the table.
    column_index: int
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass(frozen=True)
class _HwpXStyleRefs:
    """Immutable bundle of up to three optional style reference ids.

    All fields default to ``None``, so ``_HwpXStyleRefs()`` represents
    "no style references".
    """

    # Character-property reference id, if any.
    char_pr_id: str | None = None
    # Paragraph-property reference id, if any.
    para_pr_id: str | None = None
    # Style reference id, if any.
    style_id: str | None = None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass(frozen=True)
class _HwpXPatchBuckets:
    """Patch operations grouped by target kind (text vs. table cell) and by
    payload kind (replacement text vs. style).

    The dataclass is frozen, so the four attributes cannot be rebound, but the
    dictionaries they hold remain ordinary mutable dicts.
    """

    # Replacement text keyed by an integer text index.
    text_replacements: dict[int, str]
    # Replacement text keyed by table-cell target.
    table_cell_replacements: dict[_HwpXTableCellTarget, str]
    # Style to apply, keyed by an integer text index.
    text_styles: dict[int, StyleDescriptor]
    # Style to apply, keyed by table-cell target.
    table_cell_styles: dict[_HwpXTableCellTarget, StyleDescriptor]

    @property
    def has_style_mutations(self) -> bool:
        """Return ``True`` when at least one style bucket is non-empty."""
        if self.text_styles:
            return True
        return bool(self.table_cell_styles)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def validate_hwpx_engine(engine: DocumentInspectionEngine) -> DocumentInspectionEngine:
    """Return ``engine`` unchanged if it declares an HWPX-compatible format.

    Raises:
        ValueError: when ``engine.document_format`` is not one of the formats
            in ``_HWPX_COMPATIBLE_FORMATS``.
    """
    if engine.document_format in _HWPX_COMPATIBLE_FORMATS:
        return engine
    raise ValueError("HWPX adapter requires a hwpx-compatible engine")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def validate_hwpx_mutation_engine(engine: DocumentInspectionEngine) -> DocumentMutationEngine:
    """Return ``engine`` if it is HWPX-compatible and also a mutation engine.

    The format check runs first, so a wrong-format engine reports the format
    error rather than the capability error.

    Raises:
        ValueError: when the format check fails, or when ``engine`` is not an
            instance of ``DocumentMutationEngine``.
    """
    validate_hwpx_engine(engine)
    if isinstance(engine, DocumentMutationEngine):
        return engine
    raise ValueError("HWPX adapter requires a mutation-capable engine")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class HwpXPackageTextEngine:
    """Engine that reads and rewrites the text nodes of an HWPX zip package."""

    document_format = DocumentFormat.hwpx
    engine_id = "hwpx-package-text"
    # Defaults reported for rendering; the ``*_for`` methods below switch to the
    # HTML variants when ``_uses_hwpxjs_html_render`` says so for a given path.
    render_engine_id = "rhwp-node-wasm"
    render_artifact_extension = "svg"
    render_mime_type = "image/svg+xml"

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Build a ``DocumentExtraction`` from every section member of the package.

        Each non-empty text element becomes one ``ParagraphBlock`` and one
        ``FormField`` sharing a 1-based index that keeps counting across
        sections. Empty text elements are skipped and do not consume an index.
        """
        paragraphs: list[ParagraphBlock] = []
        tables: list[TableBlock] = []
        # (index, text, target path) for every addressable text node, in order.
        addressable: list[tuple[int, str, str]] = []
        # Filled in by _table_blocks: text index -> label for that text node.
        semantic_labels: dict[int, str] = {}
        next_index = 1

        with ZipFile(path) as archive:
            style_map = _style_map_from_header(archive)
            members = _section_members(archive)
            for member in members:
                root = ElementTree.fromstring(archive.read(member))
                # Keyed by id(element); only valid while ``root`` is alive.
                index_by_element: dict[int, int] = {}
                for record in _text_records(root):
                    node = record.element
                    content = node.text or ""
                    if content:
                        index_by_element[id(node)] = next_index
                        paragraphs.append(
                            ParagraphBlock(
                                block_id=f"hwpx-text-{next_index:03d}",
                                text=content,
                                source_path=f"{member}#text[{next_index}]",
                                style_id=record.char_style_id
                                or record.named_style_id
                                or record.para_style_id,
                            )
                        )
                        addressable.append((next_index, content, f"/hwpx/text[{next_index}]"))
                        next_index += 1
                section_tables = _table_blocks(
                    root,
                    member=member,
                    table_start_index=len(tables) + 1,
                    text_index_by_element_id=index_by_element,
                    semantic_labels=semantic_labels,
                )
                tables.extend(section_tables)

        # Fields are built only after all sections were scanned, so labels
        # collected from any section's tables are available.
        fields: list[FormField] = [
            FormField(
                field_id=f"hwpx-text-{index:03d}",
                label=semantic_labels.get(index)
                or _semantic_label_for_text_value(content)
                or f"HWPX text node {index}",
                path=target,
                field_type="text",
                required=False,
                current_value=content,
                source_confidence=Decimal("1"),
            )
            for index, content, target in addressable
        ]

        metadata = {
            "format": self.document_format.value,
            "engine_id": self.engine_id,
            "section_count": len(members),
            "text_node_count": len(paragraphs),
            "table_count": len(tables),
            "style_map_count": len(style_map),
        }
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=paragraphs,
            tables=tables,
            fields=fields,
            metadata=metadata,
            style_map=style_map,
            warnings=[
                "HWPX package text engine edits text nodes only; page SVG render evidence is "
                "delegated to the RHWP Node/WASM bridge."
            ],
        )

    def apply_patch(self, path: Path, patch: DocumentPatch) -> bytes:
        """Apply ``patch`` to every section and return the rewritten package bytes.

        Text and table counters are threaded through the sections so numbering
        continues from one section to the next. After all sections are
        processed, ``_raise_for_missing_hwpx_patch_targets`` is given the
        requested and applied targets before the package is rewritten.
        """
        buckets = _hwpx_patch_buckets_from_patch(patch)
        rewritten_sections: dict[str, bytes] = {}
        cells_replaced: set[_HwpXTableCellTarget] = set()
        texts_styled: set[int] = set()
        cells_styled: set[_HwpXTableCellTarget] = set()
        next_text = 1
        next_table = 1

        with ZipFile(path) as archive:
            text_style_refs, table_cell_style_refs, header_payload = _hwpx_style_refs_from_buckets(
                archive, buckets
            )

            for member in _section_members(archive):
                raw = archive.read(member)
                # Namespace declarations are captured from the raw bytes and
                # handed back to the serializer below.
                namespaces = _namespace_map(raw)
                root = ElementTree.fromstring(raw)
                outcome = _apply_hwpx_section_mutations(
                    root,
                    member=member,
                    text_index=next_text,
                    table_index=next_table,
                    text_replacements=buckets.text_replacements,
                    table_cell_replacements=buckets.table_cell_replacements,
                    text_style_refs=text_style_refs,
                    table_cell_style_refs=table_cell_style_refs,
                )
                next_text, next_table, done_cells, done_text_styles, done_cell_styles = outcome
                cells_replaced |= set(done_cells)
                texts_styled |= set(done_text_styles)
                cells_styled |= set(done_cell_styles)
                rewritten_sections[member] = _serialize_section(root, namespaces)

            _raise_for_missing_hwpx_patch_targets(
                patch_buckets=buckets,
                text_style_refs=text_style_refs,
                table_cell_style_refs=table_cell_style_refs,
                text_index=next_text,
                applied_table_cell_targets=cells_replaced,
                applied_text_style_targets=texts_styled,
                applied_table_cell_style_targets=cells_styled,
            )
            return _rewrite_hwpx_package(
                archive,
                section_payloads=rewritten_sections,
                header_payload=header_payload,
            )

    def render(self, path: Path, *, artifact_id: str, output_dir: Path) -> tuple[bytes, ...]:
        """Render the package and return the produced artifacts as bytes.

        ``artifact_id`` is accepted but not used. When
        ``_uses_hwpxjs_html_render(path)`` is true, ``output_dir`` is created
        and a one-element tuple from the HTML renderer is returned; otherwise
        rendering is delegated to ``_render_with_rhwp_node``.
        """
        _ = artifact_id
        if not _uses_hwpxjs_html_render(path):
            return _render_with_rhwp_node(path, output_dir=output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        html_artifact = _render_with_hwpxjs_html(path)
        return (html_artifact,)

    def render_artifact_extension_for(self, path: Path) -> str:
        """Return "html" for HTML-rendered packages, else the class default extension."""
        if _uses_hwpxjs_html_render(path):
            return "html"
        return self.render_artifact_extension

    def render_mime_type_for(self, path: Path) -> str:
        """Return "text/html" for HTML-rendered packages, else the class default MIME type."""
        if _uses_hwpxjs_html_render(path):
            return "text/html"
        return self.render_mime_type

    def render_engine_id_for(self, path: Path) -> str:
        """Return the HTML render engine id when it applies, else the class default."""
        if _uses_hwpxjs_html_render(path):
            return "hwpxjs-html-render"
        return self.render_engine_id
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class OwpmlPackageTextEngine(HwpXPackageTextEngine):
    """Same engine as ``HwpXPackageTextEngine``, reported under the OWPML format and id."""

    # Only the identity attributes differ; all behaviour is inherited.
    document_format = DocumentFormat.owpml
    engine_id = "owpml-package-text"
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
class HwpXDocumentAdapter:
    """Adapter that wraps an HWPX inspection engine and normalizes fill targets."""

    known_formats: tuple[KnownDocumentFormat, ...] = (
        KnownDocumentFormat.hwpx,
        KnownDocumentFormat.owpml,
    )
    promoted_formats: tuple[DocumentFormat, ...] = (DocumentFormat.hwpx, DocumentFormat.owpml)

    def __init__(self, *, inspection_engine: DocumentInspectionEngine | None = None) -> None:
        """Wrap ``inspection_engine``, falling back to ``HwpXPackageTextEngine``.

        Raises:
            ValueError: propagated from ``validate_hwpx_engine`` when the
                engine's format is not HWPX-compatible.
        """
        chosen = inspection_engine or HwpXPackageTextEngine()
        self._inspection_engine = validate_hwpx_engine(chosen)
        self.adapter_id = f"{self._inspection_engine.engine_id}-adapter"

    @property
    def engine_id(self) -> str:
        """Id of the wrapped engine."""
        return self._inspection_engine.engine_id

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Delegate inspection of ``path`` to the wrapped engine."""
        return self._inspection_engine.inspect(path, artifact_id=artifact_id)

    def normalize_fill_patches(
        self,
        patches: tuple[DocumentFieldPatch, ...],
        *,
        extraction: DocumentExtraction | None,
    ) -> tuple[DocumentFieldPatch, ...]:
        """Rewrite patch targets (and week values) against ``extraction``.

        Without an extraction the patches are returned untouched. Otherwise
        each patch gets its normalized target and value. A patch is dropped
        when its target does not start with "/", normalization left it
        unchanged, it belongs to no semantic group, and it is not a known
        native target.
        """
        if extraction is None:
            return patches

        kept: list[DocumentFieldPatch] = []
        for patch in patches:
            raw_target = patch.target_path
            resolved_target = _normalized_fill_target(raw_target, extraction)
            unresolved_name = (
                not raw_target.strip().startswith("/") and resolved_target == raw_target
            )
            if (
                unresolved_name
                and _semantic_target_group(_semantic_field_key(raw_target)) is None
                and not _is_known_hwpx_native_fill_target(raw_target, extraction)
            ):
                continue
            resolved_value = _normalized_fill_value(
                patch.value,
                original_target=raw_target,
                normalized_target=resolved_target,
                extraction=extraction,
            )
            changes = {"target_path": resolved_target, "value": resolved_value}
            kept.append(patch.model_copy(update=changes))
        return tuple(kept)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _is_known_hwpx_native_fill_target(
    target_path: str,
    extraction: DocumentExtraction,
) -> bool:
    """Return True if ``target_path`` is an extracted field path or a native table-cell target."""
    if any(field.path == target_path for field in extraction.fields):
        return True
    native_targets = _hwpx_table_cell_alias_map(extraction).values()
    return any(native == target_path for native in native_targets)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _normalized_fill_target(
    target_path: str,
    extraction: DocumentExtraction,
) -> str:
    """Resolve ``target_path`` to a native target, or return it unchanged.

    Resolution order: semantic field name, exact alias-map key, then a
    table-cell alias pattern reduced to its "/table[T]/cells[R][C]" form.
    """
    by_label = _semantic_hwpx_field_target(target_path, extraction)
    if by_label is not None:
        return by_label

    native_by_alias = _hwpx_table_cell_alias_map(extraction)
    try:
        return native_by_alias[target_path]
    except KeyError:
        pass

    parsed = _HWPX_TABLE_CELL_ALIAS_RE.match(target_path)
    if parsed is None:
        return target_path
    parts = parsed.groupdict()
    # Exactly one of the two spellings matched; take whichever pair is set.
    row = parts["row_bracket"] or parts["row_csv"]
    column = parts["col_bracket"] or parts["col_csv"]
    if row is None or column is None:
        return target_path
    return native_by_alias.get(f"/table[{parts['table']}]/cells[{row}][{column}]", target_path)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _normalized_fill_value(
    value: object,
    *,
    original_target: str,
    normalized_target: str,
    extraction: DocumentExtraction,
) -> object:
    """Return ``value``, rewritten as "<n>주차" when it fills a week-label target.

    The group is taken from the original target name first and, failing that,
    from the field currently extracted at the normalized target. Values that
    are not a plain week number are returned unchanged.
    """
    group = _semantic_target_group(_semantic_field_key(original_target))
    if group is None:
        group = _semantic_group_for_extracted_path(normalized_target, extraction)
    if group == "week_label":
        week = _numeric_week_value(value)
        if week is not None:
            return f"{week}주차"
    return value
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _semantic_group_for_extracted_path(
    target_path: str,
    extraction: DocumentExtraction,
) -> str | None:
    """Return "week_label" if a field at ``target_path`` currently holds "<digits>주차".

    Only string values are considered; any other situation yields None.
    """
    holds_week_label = any(
        field.path == target_path
        and isinstance(field.current_value, str)
        and re.fullmatch(r"[0-9]+주차", _semantic_field_key(field.current_value)) is not None
        for field in extraction.fields
    )
    return "week_label" if holds_week_label else None
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def _semantic_label_for_text_value(value: str) -> str | None:
    """Guess a field label from the text value itself, or return None.

    Checked in order: "<digits>주차" gives "주차"; a date range gives "활동기간";
    text that contains "특이사항" but is not exactly that word gives "특이사항".
    """
    folded = unicodedata.normalize("NFKC", value)
    key = _semantic_field_key(folded)
    if re.fullmatch(r"[0-9]+주차", key) is not None:
        return "주차"
    if _HWPX_ACTIVITY_PERIOD_VALUE_RE.search(folded) is not None:
        return "활동기간"
    # A value that is exactly "특이사항" or "비고" gets no label.
    if "특이사항" in key and key != "특이사항":
        return "특이사항"
    return None
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _numeric_week_value(value: object) -> str | None:
|
|
427
|
+
if isinstance(value, int):
|
|
428
|
+
return str(value)
|
|
429
|
+
if not isinstance(value, str):
|
|
430
|
+
return None
|
|
431
|
+
normalized = unicodedata.normalize("NFKC", value).strip()
|
|
432
|
+
if _DOCUMENT_WEEK_VALUE_RE.fullmatch(normalized) is None:
|
|
433
|
+
return None
|
|
434
|
+
return normalized.lstrip("0") or "0"
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def _semantic_hwpx_field_target(
    target_path: str,
    extraction: DocumentExtraction,
) -> str | None:
    """Resolve a semantic field name to one extracted field path, or None.

    Targets starting with "/" and targets with an empty key are never
    resolved. Three strategies are tried in order, each accepted only when it
    yields exactly one candidate: exact label-key match, label group match
    (de-duplicated by path), then match on the current field value.
    """
    wanted = _semantic_field_key(target_path)
    if target_path.strip().startswith("/") or not wanted:
        return None

    keyed_paths = [(_semantic_field_key(field.label), field.path) for field in extraction.fields]

    exact = [path for label_key, path in keyed_paths if label_key == wanted]
    if len(exact) == 1:
        return exact[0]

    group = _semantic_target_group(wanted)
    if group is None:
        return None

    # dict.fromkeys de-duplicates while keeping first-seen order.
    same_group = dict.fromkeys(
        path for label_key, path in keyed_paths if _semantic_label_group(label_key) == group
    )
    if len(same_group) == 1:
        return next(iter(same_group))

    by_value = _semantic_hwpx_value_matches(group, extraction)
    if len(by_value) != 1:
        return None
    return by_value[0]
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _semantic_hwpx_value_matches(
    target_group: str,
    extraction: DocumentExtraction,
) -> list[str]:
    """Return unique field paths whose current string value fits ``target_group``.

    "activity_period" matches values containing a date range; "week_label"
    matches values whose key is "<digits>주차". Other groups match nothing.
    Paths are returned in first-seen order.
    """
    ordered_paths: dict[str, None] = {}
    for field in extraction.fields:
        raw = field.current_value
        if not isinstance(raw, str):
            continue
        text = unicodedata.normalize("NFKC", raw)
        if target_group == "activity_period":
            matched = _HWPX_ACTIVITY_PERIOD_VALUE_RE.search(text) is not None
        elif target_group == "week_label":
            matched = re.fullmatch(r"[0-9]+주차", _semantic_field_key(text)) is not None
        else:
            matched = False
        if matched:
            ordered_paths.setdefault(field.path, None)
    return list(ordered_paths)
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def _semantic_field_key(value: str) -> str:
|
|
489
|
+
normalized = unicodedata.normalize("NFKC", value).casefold()
|
|
490
|
+
return re.sub(r"[^0-9a-z가-힣]+", "", normalized)
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def _semantic_target_group(normalized_target: str) -> str | None:
    """Classify a normalized target key into a semantic group name, or None.

    Matchers are tried in a fixed order and the first hit wins; an empty key
    never matches.
    """
    if not normalized_target:
        return None
    ordered_matchers = (
        (_matches_special_notes_target, "special_notes"),
        (_matches_team_name_target, "team_name"),
        (_matches_week_label_target, "week_label"),
        (_matches_activity_period_target, "activity_period"),
    )
    for matches, group in ordered_matchers:
        if matches(normalized_target):
            return group
    return None
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def _matches_special_notes_target(normalized_target: str) -> bool:
|
|
508
|
+
return any(
|
|
509
|
+
token in normalized_target
|
|
510
|
+
for token in ("특이", "비고", "special", "remark", "remarks", "note", "notes")
|
|
511
|
+
)
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _matches_team_name_target(normalized_target: str) -> bool:
|
|
515
|
+
return (
|
|
516
|
+
"팀명" in normalized_target
|
|
517
|
+
or ("team" in normalized_target and "name" in normalized_target)
|
|
518
|
+
or normalized_target == "team"
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _matches_week_label_target(normalized_target: str) -> bool:
|
|
523
|
+
return "주차" in normalized_target or normalized_target in {
|
|
524
|
+
"week",
|
|
525
|
+
"weeknumber",
|
|
526
|
+
"weeklabel",
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _matches_activity_period_target(normalized_target: str) -> bool:
|
|
531
|
+
return (
|
|
532
|
+
"활동일시" in normalized_target
|
|
533
|
+
or "활동기간" in normalized_target
|
|
534
|
+
or (
|
|
535
|
+
"activity" in normalized_target
|
|
536
|
+
and any(token in normalized_target for token in ("period", "date", "time"))
|
|
537
|
+
)
|
|
538
|
+
or "weekperiod" in normalized_target
|
|
539
|
+
or normalized_target.endswith("period")
|
|
540
|
+
)
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
def _semantic_label_group(normalized_label: str) -> str | None:
|
|
544
|
+
if "특이사항" in normalized_label or normalized_label == "비고":
|
|
545
|
+
return "special_notes"
|
|
546
|
+
if normalized_label == "팀명":
|
|
547
|
+
return "team_name"
|
|
548
|
+
if "주차" in normalized_label:
|
|
549
|
+
return "week_label"
|
|
550
|
+
if normalized_label in {"활동일시", "활동기간"}:
|
|
551
|
+
return "activity_period"
|
|
552
|
+
return None
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def _hwpx_table_cell_alias_map(extraction: DocumentExtraction) -> dict[str, str]:
    """Map every accepted spelling of a table-cell address to its native target.

    The native target is the cell's ``field_path`` when set, otherwise its
    ``source_path``. Aliases use 1-based table, row and column numbers; the
    "/body/section[1]" prefixed spellings are registered for every table.
    """
    alias_to_native: dict[str, str] = {}
    table_number = 0
    for table in extraction.tables:
        table_number += 1
        for cell in table.cells:
            native = cell.field_path or cell.source_path
            # Extracted indexes are 0-based; aliases are 1-based.
            row, column = cell.row_index + 1, cell.column_index + 1
            alias_to_native.update(
                {
                    cell.source_path: native,
                    f"/table[{table_number}]/cell[{row},{column}]": native,
                    f"/table[{table_number}]/cells[{row}][{column}]": native,
                    f"/body/section[1]/table[{table_number}]/cell[{row},{column}]": native,
                    f"/body/section[1]/table[{table_number}]/cells[{row}][{column}]": native,
                }
            )
    return alias_to_native
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def _section_members(archive: ZipFile) -> list[str]:
    """Return the names of the section XML members in document order.

    Members are those whose name starts with ``_SECTION_PREFIX`` and ends with
    ".xml". Names with a purely numeric suffix are ordered by that number, so
    "section10.xml" follows "section9.xml" instead of preceding "section2.xml"
    as a plain string sort would place it. Any other matching names come
    afterwards in lexicographic order.
    """

    def _document_order(name: str) -> tuple[int, int, str]:
        # Text between the prefix and the ".xml" extension, e.g. "10".
        suffix = name[len(_SECTION_PREFIX) : -len(".xml")]
        if suffix.isascii() and suffix.isdigit():
            return (0, int(suffix), name)
        return (1, 0, name)

    names = [
        member.filename
        for member in archive.infolist()
        if member.filename.startswith(_SECTION_PREFIX) and member.filename.endswith(".xml")
    ]
    return sorted(names, key=_document_order)
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def _text_elements(root: ET.Element) -> list[ET.Element]:
|
|
579
|
+
return [elem for elem in root.iter() if elem.tag.rsplit("}", 1)[-1] == "t"]
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def _text_records(root: ET.Element) -> list[_HwpXTextRecord]:
    """Build one record per text element that has a "p" ancestor.

    Text elements without an enclosing "p" are skipped. Style ids are the
    paragraph's ``paraPrIDRef`` / ``styleIDRef`` and the nearest "run"
    ancestor's ``charPrIDRef``, each prefixed with its kind; missing or empty
    references become None.
    """

    def _prefixed(kind: str, reference: str | None) -> str | None:
        return f"{kind}-{reference}" if reference else None

    parents = _parent_by_element_id(root)
    collected: list[_HwpXTextRecord] = []
    for node in _text_elements(root):
        paragraph = _nearest_ancestor_by_local_name(node, "p", parents)
        if paragraph is None:
            continue
        run = _nearest_ancestor_by_local_name(node, "run", parents)
        run_reference = None if run is None else _local_attr(run, "charPrIDRef")
        collected.append(
            _HwpXTextRecord(
                element=node,
                char_style_id=_prefixed("charPr", run_reference),
                para_style_id=_prefixed("paraPr", _local_attr(paragraph, "paraPrIDRef")),
                named_style_id=_prefixed("style", _local_attr(paragraph, "styleIDRef")),
            )
        )
    return collected
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def _parent_by_element_id(root: ET.Element) -> dict[int, ET.Element]:
|
|
605
|
+
return {id(child): parent for parent in root.iter() for child in list(parent)}
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def _nearest_ancestor_by_local_name(
    element: ET.Element,
    name: str,
    parent_by_id: dict[int, ET.Element],
) -> ET.Element | None:
    """Walk up from ``element`` and return the first ancestor whose local tag is ``name``.

    ``element`` itself is never returned. None means the top of the tree was
    reached without a match.
    """
    ancestor = parent_by_id.get(id(element))
    while ancestor is not None and _local_name(ancestor.tag) != name:
        ancestor = parent_by_id.get(id(ancestor))
    return ancestor
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def _style_map_from_header(archive: ZipFile) -> list[StyleDescriptor]:
    """Collect style descriptors from "Contents/header.xml".

    Returns an empty list when the member is missing or does not parse.
    Otherwise the result lists border fills, character styles, paragraph
    styles and named styles, in that order.
    """
    try:
        header_bytes = archive.read("Contents/header.xml")
        header = ElementTree.fromstring(header_bytes)
    except (KeyError, ElementTree.ParseError):
        return []

    border_fills = _border_fill_styles_by_id(header)
    char_styles = _char_styles_by_id(
        header,
        font_faces=_font_faces_by_id(header),
        border_fills=border_fills,
    )
    para_styles = _para_styles_by_id(header)
    named_styles = _named_styles_by_id(header, char_styles=char_styles, para_styles=para_styles)

    descriptors: list[StyleDescriptor] = []
    for group in (border_fills, char_styles, para_styles, named_styles):
        descriptors.extend(group.values())
    return descriptors
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def _font_faces_by_id(root: ET.Element) -> dict[str, str]:
    """Map font ids to face names for the accepted "fontface" language groups.

    Only groups whose ``lang`` (case-folded) is "hangul", "korean", "latin" or
    absent are read. When the same id occurs more than once, the first face
    seen is kept.
    """
    accepted_langs = {"hangul", "korean", "latin", ""}
    face_by_id: dict[str, str] = {}
    for group in _elements_by_local_name(root, "fontface"):
        if (_local_attr(group, "lang") or "").casefold() not in accepted_langs:
            continue
        for font in _child_elements_by_local_name(group, "font"):
            identifier = _local_attr(font, "id")
            face_name = _local_attr(font, "face")
            if identifier is None or not face_name:
                continue
            if identifier not in face_by_id:
                face_by_id[identifier] = face_name
    return face_by_id
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def _border_fill_styles_by_id(root: ET.Element) -> dict[str, StyleDescriptor]:
    """Build one descriptor per "borderFill" element that has an id, keyed by that id."""
    descriptors: dict[str, StyleDescriptor] = {}
    for node in _elements_by_local_name(root, "borderFill"):
        identifier = _local_attr(node, "id")
        if identifier is not None:
            descriptors[identifier] = StyleDescriptor(
                style_id=f"borderFill-{identifier}",
                target_path=f"Contents/header.xml#borderFill[{identifier}]",
                fill_color_rgb=_border_fill_color(node),
                border=_border_descriptor(node),
            )
    return descriptors
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def _char_styles_by_id(
    root: ET.Element,
    *,
    font_faces: dict[str, str],
    border_fills: dict[str, StyleDescriptor],
) -> dict[str, StyleDescriptor]:
    """Build one descriptor per "charPr" element that has an id, keyed by that id.

    The fill colour is the element's own ``shadeColor`` when usable, otherwise
    the referenced border fill's colour. The border comes from the referenced
    border fill. ``bold`` / ``italic`` / ``underline`` are True when the
    matching child element exists and None otherwise.
    """
    descriptors: dict[str, StyleDescriptor] = {}
    for node in _elements_by_local_name(root, "charPr"):
        identifier = _local_attr(node, "id")
        if identifier is None:
            continue

        referenced_fill = border_fills.get(_local_attr(node, "borderFillIDRef") or "")
        inherited_fill_color = referenced_fill.fill_color_rgb if referenced_fill else None
        inherited_border = referenced_fill.border if referenced_fill else None
        font_reference = _font_ref_id(node)
        text_color = _rgb(_local_attr(node, "textColor"))
        own_shade = _rgb(_local_attr(node, "shadeColor"))

        descriptors[identifier] = StyleDescriptor(
            style_id=f"charPr-{identifier}",
            target_path=f"Contents/header.xml#charPr[{identifier}]",
            font_family=font_faces.get(font_reference or ""),
            font_size_pt=_hwpx_height_to_points(_local_attr(node, "height")),
            bold=_has_child(node, "bold") or None,
            italic=_has_child(node, "italic") or None,
            underline=_has_child(node, "underline") or None,
            font_color_rgb=text_color,
            fill_color_rgb=own_shade or inherited_fill_color,
            border=inherited_border,
        )
    return descriptors
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def _para_styles_by_id(root: ET.Element) -> dict[str, StyleDescriptor]:
    """Build one descriptor per "paraPr" element that has an id; only alignment is read."""
    descriptors: dict[str, StyleDescriptor] = {}
    for node in _elements_by_local_name(root, "paraPr"):
        identifier = _local_attr(node, "id")
        if identifier is not None:
            align_node = _first_child_by_local_name(node, "align")
            descriptors[identifier] = StyleDescriptor(
                style_id=f"paraPr-{identifier}",
                target_path=f"Contents/header.xml#paraPr[{identifier}]",
                alignment=_hwpx_alignment(align_node),
            )
    return descriptors
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
def _named_styles_by_id(
    root: ET.Element,
    *,
    char_styles: dict[str, StyleDescriptor],
    para_styles: dict[str, StyleDescriptor],
) -> dict[str, StyleDescriptor]:
    """Build one merged descriptor per "style" element that has an id.

    Each style is resolved through its ``charPrIDRef`` and ``paraPrIDRef``;
    a reference that is missing or unknown contributes nothing to the merge.
    """
    descriptors: dict[str, StyleDescriptor] = {}
    for node in _elements_by_local_name(root, "style"):
        identifier = _local_attr(node, "id")
        if identifier is None:
            continue
        char_reference = _local_attr(node, "charPrIDRef") or ""
        para_reference = _local_attr(node, "paraPrIDRef") or ""
        descriptors[identifier] = _merge_hwpx_styles(
            style_id=f"style-{identifier}",
            target_path=f"Contents/header.xml#style[{identifier}]",
            char_style=char_styles.get(char_reference),
            para_style=para_styles.get(para_reference),
        )
    return descriptors
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def _merge_hwpx_styles(
    *,
    style_id: str,
    target_path: str,
    char_style: StyleDescriptor | None,
    para_style: StyleDescriptor | None,
) -> StyleDescriptor:
    """Combine a character style and a paragraph style into one descriptor.

    Font, colour, border and number-format fields come from ``char_style``;
    alignment and line spacing come from ``para_style``. Fields whose source
    style is absent are None.
    """

    def _pick(source: StyleDescriptor | None, attribute: str):
        # Same truthiness test as a conditional expression on the source style.
        return getattr(source, attribute) if source else None

    return StyleDescriptor(
        style_id=style_id,
        target_path=target_path,
        font_family=_pick(char_style, "font_family"),
        font_size_pt=_pick(char_style, "font_size_pt"),
        bold=_pick(char_style, "bold"),
        italic=_pick(char_style, "italic"),
        underline=_pick(char_style, "underline"),
        font_color_rgb=_pick(char_style, "font_color_rgb"),
        fill_color_rgb=_pick(char_style, "fill_color_rgb"),
        alignment=_pick(para_style, "alignment"),
        line_spacing=_pick(para_style, "line_spacing"),
        border=_pick(char_style, "border"),
        number_format=_pick(char_style, "number_format"),
    )
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def _font_ref_id(char_pr: ET.Element) -> str | None:
    """Return the font id referenced by the "fontRef" child of ``char_pr``.

    The ``hangul``, ``latin``, ``hanja`` and ``other`` attributes are checked
    in that order and the first non-empty one wins. None is returned when
    there is no "fontRef" child.
    """
    font_ref = _first_child_by_local_name(char_pr, "fontRef")
    if font_ref is None:
        return None
    chosen = None
    for script in ("hangul", "latin", "hanja", "other"):
        chosen = _local_attr(font_ref, script)
        if chosen:
            break
    # If nothing was non-empty this is whatever the "other" lookup produced.
    return chosen
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
def _border_fill_color(border_fill: ET.Element) -> str | None:
    """Return the first usable fill colour found under ``border_fill``, or None.

    The first descendant named "winBrush", then "gradation", then "imgBrush"
    is inspected; for each, ``faceColor`` is preferred over ``color``.
    """
    for brush_name in ("winBrush", "gradation", "imgBrush"):
        brush = _first_descendant_by_local_name(border_fill, brush_name)
        if brush is not None:
            raw_color = _local_attr(brush, "faceColor") or _local_attr(brush, "color")
            parsed = _rgb(raw_color)
            if parsed is not None:
                return parsed
    return None
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def _border_descriptor(border_fill: ET.Element) -> BorderDescriptor | None:
    """Describe the first drawn side of ``border_fill``, or return None.

    Sides are checked in left, top, right, bottom order. A side counts as
    drawn when the child exists and its ``type`` is present and not "NONE".
    """
    for side in ("leftBorder", "topBorder", "rightBorder", "bottomBorder"):
        edge = _first_child_by_local_name(border_fill, side)
        if edge is None:
            continue
        edge_type = _local_attr(edge, "type")
        if edge_type is not None and edge_type != "NONE":
            return BorderDescriptor(
                style=edge_type,
                width_pt=_hwpx_measure_to_points(_local_attr(edge, "width")),
                color_rgb=_rgb(_local_attr(edge, "color")),
            )
    return None
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def _hwpx_height_to_points(value: str | None) -> Decimal | None:
|
|
805
|
+
if value is None:
|
|
806
|
+
return None
|
|
807
|
+
try:
|
|
808
|
+
return Decimal(value) / Decimal("100")
|
|
809
|
+
except ArithmeticError:
|
|
810
|
+
return None
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def _hwpx_measure_to_points(value: str | None) -> Decimal | None:
|
|
814
|
+
if value is None:
|
|
815
|
+
return None
|
|
816
|
+
match = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*mm", value)
|
|
817
|
+
if match is None:
|
|
818
|
+
return None
|
|
819
|
+
try:
|
|
820
|
+
return (Decimal(match.group(1)) * Decimal("2.834645669")).quantize(Decimal("0.01"))
|
|
821
|
+
except ArithmeticError:
|
|
822
|
+
return None
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
def _hwpx_alignment(align: ET.Element | None) -> StyleAlignment | None:
    """Return the alignment named by the element's ``horizontal`` attribute.

    The attribute is compared case-insensitively; None is returned when the
    element is missing or the value is not one of the recognised names.
    """
    if align is None:
        return None
    requested = (_local_attr(align, "horizontal") or "").casefold()
    recognised: tuple[StyleAlignment, ...] = (
        "left",
        "center",
        "right",
        "justify",
        "distributed",
    )
    for alignment in recognised:
        if alignment == requested:
            return alignment
    return None
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def _rgb(value: str | None) -> str | None:
|
|
840
|
+
if value is None:
|
|
841
|
+
return None
|
|
842
|
+
normalized = value.strip()
|
|
843
|
+
if not normalized or normalized.casefold() == "none":
|
|
844
|
+
return None
|
|
845
|
+
if normalized.startswith("#"):
|
|
846
|
+
normalized = normalized[1:]
|
|
847
|
+
return normalized.upper() if re.fullmatch(r"[0-9A-Fa-f]{6}", normalized) else None
|
|
848
|
+
|
|
849
|
+
|
|
850
|
+
def _local_attr(element: ET.Element, name: str) -> str | None:
    """Return the first attribute of *element* whose local name equals *name*.

    Any namespace part of the attribute key is ignored. ``None`` is returned
    when no attribute matches.
    """
    return next(
        (value for key, value in element.attrib.items() if _local_name(key) == name),
        None,
    )
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
def _has_child(element: ET.Element, name: str) -> bool:
    """Return whether *element* has a direct child with local name *name*."""
    first_match = _first_child_by_local_name(element, name)
    return first_match is not None
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def _first_child_by_local_name(element: ET.Element, name: str) -> ET.Element | None:
    """Return the first direct child whose local tag name is *name*, else ``None``."""
    return next(
        (child for child in element if _local_name(child.tag) == name),
        None,
    )
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
def _first_descendant_by_local_name(element: ET.Element, name: str) -> ET.Element | None:
    """Return the first descendant (document order) with local tag name *name*.

    *element* itself is never returned, even when its own tag matches.
    """
    candidates = (
        node
        for node in element.iter()
        if node is not element and _local_name(node.tag) == name
    )
    return next(candidates, None)
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
def _table_blocks(
    root: ET.Element,
    *,
    member: str,
    table_start_index: int,
    text_index_by_element_id: dict[int, int],
    semantic_labels: dict[int, str],
) -> list[TableBlock]:
    """Build one ``TableBlock`` per ``tbl`` element found under *root*.

    Tables are numbered from *table_start_index*. Every ``tc`` of every direct
    ``tr`` becomes a ``TableCell`` whose text is the concatenation of its
    non-empty text elements. As a side effect, *semantic_labels* is filled in:
    cells are read as (label, value) pairs, and the text index of each value
    cell's first text element is mapped to the stripped label text.
    """
    blocks: list[TableBlock] = []
    for table_index, table in enumerate(
        _elements_by_local_name(root, "tbl"), start=table_start_index
    ):
        cells: list[TableCell] = []
        for row_index, row in enumerate(_child_elements_by_local_name(table, "tr")):
            row_cells = _child_elements_by_local_name(row, "tc")
            # Non-empty text elements of each cell, in column order.
            nodes_per_cell: list[list[ET.Element]] = [
                [node for node in _text_elements(cell) if node.text] for cell in row_cells
            ]
            for column_index, (cell, nodes) in enumerate(zip(row_cells, nodes_per_cell)):
                first_text_index = None
                if nodes:
                    first_text_index = text_index_by_element_id.get(id(nodes[0]))
                field_path = None
                if first_text_index is not None:
                    field_path = f"/hwpx/text[{first_text_index}]"
                cells.append(
                    TableCell(
                        row_index=row_index,
                        column_index=column_index,
                        text="".join(node.text or "" for node in nodes),
                        row_span=_span_attribute(cell, "rowSpan"),
                        column_span=_span_attribute(cell, "colSpan"),
                        source_path=(
                            f"{member}#table[{table_index}]/r{row_index + 1}c{column_index + 1}"
                        ),
                        field_path=field_path,
                    )
                )
            # Rows with an odd count above two skip their first cell before
            # pairing; all other rows pair up from the first cell.
            cell_count = len(row_cells)
            pair_start = 1 if cell_count > 2 and cell_count % 2 == 1 else 0
            for label_column in range(pair_start, cell_count - 1, 2):
                label = _cell_text(row_cells[label_column]).strip()
                value_nodes = nodes_per_cell[label_column + 1]
                if label and value_nodes:
                    first_value_index = text_index_by_element_id.get(id(value_nodes[0]))
                    if first_value_index is not None:
                        semantic_labels[first_value_index] = label
        blocks.append(
            TableBlock(
                block_id=f"hwpx-table-{table_index:03d}",
                source_path=f"{member}#table[{table_index}]",
                cells=cells,
            )
        )
    return blocks
|
|
931
|
+
|
|
932
|
+
|
|
933
|
+
def _elements_by_local_name(root: ET.Element, name: str) -> list[ET.Element]:
    """Collect *root* and all its descendants whose local tag name is *name*."""
    matches: list[ET.Element] = []
    for node in root.iter():
        if _local_name(node.tag) == name:
            matches.append(node)
    return matches
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
def _child_elements_by_local_name(root: ET.Element, name: str) -> list[ET.Element]:
    """Collect the direct children of *root* whose local tag name is *name*."""
    matches: list[ET.Element] = []
    for child in root:
        if _local_name(child.tag) == name:
            matches.append(child)
    return matches
|
|
939
|
+
|
|
940
|
+
|
|
941
|
+
def _cell_text(cell: ET.Element) -> str:
    """Return the concatenated text of every non-empty text element in *cell*."""
    pieces = [node.text for node in _text_elements(cell) if node.text]
    return "".join(pieces)
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
def _span_attribute(cell: ET.Element, name: str) -> int:
    """Read a span attribute (matched by local name) as an integer of at least 1.

    A missing attribute or a non-integer value yields ``1``.
    """
    raw_span = _local_attr(cell, name)
    if raw_span is None:
        return 1
    try:
        span = int(raw_span)
    except ValueError:
        return 1
    return max(1, span)
|
|
953
|
+
|
|
954
|
+
|
|
955
|
+
def _local_name(tag: str) -> str:
|
|
956
|
+
return tag.rsplit("}", 1)[-1]
|
|
957
|
+
|
|
958
|
+
|
|
959
|
+
def _hwpx_patch_buckets_from_patch(patch: DocumentPatch) -> _HwpXPatchBuckets:
    """Sort the operations of *patch* into text/table-cell value and style buckets.

    Value operations (set field value, replace text, set table cell) and style
    operations (paragraph, run, cell style) are each routed by target path:
    a text target goes to the text bucket, a table-cell target to the cell
    bucket. Later operations on the same target overwrite earlier ones.

    Raises:
        ValueError: for an unsupported operation type, an unsupported target
            path, or a style operation without a style.
    """
    text_replacements: dict[int, str] = {}
    table_cell_replacements: dict[_HwpXTableCellTarget, str] = {}
    text_styles: dict[int, StyleDescriptor] = {}
    table_cell_styles: dict[_HwpXTableCellTarget, StyleDescriptor] = {}

    value_operations = {
        OperationType.set_field_value,
        OperationType.replace_text,
        OperationType.set_table_cell,
    }
    style_operations = {
        OperationType.set_paragraph_style,
        OperationType.set_run_style,
        OperationType.set_cell_style,
    }

    for operation in patch.operations:
        if operation.operation_type in value_operations:
            value = "" if operation.value is None else str(operation.value)
            text_match = _TEXT_TARGET_RE.match(operation.target_path)
            if text_match is not None:
                text_replacements[int(text_match.group("index"))] = value
            else:
                cell_target = _hwpx_table_cell_target(operation.target_path)
                if cell_target is not None:
                    table_cell_replacements[cell_target] = value
                elif operation.operation_type is OperationType.set_table_cell:
                    raise ValueError(
                        f"Unsupported HWPX table cell target path: {operation.target_path}"
                    )
                else:
                    raise ValueError(f"Unsupported HWPX text target path: {operation.target_path}")
        elif operation.operation_type in style_operations:
            if operation.style is None:
                raise ValueError("HWPX style operation requires style")
            text_match = _TEXT_TARGET_RE.match(operation.target_path)
            if text_match is not None:
                text_styles[int(text_match.group("index"))] = operation.style
            else:
                cell_target = _hwpx_table_cell_target(operation.target_path)
                if cell_target is None:
                    raise ValueError(f"Unsupported HWPX style target path: {operation.target_path}")
                table_cell_styles[cell_target] = operation.style
        else:
            raise ValueError(f"Unsupported HWPX operation: {operation.operation_type.value}")

    return _HwpXPatchBuckets(
        text_replacements=text_replacements,
        table_cell_replacements=table_cell_replacements,
        text_styles=text_styles,
        table_cell_styles=table_cell_styles,
    )
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
def _hwpx_header_payload(archive: ZipFile) -> bytes:
|
|
1010
|
+
try:
|
|
1011
|
+
return archive.read("Contents/header.xml")
|
|
1012
|
+
except KeyError:
|
|
1013
|
+
return b'<hh:head xmlns:hh="http://www.hancom.co.kr/hwpml/2011/head" />'
|
|
1014
|
+
|
|
1015
|
+
|
|
1016
|
+
def _hwpx_style_refs_from_buckets(
    archive: ZipFile,
    patch_buckets: _HwpXPatchBuckets,
) -> tuple[dict[int, _HwpXStyleRefs], dict[_HwpXTableCellTarget, _HwpXStyleRefs], bytes | None]:
    """Register every requested style in the header and return the new references.

    Returns a triple of (style refs per text index, style refs per table cell
    target, serialized header). When the patch has no style mutations, the
    header is not touched and ``({}, {}, None)`` is returned.
    """
    if not patch_buckets.has_style_mutations:
        return {}, {}, None

    header_source = _hwpx_header_payload(archive)
    header_namespaces = _namespace_map(header_source)
    header_root = ElementTree.fromstring(header_source)

    # Text styles are registered before cell styles; registration order
    # determines the ids handed out inside the header.
    text_style_refs: dict[int, _HwpXStyleRefs] = {}
    for target_index, style in patch_buckets.text_styles.items():
        text_style_refs[target_index] = _ensure_hwpx_style_refs(header_root, style)

    table_cell_style_refs: dict[_HwpXTableCellTarget, _HwpXStyleRefs] = {}
    for target, style in patch_buckets.table_cell_styles.items():
        table_cell_style_refs[target] = _ensure_hwpx_style_refs(header_root, style)

    serialized_header = _serialize_section(header_root, header_namespaces)
    return text_style_refs, table_cell_style_refs, serialized_header
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
def _raise_for_missing_hwpx_patch_targets(
    *,
    patch_buckets: _HwpXPatchBuckets,
    text_style_refs: dict[int, _HwpXStyleRefs],
    table_cell_style_refs: dict[_HwpXTableCellTarget, _HwpXStyleRefs],
    text_index: int,
    applied_table_cell_targets: set[_HwpXTableCellTarget],
    applied_text_style_targets: set[int],
    applied_table_cell_style_targets: set[_HwpXTableCellTarget],
) -> None:
    """Raise ``ValueError`` if any requested patch target was never applied.

    Checks run in this order: text replacements (valid indexes are
    ``1 .. text_index - 1``), text styles, table cell replacements, then
    table cell styles. The first failing check raises.
    """
    valid_text_indexes = range(1, text_index)
    missing = {
        index for index in patch_buckets.text_replacements if index not in valid_text_indexes
    }
    if missing:
        raise ValueError(f"HWPX text target not found: {sorted(missing)}")

    missing_text_style_targets = set(text_style_refs).difference(applied_text_style_targets)
    if missing_text_style_targets:
        raise ValueError(f"HWPX text target not found: {sorted(missing_text_style_targets)}")

    _raise_for_missing_hwpx_table_cell_targets(
        set(patch_buckets.table_cell_replacements).difference(applied_table_cell_targets)
    )
    _raise_for_missing_hwpx_table_cell_targets(
        set(table_cell_style_refs).difference(applied_table_cell_style_targets)
    )
|
|
1065
|
+
|
|
1066
|
+
|
|
1067
|
+
def _raise_for_missing_hwpx_table_cell_targets(
    missing_targets: set[_HwpXTableCellTarget],
) -> None:
    """Raise ``ValueError`` listing *missing_targets* as sorted paths; no-op when empty."""
    if missing_targets:
        ordered_targets = sorted(missing_targets, key=_hwpx_table_cell_target_sort_key)
        missing_paths = list(map(_hwpx_table_cell_target_path, ordered_targets))
        raise ValueError(f"HWPX table cell target not found: {missing_paths}")
|
|
1077
|
+
|
|
1078
|
+
|
|
1079
|
+
def _rewrite_hwpx_package(
    archive: ZipFile,
    *,
    section_payloads: dict[str, bytes],
    header_payload: bytes | None,
) -> bytes:
    """Copy *archive* into a new in-memory ZIP, swapping in the patched members.

    The ``mimetype`` member (if any) is written first, uncompressed. Members
    named in *section_payloads* are replaced, ``Contents/header.xml`` is
    replaced by *header_payload* when one is given, and
    ``Preview/PrvText.txt`` is regenerated from the section payloads. When a
    header payload is given but the source had no header member, the header
    is appended at the end.
    """
    buffer = io.BytesIO()
    with ZipFile(buffer, "w") as rewritten:
        header_replaced = False
        entries = archive.infolist()

        for entry in entries:
            if entry.filename == "mimetype":
                rewritten.writestr(
                    _stored_hwpx_mimetype_info(entry),
                    archive.read(entry.filename),
                )
                break  # only the first mimetype entry is carried over

        for entry in entries:
            member_name = entry.filename
            if member_name == "mimetype":
                continue
            payload = archive.read(member_name)
            if member_name in section_payloads:
                payload = section_payloads[member_name]
            elif member_name == "Contents/header.xml" and header_payload is not None:
                payload = header_payload
                header_replaced = True
            elif member_name == "Preview/PrvText.txt":
                payload = _preview_text(section_payloads).encode("utf-8")
            rewritten.writestr(entry, payload)

        if header_payload is not None and not header_replaced:
            rewritten.writestr("Contents/header.xml", header_payload)
    # Read the buffer only after the ZipFile is closed, so the archive is complete.
    return buffer.getvalue()
|
|
1110
|
+
|
|
1111
|
+
|
|
1112
|
+
def _stored_hwpx_mimetype_info(info: ZipInfo) -> ZipInfo:
|
|
1113
|
+
stored = ZipInfo(info.filename, date_time=info.date_time)
|
|
1114
|
+
stored.compress_type = ZIP_STORED
|
|
1115
|
+
stored.comment = info.comment
|
|
1116
|
+
stored.extra = info.extra
|
|
1117
|
+
stored.external_attr = info.external_attr
|
|
1118
|
+
stored.create_system = info.create_system
|
|
1119
|
+
return stored
|
|
1120
|
+
|
|
1121
|
+
|
|
1122
|
+
def _ensure_hwpx_style_refs(root: ET.Element, style: StyleDescriptor) -> _HwpXStyleRefs:
    """Append the header entries needed for *style* and return their ids.

    A character property is added when the style has character-level
    settings, a paragraph property when it has paragraph-level settings, and
    a named style tying them together when at least one of those was added.
    Ids that were not created are ``None``.
    """
    char_pr_id = None
    if _hwpx_style_has_char_props(style):
        char_pr_id = _append_hwpx_char_pr(root, style)

    para_pr_id = None
    if _hwpx_style_has_para_props(style):
        para_pr_id = _append_hwpx_para_pr(root, style)

    style_id = None
    if not (char_pr_id is None and para_pr_id is None):
        style_id = _append_hwpx_named_style(root, char_pr_id=char_pr_id, para_pr_id=para_pr_id)

    return _HwpXStyleRefs(char_pr_id=char_pr_id, para_pr_id=para_pr_id, style_id=style_id)
|
|
1135
|
+
|
|
1136
|
+
|
|
1137
|
+
def _hwpx_style_has_char_props(style: StyleDescriptor) -> bool:
|
|
1138
|
+
return any(
|
|
1139
|
+
value is not None
|
|
1140
|
+
for value in (
|
|
1141
|
+
style.font_family,
|
|
1142
|
+
style.font_size_pt,
|
|
1143
|
+
style.bold,
|
|
1144
|
+
style.italic,
|
|
1145
|
+
style.underline,
|
|
1146
|
+
style.font_color_rgb,
|
|
1147
|
+
style.fill_color_rgb,
|
|
1148
|
+
style.border,
|
|
1149
|
+
)
|
|
1150
|
+
)
|
|
1151
|
+
|
|
1152
|
+
|
|
1153
|
+
def _hwpx_style_has_para_props(style: StyleDescriptor) -> bool:
|
|
1154
|
+
return style.alignment is not None or style.line_spacing is not None
|
|
1155
|
+
|
|
1156
|
+
|
|
1157
|
+
def _append_hwpx_char_pr(root: ET.Element, style: StyleDescriptor) -> str:
    """Append a new ``charPr`` element describing *style* and return its id.

    The element is added to the ``charProperties`` child of *root* (created
    when absent) and the container's ``itemCnt`` is refreshed.

    Args:
        root: Header root element; mutated in place.
        style: Style whose character-level fields are written.

    Returns:
        The id assigned to the new ``charPr``, as a string.
    """
    container = _find_or_create_direct_child(root, "charProperties")
    # The id is allocated before anything is appended, from all charPr under root.
    char_pr_id = str(_next_numeric_id(root, "charPr"))
    attributes: dict[str, str] = {"id": char_pr_id}
    if style.font_size_pt is not None:
        attributes["height"] = _hwpx_points_to_height(style.font_size_pt)
    if style.font_color_rgb is not None:
        attributes["textColor"] = _hwpx_color(style.font_color_rgb)
    if style.fill_color_rgb is not None:
        attributes["shadeColor"] = _hwpx_color(style.fill_color_rgb)
    if style.fill_color_rgb is not None or style.border is not None:
        # A fill or a border needs its own borderFill entry; this call mutates root.
        attributes["borderFillIDRef"] = _append_hwpx_border_fill(root, style)
    char_pr = ET.Element(_qualified_child_tag(container, "charPr"), attributes)
    if style.font_family is not None:
        font_id = _ensure_hwpx_font(root, style.font_family)
        # The same font id is referenced for every script slot.
        char_pr.append(
            ET.Element(
                _qualified_child_tag(char_pr, "fontRef"),
                {
                    "hangul": font_id,
                    "latin": font_id,
                    "hanja": font_id,
                    "japanese": font_id,
                    "other": font_id,
                    "symbol": font_id,
                    "user": font_id,
                },
            )
        )
    # Only an explicit True emits a marker element; False and None emit nothing.
    if style.bold is True:
        char_pr.append(ET.Element(_qualified_child_tag(char_pr, "bold")))
    if style.italic is True:
        char_pr.append(ET.Element(_qualified_child_tag(char_pr, "italic")))
    if style.underline is True:
        char_pr.append(ET.Element(_qualified_child_tag(char_pr, "underline")))
    container.append(char_pr)
    _refresh_item_count(container, "charPr")
    return char_pr_id
|
|
1195
|
+
|
|
1196
|
+
|
|
1197
|
+
def _append_hwpx_para_pr(root: ET.Element, style: StyleDescriptor) -> str:
    """Append a new ``paraPr`` element for *style* and return its id.

    The element goes into the ``paraProperties`` child of *root* (created
    when absent). An ``align`` child is written when the style has an
    alignment.
    """
    # NOTE(review): ``style.line_spacing`` is never read here, although
    # ``_hwpx_style_has_para_props`` treats it as a paragraph property —
    # confirm whether a line-spacing child should be emitted.
    container = _find_or_create_direct_child(root, "paraProperties")
    para_pr_id = str(_next_numeric_id(root, "paraPr"))
    para_pr_attributes = {"id": para_pr_id, "tabPrIDRef": "0"}
    para_pr = ET.Element(_qualified_child_tag(container, "paraPr"), para_pr_attributes)
    if style.alignment is not None:
        align_attributes = {"horizontal": style.alignment.upper(), "vertical": "BASELINE"}
        ET.SubElement(para_pr, _qualified_child_tag(para_pr, "align"), align_attributes)
    container.append(para_pr)
    _refresh_item_count(container, "paraPr")
    return para_pr_id
|
|
1214
|
+
|
|
1215
|
+
|
|
1216
|
+
def _append_hwpx_named_style(
    root: ET.Element,
    *,
    char_pr_id: str | None,
    para_pr_id: str | None,
) -> str:
    """Append a new ``PARA`` style entry to ``styles`` and return its id.

    The entry is named ``UMMAYAStyle<id>`` and references the given
    paragraph/character property ids when they are not ``None``.
    """
    container = _find_or_create_direct_child(root, "styles")
    style_id = str(_next_numeric_id(root, "style"))
    display_name = f"UMMAYAStyle{style_id}"
    attributes = dict(
        id=style_id,
        type="PARA",
        name=display_name,
        engName=display_name,
        nextStyleIDRef="0",
        langID="1042",
        lockForm="0",
    )
    # The paragraph reference is added before the character reference.
    for attribute_name, reference in (("paraPrIDRef", para_pr_id), ("charPrIDRef", char_pr_id)):
        if reference is not None:
            attributes[attribute_name] = reference
    ET.SubElement(container, _qualified_child_tag(container, "style"), attributes)
    _refresh_item_count(container, "style")
    return style_id
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
def _append_hwpx_border_fill(root: ET.Element, style: StyleDescriptor) -> str:
    """Append a new ``borderFill`` element for *style* and return its id.

    The element is added to the ``borderFills`` child of *root* (created
    when absent) and the container's ``itemCnt`` is refreshed.

    Args:
        root: Header root element; mutated in place.
        style: Style whose ``border`` and ``fill_color_rgb`` are written.

    Returns:
        The id assigned to the new ``borderFill``, as a string.
    """
    container = _find_or_create_direct_child(root, "borderFills")
    border_fill_id = str(_next_numeric_id(root, "borderFill"))
    # Without a border descriptor: type NONE, width 0.10 mm, colour black.
    border_type = style.border.style.upper() if style.border is not None else "NONE"
    border_width = (
        _hwpx_points_to_mm(style.border.width_pt)
        if style.border is not None and style.border.width_pt is not None
        else "0.10 mm"
    )
    border_color = (
        _hwpx_color(style.border.color_rgb)
        if style.border is not None and style.border.color_rgb is not None
        else "#000000"
    )
    border_fill = ET.Element(
        _qualified_child_tag(container, "borderFill"),
        {"id": border_fill_id, "threeD": "0", "shadow": "0", "centerLine": "NONE"},
    )
    # Diagonal lines are always written as NONE.
    for border_name in ("slash", "backSlash"):
        border_fill.append(
            ET.Element(_qualified_child_tag(border_fill, border_name), {"type": "NONE"})
        )
    # All four sides share the same type, width and colour.
    for border_name in ("leftBorder", "rightBorder", "topBorder", "bottomBorder"):
        border_fill.append(
            ET.Element(
                _qualified_child_tag(border_fill, border_name),
                {"type": border_type, "width": border_width, "color": border_color},
            )
        )
    if style.fill_color_rgb is not None:
        # The fill colour is written as a fillBrush holding one winBrush.
        fill_brush = ET.Element(_qualified_child_tag(border_fill, "fillBrush"))
        fill_brush.append(
            ET.Element(
                _qualified_child_tag(fill_brush, "winBrush"),
                {
                    "faceColor": _hwpx_color(style.fill_color_rgb),
                    "hatchColor": "#000000",
                    "alpha": "0",
                },
            )
        )
        border_fill.append(fill_brush)
    container.append(border_fill)
    _refresh_item_count(container, "borderFill")
    return border_fill_id
|
|
1287
|
+
|
|
1288
|
+
|
|
1289
|
+
def _ensure_hwpx_font(root: ET.Element, font_family: str) -> str:
    """Return the id of *font_family* in the header, registering it when absent.

    Only the first ``fontface`` under ``fontfaces`` is searched and extended;
    both containers are created when missing. After an insert, ``fontCnt``
    and ``itemCnt`` are refreshed.
    """
    # NOTE(review): new ids come from every ``font`` element under root, while
    # the font is added to the first fontface only — confirm this matches how
    # the per-script ``fontRef`` attributes are resolved.
    fontfaces = _find_or_create_direct_child(root, "fontfaces")
    fontface = _first_child_by_local_name(fontfaces, "fontface")
    if fontface is None:
        fontface = ET.SubElement(
            fontfaces,
            _qualified_child_tag(fontfaces, "fontface"),
            {"lang": "HANGUL", "fontCnt": "0"},
        )

    for font in _child_elements_by_local_name(fontface, "font"):
        if _local_attr(font, "face") != font_family:
            continue
        known_id = _local_attr(font, "id")
        if known_id is not None:
            return known_id

    new_font_id = str(_next_numeric_id(root, "font"))
    ET.SubElement(
        fontface,
        _qualified_child_tag(fontface, "font"),
        {"id": new_font_id, "face": font_family, "type": "TTF", "isEmbedded": "0"},
    )
    font_total = len(_child_elements_by_local_name(fontface, "font"))
    fontface.set("fontCnt", str(font_total))
    _refresh_item_count(fontfaces, "fontface")
    return new_font_id
|
|
1313
|
+
|
|
1314
|
+
|
|
1315
|
+
def _find_or_create_direct_child(root: ET.Element, name: str) -> ET.Element:
    """Return the first direct child named *name*, appending a new one if absent.

    A newly created child uses the same namespace as *root*.
    """
    child = _first_child_by_local_name(root, name)
    if child is None:
        child = ET.Element(_qualified_child_tag(root, name))
        root.append(child)
    return child
|
|
1322
|
+
|
|
1323
|
+
|
|
1324
|
+
def _next_numeric_id(root: ET.Element, name: str) -> int:
    """Return one more than the highest integer ``id`` on elements named *name*.

    Elements without an ``id`` or with a non-integer ``id`` are ignored.
    When no usable id exists the result is ``0``.
    """
    highest: int | None = None
    for element in _elements_by_local_name(root, name):
        raw_id = _local_attr(element, "id")
        if raw_id is None:
            continue
        try:
            candidate = int(raw_id)
        except ValueError:
            continue
        if highest is None or candidate > highest:
            highest = candidate
    return 0 if highest is None else highest + 1
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
def _refresh_item_count(container: ET.Element, child_name: str) -> None:
    """Set ``itemCnt`` on *container* to its number of direct *child_name* children."""
    matching_children = _child_elements_by_local_name(container, child_name)
    container.set("itemCnt", str(len(matching_children)))
|
|
1339
|
+
|
|
1340
|
+
|
|
1341
|
+
def _hwpx_color(value: str) -> str:
    """Format *value* as a ``#``-prefixed colour.

    The normalised six-digit form from ``_rgb`` is used when available;
    otherwise the upper-cased input is used as is.
    """
    digits = _rgb(value)
    if not digits:
        digits = value.upper()
    return f"#{digits}"
|
|
1343
|
+
|
|
1344
|
+
|
|
1345
|
+
def _hwpx_points_to_height(points: Decimal) -> str:
|
|
1346
|
+
return str(int((points * Decimal("100")).to_integral_value()))
|
|
1347
|
+
|
|
1348
|
+
|
|
1349
|
+
def _hwpx_points_to_mm(points: Decimal) -> str:
|
|
1350
|
+
millimeters = (points / Decimal("2.834645669")).quantize(Decimal("0.01"))
|
|
1351
|
+
return f"{millimeters} mm"
|
|
1352
|
+
|
|
1353
|
+
|
|
1354
|
+
def _apply_hwpx_section_mutations(
    root: ET.Element,
    *,
    member: str,
    text_index: int,
    table_index: int,
    text_replacements: dict[int, str],
    table_cell_replacements: dict[_HwpXTableCellTarget, str],
    text_style_refs: dict[int, _HwpXStyleRefs],
    table_cell_style_refs: dict[_HwpXTableCellTarget, _HwpXStyleRefs],
) -> tuple[
    int,
    int,
    set[_HwpXTableCellTarget],
    set[int],
    set[_HwpXTableCellTarget],
]:
    """Apply text and table-cell replacements and styles to one section tree.

    *text_index* and *table_index* are running counters carried in by the
    caller; every non-empty text element and every table advances them by one.

    Returns:
        ``(next text index, next table index, applied cell replacement
        targets, applied text style indexes, applied cell style targets)``.
    """
    replaced_cells: set[_HwpXTableCellTarget] = set()
    styled_text_indexes: set[int] = set()
    styled_cells: set[_HwpXTableCellTarget] = set()

    parent_by_id = _parent_by_element_id(root)
    for text_element in _text_elements(root):
        if not text_element.text:
            continue  # empty text elements do not consume an index
        if text_index in text_replacements:
            text_element.text = text_replacements[text_index]
        style_refs = text_style_refs.get(text_index)
        if style_refs is not None:
            _apply_hwpx_style_refs_to_text(text_element, style_refs, parent_by_id)
            styled_text_indexes.add(text_index)
        text_index += 1

    for table in _elements_by_local_name(root, "tbl"):
        replaced_cells |= _apply_hwpx_table_cell_replacements(
            table,
            member=member,
            table_index=table_index,
            table_cell_replacements=table_cell_replacements,
        )
        styled_cells |= _apply_hwpx_table_cell_styles(
            table,
            member=member,
            table_index=table_index,
            table_cell_style_refs=table_cell_style_refs,
        )
        table_index += 1

    return text_index, table_index, replaced_cells, styled_text_indexes, styled_cells
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
def _apply_hwpx_style_refs_to_text(
    text: ET.Element,
    refs: _HwpXStyleRefs,
    parent_by_id: dict[int, ET.Element],
) -> None:
    """Apply *refs* to the nearest ``p`` and ``run`` ancestors of *text*.

    Raises:
        ValueError: when either ancestor cannot be found.
    """
    containers = {
        tag: _nearest_ancestor_by_local_name(text, tag, parent_by_id) for tag in ("p", "run")
    }
    paragraph, run = containers["p"], containers["run"]
    if paragraph is None or run is None:
        raise ValueError("HWPX text style target has no paragraph/run container")
    _set_hwpx_style_refs(paragraph=paragraph, run=run, refs=refs)
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
def _apply_hwpx_table_cell_styles(
    table: ET.Element,
    *,
    member: str,
    table_index: int,
    table_cell_style_refs: dict[_HwpXTableCellTarget, _HwpXStyleRefs],
) -> set[_HwpXTableCellTarget]:
    """Apply the style refs addressed to this table and return the targets handled.

    Targets for another member or table index are skipped. Every run of every
    paragraph inside the cell receives the refs; a paragraph (or run) is
    created first when the cell (or paragraph) has none.
    """
    handled: set[_HwpXTableCellTarget] = set()
    for target, refs in table_cell_style_refs.items():
        if (target.member, target.table_index) != (member, table_index):
            continue
        cell = _hwpx_table_cell_element(
            table,
            row_index=target.row_index,
            column_index=target.column_index,
        )
        paragraphs = _elements_by_local_name(cell, "p")
        if paragraphs:
            for paragraph in paragraphs:
                runs = _child_elements_by_local_name(paragraph, "run")
                if not runs:
                    # Give an empty paragraph one run to carry the character ref.
                    runs = [ET.SubElement(paragraph, _qualified_child_tag(paragraph, "run"))]
                for run in runs:
                    _set_hwpx_style_refs(paragraph=paragraph, run=run, refs=refs)
        else:
            paragraph, run = _ensure_hwpx_cell_paragraph_and_run(cell)
            _set_hwpx_style_refs(paragraph=paragraph, run=run, refs=refs)
        handled.add(target)
    return handled
|
|
1454
|
+
|
|
1455
|
+
|
|
1456
|
+
def _set_hwpx_style_refs(
|
|
1457
|
+
*,
|
|
1458
|
+
paragraph: ET.Element,
|
|
1459
|
+
run: ET.Element,
|
|
1460
|
+
refs: _HwpXStyleRefs,
|
|
1461
|
+
) -> None:
|
|
1462
|
+
if refs.para_pr_id is not None:
|
|
1463
|
+
paragraph.set("paraPrIDRef", refs.para_pr_id)
|
|
1464
|
+
if refs.style_id is not None:
|
|
1465
|
+
paragraph.set("styleIDRef", refs.style_id)
|
|
1466
|
+
if refs.char_pr_id is not None:
|
|
1467
|
+
run.set("charPrIDRef", refs.char_pr_id)
|
|
1468
|
+
|
|
1469
|
+
|
|
1470
|
+
def _apply_hwpx_table_cell_replacements(
    table: ET.Element,
    *,
    member: str,
    table_index: int,
    table_cell_replacements: dict[_HwpXTableCellTarget, str],
) -> set[_HwpXTableCellTarget]:
    """Write the replacement texts addressed to this table and return the targets handled.

    Targets for another member or table index are skipped.
    """
    own_targets = [
        target
        for target in table_cell_replacements
        if target.member == member and target.table_index == table_index
    ]
    handled: set[_HwpXTableCellTarget] = set()
    for target in own_targets:
        _set_hwpx_table_cell_text(
            table,
            row_index=target.row_index,
            column_index=target.column_index,
            value=table_cell_replacements[target],
        )
        handled.add(target)
    return handled
|
|
1489
|
+
|
|
1490
|
+
|
|
1491
|
+
def _hwpx_table_cell_target(target_path: str) -> _HwpXTableCellTarget | None:
    """Parse a table-cell target path; return ``None`` when it does not match."""
    parsed = _HWPX_TABLE_CELL_SOURCE_RE.match(target_path)
    if parsed is None:
        return None
    groups = parsed.groupdict()
    return _HwpXTableCellTarget(
        member=groups["member"],
        table_index=int(groups["table"]),
        row_index=int(groups["row"]),
        column_index=int(groups["column"]),
    )
|
|
1501
|
+
|
|
1502
|
+
|
|
1503
|
+
def _hwpx_table_cell_target_path(target: _HwpXTableCellTarget) -> str:
|
|
1504
|
+
return f"{target.member}#table[{target.table_index}]/r{target.row_index}c{target.column_index}"
|
|
1505
|
+
|
|
1506
|
+
|
|
1507
|
+
def _hwpx_table_cell_target_sort_key(
|
|
1508
|
+
target: _HwpXTableCellTarget,
|
|
1509
|
+
) -> tuple[str, int, int, int]:
|
|
1510
|
+
return (target.member, target.table_index, target.row_index, target.column_index)
|
|
1511
|
+
|
|
1512
|
+
|
|
1513
|
+
def _set_hwpx_table_cell_text(
    table: ET.Element,
    *,
    row_index: int,
    column_index: int,
    value: str,
) -> None:
    """Replace the text of one table cell with *value*.

    When the cell already has text elements, the first receives *value* and
    the others are emptied. Otherwise a ``t`` element is found or created
    inside the cell's first paragraph/run and receives *value*.
    """
    cell = _hwpx_table_cell_element(table, row_index=row_index, column_index=column_index)
    text_nodes = _text_elements(cell)
    if not text_nodes:
        _, run = _ensure_hwpx_cell_paragraph_and_run(cell)
        target = _first_child_by_local_name(run, "t")
        if target is None:
            target = ET.Element(_qualified_child_tag(run, "t"))
            run.append(target)
        target.text = value
        return

    text_nodes[0].text = value
    for leftover in text_nodes[1:]:
        leftover.text = ""
|
|
1533
|
+
|
|
1534
|
+
|
|
1535
|
+
def _hwpx_table_cell_element(
    table: ET.Element,
    *,
    row_index: int,
    column_index: int,
) -> ET.Element:
    """Return the ``tc`` element at the 1-based (*row_index*, *column_index*).

    Rows are the direct ``tr`` children of *table*; cells are the direct
    ``tc`` children of the selected row.

    Raises:
        ValueError: when either index is below 1 or beyond the available
            rows/cells.
    """
    rows = _child_elements_by_local_name(table, "tr")
    # Check the lower bound too: 0 or a negative index would otherwise wrap
    # around via Python's negative indexing and silently pick the last row.
    if not 1 <= row_index <= len(rows):
        raise ValueError(f"HWPX table row target not found: {row_index}")
    cells = _child_elements_by_local_name(rows[row_index - 1], "tc")
    if not 1 <= column_index <= len(cells):
        raise ValueError(f"HWPX table cell target not found: {row_index},{column_index}")
    return cells[column_index - 1]
|
|
1548
|
+
|
|
1549
|
+
|
|
1550
|
+
def _ensure_hwpx_cell_paragraph_and_run(cell: ET.Element) -> tuple[ET.Element, ET.Element]:
    """Return a ``(paragraph, run)`` pair inside *cell*, creating what is missing.

    A direct ``p`` child is preferred. Failing that, the first descendant
    ``p`` is reused, so a paragraph wrapped in a ``subList`` container is
    edited in place rather than shadowed by a second paragraph appended to
    the cell. When the cell has no paragraph at all, one is created inside
    the cell's ``subList`` child if there is one, else directly in the cell.
    The run is the paragraph's first ``run`` child, created when absent.
    """
    paragraph = _first_child_by_local_name(cell, "p")
    if paragraph is None:
        paragraph = _first_descendant_by_local_name(cell, "p")
    if paragraph is None:
        host = _first_child_by_local_name(cell, "subList")
        if host is None:
            host = cell
        paragraph = ET.Element(_qualified_child_tag(host, "p"))
        host.append(paragraph)
    run = _first_child_by_local_name(paragraph, "run")
    if run is None:
        run = ET.Element(_qualified_child_tag(paragraph, "run"))
        paragraph.append(run)
    return paragraph, run
|
|
1560
|
+
|
|
1561
|
+
|
|
1562
|
+
def _qualified_child_tag(parent: ET.Element, local_name: str) -> str:
|
|
1563
|
+
if parent.tag.startswith("{") and "}" in parent.tag:
|
|
1564
|
+
namespace = parent.tag.split("}", 1)[0][1:]
|
|
1565
|
+
return f"{{{namespace}}}{local_name}"
|
|
1566
|
+
return local_name
|
|
1567
|
+
|
|
1568
|
+
|
|
1569
|
+
def _namespace_map(payload: bytes) -> list[tuple[str, str]]:
|
|
1570
|
+
namespaces: list[tuple[str, str]] = []
|
|
1571
|
+
for _event, namespace in ElementTree.iterparse(io.BytesIO(payload), events=("start-ns",)):
|
|
1572
|
+
prefix, uri = namespace
|
|
1573
|
+
namespaces.append((str(prefix), str(uri)))
|
|
1574
|
+
return namespaces
|
|
1575
|
+
|
|
1576
|
+
|
|
1577
|
+
def _serialize_section(root: ET.Element, namespaces: list[tuple[str, str]]) -> bytes:
|
|
1578
|
+
for prefix, uri in namespaces:
|
|
1579
|
+
ET.register_namespace(prefix, uri)
|
|
1580
|
+
return cast(
|
|
1581
|
+
bytes,
|
|
1582
|
+
ET.tostring(root, encoding="utf-8", xml_declaration=True, short_empty_elements=True),
|
|
1583
|
+
)
|
|
1584
|
+
|
|
1585
|
+
|
|
1586
|
+
def _preview_text(section_payloads: dict[str, bytes]) -> str:
    """Build the preview text from every section payload.

    Sections are visited in sorted member-name order. Each non-empty text
    element is wrapped in ``<`` and ``>``, and the pieces are joined with no
    separator.
    """
    chunks: list[str] = []
    for member in sorted(section_payloads):
        section_root = ElementTree.fromstring(section_payloads[member])
        for node in _text_elements(section_root):
            if node.text:
                chunks.append(f"<{node.text}>")
    return "".join(chunks)
|
|
1592
|
+
|
|
1593
|
+
|
|
1594
|
+
def _uses_hwpxjs_html_render(path: Path) -> bool:
    """Tell whether the HWPX package at ``path`` needs the hwpxjs HTML renderer.

    Returns ``True`` as soon as any ``tbl`` element in any section member is
    reported by ``_table_missing_rhwp_geometry``. An unreadable zip archive or
    unparsable section XML yields ``False``.
    """
    try:
        with ZipFile(path) as archive:
            # Lazy traversal: stops at the first table lacking geometry.
            return any(
                _table_missing_rhwp_geometry(table)
                for member in _section_members(archive)
                for table in _elements_by_local_name(
                    ElementTree.fromstring(archive.read(member)), "tbl"
                )
            )
    except (BadZipFile, ElementTree.ParseError):
        return False
|
|
1606
|
+
|
|
1607
|
+
|
|
1608
|
+
def _table_missing_rhwp_geometry(table: ET.Element) -> bool:
    """Report whether ``table`` lacks any of the geometry children checked here.

    The table itself must have direct ``sz`` and ``pos`` children. Every ``tc``
    cell inside every ``tr`` row must have direct ``cellAddr``, ``cellSpan``
    and ``cellSz`` children.

    Returns:
        ``True`` when at least one of those children is absent, else ``False``.
    """
    if not (_has_direct_child(table, "sz") and _has_direct_child(table, "pos")):
        return True
    required_cell_children = ("cellAddr", "cellSpan", "cellSz")
    for row in _child_elements_by_local_name(table, "tr"):
        for cell in _child_elements_by_local_name(row, "tc"):
            if not all(_has_direct_child(cell, name) for name in required_cell_children):
                return True
    return False
|
|
1620
|
+
|
|
1621
|
+
|
|
1622
|
+
def _has_direct_child(element: ET.Element, name: str) -> bool:
    """Return ``True`` when a direct child of ``element`` has local name ``name``.

    Only immediate children are inspected; their tags are reduced with
    ``_local_name`` before comparison.
    """
    for child in element:
        if _local_name(child.tag) == name:
            return True
    return False
|
|
1624
|
+
|
|
1625
|
+
|
|
1626
|
+
def _render_with_hwpxjs_html(path: Path) -> bytes:
    """Render an HWPX file to a standalone HTML document via the hwpxjs CLI.

    Runs ``<hwpxjs> html <path>`` from the package root with stdin detached,
    then wraps the CLI's stdout in a minimal HTML page with an inline
    stylesheet.

    Args:
        path: HWPX file to render.

    Returns:
        The complete HTML document, encoded with ``str.encode()`` defaults.

    Raises:
        RuntimeError: If the CLI exits non-zero or prints nothing to stdout.
    """
    command = [str(_hwpxjs_binary()), "html", str(path)]
    result = subprocess.run(  # noqa: S603 - executable is resolved local CLI, no shell.
        command,
        cwd=str(_rhwp_package_root()),
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        timeout=_RHWP_NODE_TIMEOUT_SECONDS,
        check=False,
    )
    if result.returncode != 0:
        # Prefer stderr for the failure summary, falling back to stdout.
        failure_detail = result.stderr.strip() or result.stdout.strip()
        raise RuntimeError(f"hwpxjs HTML render bridge failed: {failure_detail}")

    rendered_body = result.stdout.strip()
    if not rendered_body:
        raise RuntimeError("hwpxjs HTML render bridge produced no reviewer evidence")

    stylesheet = (
        "body{font-family:-apple-system,BlinkMacSystemFont,'Apple SD Gothic Neo',sans-serif;"
        "margin:24px;color:#111;line-height:1.45}"
        "table.hwpx-table{border-collapse:collapse;margin:12px 0;width:100%}"
        "table.hwpx-table td,table.hwpx-table th{border:1px solid #555;padding:6px 8px;"
        "vertical-align:top}"
    )
    document = "".join(
        (
            '<!doctype html><html><head><meta charset="utf-8">',
            "<style>",
            stylesheet,
            '</style></head><body data-ummaya-render-engine="hwpxjs-html-render">',
            rendered_body,
            "</body></html>",
        )
    )
    return document.encode()
|
|
1655
|
+
|
|
1656
|
+
|
|
1657
|
+
def _hwpxjs_binary() -> Path:
|
|
1658
|
+
configured = os.environ.get("UMMAYA_HWPXJS")
|
|
1659
|
+
if configured:
|
|
1660
|
+
candidate = Path(configured).expanduser()
|
|
1661
|
+
if not candidate.is_absolute():
|
|
1662
|
+
resolved = shutil.which(configured)
|
|
1663
|
+
if resolved is not None:
|
|
1664
|
+
candidate = Path(resolved)
|
|
1665
|
+
candidate = candidate.resolve(strict=False)
|
|
1666
|
+
if candidate.is_file() and os.access(candidate, os.X_OK):
|
|
1667
|
+
return candidate
|
|
1668
|
+
raise RuntimeError(f"UMMAYA_HWPXJS is not executable: {configured}")
|
|
1669
|
+
|
|
1670
|
+
path_candidate = shutil.which("hwpxjs")
|
|
1671
|
+
if path_candidate is not None:
|
|
1672
|
+
candidate = Path(path_candidate).resolve(strict=False)
|
|
1673
|
+
if candidate.is_file() and os.access(candidate, os.X_OK):
|
|
1674
|
+
return candidate
|
|
1675
|
+
for root in (Path.cwd(), _rhwp_package_root()):
|
|
1676
|
+
candidate = root / "node_modules" / ".bin" / "hwpxjs"
|
|
1677
|
+
if candidate.is_file() and os.access(candidate, os.X_OK):
|
|
1678
|
+
return candidate.resolve(strict=False)
|
|
1679
|
+
raise RuntimeError("hwpxjs executable is required for HWPX HTML rendering")
|
|
1680
|
+
|
|
1681
|
+
|
|
1682
|
+
# Timeout, in seconds, passed to ``subprocess.run`` for both the hwpxjs CLI
# call and the Node render bridge call.
_RHWP_NODE_TIMEOUT_SECONDS = 45
|
|
1683
|
+
|
|
1684
|
+
_RHWP_RENDER_BRIDGE_JS = r"""
|
|
1685
|
+
import { createHash } from 'node:crypto';
|
|
1686
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
1687
|
+
import { join, resolve } from 'node:path';
|
|
1688
|
+
import { createRequire } from 'node:module';
|
|
1689
|
+
import { pathToFileURL } from 'node:url';
|
|
1690
|
+
|
|
1691
|
+
const [inputPath, outputDir] = process.argv.slice(1);
|
|
1692
|
+
if (!inputPath || !outputDir) {
|
|
1693
|
+
throw new Error('Usage: rhwp render bridge requires <inputPath> <outputDir>');
|
|
1694
|
+
}
|
|
1695
|
+
|
|
1696
|
+
const packageRoot = resolve(process.env.UMMAYA_PACKAGE_ROOT || process.cwd());
|
|
1697
|
+
const packageJsonPath = join(packageRoot, 'package.json');
|
|
1698
|
+
if (!existsSync(packageJsonPath)) {
|
|
1699
|
+
throw new Error(`UMMAYA package root does not contain package.json: ${packageRoot}`);
|
|
1700
|
+
}
|
|
1701
|
+
|
|
1702
|
+
globalThis.measureTextWidth = (_font, text) => {
|
|
1703
|
+
let width = 0;
|
|
1704
|
+
for (const char of String(text)) {
|
|
1705
|
+
width += char.charCodeAt(0) > 0x7f ? 14 : 8;
|
|
1706
|
+
}
|
|
1707
|
+
return width;
|
|
1708
|
+
};
|
|
1709
|
+
|
|
1710
|
+
const require = createRequire(pathToFileURL(packageJsonPath));
|
|
1711
|
+
const rhwpModulePath = require.resolve('@rhwp/core/rhwp.js');
|
|
1712
|
+
const rhwpWasmPath = require.resolve('@rhwp/core/rhwp_bg.wasm');
|
|
1713
|
+
const rhwp = await import(pathToFileURL(rhwpModulePath).href);
|
|
1714
|
+
|
|
1715
|
+
await rhwp.default({ module_or_path: readFileSync(rhwpWasmPath) });
|
|
1716
|
+
|
|
1717
|
+
const data = readFileSync(resolve(inputPath));
|
|
1718
|
+
const doc = new rhwp.HwpDocument(new Uint8Array(data));
|
|
1719
|
+
const pageCount = doc.pageCount();
|
|
1720
|
+
mkdirSync(resolve(outputDir), { recursive: true });
|
|
1721
|
+
|
|
1722
|
+
const artifacts = [];
|
|
1723
|
+
for (let index = 0; index < pageCount; index += 1) {
|
|
1724
|
+
const svg = doc.renderPageSvg(index);
|
|
1725
|
+
const pageNumber = index + 1;
|
|
1726
|
+
const outputName = `rhwp-page-${String(pageNumber).padStart(3, '0')}.svg`;
|
|
1727
|
+
const outputPath = join(resolve(outputDir), outputName);
|
|
1728
|
+
writeFileSync(outputPath, svg);
|
|
1729
|
+
artifacts.push({
|
|
1730
|
+
pageNumber,
|
|
1731
|
+
path: outputPath,
|
|
1732
|
+
sha256: createHash('sha256').update(svg).digest('hex'),
|
|
1733
|
+
});
|
|
1734
|
+
}
|
|
1735
|
+
|
|
1736
|
+
console.log(JSON.stringify({
|
|
1737
|
+
engineId: 'rhwp-node-wasm',
|
|
1738
|
+
rhwpVersion: rhwp.version(),
|
|
1739
|
+
pageCount,
|
|
1740
|
+
artifacts,
|
|
1741
|
+
}));
|
|
1742
|
+
"""
|
|
1743
|
+
|
|
1744
|
+
|
|
1745
|
+
def _render_with_rhwp_node(path: Path, *, output_dir: Path) -> tuple[bytes, ...]:
    """Render an HWPX file to per-page SVG payloads through the Node bridge.

    Creates ``output_dir`` if needed, runs the embedded bridge script with
    Node, and reads back every artifact path the bridge reports.

    Args:
        path: HWPX file to render.
        output_dir: Directory the bridge writes its SVG files into. Every
            reported artifact must resolve inside it.

    Returns:
        The bytes of each reported SVG artifact, in the order reported.

    Raises:
        RuntimeError: If the bridge exits non-zero, reports a path outside
            ``output_dir``, or reports no artifacts.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    completed = subprocess.run(  # noqa: S603
        [
            _node_binary(),
            "--input-type=module",
            "-e",
            _RHWP_RENDER_BRIDGE_JS,
            str(path),
            str(output_dir),
        ],
        cwd=str(_rhwp_package_root()),
        env=_rhwp_bridge_env(),
        # The bridge script never reads stdin. Detach it, as the hwpxjs
        # bridge does, so the child cannot consume or block on the parent's
        # input stream.
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        timeout=_RHWP_NODE_TIMEOUT_SECONDS,
        check=False,
    )
    if completed.returncode != 0:
        # Prefer stderr for the failure summary, falling back to stdout.
        stderr = completed.stderr.strip()
        raise RuntimeError(f"RHWP render bridge failed: {stderr or completed.stdout.strip()}")

    payloads: list[bytes] = []
    for artifact_path in _parse_rhwp_bridge_result(completed.stdout):
        # Refuse to read anything the bridge reports outside output_dir.
        _require_render_path_inside(artifact_path, output_dir)
        payloads.append(artifact_path.read_bytes())
    if not payloads:
        raise RuntimeError("RHWP render bridge produced no page SVG artifacts")
    return tuple(payloads)
|
|
1775
|
+
|
|
1776
|
+
|
|
1777
|
+
def _node_binary() -> str:
|
|
1778
|
+
configured = os.environ.get("UMMAYA_NODE")
|
|
1779
|
+
if configured:
|
|
1780
|
+
resolved = shutil.which(configured) if not Path(configured).is_absolute() else configured
|
|
1781
|
+
if resolved:
|
|
1782
|
+
return resolved
|
|
1783
|
+
raise RuntimeError(f"UMMAYA_NODE is not executable: {configured}")
|
|
1784
|
+
|
|
1785
|
+
detected = shutil.which("node")
|
|
1786
|
+
if detected is None:
|
|
1787
|
+
raise RuntimeError("node executable is required for RHWP HWPX rendering")
|
|
1788
|
+
return detected
|
|
1789
|
+
|
|
1790
|
+
|
|
1791
|
+
def _rhwp_package_root() -> Path:
|
|
1792
|
+
candidates = [
|
|
1793
|
+
os.environ.get("UMMAYA_PACKAGE_ROOT"),
|
|
1794
|
+
str(Path.cwd()),
|
|
1795
|
+
str(Path(__file__).resolve().parents[5]),
|
|
1796
|
+
]
|
|
1797
|
+
for candidate in candidates:
|
|
1798
|
+
if not candidate:
|
|
1799
|
+
continue
|
|
1800
|
+
root = Path(candidate).expanduser().resolve()
|
|
1801
|
+
if (root / "package.json").is_file():
|
|
1802
|
+
return root
|
|
1803
|
+
return Path.cwd().resolve()
|
|
1804
|
+
|
|
1805
|
+
|
|
1806
|
+
def _rhwp_bridge_env() -> dict[str, str]:
    """Build the environment for the Node render bridge subprocess.

    Returns:
        A copy of the current process environment with
        ``UMMAYA_PACKAGE_ROOT`` set to the detected package root.
    """
    bridge_env = {**os.environ}
    bridge_env.update(UMMAYA_PACKAGE_ROOT=str(_rhwp_package_root()))
    return bridge_env
|
|
1810
|
+
|
|
1811
|
+
|
|
1812
|
+
def _parse_rhwp_bridge_result(stdout: str) -> list[Path]:
|
|
1813
|
+
parsed = json.loads(stdout)
|
|
1814
|
+
if not isinstance(parsed, dict):
|
|
1815
|
+
raise RuntimeError("RHWP render bridge returned a non-object result")
|
|
1816
|
+
artifacts = parsed.get("artifacts")
|
|
1817
|
+
if not isinstance(artifacts, list):
|
|
1818
|
+
raise RuntimeError("RHWP render bridge result is missing artifacts")
|
|
1819
|
+
|
|
1820
|
+
paths: list[Path] = []
|
|
1821
|
+
for artifact in artifacts:
|
|
1822
|
+
if not isinstance(artifact, dict):
|
|
1823
|
+
raise RuntimeError("RHWP render bridge artifact is not an object")
|
|
1824
|
+
path_value = artifact.get("path")
|
|
1825
|
+
if not isinstance(path_value, str):
|
|
1826
|
+
raise RuntimeError("RHWP render bridge artifact is missing path")
|
|
1827
|
+
paths.append(Path(path_value).expanduser().resolve())
|
|
1828
|
+
return paths
|
|
1829
|
+
|
|
1830
|
+
|
|
1831
|
+
def _require_render_path_inside(candidate: Path, root: Path) -> None:
|
|
1832
|
+
resolved_root = root.resolve()
|
|
1833
|
+
resolved_candidate = candidate.resolve()
|
|
1834
|
+
if resolved_candidate != resolved_root and resolved_root not in resolved_candidate.parents:
|
|
1835
|
+
raise RuntimeError(
|
|
1836
|
+
f"RHWP render bridge path escapes output directory: {resolved_candidate}"
|
|
1837
|
+
)
|