ummaya 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (477)
  1. package/README.md +15 -2
  2. package/bin/ummaya +10 -1
  3. package/npm-shrinkwrap.json +253 -2
  4. package/package.json +5 -1
  5. package/prompts/manifest.yaml +1 -1
  6. package/prompts/system_v1.md +1 -0
  7. package/pyproject.toml +26 -2
  8. package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
  9. package/src/ummaya/_canonical/__init__.py +2 -0
  10. package/src/ummaya/engine/engine.py +29 -132
  11. package/src/ummaya/evidence/__init__.py +21 -2
  12. package/src/ummaya/evidence/dataset_contract.py +193 -0
  13. package/src/ummaya/evidence/document_authoring_cases.py +33 -0
  14. package/src/ummaya/evidence/document_harness.py +313 -0
  15. package/src/ummaya/evidence/document_viewer_ux.py +391 -0
  16. package/src/ummaya/evidence/gates.py +70 -0
  17. package/src/ummaya/evidence/json_types.py +20 -0
  18. package/src/ummaya/evidence/models.py +88 -1
  19. package/src/ummaya/evidence/output_payload.py +89 -0
  20. package/src/ummaya/evidence/payload_documents.py +233 -0
  21. package/src/ummaya/evidence/route_contracts.py +224 -0
  22. package/src/ummaya/evidence/route_helpers.py +150 -0
  23. package/src/ummaya/evidence/runner.py +81 -212
  24. package/src/ummaya/evidence/source_provenance.py +246 -0
  25. package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
  26. package/src/ummaya/evidence/tool_layer.py +39 -0
  27. package/src/ummaya/evidence/tool_layer_models.py +151 -0
  28. package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
  29. package/src/ummaya/ipc/document_intent_normalization.py +185 -0
  30. package/src/ummaya/ipc/frame_schema.py +5 -5
  31. package/src/ummaya/ipc/route_diagnostics.py +73 -0
  32. package/src/ummaya/ipc/stdio.py +1109 -477
  33. package/src/ummaya/llm/client.py +102 -3
  34. package/src/ummaya/llm/config.py +8 -3
  35. package/src/ummaya/primitives/__init__.py +6 -2
  36. package/src/ummaya/primitives/delegation.py +1 -1
  37. package/src/ummaya/primitives/document.py +28 -0
  38. package/src/ummaya/settings.py +0 -3
  39. package/src/ummaya/tools/discovery_bridge.py +17 -1
  40. package/src/ummaya/tools/documents/__init__.py +297 -0
  41. package/src/ummaya/tools/documents/adapter_registry.py +487 -0
  42. package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
  43. package/src/ummaya/tools/documents/artifact_store.py +454 -0
  44. package/src/ummaya/tools/documents/authoring.py +283 -0
  45. package/src/ummaya/tools/documents/baselines.py +114 -0
  46. package/src/ummaya/tools/documents/capability.py +331 -0
  47. package/src/ummaya/tools/documents/contracts.py +112 -0
  48. package/src/ummaya/tools/documents/conversion.py +521 -0
  49. package/src/ummaya/tools/documents/diff.py +275 -0
  50. package/src/ummaya/tools/documents/engines.py +163 -0
  51. package/src/ummaya/tools/documents/evaluation.py +291 -0
  52. package/src/ummaya/tools/documents/explicit_values.py +108 -0
  53. package/src/ummaya/tools/documents/fixtures.py +174 -0
  54. package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
  55. package/src/ummaya/tools/documents/formats/__init__.py +2 -0
  56. package/src/ummaya/tools/documents/formats/archive.py +528 -0
  57. package/src/ummaya/tools/documents/formats/base.py +41 -0
  58. package/src/ummaya/tools/documents/formats/code_file.py +211 -0
  59. package/src/ummaya/tools/documents/formats/data_file.py +272 -0
  60. package/src/ummaya/tools/documents/formats/hwp.py +284 -0
  61. package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
  62. package/src/ummaya/tools/documents/formats/odf.py +435 -0
  63. package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
  64. package/src/ummaya/tools/documents/formats/passive.py +766 -0
  65. package/src/ummaya/tools/documents/formats/pdf.py +702 -0
  66. package/src/ummaya/tools/documents/formats/text_web.py +268 -0
  67. package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
  68. package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
  69. package/src/ummaya/tools/documents/inspection.py +289 -0
  70. package/src/ummaya/tools/documents/intake.py +1079 -0
  71. package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
  72. package/src/ummaya/tools/documents/models.py +1598 -0
  73. package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
  74. package/src/ummaya/tools/documents/orchestrator.py +96 -0
  75. package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
  76. package/src/ummaya/tools/documents/patch.py +170 -0
  77. package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
  78. package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
  79. package/src/ummaya/tools/documents/permissions.py +110 -0
  80. package/src/ummaya/tools/documents/planner.py +616 -0
  81. package/src/ummaya/tools/documents/registry.py +2733 -0
  82. package/src/ummaya/tools/documents/render.py +978 -0
  83. package/src/ummaya/tools/documents/render_comparison.py +113 -0
  84. package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
  85. package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
  86. package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
  87. package/src/ummaya/tools/documents/reread.py +157 -0
  88. package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
  89. package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
  90. package/src/ummaya/tools/documents/scorecard.py +184 -0
  91. package/src/ummaya/tools/documents/socratic_planner.py +193 -0
  92. package/src/ummaya/tools/documents/style.py +48 -0
  93. package/src/ummaya/tools/documents/tool_defs.py +523 -0
  94. package/src/ummaya/tools/documents/validate.py +347 -0
  95. package/src/ummaya/tools/executor.py +29 -0
  96. package/src/ummaya/tools/live_proxy.py +0 -3
  97. package/src/ummaya/tools/models.py +5 -1
  98. package/src/ummaya/tools/register_all.py +8 -0
  99. package/src/ummaya/tools/registry.py +10 -1
  100. package/src/ummaya/tools/routing/__init__.py +59 -0
  101. package/src/ummaya/tools/routing/builder.py +105 -0
  102. package/src/ummaya/tools/routing/cards.py +29 -0
  103. package/src/ummaya/tools/routing/decision_service.py +534 -0
  104. package/src/ummaya/tools/routing/decision_types.py +74 -0
  105. package/src/ummaya/tools/routing/feasibility.py +122 -0
  106. package/src/ummaya/tools/routing/intent.py +17 -0
  107. package/src/ummaya/tools/routing/intent_extractor.py +207 -0
  108. package/src/ummaya/tools/routing/intent_patterns.py +160 -0
  109. package/src/ummaya/tools/routing/intent_public_data.py +150 -0
  110. package/src/ummaya/tools/routing/intent_types.py +48 -0
  111. package/src/ummaya/tools/routing/lint.py +78 -0
  112. package/src/ummaya/tools/routing/metadata.py +174 -0
  113. package/src/ummaya/tools/routing/projection.py +340 -0
  114. package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
  115. package/src/ummaya/tools/routing/schema.py +81 -0
  116. package/src/ummaya/tools/routing/types.py +96 -0
  117. package/src/ummaya/tools/routing_index.py +2 -2
  118. package/src/ummaya/tools/search.py +34 -746
  119. package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
  120. package/tui/package.json +1 -1
  121. package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
  122. package/tui/src/QueryEngine.ts +12 -8
  123. package/tui/src/bridge/inboundAttachments.ts +3 -3
  124. package/tui/src/cli/handlers/auth.ts +3 -12
  125. package/tui/src/cli/print.ts +7 -7
  126. package/tui/src/commands/insights.ts +1 -1
  127. package/tui/src/commands/install-github-app/types.ts +8 -30
  128. package/tui/src/commands/plugin/types.ts +6 -28
  129. package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
  130. package/tui/src/commands/rename/generateSessionName.ts +1 -1
  131. package/tui/src/components/Feedback.tsx +1 -1
  132. package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
  133. package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
  134. package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
  135. package/tui/src/components/Spinner/types.ts +6 -28
  136. package/tui/src/components/agents/generateAgent.ts +1 -1
  137. package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
  138. package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
  139. package/tui/src/components/mcp/types.ts +16 -38
  140. package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
  141. package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
  142. package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
  143. package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
  144. package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
  145. package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
  146. package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
  147. package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
  148. package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
  149. package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
  150. package/tui/src/components/primitive/index.tsx +43 -1
  151. package/tui/src/components/primitive/types.ts +137 -0
  152. package/tui/src/components/ui/option.ts +4 -26
  153. package/tui/src/constants/common.ts +0 -2
  154. package/tui/src/constants/prompts.ts +4 -3
  155. package/tui/src/constants/querySource.ts +4 -26
  156. package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
  157. package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
  158. package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
  159. package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
  160. package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
  161. package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
  162. package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
  163. package/tui/src/hooks/useApiKeyVerification.ts +1 -1
  164. package/tui/src/hooks/useVirtualScroll.ts +1 -1
  165. package/tui/src/ink/ink.tsx +33 -14
  166. package/tui/src/ink/reconciler.ts +2 -3
  167. package/tui/src/ink/render-to-screen.ts +30 -10
  168. package/tui/src/ipc/bridge.ts +62 -15
  169. package/tui/src/ipc/bridgeSingleton.ts +5 -1
  170. package/tui/src/ipc/codec.ts +3 -3
  171. package/tui/src/ipc/frames.generated.ts +12 -12
  172. package/tui/src/ipc/llmClient.ts +151 -27
  173. package/tui/src/ipc/schema/frame.schema.json +1 -1
  174. package/tui/src/keybindings/defaultBindings.ts +4 -0
  175. package/tui/src/main.tsx +29 -11
  176. package/tui/src/native-ts/file-index/index.ts +33 -3
  177. package/tui/src/observability/surface.ts +2 -2
  178. package/tui/src/probes/toolRegistryProbe.tsx +3 -1
  179. package/tui/src/projectOnboardingState.ts +7 -6
  180. package/tui/src/query/chatMessageTypes.ts +18 -0
  181. package/tui/src/query/chatMessagesBuilder.ts +1 -1
  182. package/tui/src/query/deps.ts +1 -1
  183. package/tui/src/query/messageGuards.ts +106 -0
  184. package/tui/src/query/publicDataTerminalRepair.ts +384 -0
  185. package/tui/src/query/run.ts +1075 -0
  186. package/tui/src/query/supportBoundary.ts +168 -0
  187. package/tui/src/query/toolResultErrors.ts +103 -0
  188. package/tui/src/query/toolRunner.ts +687 -0
  189. package/tui/src/query/unavailableToolRepair.ts +118 -0
  190. package/tui/src/query.ts +9 -2186
  191. package/tui/src/screens/REPL.tsx +40 -29
  192. package/tui/src/services/api/adapterManifest.ts +4 -0
  193. package/tui/src/services/api/backendChat/events.ts +117 -0
  194. package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
  195. package/tui/src/services/api/backendChat/frame.ts +9 -0
  196. package/tui/src/services/api/backendChat/streaming.ts +430 -0
  197. package/tui/src/services/api/backendChat/types.ts +62 -0
  198. package/tui/src/services/api/backendChat.ts +1 -0
  199. package/tui/src/services/api/client.ts +65 -2
  200. package/tui/src/services/api/errorUtils.ts +5 -5
  201. package/tui/src/services/api/errors.ts +1 -1
  202. package/tui/src/services/api/logging.ts +1 -1
  203. package/tui/src/services/api/ummaya/evidence.ts +194 -0
  204. package/tui/src/services/api/ummaya/messages.ts +255 -0
  205. package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
  206. package/tui/src/services/api/ummaya/provider.ts +200 -0
  207. package/tui/src/services/api/ummaya/reasoning.ts +24 -0
  208. package/tui/src/services/api/ummaya/request.ts +200 -0
  209. package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
  210. package/tui/src/services/api/ummaya/streaming.ts +365 -0
  211. package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
  212. package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
  213. package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
  214. package/tui/src/services/api/ummaya/types.ts +110 -0
  215. package/tui/src/services/api/ummaya/usage.ts +30 -0
  216. package/tui/src/services/api/ummaya.ts +26 -418
  217. package/tui/src/services/api/withRetry.ts +1 -1
  218. package/tui/src/services/awaySummary.ts +2 -2
  219. package/tui/src/services/claudeAiLimits.ts +1 -1
  220. package/tui/src/services/compact/autoCompact.ts +1 -1
  221. package/tui/src/services/compact/compact.ts +1 -1
  222. package/tui/src/services/lsp/types.ts +8 -30
  223. package/tui/src/services/tips/types.ts +6 -28
  224. package/tui/src/services/tokenEstimation.ts +1 -1
  225. package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
  226. package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
  227. package/tui/src/services/tools/toolExecution.ts +94 -1
  228. package/tui/src/store/pendingPermissionSlot.ts +1 -1
  229. package/tui/src/store/session-store.ts +10 -36
  230. package/tui/src/stubs/any-stub.ts +15 -10
  231. package/tui/src/stubs/color-diff-napi.ts +37 -23
  232. package/tui/src/stubs/globals.d.ts +3 -3
  233. package/tui/src/stubs/macro-preload.ts +23 -12
  234. package/tui/src/tools/AdapterTool/AdapterTool.ts +1207 -714
  235. package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
  236. package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
  237. package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
  238. package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
  239. package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
  240. package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
  241. package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
  242. package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
  243. package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
  244. package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
  245. package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
  246. package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
  247. package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
  248. package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
  249. package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
  250. package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
  251. package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
  252. package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
  253. package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
  254. package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
  255. package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
  256. package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
  257. package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
  258. package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
  259. package/tui/src/tools/AgentTool/permissions.ts +39 -0
  260. package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
  261. package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
  262. package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
  263. package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
  264. package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
  265. package/tui/src/tools/AgentTool/runAgent.ts +1 -1
  266. package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
  267. package/tui/src/tools/AgentTool/schemas.ts +196 -0
  268. package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
  269. package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
  270. package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
  271. package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
  272. package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
  273. package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
  274. package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
  275. package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
  276. package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
  277. package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
  278. package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
  279. package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
  280. package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
  281. package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
  282. package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
  283. package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
  284. package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
  285. package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
  286. package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
  287. package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
  288. package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
  289. package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
  290. package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
  291. package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
  292. package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
  293. package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
  294. package/tui/src/tools/BashTool/call.ts +202 -0
  295. package/tui/src/tools/BashTool/callLoader.ts +35 -0
  296. package/tui/src/tools/BashTool/commandClassification.ts +151 -0
  297. package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
  298. package/tui/src/tools/BashTool/cwdReset.ts +33 -0
  299. package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
  300. package/tui/src/tools/BashTool/modeValidation.ts +13 -1
  301. package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
  302. package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
  303. package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
  304. package/tui/src/tools/BashTool/resultLoader.ts +29 -0
  305. package/tui/src/tools/BashTool/resultMapping.ts +83 -0
  306. package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
  307. package/tui/src/tools/BashTool/schemas.ts +65 -0
  308. package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
  309. package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
  310. package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
  311. package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
  312. package/tui/src/tools/BashTool/uiLoader.ts +37 -0
  313. package/tui/src/tools/BriefTool/upload.ts +1 -1
  314. package/tui/src/tools/CalculatorTool/parser.ts +2 -2
  315. package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
  316. package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
  317. package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
  318. package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
  319. package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
  320. package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
  321. package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
  322. package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
  323. package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
  324. package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
  325. package/tui/src/tools/FileEditTool/call.ts +228 -0
  326. package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
  327. package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
  328. package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
  329. package/tui/src/tools/FileWriteTool/call.ts +223 -0
  330. package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
  331. package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
  332. package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +25 -32
  333. package/tui/src/tools/LookupPrimitive/prompt.ts +0 -2
  334. package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
  335. package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
  336. package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
  337. package/tui/src/tools/NotebookEditTool/call.ts +254 -0
  338. package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
  339. package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
  340. package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
  341. package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
  342. package/tui/src/tools/PowerShellTool/call.ts +179 -0
  343. package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
  344. package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
  345. package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
  346. package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
  347. package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
  348. package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
  349. package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
  350. package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
  351. package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
  352. package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
  353. package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
  354. package/tui/src/tools/PowerShellTool/validation.ts +39 -0
  355. package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
  356. package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +1 -11
  357. package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
  358. package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
  359. package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +27 -10
  360. package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
  361. package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
  362. package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
  363. package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
  364. package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
  365. package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
  366. package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
  367. package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
  368. package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
  369. package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
  370. package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
  371. package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
  372. package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
  373. package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
  374. package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
  375. package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
  376. package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
  377. package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
  378. package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
  379. package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
  380. package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
  381. package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
  382. package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
  383. package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
  384. package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
  385. package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +2 -1
  386. package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
  387. package/tui/src/tools/WebFetchTool/call.ts +227 -0
  388. package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
  389. package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
  390. package/tui/src/tools/WebFetchTool/types.ts +23 -0
  391. package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
  392. package/tui/src/tools/WebFetchTool/utils.ts +1 -1
  393. package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
  394. package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
  395. package/tui/src/tools/WebSearchTool/call.ts +33 -0
  396. package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
  397. package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
  398. package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
  399. package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
  400. package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
  401. package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
  402. package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
  403. package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
  404. package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
  405. package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
  406. package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
  407. package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
  408. package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
  409. package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
  410. package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
  411. package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
  412. package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
  413. package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
  414. package/tui/src/tools/_shared/rootPrimitiveInput.ts +1 -0
  415. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
  416. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
  417. package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
  418. package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
  419. package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
  420. package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
  421. package/tui/src/tools/_shared/toolChoiceRepair.ts +55 -860
  422. package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
  423. package/tui/src/tools.ts +39 -190
  424. package/tui/src/types/fileSuggestion.ts +4 -26
  425. package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
  426. package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
  427. package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
  428. package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
  429. package/tui/src/types/message.ts +80 -102
  430. package/tui/src/types/messageQueueTypes.ts +6 -28
  431. package/tui/src/types/notebook.ts +16 -38
  432. package/tui/src/types/statusLine.ts +4 -26
  433. package/tui/src/types/tools.ts +24 -46
  434. package/tui/src/types/utils.ts +6 -28
  435. package/tui/src/upstreamproxy/relay.ts +7 -3
  436. package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
  437. package/tui/src/utils/assistantMessageFactories.ts +9 -3
  438. package/tui/src/utils/auth.ts +129 -139
  439. package/tui/src/utils/bash/ast.ts +23 -23
  440. package/tui/src/utils/bash/bashParser.ts +5 -5
  441. package/tui/src/utils/billing.ts +1 -1
  442. package/tui/src/utils/collapseReadSearch.ts +3 -3
  443. package/tui/src/utils/cronTasks.ts +1 -1
  444. package/tui/src/utils/execFileNoThrow.ts +1 -1
  445. package/tui/src/utils/filePersistence/types.ts +16 -38
  446. package/tui/src/utils/forkedAgent.ts +1 -1
  447. package/tui/src/utils/gracefulShutdown.ts +4 -4
  448. package/tui/src/utils/heapDumpService.ts +12 -8
  449. package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
  450. package/tui/src/utils/hooks/execPromptHook.ts +1 -1
  451. package/tui/src/utils/hooks/skillImprovement.ts +1 -1
  452. package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
  453. package/tui/src/utils/messages.ts +18 -0
  454. package/tui/src/utils/migrateSessions.ts +3 -3
  455. package/tui/src/utils/model/model.ts +6 -6
  456. package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
  457. package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
  458. package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
  459. package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
  460. package/tui/src/utils/plugins/pluginLoader.ts +8 -8
  461. package/tui/src/utils/protectedNamespace.ts +5 -3
  462. package/tui/src/utils/rawJsonToolCall.ts +242 -0
  463. package/tui/src/utils/ripgrep.ts +16 -7
  464. package/tui/src/utils/sessionTitle.ts +1 -1
  465. package/tui/src/utils/settings/permissionValidation.ts +14 -2
  466. package/tui/src/utils/shell/prefix.ts +1 -1
  467. package/tui/src/utils/sideQuery.ts +1 -1
  468. package/tui/src/utils/systemThemeWatcher.ts +13 -3
  469. package/tui/src/utils/teleport.tsx +1 -1
  470. package/uv.lock +400 -14
  471. package/tui/src/services/api/claude.ts +0 -3540
  472. package/tui/src/tools/_shared/directPublicDataGuard.ts +0 -362
  473. package/tui/src/tools/_shared/kmaAnalysisGuard.ts +0 -197
  474. package/tui/src/tools/_shared/kmaAviationGuard.ts +0 -70
  475. package/tui/src/tools/_shared/nmcAedGuard.ts +0 -234
  476. package/tui/src/tools/_shared/protectedCheckGuard.ts +0 -207
  477. package/tui/src/tools/_shared/textToolCallGuard.ts +0 -91
@@ -0,0 +1,1837 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """HWPX engine-adapter boundary."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import io
7
+ import json
8
+ import os
9
+ import re
10
+ import shutil
11
+ import subprocess
12
+ import unicodedata
13
+ import xml.etree.ElementTree as ET
14
+ from dataclasses import dataclass
15
+ from decimal import Decimal
16
+ from pathlib import Path
17
+ from typing import TYPE_CHECKING, cast
18
+ from zipfile import ZIP_STORED, BadZipFile, ZipFile, ZipInfo
19
+
20
+ from defusedxml import ElementTree # type: ignore[import-untyped]
21
+
22
+ from ummaya.tools.documents.engines import DocumentInspectionEngine, DocumentMutationEngine
23
+ from ummaya.tools.documents.models import (
24
+ BorderDescriptor,
25
+ DocumentExtraction,
26
+ DocumentFormat,
27
+ DocumentPatch,
28
+ FormField,
29
+ KnownDocumentFormat,
30
+ OperationType,
31
+ ParagraphBlock,
32
+ StyleAlignment,
33
+ StyleDescriptor,
34
+ TableBlock,
35
+ TableCell,
36
+ )
37
+
38
+ if TYPE_CHECKING:
39
+ from ummaya.tools.documents.tool_defs import DocumentFieldPatch
40
+
41
+ HWPX_CANDIDATE_ENGINES: tuple[str, ...] = (
42
+ "hwpx-package-text",
43
+ "rhwp-node-wasm",
44
+ "python-hwpx",
45
+ "hwpx-mcp-server",
46
+ "rhwp",
47
+ "direct-owpml-oracle",
48
+ )
49
+
50
+ _TEXT_TARGET_RE = re.compile(r"^/hwpx/text\[(?P<index>[1-9][0-9]*)\]$")
51
+ _HWPX_TABLE_CELL_ALIAS_RE = re.compile(
52
+ r"^(?:/body/section\[[1-9][0-9]*\])?/table\[(?P<table>[1-9][0-9]*)\]/"
53
+ r"(?:(?:cells\[(?P<row_bracket>[1-9][0-9]*)\]\[(?P<col_bracket>[1-9][0-9]*)\])|"
54
+ r"(?:cell\[(?P<row_csv>[1-9][0-9]*),(?P<col_csv>[1-9][0-9]*)\]))$"
55
+ )
56
+ _HWPX_TABLE_CELL_SOURCE_RE = re.compile(
57
+ r"^(?P<member>Contents/section[0-9]+\.xml)#table\[(?P<table>[1-9][0-9]*)\]/"
58
+ r"r(?P<row>[1-9][0-9]*)c(?P<column>[1-9][0-9]*)$"
59
+ )
60
+ _HWPX_ACTIVITY_PERIOD_VALUE_RE = re.compile(
61
+ r"\b[0-9]{4}\.[0-9]{2}\.[0-9]{2}\s*~\s*[0-9]{4}\.[0-9]{2}\.[0-9]{2}\b"
62
+ )
63
+ _DOCUMENT_WEEK_VALUE_RE = re.compile(r"[0-9]{1,3}")
64
+ _SECTION_PREFIX = "Contents/section"
65
+
66
+
67
# Document formats an injected engine may declare to pass validate_hwpx_engine.
_HWPX_COMPATIBLE_FORMATS = frozenset((DocumentFormat.hwpx, DocumentFormat.owpml))
68
+
69
+
70
+ @dataclass(frozen=True)
71
+ class _HwpXTextRecord:
72
+ element: ET.Element
73
+ char_style_id: str | None
74
+ para_style_id: str | None
75
+ named_style_id: str | None
76
+
77
+
78
+ @dataclass(frozen=True)
79
+ class _HwpXTableCellTarget:
80
+ member: str
81
+ table_index: int
82
+ row_index: int
83
+ column_index: int
84
+
85
+
86
+ @dataclass(frozen=True)
87
+ class _HwpXStyleRefs:
88
+ char_pr_id: str | None = None
89
+ para_pr_id: str | None = None
90
+ style_id: str | None = None
91
+
92
+
93
+ @dataclass(frozen=True)
94
+ class _HwpXPatchBuckets:
95
+ text_replacements: dict[int, str]
96
+ table_cell_replacements: dict[_HwpXTableCellTarget, str]
97
+ text_styles: dict[int, StyleDescriptor]
98
+ table_cell_styles: dict[_HwpXTableCellTarget, StyleDescriptor]
99
+
100
+ @property
101
+ def has_style_mutations(self) -> bool:
102
+ return bool(self.text_styles or self.table_cell_styles)
103
+
104
+
105
def validate_hwpx_engine(engine: DocumentInspectionEngine) -> DocumentInspectionEngine:
    """Return ``engine`` unchanged if it declares an HWPX-compatible format.

    Raises:
        ValueError: when ``engine.document_format`` is not one of the
            formats in ``_HWPX_COMPATIBLE_FORMATS``.
    """
    if engine.document_format in _HWPX_COMPATIBLE_FORMATS:
        return engine
    raise ValueError("HWPX adapter requires a hwpx-compatible engine")
110
+
111
+
112
def validate_hwpx_mutation_engine(engine: DocumentInspectionEngine) -> DocumentMutationEngine:
    """Return ``engine`` if it is HWPX-compatible and mutation-capable.

    Raises:
        ValueError: when the format check in ``validate_hwpx_engine`` fails,
            or when ``engine`` is not a ``DocumentMutationEngine``.
    """
    # The format check runs first so its error takes precedence.
    validate_hwpx_engine(engine)
    if isinstance(engine, DocumentMutationEngine):
        return engine
    raise ValueError("HWPX adapter requires a mutation-capable engine")
118
+
119
+
120
class HwpXPackageTextEngine:
    """Text-node HWPX engine for deterministic local package edits.

    Inspection exposes every non-empty text element of every section member
    as a paragraph and a form field addressed by ``/hwpx/text[N]``. Patching
    rewrites section (and possibly header) payloads and returns new package
    bytes. Rendering is delegated to external bridges.
    """

    document_format = DocumentFormat.hwpx
    engine_id = "hwpx-package-text"
    # Default render descriptors; the ``*_for`` methods below replace them
    # when the HTML render path is selected for a given package.
    render_engine_id = "rhwp-node-wasm"
    render_artifact_extension = "svg"
    render_mime_type = "image/svg+xml"

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Extract HWPX text nodes as LLM-addressable paragraphs and fields.

        Args:
            path: Location of the package on disk; opened as a zip archive.
            artifact_id: Identifier copied into the returned extraction.

        Returns:
            A ``DocumentExtraction`` holding paragraphs, tables, fields, the
            header style map, summary metadata and one fixed warning.
        """
        paragraphs: list[ParagraphBlock] = []
        tables: list[TableBlock] = []
        # (text index, text, target path) for each non-empty text node.
        field_rows: list[tuple[int, str, str]] = []
        # Populated by _table_blocks: text index -> label from a table cell.
        semantic_labels: dict[int, str] = {}
        next_text_index = 1
        section_count = 0

        with ZipFile(path) as archive:
            style_map = _style_map_from_header(archive)
            for member in _section_members(archive):
                section_count += 1
                root = ElementTree.fromstring(archive.read(member))
                index_by_element_id: dict[int, int] = {}
                for record in _text_records(root):
                    node = record.element
                    text = node.text or ""
                    if not text:
                        # Empty nodes do not consume a text index.
                        continue
                    # The counter keeps running across sections.
                    index_by_element_id[id(node)] = next_text_index
                    paragraphs.append(
                        ParagraphBlock(
                            block_id=f"hwpx-text-{next_text_index:03d}",
                            text=text,
                            source_path=f"{member}#text[{next_text_index}]",
                            style_id=(
                                record.char_style_id
                                or record.named_style_id
                                or record.para_style_id
                            ),
                        )
                    )
                    field_rows.append(
                        (next_text_index, text, f"/hwpx/text[{next_text_index}]")
                    )
                    next_text_index += 1
                section_tables = _table_blocks(
                    root,
                    member=member,
                    table_start_index=len(tables) + 1,
                    text_index_by_element_id=index_by_element_id,
                    semantic_labels=semantic_labels,
                )
                tables.extend(section_tables)

        # Label precedence: table-derived label, then a label inferred from
        # the value itself, then a generic numbered fallback.
        fields: list[FormField] = [
            FormField(
                field_id=f"hwpx-text-{index:03d}",
                label=semantic_labels.get(index)
                or _semantic_label_for_text_value(text)
                or f"HWPX text node {index}",
                path=target_path,
                field_type="text",
                required=False,
                current_value=text,
                source_confidence=Decimal("1"),
            )
            for index, text, target_path in field_rows
        ]

        metadata = {
            "format": self.document_format.value,
            "engine_id": self.engine_id,
            "section_count": section_count,
            "text_node_count": len(paragraphs),
            "table_count": len(tables),
            "style_map_count": len(style_map),
        }
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=paragraphs,
            tables=tables,
            fields=fields,
            metadata=metadata,
            style_map=style_map,
            warnings=[
                "HWPX package text engine edits text nodes only; page SVG render evidence is "
                "delegated to the RHWP Node/WASM bridge."
            ],
        )

    def apply_patch(self, path: Path, patch: DocumentPatch) -> bytes:
        """Apply ordered HWPX text-node patches and return derivative package bytes.

        Every section member is parsed, mutated and re-serialised. After all
        sections are processed, ``_raise_for_missing_hwpx_patch_targets`` is
        given the applied-target bookkeeping before the package is rewritten.
        """
        buckets = _hwpx_patch_buckets_from_patch(patch)
        section_payloads: dict[str, bytes] = {}
        next_text_index = 1
        next_table_index = 1
        done_cells: set[_HwpXTableCellTarget] = set()
        done_text_styles: set[int] = set()
        done_cell_styles: set[_HwpXTableCellTarget] = set()

        with ZipFile(path) as archive:
            style_resolution = _hwpx_style_refs_from_buckets(archive, buckets)
            text_style_refs, table_cell_style_refs, header_payload = style_resolution

            for member in _section_members(archive):
                raw = archive.read(member)
                # Namespace declarations are captured from the raw bytes so
                # they can be handed back to the serialiser afterwards.
                namespace_map = _namespace_map(raw)
                root = ElementTree.fromstring(raw)
                outcome = _apply_hwpx_section_mutations(
                    root,
                    member=member,
                    text_index=next_text_index,
                    table_index=next_table_index,
                    text_replacements=buckets.text_replacements,
                    table_cell_replacements=buckets.table_cell_replacements,
                    text_style_refs=text_style_refs,
                    table_cell_style_refs=table_cell_style_refs,
                )
                # Counters carry over so numbering continues in the next section.
                (
                    next_text_index,
                    next_table_index,
                    section_cells,
                    section_text_styles,
                    section_cell_styles,
                ) = outcome
                done_cells.update(section_cells)
                done_text_styles.update(section_text_styles)
                done_cell_styles.update(section_cell_styles)
                section_payloads[member] = _serialize_section(root, namespace_map)

            _raise_for_missing_hwpx_patch_targets(
                patch_buckets=buckets,
                text_style_refs=text_style_refs,
                table_cell_style_refs=table_cell_style_refs,
                text_index=next_text_index,
                applied_table_cell_targets=done_cells,
                applied_text_style_targets=done_text_styles,
                applied_table_cell_style_targets=done_cell_styles,
            )
            # The archive must still be open while the package is rewritten.
            return _rewrite_hwpx_package(
                archive,
                section_payloads=section_payloads,
                header_payload=header_payload,
            )

    def render(self, path: Path, *, artifact_id: str, output_dir: Path) -> tuple[bytes, ...]:
        """Render HWPX page SVG evidence through the RHWP Node/WASM bridge.

        When ``_uses_hwpxjs_html_render`` selects the HTML path, the output
        directory is created and a single HTML payload is returned instead.
        """
        _ = artifact_id  # accepted for interface compatibility; unused here
        if not _uses_hwpxjs_html_render(path):
            return _render_with_rhwp_node(path, output_dir=output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        return (_render_with_hwpxjs_html(path),)

    def render_artifact_extension_for(self, path: Path) -> str:
        """Return the render artifact extension selected by HWPX package structure."""
        if _uses_hwpxjs_html_render(path):
            return "html"
        return self.render_artifact_extension

    def render_mime_type_for(self, path: Path) -> str:
        """Return the render MIME selected by HWPX package structure."""
        if _uses_hwpxjs_html_render(path):
            return "text/html"
        return self.render_mime_type

    def render_engine_id_for(self, path: Path) -> str:
        """Return the render engine selected by HWPX package structure."""
        if _uses_hwpxjs_html_render(path):
            return "hwpxjs-html-render"
        return self.render_engine_id
286
+
287
+
288
class OwpmlPackageTextEngine(HwpXPackageTextEngine):
    """OWPML extension alias backed by the same package text engine as HWPX."""

    # Only the declared format and engine id differ from the parent class.
    engine_id = "owpml-package-text"
    document_format = DocumentFormat.owpml
293
+
294
+
295
class HwpXDocumentAdapter:
    """HWPX adapter for native package inspection and target normalization."""

    known_formats: tuple[KnownDocumentFormat, ...] = (
        KnownDocumentFormat.hwpx,
        KnownDocumentFormat.owpml,
    )
    promoted_formats: tuple[DocumentFormat, ...] = (DocumentFormat.hwpx, DocumentFormat.owpml)

    def __init__(self, *, inspection_engine: DocumentInspectionEngine | None = None) -> None:
        """Wrap ``inspection_engine``, or a default ``HwpXPackageTextEngine``.

        Raises:
            ValueError: when the chosen engine fails ``validate_hwpx_engine``.
        """
        chosen = inspection_engine or HwpXPackageTextEngine()
        self._inspection_engine = validate_hwpx_engine(chosen)
        self.adapter_id = f"{self._inspection_engine.engine_id}-adapter"

    @property
    def engine_id(self) -> str:
        """Return the wrapped HWPX engine id for diagnostics."""
        return self._inspection_engine.engine_id

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Inspect a local HWPX package through the wrapped engine."""
        return self._inspection_engine.inspect(path, artifact_id=artifact_id)

    def normalize_fill_patches(
        self,
        patches: tuple[DocumentFieldPatch, ...],
        *,
        extraction: DocumentExtraction | None,
    ) -> tuple[DocumentFieldPatch, ...]:
        """Map semantic/table aliases to native HWPX text-node targets.

        Without an extraction the patches are returned untouched. Otherwise
        each patch gets a normalised target and value. A patch is dropped
        when its target does not start with "/", normalisation left it
        unchanged, it belongs to no semantic group and it is not a known
        native target of the extraction.
        """
        if extraction is None:
            return patches

        kept: list[DocumentFieldPatch] = []
        for patch in patches:
            requested = patch.target_path
            resolved = _normalized_fill_target(requested, extraction)
            is_unresolvable_free_text = (
                not requested.strip().startswith("/")
                and resolved == requested
                and _semantic_target_group(_semantic_field_key(requested)) is None
                and not _is_known_hwpx_native_fill_target(requested, extraction)
            )
            if is_unresolvable_free_text:
                continue
            value = _normalized_fill_value(
                patch.value,
                original_target=requested,
                normalized_target=resolved,
                extraction=extraction,
            )
            rewritten = patch.model_copy(update={"target_path": resolved, "value": value})
            kept.append(rewritten)
        return tuple(kept)
350
+
351
+
352
def _is_known_hwpx_native_fill_target(
    target_path: str,
    extraction: DocumentExtraction,
) -> bool:
    """Tell whether ``target_path`` is a native target of ``extraction``.

    A path counts as native when it equals the path of an extracted field or
    one of the targets that the table-cell alias map resolves to.
    """
    if any(field.path == target_path for field in extraction.fields):
        return True
    return target_path in _hwpx_table_cell_alias_map(extraction).values()
359
+
360
+
361
def _normalized_fill_target(
    target_path: str,
    extraction: DocumentExtraction,
) -> str:
    """Resolve ``target_path`` to a native target, or return it unchanged.

    Resolution is attempted in this order: semantic field name, exact alias
    lookup, then a table-cell alias parsed into its canonical
    ``/table[T]/cells[R][C]`` key.
    """
    semantic = _semantic_hwpx_field_target(target_path, extraction)
    if semantic is not None:
        return semantic

    aliases = _hwpx_table_cell_alias_map(extraction)
    if target_path in aliases:
        return aliases[target_path]

    parsed = _HWPX_TABLE_CELL_ALIAS_RE.match(target_path)
    if parsed is None:
        return target_path
    groups = parsed.groupdict()
    # Only one of the two spellings (bracket or comma form) can have matched.
    row = groups["row_bracket"] or groups["row_csv"]
    column = groups["col_bracket"] or groups["col_csv"]
    if row is None or column is None:
        return target_path
    canonical = f"/table[{groups['table']}]/cells[{row}][{column}]"
    return aliases.get(canonical, target_path)
380
+
381
+
382
def _normalized_fill_value(
    value: object,
    *,
    original_target: str,
    normalized_target: str,
    extraction: DocumentExtraction,
) -> object:
    """Rewrite a bare week number as "<n>주차" for week-label targets.

    The semantic group comes from the original target name; if that yields
    none, it comes from the field found at the normalised target. Any value
    outside the "week_label" group, or one that is not a plain week number,
    is returned unchanged.
    """
    group = _semantic_target_group(_semantic_field_key(original_target))
    if group is None:
        group = _semantic_group_for_extracted_path(normalized_target, extraction)
    if group == "week_label":
        week = _numeric_week_value(value)
        if week is not None:
            return f"{week}주차"
    return value
396
+
397
+
398
def _semantic_group_for_extracted_path(
    target_path: str,
    extraction: DocumentExtraction,
) -> str | None:
    """Return "week_label" if a field at ``target_path`` holds "<digits>주차".

    Only string values are considered. ``None`` is returned when no field at
    that path has such a value.
    """
    holds_week_label = any(
        field.path == target_path
        and isinstance(field.current_value, str)
        and re.fullmatch(r"[0-9]+주차", _semantic_field_key(field.current_value)) is not None
        for field in extraction.fields
    )
    return "week_label" if holds_week_label else None
410
+
411
+
412
def _semantic_label_for_text_value(value: str) -> str | None:
    """Infer a Korean form label from the shape of a text value.

    Returns "주차" for "<digits>주차", "활동기간" for a value containing a
    date range, "특이사항" when the key contains that word but is not merely
    the bare heading "특이사항" or "비고", and ``None`` otherwise.
    """
    folded = unicodedata.normalize("NFKC", value)
    key = _semantic_field_key(folded)
    if re.fullmatch(r"[0-9]+주차", key):
        return "주차"
    if _HWPX_ACTIVITY_PERIOD_VALUE_RE.search(folded):
        return "활동기간"
    # A bare heading is itself a label cell, not a value to be labelled.
    if key not in {"특이사항", "비고"} and "특이사항" in key:
        return "특이사항"
    return None
424
+
425
+
426
+ def _numeric_week_value(value: object) -> str | None:
427
+ if isinstance(value, int):
428
+ return str(value)
429
+ if not isinstance(value, str):
430
+ return None
431
+ normalized = unicodedata.normalize("NFKC", value).strip()
432
+ if _DOCUMENT_WEEK_VALUE_RE.fullmatch(normalized) is None:
433
+ return None
434
+ return normalized.lstrip("0") or "0"
435
+
436
+
437
def _semantic_hwpx_field_target(
    target_path: str,
    extraction: DocumentExtraction,
) -> str | None:
    """Map conservative semantic field names to extracted HWPX form labels.

    Targets starting with "/" and targets whose key is empty are never
    treated as semantic. A path is returned only when one stage produces
    exactly one candidate: an exact label match, a label in the same
    semantic group, or a value whose shape matches the group.
    """
    key = _semantic_field_key(target_path)
    if target_path.strip().startswith("/") or not key:
        return None

    by_exact_label = [
        field.path for field in extraction.fields if _semantic_field_key(field.label) == key
    ]
    if len(by_exact_label) == 1:
        return by_exact_label[0]

    group = _semantic_target_group(key)
    if group is None:
        return None

    # De-duplicate while keeping first-seen order.
    by_group = list(
        dict.fromkeys(
            field.path
            for field in extraction.fields
            if _semantic_label_group(_semantic_field_key(field.label)) == group
        )
    )
    if len(by_group) == 1:
        return by_group[0]

    by_value = _semantic_hwpx_value_matches(group, extraction)
    if len(by_value) == 1:
        return by_value[0]
    return None
469
+
470
+
471
def _semantic_hwpx_value_matches(
    target_group: str,
    extraction: DocumentExtraction,
) -> list[str]:
    """List paths of fields whose string value looks like ``target_group``.

    Only "activity_period" (value contains a date range) and "week_label"
    (value key is "<digits>주차") are recognised; other groups give an empty
    list. Paths are unique and kept in first-seen order.
    """
    ordered_paths: dict[str, None] = {}
    for field in extraction.fields:
        current = field.current_value
        if not isinstance(current, str):
            continue
        folded = unicodedata.normalize("NFKC", current)
        if target_group == "activity_period":
            hit = _HWPX_ACTIVITY_PERIOD_VALUE_RE.search(folded) is not None
        elif target_group == "week_label":
            hit = re.fullmatch(r"[0-9]+주차", _semantic_field_key(folded)) is not None
        else:
            hit = False
        if hit:
            ordered_paths.setdefault(field.path, None)
    return list(ordered_paths)
486
+
487
+
488
+ def _semantic_field_key(value: str) -> str:
489
+ normalized = unicodedata.normalize("NFKC", value).casefold()
490
+ return re.sub(r"[^0-9a-z가-힣]+", "", normalized)
491
+
492
+
493
def _semantic_target_group(normalized_target: str) -> str | None:
    """Classify a normalised target key into a semantic group name.

    Predicates are tried in a fixed order and the first match wins; an
    empty key or a key matching nothing yields ``None``.
    """
    if not normalized_target:
        return None
    ordered_predicates = (
        ("special_notes", _matches_special_notes_target),
        ("team_name", _matches_team_name_target),
        ("week_label", _matches_week_label_target),
        ("activity_period", _matches_activity_period_target),
    )
    for group, predicate in ordered_predicates:
        if predicate(normalized_target):
            return group
    return None
505
+
506
+
507
+ def _matches_special_notes_target(normalized_target: str) -> bool:
508
+ return any(
509
+ token in normalized_target
510
+ for token in ("특이", "비고", "special", "remark", "remarks", "note", "notes")
511
+ )
512
+
513
+
514
+ def _matches_team_name_target(normalized_target: str) -> bool:
515
+ return (
516
+ "팀명" in normalized_target
517
+ or ("team" in normalized_target and "name" in normalized_target)
518
+ or normalized_target == "team"
519
+ )
520
+
521
+
522
+ def _matches_week_label_target(normalized_target: str) -> bool:
523
+ return "주차" in normalized_target or normalized_target in {
524
+ "week",
525
+ "weeknumber",
526
+ "weeklabel",
527
+ }
528
+
529
+
530
+ def _matches_activity_period_target(normalized_target: str) -> bool:
531
+ return (
532
+ "활동일시" in normalized_target
533
+ or "활동기간" in normalized_target
534
+ or (
535
+ "activity" in normalized_target
536
+ and any(token in normalized_target for token in ("period", "date", "time"))
537
+ )
538
+ or "weekperiod" in normalized_target
539
+ or normalized_target.endswith("period")
540
+ )
541
+
542
+
543
+ def _semantic_label_group(normalized_label: str) -> str | None:
544
+ if "특이사항" in normalized_label or normalized_label == "비고":
545
+ return "special_notes"
546
+ if normalized_label == "팀명":
547
+ return "team_name"
548
+ if "주차" in normalized_label:
549
+ return "week_label"
550
+ if normalized_label in {"활동일시", "활동기간"}:
551
+ return "activity_period"
552
+ return None
553
+
554
+
555
+ def _hwpx_table_cell_alias_map(extraction: DocumentExtraction) -> dict[str, str]:
556
+ aliases: dict[str, str] = {}
557
+ for table_index, table in enumerate(extraction.tables, start=1):
558
+ for cell in table.cells:
559
+ native_target = cell.field_path or cell.source_path
560
+ row = cell.row_index + 1
561
+ column = cell.column_index + 1
562
+ aliases[cell.source_path] = native_target
563
+ aliases[f"/table[{table_index}]/cell[{row},{column}]"] = native_target
564
+ aliases[f"/table[{table_index}]/cells[{row}][{column}]"] = native_target
565
+ aliases[f"/body/section[1]/table[{table_index}]/cell[{row},{column}]"] = native_target
566
+ aliases[f"/body/section[1]/table[{table_index}]/cells[{row}][{column}]"] = native_target
567
+ return aliases
568
+
569
+
570
def _section_members(archive: ZipFile) -> list[str]:
    """Return the package's section XML member names in numeric order.

    A member qualifies when its name starts with ``_SECTION_PREFIX`` and
    ends with ".xml". Names are ordered by the integer between prefix and
    extension, so "section10.xml" follows "section9.xml" rather than
    "section1.xml" as a plain string sort would place it. Names without an
    all-ASCII-digit suffix are placed after the numbered ones, sorted by
    name.
    """
    extension = ".xml"

    def _numeric_order(name: str) -> tuple[int, int, str]:
        ordinal = name[len(_SECTION_PREFIX) : -len(extension)]
        # isascii() guards int() against non-ASCII digits such as "²".
        if ordinal.isascii() and ordinal.isdigit():
            return (0, int(ordinal), name)
        return (1, 0, name)

    return sorted(
        (
            info.filename
            for info in archive.infolist()
            if info.filename.startswith(_SECTION_PREFIX) and info.filename.endswith(extension)
        ),
        key=_numeric_order,
    )
576
+
577
+
578
+ def _text_elements(root: ET.Element) -> list[ET.Element]:
579
+ return [elem for elem in root.iter() if elem.tag.rsplit("}", 1)[-1] == "t"]
580
+
581
+
582
def _text_records(root: ET.Element) -> list[_HwpXTextRecord]:
    """Pair each text element under ``root`` with its resolved style ids.

    Style references are read from the nearest enclosing "run" (character
    properties) and "p" (paragraph properties and named style). Text
    elements with no enclosing "p" are skipped. Each id is prefixed with
    "charPr-", "paraPr-" or "style-", and is ``None`` when the reference is
    missing or empty.
    """

    def _prefixed(prefix: str, reference: str | None) -> str | None:
        return f"{prefix}-{reference}" if reference else None

    parents = _parent_by_element_id(root)
    collected: list[_HwpXTextRecord] = []
    for node in _text_elements(root):
        paragraph = _nearest_ancestor_by_local_name(node, "p", parents)
        if paragraph is None:
            continue
        run = _nearest_ancestor_by_local_name(node, "run", parents)
        char_ref = None if run is None else _local_attr(run, "charPrIDRef")
        collected.append(
            _HwpXTextRecord(
                element=node,
                char_style_id=_prefixed("charPr", char_ref),
                para_style_id=_prefixed("paraPr", _local_attr(paragraph, "paraPrIDRef")),
                named_style_id=_prefixed("style", _local_attr(paragraph, "styleIDRef")),
            )
        )
    return collected
602
+
603
+
604
+ def _parent_by_element_id(root: ET.Element) -> dict[int, ET.Element]:
605
+ return {id(child): parent for parent in root.iter() for child in list(parent)}
606
+
607
+
608
def _nearest_ancestor_by_local_name(
    element: ET.Element,
    name: str,
    parent_by_id: dict[int, ET.Element],
) -> ET.Element | None:
    """Walk upwards from ``element`` to the closest ancestor named ``name``.

    ``element`` itself is never returned. ``parent_by_id`` is a map from
    ``id(child)`` to parent, as produced by ``_parent_by_element_id``.
    Returns ``None`` when the walk runs out of parents without a match.
    """
    ancestor = parent_by_id.get(id(element))
    while ancestor is not None and _local_name(ancestor.tag) != name:
        ancestor = parent_by_id.get(id(ancestor))
    return ancestor
619
+
620
+
621
def _style_map_from_header(archive: ZipFile) -> list[StyleDescriptor]:
    """Read every style descriptor declared in ``Contents/header.xml``.

    Returns an empty list when the member is absent or fails to parse.
    Descriptors are ordered by kind: border fills, character properties,
    paragraph properties, then named styles.
    """
    try:
        header = ElementTree.fromstring(archive.read("Contents/header.xml"))
    except (KeyError, ElementTree.ParseError):
        return []

    border_fills = _border_fill_styles_by_id(header)
    char_styles = _char_styles_by_id(
        header,
        font_faces=_font_faces_by_id(header),
        border_fills=border_fills,
    )
    para_styles = _para_styles_by_id(header)
    named_styles = _named_styles_by_id(
        header,
        char_styles=char_styles,
        para_styles=para_styles,
    )

    combined: list[StyleDescriptor] = []
    for group in (border_fills, char_styles, para_styles, named_styles):
        combined.extend(group.values())
    return combined
641
+
642
+
643
def _font_faces_by_id(root: ET.Element) -> dict[str, str]:
    """Map font ids to face names from the accepted ``fontface`` groups.

    Only groups whose ``lang`` (case-insensitively) is "hangul", "korean",
    "latin" or absent/empty are read. If an id occurs more than once, the
    first face found is kept.
    """
    accepted_langs = {"hangul", "korean", "latin", ""}
    faces: dict[str, str] = {}
    for fontface in _elements_by_local_name(root, "fontface"):
        lang = (_local_attr(fontface, "lang") or "").casefold()
        if lang in accepted_langs:
            for font in _child_elements_by_local_name(fontface, "font"):
                font_id = _local_attr(font, "id")
                face = _local_attr(font, "face")
                if font_id is not None and face and font_id not in faces:
                    faces[font_id] = face
    return faces
655
+
656
+
657
def _border_fill_styles_by_id(root: ET.Element) -> dict[str, StyleDescriptor]:
    """Build one descriptor per ``borderFill`` element that has an ``id``.

    Keys are the raw ids; each descriptor carries the fill colour and the
    border resolved from that element.
    """
    descriptors: dict[str, StyleDescriptor] = {}
    for node in _elements_by_local_name(root, "borderFill"):
        identifier = _local_attr(node, "id")
        if identifier is not None:
            descriptors[identifier] = StyleDescriptor(
                style_id=f"borderFill-{identifier}",
                target_path=f"Contents/header.xml#borderFill[{identifier}]",
                fill_color_rgb=_border_fill_color(node),
                border=_border_descriptor(node),
            )
    return descriptors
672
+
673
+
674
def _char_styles_by_id(
    root: ET.Element,
    *,
    font_faces: dict[str, str],
    border_fills: dict[str, StyleDescriptor],
) -> dict[str, StyleDescriptor]:
    """Build one descriptor per ``charPr`` element that has an ``id``.

    Args:
        root: Parsed header tree to search.
        font_faces: Font id -> face name, used to resolve the font family.
        border_fills: Border-fill id -> descriptor, used as the fallback
            fill colour and as the source of the border.

    Returns:
        Descriptors keyed by raw ``charPr`` id. ``bold``, ``italic`` and
        ``underline`` are ``True`` when the matching child element exists
        and ``None`` otherwise.
    """
    descriptors: dict[str, StyleDescriptor] = {}
    for node in _elements_by_local_name(root, "charPr"):
        identifier = _local_attr(node, "id")
        if identifier is None:
            continue
        linked_fill = border_fills.get(_local_attr(node, "borderFillIDRef") or "")
        fallback_fill = linked_fill.fill_color_rgb if linked_fill else None
        descriptors[identifier] = StyleDescriptor(
            style_id=f"charPr-{identifier}",
            target_path=f"Contents/header.xml#charPr[{identifier}]",
            font_family=font_faces.get(_font_ref_id(node) or ""),
            font_size_pt=_hwpx_height_to_points(_local_attr(node, "height")),
            bold=_has_child(node, "bold") or None,
            italic=_has_child(node, "italic") or None,
            underline=_has_child(node, "underline") or None,
            font_color_rgb=_rgb(_local_attr(node, "textColor")),
            # The element's own shade colour wins over the linked fill.
            fill_color_rgb=_rgb(_local_attr(node, "shadeColor")) or fallback_fill,
            border=linked_fill.border if linked_fill else None,
        )
    return descriptors
703
+
704
+
705
def _para_styles_by_id(root: ET.Element) -> dict[str, StyleDescriptor]:
    """Build one descriptor per ``paraPr`` element that has an ``id``.

    Each descriptor records the alignment read from the element's direct
    ``align`` child, if any.
    """
    descriptors: dict[str, StyleDescriptor] = {}
    for node in _elements_by_local_name(root, "paraPr"):
        identifier = _local_attr(node, "id")
        if identifier is not None:
            align_node = _first_child_by_local_name(node, "align")
            descriptors[identifier] = StyleDescriptor(
                style_id=f"paraPr-{identifier}",
                target_path=f"Contents/header.xml#paraPr[{identifier}]",
                alignment=_hwpx_alignment(align_node),
            )
    return descriptors
717
+
718
+
719
def _named_styles_by_id(
    root: ET.Element,
    *,
    char_styles: dict[str, StyleDescriptor],
    para_styles: dict[str, StyleDescriptor],
) -> dict[str, StyleDescriptor]:
    """Build one merged descriptor per ``style`` element that has an ``id``.

    Each named style is merged from the character and paragraph descriptors
    it references via ``charPrIDRef`` and ``paraPrIDRef``; an unresolved
    reference contributes nothing.
    """
    descriptors: dict[str, StyleDescriptor] = {}
    for node in _elements_by_local_name(root, "style"):
        identifier = _local_attr(node, "id")
        if identifier is None:
            continue
        char_ref = _local_attr(node, "charPrIDRef") or ""
        para_ref = _local_attr(node, "paraPrIDRef") or ""
        descriptors[identifier] = _merge_hwpx_styles(
            style_id=f"style-{identifier}",
            target_path=f"Contents/header.xml#style[{identifier}]",
            char_style=char_styles.get(char_ref),
            para_style=para_styles.get(para_ref),
        )
    return descriptors
739
+
740
+
741
def _merge_hwpx_styles(
    *,
    style_id: str,
    target_path: str,
    char_style: StyleDescriptor | None,
    para_style: StyleDescriptor | None,
) -> StyleDescriptor:
    """Combine a character and a paragraph descriptor into one.

    Font, colour, border and number-format attributes come from
    ``char_style``; alignment and line spacing come from ``para_style``.
    Attributes whose source descriptor is missing are set to ``None``.
    """
    from_char = (
        "font_family",
        "font_size_pt",
        "bold",
        "italic",
        "underline",
        "font_color_rgb",
        "fill_color_rgb",
        "border",
        "number_format",
    )
    from_para = ("alignment", "line_spacing")
    merged = {name: (getattr(char_style, name) if char_style else None) for name in from_char}
    merged.update(
        {name: (getattr(para_style, name) if para_style else None) for name in from_para}
    )
    return StyleDescriptor(style_id=style_id, target_path=target_path, **merged)
763
+
764
+
765
def _font_ref_id(char_pr: ET.Element) -> str | None:
    """Return the font id referenced by the ``fontRef`` child of ``char_pr``.

    Script attributes are tried in the order hangul, latin, hanja, other;
    the first non-empty value wins. Without a ``fontRef`` child the result
    is ``None``.
    """
    font_ref = _first_child_by_local_name(char_pr, "fontRef")
    if font_ref is None:
        return None
    candidate: str | None = None
    for script in ("hangul", "latin", "hanja", "other"):
        candidate = _local_attr(font_ref, script)
        if candidate:
            return candidate
    # No attribute was truthy: hand back whatever "other" held.
    return candidate
775
+
776
+
777
def _border_fill_color(border_fill: ET.Element) -> str | None:
    """Return the first usable fill colour found below ``border_fill``.

    Descendants named winBrush, gradation and imgBrush are checked in that
    order. For each, ``faceColor`` is preferred over ``color``. ``None`` is
    returned when no brush yields a valid colour.
    """
    for brush_name in ("winBrush", "gradation", "imgBrush"):
        brush = _first_descendant_by_local_name(border_fill, brush_name)
        if brush is not None:
            raw_color = _local_attr(brush, "faceColor") or _local_attr(brush, "color")
            parsed = _rgb(raw_color)
            if parsed is not None:
                return parsed
    return None
786
+
787
+
788
def _border_descriptor(border_fill: ET.Element) -> BorderDescriptor | None:
    """Describe the first visible edge of ``border_fill``.

    Direct children are checked in the order left, top, right, bottom. The
    first one whose ``type`` is present and not "NONE" supplies the style,
    width and colour. ``None`` is returned when no edge qualifies.
    """
    for edge_name in ("leftBorder", "topBorder", "rightBorder", "bottomBorder"):
        edge = _first_child_by_local_name(border_fill, edge_name)
        if edge is None:
            continue
        edge_type = _local_attr(edge, "type")
        if edge_type is not None and edge_type != "NONE":
            return BorderDescriptor(
                style=edge_type,
                width_pt=_hwpx_measure_to_points(_local_attr(edge, "width")),
                color_rgb=_rgb(_local_attr(edge, "color")),
            )
    return None
802
+
803
+
804
+ def _hwpx_height_to_points(value: str | None) -> Decimal | None:
805
+ if value is None:
806
+ return None
807
+ try:
808
+ return Decimal(value) / Decimal("100")
809
+ except ArithmeticError:
810
+ return None
811
+
812
+
813
+ def _hwpx_measure_to_points(value: str | None) -> Decimal | None:
814
+ if value is None:
815
+ return None
816
+ match = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*mm", value)
817
+ if match is None:
818
+ return None
819
+ try:
820
+ return (Decimal(match.group(1)) * Decimal("2.834645669")).quantize(Decimal("0.01"))
821
+ except ArithmeticError:
822
+ return None
823
+
824
+
825
def _hwpx_alignment(align: ET.Element | None) -> StyleAlignment | None:
    """Translate the ``horizontal`` attribute of ``align`` to an alignment.

    The attribute is compared case-insensitively against left, center,
    right, justify and distributed. A missing element, a missing attribute
    or any other value yields ``None``.
    """
    if align is None:
        return None
    horizontal = (_local_attr(align, "horizontal") or "").casefold()
    if horizontal in ("left", "center", "right", "justify", "distributed"):
        # The accepted spellings are exactly the alignment values returned.
        return cast("StyleAlignment", horizontal)
    return None
837
+
838
+
839
+ def _rgb(value: str | None) -> str | None:
840
+ if value is None:
841
+ return None
842
+ normalized = value.strip()
843
+ if not normalized or normalized.casefold() == "none":
844
+ return None
845
+ if normalized.startswith("#"):
846
+ normalized = normalized[1:]
847
+ return normalized.upper() if re.fullmatch(r"[0-9A-Fa-f]{6}", normalized) else None
848
+
849
+
850
def _local_attr(element: ET.Element, name: str) -> str | None:
    """Return the first attribute of ``element`` whose local name is ``name``.

    Attribute keys are compared after passing through ``_local_name``.
    ``None`` is returned when no key matches.
    """
    return next(
        (value for key, value in element.attrib.items() if _local_name(key) == name),
        None,
    )
855
+
856
+
857
def _has_child(element: ET.Element, name: str) -> bool:
    """Tell whether ``element`` has a direct child with local name ``name``."""
    return any(_local_name(child.tag) == name for child in element)
859
+
860
+
861
def _first_child_by_local_name(element: ET.Element, name: str) -> ET.Element | None:
    """Return the first direct child with local name ``name``, or ``None``."""
    return next(
        (child for child in element if _local_name(child.tag) == name),
        None,
    )
866
+
867
+
868
def _first_descendant_by_local_name(element: ET.Element, name: str) -> ET.Element | None:
    """Return the first descendant with local name ``name``, or ``None``.

    Descendants are visited in ``Element.iter()`` order; ``element`` itself
    is never returned.
    """
    return next(
        (
            node
            for node in element.iter()
            if node is not element and _local_name(node.tag) == name
        ),
        None,
    )
873
+
874
+
875
def _table_blocks(
    root: ET.Element,
    *,
    member: str,
    table_start_index: int,
    text_index_by_element_id: dict[int, int],
    semantic_labels: dict[int, str],
) -> list[TableBlock]:
    """Extract every ``tbl`` element under ``root`` as a ``TableBlock``.

    Args:
        root: Parsed section tree.
        member: Section member name, used as the prefix of source paths.
        table_start_index: Number given to the first table found here.
        text_index_by_element_id: ``id(text element)`` -> text index for
            the text nodes of this section.
        semantic_labels: Output map, updated in place. For each label/value
            cell pair in a row, the text index of the value cell's first
            text node is mapped to the label cell's stripped text.

    Returns:
        One block per table, in ``Element.iter()`` order.
    """
    blocks: list[TableBlock] = []
    table_elements = _elements_by_local_name(root, "tbl")
    for table_index, table in enumerate(table_elements, start=table_start_index):
        cells: list[TableCell] = []
        for row_index, row in enumerate(_child_elements_by_local_name(table, "tr")):
            row_cells = _child_elements_by_local_name(row, "tc")
            nodes_per_cell: list[list[ET.Element]] = []
            for column_index, cell in enumerate(row_cells):
                # Only text elements that actually carry text are kept.
                nodes = [node for node in _text_elements(cell) if node.text]
                nodes_per_cell.append(nodes)
                first_index = text_index_by_element_id.get(id(nodes[0])) if nodes else None
                field_path = None if first_index is None else f"/hwpx/text[{first_index}]"
                cells.append(
                    TableCell(
                        row_index=row_index,
                        column_index=column_index,
                        text="".join(node.text or "" for node in nodes),
                        row_span=_span_attribute(cell, "rowSpan"),
                        column_span=_span_attribute(cell, "colSpan"),
                        source_path=(
                            f"{member}#table[{table_index}]/r{row_index + 1}c{column_index + 1}"
                        ),
                        field_path=field_path,
                    )
                )

            # Cells are read as (label, value) pairs. An odd-length row of
            # more than two cells starts pairing at the second cell.
            cell_count = len(row_cells)
            pair_start = 1 if cell_count > 2 and cell_count % 2 == 1 else 0
            for label_column in range(pair_start, cell_count - 1, 2):
                label = _cell_text(row_cells[label_column]).strip()
                value_nodes = nodes_per_cell[label_column + 1]
                if label and value_nodes:
                    value_index = text_index_by_element_id.get(id(value_nodes[0]))
                    if value_index is not None:
                        semantic_labels[value_index] = label

        blocks.append(
            TableBlock(
                block_id=f"hwpx-table-{table_index:03d}",
                source_path=f"{member}#table[{table_index}]",
                cells=cells,
            )
        )
    return blocks
931
+
932
+
933
def _elements_by_local_name(root: ET.Element, name: str) -> list[ET.Element]:
    """Collect every element yielded by ``root.iter()`` whose local name is ``name``.

    ``root`` itself is included when it matches.
    """
    return [
        node
        for node in root.iter()
        if _local_name(node.tag) == name
    ]
935
+
936
+
937
def _child_elements_by_local_name(root: ET.Element, name: str) -> list[ET.Element]:
    """Collect the direct children of ``root`` whose local name is ``name``, in order."""
    return [
        child
        for child in root
        if _local_name(child.tag) == name
    ]
939
+
940
+
941
def _cell_text(cell: ET.Element) -> str:
    """Concatenate the non-empty text of every text element found in ``cell``."""
    fragments = [node.text for node in _text_elements(cell) if node.text]
    return "".join(fragments)
943
+
944
+
945
def _span_attribute(cell: ET.Element, name: str) -> int:
    """Read a span attribute of ``cell`` as an integer of at least 1.

    The first attribute whose local name equals ``name`` decides the result; a
    missing attribute or a value that ``int()`` rejects falls back to 1.
    """
    raw = next(
        (value for key, value in cell.attrib.items() if _local_name(key) == name),
        None,
    )
    if raw is None:
        return 1
    try:
        span = int(raw)
    except ValueError:
        return 1
    return max(1, span)
953
+
954
+
955
+ def _local_name(tag: str) -> str:
956
+ return tag.rsplit("}", 1)[-1]
957
+
958
+
959
def _hwpx_patch_buckets_from_patch(patch: DocumentPatch) -> _HwpXPatchBuckets:
    """Sort the operations of ``patch`` into replacement and style buckets.

    Content operations (``set_field_value``, ``replace_text``, ``set_table_cell``)
    store ``str(operation.value)`` (``""`` for ``None``); style operations
    (``set_paragraph_style``, ``set_run_style``, ``set_cell_style``) store
    ``operation.style``. A target path is tried as a text target first and as a
    table-cell target second. A later operation on the same target overwrites an
    earlier one.

    Raises:
        ValueError: for an operation type outside those two groups, a style
            operation without ``style``, or a target path that matches neither
            target form.
    """
    content_operations = {
        OperationType.set_field_value,
        OperationType.replace_text,
        OperationType.set_table_cell,
    }
    style_operations = {
        OperationType.set_paragraph_style,
        OperationType.set_run_style,
        OperationType.set_cell_style,
    }
    text_replacements: dict[int, str] = {}
    table_cell_replacements: dict[_HwpXTableCellTarget, str] = {}
    text_styles: dict[int, StyleDescriptor] = {}
    table_cell_styles: dict[_HwpXTableCellTarget, StyleDescriptor] = {}

    for operation in patch.operations:
        kind = operation.operation_type
        if kind in content_operations:
            replacement = str(operation.value) if operation.value is not None else ""
            text_target = _TEXT_TARGET_RE.match(operation.target_path)
            if text_target is not None:
                text_replacements[int(text_target.group("index"))] = replacement
                continue
            cell_target = _hwpx_table_cell_target(operation.target_path)
            if cell_target is not None:
                table_cell_replacements[cell_target] = replacement
                continue
            if kind is OperationType.set_table_cell:
                raise ValueError(
                    f"Unsupported HWPX table cell target path: {operation.target_path}"
                )
            raise ValueError(f"Unsupported HWPX text target path: {operation.target_path}")
        elif kind in style_operations:
            if operation.style is None:
                raise ValueError("HWPX style operation requires style")
            text_target = _TEXT_TARGET_RE.match(operation.target_path)
            if text_target is not None:
                text_styles[int(text_target.group("index"))] = operation.style
                continue
            cell_target = _hwpx_table_cell_target(operation.target_path)
            if cell_target is not None:
                table_cell_styles[cell_target] = operation.style
                continue
            raise ValueError(f"Unsupported HWPX style target path: {operation.target_path}")
        else:
            raise ValueError(f"Unsupported HWPX operation: {operation.operation_type.value}")

    return _HwpXPatchBuckets(
        text_replacements=text_replacements,
        table_cell_replacements=table_cell_replacements,
        text_styles=text_styles,
        table_cell_styles=table_cell_styles,
    )
1007
+
1008
+
1009
+ def _hwpx_header_payload(archive: ZipFile) -> bytes:
1010
+ try:
1011
+ return archive.read("Contents/header.xml")
1012
+ except KeyError:
1013
+ return b'<hh:head xmlns:hh="http://www.hancom.co.kr/hwpml/2011/head" />'
1014
+
1015
+
1016
def _hwpx_style_refs_from_buckets(
    archive: ZipFile,
    patch_buckets: _HwpXPatchBuckets,
) -> tuple[dict[int, _HwpXStyleRefs], dict[_HwpXTableCellTarget, _HwpXStyleRefs], bytes | None]:
    """Register the styles requested in ``patch_buckets`` in the package header.

    Returns:
        A triple of (style refs per text index, style refs per table-cell target,
        re-serialized header bytes). When ``patch_buckets.has_style_mutations`` is
        falsy the header is not read and ``({}, {}, None)`` is returned.
    """
    if not patch_buckets.has_style_mutations:
        return {}, {}, None

    raw_header = _hwpx_header_payload(archive)
    namespaces = _namespace_map(raw_header)
    header = ElementTree.fromstring(raw_header)

    # Text styles are registered before table-cell styles; every registration
    # appends to ``header``.
    refs_by_text_index: dict[int, _HwpXStyleRefs] = {}
    for index, style in patch_buckets.text_styles.items():
        refs_by_text_index[index] = _ensure_hwpx_style_refs(header, style)
    refs_by_cell: dict[_HwpXTableCellTarget, _HwpXStyleRefs] = {}
    for target, style in patch_buckets.table_cell_styles.items():
        refs_by_cell[target] = _ensure_hwpx_style_refs(header, style)

    updated_header = _serialize_section(header, namespaces)
    return refs_by_text_index, refs_by_cell, updated_header
1041
+
1042
+
1043
def _raise_for_missing_hwpx_patch_targets(
    *,
    patch_buckets: _HwpXPatchBuckets,
    text_style_refs: dict[int, _HwpXStyleRefs],
    table_cell_style_refs: dict[_HwpXTableCellTarget, _HwpXStyleRefs],
    text_index: int,
    applied_table_cell_targets: set[_HwpXTableCellTarget],
    applied_text_style_targets: set[int],
    applied_table_cell_style_targets: set[_HwpXTableCellTarget],
) -> None:
    """Raise if any requested patch target was never applied.

    Checks run in this order and stop at the first failure: text replacements
    (whose indices must lie in ``range(1, text_index)``), text styles, table-cell
    replacements, table-cell styles.

    Raises:
        ValueError: naming the sorted missing targets of the first failing group.
    """
    unknown_text_targets = {
        index for index in patch_buckets.text_replacements if not 1 <= index < text_index
    }
    if unknown_text_targets:
        raise ValueError(f"HWPX text target not found: {sorted(unknown_text_targets)}")

    unstyled_text_targets = text_style_refs.keys() - applied_text_style_targets
    if unstyled_text_targets:
        raise ValueError(f"HWPX text target not found: {sorted(unstyled_text_targets)}")

    _raise_for_missing_hwpx_table_cell_targets(
        patch_buckets.table_cell_replacements.keys() - applied_table_cell_targets
    )
    _raise_for_missing_hwpx_table_cell_targets(
        table_cell_style_refs.keys() - applied_table_cell_style_targets
    )
1065
+
1066
+
1067
def _raise_for_missing_hwpx_table_cell_targets(
    missing_targets: set[_HwpXTableCellTarget],
) -> None:
    """Raise ``ValueError`` listing ``missing_targets`` as sorted target paths.

    Does nothing when ``missing_targets`` is empty.
    """
    if missing_targets:
        ordered = sorted(missing_targets, key=_hwpx_table_cell_target_sort_key)
        missing_paths = [_hwpx_table_cell_target_path(target) for target in ordered]
        raise ValueError(f"HWPX table cell target not found: {missing_paths}")
1077
+
1078
+
1079
+ def _rewrite_hwpx_package(
1080
+ archive: ZipFile,
1081
+ *,
1082
+ section_payloads: dict[str, bytes],
1083
+ header_payload: bytes | None,
1084
+ ) -> bytes:
1085
+ output = io.BytesIO()
1086
+ with ZipFile(output, "w") as rewritten:
1087
+ wrote_header = False
1088
+ infos = archive.infolist()
1089
+ mimetype_info = next((info for info in infos if info.filename == "mimetype"), None)
1090
+ if mimetype_info is not None:
1091
+ rewritten.writestr(
1092
+ _stored_hwpx_mimetype_info(mimetype_info),
1093
+ archive.read(mimetype_info.filename),
1094
+ )
1095
+ for info in infos:
1096
+ if info.filename == "mimetype":
1097
+ continue
1098
+ data = archive.read(info.filename)
1099
+ if info.filename in section_payloads:
1100
+ data = section_payloads[info.filename]
1101
+ elif info.filename == "Contents/header.xml" and header_payload is not None:
1102
+ data = header_payload
1103
+ wrote_header = True
1104
+ elif info.filename == "Preview/PrvText.txt":
1105
+ data = _preview_text(section_payloads).encode("utf-8")
1106
+ rewritten.writestr(info, data)
1107
+ if header_payload is not None and not wrote_header:
1108
+ rewritten.writestr("Contents/header.xml", header_payload)
1109
+ return output.getvalue()
1110
+
1111
+
1112
+ def _stored_hwpx_mimetype_info(info: ZipInfo) -> ZipInfo:
1113
+ stored = ZipInfo(info.filename, date_time=info.date_time)
1114
+ stored.compress_type = ZIP_STORED
1115
+ stored.comment = info.comment
1116
+ stored.extra = info.extra
1117
+ stored.external_attr = info.external_attr
1118
+ stored.create_system = info.create_system
1119
+ return stored
1120
+
1121
+
1122
def _ensure_hwpx_style_refs(root: ET.Element, style: StyleDescriptor) -> _HwpXStyleRefs:
    """Append header definitions for ``style`` under ``root`` and return their ids.

    A character property is appended when ``style`` has character-level values, a
    paragraph property when it has paragraph-level values, and a named style
    linking them when at least one of the two was created. Ids that were not
    created are ``None`` in the result.
    """
    char_pr_id: str | None = None
    if _hwpx_style_has_char_props(style):
        char_pr_id = _append_hwpx_char_pr(root, style)

    para_pr_id: str | None = None
    if _hwpx_style_has_para_props(style):
        para_pr_id = _append_hwpx_para_pr(root, style)

    style_id: str | None = None
    if char_pr_id is not None or para_pr_id is not None:
        style_id = _append_hwpx_named_style(root, char_pr_id=char_pr_id, para_pr_id=para_pr_id)

    return _HwpXStyleRefs(char_pr_id=char_pr_id, para_pr_id=para_pr_id, style_id=style_id)
1135
+
1136
+
1137
def _hwpx_style_has_char_props(style: StyleDescriptor) -> bool:
    """Report whether ``style`` sets any value handled at character level.

    The values checked are font family, font size, bold, italic, underline, font
    colour, fill colour and border.
    """
    char_level_values = (
        style.font_family,
        style.font_size_pt,
        style.bold,
        style.italic,
        style.underline,
        style.font_color_rgb,
        style.fill_color_rgb,
        style.border,
    )
    return not all(value is None for value in char_level_values)
1151
+
1152
+
1153
def _hwpx_style_has_para_props(style: StyleDescriptor) -> bool:
    """Report whether ``style`` sets an alignment or a line spacing."""
    return not (style.alignment is None and style.line_spacing is None)
1155
+
1156
+
1157
def _append_hwpx_char_pr(root: ET.Element, style: StyleDescriptor) -> str:
    """Append a new ``charPr`` element built from ``style`` and return its id.

    The element is added to the ``charProperties`` child of ``root`` (created if
    absent) with the next free numeric id. Font size, text colour and shade colour
    become attributes; a fill colour or border also registers a border fill and
    references it. A font family adds a ``fontRef`` child using the same font id
    for every script slot, and ``bold``/``italic``/``underline`` add an empty child
    each when they are exactly ``True``. The container's ``itemCnt`` is refreshed.
    """
    container = _find_or_create_direct_child(root, "charProperties")
    new_id = str(_next_numeric_id(root, "charPr"))

    attrs: dict[str, str] = {"id": new_id}
    if style.font_size_pt is not None:
        attrs["height"] = _hwpx_points_to_height(style.font_size_pt)
    if style.font_color_rgb is not None:
        attrs["textColor"] = _hwpx_color(style.font_color_rgb)
    if style.fill_color_rgb is not None:
        attrs["shadeColor"] = _hwpx_color(style.fill_color_rgb)
    if style.fill_color_rgb is not None or style.border is not None:
        attrs["borderFillIDRef"] = _append_hwpx_border_fill(root, style)
    element = ET.Element(_qualified_child_tag(container, "charPr"), attrs)

    if style.font_family is not None:
        font_id = _ensure_hwpx_font(root, style.font_family)
        script_slots = ("hangul", "latin", "hanja", "japanese", "other", "symbol", "user")
        font_ref = ET.Element(
            _qualified_child_tag(element, "fontRef"),
            dict.fromkeys(script_slots, font_id),
        )
        element.append(font_ref)

    emphasis_flags = (
        ("bold", style.bold),
        ("italic", style.italic),
        ("underline", style.underline),
    )
    for flag_name, enabled in emphasis_flags:
        if enabled is True:
            element.append(ET.Element(_qualified_child_tag(element, flag_name)))

    container.append(element)
    _refresh_item_count(container, "charPr")
    return new_id
1195
+
1196
+
1197
def _append_hwpx_para_pr(root: ET.Element, style: StyleDescriptor) -> str:
    """Append a new ``paraPr`` element built from ``style`` and return its id.

    The element is added to the ``paraProperties`` child of ``root`` (created if
    absent) with the next free numeric id and ``tabPrIDRef="0"``. An alignment, if
    set, becomes an ``align`` child with the uppercased alignment as ``horizontal``
    and ``BASELINE`` as ``vertical``. The container's ``itemCnt`` is refreshed.

    Only ``style.alignment`` is read here.
    NOTE(review): ``_hwpx_style_has_para_props`` also returns True for
    ``line_spacing`` - confirm whether line spacing should be emitted as well.
    """
    container = _find_or_create_direct_child(root, "paraProperties")
    new_id = str(_next_numeric_id(root, "paraPr"))
    element = ET.Element(
        _qualified_child_tag(container, "paraPr"),
        {"id": new_id, "tabPrIDRef": "0"},
    )
    alignment = style.alignment
    if alignment is not None:
        align = ET.Element(
            _qualified_child_tag(element, "align"),
            {"horizontal": alignment.upper(), "vertical": "BASELINE"},
        )
        element.append(align)
    container.append(element)
    _refresh_item_count(container, "paraPr")
    return new_id
1214
+
1215
+
1216
def _append_hwpx_named_style(
    root: ET.Element,
    *,
    char_pr_id: str | None,
    para_pr_id: str | None,
) -> str:
    """Append a new ``style`` element of type ``PARA`` and return its id.

    The element is added to the ``styles`` child of ``root`` (created if absent)
    with the next free numeric id and the name ``UMMAYAStyle<id>``. References to
    ``para_pr_id`` and ``char_pr_id`` are added only when they are not ``None``.
    The container's ``itemCnt`` is refreshed.
    """
    container = _find_or_create_direct_child(root, "styles")
    new_id = str(_next_numeric_id(root, "style"))
    display_name = f"UMMAYAStyle{new_id}"
    attrs = {
        "id": new_id,
        "type": "PARA",
        "name": display_name,
        "engName": display_name,
        "nextStyleIDRef": "0",
        "langID": "1042",
        "lockForm": "0",
    }
    optional_refs = (("paraPrIDRef", para_pr_id), ("charPrIDRef", char_pr_id))
    for attr_name, ref_id in optional_refs:
        if ref_id is not None:
            attrs[attr_name] = ref_id
    container.append(ET.Element(_qualified_child_tag(container, "style"), attrs))
    _refresh_item_count(container, "style")
    return new_id
1240
+
1241
+
1242
def _append_hwpx_border_fill(root: ET.Element, style: StyleDescriptor) -> str:
    """Append a new ``borderFill`` element built from ``style`` and return its id.

    The element is added to the ``borderFills`` child of ``root`` (created if
    absent) with the next free numeric id. ``slash`` and ``backSlash`` are always
    written with type ``NONE``; the four side borders share one type, width and
    colour taken from ``style.border``, defaulting to ``NONE``, ``0.10 mm`` and
    ``#000000``. A fill colour adds a ``fillBrush``/``winBrush`` pair. The
    container's ``itemCnt`` is refreshed.
    """
    container = _find_or_create_direct_child(root, "borderFills")
    new_id = str(_next_numeric_id(root, "borderFill"))

    border = style.border
    if border is None:
        line_type, line_width, line_color = "NONE", "0.10 mm", "#000000"
    else:
        line_type = border.style.upper()
        line_width = (
            _hwpx_points_to_mm(border.width_pt) if border.width_pt is not None else "0.10 mm"
        )
        line_color = _hwpx_color(border.color_rgb) if border.color_rgb is not None else "#000000"

    element = ET.Element(
        _qualified_child_tag(container, "borderFill"),
        {"id": new_id, "threeD": "0", "shadow": "0", "centerLine": "NONE"},
    )
    for diagonal in ("slash", "backSlash"):
        element.append(ET.Element(_qualified_child_tag(element, diagonal), {"type": "NONE"}))
    for side in ("leftBorder", "rightBorder", "topBorder", "bottomBorder"):
        side_attrs = {"type": line_type, "width": line_width, "color": line_color}
        element.append(ET.Element(_qualified_child_tag(element, side), side_attrs))

    if style.fill_color_rgb is not None:
        brush = ET.Element(_qualified_child_tag(element, "fillBrush"))
        win_brush = ET.Element(
            _qualified_child_tag(brush, "winBrush"),
            {
                "faceColor": _hwpx_color(style.fill_color_rgb),
                "hatchColor": "#000000",
                "alpha": "0",
            },
        )
        brush.append(win_brush)
        element.append(brush)

    container.append(element)
    _refresh_item_count(container, "borderFill")
    return new_id
1287
+
1288
+
1289
def _ensure_hwpx_font(root: ET.Element, font_family: str) -> str:
    """Return the id of the ``font`` entry for ``font_family``, adding one if needed.

    Only the first ``fontface`` child of ``fontfaces`` is searched and extended;
    both containers are created when absent (the new ``fontface`` uses
    ``lang="HANGUL"``). An existing ``font`` whose ``face`` matches is reused when
    it carries an ``id``. Otherwise a ``TTF`` entry with the next free numeric id
    is appended and the ``fontCnt``/``itemCnt`` counters are refreshed.
    """
    fontfaces = _find_or_create_direct_child(root, "fontfaces")
    fontface = _first_child_by_local_name(fontfaces, "fontface")
    if fontface is None:
        fontface = ET.Element(
            _qualified_child_tag(fontfaces, "fontface"),
            {"lang": "HANGUL", "fontCnt": "0"},
        )
        fontfaces.append(fontface)

    for existing in _child_elements_by_local_name(fontface, "font"):
        if _local_attr(existing, "face") != font_family:
            continue
        existing_id = _local_attr(existing, "id")
        if existing_id is not None:
            return existing_id

    new_id = str(_next_numeric_id(root, "font"))
    new_font = ET.Element(
        _qualified_child_tag(fontface, "font"),
        {"id": new_id, "face": font_family, "type": "TTF", "isEmbedded": "0"},
    )
    fontface.append(new_font)
    font_total = len(_child_elements_by_local_name(fontface, "font"))
    fontface.set("fontCnt", str(font_total))
    _refresh_item_count(fontfaces, "fontface")
    return new_id
1313
+
1314
+
1315
def _find_or_create_direct_child(root: ET.Element, name: str) -> ET.Element:
    """Return the first direct child of ``root`` named ``name``, appending one if absent.

    A newly created child gets its tag from ``_qualified_child_tag(root, name)``.
    """
    found = _first_child_by_local_name(root, name)
    if found is None:
        found = ET.Element(_qualified_child_tag(root, name))
        root.append(found)
    return found
1322
+
1323
+
1324
def _next_numeric_id(root: ET.Element, name: str) -> int:
    """Return one more than the largest integer ``id`` among elements named ``name``.

    All matching elements under ``root`` are considered; missing or non-integer
    ids are ignored. Returns 0 when no integer id is found.
    """
    highest: int | None = None
    for element in _elements_by_local_name(root, name):
        raw_id = _local_attr(element, "id")
        if raw_id is None:
            continue
        try:
            parsed = int(raw_id)
        except ValueError:
            continue
        if highest is None or parsed > highest:
            highest = parsed
    return 0 if highest is None else highest + 1
1335
+
1336
+
1337
def _refresh_item_count(container: ET.Element, child_name: str) -> None:
    """Set ``itemCnt`` on ``container`` to its number of direct ``child_name`` children."""
    child_total = len(_child_elements_by_local_name(container, child_name))
    container.set("itemCnt", str(child_total))
1339
+
1340
+
1341
def _hwpx_color(value: str) -> str:
    """Format ``value`` as a ``#``-prefixed colour string.

    The normalized form from ``_rgb`` is used when ``value`` is valid; otherwise
    ``value`` is uppercased and prefixed as-is.
    """
    normalized = _rgb(value)
    digits = normalized if normalized else value.upper()
    return f"#{digits}"
1343
+
1344
+
1345
+ def _hwpx_points_to_height(points: Decimal) -> str:
1346
+ return str(int((points * Decimal("100")).to_integral_value()))
1347
+
1348
+
1349
+ def _hwpx_points_to_mm(points: Decimal) -> str:
1350
+ millimeters = (points / Decimal("2.834645669")).quantize(Decimal("0.01"))
1351
+ return f"{millimeters} mm"
1352
+
1353
+
1354
def _apply_hwpx_section_mutations(
    root: ET.Element,
    *,
    member: str,
    text_index: int,
    table_index: int,
    text_replacements: dict[int, str],
    table_cell_replacements: dict[_HwpXTableCellTarget, str],
    text_style_refs: dict[int, _HwpXStyleRefs],
    table_cell_style_refs: dict[_HwpXTableCellTarget, _HwpXStyleRefs],
) -> tuple[
    int,
    int,
    set[_HwpXTableCellTarget],
    set[int],
    set[_HwpXTableCellTarget],
]:
    """Apply text and table-cell patches to one section tree in place.

    ``text_index`` and ``table_index`` are running counters: each non-empty text
    element consumes one text index and each ``tbl`` element one table index.
    Text replacements and text styles are applied first, then table-cell
    replacements and table-cell styles for every table.

    Returns:
        ``(next text index, next table index, applied table-cell replacement
        targets, applied text style indices, applied table-cell style targets)``.
    """
    replaced_cells: set[_HwpXTableCellTarget] = set()
    styled_text_indices: set[int] = set()
    styled_cells: set[_HwpXTableCellTarget] = set()
    parents = _parent_by_element_id(root)

    for node in _text_elements(root):
        # Empty text elements are not numbered.
        if not node.text:
            continue
        if text_index in text_replacements:
            node.text = text_replacements[text_index]
        style_refs = text_style_refs.get(text_index)
        if style_refs is not None:
            _apply_hwpx_style_refs_to_text(node, style_refs, parents)
            styled_text_indices.add(text_index)
        text_index += 1

    for table in _elements_by_local_name(root, "tbl"):
        replaced_here = _apply_hwpx_table_cell_replacements(
            table,
            member=member,
            table_index=table_index,
            table_cell_replacements=table_cell_replacements,
        )
        replaced_cells.update(replaced_here)
        styled_here = _apply_hwpx_table_cell_styles(
            table,
            member=member,
            table_index=table_index,
            table_cell_style_refs=table_cell_style_refs,
        )
        styled_cells.update(styled_here)
        table_index += 1

    return text_index, table_index, replaced_cells, styled_text_indices, styled_cells
1410
+
1411
+
1412
def _apply_hwpx_style_refs_to_text(
    text: ET.Element,
    refs: _HwpXStyleRefs,
    parent_by_id: dict[int, ET.Element],
) -> None:
    """Apply ``refs`` to the nearest ``p`` and ``run`` ancestors of ``text``.

    Raises:
        ValueError: if either ancestor cannot be found through ``parent_by_id``.
    """
    enclosing_paragraph = _nearest_ancestor_by_local_name(text, "p", parent_by_id)
    enclosing_run = _nearest_ancestor_by_local_name(text, "run", parent_by_id)
    if enclosing_paragraph is not None and enclosing_run is not None:
        _set_hwpx_style_refs(paragraph=enclosing_paragraph, run=enclosing_run, refs=refs)
        return
    raise ValueError("HWPX text style target has no paragraph/run container")
1422
+
1423
+
1424
def _apply_hwpx_table_cell_styles(
    table: ET.Element,
    *,
    member: str,
    table_index: int,
    table_cell_style_refs: dict[_HwpXTableCellTarget, _HwpXStyleRefs],
) -> set[_HwpXTableCellTarget]:
    """Apply the style refs addressed to this table and return the targets handled.

    Targets for another ``member`` or ``table_index`` are skipped. Inside a
    matching cell every ``p`` element found under the cell is styled together with
    each of its direct ``run`` children; a paragraph without runs gets a new one,
    and a cell without paragraphs gets a paragraph/run pair created first.
    """
    styled: set[_HwpXTableCellTarget] = set()
    for target, refs in table_cell_style_refs.items():
        if (target.member, target.table_index) != (member, table_index):
            continue
        cell = _hwpx_table_cell_element(
            table,
            row_index=target.row_index,
            column_index=target.column_index,
        )
        paragraphs = _elements_by_local_name(cell, "p")
        if paragraphs:
            for paragraph in paragraphs:
                runs = _child_elements_by_local_name(paragraph, "run")
                if not runs:
                    created_run = ET.Element(_qualified_child_tag(paragraph, "run"))
                    paragraph.append(created_run)
                    runs = [created_run]
                for run in runs:
                    _set_hwpx_style_refs(paragraph=paragraph, run=run, refs=refs)
        else:
            paragraph, run = _ensure_hwpx_cell_paragraph_and_run(cell)
            _set_hwpx_style_refs(paragraph=paragraph, run=run, refs=refs)
        styled.add(target)
    return styled
1454
+
1455
+
1456
def _set_hwpx_style_refs(
    *,
    paragraph: ET.Element,
    run: ET.Element,
    refs: _HwpXStyleRefs,
) -> None:
    """Write the non-``None`` ids of ``refs`` onto ``paragraph`` and ``run``.

    ``paraPrIDRef`` and ``styleIDRef`` go on the paragraph, ``charPrIDRef`` on the
    run; attributes whose id is ``None`` are left untouched.
    """
    assignments = (
        (paragraph, "paraPrIDRef", refs.para_pr_id),
        (paragraph, "styleIDRef", refs.style_id),
        (run, "charPrIDRef", refs.char_pr_id),
    )
    for element, attribute, ref_id in assignments:
        if ref_id is not None:
            element.set(attribute, ref_id)
1468
+
1469
+
1470
def _apply_hwpx_table_cell_replacements(
    table: ET.Element,
    *,
    member: str,
    table_index: int,
    table_cell_replacements: dict[_HwpXTableCellTarget, str],
) -> set[_HwpXTableCellTarget]:
    """Write the replacement texts addressed to this table and return their targets.

    Targets for another ``member`` or ``table_index`` are skipped.
    """
    replaced: set[_HwpXTableCellTarget] = set()
    for target, new_text in table_cell_replacements.items():
        if (target.member, target.table_index) != (member, table_index):
            continue
        _set_hwpx_table_cell_text(
            table,
            row_index=target.row_index,
            column_index=target.column_index,
            value=new_text,
        )
        replaced.add(target)
    return replaced
1489
+
1490
+
1491
def _hwpx_table_cell_target(target_path: str) -> _HwpXTableCellTarget | None:
    """Parse ``target_path`` into a table-cell target.

    Returns ``None`` when the path does not match ``_HWPX_TABLE_CELL_SOURCE_RE``.
    """
    parsed = _HWPX_TABLE_CELL_SOURCE_RE.match(target_path)
    if parsed is not None:
        return _HwpXTableCellTarget(
            member=parsed.group("member"),
            table_index=int(parsed.group("table")),
            row_index=int(parsed.group("row")),
            column_index=int(parsed.group("column")),
        )
    return None
1501
+
1502
+
1503
def _hwpx_table_cell_target_path(target: _HwpXTableCellTarget) -> str:
    """Format ``target`` as ``<member>#table[<table>]/r<row>c<column>``."""
    table_part = f"{target.member}#table[{target.table_index}]"
    cell_part = f"r{target.row_index}c{target.column_index}"
    return f"{table_part}/{cell_part}"
1505
+
1506
+
1507
def _hwpx_table_cell_target_sort_key(
    target: _HwpXTableCellTarget,
) -> tuple[str, int, int, int]:
    """Return a sort key ordering targets by member, table, row, then column."""
    member, table = target.member, target.table_index
    row, column = target.row_index, target.column_index
    return (member, table, row, column)
1511
+
1512
+
1513
def _set_hwpx_table_cell_text(
    table: ET.Element,
    *,
    row_index: int,
    column_index: int,
    value: str,
) -> None:
    """Replace the text of one table cell with ``value``.

    When the cell already has text elements, the first receives ``value`` and the
    rest are blanked. Otherwise a paragraph/run pair is ensured and ``value`` is
    written into the run's first ``t`` child, which is created if absent.
    """
    cell = _hwpx_table_cell_element(table, row_index=row_index, column_index=column_index)
    existing_nodes = _text_elements(cell)
    if not existing_nodes:
        _, run = _ensure_hwpx_cell_paragraph_and_run(cell)
        text_node = _first_child_by_local_name(run, "t")
        if text_node is None:
            text_node = ET.Element(_qualified_child_tag(run, "t"))
            run.append(text_node)
        text_node.text = value
        return
    existing_nodes[0].text = value
    for leftover in existing_nodes[1:]:
        leftover.text = ""
1533
+
1534
+
1535
def _hwpx_table_cell_element(
    table: ET.Element,
    *,
    row_index: int,
    column_index: int,
) -> ET.Element:
    """Return the ``tc`` element at 1-based ``row_index``/``column_index`` of ``table``.

    Only direct ``tr`` children of ``table`` and direct ``tc`` children of the
    selected row are considered.

    Raises:
        ValueError: if ``row_index`` or ``column_index`` is below 1 or beyond the
            available rows/cells.
    """
    rows = _child_elements_by_local_name(table, "tr")
    # Indices are 1-based. Values below 1 must be rejected as well: otherwise
    # ``rows[row_index - 1]`` would wrap around through negative indexing and
    # silently address a different row.
    if not 1 <= row_index <= len(rows):
        raise ValueError(f"HWPX table row target not found: {row_index}")
    cells = _child_elements_by_local_name(rows[row_index - 1], "tc")
    if not 1 <= column_index <= len(cells):
        raise ValueError(f"HWPX table cell target not found: {row_index},{column_index}")
    return cells[column_index - 1]
1548
+
1549
+
1550
def _ensure_hwpx_cell_paragraph_and_run(cell: ET.Element) -> tuple[ET.Element, ET.Element]:
    """Return the first ``p`` child of ``cell`` and that paragraph's first ``run`` child.

    Either element is created and appended when it does not exist yet.
    """

    def first_or_new(parent: ET.Element, local_name: str) -> ET.Element:
        found = _first_child_by_local_name(parent, local_name)
        if found is None:
            found = ET.Element(_qualified_child_tag(parent, local_name))
            parent.append(found)
        return found

    paragraph = first_or_new(cell, "p")
    return paragraph, first_or_new(paragraph, "run")
1560
+
1561
+
1562
+ def _qualified_child_tag(parent: ET.Element, local_name: str) -> str:
1563
+ if parent.tag.startswith("{") and "}" in parent.tag:
1564
+ namespace = parent.tag.split("}", 1)[0][1:]
1565
+ return f"{{{namespace}}}{local_name}"
1566
+ return local_name
1567
+
1568
+
1569
+ def _namespace_map(payload: bytes) -> list[tuple[str, str]]:
1570
+ namespaces: list[tuple[str, str]] = []
1571
+ for _event, namespace in ElementTree.iterparse(io.BytesIO(payload), events=("start-ns",)):
1572
+ prefix, uri = namespace
1573
+ namespaces.append((str(prefix), str(uri)))
1574
+ return namespaces
1575
+
1576
+
1577
+ def _serialize_section(root: ET.Element, namespaces: list[tuple[str, str]]) -> bytes:
1578
+ for prefix, uri in namespaces:
1579
+ ET.register_namespace(prefix, uri)
1580
+ return cast(
1581
+ bytes,
1582
+ ET.tostring(root, encoding="utf-8", xml_declaration=True, short_empty_elements=True),
1583
+ )
1584
+
1585
+
1586
def _preview_text(section_payloads: dict[str, bytes]) -> str:
    """Build preview text from the non-empty text elements of ``section_payloads``.

    Sections are processed in sorted member-name order and every text fragment is
    wrapped in angle brackets, e.g. ``<first><second>``.
    """
    wrapped: list[str] = []
    for member in sorted(section_payloads):
        section_root = ElementTree.fromstring(section_payloads[member])
        wrapped.extend(f"<{node.text}>" for node in _text_elements(section_root) if node.text)
    return "".join(wrapped)
1592
+
1593
+
1594
def _uses_hwpxjs_html_render(path: Path) -> bool:
    """Return whether this HWPX package needs the hwpxjs HTML renderer.

    That is the case when any ``tbl`` element in any section member lacks the
    geometry checked by ``_table_missing_rhwp_geometry``. A file that is not a
    valid zip, or a section that is not well-formed XML, yields ``False``.
    """
    try:
        with ZipFile(path) as archive:
            for member in _section_members(archive):
                section_root = ElementTree.fromstring(archive.read(member))
                tables = _elements_by_local_name(section_root, "tbl")
                if any(_table_missing_rhwp_geometry(table) for table in tables):
                    return True
            return False
    except (BadZipFile, ElementTree.ParseError):
        return False
1606
+
1607
+
1608
def _table_missing_rhwp_geometry(table: ET.Element) -> bool:
    """Report whether ``table`` lacks any geometry child this check requires.

    The table needs direct ``sz`` and ``pos`` children, and every ``tc`` in every
    direct ``tr`` needs direct ``cellAddr``, ``cellSpan`` and ``cellSz`` children.
    """
    if not (_has_direct_child(table, "sz") and _has_direct_child(table, "pos")):
        return True
    required_cell_children = ("cellAddr", "cellSpan", "cellSz")
    for row in _child_elements_by_local_name(table, "tr"):
        for cell in _child_elements_by_local_name(row, "tc"):
            if not all(_has_direct_child(cell, name) for name in required_cell_children):
                return True
    return False
1620
+
1621
+
1622
def _has_direct_child(element: ET.Element, name: str) -> bool:
    """Report whether any direct child of ``element`` has local name ``name``."""
    for child in element:
        if _local_name(child.tag) == name:
            return True
    return False
1624
+
1625
+
1626
def _render_with_hwpxjs_html(path: Path) -> bytes:
    """Render ``path`` through the hwpxjs CLI and wrap the result in an HTML page.

    Runs ``<hwpxjs> html <path>`` without a shell, with stdin detached and the
    package root as working directory, then embeds the command's stdout in a
    minimal UTF-8 HTML document with table styling.

    Returns:
        The complete HTML document encoded as UTF-8 bytes.

    Raises:
        RuntimeError: if the command exits non-zero or prints nothing.
        subprocess.TimeoutExpired: if the command exceeds
            ``_RHWP_NODE_TIMEOUT_SECONDS`` (not caught here).
    """
    executable = _hwpxjs_binary()
    completed = subprocess.run(  # noqa: S603 - executable is resolved local CLI, no shell.
        [str(executable), "html", str(path)],
        cwd=str(_rhwp_package_root()),
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        # Decode explicitly as UTF-8: the output is re-emitted below as a UTF-8
        # page, so it must not depend on the locale's preferred encoding.
        encoding="utf-8",
        timeout=_RHWP_NODE_TIMEOUT_SECONDS,
        check=False,
    )
    if completed.returncode != 0:
        stderr = completed.stderr.strip()
        output_summary = stderr or completed.stdout.strip()
        raise RuntimeError(f"hwpxjs HTML render bridge failed: {output_summary}")
    body = completed.stdout.strip()
    if not body:
        raise RuntimeError("hwpxjs HTML render bridge produced no reviewer evidence")
    return (
        '<!doctype html><html><head><meta charset="utf-8">'
        "<style>"
        "body{font-family:-apple-system,BlinkMacSystemFont,'Apple SD Gothic Neo',sans-serif;"
        "margin:24px;color:#111;line-height:1.45}"
        "table.hwpx-table{border-collapse:collapse;margin:12px 0;width:100%}"
        "table.hwpx-table td,table.hwpx-table th{border:1px solid #555;padding:6px 8px;"
        "vertical-align:top}"
        '</style></head><body data-ummaya-render-engine="hwpxjs-html-render">'
        f"{body}</body></html>"
    ).encode()
1655
+
1656
+
1657
+ def _hwpxjs_binary() -> Path:
1658
+ configured = os.environ.get("UMMAYA_HWPXJS")
1659
+ if configured:
1660
+ candidate = Path(configured).expanduser()
1661
+ if not candidate.is_absolute():
1662
+ resolved = shutil.which(configured)
1663
+ if resolved is not None:
1664
+ candidate = Path(resolved)
1665
+ candidate = candidate.resolve(strict=False)
1666
+ if candidate.is_file() and os.access(candidate, os.X_OK):
1667
+ return candidate
1668
+ raise RuntimeError(f"UMMAYA_HWPXJS is not executable: {configured}")
1669
+
1670
+ path_candidate = shutil.which("hwpxjs")
1671
+ if path_candidate is not None:
1672
+ candidate = Path(path_candidate).resolve(strict=False)
1673
+ if candidate.is_file() and os.access(candidate, os.X_OK):
1674
+ return candidate
1675
+ for root in (Path.cwd(), _rhwp_package_root()):
1676
+ candidate = root / "node_modules" / ".bin" / "hwpxjs"
1677
+ if candidate.is_file() and os.access(candidate, os.X_OK):
1678
+ return candidate.resolve(strict=False)
1679
+ raise RuntimeError("hwpxjs executable is required for HWPX HTML rendering")
1680
+
1681
+
1682
# Timeout, in seconds, passed to ``subprocess.run`` by both the hwpxjs HTML bridge
# and the RHWP Node render bridge.
_RHWP_NODE_TIMEOUT_SECONDS = 45
1683
+
1684
# ES-module script executed inline by ``_render_with_rhwp_node`` via
# ``node --input-type=module -e``. It takes ``<inputPath> <outputDir>`` as arguments,
# resolves ``@rhwp/core`` relative to the package root (``UMMAYA_PACKAGE_ROOT`` or the
# working directory), renders each page to ``rhwp-page-NNN.svg`` inside ``outputDir``
# and prints one JSON object (engineId, rhwpVersion, pageCount, artifacts) to stdout.
_RHWP_RENDER_BRIDGE_JS = r"""
import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join, resolve } from 'node:path';
import { createRequire } from 'node:module';
import { pathToFileURL } from 'node:url';

const [inputPath, outputDir] = process.argv.slice(1);
if (!inputPath || !outputDir) {
  throw new Error('Usage: rhwp render bridge requires <inputPath> <outputDir>');
}

const packageRoot = resolve(process.env.UMMAYA_PACKAGE_ROOT || process.cwd());
const packageJsonPath = join(packageRoot, 'package.json');
if (!existsSync(packageJsonPath)) {
  throw new Error(`UMMAYA package root does not contain package.json: ${packageRoot}`);
}

globalThis.measureTextWidth = (_font, text) => {
  let width = 0;
  for (const char of String(text)) {
    width += char.charCodeAt(0) > 0x7f ? 14 : 8;
  }
  return width;
};

const require = createRequire(pathToFileURL(packageJsonPath));
const rhwpModulePath = require.resolve('@rhwp/core/rhwp.js');
const rhwpWasmPath = require.resolve('@rhwp/core/rhwp_bg.wasm');
const rhwp = await import(pathToFileURL(rhwpModulePath).href);

await rhwp.default({ module_or_path: readFileSync(rhwpWasmPath) });

const data = readFileSync(resolve(inputPath));
const doc = new rhwp.HwpDocument(new Uint8Array(data));
const pageCount = doc.pageCount();
mkdirSync(resolve(outputDir), { recursive: true });

const artifacts = [];
for (let index = 0; index < pageCount; index += 1) {
  const svg = doc.renderPageSvg(index);
  const pageNumber = index + 1;
  const outputName = `rhwp-page-${String(pageNumber).padStart(3, '0')}.svg`;
  const outputPath = join(resolve(outputDir), outputName);
  writeFileSync(outputPath, svg);
  artifacts.push({
    pageNumber,
    path: outputPath,
    sha256: createHash('sha256').update(svg).digest('hex'),
  });
}

console.log(JSON.stringify({
  engineId: 'rhwp-node-wasm',
  rhwpVersion: rhwp.version(),
  pageCount,
  artifacts,
}));
"""
1743
+
1744
+
1745
def _render_with_rhwp_node(path: Path, *, output_dir: Path) -> tuple[bytes, ...]:
    """Render *path* to page SVG payloads through the Node.js RHWP bridge.

    The bridge script is handed to Node inline (``-e``) and executed from the
    detected package root. On success its stdout is parsed for the artifact
    paths, and each path must lie inside *output_dir* before it is read.

    Args:
        path: Document to render; forwarded to the bridge as its input path.
        output_dir: Directory that receives the rendered files. It is created
            (including parents) when it does not exist yet.

    Returns:
        The bytes of every reported artifact, in the order the bridge listed them.

    Raises:
        RuntimeError: If the bridge exits with a non-zero status, reports an
            artifact outside *output_dir*, or reports no artifacts at all.
        subprocess.TimeoutExpired: If the bridge runs longer than
            ``_RHWP_NODE_TIMEOUT_SECONDS``.

    Errors raised by the helper functions called here propagate unchanged.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    completed = subprocess.run(  # noqa: S603
        [
            _node_binary(),
            "--input-type=module",
            "-e",
            _RHWP_RENDER_BRIDGE_JS,
            str(path),
            str(output_dir),
        ],
        cwd=str(_rhwp_package_root()),
        env=_rhwp_bridge_env(),
        capture_output=True,
        text=True,
        # Node emits UTF-8 on piped stdout/stderr. Decoding with the locale
        # default would corrupt (or fail on) non-ASCII artifact paths in the
        # JSON result on platforms whose preferred encoding is not UTF-8.
        encoding="utf-8",
        timeout=_RHWP_NODE_TIMEOUT_SECONDS,
        check=False,
    )
    if completed.returncode != 0:
        stderr = completed.stderr.strip()
        # Prefer stderr for the diagnostic; fall back to stdout when it is empty.
        raise RuntimeError(f"RHWP render bridge failed: {stderr or completed.stdout.strip()}")

    bridge_result = _parse_rhwp_bridge_result(completed.stdout)
    payloads: list[bytes] = []
    for artifact_path in bridge_result:
        # Never read a file the bridge reports outside the requested directory.
        _require_render_path_inside(artifact_path, output_dir)
        payloads.append(artifact_path.read_bytes())
    if not payloads:
        raise RuntimeError("RHWP render bridge produced no page SVG artifacts")
    return tuple(payloads)
1775
+
1776
+
1777
def _node_binary() -> str:
    """Return the Node.js executable used to run the RHWP render bridge.

    The ``UMMAYA_NODE`` environment variable takes precedence when it is set to
    a non-empty value; otherwise ``node`` is looked up on ``PATH``.

    Returns:
        The path of the executable as resolved by ``shutil.which``.

    Raises:
        RuntimeError: If ``UMMAYA_NODE`` is set but does not name an executable,
            or if it is unset and no ``node`` executable is found on ``PATH``.
    """
    configured = os.environ.get("UMMAYA_NODE")
    if configured:
        # shutil.which() also validates commands that carry a directory part,
        # so absolute overrides are checked for existence and executability
        # instead of being returned blindly and failing later at spawn time.
        resolved = shutil.which(configured)
        if resolved:
            return resolved
        raise RuntimeError(f"UMMAYA_NODE is not executable: {configured}")

    detected = shutil.which("node")
    if detected is None:
        raise RuntimeError("node executable is required for RHWP HWPX rendering")
    return detected
1789
+
1790
+
1791
def _rhwp_package_root() -> Path:
    """Locate the directory whose ``package.json`` anchors RHWP module resolution.

    Candidates are tried in order and the first one containing a
    ``package.json`` file wins:

    1. the ``UMMAYA_PACKAGE_ROOT`` environment variable (when non-empty),
    2. the current working directory,
    3. the sixth ancestor of this source file (when the path is that deep).

    Returns:
        The resolved matching directory, or the resolved current working
        directory when no candidate contains ``package.json``.
    """
    candidates: list[Path] = []

    configured = os.environ.get("UMMAYA_PACKAGE_ROOT")
    if configured:
        candidates.append(Path(configured))

    candidates.append(Path.cwd())

    # Indexing ``parents[5]`` unconditionally raises IndexError when this file
    # sits fewer than six levels below the filesystem root, which would abort
    # the lookup even if an earlier candidate was valid. Skip it in that case.
    ancestors = Path(__file__).resolve().parents
    if len(ancestors) > 5:
        candidates.append(ancestors[5])

    for candidate in candidates:
        root = candidate.expanduser().resolve()
        if (root / "package.json").is_file():
            return root
    return Path.cwd().resolve()
1804
+
1805
+
1806
def _rhwp_bridge_env() -> dict[str, str]:
    """Build the environment mapping passed to the RHWP bridge subprocess.

    Returns:
        A copy of the current process environment in which
        ``UMMAYA_PACKAGE_ROOT`` is set to the detected package root.
    """
    # The copy keeps os.environ itself untouched; the override is applied last
    # so it replaces any inherited UMMAYA_PACKAGE_ROOT value.
    return {**os.environ, "UMMAYA_PACKAGE_ROOT": str(_rhwp_package_root())}
1810
+
1811
+
1812
def _parse_rhwp_bridge_result(stdout: str) -> list[Path]:
    """Extract the artifact paths from the bridge's JSON output.

    Args:
        stdout: Text captured from the bridge; it must be a JSON object with an
            ``artifacts`` list whose items are objects carrying a string ``path``.

    Returns:
        One user-expanded, resolved ``Path`` per artifact, in reported order.

    Raises:
        RuntimeError: If the decoded value or any artifact has the wrong shape.
        json.JSONDecodeError: If *stdout* is not valid JSON.
    """
    payload = json.loads(stdout)
    if not isinstance(payload, dict):
        raise RuntimeError("RHWP render bridge returned a non-object result")

    entries = payload.get("artifacts")
    if not isinstance(entries, list):
        raise RuntimeError("RHWP render bridge result is missing artifacts")

    def _artifact_path(entry: object) -> Path:
        # Validate a single artifact record and normalise its path.
        if not isinstance(entry, dict):
            raise RuntimeError("RHWP render bridge artifact is not an object")
        raw_path = entry.get("path")
        if not isinstance(raw_path, str):
            raise RuntimeError("RHWP render bridge artifact is missing path")
        return Path(raw_path).expanduser().resolve()

    return [_artifact_path(entry) for entry in entries]
1829
+
1830
+
1831
def _require_render_path_inside(candidate: Path, root: Path) -> None:
    """Ensure *candidate* resolves to *root* itself or to a path beneath it.

    Both paths are resolved before comparison.

    Args:
        candidate: Path reported by the render bridge.
        root: Directory the path is required to stay within.

    Raises:
        RuntimeError: If the resolved candidate is neither the resolved root
            nor one of its descendants.
    """
    base = root.resolve()
    target = candidate.resolve()
    # The target is acceptable when the base is the target itself or one of
    # the target's ancestors.
    if base in (target, *target.parents):
        return
    raise RuntimeError(
        f"RHWP render bridge path escapes output directory: {target}"
    )