ummaya 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (534)
  1. package/README.md +17 -3
  2. package/bin/ummaya +10 -1
  3. package/npm-shrinkwrap.json +253 -2
  4. package/package.json +5 -1
  5. package/prompts/manifest.yaml +2 -2
  6. package/prompts/session_guidance_v1.md +3 -1
  7. package/prompts/system_v1.md +9 -7
  8. package/pyproject.toml +26 -7
  9. package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
  10. package/src/ummaya/_canonical/__init__.py +2 -0
  11. package/src/ummaya/context/builder.py +17 -11
  12. package/src/ummaya/engine/engine.py +30 -113
  13. package/src/ummaya/engine/query.py +20 -0
  14. package/src/ummaya/evidence/__init__.py +44 -0
  15. package/src/ummaya/evidence/__main__.py +7 -0
  16. package/src/ummaya/evidence/dataset_contract.py +193 -0
  17. package/src/ummaya/evidence/document_authoring_cases.py +33 -0
  18. package/src/ummaya/evidence/document_harness.py +313 -0
  19. package/src/ummaya/evidence/document_viewer_ux.py +391 -0
  20. package/src/ummaya/evidence/gates.py +70 -0
  21. package/src/ummaya/evidence/json_types.py +20 -0
  22. package/src/ummaya/evidence/models.py +145 -0
  23. package/src/ummaya/evidence/output_payload.py +89 -0
  24. package/src/ummaya/evidence/payload_documents.py +233 -0
  25. package/src/ummaya/evidence/route_contracts.py +224 -0
  26. package/src/ummaya/evidence/route_helpers.py +150 -0
  27. package/src/ummaya/evidence/runner.py +177 -0
  28. package/src/ummaya/evidence/source_provenance.py +246 -0
  29. package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
  30. package/src/ummaya/evidence/task_registry.py +264 -0
  31. package/src/ummaya/evidence/tool_layer.py +39 -0
  32. package/src/ummaya/evidence/tool_layer_models.py +151 -0
  33. package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
  34. package/src/ummaya/ipc/document_intent_normalization.py +185 -0
  35. package/src/ummaya/ipc/frame_schema.py +52 -5
  36. package/src/ummaya/ipc/route_diagnostics.py +73 -0
  37. package/src/ummaya/ipc/stdio.py +2282 -417
  38. package/src/ummaya/llm/client.py +234 -59
  39. package/src/ummaya/llm/config.py +8 -3
  40. package/src/ummaya/llm/reasoning.py +84 -0
  41. package/src/ummaya/primitives/__init__.py +6 -2
  42. package/src/ummaya/primitives/delegation.py +1 -1
  43. package/src/ummaya/primitives/document.py +28 -0
  44. package/src/ummaya/settings.py +0 -3
  45. package/src/ummaya/tools/discovery_bridge.py +34 -2
  46. package/src/ummaya/tools/documents/__init__.py +297 -0
  47. package/src/ummaya/tools/documents/adapter_registry.py +487 -0
  48. package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
  49. package/src/ummaya/tools/documents/artifact_store.py +454 -0
  50. package/src/ummaya/tools/documents/authoring.py +283 -0
  51. package/src/ummaya/tools/documents/baselines.py +114 -0
  52. package/src/ummaya/tools/documents/capability.py +331 -0
  53. package/src/ummaya/tools/documents/contracts.py +112 -0
  54. package/src/ummaya/tools/documents/conversion.py +521 -0
  55. package/src/ummaya/tools/documents/diff.py +275 -0
  56. package/src/ummaya/tools/documents/engines.py +163 -0
  57. package/src/ummaya/tools/documents/evaluation.py +291 -0
  58. package/src/ummaya/tools/documents/explicit_values.py +108 -0
  59. package/src/ummaya/tools/documents/fixtures.py +174 -0
  60. package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
  61. package/src/ummaya/tools/documents/formats/__init__.py +2 -0
  62. package/src/ummaya/tools/documents/formats/archive.py +528 -0
  63. package/src/ummaya/tools/documents/formats/base.py +41 -0
  64. package/src/ummaya/tools/documents/formats/code_file.py +211 -0
  65. package/src/ummaya/tools/documents/formats/data_file.py +272 -0
  66. package/src/ummaya/tools/documents/formats/hwp.py +284 -0
  67. package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
  68. package/src/ummaya/tools/documents/formats/odf.py +435 -0
  69. package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
  70. package/src/ummaya/tools/documents/formats/passive.py +766 -0
  71. package/src/ummaya/tools/documents/formats/pdf.py +702 -0
  72. package/src/ummaya/tools/documents/formats/text_web.py +268 -0
  73. package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
  74. package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
  75. package/src/ummaya/tools/documents/inspection.py +289 -0
  76. package/src/ummaya/tools/documents/intake.py +1079 -0
  77. package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
  78. package/src/ummaya/tools/documents/models.py +1598 -0
  79. package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
  80. package/src/ummaya/tools/documents/orchestrator.py +96 -0
  81. package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
  82. package/src/ummaya/tools/documents/patch.py +170 -0
  83. package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
  84. package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
  85. package/src/ummaya/tools/documents/permissions.py +110 -0
  86. package/src/ummaya/tools/documents/planner.py +616 -0
  87. package/src/ummaya/tools/documents/registry.py +2733 -0
  88. package/src/ummaya/tools/documents/render.py +978 -0
  89. package/src/ummaya/tools/documents/render_comparison.py +113 -0
  90. package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
  91. package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
  92. package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
  93. package/src/ummaya/tools/documents/reread.py +157 -0
  94. package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
  95. package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
  96. package/src/ummaya/tools/documents/scorecard.py +184 -0
  97. package/src/ummaya/tools/documents/socratic_planner.py +193 -0
  98. package/src/ummaya/tools/documents/style.py +48 -0
  99. package/src/ummaya/tools/documents/tool_defs.py +523 -0
  100. package/src/ummaya/tools/documents/validate.py +347 -0
  101. package/src/ummaya/tools/executor.py +61 -12
  102. package/src/ummaya/tools/geocoding/kakao_client.py +1 -2
  103. package/src/ummaya/tools/kma/apihub_catalog.py +984 -1
  104. package/src/ummaya/tools/kma/apihub_structured_adapter.py +86 -6
  105. package/src/ummaya/tools/kma/apihub_url_adapter.py +593 -0
  106. package/src/ummaya/tools/kma/apihub_url_catalog.py +296 -0
  107. package/src/ummaya/tools/live_proxy.py +0 -3
  108. package/src/ummaya/tools/location_adapters.py +8 -6
  109. package/src/ummaya/tools/manifest_metadata.py +16 -3
  110. package/src/ummaya/tools/models.py +5 -1
  111. package/src/ummaya/tools/mvp_surface.py +2 -2
  112. package/src/ummaya/tools/nmc/emergency_search.py +8 -6
  113. package/src/ummaya/tools/register_all.py +17 -0
  114. package/src/ummaya/tools/registry.py +10 -1
  115. package/src/ummaya/tools/resolve_location.py +4 -4
  116. package/src/ummaya/tools/routing/__init__.py +59 -0
  117. package/src/ummaya/tools/routing/builder.py +105 -0
  118. package/src/ummaya/tools/routing/cards.py +29 -0
  119. package/src/ummaya/tools/routing/decision_service.py +534 -0
  120. package/src/ummaya/tools/routing/decision_types.py +74 -0
  121. package/src/ummaya/tools/routing/feasibility.py +122 -0
  122. package/src/ummaya/tools/routing/intent.py +17 -0
  123. package/src/ummaya/tools/routing/intent_extractor.py +207 -0
  124. package/src/ummaya/tools/routing/intent_patterns.py +160 -0
  125. package/src/ummaya/tools/routing/intent_public_data.py +150 -0
  126. package/src/ummaya/tools/routing/intent_types.py +48 -0
  127. package/src/ummaya/tools/routing/lint.py +78 -0
  128. package/src/ummaya/tools/routing/metadata.py +174 -0
  129. package/src/ummaya/tools/routing/projection.py +340 -0
  130. package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
  131. package/src/ummaya/tools/routing/schema.py +81 -0
  132. package/src/ummaya/tools/routing/types.py +96 -0
  133. package/src/ummaya/tools/routing_index.py +2 -2
  134. package/src/ummaya/tools/search.py +40 -106
  135. package/src/ummaya/tools/verified_data_go_kr/_manifest.py +115 -25
  136. package/src/ummaya/tools/verified_data_go_kr/airkorea_air_quality.py +109 -4
  137. package/src/ummaya/tools/verified_data_go_kr/nmc_aed_site.py +108 -2
  138. package/src/ummaya/tools/verified_data_go_kr/pps_bid_public_info.py +174 -9
  139. package/src/ummaya/tools/verified_data_go_kr/tago_bus_arrival.py +66 -3
  140. package/src/ummaya/tools/verified_data_go_kr/tago_bus_location.py +12 -2
  141. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route.py +8 -2
  142. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route_station.py +114 -0
  143. package/src/ummaya/tools/verified_data_go_kr/tago_bus_station.py +14 -3
  144. package/src/ummaya/tools/verify_canonical_map.py +21 -0
  145. package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
  146. package/tui/package.json +1 -2
  147. package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
  148. package/tui/src/QueryEngine.ts +12 -4
  149. package/tui/src/bridge/inboundAttachments.ts +3 -3
  150. package/tui/src/cli/handlers/auth.ts +4 -13
  151. package/tui/src/cli/handlers/mcp.tsx +3 -3
  152. package/tui/src/cli/print.ts +69 -18
  153. package/tui/src/cli/update.ts +13 -13
  154. package/tui/src/commands/copy/index.ts +1 -1
  155. package/tui/src/commands/cost/cost.ts +2 -2
  156. package/tui/src/commands/init-verifiers.ts +5 -5
  157. package/tui/src/commands/init.ts +30 -30
  158. package/tui/src/commands/insights.ts +44 -44
  159. package/tui/src/commands/install-github-app/install-github-app.tsx +2 -2
  160. package/tui/src/commands/install-github-app/setupGitHubActions.ts +3 -3
  161. package/tui/src/commands/install-github-app/types.ts +8 -30
  162. package/tui/src/commands/install.tsx +5 -5
  163. package/tui/src/commands/mcp/addCommand.ts +5 -5
  164. package/tui/src/commands/mcp/xaaIdpCommand.ts +2 -2
  165. package/tui/src/commands/plugin/ManageMarketplaces.tsx +2 -2
  166. package/tui/src/commands/plugin/types.ts +6 -28
  167. package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
  168. package/tui/src/commands/reasoning/index.ts +13 -0
  169. package/tui/src/commands/reasoning/reasoning.tsx +177 -0
  170. package/tui/src/commands/rename/generateSessionName.ts +1 -1
  171. package/tui/src/commands/thinkback/thinkback.tsx +3 -3
  172. package/tui/src/commands.ts +2 -0
  173. package/tui/src/components/Feedback.tsx +1 -1
  174. package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
  175. package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
  176. package/tui/src/components/Messages.tsx +2 -1
  177. package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
  178. package/tui/src/components/Spinner/types.ts +6 -28
  179. package/tui/src/components/Spinner.tsx +2 -2
  180. package/tui/src/components/agents/generateAgent.ts +1 -1
  181. package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
  182. package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
  183. package/tui/src/components/design-system/LoadingState.tsx +2 -2
  184. package/tui/src/components/mcp/types.ts +16 -38
  185. package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
  186. package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
  187. package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
  188. package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
  189. package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
  190. package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
  191. package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
  192. package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
  193. package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
  194. package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
  195. package/tui/src/components/primitive/index.tsx +43 -1
  196. package/tui/src/components/primitive/types.ts +137 -0
  197. package/tui/src/components/ui/option.ts +4 -26
  198. package/tui/src/constants/common.ts +0 -2
  199. package/tui/src/constants/prompts.ts +4 -3
  200. package/tui/src/constants/querySource.ts +4 -26
  201. package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
  202. package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
  203. package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
  204. package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
  205. package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
  206. package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
  207. package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
  208. package/tui/src/hooks/useApiKeyVerification.ts +1 -1
  209. package/tui/src/hooks/useVirtualScroll.ts +1 -1
  210. package/tui/src/ink/ink.tsx +33 -14
  211. package/tui/src/ink/reconciler.ts +2 -3
  212. package/tui/src/ink/render-to-screen.ts +30 -10
  213. package/tui/src/ipc/bridge.ts +62 -15
  214. package/tui/src/ipc/bridgeSingleton.ts +5 -1
  215. package/tui/src/ipc/codec.ts +29 -3
  216. package/tui/src/ipc/frames.generated.ts +407 -312
  217. package/tui/src/ipc/llmClient.ts +279 -76
  218. package/tui/src/ipc/llmTypes.ts +16 -1
  219. package/tui/src/ipc/schema/frame.schema.json +1 -3475
  220. package/tui/src/keybindings/defaultBindings.ts +4 -0
  221. package/tui/src/main.tsx +32 -11
  222. package/tui/src/native-ts/file-index/index.ts +33 -3
  223. package/tui/src/observability/surface.ts +2 -2
  224. package/tui/src/probes/toolRegistryProbe.tsx +3 -1
  225. package/tui/src/projectOnboardingState.ts +7 -6
  226. package/tui/src/query/chatMessageTypes.ts +18 -0
  227. package/tui/src/query/chatMessagesBuilder.ts +1 -1
  228. package/tui/src/query/deps.ts +1 -1
  229. package/tui/src/query/messageGuards.ts +106 -0
  230. package/tui/src/query/publicDataTerminalRepair.ts +384 -0
  231. package/tui/src/query/run.ts +1075 -0
  232. package/tui/src/query/supportBoundary.ts +168 -0
  233. package/tui/src/query/toolResultErrors.ts +103 -0
  234. package/tui/src/query/toolRunner.ts +687 -0
  235. package/tui/src/query/unavailableToolRepair.ts +118 -0
  236. package/tui/src/query.ts +9 -1721
  237. package/tui/src/screens/REPL.tsx +42 -31
  238. package/tui/src/services/api/adapterManifest.ts +4 -0
  239. package/tui/src/services/api/backendChat/events.ts +117 -0
  240. package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
  241. package/tui/src/services/api/backendChat/frame.ts +9 -0
  242. package/tui/src/services/api/backendChat/streaming.ts +430 -0
  243. package/tui/src/services/api/backendChat/types.ts +62 -0
  244. package/tui/src/services/api/backendChat.ts +1 -0
  245. package/tui/src/services/api/client.ts +98 -14
  246. package/tui/src/services/api/errorUtils.ts +5 -5
  247. package/tui/src/services/api/errors.ts +1 -1
  248. package/tui/src/services/api/logging.ts +1 -1
  249. package/tui/src/services/api/ummaya/evidence.ts +194 -0
  250. package/tui/src/services/api/ummaya/messages.ts +255 -0
  251. package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
  252. package/tui/src/services/api/ummaya/provider.ts +200 -0
  253. package/tui/src/services/api/ummaya/reasoning.ts +24 -0
  254. package/tui/src/services/api/ummaya/request.ts +200 -0
  255. package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
  256. package/tui/src/services/api/ummaya/streaming.ts +365 -0
  257. package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
  258. package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
  259. package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
  260. package/tui/src/services/api/ummaya/types.ts +110 -0
  261. package/tui/src/services/api/ummaya/usage.ts +30 -0
  262. package/tui/src/services/api/ummaya.ts +26 -364
  263. package/tui/src/services/api/withRetry.ts +1 -1
  264. package/tui/src/services/awaySummary.ts +2 -2
  265. package/tui/src/services/claudeAiLimits.ts +1 -1
  266. package/tui/src/services/compact/autoCompact.ts +1 -1
  267. package/tui/src/services/compact/compact.ts +1 -1
  268. package/tui/src/services/lsp/types.ts +8 -30
  269. package/tui/src/services/tips/types.ts +6 -28
  270. package/tui/src/services/tokenEstimation.ts +1 -1
  271. package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
  272. package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
  273. package/tui/src/services/tools/toolExecution.ts +94 -1
  274. package/tui/src/skills/bundled/stuck.ts +12 -12
  275. package/tui/src/state/AppStateStore.ts +7 -0
  276. package/tui/src/store/pendingPermissionSlot.ts +1 -1
  277. package/tui/src/store/session-store.ts +10 -36
  278. package/tui/src/stubs/any-stub.ts +15 -10
  279. package/tui/src/stubs/color-diff-napi.ts +37 -23
  280. package/tui/src/stubs/globals.d.ts +3 -3
  281. package/tui/src/stubs/macro-preload.ts +23 -12
  282. package/tui/src/tools/AdapterTool/AdapterTool.ts +1239 -163
  283. package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
  284. package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
  285. package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
  286. package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
  287. package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
  288. package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
  289. package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
  290. package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
  291. package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
  292. package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
  293. package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
  294. package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
  295. package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
  296. package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
  297. package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
  298. package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
  299. package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
  300. package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
  301. package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
  302. package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
  303. package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
  304. package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
  305. package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
  306. package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
  307. package/tui/src/tools/AgentTool/permissions.ts +39 -0
  308. package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
  309. package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
  310. package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
  311. package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
  312. package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
  313. package/tui/src/tools/AgentTool/runAgent.ts +1 -1
  314. package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
  315. package/tui/src/tools/AgentTool/schemas.ts +196 -0
  316. package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
  317. package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
  318. package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
  319. package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
  320. package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
  321. package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
  322. package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
  323. package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
  324. package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
  325. package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
  326. package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
  327. package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
  328. package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
  329. package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
  330. package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
  331. package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
  332. package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
  333. package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
  334. package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
  335. package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
  336. package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
  337. package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
  338. package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
  339. package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
  340. package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
  341. package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
  342. package/tui/src/tools/BashTool/call.ts +202 -0
  343. package/tui/src/tools/BashTool/callLoader.ts +35 -0
  344. package/tui/src/tools/BashTool/commandClassification.ts +151 -0
  345. package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
  346. package/tui/src/tools/BashTool/cwdReset.ts +33 -0
  347. package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
  348. package/tui/src/tools/BashTool/modeValidation.ts +13 -1
  349. package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
  350. package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
  351. package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
  352. package/tui/src/tools/BashTool/resultLoader.ts +29 -0
  353. package/tui/src/tools/BashTool/resultMapping.ts +83 -0
  354. package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
  355. package/tui/src/tools/BashTool/schemas.ts +65 -0
  356. package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
  357. package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
  358. package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
  359. package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
  360. package/tui/src/tools/BashTool/uiLoader.ts +37 -0
  361. package/tui/src/tools/BriefTool/upload.ts +1 -1
  362. package/tui/src/tools/CalculatorTool/parser.ts +2 -2
  363. package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
  364. package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
  365. package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
  366. package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
  367. package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
  368. package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
  369. package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
  370. package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
  371. package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
  372. package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
  373. package/tui/src/tools/FileEditTool/call.ts +228 -0
  374. package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
  375. package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
  376. package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
  377. package/tui/src/tools/FileWriteTool/call.ts +223 -0
  378. package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
  379. package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
  380. package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +48 -29
  381. package/tui/src/tools/LookupPrimitive/prompt.ts +6 -7
  382. package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
  383. package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
  384. package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
  385. package/tui/src/tools/NotebookEditTool/call.ts +254 -0
  386. package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
  387. package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
  388. package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
  389. package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
  390. package/tui/src/tools/PowerShellTool/call.ts +179 -0
  391. package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
  392. package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
  393. package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
  394. package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
  395. package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
  396. package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
  397. package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
  398. package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
  399. package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
  400. package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
  401. package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
  402. package/tui/src/tools/PowerShellTool/validation.ts +39 -0
  403. package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
  404. package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +30 -19
  405. package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
  406. package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
  407. package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +51 -18
  408. package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
  409. package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
  410. package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
  411. package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
  412. package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
  413. package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
  414. package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
  415. package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
  416. package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
  417. package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
  418. package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
  419. package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
  420. package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
  421. package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
  422. package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
  423. package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
  424. package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
  425. package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
  426. package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
  427. package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
  428. package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
  429. package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
  430. package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
  431. package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
  432. package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
  433. package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +27 -10
  434. package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
  435. package/tui/src/tools/WebFetchTool/call.ts +227 -0
  436. package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
  437. package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
  438. package/tui/src/tools/WebFetchTool/types.ts +23 -0
  439. package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
  440. package/tui/src/tools/WebFetchTool/utils.ts +1 -1
  441. package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
  442. package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
  443. package/tui/src/tools/WebSearchTool/call.ts +33 -0
  444. package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
  445. package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
  446. package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
  447. package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
  448. package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
  449. package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
  450. package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
  451. package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
  452. package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
  453. package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
  454. package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
  455. package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
  456. package/tui/src/tools/_shared/citizenUserText.ts +49 -0
  457. package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
  458. package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
  459. package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
  460. package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
  461. package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
  462. package/tui/src/tools/_shared/locationInputRepair.ts +112 -0
  463. package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
  464. package/tui/src/tools/_shared/rootPrimitiveInput.ts +68 -0
  465. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
  466. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
  467. package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
  468. package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
  469. package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
  470. package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
  471. package/tui/src/tools/_shared/toolChoiceRepair.ts +61 -0
  472. package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
  473. package/tui/src/tools.ts +39 -190
  474. package/tui/src/types/fileSuggestion.ts +4 -26
  475. package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
  476. package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
  477. package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
  478. package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
  479. package/tui/src/types/message.ts +80 -102
  480. package/tui/src/types/messageQueueTypes.ts +6 -28
  481. package/tui/src/types/notebook.ts +16 -38
  482. package/tui/src/types/statusLine.ts +4 -26
  483. package/tui/src/types/tools.ts +24 -46
  484. package/tui/src/types/utils.ts +6 -28
  485. package/tui/src/upstreamproxy/relay.ts +7 -3
  486. package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
  487. package/tui/src/utils/assistantMessageFactories.ts +9 -3
  488. package/tui/src/utils/attachments.ts +1 -1
  489. package/tui/src/utils/auth.ts +129 -139
  490. package/tui/src/utils/bash/ast.ts +23 -23
  491. package/tui/src/utils/bash/bashParser.ts +5 -5
  492. package/tui/src/utils/billing.ts +1 -1
  493. package/tui/src/utils/collapseReadSearch.ts +3 -3
  494. package/tui/src/utils/cronTasks.ts +1 -1
  495. package/tui/src/utils/execFileNoThrow.ts +1 -1
  496. package/tui/src/utils/filePersistence/types.ts +16 -38
  497. package/tui/src/utils/forkedAgent.ts +1 -1
  498. package/tui/src/utils/gracefulShutdown.ts +4 -4
  499. package/tui/src/utils/heapDumpService.ts +12 -8
  500. package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
  501. package/tui/src/utils/hooks/execPromptHook.ts +1 -1
  502. package/tui/src/utils/hooks/skillImprovement.ts +1 -1
  503. package/tui/src/utils/kExaoneReasoning.ts +138 -0
  504. package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
  505. package/tui/src/utils/messages.ts +19 -0
  506. package/tui/src/utils/migrateSessions.ts +3 -3
  507. package/tui/src/utils/model/model.ts +6 -6
  508. package/tui/src/utils/multiToolLayout.ts +13 -0
  509. package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
  510. package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
  511. package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
  512. package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
  513. package/tui/src/utils/plugins/pluginLoader.ts +8 -8
  514. package/tui/src/utils/processUserInput/processSlashCommand.tsx +2 -2
  515. package/tui/src/utils/processUserInput/processUserInput.ts +26 -0
  516. package/tui/src/utils/protectedNamespace.ts +5 -3
  517. package/tui/src/utils/rawJsonToolCall.ts +242 -0
  518. package/tui/src/utils/ripgrep.ts +16 -7
  519. package/tui/src/utils/sessionTitle.ts +1 -1
  520. package/tui/src/utils/settings/applySettingsChange.ts +4 -0
  521. package/tui/src/utils/settings/permissionValidation.ts +14 -2
  522. package/tui/src/utils/settings/types.ts +9 -3
  523. package/tui/src/utils/shell/prefix.ts +1 -1
  524. package/tui/src/utils/sideQuery.ts +1 -1
  525. package/tui/src/utils/stats.ts +1 -1
  526. package/tui/src/utils/systemThemeWatcher.ts +13 -3
  527. package/tui/src/utils/teleport.tsx +1 -1
  528. package/uv.lock +394 -22
  529. package/assets/copilot-gate-logo.svg +0 -58
  530. package/assets/govon-logo.svg +0 -40
  531. package/src/ummaya/eval/__init__.py +0 -5
  532. package/src/ummaya/eval/retrieval.py +0 -713
  533. package/tui/src/services/api/claude.ts +0 -3510
  534. package/tui/src/utils/messageStream.ts +0 -186
@@ -0,0 +1,1079 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Fail-closed document intake for local Public AX artifacts."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import ast
7
+ import gzip
8
+ import hashlib
9
+ import io
10
+ import json
11
+ import tarfile
12
+ import zipfile
13
+ from pathlib import Path, PurePosixPath
14
+
15
+ import yaml
16
+ from defusedxml import ElementTree # type: ignore[import-untyped]
17
+ from pydantic import BaseModel, ConfigDict, Field
18
+
19
+ from ummaya.tools.documents.models import (
20
+ KNOWN_DOCUMENT_FORMAT_FAMILIES,
21
+ PROMOTED_RUNTIME_DOCUMENT_FORMATS,
22
+ BlockedReason,
23
+ DocumentFormat,
24
+ DocumentFormatFamily,
25
+ DocumentIntakeResult,
26
+ DocumentSecurityFinding,
27
+ KnownDocumentFormat,
28
+ SecurityFindingSeverity,
29
+ SecurityState,
30
+ ToolResultStatus,
31
+ )
32
+
33
+
34
class DocumentIntakePolicy(BaseModel):
    """Fail-closed pre-parse limits for user supplied document artifacts.

    Instances are immutable (``frozen=True``) and reject unknown fields
    (``extra="forbid"``).
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Format identifiers accepted at intake; defaults to every KnownDocumentFormat value.
    allowed_formats: frozenset[str] = Field(
        default_factory=lambda: frozenset(format_.value for format_ in KnownDocumentFormat)
    )
    # Largest on-disk size accepted; checked against stat() before the file is read (50 MiB).
    max_raw_bytes: int = 50 * 1024 * 1024
    # Ceiling on the summed uncompressed size of package members / gzip output (200 MiB).
    max_expanded_bytes: int = 200 * 1024 * 1024
    # Maximum number of members tolerated in a ZIP or tar package.
    max_entries: int = 5_000
    # Any value above 1 switches off the nested-package name check.
    max_depth: int = 1
    # When False, ZIP relationship parts are scanned for external targets.
    allow_external_links: bool = False
    # When False, ZIP members whose names look like macro storage block the document.
    allow_macros: bool = False
    # When False, ZIP members whose names look like embedded active content block the document.
    allow_embedded_active_content: bool = False
49
+
50
+
51
+ _EXTENSION_TO_KNOWN_FORMAT: dict[str, str] = {
52
+ ".hwpx": "hwpx",
53
+ ".hwp": "hwp",
54
+ ".hml": "hml",
55
+ ".owpml": "owpml",
56
+ ".docx": "docx",
57
+ ".xlsx": "xlsx",
58
+ ".pptx": "pptx",
59
+ ".doc": "doc",
60
+ ".xls": "xls",
61
+ ".ppt": "ppt",
62
+ ".pdf": "pdf",
63
+ ".pdfa": "pdfa",
64
+ ".odt": "odt",
65
+ ".ods": "ods",
66
+ ".odp": "odp",
67
+ ".html": "html",
68
+ ".htm": "htm",
69
+ ".txt": "txt",
70
+ ".rtf": "rtf",
71
+ ".md": "md",
72
+ ".epub": "epub",
73
+ ".csv": "csv",
74
+ ".tsv": "tsv",
75
+ ".xml": "xml",
76
+ ".rdf": "rdf",
77
+ ".ttl": "ttl",
78
+ ".lod": "lod",
79
+ ".json": "json",
80
+ ".jsonl": "jsonl",
81
+ ".yaml": "yaml",
82
+ ".yml": "yml",
83
+ ".geojson": "geojson",
84
+ ".gpx": "gpx",
85
+ ".kml": "kml",
86
+ ".fasta": "fasta",
87
+ ".sgml": "sgml",
88
+ ".dtd": "dtd",
89
+ ".py": "py",
90
+ ".png": "png",
91
+ ".jpg": "jpg",
92
+ ".jpeg": "jpeg",
93
+ ".gif": "gif",
94
+ ".tif": "tif",
95
+ ".tiff": "tiff",
96
+ ".bmp": "bmp",
97
+ ".webp": "webp",
98
+ ".shp": "shp",
99
+ ".shx": "shx",
100
+ ".dbf": "dbf",
101
+ ".prj": "prj",
102
+ ".stl": "stl",
103
+ ".wav": "wav",
104
+ ".mp3": "mp3",
105
+ ".mp4": "mp4",
106
+ ".zip": "zip",
107
+ ".7z": "7z",
108
+ ".tar": "tar",
109
+ ".gz": "gz",
110
+ ".etc": "etc",
111
+ }
112
+
113
+ _PROMOTED_FORMAT_VALUES = frozenset(format_.value for format_ in PROMOTED_RUNTIME_DOCUMENT_FORMATS)
114
+ _KNOWN_FORMAT_RUNTIME_ALIASES: dict[str, str] = {
115
+ "pdfa": "pdf",
116
+ }
117
+
118
+
119
def _runtime_format_for_known_format(known_format: str) -> str | None:
    """Map a known format to the runtime format that handles it.

    Promoted formats map to themselves, aliased formats map to their alias
    target, and everything else yields ``None``.
    """
    is_promoted = known_format in _PROMOTED_FORMAT_VALUES
    return known_format if is_promoted else _KNOWN_FORMAT_RUNTIME_ALIASES.get(known_format)
123
+
124
+
125
+ _EXTENSION_TO_FORMAT: dict[str, str] = {
126
+ extension: known_format
127
+ for extension, known_format in _EXTENSION_TO_KNOWN_FORMAT.items()
128
+ if _runtime_format_for_known_format(known_format) is not None
129
+ }
130
+
131
+ _MIME_BY_FORMAT: dict[str, frozenset[str]] = {
132
+ "hwpx": frozenset(
133
+ {
134
+ "application/haansofthwpx",
135
+ "application/vnd.hancom.hwpx",
136
+ "application/x-hwpx",
137
+ "application/owpml",
138
+ "application/zip",
139
+ }
140
+ ),
141
+ "owpml": frozenset(
142
+ {
143
+ "application/owpml",
144
+ "application/vnd.hancom.hwpx",
145
+ "application/x-hwpx",
146
+ "application/zip",
147
+ }
148
+ ),
149
+ "hwp": frozenset(
150
+ {
151
+ "application/haansofthwp",
152
+ "application/vnd.hancom.hwp",
153
+ "application/x-hwp",
154
+ "application/octet-stream",
155
+ }
156
+ ),
157
+ "docx": frozenset(
158
+ {
159
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
160
+ "application/zip",
161
+ }
162
+ ),
163
+ "pdf": frozenset({"application/pdf"}),
164
+ "xlsx": frozenset(
165
+ {
166
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
167
+ "application/zip",
168
+ }
169
+ ),
170
+ "pptx": frozenset(
171
+ {
172
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation",
173
+ "application/zip",
174
+ }
175
+ ),
176
+ "odt": frozenset(
177
+ {
178
+ "application/vnd.oasis.opendocument.text",
179
+ "application/zip",
180
+ }
181
+ ),
182
+ "ods": frozenset(
183
+ {
184
+ "application/vnd.oasis.opendocument.spreadsheet",
185
+ "application/zip",
186
+ }
187
+ ),
188
+ "odp": frozenset(
189
+ {
190
+ "application/vnd.oasis.opendocument.presentation",
191
+ "application/zip",
192
+ }
193
+ ),
194
+ "html": frozenset({"text/html", "application/xhtml+xml"}),
195
+ "htm": frozenset({"text/html", "application/xhtml+xml"}),
196
+ "txt": frozenset({"text/plain"}),
197
+ "rtf": frozenset({"application/rtf", "text/rtf"}),
198
+ "md": frozenset({"text/markdown", "text/plain"}),
199
+ "epub": frozenset({"application/epub+zip", "application/zip"}),
200
+ "csv": frozenset({"text/csv", "text/plain"}),
201
+ "tsv": frozenset({"text/tab-separated-values", "text/plain"}),
202
+ "xml": frozenset({"application/xml", "text/xml"}),
203
+ "rdf": frozenset({"application/rdf+xml", "application/xml", "text/xml"}),
204
+ "ttl": frozenset({"text/turtle", "text/plain"}),
205
+ "lod": frozenset({"text/plain"}),
206
+ "json": frozenset({"application/json", "text/plain"}),
207
+ "jsonl": frozenset({"application/x-ndjson", "application/json", "text/plain"}),
208
+ "yaml": frozenset({"application/yaml", "text/yaml", "text/plain"}),
209
+ "yml": frozenset({"application/yaml", "text/yaml", "text/plain"}),
210
+ "geojson": frozenset({"application/geo+json", "application/json", "text/plain"}),
211
+ "gpx": frozenset({"application/gpx+xml", "application/xml", "text/xml"}),
212
+ "kml": frozenset({"application/vnd.google-earth.kml+xml", "application/xml", "text/xml"}),
213
+ "fasta": frozenset({"text/plain"}),
214
+ "sgml": frozenset({"text/sgml", "text/plain"}),
215
+ "dtd": frozenset({"application/xml-dtd", "text/plain"}),
216
+ "py": frozenset({"text/x-python", "text/plain"}),
217
+ "hml": frozenset({"application/xml", "text/xml"}),
218
+ "zip": frozenset({"application/zip"}),
219
+ "7z": frozenset({"application/x-7z-compressed", "application/7z"}),
220
+ "tar": frozenset({"application/x-tar", "application/tar"}),
221
+ "gz": frozenset({"application/gzip", "application/x-gzip"}),
222
+ "etc": frozenset({"text/plain"}),
223
+ }
224
+
225
+ _DETECTED_MIME_BY_FORMAT: dict[str, str] = {
226
+ "hwpx": "application/vnd.hancom.hwpx",
227
+ "owpml": "application/owpml",
228
+ "hwp": "application/vnd.hancom.hwp",
229
+ "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
230
+ "pdf": "application/pdf",
231
+ "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
232
+ "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
233
+ "odt": "application/vnd.oasis.opendocument.text",
234
+ "ods": "application/vnd.oasis.opendocument.spreadsheet",
235
+ "odp": "application/vnd.oasis.opendocument.presentation",
236
+ "html": "text/html",
237
+ "htm": "text/html",
238
+ "txt": "text/plain",
239
+ "rtf": "application/rtf",
240
+ "md": "text/markdown",
241
+ "epub": "application/epub+zip",
242
+ "csv": "text/csv",
243
+ "tsv": "text/tab-separated-values",
244
+ "xml": "application/xml",
245
+ "rdf": "application/rdf+xml",
246
+ "ttl": "text/turtle",
247
+ "lod": "text/plain",
248
+ "json": "application/json",
249
+ "jsonl": "application/x-ndjson",
250
+ "yaml": "application/yaml",
251
+ "yml": "application/yaml",
252
+ "geojson": "application/geo+json",
253
+ "gpx": "application/gpx+xml",
254
+ "kml": "application/vnd.google-earth.kml+xml",
255
+ "fasta": "text/plain",
256
+ "sgml": "text/sgml",
257
+ "dtd": "application/xml-dtd",
258
+ "py": "text/x-python",
259
+ "hml": "application/xml",
260
+ "zip": "application/zip",
261
+ "7z": "application/x-7z-compressed",
262
+ "tar": "application/x-tar",
263
+ "gz": "application/gzip",
264
+ "etc": "text/plain",
265
+ }
266
+
267
+ _ODF_MIMETYPE_FORMATS: dict[bytes, str] = {
268
+ b"application/vnd.oasis.opendocument.text": "odt",
269
+ b"application/vnd.oasis.opendocument.spreadsheet": "ods",
270
+ b"application/vnd.oasis.opendocument.presentation": "odp",
271
+ }
272
+
273
+ _TEXT_WEB_FORMAT_VALUES = frozenset({"html", "htm", "txt", "rtf", "md"})
274
+ _DATA_FORMAT_VALUES = frozenset(
275
+ {
276
+ "csv",
277
+ "tsv",
278
+ "xml",
279
+ "rdf",
280
+ "ttl",
281
+ "lod",
282
+ "json",
283
+ "jsonl",
284
+ "yaml",
285
+ "yml",
286
+ "geojson",
287
+ "gpx",
288
+ "kml",
289
+ "fasta",
290
+ "sgml",
291
+ "dtd",
292
+ "hml",
293
+ "etc",
294
+ }
295
+ )
296
+ _CODE_FORMAT_VALUES = frozenset({"py"})
297
+
298
+ _ZIP_FORMAT_MARKERS: tuple[tuple[str, tuple[str, ...]], ...] = (
299
+ ("docx", ("word/document.xml",)),
300
+ ("xlsx", ("xl/workbook.xml",)),
301
+ ("pptx", ("ppt/presentation.xml",)),
302
+ (
303
+ "hwpx",
304
+ (
305
+ "Contents/section0.xml",
306
+ "Contents/header.xml",
307
+ "version.xml",
308
+ "META-INF/manifest.xml",
309
+ ),
310
+ ),
311
+ )
312
+
313
+ _OLE_SIGNATURE = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
314
+ _PDF_SIGNATURE = b"%PDF-"
315
+ _ZIP_SIGNATURES = (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08")
316
+ _SEVEN_Z_SIGNATURE = b"7z\xbc\xaf\x27\x1c"
317
+
318
+ _MACRO_MARKERS = (
319
+ "vbaproject.bin",
320
+ "/vba",
321
+ "macrosheets/",
322
+ "xl4macrosheets/",
323
+ )
324
+
325
+ _ACTIVE_CONTENT_MARKERS = (
326
+ "/activex/",
327
+ "/embeddings/",
328
+ "oleobject",
329
+ "flash",
330
+ "javascript",
331
+ )
332
+
333
+ _RELATIONSHIP_SUFFIX = ".rels"
334
+
335
+ _BLOCKED_REASON_BY_INTERNAL: dict[str, str] = {
336
+ "unsupported_input": "unsupported_format",
337
+ "unsupported_extension": "unsupported_format",
338
+ "known_unsupported_format": "unsupported_operation",
339
+ "unsupported_compression": "unsupported_format",
340
+ "nested_package": "unsupported_format",
341
+ "raw_size_limit": "oversized_raw_bytes",
342
+ "corrupt_package": "corrupt",
343
+ "encrypted_package": "encrypted",
344
+ "zip_expansion_limit": "oversized_expanded_bytes",
345
+ "zip_entry_limit": "package_entry_limit_exceeded",
346
+ "zip_path_traversal": "path_traversal_detected",
347
+ "active_content": "macro_detected",
348
+ "external_link": "external_link_detected",
349
+ }
350
+
351
+ DEFAULT_INTAKE_POLICY = DocumentIntakePolicy()
352
+
353
+
354
def inspect_document_intake(
    source_path: str | Path,
    *,
    expected_format: str | object | None = None,
    declared_mime_type: str | None = None,
    policy: DocumentIntakePolicy | None = None,
) -> DocumentIntakeResult:
    """Run every pre-parse intake gate over a local file and report the verdict.

    Gates are evaluated in a fixed order and the first failure produces a
    blocked result: file existence, extension allowlist, raw size, runtime
    adapter availability, content/package inspection, expected-format match,
    extension/content match, and declared-MIME match.

    Args:
        source_path: Location of the candidate document on the local filesystem.
        expected_format: Format the caller expects (string or enum-like object).
        declared_mime_type: MIME type declared by the caller, if any.
        policy: Limits to apply; ``DEFAULT_INTAKE_POLICY`` when omitted.

    Returns:
        A ``DocumentIntakeResult`` with status ``ok`` or ``blocked``.
    """

    path = Path(source_path)
    limits = policy or DEFAULT_INTAKE_POLICY
    expected = _format_value(expected_format)
    declared_mime = _normalize_mime(declared_mime_type)

    # Gate 1: the artifact must be an existing regular file.
    if not (path.exists() and path.is_file()):
        return _blocked_result(
            path=path,
            expected_format=expected,
            declared_mime_type=declared_mime,
            reason="unsupported_input",
            message="Document intake requires an existing local file.",
        )

    # Gate 2: the extension must be known and allowed by the policy.
    known_format = _EXTENSION_TO_KNOWN_FORMAT.get(path.suffix.lower())
    if known_format is None or known_format not in limits.allowed_formats:
        safe_actions: tuple[str, ...] = ()
        if known_format is not None:
            safe_actions = _next_safe_actions_for_known_format(known_format)
        return _blocked_result(
            path=path,
            expected_format=expected,
            declared_mime_type=declared_mime,
            reason="unsupported_extension",
            message="Document extension is not on the supported allowlist.",
            known_format=known_format,
            next_safe_actions=safe_actions,
        )

    # Gate 3: refuse oversized files before reading them into memory.
    raw_size = path.stat().st_size
    if raw_size > limits.max_raw_bytes:
        return _blocked_result(
            path=path,
            expected_format=expected,
            declared_mime_type=declared_mime,
            reason="raw_size_limit",
            message="Document raw byte size exceeds the intake policy.",
            known_format=known_format,
            byte_size=raw_size,
        )

    payload = path.read_bytes()
    sha256 = hashlib.sha256(payload).hexdigest()

    # Gate 4: recognised formats without a runtime adapter stop here.
    if _runtime_format_for_known_format(known_format) is None:
        return _blocked_result(
            path=path,
            expected_format=expected,
            declared_mime_type=declared_mime,
            reason="known_unsupported_format",
            message=(
                "Document format is recognized, but no promoted runtime adapter "
                "can safely process this operation yet."
            ),
            known_format=known_format,
            byte_size=raw_size,
            sha256=sha256,
            next_safe_actions=_next_safe_actions_for_known_format(known_format),
        )

    # Gate 5: sniff the content; a package-level reason wins over "unrecognised".
    detected_format, expanded_byte_size, package_reason = _detect_format(
        payload,
        limits,
        known_format=known_format,
    )
    if package_reason is not None or detected_format is None:
        if package_reason is not None:
            failure_reason = package_reason
            failure_message = _reason_message(package_reason)
        else:
            failure_reason = "signature_mismatch"
            failure_message = "Document signature or package structure is not supported."
        return _blocked_result(
            path=path,
            expected_format=expected,
            declared_mime_type=declared_mime,
            reason=failure_reason,
            message=failure_message,
            known_format=known_format,
            byte_size=raw_size,
            sha256=sha256,
        )

    # Gates 6-8: cross-check the detected format against caller expectation,
    # filename extension and declared MIME type, in that order.
    mismatch: tuple[str, str] | None = None
    if expected is not None and not _expected_format_matches(
        expected=expected,
        detected_format=detected_format,
        known_format=known_format,
    ):
        mismatch = (
            "signature_mismatch",
            "Expected document format does not match detected content.",
        )
    elif not _known_format_matches_detected(
        known_format=known_format,
        detected_format=detected_format,
    ):
        mismatch = (
            "signature_mismatch",
            "Filename extension does not match detected content.",
        )
    elif declared_mime is not None and not _declared_mime_matches_formats(
        declared_mime=declared_mime,
        known_format=known_format,
        detected_format=detected_format,
    ):
        mismatch = (
            "mime_mismatch",
            "Declared MIME type does not match detected document format.",
        )

    if mismatch is not None:
        mismatch_reason, mismatch_message = mismatch
        return _blocked_result(
            path=path,
            detected_format=detected_format,
            known_format=known_format,
            expected_format=expected,
            declared_mime_type=declared_mime,
            reason=mismatch_reason,
            message=mismatch_message,
            byte_size=raw_size,
            expanded_byte_size=expanded_byte_size,
            sha256=sha256,
        )

    return _result(
        status="ok",
        path=path,
        detected_format=_document_format(detected_format),
        known_format=_known_document_format(known_format),
        format_family=_format_family(known_format),
        expected_format=_document_format(expected),
        declared_mime_type=declared_mime,
        mime_type=_DETECTED_MIME_BY_FORMAT[detected_format],
        byte_size=raw_size,
        expanded_byte_size=expanded_byte_size,
        sha256=sha256,
        blocked_reason=None,
        findings=(),
    )
522
+
523
+
524
def _detect_format(
    payload: bytes, policy: DocumentIntakePolicy, *, known_format: str
) -> tuple[str | None, int, str | None]:
    """Identify the payload's format from its bytes.

    Detection order: binary/container signatures, then (for non-ZIP payloads)
    the text, data and code sniffers, and finally ZIP package inspection.

    Returns:
        ``(detected_format, expanded_byte_size, blocking_reason)``. The format
        is ``None`` when nothing matched; the reason is ``None`` unless the
        package itself must be rejected.
    """
    # Function-scope import: only the exception type is needed, and gzip
    # (already imported by this module) guarantees zlib is available.
    import zlib

    binary_result = _detect_binary_container_format(
        payload,
        policy,
        known_format=known_format,
    )
    if binary_result is not None:
        return binary_result

    if not payload.startswith(_ZIP_SIGNATURES):
        if _is_text_web_payload(payload, known_format=known_format):
            return known_format, 0, None
        if _is_data_payload(payload, known_format=known_format):
            return known_format, 0, None
        if _is_code_payload(payload, known_format=known_format):
            return known_format, 0, None
        return None, 0, None

    try:
        with zipfile.ZipFile(PathBytes(payload)) as package:
            package_entries = package.infolist()
            package_reason = _inspect_zip_package(package, package_entries, policy)
            if package_reason is not None:
                return None, 0, package_reason

            names = frozenset(info.filename for info in package_entries)
            detected = _detect_zip_format(package, names)
            # OWPML shares the HWPX package layout; honour the filename's claim.
            if detected == "hwpx" and known_format == "owpml":
                detected = "owpml"
            expanded_size = sum(info.file_size for info in package_entries)
            return detected, expanded_size, None
    except (zipfile.BadZipFile, zlib.error, EOFError):
        # Member reads during inspection can fail with a raw zlib.error
        # (corrupt deflate data) or EOFError (truncated member); both mean the
        # package is damaged and must be reported, not allowed to crash intake.
        return None, 0, "corrupt_package"
    except NotImplementedError:
        return None, 0, "unsupported_compression"
561
+
562
+
563
def _detect_binary_container_format(
    payload: bytes,
    policy: DocumentIntakePolicy,
    *,
    known_format: str,
) -> tuple[str | None, int, str | None] | None:
    """Recognise PDF, OLE, 7z, gzip and tar payloads.

    PDF and OLE signatures are honoured for any extension. The 7z, gzip and
    tar probes only run when the extension claims that format.

    Returns:
        A ``(format, expanded_size, reason)`` triple when a probe produced a
        verdict, otherwise ``None`` so the caller keeps looking.
    """
    if payload.startswith(_PDF_SIGNATURE):
        return "pdf", 0, None
    if payload.startswith(_OLE_SIGNATURE):
        return "hwp", 0, None

    if known_format == "7z":
        return ("7z", 0, None) if payload.startswith(_SEVEN_Z_SIGNATURE) else None
    if known_format == "gz":
        if payload.startswith(b"\x1f\x8b"):
            return _detect_gzip_format(payload, policy)
        return None
    if known_format == "tar":
        tar_outcome = _detect_tar_format(payload, policy)
        detected, _, reason = tar_outcome
        # No format and no reason means "not a tar"; let other probes run.
        if detected is None and reason is None:
            return None
        return tar_outcome
    return None
583
+
584
+
585
def _is_text_web_payload(payload: bytes, *, known_format: str) -> bool:
    """Report whether the payload is plausible text for a text/web format.

    The payload must be NUL-free UTF-8. HTML additionally needs a recognisable
    tag within the first 4096 characters, and RTF must open with ``{\\rtf``.
    """
    if known_format not in _TEXT_WEB_FORMAT_VALUES or b"\x00" in payload:
        return False
    try:
        text = payload.decode("utf-8")
    except UnicodeDecodeError:
        return False

    if known_format == "rtf":
        return text.lstrip().startswith("{\\rtf")
    if known_format in ("html", "htm"):
        head = text[:4096].lower()
        return any(tag in head for tag in ("<html", "<body", "<p"))
    return True
600
+
601
+
602
def _is_data_payload(payload: bytes, *, known_format: str) -> bool:
    """Report whether the payload is valid for the claimed data format.

    The payload must be NUL-free UTF-8. JSON, JSON Lines, YAML and XML family
    formats must parse; every other data format only needs non-blank text.
    """
    if known_format not in _DATA_FORMAT_VALUES or b"\x00" in payload:
        return False
    try:
        text = payload.decode("utf-8")
    except UnicodeDecodeError:
        return False

    if known_format == "jsonl":
        # Blank lines are ignored; every remaining line must be valid JSON.
        return all(_loads_json(line) for line in text.splitlines() if line.strip())

    parsers = (
        (("json", "geojson"), _loads_json),
        (("yaml", "yml"), _loads_yaml),
        (("xml", "rdf", "gpx", "kml", "hml"), _loads_xml),
    )
    for formats, parser in parsers:
        if known_format in formats:
            return parser(text)

    return bool(text.strip())
622
+
623
+
624
def _is_code_payload(payload: bytes, *, known_format: str) -> bool:
    """Report whether the payload is parseable source for a code format.

    The payload must be NUL-free, non-blank UTF-8 that ``ast.parse`` accepts.
    The source is only parsed, never executed.
    """
    if known_format not in _CODE_FORMAT_VALUES:
        return False
    if b"\x00" in payload:
        return False
    try:
        decoded = payload.decode("utf-8")
    except UnicodeDecodeError:
        return False
    if not decoded.strip():
        return False
    try:
        ast.parse(decoded)
    except (SyntaxError, ValueError, RecursionError, MemoryError):
        # Untrusted source can be pathologically nested: the parser then
        # signals RecursionError or MemoryError rather than SyntaxError.
        # Treat every parser refusal as "not valid code" so intake fails
        # closed instead of crashing.
        return False
    return True
640
+
641
+
642
def _expected_format_matches(
    *,
    expected: str,
    detected_format: str,
    known_format: str,
) -> bool:
    """Check the caller's expected format against detection results.

    It matches when it equals the detected format, equals the extension's
    format, or resolves to the detected format as its runtime format.
    """
    if expected == detected_format or expected == known_format:
        return True
    return _runtime_format_for_known_format(expected) == detected_format
651
+
652
+
653
def _known_format_matches_detected(
    *,
    known_format: str,
    detected_format: str,
) -> bool:
    """Check whether the extension's format is compatible with the content.

    Besides exact and runtime-alias matches, a ``hwp`` extension is accepted
    for content detected as ``hwpx`` or ``owpml``.
    """
    if known_format == detected_format:
        return True
    if known_format == "hwp" and detected_format in ("hwpx", "owpml"):
        return True
    return _runtime_format_for_known_format(known_format) == detected_format
663
+
664
+
665
def _declared_mime_matches_formats(
    *,
    declared_mime: str,
    known_format: str,
    detected_format: str,
) -> bool:
    """Check a declared MIME type against the detected and known formats.

    MIME types registered for the detected format always qualify. Those
    registered for the extension's format qualify only when that format is
    compatible with the detected one.
    """
    if declared_mime in _MIME_BY_FORMAT[detected_format]:
        return True
    if not _known_format_matches_detected(
        known_format=known_format,
        detected_format=detected_format,
    ):
        return False
    return declared_mime in _MIME_BY_FORMAT.get(known_format, frozenset())
678
+
679
+
680
def _loads_json(payload: str) -> bool:
    """Return True when *payload* parses as a single JSON document.

    Deeply nested input makes the decoder raise ``RecursionError`` rather than
    ``JSONDecodeError``; that is treated as "not valid JSON" so hostile files
    are rejected instead of crashing intake.
    """
    try:
        json.loads(payload)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass.
        return False
    return True
686
+
687
+
688
def _loads_yaml(payload: str) -> bool:
    """Return True when *payload* loads with the safe YAML loader.

    Besides ``yaml.YAMLError``, the safe loader's scalar constructors can raise
    ``ValueError`` (for example a timestamp-shaped scalar with an impossible
    date), and deeply nested documents can raise ``RecursionError``. All of
    these mean the payload is not usable YAML.
    """
    try:
        yaml.safe_load(payload)
    except (yaml.YAMLError, ValueError, RecursionError):
        return False
    return True
694
+
695
+
696
def _loads_xml(payload: str) -> bool:
    """Return True when *payload* parses as XML under defusedxml's restrictions.

    defusedxml reports forbidden constructs (entity declarations, external
    references and the like) through exceptions derived from ``ValueError``,
    not ``ParseError``. Those payloads are rejected here so they are blocked
    rather than crashing intake.
    """
    try:
        ElementTree.fromstring(payload.encode("utf-8"))
    except (ElementTree.ParseError, ValueError):
        return False
    return True
702
+
703
+
704
def _inspect_zip_package(
    package: zipfile.ZipFile,
    entries: list[zipfile.ZipInfo],
    policy: DocumentIntakePolicy,
) -> str | None:
    """Apply the package-level safety checks to a ZIP archive.

    Returns:
        The internal reason code of the first violated rule, or ``None`` when
        the package passes every check.
    """
    if len(entries) > policy.max_entries:
        return "zip_entry_limit"

    running_total = 0
    for info in entries:
        member_name = info.filename
        # Bit 0 of the general-purpose flags marks an encrypted member.
        if info.flag_bits & 0x1:
            return "encrypted_package"
        if _is_unsafe_package_name(member_name):
            return "zip_path_traversal"
        running_total += info.file_size
        if running_total > policy.max_expanded_bytes:
            return "zip_expansion_limit"
        if _is_nested_package(member_name, policy):
            return "nested_package"
        macro_blocked = not policy.allow_macros and _is_macro_entry(member_name)
        active_blocked = (
            not policy.allow_embedded_active_content
            and _is_active_content_entry(member_name)
        )
        if macro_blocked or active_blocked:
            return "active_content"

    if policy.allow_external_links:
        return None
    return "external_link" if _has_external_relationship(package, entries) else None
732
+
733
+
734
def _detect_zip_format(package: zipfile.ZipFile, names: frozenset[str]) -> str | None:
    """Classify a ZIP package by its contents.

    Precedence: ODF mimetype, EPUB mimetype, well-known marker members
    (first matching row of ``_ZIP_FORMAT_MARKERS``), then a generic ZIP.
    """
    odf_format = _detect_odf_format(package, names)
    if odf_format is not None:
        return odf_format
    if _detect_epub_format(package, names):
        return "epub"

    marked_format = next(
        (
            document_format
            for document_format, markers in _ZIP_FORMAT_MARKERS
            if not names.isdisjoint(markers)
        ),
        None,
    )
    if marked_format is not None:
        return marked_format
    return "zip" if _is_generic_zip_candidate(names) else None
746
+
747
+
748
def _detect_epub_format(package: zipfile.ZipFile, names: frozenset[str]) -> bool:
    """Return True when the package's ``mimetype`` member declares EPUB.

    Only the first 256 bytes of the member are read; surrounding whitespace is
    ignored in the comparison.
    """
    if "mimetype" in names:
        try:
            with package.open("mimetype") as handle:
                declared = handle.read(256)
        except KeyError:
            return False
        return declared.strip() == b"application/epub+zip"
    return False
757
+
758
+
759
def _is_generic_zip_candidate(names: frozenset[str]) -> bool:
    """Return True for a non-empty package that carries no ``mimetype`` member."""
    if not names:
        return False
    return "mimetype" not in names
761
+
762
+
763
def _detect_tar_format(
    payload: bytes,
    policy: DocumentIntakePolicy,
) -> tuple[str | None, int, str | None]:
    """Inspect *payload* as a (possibly compressed) tar archive.

    Members are walked lazily so the entry and expansion limits stop header
    parsing as soon as they are exceeded, instead of after the whole archive
    has been enumerated.

    Returns:
        ``("tar", expanded_size, None)`` for an acceptable archive,
        ``(None, 0, reason)`` when a limit or safety rule is violated or the
        archive has no members, and ``(None, 0, None)`` when the payload
        cannot be read as a tar archive at all.
    """
    # Function-scope import: only the exception type is needed, and gzip
    # (already imported by this module) guarantees zlib is available.
    import zlib

    expanded_size = 0
    member_count = 0
    try:
        with tarfile.open(fileobj=io.BytesIO(payload), mode="r:*") as package:
            for member in package:
                member_count += 1
                if member_count > policy.max_entries:
                    return None, 0, "zip_entry_limit"
                if _is_unsafe_package_name(member.name):
                    return None, 0, "zip_path_traversal"
                # Links and device nodes can point outside the archive.
                if member.islnk() or member.issym() or member.isdev():
                    return None, 0, "active_content"
                expanded_size += max(member.size, 0)
                if expanded_size > policy.max_expanded_bytes:
                    return None, 0, "zip_expansion_limit"
    except (tarfile.TarError, EOFError, OSError, zlib.error):
        # With mode "r:*" the compressed-stream decoders can fail with their
        # own exception types (truncated or corrupt gzip data, for example)
        # that tarfile does not always wrap. Any such failure means the
        # payload is not a readable tar archive.
        return None, 0, None
    if member_count == 0:
        return None, 0, "corrupt_package"
    return "tar", expanded_size, None
785
+
786
+
787
def _detect_gzip_format(
    payload: bytes,
    policy: DocumentIntakePolicy,
) -> tuple[str | None, int, str | None]:
    """Decompress a gzip payload far enough to validate it and measure its size.

    The stream is read in bounded chunks, so only one chunk is held in memory
    at a time, and reading stops as soon as the expansion limit is exceeded.

    Returns:
        ``("gz", expanded_size, None)`` on success,
        ``(None, 0, "zip_expansion_limit")`` when the output exceeds
        ``policy.max_expanded_bytes``, and ``(None, 0, "corrupt_package")``
        when the stream is malformed or truncated.
    """
    # Function-scope import: only the exception type is needed, and gzip
    # (already imported by this module) guarantees zlib is available.
    import zlib

    chunk_size = 1024 * 1024
    limit = policy.max_expanded_bytes
    expanded_size = 0
    try:
        with gzip.GzipFile(fileobj=io.BytesIO(payload)) as package:
            while expanded_size <= limit:
                # Never request more than one byte past the limit.
                chunk = package.read(min(chunk_size, limit + 1 - expanded_size))
                if not chunk:
                    break
                expanded_size += len(chunk)
    except (OSError, EOFError, zlib.error):
        # A corrupt deflate body raises zlib.error, which is not an OSError.
        return None, 0, "corrupt_package"
    if expanded_size > limit:
        return None, 0, "zip_expansion_limit"
    return "gz", expanded_size, None
799
+
800
+
801
def _detect_odf_format(package: zipfile.ZipFile, names: frozenset[str]) -> str | None:
    """Return the ODF format declared by the package's ``mimetype`` member.

    The package must contain ``mimetype``, ``META-INF/manifest.xml`` and
    ``content.xml``. Unrecognised or unreadable declarations yield ``None``.
    """
    required_members = ("mimetype", "META-INF/manifest.xml", "content.xml")
    if not all(member in names for member in required_members):
        return None
    try:
        with package.open("mimetype") as handle:
            declared = handle.read(256).strip()
    except KeyError:
        return None
    return _ODF_MIMETYPE_FORMATS.get(declared)
814
+
815
+
816
def _is_unsafe_package_name(name: str) -> bool:
    """Return True when an archive member name could escape the package root.

    Rejected names: empty, containing NUL or a backslash, starting with ``/``
    or ``~``, absolute as a POSIX path, or containing a ``..`` segment.
    """
    if name == "":
        return True
    if any(forbidden in name for forbidden in ("\x00", "\\")):
        return True
    if name[0] in ("/", "~"):
        return True
    member_path = PurePosixPath(name)
    if member_path.is_absolute():
        return True
    return any(segment == ".." for segment in member_path.parts)
823
+
824
+
825
def _is_nested_package(name: str, policy: DocumentIntakePolicy) -> bool:
    """Return True when a member name denotes a package nested in a package.

    The check is skipped when ``policy.max_depth`` exceeds 1. The suffix list
    covers every container format this module recognises at the top level
    (ZIP-based office and publishing formats plus tar, gzip and 7z archives),
    so none of them can be hidden one level down.
    """
    if policy.max_depth > 1:
        return False
    nested_suffixes = (
        ".zip",
        ".hwpx",
        ".owpml",
        ".docx",
        ".xlsx",
        ".pptx",
        ".jar",
        ".odt",
        ".ods",
        ".odp",
        ".epub",
        ".7z",
        ".tar",
        ".gz",
        ".tgz",
    )
    return name.lower().endswith(nested_suffixes)
829
+
830
+
831
def _is_macro_entry(name: str) -> bool:
    """Return True when the member name contains a macro-storage marker.

    The name is lower-cased and given a leading ``/`` so markers that start
    with a slash also match top-level members.
    """
    haystack = f"/{name.lower()}"
    for marker in _MACRO_MARKERS:
        if marker in haystack:
            return True
    return False
834
+
835
+
836
def _is_active_content_entry(name: str) -> bool:
    """Return True when the member name contains an active-content marker.

    The name is lower-cased and given a leading ``/`` so markers that start
    with a slash also match top-level members.
    """
    haystack = f"/{name.lower()}"
    for marker in _ACTIVE_CONTENT_MARKERS:
        if marker in haystack:
            return True
    return False
839
+
840
+
841
def _has_external_relationship(package: zipfile.ZipFile, entries: list[zipfile.ZipInfo]) -> bool:
    """Return True when any relationship part points outside the package.

    Only members whose names end with the relationship suffix are examined,
    and only the first MiB of each, lower-cased, is searched for an external
    target mode or an http(s) target.
    """
    external_markers = (
        b'targetmode="external"',
        b"target='http://",
        b'target="http://',
        b"target='https://",
        b'target="https://',
    )
    for entry in entries:
        if entry.filename.lower().endswith(_RELATIONSHIP_SUFFIX):
            with package.open(entry) as relationship_file:
                head = relationship_file.read(1024 * 1024).lower()
            if any(marker in head for marker in external_markers):
                return True
    return False
854
+
855
+
856
def _blocked_result(
    *,
    path: Path,
    reason: str,
    message: str,
    detected_format: str | None = None,
    known_format: str | None = None,
    expected_format: str | None = None,
    declared_mime_type: str | None = None,
    byte_size: int = 0,
    expanded_byte_size: int = 0,
    sha256: str | None = None,
    next_safe_actions: tuple[str, ...] = (),
) -> DocumentIntakeResult:
    """Build a blocked intake result carrying a single security finding.

    The internal *reason* is translated to its public blocked-reason code. The
    known format and family fall back to the detected format when no known
    format is supplied.
    """
    public_reason = _blocked_reason(reason)
    blocking_finding = _finding(code=public_reason, severity="blocked", message=message)
    format_hint = known_format or detected_format
    detected_mime = (
        None if detected_format is None else _DETECTED_MIME_BY_FORMAT[detected_format]
    )
    return _result(
        status="blocked",
        path=path,
        detected_format=_document_format(detected_format),
        known_format=_known_document_format(format_hint),
        format_family=_format_family(format_hint),
        expected_format=_document_format(expected_format),
        declared_mime_type=declared_mime_type,
        mime_type=detected_mime,
        byte_size=byte_size,
        expanded_byte_size=expanded_byte_size,
        sha256=sha256,
        blocked_reason=public_reason,
        findings=(blocking_finding,),
        next_safe_actions=next_safe_actions,
    )
890
+
891
+
892
def _result(
    *,
    status: str,
    path: Path,
    detected_format: DocumentFormat | None,
    known_format: KnownDocumentFormat | None,
    format_family: DocumentFormatFamily | None,
    expected_format: DocumentFormat | None,
    declared_mime_type: str | None,
    mime_type: str | None,
    byte_size: int,
    expanded_byte_size: int,
    sha256: str | None,
    blocked_reason: BlockedReason | None,
    findings: tuple[DocumentSecurityFinding, ...],
    next_safe_actions: tuple[str, ...] = (),
) -> DocumentIntakeResult:
    """Assemble the ``DocumentIntakeResult`` for the ``document_inspect`` tool.

    The security state is ``accepted`` only for status ``"ok"``. The artifact
    reference list holds the SHA-256 digest when one is available.
    """
    correlation = _correlation_id(sha256)
    result_status = ToolResultStatus(status)
    artifact_refs = [] if sha256 is None else [f"sha256:{sha256}"]
    security_state = SecurityState.accepted if status == "ok" else SecurityState.blocked
    detected_value = None if detected_format is None else detected_format.value
    blocked_value = None if blocked_reason is None else blocked_reason.value
    summary = _text_summary(
        status=status,
        detected_format=detected_value,
        blocked_reason=blocked_value,
        byte_size=byte_size,
        expanded_byte_size=expanded_byte_size,
    )
    return DocumentIntakeResult(
        tool_id="document_inspect",
        correlation_id=correlation,
        status=result_status,
        artifact_refs=artifact_refs,
        source_path=path,
        display_name=path.name,
        detected_format=detected_format,
        known_format=known_format,
        format_family=format_family,
        expected_format=expected_format,
        declared_mime_type=declared_mime_type,
        mime_type=mime_type,
        byte_size=byte_size,
        expanded_byte_size=expanded_byte_size,
        sha256=sha256,
        security_state=security_state,
        blocked_reason=blocked_reason,
        findings=list(findings),
        next_safe_actions=list(next_safe_actions),
        text_summary=summary,
    )
937
+
938
+
939
def _finding(
    *,
    code: BlockedReason,
    severity: SecurityFindingSeverity,
    message: str,
) -> DocumentSecurityFinding:
    """Create a security finding whose id is ``security-<code value>``."""
    identifier = f"security-{code.value}"
    return DocumentSecurityFinding(
        finding_id=identifier,
        code=code,
        severity=severity,
        message=message,
    )
951
+
952
+
953
def _document_format(value: str | None) -> DocumentFormat | None:
    """Convert a format string to ``DocumentFormat``; ``None`` if absent or unknown."""
    if value is None:
        return None
    try:
        parsed = DocumentFormat(value)
    except ValueError:
        return None
    return parsed
960
+
961
+
962
def _known_document_format(value: str | None) -> KnownDocumentFormat | None:
    """Convert a format string to ``KnownDocumentFormat``; ``None`` if absent or unknown."""
    if value is None:
        return None
    try:
        parsed = KnownDocumentFormat(value)
    except ValueError:
        return None
    return parsed
969
+
970
+
971
def _format_family(value: str | None) -> DocumentFormatFamily | None:
    """Look up the format family for a format string; ``None`` if unknown."""
    member = _known_document_format(value)
    return None if member is None else KNOWN_DOCUMENT_FORMAT_FAMILIES[member]
976
+
977
+
978
def _format_value(value: str | object | None) -> str | None:
    """Normalise a format given as a string or enum-like object to lower case.

    Objects exposing a ``value`` attribute contribute that attribute; anything
    else is stringified directly. ``None`` passes through unchanged.
    """
    if value is None:
        return None
    raw = getattr(value, "value", value)
    text = str(raw)
    return text.lower()
983
+
984
+
985
def _normalize_mime(value: str | None) -> str | None:
    """Reduce a MIME declaration to its bare, lower-cased media type.

    Everything from the first ``;`` onward is dropped and surrounding
    whitespace is trimmed. ``None`` passes through unchanged.
    """
    if value is None:
        return None
    media_type, _, _ = value.partition(";")
    return media_type.strip().lower()
989
+
990
+
991
def _blocked_reason(reason: str) -> BlockedReason:
    """Translate an internal reason code to its public ``BlockedReason``.

    Codes without an entry in the translation table are used as they are.
    """
    public_code = _BLOCKED_REASON_BY_INTERNAL.get(reason, reason)
    return BlockedReason(public_code)
993
+
994
+
995
def _next_safe_actions_for_known_format(known_format: str) -> tuple[str, ...]:
    """Suggest safe follow-up actions for a recognised but blocked format.

    The advice is selected by the format's family. Families without specific
    guidance, and unknown formats, receive a single generic recommendation.
    """
    family = _format_family(known_format)
    guidance_by_family = (
        (
            DocumentFormatFamily.odf,
            (
                "Use read-only extraction after an ODF adapter passes promotion gates.",
                "Convert to a promoted editable derivative only with explicit user approval.",
            ),
        ),
        (
            DocumentFormatFamily.data_file,
            (
                "Use schema or text inspection through the data-file adapter.",
                "Do not reinterpret the file as an editable public form.",
            ),
        ),
        (
            DocumentFormatFamily.image_scan,
            (
                "Use OCR or visual extraction only after an image-scan adapter is promoted.",
                "Create a separate editable derivative instead of mutating the raster source.",
            ),
        ),
        (
            DocumentFormatFamily.archive,
            (
                "Enumerate archive members only after secure archive routing is promoted.",
                "Do not mutate archive children in place.",
            ),
        ),
        (
            DocumentFormatFamily.legacy_office,
            (
                "Use metadata-only inspection unless an explicit conversion bridge is approved.",
                "Create an editable derivative instead of mutating the legacy binary source.",
            ),
        ),
        (
            DocumentFormatFamily.geospatial_data,
            (
                "Use geospatial metadata inspection or route packaged sidecars as derivatives.",
                "Do not reinterpret GIS or 3D geometry files as editable public forms.",
            ),
        ),
        (
            DocumentFormatFamily.media_asset,
            (
                "Use media metadata or transcription extraction only after a local adapter "
                "is approved.",
                "Create a separate document derivative for written content.",
            ),
        ),
        (
            DocumentFormatFamily.code_file,
            (
                "Use read-only source inspection for context.",
                "Do not mutate code artifacts through the public-document writer.",
            ),
        ),
    )
    for candidate_family, actions in guidance_by_family:
        if family is candidate_family:
            return actions
    return ("Use a promoted format adapter or request explicit conversion to a derivative.",)
1039
+
1040
+
1041
+ def _correlation_id(sha256: str | None) -> str:
1042
+ suffix = sha256[:12] if sha256 is not None else "unavailable"
1043
+ return f"document-intake-{suffix}"
1044
+
1045
+
1046
+ def _text_summary(
1047
+ *,
1048
+ status: str,
1049
+ detected_format: str | None,
1050
+ blocked_reason: str | None,
1051
+ byte_size: int,
1052
+ expanded_byte_size: int,
1053
+ ) -> str:
1054
+ if status == "ok":
1055
+ return (
1056
+ f"Document intake accepted {detected_format} artifact "
1057
+ f"({byte_size} raw bytes, {expanded_byte_size} expanded bytes)."
1058
+ )
1059
+ return f"Document intake blocked: {blocked_reason}."
1060
+
1061
+
1062
def _reason_message(reason: str) -> str:
    """Return the human-readable message for an intake failure reason.

    Reasons without an entry in the table below get a generic
    security-validation message.
    """
    known_messages = {
        "corrupt_package": "Document package is corrupt or unreadable.",
        "unsupported_compression": "Document package uses unsupported compression.",
        "encrypted_package": "Encrypted document package members are blocked.",
        "zip_path_traversal": "Document package contains unsafe member paths.",
        "zip_expansion_limit": "Document package expands beyond the intake policy.",
        "zip_entry_limit": "Document package contains too many entries.",
        "nested_package": "Nested document packages are blocked at intake.",
        "active_content": "Document package contains macros or active content.",
        "external_link": "Document package contains external relationship targets.",
    }
    if reason in known_messages:
        return known_messages[reason]
    return "Document failed intake security validation."
1074
+
1075
+
1076
+ class PathBytes(io.BytesIO):
1077
+ """BytesIO subclass with a stable name for ZipFile diagnostics."""
1078
+
1079
+ name = "<document-intake-bytes>"