ummaya 0.2.3 → 0.2.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (534)
  1. package/README.md +17 -3
  2. package/bin/ummaya +10 -1
  3. package/npm-shrinkwrap.json +253 -2
  4. package/package.json +5 -1
  5. package/prompts/manifest.yaml +2 -2
  6. package/prompts/session_guidance_v1.md +3 -1
  7. package/prompts/system_v1.md +9 -7
  8. package/pyproject.toml +26 -7
  9. package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
  10. package/src/ummaya/_canonical/__init__.py +2 -0
  11. package/src/ummaya/context/builder.py +17 -11
  12. package/src/ummaya/engine/engine.py +30 -113
  13. package/src/ummaya/engine/query.py +20 -0
  14. package/src/ummaya/evidence/__init__.py +44 -0
  15. package/src/ummaya/evidence/__main__.py +7 -0
  16. package/src/ummaya/evidence/dataset_contract.py +193 -0
  17. package/src/ummaya/evidence/document_authoring_cases.py +33 -0
  18. package/src/ummaya/evidence/document_harness.py +313 -0
  19. package/src/ummaya/evidence/document_viewer_ux.py +391 -0
  20. package/src/ummaya/evidence/gates.py +70 -0
  21. package/src/ummaya/evidence/json_types.py +20 -0
  22. package/src/ummaya/evidence/models.py +145 -0
  23. package/src/ummaya/evidence/output_payload.py +89 -0
  24. package/src/ummaya/evidence/payload_documents.py +233 -0
  25. package/src/ummaya/evidence/route_contracts.py +224 -0
  26. package/src/ummaya/evidence/route_helpers.py +150 -0
  27. package/src/ummaya/evidence/runner.py +177 -0
  28. package/src/ummaya/evidence/source_provenance.py +246 -0
  29. package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
  30. package/src/ummaya/evidence/task_registry.py +264 -0
  31. package/src/ummaya/evidence/tool_layer.py +39 -0
  32. package/src/ummaya/evidence/tool_layer_models.py +151 -0
  33. package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
  34. package/src/ummaya/ipc/document_intent_normalization.py +185 -0
  35. package/src/ummaya/ipc/frame_schema.py +52 -5
  36. package/src/ummaya/ipc/route_diagnostics.py +73 -0
  37. package/src/ummaya/ipc/stdio.py +2282 -417
  38. package/src/ummaya/llm/client.py +234 -59
  39. package/src/ummaya/llm/config.py +8 -3
  40. package/src/ummaya/llm/reasoning.py +84 -0
  41. package/src/ummaya/primitives/__init__.py +6 -2
  42. package/src/ummaya/primitives/delegation.py +1 -1
  43. package/src/ummaya/primitives/document.py +28 -0
  44. package/src/ummaya/settings.py +0 -3
  45. package/src/ummaya/tools/discovery_bridge.py +34 -2
  46. package/src/ummaya/tools/documents/__init__.py +297 -0
  47. package/src/ummaya/tools/documents/adapter_registry.py +487 -0
  48. package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
  49. package/src/ummaya/tools/documents/artifact_store.py +454 -0
  50. package/src/ummaya/tools/documents/authoring.py +283 -0
  51. package/src/ummaya/tools/documents/baselines.py +114 -0
  52. package/src/ummaya/tools/documents/capability.py +331 -0
  53. package/src/ummaya/tools/documents/contracts.py +112 -0
  54. package/src/ummaya/tools/documents/conversion.py +521 -0
  55. package/src/ummaya/tools/documents/diff.py +275 -0
  56. package/src/ummaya/tools/documents/engines.py +163 -0
  57. package/src/ummaya/tools/documents/evaluation.py +291 -0
  58. package/src/ummaya/tools/documents/explicit_values.py +108 -0
  59. package/src/ummaya/tools/documents/fixtures.py +174 -0
  60. package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
  61. package/src/ummaya/tools/documents/formats/__init__.py +2 -0
  62. package/src/ummaya/tools/documents/formats/archive.py +528 -0
  63. package/src/ummaya/tools/documents/formats/base.py +41 -0
  64. package/src/ummaya/tools/documents/formats/code_file.py +211 -0
  65. package/src/ummaya/tools/documents/formats/data_file.py +272 -0
  66. package/src/ummaya/tools/documents/formats/hwp.py +284 -0
  67. package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
  68. package/src/ummaya/tools/documents/formats/odf.py +435 -0
  69. package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
  70. package/src/ummaya/tools/documents/formats/passive.py +766 -0
  71. package/src/ummaya/tools/documents/formats/pdf.py +702 -0
  72. package/src/ummaya/tools/documents/formats/text_web.py +268 -0
  73. package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
  74. package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
  75. package/src/ummaya/tools/documents/inspection.py +289 -0
  76. package/src/ummaya/tools/documents/intake.py +1079 -0
  77. package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
  78. package/src/ummaya/tools/documents/models.py +1598 -0
  79. package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
  80. package/src/ummaya/tools/documents/orchestrator.py +96 -0
  81. package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
  82. package/src/ummaya/tools/documents/patch.py +170 -0
  83. package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
  84. package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
  85. package/src/ummaya/tools/documents/permissions.py +110 -0
  86. package/src/ummaya/tools/documents/planner.py +616 -0
  87. package/src/ummaya/tools/documents/registry.py +2733 -0
  88. package/src/ummaya/tools/documents/render.py +978 -0
  89. package/src/ummaya/tools/documents/render_comparison.py +113 -0
  90. package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
  91. package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
  92. package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
  93. package/src/ummaya/tools/documents/reread.py +157 -0
  94. package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
  95. package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
  96. package/src/ummaya/tools/documents/scorecard.py +184 -0
  97. package/src/ummaya/tools/documents/socratic_planner.py +193 -0
  98. package/src/ummaya/tools/documents/style.py +48 -0
  99. package/src/ummaya/tools/documents/tool_defs.py +523 -0
  100. package/src/ummaya/tools/documents/validate.py +347 -0
  101. package/src/ummaya/tools/executor.py +61 -12
  102. package/src/ummaya/tools/geocoding/kakao_client.py +1 -2
  103. package/src/ummaya/tools/kma/apihub_catalog.py +984 -1
  104. package/src/ummaya/tools/kma/apihub_structured_adapter.py +86 -6
  105. package/src/ummaya/tools/kma/apihub_url_adapter.py +593 -0
  106. package/src/ummaya/tools/kma/apihub_url_catalog.py +296 -0
  107. package/src/ummaya/tools/live_proxy.py +0 -3
  108. package/src/ummaya/tools/location_adapters.py +8 -6
  109. package/src/ummaya/tools/manifest_metadata.py +16 -3
  110. package/src/ummaya/tools/models.py +5 -1
  111. package/src/ummaya/tools/mvp_surface.py +2 -2
  112. package/src/ummaya/tools/nmc/emergency_search.py +8 -6
  113. package/src/ummaya/tools/register_all.py +17 -0
  114. package/src/ummaya/tools/registry.py +10 -1
  115. package/src/ummaya/tools/resolve_location.py +4 -4
  116. package/src/ummaya/tools/routing/__init__.py +59 -0
  117. package/src/ummaya/tools/routing/builder.py +105 -0
  118. package/src/ummaya/tools/routing/cards.py +29 -0
  119. package/src/ummaya/tools/routing/decision_service.py +534 -0
  120. package/src/ummaya/tools/routing/decision_types.py +74 -0
  121. package/src/ummaya/tools/routing/feasibility.py +122 -0
  122. package/src/ummaya/tools/routing/intent.py +17 -0
  123. package/src/ummaya/tools/routing/intent_extractor.py +207 -0
  124. package/src/ummaya/tools/routing/intent_patterns.py +160 -0
  125. package/src/ummaya/tools/routing/intent_public_data.py +150 -0
  126. package/src/ummaya/tools/routing/intent_types.py +48 -0
  127. package/src/ummaya/tools/routing/lint.py +78 -0
  128. package/src/ummaya/tools/routing/metadata.py +174 -0
  129. package/src/ummaya/tools/routing/projection.py +340 -0
  130. package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
  131. package/src/ummaya/tools/routing/schema.py +81 -0
  132. package/src/ummaya/tools/routing/types.py +96 -0
  133. package/src/ummaya/tools/routing_index.py +2 -2
  134. package/src/ummaya/tools/search.py +40 -106
  135. package/src/ummaya/tools/verified_data_go_kr/_manifest.py +115 -25
  136. package/src/ummaya/tools/verified_data_go_kr/airkorea_air_quality.py +109 -4
  137. package/src/ummaya/tools/verified_data_go_kr/nmc_aed_site.py +108 -2
  138. package/src/ummaya/tools/verified_data_go_kr/pps_bid_public_info.py +174 -9
  139. package/src/ummaya/tools/verified_data_go_kr/tago_bus_arrival.py +66 -3
  140. package/src/ummaya/tools/verified_data_go_kr/tago_bus_location.py +12 -2
  141. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route.py +8 -2
  142. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route_station.py +114 -0
  143. package/src/ummaya/tools/verified_data_go_kr/tago_bus_station.py +14 -3
  144. package/src/ummaya/tools/verify_canonical_map.py +21 -0
  145. package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
  146. package/tui/package.json +1 -2
  147. package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
  148. package/tui/src/QueryEngine.ts +12 -4
  149. package/tui/src/bridge/inboundAttachments.ts +3 -3
  150. package/tui/src/cli/handlers/auth.ts +4 -13
  151. package/tui/src/cli/handlers/mcp.tsx +3 -3
  152. package/tui/src/cli/print.ts +69 -18
  153. package/tui/src/cli/update.ts +13 -13
  154. package/tui/src/commands/copy/index.ts +1 -1
  155. package/tui/src/commands/cost/cost.ts +2 -2
  156. package/tui/src/commands/init-verifiers.ts +5 -5
  157. package/tui/src/commands/init.ts +30 -30
  158. package/tui/src/commands/insights.ts +44 -44
  159. package/tui/src/commands/install-github-app/install-github-app.tsx +2 -2
  160. package/tui/src/commands/install-github-app/setupGitHubActions.ts +3 -3
  161. package/tui/src/commands/install-github-app/types.ts +8 -30
  162. package/tui/src/commands/install.tsx +5 -5
  163. package/tui/src/commands/mcp/addCommand.ts +5 -5
  164. package/tui/src/commands/mcp/xaaIdpCommand.ts +2 -2
  165. package/tui/src/commands/plugin/ManageMarketplaces.tsx +2 -2
  166. package/tui/src/commands/plugin/types.ts +6 -28
  167. package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
  168. package/tui/src/commands/reasoning/index.ts +13 -0
  169. package/tui/src/commands/reasoning/reasoning.tsx +177 -0
  170. package/tui/src/commands/rename/generateSessionName.ts +1 -1
  171. package/tui/src/commands/thinkback/thinkback.tsx +3 -3
  172. package/tui/src/commands.ts +2 -0
  173. package/tui/src/components/Feedback.tsx +1 -1
  174. package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
  175. package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
  176. package/tui/src/components/Messages.tsx +2 -1
  177. package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
  178. package/tui/src/components/Spinner/types.ts +6 -28
  179. package/tui/src/components/Spinner.tsx +2 -2
  180. package/tui/src/components/agents/generateAgent.ts +1 -1
  181. package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
  182. package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
  183. package/tui/src/components/design-system/LoadingState.tsx +2 -2
  184. package/tui/src/components/mcp/types.ts +16 -38
  185. package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
  186. package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
  187. package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
  188. package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
  189. package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
  190. package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
  191. package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
  192. package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
  193. package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
  194. package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
  195. package/tui/src/components/primitive/index.tsx +43 -1
  196. package/tui/src/components/primitive/types.ts +137 -0
  197. package/tui/src/components/ui/option.ts +4 -26
  198. package/tui/src/constants/common.ts +0 -2
  199. package/tui/src/constants/prompts.ts +4 -3
  200. package/tui/src/constants/querySource.ts +4 -26
  201. package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
  202. package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
  203. package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
  204. package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
  205. package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
  206. package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
  207. package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
  208. package/tui/src/hooks/useApiKeyVerification.ts +1 -1
  209. package/tui/src/hooks/useVirtualScroll.ts +1 -1
  210. package/tui/src/ink/ink.tsx +33 -14
  211. package/tui/src/ink/reconciler.ts +2 -3
  212. package/tui/src/ink/render-to-screen.ts +30 -10
  213. package/tui/src/ipc/bridge.ts +62 -15
  214. package/tui/src/ipc/bridgeSingleton.ts +5 -1
  215. package/tui/src/ipc/codec.ts +29 -3
  216. package/tui/src/ipc/frames.generated.ts +407 -312
  217. package/tui/src/ipc/llmClient.ts +279 -76
  218. package/tui/src/ipc/llmTypes.ts +16 -1
  219. package/tui/src/ipc/schema/frame.schema.json +1 -3475
  220. package/tui/src/keybindings/defaultBindings.ts +4 -0
  221. package/tui/src/main.tsx +32 -11
  222. package/tui/src/native-ts/file-index/index.ts +33 -3
  223. package/tui/src/observability/surface.ts +2 -2
  224. package/tui/src/probes/toolRegistryProbe.tsx +3 -1
  225. package/tui/src/projectOnboardingState.ts +7 -6
  226. package/tui/src/query/chatMessageTypes.ts +18 -0
  227. package/tui/src/query/chatMessagesBuilder.ts +1 -1
  228. package/tui/src/query/deps.ts +1 -1
  229. package/tui/src/query/messageGuards.ts +106 -0
  230. package/tui/src/query/publicDataTerminalRepair.ts +384 -0
  231. package/tui/src/query/run.ts +1075 -0
  232. package/tui/src/query/supportBoundary.ts +168 -0
  233. package/tui/src/query/toolResultErrors.ts +103 -0
  234. package/tui/src/query/toolRunner.ts +687 -0
  235. package/tui/src/query/unavailableToolRepair.ts +118 -0
  236. package/tui/src/query.ts +9 -1721
  237. package/tui/src/screens/REPL.tsx +42 -31
  238. package/tui/src/services/api/adapterManifest.ts +4 -0
  239. package/tui/src/services/api/backendChat/events.ts +117 -0
  240. package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
  241. package/tui/src/services/api/backendChat/frame.ts +9 -0
  242. package/tui/src/services/api/backendChat/streaming.ts +430 -0
  243. package/tui/src/services/api/backendChat/types.ts +62 -0
  244. package/tui/src/services/api/backendChat.ts +1 -0
  245. package/tui/src/services/api/client.ts +98 -14
  246. package/tui/src/services/api/errorUtils.ts +5 -5
  247. package/tui/src/services/api/errors.ts +1 -1
  248. package/tui/src/services/api/logging.ts +1 -1
  249. package/tui/src/services/api/ummaya/evidence.ts +194 -0
  250. package/tui/src/services/api/ummaya/messages.ts +255 -0
  251. package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
  252. package/tui/src/services/api/ummaya/provider.ts +200 -0
  253. package/tui/src/services/api/ummaya/reasoning.ts +24 -0
  254. package/tui/src/services/api/ummaya/request.ts +200 -0
  255. package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
  256. package/tui/src/services/api/ummaya/streaming.ts +365 -0
  257. package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
  258. package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
  259. package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
  260. package/tui/src/services/api/ummaya/types.ts +110 -0
  261. package/tui/src/services/api/ummaya/usage.ts +30 -0
  262. package/tui/src/services/api/ummaya.ts +26 -364
  263. package/tui/src/services/api/withRetry.ts +1 -1
  264. package/tui/src/services/awaySummary.ts +2 -2
  265. package/tui/src/services/claudeAiLimits.ts +1 -1
  266. package/tui/src/services/compact/autoCompact.ts +1 -1
  267. package/tui/src/services/compact/compact.ts +1 -1
  268. package/tui/src/services/lsp/types.ts +8 -30
  269. package/tui/src/services/tips/types.ts +6 -28
  270. package/tui/src/services/tokenEstimation.ts +1 -1
  271. package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
  272. package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
  273. package/tui/src/services/tools/toolExecution.ts +94 -1
  274. package/tui/src/skills/bundled/stuck.ts +12 -12
  275. package/tui/src/state/AppStateStore.ts +7 -0
  276. package/tui/src/store/pendingPermissionSlot.ts +1 -1
  277. package/tui/src/store/session-store.ts +10 -36
  278. package/tui/src/stubs/any-stub.ts +15 -10
  279. package/tui/src/stubs/color-diff-napi.ts +37 -23
  280. package/tui/src/stubs/globals.d.ts +3 -3
  281. package/tui/src/stubs/macro-preload.ts +23 -12
  282. package/tui/src/tools/AdapterTool/AdapterTool.ts +1239 -163
  283. package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
  284. package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
  285. package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
  286. package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
  287. package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
  288. package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
  289. package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
  290. package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
  291. package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
  292. package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
  293. package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
  294. package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
  295. package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
  296. package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
  297. package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
  298. package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
  299. package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
  300. package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
  301. package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
  302. package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
  303. package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
  304. package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
  305. package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
  306. package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
  307. package/tui/src/tools/AgentTool/permissions.ts +39 -0
  308. package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
  309. package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
  310. package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
  311. package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
  312. package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
  313. package/tui/src/tools/AgentTool/runAgent.ts +1 -1
  314. package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
  315. package/tui/src/tools/AgentTool/schemas.ts +196 -0
  316. package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
  317. package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
  318. package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
  319. package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
  320. package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
  321. package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
  322. package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
  323. package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
  324. package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
  325. package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
  326. package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
  327. package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
  328. package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
  329. package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
  330. package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
  331. package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
  332. package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
  333. package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
  334. package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
  335. package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
  336. package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
  337. package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
  338. package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
  339. package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
  340. package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
  341. package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
  342. package/tui/src/tools/BashTool/call.ts +202 -0
  343. package/tui/src/tools/BashTool/callLoader.ts +35 -0
  344. package/tui/src/tools/BashTool/commandClassification.ts +151 -0
  345. package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
  346. package/tui/src/tools/BashTool/cwdReset.ts +33 -0
  347. package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
  348. package/tui/src/tools/BashTool/modeValidation.ts +13 -1
  349. package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
  350. package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
  351. package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
  352. package/tui/src/tools/BashTool/resultLoader.ts +29 -0
  353. package/tui/src/tools/BashTool/resultMapping.ts +83 -0
  354. package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
  355. package/tui/src/tools/BashTool/schemas.ts +65 -0
  356. package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
  357. package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
  358. package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
  359. package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
  360. package/tui/src/tools/BashTool/uiLoader.ts +37 -0
  361. package/tui/src/tools/BriefTool/upload.ts +1 -1
  362. package/tui/src/tools/CalculatorTool/parser.ts +2 -2
  363. package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
  364. package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
  365. package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
  366. package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
  367. package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
  368. package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
  369. package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
  370. package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
  371. package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
  372. package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
  373. package/tui/src/tools/FileEditTool/call.ts +228 -0
  374. package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
  375. package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
  376. package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
  377. package/tui/src/tools/FileWriteTool/call.ts +223 -0
  378. package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
  379. package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
  380. package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +48 -29
  381. package/tui/src/tools/LookupPrimitive/prompt.ts +6 -7
  382. package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
  383. package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
  384. package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
  385. package/tui/src/tools/NotebookEditTool/call.ts +254 -0
  386. package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
  387. package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
  388. package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
  389. package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
  390. package/tui/src/tools/PowerShellTool/call.ts +179 -0
  391. package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
  392. package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
  393. package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
  394. package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
  395. package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
  396. package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
  397. package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
  398. package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
  399. package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
  400. package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
  401. package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
  402. package/tui/src/tools/PowerShellTool/validation.ts +39 -0
  403. package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
  404. package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +30 -19
  405. package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
  406. package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
  407. package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +51 -18
  408. package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
  409. package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
  410. package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
  411. package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
  412. package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
  413. package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
  414. package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
  415. package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
  416. package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
  417. package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
  418. package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
  419. package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
  420. package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
  421. package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
  422. package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
  423. package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
  424. package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
  425. package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
  426. package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
  427. package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
  428. package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
  429. package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
  430. package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
  431. package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
  432. package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
  433. package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +27 -10
  434. package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
  435. package/tui/src/tools/WebFetchTool/call.ts +227 -0
  436. package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
  437. package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
  438. package/tui/src/tools/WebFetchTool/types.ts +23 -0
  439. package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
  440. package/tui/src/tools/WebFetchTool/utils.ts +1 -1
  441. package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
  442. package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
  443. package/tui/src/tools/WebSearchTool/call.ts +33 -0
  444. package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
  445. package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
  446. package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
  447. package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
  448. package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
  449. package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
  450. package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
  451. package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
  452. package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
  453. package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
  454. package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
  455. package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
  456. package/tui/src/tools/_shared/citizenUserText.ts +49 -0
  457. package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
  458. package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
  459. package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
  460. package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
  461. package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
  462. package/tui/src/tools/_shared/locationInputRepair.ts +112 -0
  463. package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
  464. package/tui/src/tools/_shared/rootPrimitiveInput.ts +68 -0
  465. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
  466. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
  467. package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
  468. package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
  469. package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
  470. package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
  471. package/tui/src/tools/_shared/toolChoiceRepair.ts +61 -0
  472. package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
  473. package/tui/src/tools.ts +39 -190
  474. package/tui/src/types/fileSuggestion.ts +4 -26
  475. package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
  476. package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
  477. package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
  478. package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
  479. package/tui/src/types/message.ts +80 -102
  480. package/tui/src/types/messageQueueTypes.ts +6 -28
  481. package/tui/src/types/notebook.ts +16 -38
  482. package/tui/src/types/statusLine.ts +4 -26
  483. package/tui/src/types/tools.ts +24 -46
  484. package/tui/src/types/utils.ts +6 -28
  485. package/tui/src/upstreamproxy/relay.ts +7 -3
  486. package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
  487. package/tui/src/utils/assistantMessageFactories.ts +9 -3
  488. package/tui/src/utils/attachments.ts +1 -1
  489. package/tui/src/utils/auth.ts +129 -139
  490. package/tui/src/utils/bash/ast.ts +23 -23
  491. package/tui/src/utils/bash/bashParser.ts +5 -5
  492. package/tui/src/utils/billing.ts +1 -1
  493. package/tui/src/utils/collapseReadSearch.ts +3 -3
  494. package/tui/src/utils/cronTasks.ts +1 -1
  495. package/tui/src/utils/execFileNoThrow.ts +1 -1
  496. package/tui/src/utils/filePersistence/types.ts +16 -38
  497. package/tui/src/utils/forkedAgent.ts +1 -1
  498. package/tui/src/utils/gracefulShutdown.ts +4 -4
  499. package/tui/src/utils/heapDumpService.ts +12 -8
  500. package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
  501. package/tui/src/utils/hooks/execPromptHook.ts +1 -1
  502. package/tui/src/utils/hooks/skillImprovement.ts +1 -1
  503. package/tui/src/utils/kExaoneReasoning.ts +138 -0
  504. package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
  505. package/tui/src/utils/messages.ts +19 -0
  506. package/tui/src/utils/migrateSessions.ts +3 -3
  507. package/tui/src/utils/model/model.ts +6 -6
  508. package/tui/src/utils/multiToolLayout.ts +13 -0
  509. package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
  510. package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
  511. package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
  512. package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
  513. package/tui/src/utils/plugins/pluginLoader.ts +8 -8
  514. package/tui/src/utils/processUserInput/processSlashCommand.tsx +2 -2
  515. package/tui/src/utils/processUserInput/processUserInput.ts +26 -0
  516. package/tui/src/utils/protectedNamespace.ts +5 -3
  517. package/tui/src/utils/rawJsonToolCall.ts +242 -0
  518. package/tui/src/utils/ripgrep.ts +16 -7
  519. package/tui/src/utils/sessionTitle.ts +1 -1
  520. package/tui/src/utils/settings/applySettingsChange.ts +4 -0
  521. package/tui/src/utils/settings/permissionValidation.ts +14 -2
  522. package/tui/src/utils/settings/types.ts +9 -3
  523. package/tui/src/utils/shell/prefix.ts +1 -1
  524. package/tui/src/utils/sideQuery.ts +1 -1
  525. package/tui/src/utils/stats.ts +1 -1
  526. package/tui/src/utils/systemThemeWatcher.ts +13 -3
  527. package/tui/src/utils/teleport.tsx +1 -1
  528. package/uv.lock +394 -22
  529. package/assets/copilot-gate-logo.svg +0 -58
  530. package/assets/govon-logo.svg +0 -40
  531. package/src/ummaya/eval/__init__.py +0 -5
  532. package/src/ummaya/eval/retrieval.py +0 -713
  533. package/tui/src/services/api/claude.ts +0 -3510
  534. package/tui/src/utils/messageStream.ts +0 -186
@@ -0,0 +1,766 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Known-only passive adapters for non-promoted document families."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import csv
7
+ import gzip
8
+ import io
9
+ import json
10
+ import tarfile
11
+ import zipfile
12
+ from html.parser import HTMLParser
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING
15
+ from xml.etree import ElementTree as StdElementTree
16
+
17
+ import yaml
18
+ from defusedxml import ElementTree # type: ignore[import-untyped]
19
+
20
+ from ummaya.tools.documents.models import (
21
+ DocumentExtraction,
22
+ DocumentFormat,
23
+ ImageReference,
24
+ KnownDocumentFormat,
25
+ MetadataValue,
26
+ ParagraphBlock,
27
+ TableBlock,
28
+ TableCell,
29
+ )
30
+
31
+ if TYPE_CHECKING:
32
+ from ummaya.tools.documents.tool_defs import DocumentFieldPatch
33
+
34
+
35
# Format families. Each tuple is assigned as ``known_formats`` by one adapter
# class in this module.

# OpenDocument packages (OdfDocumentAdapter).
_ODF_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.odt, KnownDocumentFormat.ods, KnownDocumentFormat.odp,
)

# Structured and plain-text data files (DataFileDocumentAdapter).
_DATA_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.csv, KnownDocumentFormat.tsv,
    KnownDocumentFormat.xml, KnownDocumentFormat.rdf,
    KnownDocumentFormat.ttl, KnownDocumentFormat.lod,
    KnownDocumentFormat.json, KnownDocumentFormat.jsonl,
    KnownDocumentFormat.yaml, KnownDocumentFormat.yml,
    KnownDocumentFormat.geojson, KnownDocumentFormat.gpx, KnownDocumentFormat.kml,
    KnownDocumentFormat.fasta, KnownDocumentFormat.sgml, KnownDocumentFormat.dtd,
    KnownDocumentFormat.hml, KnownDocumentFormat.etc,
)

# HTML, text, RTF and Markdown exports (TextWebExportAdapter).
_TEXT_WEB_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.html, KnownDocumentFormat.htm,
    KnownDocumentFormat.txt, KnownDocumentFormat.rtf, KnownDocumentFormat.md,
)

# Pre-OOXML Office binaries (LegacyOfficeDocumentAdapter).
_LEGACY_OFFICE_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.doc, KnownDocumentFormat.xls, KnownDocumentFormat.ppt,
)

# Source-code attachments (CodeFileDocumentAdapter).
_CODE_FORMATS: tuple[KnownDocumentFormat, ...] = (KnownDocumentFormat.python,)

# Raster images and scans (ImageScanDocumentAdapter).
_IMAGE_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.png, KnownDocumentFormat.jpg, KnownDocumentFormat.jpeg,
    KnownDocumentFormat.gif, KnownDocumentFormat.tif, KnownDocumentFormat.tiff,
    KnownDocumentFormat.bmp, KnownDocumentFormat.webp,
)

# GIS sidecar files and 3D models (GeospatialDocumentAdapter).
_GEOSPATIAL_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.shp, KnownDocumentFormat.shx, KnownDocumentFormat.dbf,
    KnownDocumentFormat.prj, KnownDocumentFormat.stl,
)

# Audio and video assets (MediaAssetDocumentAdapter).
_MEDIA_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.wav, KnownDocumentFormat.mp3, KnownDocumentFormat.mp4,
)

# Archives and containers (ArchiveDocumentSetAdapter).
_ARCHIVE_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.epub, KnownDocumentFormat.zip, KnownDocumentFormat.seven_z,
    KnownDocumentFormat.tar, KnownDocumentFormat.gz,
)
102
+
103
# Lower-cased file suffix (including the leading dot) -> known format.
# ``_known_format`` performs the lookup with ``path.suffix.lower()``.
_KNOWN_BY_EXTENSION = {
    # OpenDocument packages
    ".odt": KnownDocumentFormat.odt, ".ods": KnownDocumentFormat.ods,
    ".odp": KnownDocumentFormat.odp,
    # Legacy Office binaries
    ".doc": KnownDocumentFormat.doc, ".xls": KnownDocumentFormat.xls,
    ".ppt": KnownDocumentFormat.ppt,
    # Data files
    ".csv": KnownDocumentFormat.csv, ".tsv": KnownDocumentFormat.tsv,
    ".xml": KnownDocumentFormat.xml, ".rdf": KnownDocumentFormat.rdf,
    ".ttl": KnownDocumentFormat.ttl, ".lod": KnownDocumentFormat.lod,
    ".json": KnownDocumentFormat.json, ".jsonl": KnownDocumentFormat.jsonl,
    ".yaml": KnownDocumentFormat.yaml, ".yml": KnownDocumentFormat.yml,
    ".geojson": KnownDocumentFormat.geojson, ".gpx": KnownDocumentFormat.gpx,
    ".kml": KnownDocumentFormat.kml, ".fasta": KnownDocumentFormat.fasta,
    ".sgml": KnownDocumentFormat.sgml, ".dtd": KnownDocumentFormat.dtd,
    ".hml": KnownDocumentFormat.hml, ".etc": KnownDocumentFormat.etc,
    # Source code
    ".py": KnownDocumentFormat.python,
    # Text and web exports
    ".html": KnownDocumentFormat.html, ".htm": KnownDocumentFormat.htm,
    ".txt": KnownDocumentFormat.txt, ".rtf": KnownDocumentFormat.rtf,
    ".md": KnownDocumentFormat.md,
    # Raster images
    ".png": KnownDocumentFormat.png, ".jpg": KnownDocumentFormat.jpg,
    ".jpeg": KnownDocumentFormat.jpeg, ".gif": KnownDocumentFormat.gif,
    ".tif": KnownDocumentFormat.tif, ".tiff": KnownDocumentFormat.tiff,
    ".bmp": KnownDocumentFormat.bmp, ".webp": KnownDocumentFormat.webp,
    # Geospatial and 3D model files
    ".shp": KnownDocumentFormat.shp, ".shx": KnownDocumentFormat.shx,
    ".dbf": KnownDocumentFormat.dbf, ".prj": KnownDocumentFormat.prj,
    ".stl": KnownDocumentFormat.stl,
    # Audio and video
    ".wav": KnownDocumentFormat.wav, ".mp3": KnownDocumentFormat.mp3,
    ".mp4": KnownDocumentFormat.mp4,
    # Archives and containers
    ".epub": KnownDocumentFormat.epub, ".zip": KnownDocumentFormat.zip,
    ".7z": KnownDocumentFormat.seven_z, ".tar": KnownDocumentFormat.tar,
    ".gz": KnownDocumentFormat.gz,
}
156
+
157
# Content type reported for each raster format; ``ImageScanDocumentAdapter``
# falls back to "image/unknown" for formats missing from this table.
_IMAGE_MIME = {
    KnownDocumentFormat.png: "image/png",
    # Both JPEG spellings share one content type.
    KnownDocumentFormat.jpg: "image/jpeg", KnownDocumentFormat.jpeg: "image/jpeg",
    KnownDocumentFormat.gif: "image/gif",
    # Both TIFF spellings share one content type.
    KnownDocumentFormat.tif: "image/tiff", KnownDocumentFormat.tiff: "image/tiff",
    KnownDocumentFormat.bmp: "image/bmp",
    KnownDocumentFormat.webp: "image/webp",
}
167
+
168
+
169
+ class _KnownOnlyAdapterBase:
170
+ """Shared known-only adapter behavior."""
171
+
172
+ adapter_id: str
173
+ known_formats: tuple[KnownDocumentFormat, ...]
174
+ promoted_formats: tuple[DocumentFormat, ...] = ()
175
+
176
+ @property
177
+ def engine_id(self) -> str:
178
+ """Return adapter id for diagnostics."""
179
+ return self.adapter_id
180
+
181
+ def normalize_fill_patches(
182
+ self,
183
+ patches: tuple[DocumentFieldPatch, ...],
184
+ *,
185
+ extraction: DocumentExtraction | None,
186
+ ) -> tuple[DocumentFieldPatch, ...]:
187
+ """No passive adapter normalizes fill patches because writes are not promoted."""
188
+ _ = extraction
189
+ return patches
190
+
191
+
192
class OdfDocumentAdapter(_KnownOnlyAdapterBase):
    """Read-only ODF candidate that inspects the package as ZIP plus XML."""

    adapter_id = "odf-package-read-only-adapter"
    known_formats = _ODF_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Pull text out of ``content.xml`` and report package metadata.

        A file that cannot be opened as ZIP, or a package without
        ``content.xml``, yields a warning and no paragraphs.
        """
        fmt = _known_format(path)
        blocks: list[ParagraphBlock] = []
        notes: list[str] = []
        details: dict[str, MetadataValue] = _base_metadata(
            path,
            known_format=fmt,
            adapter_id=self.adapter_id,
            mutation_policy="read_only_odf_candidate",
        )
        try:
            with zipfile.ZipFile(path) as package:
                details["package_entry_count"] = len(package.infolist())
                if "content.xml" not in package.namelist():
                    notes.append("ODF package does not contain content.xml.")
                else:
                    tree_root = ElementTree.fromstring(package.read("content.xml"))
                    text_lines = _xml_text_lines(tree_root)
                    blocks = _paragraphs_from_text_lines(
                        artifact_id,
                        text_lines,
                        source_prefix="content.xml",
                    )
        except zipfile.BadZipFile:
            notes.append("ODF read-only candidate could not open the package as ZIP.")

        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=details,
            warnings=notes,
        )
230
+
231
+
232
class DataFileDocumentAdapter(_KnownOnlyAdapterBase):
    """Read-only adapter for data files; parsers record round-trip evidence."""

    adapter_id = "data-file-read-only-adapter"
    known_formats = _DATA_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Route the file to the module-level parser registered for its format.

        Formats without a parser produce a metadata-only extraction carrying a
        warning.
        """
        fmt = _known_format(path)
        # The table is built per call because the parser functions are defined
        # further down the module than this class.
        routes = (
            ((KnownDocumentFormat.csv, KnownDocumentFormat.tsv), _inspect_delimited),
            (
                (
                    KnownDocumentFormat.json,
                    KnownDocumentFormat.jsonl,
                    KnownDocumentFormat.geojson,
                ),
                _inspect_json,
            ),
            ((KnownDocumentFormat.yaml, KnownDocumentFormat.yml), _inspect_yaml),
            (
                (
                    KnownDocumentFormat.xml,
                    KnownDocumentFormat.rdf,
                    KnownDocumentFormat.gpx,
                    KnownDocumentFormat.kml,
                    KnownDocumentFormat.hml,
                ),
                _inspect_xml,
            ),
            (
                (
                    KnownDocumentFormat.ttl,
                    KnownDocumentFormat.lod,
                    KnownDocumentFormat.fasta,
                    KnownDocumentFormat.sgml,
                    KnownDocumentFormat.dtd,
                    KnownDocumentFormat.etc,
                ),
                _inspect_text_data,
            ),
        )
        for members, parser in routes:
            if fmt in members:
                return parser(path, artifact_id=artifact_id, known_format=fmt)

        fallback_metadata = _base_metadata(
            path,
            known_format=fmt,
            adapter_id=self.adapter_id,
            mutation_policy="read_only_data_file",
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            metadata=fallback_metadata,
            warnings=[f"No passive data parser is implemented for {fmt.value}."],
        )
278
+
279
+
280
class LegacyOfficeDocumentAdapter(_KnownOnlyAdapterBase):
    """Adapter for pre-OOXML Office binaries that reports metadata only."""

    adapter_id = "legacy-office-metadata-only-adapter"
    known_formats = _LEGACY_OFFICE_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Report base metadata plus a warning; the binary content is not parsed."""
        details = _base_metadata(
            path,
            known_format=_known_format(path),
            adapter_id=self.adapter_id,
            mutation_policy="conversion_required_legacy_office",
        )
        notice = (
            "Legacy Office binary inspection is metadata-only until an explicit local "
            "conversion bridge is approved."
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            metadata=details,
            warnings=[notice],
        )
302
+
303
+
304
class TextWebExportAdapter(_KnownOnlyAdapterBase):
    """Read-only adapter for HTML, plain text, RTF and Markdown exports."""

    adapter_id = "text-web-export-read-only-adapter"
    known_formats = _TEXT_WEB_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Turn the file's text into one paragraph block per non-empty line.

        HTML goes through the HTML text extractor; every other format goes
        through the minimal RTF stripper and is then split into lines.
        """
        fmt = _known_format(path)
        # Undecodable bytes are replaced rather than raising.
        text = path.read_text(encoding="utf-8", errors="replace")
        if fmt in {KnownDocumentFormat.html, KnownDocumentFormat.htm}:
            text_lines = _html_text_lines(text)
        else:
            text_lines = _plain_text_lines(_strip_minimal_rtf(text))

        blocks = _paragraphs_from_text_lines(
            artifact_id,
            text_lines,
            source_prefix=path.name,
        )
        details = _base_metadata(
            path,
            known_format=fmt,
            adapter_id=self.adapter_id,
            mutation_policy="read_only_text_export",
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=details,
        )
333
+
334
+
335
class CodeFileDocumentAdapter(_KnownOnlyAdapterBase):
    """Read-only adapter for source-code attachments."""

    adapter_id = "code-file-read-only-adapter"
    known_formats = _CODE_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Return up to the first 200 non-empty source lines as paragraph blocks."""
        source_text = path.read_text(encoding="utf-8", errors="replace")
        preview_lines = _plain_text_lines(source_text)[:200]
        blocks = _paragraphs_from_text_lines(
            artifact_id,
            preview_lines,
            source_prefix=path.name,
        )
        details = _base_metadata(
            path,
            known_format=_known_format(path),
            adapter_id=self.adapter_id,
            mutation_policy="read_only_code_file",
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=details,
            warnings=["Code files are not public-form documents and cannot be mutated here."],
        )
359
+
360
+
361
class ImageScanDocumentAdapter(_KnownOnlyAdapterBase):
    """Adapter that only references image/scan files."""

    adapter_id = "image-scan-extraction-only-adapter"
    known_formats = _IMAGE_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Describe the file as a single image reference; no OCR is attempted."""
        fmt = _known_format(path)
        reference = ImageReference(
            image_id=f"image-{fmt.value}",
            source_path=str(path),
            # Formats missing from the MIME table get a generic content type.
            content_type=_IMAGE_MIME.get(fmt, "image/unknown"),
        )
        details = _base_metadata(
            path,
            known_format=fmt,
            adapter_id=self.adapter_id,
            mutation_policy="extraction_only",
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            images=[reference],
            metadata=details,
            warnings=["Image scan adapter does not mutate raster originals."],
        )
387
+
388
+
389
class GeospatialDocumentAdapter(_KnownOnlyAdapterBase):
    """Adapter for geospatial and 3D model files that reports metadata only."""

    adapter_id = "geospatial-metadata-only-adapter"
    known_formats = _GEOSPATIAL_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Classify the asset; only ``prj`` and ``stl`` files get a text preview.

        The preview holds at most the first 40 non-empty lines of the file
        decoded as UTF-8 with replacement of undecodable bytes.
        """
        fmt = _known_format(path)
        preview: list[ParagraphBlock] = []
        if fmt in {KnownDocumentFormat.prj, KnownDocumentFormat.stl}:
            raw_text = path.read_text(encoding="utf-8", errors="replace")
            preview = _paragraphs_from_text_lines(
                artifact_id,
                _plain_text_lines(raw_text)[:40],
                source_prefix=path.name,
            )
        details = _base_metadata(
            path,
            known_format=fmt,
            adapter_id=self.adapter_id,
            mutation_policy="metadata_only_geospatial_asset",
        )
        notice = (
            "Geospatial and 3D geometry files are classified for routing, not mutated "
            "as public documents."
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=preview,
            metadata=details,
            warnings=[notice],
        )
421
+
422
+
423
class MediaAssetDocumentAdapter(_KnownOnlyAdapterBase):
    """Adapter for audio/video attachments that reports metadata only."""

    adapter_id = "media-asset-metadata-only-adapter"
    known_formats = _MEDIA_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Report base metadata plus a warning; the media payload is not read."""
        details = _base_metadata(
            path,
            known_format=_known_format(path),
            adapter_id=self.adapter_id,
            mutation_policy="metadata_only_media_asset",
        )
        notice = (
            "Media files need a dedicated transcription or extraction adapter before "
            "content can be written into a public document derivative."
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            metadata=details,
            warnings=[notice],
        )
444
+
445
+
446
class ArchiveDocumentSetAdapter(_KnownOnlyAdapterBase):
    """Read-only adapter that lists archive members."""

    adapter_id = "archive-document-set-read-only-adapter"
    known_formats = _ARCHIVE_FORMATS

    def __init__(
        self,
        known_formats: tuple[KnownDocumentFormat, ...] | None = None,
    ) -> None:
        """Optionally narrow the handled formats; a falsy value keeps the default set."""
        self.known_formats = known_formats if known_formats else _ARCHIVE_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """List member names as paragraph blocks and record the entry count."""
        fmt = _known_format(path)
        member_names, notes = _archive_member_names(path, known_format=fmt)
        details = _base_metadata(
            path,
            known_format=fmt,
            adapter_id=self.adapter_id,
            mutation_policy="archive_read_only",
        )
        details.update(
            {
                "entry_count": len(member_names),
                "child_mutation_policy": "route_children_as_derivatives",
            }
        )
        blocks = _paragraphs_from_text_lines(
            artifact_id,
            member_names,
            source_prefix=path.name,
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=details,
            warnings=notes,
        )
480
+
481
+
482
def _inspect_delimited(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Parse a CSV/TSV file into a table plus comma-joined preview lines.

    The rows are written back out and re-read with the same delimiter; whether
    the two parses agree is recorded as ``round_trip_passed``.
    """
    if known_format is KnownDocumentFormat.tsv:
        separator = "\t"
    else:
        separator = ","
    # "utf-8-sig" drops a leading BOM; undecodable bytes are replaced.
    text = path.read_text(encoding="utf-8-sig", errors="replace")
    records = list(csv.reader(io.StringIO(text), delimiter=separator))

    buffer = io.StringIO()
    csv.writer(buffer, delimiter=separator, lineterminator="\n").writerows(records)
    round_tripped = list(csv.reader(io.StringIO(buffer.getvalue()), delimiter=separator))

    details = _base_metadata(
        path,
        known_format=known_format,
        adapter_id=DataFileDocumentAdapter.adapter_id,
        mutation_policy="read_only_data_file",
    )
    details["serializer"] = known_format.value
    details["round_trip_passed"] = records == round_tripped
    details["row_count"] = len(records)
    # Widest row wins; an empty file reports zero columns.
    details["column_count"] = max(map(len, records), default=0)

    grid = _table_from_rows(records, source_path=path.name)
    preview = _paragraphs_from_text_lines(
        artifact_id,
        [",".join(record) for record in records],
        source_prefix=path.name,
    )
    return DocumentExtraction(
        artifact_id=artifact_id,
        tables=[grid],
        paragraphs=preview,
        metadata=details,
    )
519
+
520
+
521
def _inspect_json(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Parse a JSON, JSON Lines or GeoJSON file and record round-trip evidence.

    For JSON Lines every non-blank line is parsed as its own value and the
    result is the list of those values; otherwise the whole payload is one
    document. The parsed data is re-serialized and parsed again, and equality
    of the two parses is reported as ``round_trip_passed``.

    Raises:
        json.JSONDecodeError: if the payload (or any JSON Lines row) is not
            valid JSON.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # "utf-8-sig" strips an optional byte-order mark. With plain "utf-8" the BOM
    # survives as U+FEFF and json.loads rejects the document. This matches the
    # decoding used for delimited files in this module.
    payload = path.read_text(encoding="utf-8-sig")
    if known_format is KnownDocumentFormat.jsonl:
        values = [json.loads(line) for line in payload.splitlines() if line.strip()]
        serialized = "\n".join(
            json.dumps(value, ensure_ascii=False, sort_keys=True) for value in values
        )
        reparsed: object = [json.loads(line) for line in serialized.splitlines()]
        parsed: object = values
    else:
        parsed = json.loads(payload)
        serialized = json.dumps(parsed, ensure_ascii=False, sort_keys=True)
        reparsed = json.loads(serialized)
    return _structured_data_extraction(
        artifact_id,
        path=path,
        known_format=known_format,
        serializer=known_format.value,
        parsed=parsed,
        round_trip_passed=parsed == reparsed,
    )
547
+
548
+
549
def _inspect_yaml(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Load a YAML file with the safe loader and record round-trip evidence.

    The loaded value is dumped with the safe dumper and loaded again; equality
    of the two loads is reported as ``round_trip_passed``.
    """
    document_text = path.read_text(encoding="utf-8")
    loaded = yaml.safe_load(document_text)
    dumped = yaml.safe_dump(loaded, allow_unicode=True, sort_keys=True)
    reloaded = yaml.safe_load(dumped)
    return _structured_data_extraction(
        artifact_id,
        path=path,
        known_format=known_format,
        serializer="yaml.safe_load/safe_dump",
        parsed=loaded,
        round_trip_passed=loaded == reloaded,
    )
565
+
566
+
567
def _inspect_xml(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Parse an XML-family file and summarise its root tag and text.

    The tree is serialized and parsed again; the round trip counts as passed
    when the namespace-free root tag is the same in both parses.
    """
    tree_root = ElementTree.fromstring(path.read_bytes())
    as_text = StdElementTree.tostring(tree_root, encoding="unicode")
    second_root = ElementTree.fromstring(as_text.encode("utf-8"))

    root_tag = _local_name(tree_root.tag)
    summary = {"root_tag": root_tag, "text": " ".join(_xml_text_lines(tree_root))}
    tags_match = root_tag == _local_name(second_root.tag)
    return _structured_data_extraction(
        artifact_id,
        path=path,
        known_format=known_format,
        serializer="defusedxml.ElementTree",
        parsed=summary,
        round_trip_passed=tags_match,
    )
584
+
585
+
586
def _inspect_text_data(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Preview a text-based data file as up to 200 non-empty lines.

    No real serializer is involved, so ``round_trip_passed`` is always
    recorded as ``True`` and ``line_count`` counts the preview lines only.
    """
    raw_text = path.read_text(encoding="utf-8", errors="replace")
    preview_lines = _plain_text_lines(raw_text)[:200]
    details = _base_metadata(
        path,
        known_format=known_format,
        adapter_id=DataFileDocumentAdapter.adapter_id,
        mutation_policy="read_only_data_file",
    )
    details.update(
        {
            "serializer": "plain-text-preview",
            "round_trip_passed": True,
            "line_count": len(preview_lines),
        }
    )
    blocks = _paragraphs_from_text_lines(
        artifact_id,
        preview_lines,
        source_prefix=path.name,
    )
    return DocumentExtraction(
        artifact_id=artifact_id,
        paragraphs=blocks,
        metadata=details,
    )
612
+
613
+
614
def _structured_data_extraction(
    artifact_id: str,
    *,
    path: Path,
    known_format: KnownDocumentFormat,
    serializer: str,
    parsed: object,
    round_trip_passed: bool,
) -> DocumentExtraction:
    """Wrap an already-parsed value in an extraction with serializer metadata.

    The metadata records the serializer name, the round-trip outcome and the
    Python type name of ``parsed``; the paragraphs are its preview lines.
    """
    details = _base_metadata(
        path,
        known_format=known_format,
        adapter_id=DataFileDocumentAdapter.adapter_id,
        mutation_policy="read_only_data_file",
    )
    details["serializer"] = serializer
    details["round_trip_passed"] = round_trip_passed
    details["root_type"] = type(parsed).__name__

    blocks = _paragraphs_from_text_lines(
        artifact_id,
        _structured_preview_lines(parsed),
        source_prefix=path.name,
    )
    return DocumentExtraction(
        artifact_id=artifact_id,
        paragraphs=blocks,
        metadata=details,
    )
646
+
647
+
648
def _table_from_rows(rows: list[list[str]], *, source_path: str) -> TableBlock:
    """Flatten ``rows`` into one table block with a cell per value.

    Cell indices are zero-based; the per-cell ``source_path`` suffix uses
    one-based ``r<row>c<column>`` coordinates.
    """
    flattened: list[TableCell] = [
        TableCell(
            row_index=r,
            column_index=c,
            text=cell_text,
            source_path=f"{source_path}#r{r + 1}c{c + 1}",
        )
        for r, record in enumerate(rows)
        for c, cell_text in enumerate(record)
    ]
    return TableBlock(block_id="data-table-001", source_path=source_path, cells=flattened)
661
+
662
+
663
def _archive_member_names(
    path: Path,
    *,
    known_format: KnownDocumentFormat,
) -> tuple[list[str], list[str]]:
    """List the member names of an archive together with any warnings.

    Args:
        path: Archive file on disk.
        known_format: Format already resolved for ``path``.

    Returns:
        ``(names, warnings)``. ZIP, EPUB and TAR members are filtered through
        ``_safe_member_names``. A gzip file is reported as a single child named
        after the file with its last suffix removed. Formats without an
        enumerator return no names and one explanatory warning.

    Raises:
        zipfile.BadZipFile, tarfile.TarError, OSError: propagated unchanged
            when the file cannot be read as the expected container.
    """
    # EPUB is a ZIP-based container, so it is enumerated exactly like a plain
    # ZIP instead of falling through to the "not supported" branch.
    if known_format in (KnownDocumentFormat.zip, KnownDocumentFormat.epub):
        with zipfile.ZipFile(path) as archive:
            return _safe_member_names(archive.namelist()), []
    if known_format is KnownDocumentFormat.tar:
        with tarfile.open(path) as archive:
            return _safe_member_names(archive.getnames()), []
    if known_format is KnownDocumentFormat.gz:
        with gzip.open(path) as payload:
            # Reading one byte forces header validation without inflating the
            # whole stream.
            payload.read(1)
        return [path.with_suffix("").name or path.name], [
            "Gzip payload is treated as one compressed child candidate."
        ]
    if known_format is KnownDocumentFormat.seven_z:
        return [], ["7z archive enumeration is known but not promoted without a 7z runtime."]
    # Any other format reaching this helper gets an accurate message rather
    # than the 7z-specific one.
    return [], [f"No archive enumerator is implemented for {known_format.value}."]
681
+
682
+
683
+ def _safe_member_names(names: list[str]) -> list[str]:
684
+ return sorted(name for name in names if name and not name.startswith("/") and ".." not in name)
685
+
686
+
687
def _html_text_lines(payload: str) -> list[str]:
    """Return the text fragments collected by ``_VisibleTextParser`` for ``payload``."""
    parser = _VisibleTextParser()
    parser.feed(payload)
    # close() flushes text the parser is still buffering. Without it, trailing
    # text that ends in an unterminated "&..." sequence is never delivered.
    parser.close()
    return parser.lines
691
+
692
+
693
+ def _strip_minimal_rtf(payload: str) -> str:
694
+ if not payload.lstrip().startswith("{\\rtf"):
695
+ return payload
696
+ stripped = payload.replace("\\par", "\n")
697
+ return "".join(ch for ch in stripped if ch not in "{}")
698
+
699
+
700
+ def _plain_text_lines(payload: str) -> list[str]:
701
+ return [line.strip() for line in payload.splitlines() if line.strip()]
702
+
703
+
704
+ def _xml_text_lines(root: StdElementTree.Element) -> list[str]:
705
+ return [text.strip() for text in root.itertext() if text and text.strip()]
706
+
707
+
708
+ def _structured_preview_lines(value: object) -> list[str]:
709
+ if isinstance(value, dict):
710
+ return [f"{key}: {preview}" for key, preview in list(value.items())[:20]]
711
+ if isinstance(value, list):
712
+ return [json.dumps(item, ensure_ascii=False, sort_keys=True) for item in value[:20]]
713
+ return [str(value)]
714
+
715
+
716
def _paragraphs_from_text_lines(
    artifact_id: str,
    lines: list[str],
    *,
    source_prefix: str,
) -> list[ParagraphBlock]:
    """Build one paragraph block per non-empty entry of ``lines``.

    Numbering is one-based and follows the position in ``lines``, so empty
    entries leave gaps in the block ids rather than shifting later numbers.
    """
    blocks: list[ParagraphBlock] = []
    for position, content in enumerate(lines, start=1):
        if not content:
            continue
        blocks.append(
            ParagraphBlock(
                block_id=f"{artifact_id}-line-{position:03d}",
                text=content,
                source_path=f"{source_prefix}#line[{position}]",
            )
        )
    return blocks
731
+
732
+
733
def _base_metadata(
    path: Path,
    *,
    known_format: KnownDocumentFormat,
    adapter_id: str,
    mutation_policy: str,
) -> dict[str, MetadataValue]:
    """Assemble the metadata entries every adapter in this module reports.

    ``byte_size`` comes from ``path.stat()``, so a missing file raises here.
    """
    details: dict[str, MetadataValue] = {}
    details["adapter_id"] = adapter_id
    details["known_format"] = known_format.value
    details["mutation_policy"] = mutation_policy
    details["byte_size"] = path.stat().st_size
    return details
746
+
747
+
748
def _known_format(path: Path) -> KnownDocumentFormat:
    """Resolve the known format from the lower-cased suffix, defaulting to ``txt``."""
    extension = path.suffix.lower()
    if extension in _KNOWN_BY_EXTENSION:
        return _KNOWN_BY_EXTENSION[extension]
    return KnownDocumentFormat.txt
750
+
751
+
752
+ def _local_name(tag: str) -> str:
753
+ return tag.rsplit("}", 1)[-1] if "}" in tag else tag
754
+
755
+
756
+ class _VisibleTextParser(HTMLParser):
757
+ """Small HTML text extractor for passive public-form exports."""
758
+
759
+ def __init__(self) -> None:
760
+ super().__init__(convert_charrefs=True)
761
+ self.lines: list[str] = []
762
+
763
+ def handle_data(self, data: str) -> None:
764
+ text = data.strip()
765
+ if text:
766
+ self.lines.append(text)